parser.cpp

00001 /*
00002  *  This program is free software; you can redistribute it and/or modify
00003  *  it under the terms of the GNU General Public License as published by
00004  *  the Free Software Foundation; either version 2 of the License, or
00005  *  (at your option) any later version.
00006  *
00007  *  This program is distributed in the hope that it will be useful,
00008  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00009  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00010  *  GNU General Public License for more details.
00011  *
00012  *  You should have received a copy of the GNU General Public License
00013  *  along with this program; if not, write to the Free Software
00014  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00015  *
00016  * (c)Copyright 2006 Hewlett-Packard Development Company, LP.
00017  *
00018  */
00019 
00020 #include "parser.hpp"
00021 #include "iostream_t.hpp"
00022 #include <regex.h>
00023 #include "string_t.hpp"
00024 #include "Utils.hpp"
00025 
00029 C_ProtocolFrame::T_MsgError parse_xml (char   *P_buf,
00030                                        size_t *P_size,
00031                                        char   *P_buf_header,
00032                                        size_t  P_size_header) {
00033 
00034   C_ProtocolFrame::T_MsgError L_error = C_ProtocolFrame::E_MSG_ERROR_DECODING ;
00035   regex_t    L_reg_expr ;
00036   int        L_status ;
00037   char       L_buffer[100];
00038   regmatch_t L_pmatch[2] ;
00039   size_t     L_size = 0 ;
00040   char       L_tag[100] ;
00041   regoff_t   L_next ;
00042 
00043   size_t     L_parsed_size = 0 ;
00044 
00045   regex_t    L_reg_final ;
00046   string_t   L_string = "<[[:blank:]]*[/]" ;
00047 
00048   int        L_i = 0 ;
00049   char      *L_ptr = P_buf ;
00050 
00051   int        L_nb_loop = 0 ;
00052 
00053   // no body 
00054 
00055   if ((*P_size >= 2) && (P_buf[0] == '\r') && (P_buf[1]='\n')) {
00056     // there is no body 
00057     *P_size -= 2 ;
00058     L_error = C_ProtocolFrame::E_MSG_OK ;
00059   } else {
00060 
00061 
00062   L_status = regcomp (&L_reg_expr, 
00063                       "[[:blank:]]*<[[:blank:]]*([!-=?-z]*)",
00064                       REG_EXTENDED) ;
00065   
00066   if (L_status != 0) {
00067 
00068     regerror(L_status, &L_reg_expr, L_buffer, 100);
00069     regfree (&L_reg_expr) ;
00070 
00071   } else {
00072 
00073 
00074     while (L_nb_loop < 2) {
00075   
00076       L_tag[0]='\0' ;
00077 
00078       L_status = regexec (&L_reg_expr, L_ptr, 2, L_pmatch, 0) ;
00079 
00080 
00081       if (L_status == 0) {
00082         
00083         L_parsed_size += L_pmatch[0].rm_eo ;
00084         
00085         L_next = L_pmatch[0].rm_eo ;
00086         L_size = L_pmatch[1].rm_eo - L_pmatch[1].rm_so ; // tag xml
00087         
00088         memcpy(L_tag, L_ptr+L_pmatch[1].rm_so, L_size);
00089         L_tag[L_size]='\0' ;
00090         
00091         if (strcmp(L_tag,(char*)"?xml") == 0 ) {
00092           while ( ((L_next + L_i) <= (int)*P_size ) && (*(L_ptr+L_next+L_i) != '>') ) {
00093             L_i++;
00094           }
00095 
00096           L_ptr += L_next + L_i + 1 ;
00097 
00098           L_parsed_size += (L_i+1) ;
00099 
00100         } else {
00101           L_string += L_tag ;
00102           L_string += "[[:blank:]]*>[[:blank:]]*" ;
00103           L_status = regcomp (&L_reg_final, 
00104                               L_string.c_str(),
00105                               REG_EXTENDED) ;
00106           
00107           if (L_status != 0) {
00108             regerror(L_status, &L_reg_final, L_buffer, 100);
00109             regfree (&L_reg_final) ;
00110             break ;
00111           } else {
00112             L_status = regexec (&L_reg_final, L_ptr+L_next, 
00113                                 1, L_pmatch, 0) ;
00114             regfree (&L_reg_final) ;
00115             
00116             if (L_status == 0) {
00117 
00118               L_parsed_size += L_pmatch[0].rm_eo ;
00119 
00120               // find \r\n at the end
00121               if ((L_parsed_size+2) <= *P_size) {
00122                 if (   (*((L_ptr+L_next)+L_pmatch[0].rm_eo) == '\r')
00123                        && (*((L_ptr+L_next)+L_pmatch[0].rm_eo+1) == '\n')) {
00124                   L_parsed_size += 2 ; // \r\n parsed
00125                   L_error = C_ProtocolFrame::E_MSG_OK ;
00126                   break ;
00127                 } else {
00128                   L_error = C_ProtocolFrame::E_MSG_ERROR_DECODING ;
00129                   break ;
00130                 }
00131               } else {
00132                 L_error = C_ProtocolFrame::E_MSG_ERROR_DECODING_SIZE_LESS ;
00133                 break ;
00134               }
00135               
00136             } else {
00137               L_error = C_ProtocolFrame::E_MSG_ERROR_DECODING_SIZE_LESS ;
00138               break ;
00139             }
00140             
00141           }
00142         } 
00143       } else {
00144         L_error = C_ProtocolFrame::E_MSG_ERROR_DECODING ;
00145         break ;
00146       }
00147       
00148       L_nb_loop ++ ;
00149     } // while 
00150 
00151     regfree (&L_reg_expr) ;
00152     if (L_error == C_ProtocolFrame::E_MSG_OK) {
00153       *P_size -= L_parsed_size ;
00154     }
00155 
00156   }
00157 
00158   }
00159 
00160   return (L_error) ;
00161 }
00162 
00163 
/**
 * Skips the spaces and tabs found at P_ptr, without reading past the end
 * of the enclosing buffer.
 *
 * @param P_ptr          position where the scan starts (may be NULL)
 * @param P_buffer       start of the buffer P_ptr points into
 * @param P_size_buffer  number of bytes in P_buffer
 * @param P_size         out: number of blanks skipped; always written
 *                       (0 when nothing was skipped); may be NULL
 * @return pointer to the first byte that is neither ' ' nor '\t', or to
 *         the end of the buffer; P_ptr itself when nothing was skipped.
 */
char* skip_blank(char    *P_ptr, 
                 char    *P_buffer, 
                 size_t   P_size_buffer,
                 size_t  *P_size) {

  char    *L_cursor  = P_ptr ;
  size_t   L_skipped = 0 ;

  if (P_ptr != NULL) {
    char *L_limit = P_buffer + P_size_buffer ;

    while (   (L_cursor < L_limit)
           && ((*L_cursor == ' ') || (*L_cursor == '\t'))) {
      L_cursor++ ;
    }
    L_skipped = (size_t)(L_cursor - P_ptr) ;
  }

  // Always report the count so that callers do not have to pre-initialise
  // their variable.
  if (P_size != NULL) {
    *P_size = L_skipped ;
  }

  return (L_cursor) ;
}
00184 
00185 
00186 
00187 
00188 char * filter_xml(char* P_buffer) {
00189 
00190   size_t    L_size         = 0        ;
00191 
00192   size_t    L_size_buffer  = 0        ;
00193   size_t    L_size_end     = 0        ;
00194   
00195   char     *L_pos          = NULL     ;
00196   char     *L_ptr          = P_buffer ;
00197 
00198   char     *L_result       = NULL     ;
00199   char     *L_new          = NULL     ;
00200 
00201   bool      L_skip_blank   = true     ;
00202   size_t    L_size_blank   = 0        ;
00203 
00204 
00205   if ((P_buffer != NULL) && 
00206       ((L_size_buffer = strlen(P_buffer)) > 0 )) {
00207 
00208     L_size_end = L_size_buffer ;
00209 
00210     ALLOC_TABLE(L_result, 
00211                 char*, 
00212                 sizeof(char), 
00213                 (2*L_size_buffer));
00214     
00215 
00216     if ((strchr(L_ptr,'\n')) == NULL) {
00217 
00218       L_new = L_result ;
00219       L_size = L_size_buffer ;
00220 
00221       // skip blank
00222       if (L_skip_blank) {
00223         L_ptr = skip_blank(L_ptr,P_buffer, L_size_buffer, &L_size_blank) ;
00224         L_size -= L_size_blank ;
00225       }
00226 
00227       memcpy(L_new, L_ptr, L_size);
00228       L_new += (L_size - 1) ;
00229       if (*L_new != '\r') {
00230         L_new += 1 ;
00231         *L_new = '\r' ;
00232       }
00233       L_new += 2 ;
00234       *L_new = '\0' ;
00235       *(L_new-1) = '\n' ;
00236 
00237     } else {
00238       // if '\n' exists
00239 
00240       while(   (L_ptr) 
00241             && (L_pos = strchr(L_ptr,'\n')) != NULL) {
00242 
00243         L_size_blank = 0 ;
00244         // L_size : from start to '\n' not included
00245         L_size = L_pos - L_ptr ;
00246         // skip blank
00247         if (L_skip_blank) {
00248 
00249           L_ptr = skip_blank(L_ptr,P_buffer, L_size_buffer, &L_size_blank) ;
00250           L_size -= L_size_blank ;
00251           L_size_end -= L_size_blank ;
00252 
00253         }
00254 
00255         if (L_new == NULL) { L_new = L_result ; } else { L_new += 1 ; }
00256         memcpy(L_new, L_ptr, L_size);
00257         L_new += (L_size - 1) ;
00258         // test end needed ? for L_ptr
00259         if ((L_pos + 1) <= (P_buffer+L_size_buffer)) { 
00260           L_ptr = L_pos + 1 ; 
00261         } else { 
00262           L_ptr = NULL ; 
00263         }
00264 
00265         L_size_end -= (L_size + 1) ;
00266 
00267         if (*L_new != '\r') {
00268           L_new += 1 ;
00269           *(L_new) = '\r' ;
00270         }
00271         L_new += 1 ;
00272         *(L_new) = '\n' ;
00273 
00274       } // while
00275 
00276         
00277       // ctrl the end of buffer
00278       if (L_size_end > 0) {
00279 
00280         L_size = L_size_end ;
00281         L_size_blank = 0 ;
00282 
00283         // skip blank
00284         if (L_skip_blank) {
00285 
00286           L_ptr = skip_blank(L_ptr,P_buffer, L_size_buffer, &L_size_blank) ;
00287           L_size -= L_size_blank ;
00288         }
00289 
00290         if (L_size) {
00291           L_new +=1 ;
00292 
00293           memcpy(L_new, L_ptr, L_size);
00294           L_new += (L_size-1) ;
00295         
00296           if (*L_new != '\r') {
00297             L_new += 1 ;
00298             *(L_new) = '\r' ;
00299           }
00300           L_new += 2 ;
00301           *L_new = '\0' ;
00302           *(L_new-1) = '\n' ;
00303         } else {
00304           // add final '\0' 
00305           L_new += 1 ;
00306           *L_new = '\0' ;
00307         }
00308       } else {
00309         // add final '\0' 
00310         L_new += 1 ;
00311         *L_new = '\0' ;
00312         
00313       }
00314     }
00315   }
00316 
00317   if (L_result != NULL) {
00318     L_ptr = L_result ;
00319     while ((L_ptr = strstr(L_ptr, "\r\n\r\n")) != NULL ) {
00320       memmove(L_ptr+2, L_ptr+4, strlen(L_ptr+4));
00321       L_ptr += 2 ;
00322     }
00323   }
00324 
00325   return (L_result);
00326 }

Generated on Wed Mar 7 14:57:54 2007 for Seagull by  doxygen 1.4.6