xml_parse_lib.c
Go to the documentation of this file.
00001 /*************************************************************************/
00002 /* XML Parse Lib - A set of routines for parsing and generating XML.     */
00003 /*                                                                       */
00004 /* For Documentation and Usage Notes, see:                               */
00005 /*                              http://xmlparselib.sourceforge.net/      */
00006 /*                                                                       */
00007 /* Public Low-level functions:                                           */
00008 /*      xml_parse( fileptr, tag, content, maxlen, linenum );             */
00009 /*      xml_grab_tag_name( tag, name, maxlen );                          */
00010 /*      xml_grab_attrib( tag, name, value, maxlen );                     */
00011 /* Public Higher-level functions:                                        */
00012 /*      Xml_Read_File( filename );                                       */
00013 /*      Xml_Write_File( filename, xml_tree );                            */
00014 /*                                                                       */
00015 /* Xml_Parse_Lib.c - MIT License:                                        */
00016 /*  Copyright (C) 2001, Carl Kindman                                     */
00017 /* Permission is hereby granted, free of charge, to any person obtaining */
00018 /* a copy of this software and associated documentation files (the       */
00019 /* "Software"), to deal in the Software without restriction, including   */
00020 /* without limitation the rights to use, copy, modify, merge, publish,   */
00021 /* distribute, sublicense, and/or sell copies of the Software, and to    */
00022 /* permit persons to whom the Software is furnished to do so, subject to */
00023 /* the following conditions:                                             */
00024 /*                                                                       */
00025 /* The above copyright notice and this permission notice shall be        */
00026 /* included in all copies or substantial portions of the Software.       */
00027 /*                                                                       */
00028 /* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
00029 /* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
00030 /* MERCHANTABILITY, FITNESS FOR PARTICULAR PURPOSE AND NONINFRINGEMENT.  */
00031 /* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
00032 /* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
00033 /* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
00034 /* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
00035 /*                                                                       */
00036 /*  Carl Kindman 8-21-2001     carlkindman@yahoo.com                     */
00037 /*  8-15-07 - Changed from strncpy to xml_strncpy for safety & speed.    */
00038 /*  10-2-07 - Changed to gracefully handle un-escaped ampersands.        */
00039 /*  11-19-08 - Added handling of escaped characters (&#xxxx).            */
00040 /*************************************************************************/
00041 
00042 #include <stdlib.h>
00043 #include <string.h>
00044 #include <stdio.h>
00045 #include "xml_parse_lib.h"
00046 
/*.......................................................................
  . XML_NEXT_WORD - Splits the next word off the front of a text line.  .
  .                                                                     .
  . Leading delimiter characters are skipped, then characters are       .
  . copied into 'word' until the next delimiter, the end of the line,   .
  . or maxlen-1 characters, whichever comes first.  'line' is then      .
  . shortened in place so that it begins with whatever followed the     .
  . copied characters (the terminating delimiter itself is kept).       .
  .                                                                     .
  . Inputs:   line   - Null-terminated text; modified in place.         .
  .           maxlen - Size of the 'word' buffer, terminator included.  .
  .           delim  - Null-terminated set of delimiter characters.     .
  . Output:   word   - The extracted word; empty when the line holds    .
  .                    nothing but delimiters (or is empty).            .
  .......................................................................*/
void Xml_Next_Word( char *line, char *word, int maxlen, char *delim )
{
 int src = 0, dst, len = 0, d, limit;
 int at_delim = 1;      /* True while the current character is a delimiter. */

 /* Step over any delimiters that precede the word. */
 while (at_delim && (line[src] != '\0'))
  {
   for (d = 0;  (delim[d] != '\0') && (delim[d] != line[src]);  d++) ;
   if (delim[d] == line[src])
    src++;
   else
    at_delim = 0;
  }

 /* Copy the word, leaving one byte of 'word' free for the terminator. */
 limit = maxlen - 1;
 while (!at_delim && (len < limit) && (line[src] != '\0'))
  {
   word[len++] = line[src++];
   if (line[src] == '\0')
    continue;           /* End of line; the loop condition stops the copy. */
   for (d = 0;  (delim[d] != '\0') && (delim[d] != line[src]);  d++) ;
   if (delim[d] == line[src])
    at_delim = 1;
  }

 /* Slide the unread remainder of the line down to the front. */
 for (dst = 0;  line[src] != '\0';  dst++, src++)
  line[dst] = line[src];
 line[dst] = '\0';
 word[len] = '\0';
}
/********************************************************************************/
/* xml_strncpy - Copy src string to dst string, up to maxlen characters.        */
/* Unlike strncpy, it does not pad the rest of the destination; it only copies  */
/* as many characters as are needed.  The src string must be null-terminated    */
/* if its allocated length is shorter than maxlen.                              */
/* At most maxlen-1 characters are copied, and dst is then null-terminated.     */
/* The dst string should be pre-allocated to at least maxlen bytes; a smaller   */
/* dst is only safe when src is known to be shorter than dst, as with strcpy.   */
/* If maxlen is zero or negative, dst has no usable capacity and is left        */
/* untouched (nothing, not even a terminator, is written).                      */
/********************************************************************************/
void xml_strncpy( char *dst, const char *src, int maxlen )
{
  int j = 0, oneless;

  if (maxlen <= 0) return;      /* No room even for the terminator. */
  oneless = maxlen - 1;
  while ((j < oneless) && (src[j] != '\0')) { dst[j] = src[j];  j++; }
  dst[j] = '\0';
}
00100 
00101 
/* Returns non-zero when ch is a blank, tab, line-feed or carriage-return. */
static int xml_is_blank_char( char ch )
{
 switch (ch)
  {
   case ' ':  case '\t':  case '\n':  case '\r':
    return 1;
   default:
    return 0;
  }
}

/* Strips white-space (blank, tab, LF, CR) from both ends of 'word', in place. */
/* Interior white-space is kept.  An all-blank string becomes the empty string. */
void xml_remove_leading_trailing_spaces( char *word )
{
 int src = 0, dst = 0, last;

 /* Find the first character that is not white-space. */
 while (xml_is_blank_char( word[src] ))
  src++;

 /* Slide the rest of the string, terminator included, to the front. */
 while (word[src] != '\0')
  word[dst++] = word[src++];
 word[dst] = '\0';

 /* Walk back from the last character over any trailing white-space. */
 for (last = dst - 1;  (last >= 0) && xml_is_blank_char( word[last] );  last--) ;
 word[last + 1] = '\0';
}
00113 
00114 
/* Replaces, in place, every ampersand (&), double-quote ("), and angle      */
/* bracket (<,>) in 'phrase' with its XML escape: &amp; &quot; &lt; &gt;.    */
/* Inputs:   phrase - Null-terminated string; grows as escapes are inserted. */
/*           maxlen - Size of the phrase buffer in bytes, terminator         */
/*                    included.                                              */
/* If inserting an escape would not fit in maxlen bytes, a message is        */
/* printed and the function returns; symbols before that point have already  */
/* been escaped, the remainder of the string is left as it was.              */
void xml_escape_symbols( char *phrase, int maxlen )
{
  int j = 0, n, esclen;
  const char *esc;

  n = (int)strlen(phrase);
  /* A pre-tested loop, so that an empty phrase is never scanned past its terminator. */
  while (phrase[j] != '\0')
   {
    switch (phrase[j])
     {
      case '&':  esc = "&amp;";   break;
      case '"':  esc = "&quot;";  break;
      case '<':  esc = "&lt;";    break;
      case '>':  esc = "&gt;";    break;
      default:   esc = NULL;      break;
     }
    if (esc == NULL) { j++;  continue; }

    esclen = (int)strlen(esc);
    /* One character is replaced by esclen characters; the grown string */
    /* plus its terminator must still fit inside maxlen bytes.          */
    if (n + esclen - 1 >= maxlen) {printf("xml_Parse: MaxStrLen %d exceeded.\n",maxlen); return;}

    /* Open a gap: move the tail (phrase[j+1] .. terminator) to the right. */
    memmove( phrase + j + esclen, phrase + j + 1, (size_t)(n - j) );
    memcpy( phrase + j, esc, (size_t)esclen );
    n = n + esclen - 1;
    j = j + esclen;     /* Resume after the inserted escape, so its '&' is not re-escaped. */
   }
}
00152 
00153 
/* Feeds one character of a numeric character reference into an accumulator. */
/* Inputs:   ch  - The character to examine.                                 */
/* In/Out:   hex - Non-zero once hexadecimal mode is active.  It is set to 1 */
/*                 when an 'x' or 'X' prefix is seen while *sum is still     */
/*                 zero and *hex is not already 1.                           */
/*           sum - Running value; an accepted digit is appended in base 16   */
/*                 (hex mode) or base 10.                                    */
/* Returns 1 if ch was accepted (a digit, or the hex prefix), else 0.        */
/* A rejected character leaves both *hex and *sum unchanged.                 */
int xml_ishexadecimal( char ch, int *hex, int *sum )
{
 int base = (*hex) ? 16 : 10;
 int digit = -1;

 if ((ch >= '0') && (ch <= '9'))
  digit = ch - '0';
 else if ((*hex) && (ch >= 'A') && (ch <= 'F'))
  digit = ch - 'A' + 10;
 else if ((*hex) && (ch >= 'a') && (ch <= 'f'))
  digit = ch - 'a' + 10;

 if (digit >= 0)
  {
   *sum = base * (*sum) + digit;
   return 1;
  }

 /* Only a (possibly zero-valued) prefix may precede the hex marker. */
 if (((ch == 'X') || (ch == 'x')) && (*hex != 1) && (*sum == 0))
  {
   *hex = 1;
   return 1;
  }
 return 0;
}
00166 
00167 
/* Replaces, in place, XML escapes in 'phrase' with the characters they stand */
/* for.  Recognized escapes:                                                  */
/*      &amp;  &quot;  &lt;  &gt;  &apos;                                     */
/*      &#N;   numeric reference, decimal or (with an x/X prefix) hex,        */
/*             at most four characters between "&#" and ";", and a value      */
/*             between 1 and 255 since the result is stored in one char.      */
/* An ampersand that does not start one of these is reported with printf,     */
/* left in the text unchanged, and scanning continues after it.               */
/* The string can only get shorter, so no buffer size is needed.              */
void xml_restore_escapes( char *phrase )
{
  static const struct { const char *name;  int len;  char symbol; } entity[] =
   {
    { "amp;",  4, '&'  },
    { "quot;", 5, '"'  },
    { "lt;",   3, '<'  },
    { "gt;",   3, '>'  },
    { "apos;", 5, '\'' }
   };
  const int num_entities = (int)(sizeof(entity) / sizeof(entity[0]));
  const int max_numeric_chars = 4;      /* Characters allowed between "&#" and ";". */
  int j = 0, k, n, t;

  n = (int)strlen(phrase);      /* Kept equal to the current length throughout. */
  while (j < n)
   {
    if (phrase[j] != '&') { j++;  continue; }

    if (phrase[j+1] == '#')
     { /* Numeric character reference. */
      int hex = 0, sum = 0;
      k = j + 2;
      while ((k < j + 2 + max_numeric_chars) && (k < n) && (phrase[k] != ';')
             && (xml_ishexadecimal( phrase[k], &hex, &sum )))  k++;
      if (phrase[k] != ';')
       {
        printf("xml_Parse: String ends prematurely after ampersand '%s'.\n",phrase);
        j++;
        continue;
       }
      if ((sum < 1) || (sum > 255))
       { /* Zero would cut the string short; larger values do not fit in one char. */
        printf("xml_Parse: Numeric character reference out of single-byte range in '%s'.\n",phrase);
        j++;
        continue;
       }
      phrase[j] = (char)sum;
      /* Close the gap: move phrase[k+1] .. terminator down to phrase[j+1]. */
      memmove( phrase + j + 1, phrase + k + 1, (size_t)(n - k) );
      n = n - (k - j);
      j++;
      continue;
     }

    /* Named escape: the text after '&' must match a whole entity name, ';' included. */
    for (t = 0;  t < num_entities;  t++)
     if (strncmp( &(phrase[j+1]), entity[t].name, (size_t)entity[t].len ) == 0) break;
    if (t < num_entities)
     {
      int skip = entity[t].len;
      phrase[j] = entity[t].symbol;
      memmove( phrase + j + 1, phrase + j + 1 + skip, (size_t)(n - j - skip) );
      n = n - skip;
     }
    else
     printf("xml_Parse: Unexpected char (%c) follows ampersand (&) in xml. (phrase='%s')\n", phrase[j+1], phrase );
    j++;        /* Step past the restored symbol (or the stray ampersand); it is never re-scanned. */
   }
}
00227 
00228 
00229 
/************************************************************************/
/* XML_GRAB_TAG_NAME - This routine gets the tag-name, and shortens the */
/*  xml-tag by removing it from the tag-string.  Use after calling      */
/*  xml_parse to get the next tag-string from a file.                   */
/*  The name is the first white-space-delimited word of the tag.        */
/*  If a slash is attached to the end of a name of two or more          */
/*  characters (as in "br/"), the slash is removed from the name and    */
/*  put back at the front of the remaining tag-string.                  */
/*  Use in combination with xml_grab_attrib to parse any following      */
/*  attributes within the tag-string.                                   */
/* Inputs:      tag - String as read by xml_parse.  Modified in place.  */
/*              maxlen - Maximum length of name that can be returned.   */
/*                      (Buffer-size.)                                  */
/* Output:      name - Character string.                                */
/************************************************************************/
void xml_grab_tag_name( char *tag, char *name, int maxlen )
{
 int j;
 Xml_Next_Word( tag, name, maxlen, " \t\n\r");
 j = (int)strlen(name);
 if ((j > 1) && (name[j-1] == '/'))     /* Check for case where slash was attached to end of tag-name. */
  {
   name[j-1] = '\0';    /* Move slash back to tag. */
   /* Shift the remaining tag right by one, terminator included.  The regions   */
   /* overlap, so memmove is required; the name just removed from the front of  */
   /* the tag was at least two characters long, which leaves room for the byte. */
   memmove( tag + 1, tag, strlen(tag) + 1 );
   tag[0] = '/';
  }
}
00254 
00255 
00256 
/************************************************************************/
/* XML_GRAB_ATTRIB - This routine grabs the next name-value pair        */
/*  within an xml-tag, if any.  Use after calling xml_parse and         */
/*  xml_grab_tag_name, to get the following tag attribute string.  Then */
/*  call this sequentially to grab each                                 */
/*              name = "value"                                          */
/*  attribute pair, if any, until exhausted.  When no opening quote     */
/*  follows the name (for example when the name is the "/" that closes  */
/*  the tag), the value is returned empty and the tag-string is         */
/*  emptied.                                                            */
/*  This routine expands any escaped symbols in the value-string before */
/*  returning.  A value longer than maxlen-1 characters is truncated,   */
/*  and a message is printed.                                           */
/* Inputs:      tag - String as read by xml_parse.  Modified in place:  */
/*                      the attribute that was grabbed is removed.      */
/*              maxlen - Maximum length of returned name or value that  */
/*                      can be returned.  (Buffer-sizes.)               */
/* Outputs:     name - Character string.                                */
/*              value - Character string.                               */
/************************************************************************/
void xml_grab_attrib( char *tag, char *name, char *value, int maxlen )
{
 int j=0, k=0, truncated=0;

 Xml_Next_Word( tag, name, maxlen, " \t=\n\r");  /* Get the next attribute's name. */
 /* Now get the attribute's value-string. */
 /* Sequence up to first quote.  Expect only white-space and equals-sign. */
 while ((tag[j]!='\0') && (tag[j]!='\"'))
  {
   if ((tag[j]!=' ') && (tag[j]!='\t') && (tag[j]!='\n') && (tag[j]!='\r') && (tag[j]!='='))
    printf("xml error: unexpected char before attribute value quote '%s'\n", tag);
   j++;
  }
 if (tag[j]=='\0')  { value[0] = '\0';  tag[0] = '\0';  return; }
 j++;   /* The loop above stopped on the opening quote; step over it. */
 /* Copy up to the closing quote, never writing past the value buffer. */
 while ((tag[j]!='\0') && (tag[j]!='\"'))
  {
   if (k < maxlen - 1)  value[k++] = tag[j];  else  truncated = 1;
   j++;
  }
 value[k] = '\0';
 if (truncated) printf("xml error: attribute value truncated to %d characters '%s'\n", k, tag);
 if (tag[j]!='\"') printf("xml error: unclosed attribute value quote '%s'\n", tag);  else j++;
 xml_restore_escapes( value );
 /* Now remove the attribute (name="value") from the original tag-string. */
 memmove( tag, tag + j, strlen(tag + j) + 1 );
}
00297 
00298 
00299 
/****************************************************************/
/* XML_PARSE - This routine finds the next <xxx> tag, and grabs */
/*      it, and then grabs whatever follows, up to the next tag.*/
/*      It returns the tag and its following contents.          */
/*      It removes leading and trailing white-space from both,  */
/*      and restores escaped characters in the contents.        */
/*  This routine is intended to be called iteratively, to parse */
/*  XML-formatted data.  Specifically, it pulls tag-string of   */
/*  each tag (<...>) and content-string between tags (>...<).   */
/*  Comments (<!-- ... -->) are skipped.  Carriage-returns are  */
/*  dropped, and line-feeds inside a tag become blanks.         */
/*  At end-of-file, tag and content are returned empty.         */
/* Inputs:                                                      */
/*      fileptr - Opened file pointer to read from.             */
/*      maxlen - Maximum length of returned tag or content that */
/*              can be returned.  (Buffer-sizes.)  Longer text  */
/*              is cut off at maxlen-1 characters.              */
/* Outputs:                                                     */
/*      tag - Char string of text between next <...> brackets.  */
/*      content - Char string of text after > up to next <      */
/*                bracket.                                      */
/*      lnn - Line counter; incremented for every line-feed     */
/*            that is read, including those inside tags and     */
/*            comments.                                         */
/****************************************************************/
void xml_parse( FILE *fileptr, char *tag, char *content, int maxlen, int *lnn )
{
 int i, c, dashes;      /* c is an int so that EOF is distinct from every data byte. */

 /* Get up to next tag. */
 do { c = getc(fileptr);  if (c=='\n') (*lnn)++; } while ((c != EOF) && (c != '<'));

 i = 0;         /* Grab this tag. */
 for (;;)
  {
   do { c = getc(fileptr); } while (c == '\r');
   if ((c == EOF) || (c == '>')) break;
   if (c == '\n') { (*lnn)++;  c = ' '; }
   tag[i++] = (char)c;
   if ((i==3) && (tag[0]=='!') && (tag[1]=='-') && (tag[2]=='-'))
    { /*Filter_comment.*/
     dashes = 0;        /* Count of consecutive '-' seen; "-->" ends the comment. */
     do
      {
       c = getc(fileptr);
       if (c == '\n') (*lnn)++;
       if (c == '-') dashes++;
       else if ((c != '>') || (dashes == 1)) dashes = 0;
      }
     while ((c != EOF) && ((dashes < 2) || (c != '>')));
     /* Move on to the tag that follows the comment, and start over with it. */
     while ((c != EOF) && (c != '<')) { c = getc(fileptr);  if (c=='\n') (*lnn)++; }
     i = 0;
     if (c == EOF) break;
     continue;
    } /*Filter_comment.*/
   if (i >= maxlen) { i--;  break; }    /* Buffer full: give up the last byte to the terminator. */
  }
 tag[i] = '\0';

 i = 0;         /* Now grab contents until next tag. */
 for (;;)
  {
   do { c = getc(fileptr); } while (c == '\r');
   if (c == EOF) break;
   /* Leave the '<' (or the character that did not fit) for the next call. */
   if ((c == '<') || (i >= maxlen - 1)) { ungetc( c, fileptr );  break; }
   if (c == '\n') (*lnn)++;
   content[i++] = (char)c;
  }
 content[i] = '\0';

 /* Clean-up by removing leading and trailing white-spaces, and restoring any escaped characters. */
 xml_remove_leading_trailing_spaces( tag );
 xml_remove_leading_trailing_spaces( content );
 xml_restore_escapes( content );
}
00355 
//added by Zhan Wei

/* XML_PARSE_TAG_ONLY - Finds the next <xxx> tag and returns the text      */
/*  between its brackets, with leading and trailing white-space removed.   */
/*  Reads the tag the same way xml_parse does, but stops right after the   */
/*  closing '>', so the caller can read what follows the tag directly      */
/*  from the stream.                                                       */
/*  Comments (<!-- ... -->) are skipped.  Carriage-returns are dropped,    */
/*  and line-feeds inside the tag become blanks.  At end-of-file the tag   */
/*  is returned empty.                                                     */
/* Inputs:                                                                 */
/*      fileptr - Opened file pointer to read from.                        */
/*      maxlen - Size of the tag buffer.  A longer tag is cut off at       */
/*               maxlen-1 characters.                                      */
/* Outputs:                                                                */
/*      tag - Char string of text between next <...> brackets.             */
/*      lnn - Line counter; incremented for every line-feed that is read,  */
/*            including those inside tags and comments.                    */
void xml_parse_tag_only( FILE *fileptr, char *tag, int maxlen, int *lnn )
{
 int i, c, dashes;      /* c is an int so that EOF is distinct from every data byte. */

 /* Get up to next tag. */
 do { c = getc(fileptr);  if (c=='\n') (*lnn)++; } while ((c != EOF) && (c != '<'));

 i = 0;         /* Grab this tag. */
 for (;;)
  {
   do { c = getc(fileptr); } while (c == '\r');
   if ((c == EOF) || (c == '>')) break;
   if (c == '\n') { (*lnn)++;  c = ' '; }
   tag[i++] = (char)c;
   if ((i==3) && (tag[0]=='!') && (tag[1]=='-') && (tag[2]=='-'))
    { /*Filter_comment.*/
     dashes = 0;        /* Count of consecutive '-' seen; "-->" ends the comment. */
     do
      {
       c = getc(fileptr);
       if (c == '\n') (*lnn)++;
       if (c == '-') dashes++;
       else if ((c != '>') || (dashes == 1)) dashes = 0;
      }
     while ((c != EOF) && ((dashes < 2) || (c != '>')));
     /* Move on to the tag that follows the comment, and start over with it. */
     while ((c != EOF) && (c != '<')) { c = getc(fileptr);  if (c=='\n') (*lnn)++; }
     i = 0;
     if (c == EOF) break;
     continue;
    } /*Filter_comment.*/
   if (i >= maxlen) { i--;  break; }    /* Buffer full: give up the last byte to the terminator. */
  }
 tag[i] = '\0';

 xml_remove_leading_trailing_spaces( tag );
 //don't grab the content, let user do low-level stream parsing
}
00384 
00385 /* ============================================================== */
00386 /* End of Re-Usable XML Parser Routines.                          */
00387 /* ============================================================== */
00388 


appl
Author(s): petercai
autogenerated on Tue Jan 7 2014 11:02:29