json_tokener.c
Go to the documentation of this file.
00001 /*
00002  * $Id: json_tokener.c,v 1.20 2006/07/25 03:24:50 mclark Exp $
00003  *
00004  * Copyright (c) 2004, 2005 Metaparadigm Pte. Ltd.
00005  * Michael Clark <michael@metaparadigm.com>
00006  *
00007  * This library is free software; you can redistribute it and/or modify
00008  * it under the terms of the MIT license. See COPYING for details.
00009  *
00010  */
00011 
00012 #include "config.h"
00013 
00014 #include <stdio.h>
00015 #include <stdlib.h>
00016 #include <ctype.h>
00017 #include <string.h>
00018 #include <strings.h>
00019 
00020 #include "bits.h"
00021 #include "debug.h"
00022 #include "printbuf.h"
00023 #include "arraylist.h"
00024 #include "json_object.h"
00025 #include "json_tokener.h"
00026 
00027 
00028 #if !HAVE_STRNCASECMP && defined(_MSC_VER)
00029   /* MSC has the version as _strnicmp */
00030 # define strncasecmp _strnicmp
00031 #elif !HAVE_STRNCASECMP
00032 /*# error You do not have strncasecmp on your system.*/
00033 #endif /* HAVE_STRNCASECMP */
00034 
00035 
/* Literal spellings the tokener compares input against (case-insensitively,
 * via strncasecmp) when recognising null and boolean values. */
static const char* json_null_str = "null";
static const char* json_true_str = "true";
static const char* json_false_str = "false";

/*
 * Human-readable messages for tokener errors.  The table is indexed with
 * tok->err (see the debug message emitted at the end of
 * json_tokener_parse_ex), so the order of the entries must not change.
 */
const char* json_tokener_errors[] = {
  "success",
  "continue",
  "nesting too deep",
  "unexpected end of data",
  "unexpected character",
  "null expected",
  "boolean expected",
  "number expected",
  "array value separator ',' expected",
  "quoted object property name expected",
  "object property name separator ':' expected",
  "object value separator ',' expected",
  "invalid string sequence",
  "expected comment",
};
00056 
00057 
00058 struct json_tokener* json_tokener_new()
00059 {
00060   struct json_tokener *tok = calloc(1, sizeof(struct json_tokener));
00061   tok->pb = printbuf_new();
00062   json_tokener_reset(tok);
00063   return tok;
00064 }
00065 
00066 void json_tokener_free(struct json_tokener *tok)
00067 {
00068   json_tokener_reset(tok);
00069   if(tok) printbuf_free(tok->pb);
00070   free(tok);
00071 }
00072 
00073 static void json_tokener_reset_level(struct json_tokener *tok, int depth)
00074 {
00075   tok->stack[depth].state = json_tokener_state_eatws;
00076   tok->stack[depth].saved_state = json_tokener_state_start;
00077   json_object_put(tok->stack[depth].current);
00078   tok->stack[depth].current = NULL;
00079   free(tok->stack[depth].obj_field_name);
00080   tok->stack[depth].obj_field_name = NULL;
00081 }
00082 
00083 void json_tokener_reset(struct json_tokener *tok)
00084 {
00085   int i;
00086   for(i = tok->depth; i >= 0; i--)
00087     json_tokener_reset_level(tok, i);
00088   tok->depth = 0;
00089   tok->err = json_tokener_success;
00090 }
00091 
00092 struct json_object* json_tokener_parse(const char *str)
00093 {
00094   struct json_tokener* tok;
00095   struct json_object* obj;
00096 
00097   tok = json_tokener_new();
00098   obj = json_tokener_parse_ex(tok, str, -1);
00099   if(tok->err != json_tokener_success)
00100     obj = error_ptr(-tok->err);
00101   json_tokener_free(tok);
00102   return obj;
00103 }
00104 
00105 
00106 /*#if !HAVE_STRNDUP*/
/*
 * CAW: compliant version of strndup().
 *
 * Returns a freshly malloc'ed, NUL-terminated copy of at most `n` bytes of
 * `str`; the caller frees it.  Returns NULL when `str` is NULL or the
 * allocation fails.
 *
 * Never examines more than `n` bytes of `str`, so the source does not need
 * to be NUL-terminated as long as at least `n` bytes are readable.
 */
char* json_c_strndup(const char* str, size_t n)
{
  const char *nul;
  size_t len;
  char *s;

  if(!str) return NULL;

  /* Bounded scan: strlen() would run past `n` on unterminated input. */
  nul = memchr(str, '\0', n);
  len = nul ? (size_t)(nul - str) : n;

  s = malloc(len + 1);
  if(s) {
    memcpy(s, str, len);
    s[len] = '\0';
  }

  return s;
}
00125 /*#endif*/
00126 
00127 
00128 #define state  tok->stack[tok->depth].state
00129 #define saved_state  tok->stack[tok->depth].saved_state
00130 #define current tok->stack[tok->depth].current
00131 #define obj_field_name tok->stack[tok->depth].obj_field_name
00132 
00133 struct json_object* json_tokener_parse_ex(struct json_tokener *tok,
00134                                           const char *str, int len)
00135 {
00136   struct json_object *obj = NULL;
00137   char c;
00138 
00139   tok->char_offset = 0;
00140   tok->err = json_tokener_success;
00141 
00142   do {
00143     if(tok->char_offset == len) {
00144       if(tok->depth == 0 && state == json_tokener_state_eatws &&
00145          saved_state == json_tokener_state_finish)
00146         tok->err = json_tokener_success;
00147       else
00148         tok->err = json_tokener_continue;
00149       goto out;
00150     }
00151 
00152     c = *str;
00153   redo_char:
00154     switch(state) {
00155 
00156     case json_tokener_state_eatws:
00157       if(isspace(c)) {
00158         /* okay */
00159       } else if(c == '/') {
00160         printbuf_reset(tok->pb);
00161         printbuf_memappend(tok->pb, &c, 1);
00162         state = json_tokener_state_comment_start;
00163       } else {
00164         state = saved_state;
00165         goto redo_char;
00166       }
00167       break;
00168 
00169     case json_tokener_state_start:
00170       switch(c) {
00171       case '{':
00172         state = json_tokener_state_eatws;
00173         saved_state = json_tokener_state_object_field_start;
00174         current = json_object_new_object();
00175         break;
00176       case '[':
00177         state = json_tokener_state_eatws;
00178         saved_state = json_tokener_state_array;
00179         current = json_object_new_array();
00180         break;
00181       case 'N':
00182       case 'n':
00183         state = json_tokener_state_null;
00184         printbuf_reset(tok->pb);
00185         tok->st_pos = 0;
00186         goto redo_char;
00187       case '"':
00188       case '\'':
00189         state = json_tokener_state_string;
00190         printbuf_reset(tok->pb);
00191         tok->quote_char = c;
00192         break;
00193       case 'T':
00194       case 't':
00195       case 'F':
00196       case 'f':
00197         state = json_tokener_state_boolean;
00198         printbuf_reset(tok->pb);
00199         tok->st_pos = 0;
00200         goto redo_char;
00201 #if 0 /*defined(__GNUC__)*/
00202           case '0' ... '9':
00203 #else
00204           case '0':
00205       case '1':
00206       case '2':
00207       case '3':
00208       case '4':
00209       case '5':
00210       case '6':
00211       case '7':
00212       case '8':
00213       case '9':
00214 #endif
00215       case '-':
00216         state = json_tokener_state_number;
00217         printbuf_reset(tok->pb);
00218         tok->is_double = 0;
00219         goto redo_char;
00220       default:
00221         tok->err = json_tokener_error_parse_unexpected;
00222         goto out;
00223       }
00224       break;
00225 
00226     case json_tokener_state_finish:
00227       if(tok->depth == 0) goto out;
00228       obj = json_object_get(current);
00229       json_tokener_reset_level(tok, tok->depth);
00230       tok->depth--;
00231       goto redo_char;
00232 
00233     case json_tokener_state_null:
00234       printbuf_memappend(tok->pb, &c, 1);
00235       if(strncasecmp(json_null_str, tok->pb->buf,
00236                      min( (size_t) (tok->st_pos+1), strlen(json_null_str))) == 0) {
00237         if(  ((size_t) tok->st_pos) == strlen(json_null_str)) {
00238           current = NULL;
00239           saved_state = json_tokener_state_finish;
00240           state = json_tokener_state_eatws;
00241           goto redo_char;
00242         }
00243       } else {
00244         tok->err = json_tokener_error_parse_null;
00245         goto out;
00246       }
00247       tok->st_pos++;
00248       break;
00249 
00250     case json_tokener_state_comment_start:
00251       if(c == '*') {
00252         state = json_tokener_state_comment;
00253       } else if(c == '/') {
00254         state = json_tokener_state_comment_eol;
00255       } else {
00256         tok->err = json_tokener_error_parse_comment;
00257         goto out;
00258       }
00259       printbuf_memappend(tok->pb, &c, 1);
00260       break;
00261 
00262     case json_tokener_state_comment:
00263       if(c == '*') state = json_tokener_state_comment_end;
00264       printbuf_memappend(tok->pb, &c, 1);
00265       break;
00266 
00267     case json_tokener_state_comment_eol:
00268       if(c == '\n') {
00269         mc_debug("json_tokener_comment: %s\n", tok->pb->buf);
00270         state = json_tokener_state_eatws;
00271       } else {
00272         printbuf_memappend(tok->pb, &c, 1);
00273       }
00274       break;
00275 
00276     case json_tokener_state_comment_end:
00277       printbuf_memappend(tok->pb, &c, 1);
00278       if(c == '/') {
00279         mc_debug("json_tokener_comment: %s\n", tok->pb->buf);
00280         state = json_tokener_state_eatws;
00281       } else {
00282         state = json_tokener_state_comment;
00283       }
00284       break;
00285 
00286     case json_tokener_state_string:
00287       if(c == tok->quote_char) {
00288         current = json_object_new_string(tok->pb->buf);
00289         saved_state = json_tokener_state_finish;
00290         state = json_tokener_state_eatws;
00291       } else if(c == '\\') {
00292         saved_state = json_tokener_state_string;
00293         state = json_tokener_state_string_escape;
00294       } else {
00295         printbuf_memappend(tok->pb, &c, 1);
00296       }
00297       break;
00298 
00299     case json_tokener_state_string_escape:
00300       switch(c) {
00301       case '"':
00302       case '\\':
00303       case '/':
00304         printbuf_memappend(tok->pb, &c, 1);
00305         state = saved_state;
00306         break;
00307       case 'b':
00308       case 'n':
00309       case 'r':
00310       case 't':
00311         if(c == 'b') printbuf_memappend(tok->pb, "\b", 1);
00312         else if(c == 'n') printbuf_memappend(tok->pb, "\n", 1);
00313         else if(c == 'r') printbuf_memappend(tok->pb, "\r", 1);
00314         else if(c == 't') printbuf_memappend(tok->pb, "\t", 1);
00315         state = saved_state;
00316         break;
00317       case 'u':
00318         tok->ucs_char = 0;
00319         tok->st_pos = 0;
00320         state = json_tokener_state_escape_unicode;
00321         break;
00322       default:
00323         tok->err = json_tokener_error_parse_string;
00324         goto out;
00325       }
00326       break;
00327 
00328     case json_tokener_state_escape_unicode:
00329       if(strchr(json_hex_chars, c)) {
00330         tok->ucs_char += ((unsigned int)hexdigit(c) << ((3-tok->st_pos++)*4));
00331         if(tok->st_pos == 4) {
00332           unsigned char utf_out[3];
00333           if (tok->ucs_char < 0x80) {
00334             utf_out[0] = tok->ucs_char;
00335             printbuf_memappend(tok->pb, (char*)utf_out, 1);
00336           } else if (tok->ucs_char < 0x800) {
00337             utf_out[0] = 0xc0 | (tok->ucs_char >> 6);
00338             utf_out[1] = 0x80 | (tok->ucs_char & 0x3f);
00339             printbuf_memappend(tok->pb, (char*)utf_out, 2);
00340           } else {
00341             utf_out[0] = 0xe0 | (tok->ucs_char >> 12);
00342             utf_out[1] = 0x80 | ((tok->ucs_char >> 6) & 0x3f);
00343             utf_out[2] = 0x80 | (tok->ucs_char & 0x3f);
00344             printbuf_memappend(tok->pb, (char*)utf_out, 3);
00345           }
00346           state = saved_state;
00347         }
00348       } else {
00349         tok->err = json_tokener_error_parse_string;
00350         goto out;
00351       }
00352       break;
00353 
00354     case json_tokener_state_boolean:
00355       printbuf_memappend(tok->pb, &c, 1);
00356       if(strncasecmp(json_true_str, tok->pb->buf,
00357                       min( (size_t)(tok->st_pos+1), strlen(json_true_str))) == 0) {
00358         if(((size_t) tok->st_pos) == strlen(json_true_str)) {
00359           current = json_object_new_boolean(1);
00360           saved_state = json_tokener_state_finish;
00361           state = json_tokener_state_eatws;
00362           goto redo_char;
00363         }
00364       } else if(strncasecmp(json_false_str, tok->pb->buf,
00365                             min((size_t)(tok->st_pos+1), strlen(json_false_str))) == 0) {
00366         if(( (size_t) tok->st_pos) == strlen(json_false_str)) {
00367           current = json_object_new_boolean(0);
00368           saved_state = json_tokener_state_finish;
00369           state = json_tokener_state_eatws;
00370           goto redo_char;
00371         }
00372       } else {
00373         tok->err = json_tokener_error_parse_boolean;
00374         goto out;
00375       }
00376       tok->st_pos++;
00377       break;
00378 
00379     case json_tokener_state_number:
00380       if(c && strchr(json_number_chars, c)) {
00381         printbuf_memappend(tok->pb, &c, 1);     
00382         if(c == '.' || c == 'e') tok->is_double = 1;
00383       } else {
00384         int numi;
00385         double numd;
00386         if(!tok->is_double && sscanf(tok->pb->buf, "%d", &numi) == 1) {
00387           current = json_object_new_int(numi);
00388         } else if(tok->is_double && sscanf(tok->pb->buf, "%lf", &numd) == 1) {
00389           current = json_object_new_double(numd);
00390         } else {
00391           tok->err = json_tokener_error_parse_number;
00392           goto out;
00393         }
00394         saved_state = json_tokener_state_finish;
00395         state = json_tokener_state_eatws;
00396         goto redo_char;
00397       }
00398       break;
00399 
00400     case json_tokener_state_array:
00401       if(c == ']') {
00402         saved_state = json_tokener_state_finish;
00403         state = json_tokener_state_eatws;
00404       } else {
00405         if(tok->depth >= JSON_TOKENER_MAX_DEPTH-1) {
00406           tok->err = json_tokener_error_depth;
00407           goto out;
00408         }
00409         state = json_tokener_state_array_add;
00410         tok->depth++;
00411         json_tokener_reset_level(tok, tok->depth);
00412         goto redo_char;
00413       }
00414       break;
00415 
00416     case json_tokener_state_array_add:
00417       json_object_array_add(current, obj);
00418       saved_state = json_tokener_state_array_sep;
00419       state = json_tokener_state_eatws;
00420       goto redo_char;
00421 
00422     case json_tokener_state_array_sep:
00423       if(c == ']') {
00424         saved_state = json_tokener_state_finish;
00425         state = json_tokener_state_eatws;
00426       } else if(c == ',') {
00427         saved_state = json_tokener_state_array;
00428         state = json_tokener_state_eatws;
00429       } else {
00430         tok->err = json_tokener_error_parse_array;
00431         goto out;
00432       }
00433       break;
00434 
00435     case json_tokener_state_object_field_start:
00436       if(c == '}') {
00437         saved_state = json_tokener_state_finish;
00438         state = json_tokener_state_eatws;
00439       } else if (c == '"' || c == '\'') {
00440         tok->quote_char = c;
00441         printbuf_reset(tok->pb);
00442         state = json_tokener_state_object_field;
00443       } else {
00444         tok->err = json_tokener_error_parse_object_key_name;
00445         goto out;
00446       }
00447       break;
00448 
00449     case json_tokener_state_object_field:
00450       if(c == tok->quote_char) {
00451         obj_field_name = strdup(tok->pb->buf);
00452         saved_state = json_tokener_state_object_field_end;
00453         state = json_tokener_state_eatws;
00454       } else if(c == '\\') {
00455         saved_state = json_tokener_state_object_field;
00456         state = json_tokener_state_string_escape;
00457       } else {
00458         printbuf_memappend(tok->pb, &c, 1);
00459       }
00460       break;
00461 
00462     case json_tokener_state_object_field_end:
00463       if(c == ':') {
00464         saved_state = json_tokener_state_object_value;
00465         state = json_tokener_state_eatws;
00466       } else {
00467         tok->err = json_tokener_error_parse_object_key_sep;
00468         goto out;
00469       }
00470       break;
00471 
00472     case json_tokener_state_object_value:
00473       if(tok->depth >= JSON_TOKENER_MAX_DEPTH-1) {
00474         tok->err = json_tokener_error_depth;
00475         goto out;
00476       }
00477       state = json_tokener_state_object_value_add;
00478       tok->depth++;
00479       json_tokener_reset_level(tok, tok->depth);
00480       goto redo_char;
00481 
00482     case json_tokener_state_object_value_add:
00483       json_object_object_add(current, obj_field_name, obj);
00484       free(obj_field_name);
00485       obj_field_name = NULL;
00486       saved_state = json_tokener_state_object_sep;
00487       state = json_tokener_state_eatws;
00488       goto redo_char;
00489 
00490     case json_tokener_state_object_sep:
00491       if(c == '}') {
00492         saved_state = json_tokener_state_finish;
00493         state = json_tokener_state_eatws;
00494       } else if(c == ',') {
00495         saved_state = json_tokener_state_object_field_start;
00496         state = json_tokener_state_eatws;
00497       } else {
00498         tok->err = json_tokener_error_parse_object_value_sep;
00499         goto out;
00500       }
00501       break;
00502 
00503     }
00504     str++;
00505     tok->char_offset++;
00506   } while(c);
00507 
00508   if(state != json_tokener_state_finish &&
00509      saved_state != json_tokener_state_finish)
00510     tok->err = json_tokener_error_parse_eof;
00511 
00512  out:
00513   if(tok->err == json_tokener_success) return json_object_get(current);
00514   mc_debug("json_tokener_parse_ex: error %s at offset %d\n",
00515            json_tokener_errors[tok->err], tok->char_offset);
00516   return NULL;
00517 }


csm
Author(s): Andrea Censi
autogenerated on Mon Jan 16 2017 03:48:29