root/src/json-lib/json_tokener.c

Revision f632f7d9a1b4222c469abf2fc43923fe4868fe37, 17.4 kB (checked in by Theo Schlossnagle <jesus@omniti.com>, 6 years ago)

provide overflow detection and recover for json_objects and leverage that in the lua stuff

  • Property mode set to 100644
Line 
1 /*
2  * $Id: json_tokener.c,v 1.20 2006/07/25 03:24:50 mclark Exp $
3  *
4  * Copyright (c) 2004, 2005 Metaparadigm Pte. Ltd.
5  * Michael Clark <michael@metaparadigm.com>
6  *
7  * This library is free software; you can redistribute it and/or modify
8  * it under the terms of the MIT license. See COPYING for details.
9  *
10  *
11  * Copyright (c) 2008-2009 Yahoo! Inc.  All rights reserved.
12  * The copyrights to the contents of this file are licensed under the MIT License
13  * (http://www.opensource.org/licenses/mit-license.php)
14  */
15
16 #include "noit_config.h"
17
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <stddef.h>
21 #include <ctype.h>
22 #include <string.h>
23
24 #include "bits.h"
25 #include "debug.h"
26 #include "printbuf.h"
27 #include "arraylist.h"
28 #include "json_object.h"
29 #include "json_tokener.h"
30
/* Literal spellings the tokener matches (case-insensitively) for the
 * null / true / false keywords. */
static const char *json_null_str  = "null";
static const char *json_true_str  = "true";
static const char *json_false_str = "false";
34
/* Human-readable messages for tokener error codes.  The array is indexed
 * with tok->err (see the MC_DEBUG call at the end of
 * json_tokener_parse_ex()), so the order of the entries must match the
 * order of the error enumeration. */
const char* json_tokener_errors[] = {
  "success",
  "continue",
  "nesting too deep",
  "unexpected end of data",
  "unexpected character",
  "null expected",
  "boolean expected",
  "number expected",
  "array value separator ',' expected",
  "quoted object property name expected",
  "object property name separator ':' expected",
  "object value separator ',' expected",
  "invalid string sequence",
  "expected comment",
};
51
52
53 struct json_tokener* json_tokener_new(void)
54 {
55   struct json_tokener *tok;
56
57   tok = (struct json_tokener*)calloc(1, sizeof(struct json_tokener));
58   if (!tok) return NULL;
59   tok->pb = printbuf_new();
60   json_tokener_reset(tok);
61   return tok;
62 }
63
64 void json_tokener_free(struct json_tokener *tok)
65 {
66   json_tokener_reset(tok);
67   if(tok) printbuf_free(tok->pb);
68   free(tok);
69 }
70
71 static void json_tokener_reset_level(struct json_tokener *tok, int depth)
72 {
73   tok->stack[depth].state = json_tokener_state_eatws;
74   tok->stack[depth].saved_state = json_tokener_state_start;
75   json_object_put(tok->stack[depth].current);
76   tok->stack[depth].current = NULL;
77   free(tok->stack[depth].obj_field_name);
78   tok->stack[depth].obj_field_name = NULL;
79 }
80
81 void json_tokener_reset(struct json_tokener *tok)
82 {
83   int i;
84   if (!tok)
85     return;
86
87   for(i = tok->depth; i >= 0; i--)
88     json_tokener_reset_level(tok, i);
89   tok->depth = 0;
90   tok->err = json_tokener_success;
91 }
92
93 struct json_object* json_tokener_parse(const char *str)
94 {
95   struct json_tokener* tok;
96   struct json_object* obj;
97
98   tok = json_tokener_new();
99   obj = json_tokener_parse_ex(tok, str, -1);
100   if(tok->err != json_tokener_success)
101     obj = (struct json_object*)error_ptr(-tok->err);
102   json_tokener_free(tok);
103   return obj;
104 }
105
106
#if !HAVE_STRNDUP
/* CAW: compliant version of strndup()
 *
 * Returns a freshly malloc()ed, NUL-terminated copy of at most n bytes of
 * str, or NULL if str is NULL or the allocation fails.  The caller owns
 * the result and releases it with free().
 *
 * The source is never read beyond its first n bytes, so a buffer that is
 * not NUL-terminated is safe as long as it holds at least n bytes.
 */
char* strndup(const char* str, size_t n)
{
  size_t nn = 0;
  char* s;

  if(!str)
    return NULL;

  /* Bounded length scan: stop at the terminator or after n bytes,
   * whichever comes first. */
  while(nn < n && str[nn] != '\0')
    nn++;

  s = (char*)malloc(nn + 1);
  if(s) {
    memcpy(s, str, nn);
    s[nn] = '\0';
  }

  return s;
}
#endif
127
128
/* Shorthand for the fields of the stack level currently being parsed.
 * Every use requires a variable named `tok` in scope; the expansions are
 * lvalues, so they can be both read and assigned. */
#define state          (tok->stack[tok->depth].state)
#define saved_state    (tok->stack[tok->depth].saved_state)
#define current        (tok->stack[tok->depth].current)
#define obj_field_name (tok->stack[tok->depth].obj_field_name)
133
134 /* Optimization:
135  * json_tokener_parse_ex() consumed a lot of CPU in its main loop,
136  * iterating character-by character.  A large performance boost is
137  * achieved by using tighter loops to locally handle units such as
138  * comments and strings.  Loops that handle an entire token within
139  * their scope also gather entire strings and pass them to
140  * printbuf_memappend() in a single call, rather than calling
141  * printbuf_memappend() one char at a time.
142  *
143  * POP_CHAR() and ADVANCE_CHAR() macros are used for code that is
144  * common to both the main loop and the tighter loops.
145  */
146
/* POP_CHAR(dest, tok) macro:
 *   Despite the name nothing is consumed: the character at the current
 *   position is copied into dest and the macro evaluates to 1.
 *   Once tok->char_offset has reached len, dest is left untouched and the
 *   macro evaluates to 0 after setting tok->err to
 *     - json_tokener_success  when a complete top-level value has already
 *       been parsed (depth 0, skipping whitespace ahead of the finish
 *       state), or
 *     - json_tokener_continue otherwise (more input is required).
 *   Implicit inputs:  str, len vars
 */
#define POP_CHAR(dest, tok)                                              \
  (((tok)->char_offset != len)                                           \
   ? (((dest) = *str), 1)                                                \
   : (((tok)->err =                                                      \
         ((tok)->depth == 0 &&                                           \
          state == json_tokener_state_eatws &&                           \
          saved_state == json_tokener_state_finish)                      \
         ? json_tokener_success                                          \
         : json_tokener_continue),                                       \
      0))
161  
/* ADVANCE_CHAR() macro:
 *   Steps str and tok->char_offset forward by one character.
 *   The macro evaluates to c, i.e. the character that was current before
 *   the step, so callers can test it directly (it is 0 at a NUL byte).
 *   Implicit inputs:  c var
 */
#define ADVANCE_CHAR(str, tok) \
  ((void)((str)++), (void)(++((tok)->char_offset)), c)
169
170 /* End optimization macro defs */
171
172
173 struct json_object* json_tokener_parse_ex(struct json_tokener *tok,
174                                           const char *str, int len)
175 {
176   struct json_object *obj = NULL;
177   char c = '\1';
178
179   tok->char_offset = 0;
180   tok->err = json_tokener_success;
181
182   while (POP_CHAR(c, tok)) {
183
184   redo_char:
185     switch(state) {
186
187     case json_tokener_state_eatws:
188       /* Advance until we change state */
189       while (isspace(c)) {
190         if ((!ADVANCE_CHAR(str, tok)) || (!POP_CHAR(c, tok)))
191           goto out;
192       }
193       if(c == '/') {
194         printbuf_reset(tok->pb);
195         printbuf_memappend_fast(tok->pb, &c, 1);
196         state = json_tokener_state_comment_start;
197       } else {
198         state = saved_state;
199         goto redo_char;
200       }
201       break;
202
203     case json_tokener_state_start:
204       switch(c) {
205       case '{':
206         state = json_tokener_state_eatws;
207         saved_state = json_tokener_state_object_field_start;
208         current = json_object_new_object();
209         break;
210       case '[':
211         state = json_tokener_state_eatws;
212         saved_state = json_tokener_state_array;
213         current = json_object_new_array();
214         break;
215       case 'N':
216       case 'n':
217         state = json_tokener_state_null;
218         printbuf_reset(tok->pb);
219         tok->st_pos = 0;
220         goto redo_char;
221       case '"':
222       case '\'':
223         state = json_tokener_state_string;
224         printbuf_reset(tok->pb);
225         tok->quote_char = c;
226         break;
227       case 'T':
228       case 't':
229       case 'F':
230       case 'f':
231         state = json_tokener_state_boolean;
232         printbuf_reset(tok->pb);
233         tok->st_pos = 0;
234         goto redo_char;
235 #if defined(__GNUC__)
236           case '0' ... '9':
237 #else
238           case '0':
239       case '1':
240       case '2':
241       case '3':
242       case '4':
243       case '5':
244       case '6':
245       case '7':
246       case '8':
247       case '9':
248 #endif
249       case '-':
250         state = json_tokener_state_number;
251         printbuf_reset(tok->pb);
252         tok->is_double = 0;
253         goto redo_char;
254       default:
255         tok->err = json_tokener_error_parse_unexpected;
256         goto out;
257       }
258       break;
259
260     case json_tokener_state_finish:
261       if(tok->depth == 0) goto out;
262       obj = json_object_get(current);
263       json_tokener_reset_level(tok, tok->depth);
264       tok->depth--;
265       goto redo_char;
266
267     case json_tokener_state_null:
268       printbuf_memappend_fast(tok->pb, &c, 1);
269       if(strncasecmp(json_null_str, tok->pb->buf,
270                      json_min(tok->st_pos+1, strlen(json_null_str))) == 0) {
271         if(tok->st_pos == strlen(json_null_str)) {
272           current = NULL;
273           saved_state = json_tokener_state_finish;
274           state = json_tokener_state_eatws;
275           goto redo_char;
276         }
277       } else {
278         tok->err = json_tokener_error_parse_null;
279         goto out;
280       }
281       tok->st_pos++;
282       break;
283
284     case json_tokener_state_comment_start:
285       if(c == '*') {
286         state = json_tokener_state_comment;
287       } else if(c == '/') {
288         state = json_tokener_state_comment_eol;
289       } else {
290         tok->err = json_tokener_error_parse_comment;
291         goto out;
292       }
293       printbuf_memappend_fast(tok->pb, &c, 1);
294       break;
295
296     case json_tokener_state_comment:
297               {
298           /* Advance until we change state */
299           const char *case_start = str;
300           while(c != '*') {
301             if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
302               printbuf_memappend_fast(tok->pb, case_start, str-case_start);
303               goto out;
304             }
305           }
306           printbuf_memappend_fast(tok->pb, case_start, 1+str-case_start);
307           state = json_tokener_state_comment_end;
308         }
309             break;
310
311     case json_tokener_state_comment_eol:
312       {
313         /* Advance until we change state */
314         const char *case_start = str;
315         while(c != '\n') {
316           if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
317             printbuf_memappend_fast(tok->pb, case_start, str-case_start);
318             goto out;
319           }
320         }
321         printbuf_memappend_fast(tok->pb, case_start, str-case_start);
322         MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf);
323         state = json_tokener_state_eatws;
324       }
325       break;
326
327     case json_tokener_state_comment_end:
328       printbuf_memappend_fast(tok->pb, &c, 1);
329       if(c == '/') {
330         MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf);
331         state = json_tokener_state_eatws;
332       } else {
333         state = json_tokener_state_comment;
334       }
335       break;
336
337     case json_tokener_state_string:
338       {
339         /* Advance until we change state */
340         const char *case_start = str;
341         while(1) {
342           if(c == tok->quote_char) {
343             printbuf_memappend_fast(tok->pb, case_start, str-case_start);
344             current = json_object_new_string(tok->pb->buf);
345             saved_state = json_tokener_state_finish;
346             state = json_tokener_state_eatws;
347             break;
348           } else if(c == '\\') {
349             printbuf_memappend_fast(tok->pb, case_start, str-case_start);
350             saved_state = json_tokener_state_string;
351             state = json_tokener_state_string_escape;
352             break;
353           }
354           if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
355             printbuf_memappend_fast(tok->pb, case_start, str-case_start);
356             goto out;
357           }
358         }
359       }
360       break;
361
362     case json_tokener_state_string_escape:
363       switch(c) {
364       case '"':
365       case '\\':
366       case '/':
367         printbuf_memappend_fast(tok->pb, &c, 1);
368         state = saved_state;
369         break;
370       case 'b':
371       case 'n':
372       case 'r':
373       case 't':
374         if(c == 'b') printbuf_memappend_fast(tok->pb, "\b", 1);
375         else if(c == 'n') printbuf_memappend_fast(tok->pb, "\n", 1);
376         else if(c == 'r') printbuf_memappend_fast(tok->pb, "\r", 1);
377         else if(c == 't') printbuf_memappend_fast(tok->pb, "\t", 1);
378         state = saved_state;
379         break;
380       case 'u':
381         tok->ucs_char = 0;
382         tok->st_pos = 0;
383         state = json_tokener_state_escape_unicode;
384         break;
385       default:
386         tok->err = json_tokener_error_parse_string;
387         goto out;
388       }
389       break;
390
391     case json_tokener_state_escape_unicode:
392             /* Note that the following code is inefficient for handling large
393        * chunks of extended chars, calling printbuf_memappend() once
394        * for each multi-byte character of input.
395        * This is a good area for future optimization.
396        */
397         {
398           /* Advance until we change state */
399           while(1) {
400             if(strchr(json_hex_chars, c)) {
401               tok->ucs_char += ((unsigned int)hexdigit(c) << ((3-tok->st_pos++)*4));
402               if(tok->st_pos == 4) {
403                 unsigned char utf_out[3];
404                 if (tok->ucs_char < 0x80) {
405                   utf_out[0] = tok->ucs_char;
406                   printbuf_memappend_fast(tok->pb, (char*)utf_out, 1);
407                 } else if (tok->ucs_char < 0x800) {
408                   utf_out[0] = 0xc0 | (tok->ucs_char >> 6);
409                   utf_out[1] = 0x80 | (tok->ucs_char & 0x3f);
410                   printbuf_memappend_fast(tok->pb, (char*)utf_out, 2);
411                 } else {
412                   utf_out[0] = 0xe0 | (tok->ucs_char >> 12);
413                   utf_out[1] = 0x80 | ((tok->ucs_char >> 6) & 0x3f);
414                   utf_out[2] = 0x80 | (tok->ucs_char & 0x3f);
415                   printbuf_memappend_fast(tok->pb, (char*)utf_out, 3);
416                 }
417                 state = saved_state;
418                 break;
419               }
420             } else {
421               tok->err = json_tokener_error_parse_string;
422               goto out;
423                   }
424           if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok))
425             goto out;
426         }
427       }
428       break;
429
430     case json_tokener_state_boolean:
431       printbuf_memappend_fast(tok->pb, &c, 1);
432       if(strncasecmp(json_true_str, tok->pb->buf,
433                      json_min(tok->st_pos+1, strlen(json_true_str))) == 0) {
434         if(tok->st_pos == strlen(json_true_str)) {
435           current = json_object_new_boolean(1);
436           saved_state = json_tokener_state_finish;
437           state = json_tokener_state_eatws;
438           goto redo_char;
439         }
440       } else if(strncasecmp(json_false_str, tok->pb->buf,
441                             json_min(tok->st_pos+1, strlen(json_false_str))) == 0) {
442         if(tok->st_pos == strlen(json_false_str)) {
443           current = json_object_new_boolean(0);
444           saved_state = json_tokener_state_finish;
445           state = json_tokener_state_eatws;
446           goto redo_char;
447         }
448       } else {
449         tok->err = json_tokener_error_parse_boolean;
450         goto out;
451       }
452       tok->st_pos++;
453       break;
454
455     case json_tokener_state_number:
456       {
457         /* Advance until we change state */
458         const char *case_start = str;
459         int case_len=0;
460         while(c && strchr(json_number_chars, c)) {
461           ++case_len;
462           if(c == '.' || c == 'e') tok->is_double = 1;
463           if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
464             printbuf_memappend_fast(tok->pb, case_start, case_len);
465             goto out;
466           }
467         }
468         if (case_len>0)
469           printbuf_memappend_fast(tok->pb, case_start, case_len);
470       }
471       {
472         int numi;
473         double numd;
474         if(!tok->is_double && sscanf(tok->pb->buf, "%d", &numi) == 1) {
475           current = json_object_new_int(numi);
476           if(tok->pb->buf[0] == '-') {
477             int64_t i64;
478             i64 = strtoll(tok->pb->buf, NULL, 10);
479             json_object_set_int64(current, i64);
480             if(i64 != numi)
481               json_object_set_int_overflow(current, json_overflow_int64);
482           }
483           else {
484             u_int64_t u64;
485             u64 = strtoll(tok->pb->buf, NULL, 10);
486             json_object_set_uint64(current, u64);
487             if(u64 != numi)
488               json_object_set_int_overflow(current, json_overflow_uint64);
489           }
490         } else if(tok->is_double && sscanf(tok->pb->buf, "%lf", &numd) == 1) {
491           current = json_object_new_double(numd);
492         } else {
493           tok->err = json_tokener_error_parse_number;
494           goto out;
495         }
496         saved_state = json_tokener_state_finish;
497         state = json_tokener_state_eatws;
498         goto redo_char;
499       }
500       break;
501
502     case json_tokener_state_array:
503       if(c == ']') {
504         saved_state = json_tokener_state_finish;
505         state = json_tokener_state_eatws;
506       } else {
507         if(tok->depth >= JSON_TOKENER_MAX_DEPTH-1) {
508           tok->err = json_tokener_error_depth;
509           goto out;
510         }
511         state = json_tokener_state_array_add;
512         tok->depth++;
513         json_tokener_reset_level(tok, tok->depth);
514         goto redo_char;
515       }
516       break;
517
518     case json_tokener_state_array_add:
519       json_object_array_add(current, obj);
520       saved_state = json_tokener_state_array_sep;
521       state = json_tokener_state_eatws;
522       goto redo_char;
523
524     case json_tokener_state_array_sep:
525       if(c == ']') {
526         saved_state = json_tokener_state_finish;
527         state = json_tokener_state_eatws;
528       } else if(c == ',') {
529         saved_state = json_tokener_state_array;
530         state = json_tokener_state_eatws;
531       } else {
532         tok->err = json_tokener_error_parse_array;
533         goto out;
534       }
535       break;
536
537     case json_tokener_state_object_field_start:
538       if(c == '}') {
539         saved_state = json_tokener_state_finish;
540         state = json_tokener_state_eatws;
541       } else if (c == '"' || c == '\'') {
542         tok->quote_char = c;
543         printbuf_reset(tok->pb);
544         state = json_tokener_state_object_field;
545       } else {
546         tok->err = json_tokener_error_parse_object_key_name;
547         goto out;
548       }
549       break;
550
551     case json_tokener_state_object_field:
552       {
553         /* Advance until we change state */
554         const char *case_start = str;
555         while(1) {
556           if(c == tok->quote_char) {
557             printbuf_memappend_fast(tok->pb, case_start, str-case_start);
558             obj_field_name = strdup(tok->pb->buf);
559             saved_state = json_tokener_state_object_field_end;
560             state = json_tokener_state_eatws;
561             break;
562           } else if(c == '\\') {
563             printbuf_memappend_fast(tok->pb, case_start, str-case_start);
564             saved_state = json_tokener_state_object_field;
565             state = json_tokener_state_string_escape;
566             break;
567           }
568           if (!ADVANCE_CHAR(str, tok) || !POP_CHAR(c, tok)) {
569             printbuf_memappend_fast(tok->pb, case_start, str-case_start);
570             goto out;
571           }
572         }
573       }
574       break;
575
576     case json_tokener_state_object_field_end:
577       if(c == ':') {
578         saved_state = json_tokener_state_object_value;
579         state = json_tokener_state_eatws;
580       } else {
581         tok->err = json_tokener_error_parse_object_key_sep;
582         goto out;
583       }
584       break;
585
586     case json_tokener_state_object_value:
587       if(tok->depth >= JSON_TOKENER_MAX_DEPTH-1) {
588         tok->err = json_tokener_error_depth;
589         goto out;
590       }
591       state = json_tokener_state_object_value_add;
592       tok->depth++;
593       json_tokener_reset_level(tok, tok->depth);
594       goto redo_char;
595
596     case json_tokener_state_object_value_add:
597       json_object_object_add(current, obj_field_name, obj);
598       free(obj_field_name);
599       obj_field_name = NULL;
600       saved_state = json_tokener_state_object_sep;
601       state = json_tokener_state_eatws;
602       goto redo_char;
603
604     case json_tokener_state_object_sep:
605       if(c == '}') {
606         saved_state = json_tokener_state_finish;
607         state = json_tokener_state_eatws;
608       } else if(c == ',') {
609         saved_state = json_tokener_state_object_field_start;
610         state = json_tokener_state_eatws;
611       } else {
612         tok->err = json_tokener_error_parse_object_value_sep;
613         goto out;
614       }
615       break;
616
617     }
618     if (!ADVANCE_CHAR(str, tok))
619       goto out;
620   } /* while(POP_CHAR) */
621
622  out:
623   if (!c) { /* We hit an eof char (0) */
624     if(state != json_tokener_state_finish &&
625        saved_state != json_tokener_state_finish)
626       tok->err = json_tokener_error_parse_eof;
627   }
628
629   if(tok->err == json_tokener_success) return json_object_get(current);
630   MC_DEBUG("json_tokener_parse_ex: error %s at offset %d\n",
631            json_tokener_errors[tok->err], tok->char_offset);
632   return NULL;
633 }
Note: See TracBrowser for help on using the browser.