libsheepy
laxjson.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2013 Andrew Kelley
3  *
4  * This file is part of liblaxjson, which is MIT licensed.
5  * See http://opensource.org/licenses/MIT
6  */
7 
8 #include "laxjson.h"
9 
10 #include <stdlib.h>
11 #include <assert.h>
12 
13 #include "../../release/libsheepy.h"
14 
/* Case-label list of whitespace bytes. The expansion starts with a bare
 * constant and ends without a colon, so it is meant to be written as
 * `case WHITESPACE:`. 0xb is the ASCII vertical tab. */
15 #define WHITESPACE \
16  ' ': \
17  case '\t': \
18  case '\n': \
19  case '\f': \
20  case '\r': \
21  case 0xb
22 
/* Case-label list of the decimal digits '0'..'9'; use as `case DIGIT:`. */
23 #define DIGIT \
24  '0': \
25  case '1': \
26  case '2': \
27  case '3': \
28  case '4': \
29  case '5': \
30  case '6': \
31  case '7': \
32  case '8': \
33  case '9'
34 
/* Case-label list of 'a'..'z', 'A'..'Z' and, through DIGIT, '0'..'9';
 * use as `case ALPHANUMERIC:`. */
35 #define ALPHANUMERIC \
36  'a': \
37  case 'b': \
38  case 'c': \
39  case 'd': \
40  case 'e': \
41  case 'f': \
42  case 'g': \
43  case 'h': \
44  case 'i': \
45  case 'j': \
46  case 'k': \
47  case 'l': \
48  case 'm': \
49  case 'n': \
50  case 'o': \
51  case 'p': \
52  case 'q': \
53  case 'r': \
54  case 's': \
55  case 't': \
56  case 'u': \
57  case 'v': \
58  case 'w': \
59  case 'x': \
60  case 'y': \
61  case 'z': \
62  case 'A': \
63  case 'B': \
64  case 'C': \
65  case 'D': \
66  case 'E': \
67  case 'F': \
68  case 'G': \
69  case 'H': \
70  case 'I': \
71  case 'J': \
72  case 'K': \
73  case 'L': \
74  case 'M': \
75  case 'N': \
76  case 'O': \
77  case 'P': \
78  case 'Q': \
79  case 'R': \
80  case 'S': \
81  case 'T': \
82  case 'U': \
83  case 'V': \
84  case 'W': \
85  case 'X': \
86  case 'Y': \
87  case 'Z': \
88  case DIGIT
89 
/* Case-label list of the bytes accepted in an unquoted property name:
 * the punctuation listed below plus everything in ALPHANUMERIC.
 * Use as `case VALID_UNQUOTED:`. */
90 #define VALID_UNQUOTED \
91  '-': \
92  case '_': \
93  case '#': \
94  case '$': \
95  case '%': \
96  case '&': \
97  case '<': \
98  case '>': \
99  case '=': \
100  case '~': \
101  case '|': \
102  case '@': \
103  case '?': \
104  case ';': \
105  case '.': \
106  case '+': \
107  case '*': \
108  case '(': \
109  case ')': \
110  case ALPHANUMERIC
111 
/* Case-label list of the bytes that end a number: ',', any WHITESPACE
 * byte, ']', '}' and '/'. Use as `case NUMBER_TERMINATOR:`. */
112 #define NUMBER_TERMINATOR \
113  ',': \
114  case WHITESPACE: \
115  case ']': \
116  case '}': \
117  case '/'
118 
/* Place value of each of the four hex digits of a \u escape, most
 * significant digit first (16^3, 16^2, 16^1, 16^0). */
119 static const unsigned int HEX_MULT[] = {4096, 256, 16, 1};
120 
121 /* static const char *STATE_NAMES[] = { */
122 /* "LaxJsonStateValue", */
123 /* "LaxJsonStateObject", */
124 /* "LaxJsonStateArray", */
125 /* "LaxJsonStateString", */
126 /* "LaxJsonStateStringEscape", */
127 /* "LaxJsonStateUnicodeEscape", */
128 /* "LaxJsonStateBareProp", */
129 /* "LaxJsonStateCommentBegin", */
130 /* "LaxJsonStateCommentLine", */
131 /* "LaxJsonStateCommentMultiLine", */
132 /* "LaxJsonStateCommentMultiLineStar", */
133 /* "LaxJsonStateExpect", */
134 /* "LaxJsonStateEnd", */
135 /* "LaxJsonStateColon", */
136 /* "LaxJsonStateNumber", */
137 /* "LaxJsonStateNumberDecimal", */
138 /* "LaxJsonStateNumberExponent", */
139 /* "LaxJsonStateNumberExponentSign" */
140 /* }; */
141 
142 static enum LaxJsonError push_state(struct LaxJsonContext *context, enum LaxJsonState state) {
143  enum LaxJsonState *new_ptr;
144 
145  /* fprintf(stderr, "push state %s\n", STATE_NAMES[state]); */
146  if (context->state_stack_index >= context->state_stack_size) {
147  context->state_stack_size += 1024;
148  if (context->state_stack_size > context->max_state_stack_size)
150  new_ptr = realloc(context->state_stack,
151  (size_t)context->state_stack_size * sizeof(enum LaxJsonState));
152  if (!new_ptr)
153  return LaxJsonErrorNoMem;
154  context->state_stack = new_ptr;
155  }
156  context->state_stack[context->state_stack_index] = state;
157  context->state_stack_index += 1;
158  return LaxJsonErrorNone;
159 }
160 
162  struct LaxJsonContext *context = calloc(1, sizeof(struct LaxJsonContext));
163 
164  if (!context)
165  return NULL;
166 
167  context->value_buffer_size = 1024;
168  context->value_buffer = malloc((size_t)context->value_buffer_size);
169 
170  if (!context->value_buffer) {
171  lax_json_destroy(context);
172  return NULL;
173  }
174 
175  context->state_stack_size = 1024;
176  context->state_stack = malloc((size_t)context->state_stack_size * sizeof(enum LaxJsonState));
177  if (!context->state_stack) {
178  lax_json_destroy(context);
179  return NULL;
180  }
181 
182  context->line = 1;
183  context->max_state_stack_size = 16384;
184  context->max_value_buffer_size = 1048576; /* 1 MB */
185 
186  push_state(context, LaxJsonStateEnd);
187 
188  return context;
189 }
190 
191 void lax_json_destroy(struct LaxJsonContext *context) {
192  free(context->state_stack);
193  free(context->value_buffer);
194  free(context);
195 }
196 
197 static void pop_state(struct LaxJsonContext *context) {
198  context->state_stack_index -= 1;
199  context->state = context->state_stack[context->state_stack_index];
200  assert(context->state_stack_index >= 0);
201 }
202 
203 static enum LaxJsonError buffer_char(struct LaxJsonContext *context, char c) {
204  char *new_ptr;
205  if (context->value_buffer_index >= context->value_buffer_size) {
206  context->value_buffer_size += 16384;
207  if (context->value_buffer_size > context->max_value_buffer_size)
209  new_ptr = realloc(context->value_buffer, (size_t)context->value_buffer_size);
210  if (!new_ptr)
211  return LaxJsonErrorNoMem;
212  context->value_buffer = new_ptr;
213  }
214  context->value_buffer[context->value_buffer_index] = c;
215  context->value_buffer_index += 1;
216  return LaxJsonErrorNone;
217 }
218 
/*
 * Feed `size` bytes starting at `data` to the parser.
 *
 * Every byte is dispatched on context->state. Completed tokens are handed
 * to the callbacks stored in the context (string, number, primitive, begin,
 * end); a non-zero callback result stops parsing with LaxJsonErrorAborted.
 * context->line and context->column are updated for each consumed byte.
 *
 * Once the input is exhausted, a parser that is not in LaxJsonStateEnd
 * either flushes a pending number and calls lax_json_eof(), or returns
 * LaxJsonErrorAborted (see the end of the function).
 *
 * Returns `err`, which is assigned by PUSH_STATE/BUFFER_CHAR and by the
 * lax_json_eof() call, or one of the explicit error returns below.
 *
 * NOTE(review): this listing appears to have lost lines: `err` is used but
 * never declared here, several `default:` labels have no statement, and
 * some inner `switch (c)` blocks are not preceded by a `case` label.
 * Confirm against the original source before relying on the notes below.
 */
219 enum LaxJsonError lax_json_feed(struct LaxJsonContext *context, int size, const char *data) {
/* Push `state`; on failure return the error from the enclosing function.
 * Expects a local named `err`. */
220 #define PUSH_STATE(state) \
221  err = push_state(context, state); \
222  if (err) return err;
/* Append `c` to the value buffer; on failure return the error from the
 * enclosing function. Expects a local named `err`. */
223 #define BUFFER_CHAR(c) \
224  err = buffer_char(context, c); \
225  if (err) return err;
226 
228  unsigned int x;
229  const char *end;
230  char c;
231  unsigned char byte;
/* Walk the input one byte at a time. Some states step `data` back by one
 * so that the same byte is dispatched again in the new state. */
232  for (end = data + size; data < end; data += 1) {
233  c = *data;
234  if (c == '\n') {
235  //puts(STATE_NAMES[context->state]);
236  context->line += 1;
237  context->column = 0;
238  } else {
239  context->column += 1;
240  }
241  /* fprintf(stderr, "line %d col %d state %s char %c\n", context->line, context->column,
242  STATE_NAMES[context->state], c); */
243  /* printf("line %d col %d state %s char %c\n", context->line, context->column, */
244  /* STATE_NAMES[context->state], c); */
245  switch (context->state) {
/* In the visible code only whitespace and the start of a comment are
 * handled in this state. */
246  case LaxJsonStateEnd:
247  switch (c) {
248  case WHITESPACE:
249  /* ignore */
250  break;
251  case '/':
252  context->state = LaxJsonStateCommentBegin;
254  break;
255  default:
257  }
258  break;
/* Inside an object: expect a property name (quoted or bare) or '}'. */
259  case LaxJsonStateObject:
260  switch (c) {
261  case WHITESPACE:
262  case ',':
263  /* do nothing except eat these characters */
264  break;
265  case '/':
266  context->state = LaxJsonStateCommentBegin;
268  break;
269  case '"':
270  case '\'':
271  context->state = LaxJsonStateString;
272  context->value_buffer_index = 0;
273  context->delim = c;
274  context->string_type = LaxJsonTypeProperty;
276  break;
277  case VALID_UNQUOTED:
278  context->state = LaxJsonStateBareProp;
279  context->value_buffer[0] = c;
280  context->value_buffer_index = 1;
281  context->delim = 0;
282  break;
283  case '}':
284  if (context->end(context, LaxJsonTypeObject))
285  return LaxJsonErrorAborted;
286  pop_state(context);
287  break;
288  default:
290  }
291  break;
/* NOTE(review): no `case` label is visible ahead of this switch. It
 * collects an unquoted property name, so it is presumably the
 * LaxJsonStateBareProp state -- confirm. */
293  switch (c) {
294  case VALID_UNQUOTED:
295  BUFFER_CHAR(c);
296  break;
/* Whitespace ends the name: NUL-terminate it, report it as a property
 * (the length excludes the NUL) and wait for the colon. */
297  case WHITESPACE:
298  BUFFER_CHAR('\0');
299  if (context->string(context, LaxJsonTypeProperty, context->value_buffer,
300  context->value_buffer_index - 1))
301  {
302  return LaxJsonErrorAborted;
303  }
304  context->state = LaxJsonStateColon;
305  break;
/* A colon ends the name and moves straight on to the value. */
306  case ':':
307  BUFFER_CHAR('\0');
308  if (context->string(context, LaxJsonTypeProperty, context->value_buffer,
309  context->value_buffer_index - 1))
310  {
311  return LaxJsonErrorAborted;
312  }
313  context->state = LaxJsonStateValue;
314  context->string_type = LaxJsonTypeString;
316  break;
317  default:
319  }
320  break;
/* Inside a quoted string: the opening delimiter closes it, a backslash
 * starts an escape, every other byte is buffered as is. */
321  case LaxJsonStateString:
322  if (c == context->delim) {
323  BUFFER_CHAR('\0');
324  if (context->string(context, context->string_type, context->value_buffer,
325  context->value_buffer_index - 1))
326  {
327  return LaxJsonErrorAborted;
328  }
329  pop_state(context);
330  } else if (c == '\\') {
331  context->state = LaxJsonStateStringEscape;
332  } else {
333  BUFFER_CHAR(c);
334  }
335  break;
/* NOTE(review): no `case` label is visible ahead of this switch. It
 * translates the byte following a backslash, so it is presumably the
 * LaxJsonStateStringEscape state -- confirm. */
337  switch (c) {
338  case '\'':
339  case '"':
340  case '/':
341  case '\\':
342  BUFFER_CHAR(c);
343  context->state = LaxJsonStateString;
344  break;
345  case 'b':
346  BUFFER_CHAR('\b');
347  context->state = LaxJsonStateString;
348  break;
349  case 'f':
350  BUFFER_CHAR('\f');
351  context->state = LaxJsonStateString;
352  break;
353  case 'n':
354  BUFFER_CHAR('\n');
355  context->state = LaxJsonStateString;
356  break;
357  case 'r':
358  BUFFER_CHAR('\r');
359  context->state = LaxJsonStateString;
360  break;
361  case 't':
362  BUFFER_CHAR('\t');
363  context->state = LaxJsonStateString;
364  break;
/* \u starts a four-digit hex escape; reset the accumulator. */
365  case 'u':
366  context->state = LaxJsonStateUnicodeEscape;
367  context->unicode_digit_index = 0;
368  context->unicode_point = 0;
369  break;
370  default:;
371  // "\" should be escaped
372  return LaxJsonErrorAborted;
373  }
374  break;
/* NOTE(review): no `case` label is visible ahead of this switch. It maps
 * a hex digit to its value in `x`, so it is presumably the
 * LaxJsonStateUnicodeEscape state -- confirm. The `default:` below has no
 * visible statement, which would leave `x` unset for a non-hex byte. */
376  switch (c) {
377  case '0':
378  x = 0;
379  break;
380  case '1':
381  x = 1;
382  break;
383  case '2':
384  x = 2;
385  break;
386  case '3':
387  x = 3;
388  break;
389  case '4':
390  x = 4;
391  break;
392  case '5':
393  x = 5;
394  break;
395  case '6':
396  x = 6;
397  break;
398  case '7':
399  x = 7;
400  break;
401  case '8':
402  x = 8;
403  break;
404  case '9':
405  x = 9;
406  break;
407  case 'a':
408  case 'A':
409  x = 10;
410  break;
411  case 'b':
412  case 'B':
413  x = 11;
414  break;
415  case 'c':
416  case 'C':
417  x = 12;
418  break;
419  case 'd':
420  case 'D':
421  x = 13;
422  break;
423  case 'e':
424  case 'E':
425  x = 14;
426  break;
427  case 'f':
428  case 'F':
429  x = 15;
430  break;
431  default:
433  }
/* Add the digit at its place value; the first digit is the most
 * significant one. */
434  context->unicode_point += x * HEX_MULT[context->unicode_digit_index];
435  context->unicode_digit_index += 1;
/* After the fourth digit, append the code point to the value buffer as a
 * UTF-8 byte sequence and return to the string state. Four hex digits
 * cannot exceed 0xffff, so the branches for 4 to 6 bytes look unreachable
 * from here. */
436  if (context->unicode_digit_index == 4) {
437  if (context->unicode_point <= 0x007f) {
438  /* 1 byte */
439  BUFFER_CHAR((char)context->unicode_point);
440  context->state = LaxJsonStateString;
441  } else if (context->unicode_point <= 0x07ff) {
442  /* 2 bytes */
443  byte = (unsigned char)(0xc0 | (context->unicode_point >> 6));
444  BUFFER_CHAR(*(char *)(&byte));
445  byte = (unsigned char)(0x80 | (context->unicode_point & 0x3f));
446  BUFFER_CHAR(*(char *)(&byte));
447  } else if (context->unicode_point <= 0xffff) {
448  /* 3 bytes */
449  byte = (unsigned char)(0xe0 | (context->unicode_point >> 12));
450  BUFFER_CHAR(*(char *)(&byte));
451  byte = (unsigned char)(0x80 | ((context->unicode_point >> 6) & 0x3f));
452  BUFFER_CHAR(*(char *)(&byte));
453  byte = (unsigned char)(0x80 | (context->unicode_point & 0x3f));
454  BUFFER_CHAR(*(char *)(&byte));
455  } else if (context->unicode_point <= 0x1fffff) {
456  /* 4 bytes */
457  byte = (unsigned char)(0xf0 | (context->unicode_point >> 18));
458  BUFFER_CHAR(*(char *)(&byte));
459  byte = (unsigned char)(0x80 | ((context->unicode_point >> 12) & 0x3f));
460  BUFFER_CHAR(*(char *)(&byte));
461  byte = (unsigned char)(0x80 | ((context->unicode_point >> 6) & 0x3f));
462  BUFFER_CHAR(*(char *)(&byte));
463  byte = (unsigned char)(0x80 | (context->unicode_point & 0x3f));
464  BUFFER_CHAR(*(char *)(&byte));
465  } else if (context->unicode_point <= 0x3ffffff) {
466  /* 5 bytes */
467  byte = (unsigned char)(0xf8 | (context->unicode_point >> 24));
468  BUFFER_CHAR(*(char *)(&byte));
469  byte = (unsigned char)(0x80 | (context->unicode_point >> 18));
470  BUFFER_CHAR(*(char *)(&byte));
471  byte = (unsigned char)(0x80 | ((context->unicode_point >> 12) & 0x3f));
472  BUFFER_CHAR(*(char *)(&byte));
473  byte = (unsigned char)(0x80 | ((context->unicode_point >> 6) & 0x3f));
474  BUFFER_CHAR(*(char *)(&byte));
475  byte = (unsigned char)(0x80 | (context->unicode_point & 0x3f));
476  BUFFER_CHAR(*(char *)(&byte));
477  } else if (context->unicode_point <= 0x7fffffff) {
478  /* 6 bytes */
479  byte = (unsigned char)(0xfc | (context->unicode_point >> 30));
480  BUFFER_CHAR(*(char *)(&byte));
481  byte = (unsigned char)(0x80 | ((context->unicode_point >> 24) & 0x3f));
482  BUFFER_CHAR(*(char *)(&byte));
483  byte = (unsigned char)(0x80 | ((context->unicode_point >> 18) & 0x3f));
484  BUFFER_CHAR(*(char *)(&byte));
485  byte = (unsigned char)(0x80 | ((context->unicode_point >> 12) & 0x3f));
486  BUFFER_CHAR(*(char *)(&byte));
487  byte = (unsigned char)(0x80 | ((context->unicode_point >> 6) & 0x3f));
488  BUFFER_CHAR(*(char *)(&byte));
489  byte = (unsigned char)(0x80 | (context->unicode_point & 0x3f));
490  BUFFER_CHAR(*(char *)(&byte));
491  } else {
493  }
494  context->state = LaxJsonStateString;
495  }
496  break;
/* After a property name: skip whitespace and comments until the colon. */
497  case LaxJsonStateColon:
498  switch (c) {
499  case WHITESPACE:
500  /* ignore it */
501  break;
502  case '/':
503  context->state = LaxJsonStateCommentBegin;
505  break;
506  case ':':
507  context->state = LaxJsonStateValue;
508  context->string_type = LaxJsonTypeString;
510  break;
511  default:
513  }
514  break;
/* Start of a value: the first byte selects the kind of value. */
515  case LaxJsonStateValue:
516  switch (c) {
517  case WHITESPACE:
518  /* ignore */
519  break;
520  case '/':
521  context->state = LaxJsonStateCommentBegin;
523  break;
524  case '{':
525  if (context->begin(context, LaxJsonTypeObject))
526  return LaxJsonErrorAborted;
527  context->state = LaxJsonStateObject;
528  break;
529  case '[':
530  if (context->begin(context, LaxJsonTypeArray))
531  return LaxJsonErrorAborted;
532  context->state = LaxJsonStateArray;
533  break;
534  case '\'':
535  case '"':
536  context->state = LaxJsonStateString;
537  context->delim = c;
538  context->value_buffer_index = 0;
539  break;
540  case '-':
541  context->state = LaxJsonStateNumber;
542  context->value_buffer[0] = c;
543  context->value_buffer_index = 1;
544  break;
/* A leading '+' starts a number but is not stored in the buffer. */
545  case '+':
546  context->state = LaxJsonStateNumber;
547  context->value_buffer_index = 0;
548  break;
549  case DIGIT:
550  context->state = LaxJsonStateNumber;
551  context->value_buffer_index = 1;
552  context->value_buffer[0] = c;
553  break;
/* true/false/null: the primitive callback fires on the first letter; the
 * remaining letters are then matched in LaxJsonStateExpect. */
554  case 't':
555  if (context->primitive(context, LaxJsonTypeTrue))
556  return LaxJsonErrorAborted;
557  context->state = LaxJsonStateExpect;
558  context->expected = "rue";
559  break;
560  case 'f':
561  if (context->primitive(context, LaxJsonTypeFalse))
562  return LaxJsonErrorAborted;
563  context->state = LaxJsonStateExpect;
564  context->expected = "alse";
565  break;
566  case 'n':
567  if (context->primitive(context, LaxJsonTypeNull))
568  return LaxJsonErrorAborted;
569  context->state = LaxJsonStateExpect;
570  context->expected = "ull";
571  break;
572  default:
574  }
575  break;
/* Inside an array: skip separators, close on ']', otherwise start a value. */
576  case LaxJsonStateArray:
577  switch (c) {
578  case WHITESPACE:
579  case ',':
580  /* ignore */
581  break;
582  case '/':
583  context->state = LaxJsonStateCommentBegin;
585  break;
586  case ']':
587  if (context->end(context, LaxJsonTypeArray))
588  return LaxJsonErrorAborted;
589  pop_state(context);
590  break;
/* Any other byte begins an element: switch to the value state and
 * dispatch this same byte again. */
591  default:
592  context->state = LaxJsonStateValue;
594 
595  /* rewind 1 character */
596  data -= 1;
597  context->column -= 1;
598  continue;
599  }
600  break;
/* Integer part of a number. */
601  case LaxJsonStateNumber:
602  switch (c) {
603  case DIGIT:
604  BUFFER_CHAR(c);
605  break;
606  case '.':
607  BUFFER_CHAR(c);
608  context->state = LaxJsonStateNumberDecimal;
609  break;
/* A terminator ends the number: report the NUL-terminated text, restore
 * the previous state and dispatch the terminator again in that state. */
610  case NUMBER_TERMINATOR:
611  BUFFER_CHAR('\0');
612  if (context->number(context, context->value_buffer))
613  return LaxJsonErrorAborted;
614  pop_state(context);
615 
616  /* rewind 1 */
617  data -= 1;
618  context->column -= 1;
619  continue;
620  default:
622  }
623  break;
/* NOTE(review): no `case` label is visible ahead of this switch. It
 * follows the '.' transition above, so it is presumably the
 * LaxJsonStateNumberDecimal state -- confirm. */
625  switch (c) {
626  case DIGIT:
627  BUFFER_CHAR(c);
628  break;
/* Either spelling of the exponent marker is stored as lowercase 'e'. */
629  case 'e':
630  case 'E':
631  BUFFER_CHAR('e');
633  break;
/* Hand the terminator back to LaxJsonStateNumber, which reports the number. */
634  case NUMBER_TERMINATOR:
635  context->state = LaxJsonStateNumber;
636  /* rewind 1 */
637  data -= 1;
638  context->column -= 1;
639  break;
640  default:
642  }
643  break;
/* NOTE(review): no `case` label is visible ahead of this switch. It
 * accepts a sign or digit, so it is presumably the
 * LaxJsonStateNumberExponentSign state -- confirm. */
645  switch (c) {
646  case DIGIT:
647  case '+':
648  case '-':
649  BUFFER_CHAR(c);
651  break;
652  default:
654  }
655  break;
/* NOTE(review): no `case` label is visible ahead of this switch. It
 * accepts digits until a terminator, so it is presumably the
 * LaxJsonStateNumberExponent state -- confirm. */
657  switch (c) {
658  case DIGIT:
659  BUFFER_CHAR(c);
660  break;
661  case ',':
662  case WHITESPACE:
663  case ']':
664  case '}':
665  case '/':
666  BUFFER_CHAR('\0');
667  if (context->number(context, context->value_buffer))
668  return LaxJsonErrorAborted;
669  pop_state(context);
670 
671  /* rewind 1 */
672  data -= 1;
673  context->column -= 1;
674  continue;
675  default:
677  }
678  break;
/* Match the remaining letters of a keyword; pop once all are consumed. */
679  case LaxJsonStateExpect:
680  if (c == *context->expected) {
681  context->expected += 1;
682  if (*context->expected == 0) {
683  pop_state(context);
684  }
685  } else {
687  }
688  break;
/* NOTE(review): no `case` label is visible ahead of this switch. It
 * inspects the byte after a '/', so it is presumably the
 * LaxJsonStateCommentBegin state -- confirm. */
690  switch (c) {
691  case '/':
692  context->state = LaxJsonStateCommentLine;
693  break;
694  case '*':
696  break;
697  default:
699  }
700  break;
/* NOTE(review): the `case` labels for the three fragments below are not
 * visible. They look like the line-comment state (a newline pops), the
 * multi-line comment body (watching for '*') and the state after '*'
 * (a '/' pops) -- confirm. */
702  if (c == '\n')
703  pop_state(context);
704  break;
706  if (c == '*')
708  break;
710  if (c == '/')
711  pop_state(context);
712  else
714  break;
715  default:;
716  // not reachable
717  logC("Check this function!");
718  }
719  }
/* End of input: a number still being read has no terminator, so it is
 * reported here before lax_json_eof() is called; any other unfinished
 * state in the visible code returns LaxJsonErrorAborted.
 * NOTE(review): a line appears to be missing after the first case label
 * below, so additional states may share this branch -- confirm. */
720  if (context->state != LaxJsonStateEnd) {
721  switch (context->state) {
722  case LaxJsonStateNumber:
724  BUFFER_CHAR('\0');
725  if (context->number(context, context->value_buffer))
726  return LaxJsonErrorAborted;
727  pop_state(context);
728  break;
729  default:
730  return LaxJsonErrorAborted;
731  }
732  err = lax_json_eof(context);
733  }
734  return err;
735 }
736 
737 enum LaxJsonError lax_json_eof(struct LaxJsonContext *context) {
738  for (;;) {
739  switch (context->state) {
740  case LaxJsonStateEnd:
741  return LaxJsonErrorNone;
743  pop_state(context);
744  continue;
745  default:
747  }
748  }
749 }
750 
751 const char *lax_json_str_err(enum LaxJsonError err) {
752  switch (err) {
753  case LaxJsonErrorNone: return "none";
754  case LaxJsonErrorUnexpectedChar: return "unexpected character";
755  case LaxJsonErrorExpectedEof: return "expected end of file";
756  case LaxJsonErrorExceededMaxStack: return "exceeded max stack";
757  case LaxJsonErrorNoMem: return "out of memory";
758  case LaxJsonErrorExceededMaxValueSize: return "exceeded maximum value size";
759  case LaxJsonErrorInvalidHexDigit: return "invalid hex digit";
760  case LaxJsonErrorInvalidUnicodePoint: return "invalid unicode point";
761  case LaxJsonErrorExpectedColon: return "expected colon";
762  case LaxJsonErrorUnexpectedEof: return "unexpected end of file";
763  case LaxJsonErrorAborted: return "aborted";
764  default:;
765  // return invalid error code
766  }
767  return "invalid error code";
768 }
#define BUFFER_CHAR(c)
int(* begin)(struct LaxJsonContext *, enum LaxJsonType type)
Definition: laxjson.h:71
free(s)
int state_stack_size
Definition: laxjson.h:85
enum LaxJsonType string_type
Definition: laxjson.h:96
int value_buffer_size
Definition: laxjson.h:89
char * value_buffer
Definition: laxjson.h:87
int(* end)(struct LaxJsonContext *, enum LaxJsonType type)
Definition: laxjson.h:73
#define VALID_UNQUOTED
Definition: laxjson.c:90
int value_buffer_index
Definition: laxjson.h:88
char data[7]
Definition: libsheepy.c:19690
#define WHITESPACE
Definition: laxjson.c:15
enum LaxJsonError lax_json_eof(struct LaxJsonContext *context)
Definition: laxjson.c:737
#define err(str)
Definition: tpool.c:35
enum LaxJsonError lax_json_feed(struct LaxJsonContext *context, int size, const char *data)
Definition: laxjson.c:219
#define DIGIT
Definition: laxjson.c:23
unsigned int unicode_point
Definition: laxjson.h:91
struct LaxJsonContext * lax_json_create(void)
Definition: laxjson.c:161
char c
int state_stack_index
Definition: laxjson.h:84
LaxJsonError
Definition: laxjson.h:47
char delim
Definition: laxjson.h:95
void lax_json_destroy(struct LaxJsonContext *context)
Definition: laxjson.c:191
char * expected
Definition: laxjson.h:94
int max_state_stack_size
Definition: laxjson.h:78
int(* string)(struct LaxJsonContext *, enum LaxJsonType type, const char *value, int length)
Definition: laxjson.h:65
int(* primitive)(struct LaxJsonContext *, enum LaxJsonType type)
Definition: laxjson.h:69
enum LaxJsonState state
Definition: laxjson.h:82
LaxJsonState
Definition: laxjson.h:26
int(* number)(struct LaxJsonContext *, char *x)
Definition: laxjson.h:67
const char * lax_json_str_err(enum LaxJsonError err)
Definition: laxjson.c:751
#define NUMBER_TERMINATOR
Definition: laxjson.c:112
int max_value_buffer_size
Definition: laxjson.h:79
#define PUSH_STATE(state)
enum LaxJsonState * state_stack
Definition: laxjson.h:83
unsigned int unicode_digit_index
Definition: laxjson.h:92
#define logC(...)
Definition: libsheepy.h:1059