laxjson.c (28222B)
1 /* 2 * Copyright (c) 2013 Andrew Kelley 3 * 4 * This file is part of liblaxjson, which is MIT licensed. 5 * See http://opensource.org/licenses/MIT 6 */ 7 8 #include "laxjson.h" 9 10 #include <stdlib.h> 11 #include <assert.h> 12 13 #include "../../release/libsheepy.h" 14 15 #define WHITESPACE \ 16 ' ': \ 17 case '\t': \ 18 case '\n': \ 19 case '\f': \ 20 case '\r': \ 21 case 0xb 22 23 #define DIGIT \ 24 '0': \ 25 case '1': \ 26 case '2': \ 27 case '3': \ 28 case '4': \ 29 case '5': \ 30 case '6': \ 31 case '7': \ 32 case '8': \ 33 case '9' 34 35 #define ALPHANUMERIC \ 36 'a': \ 37 case 'b': \ 38 case 'c': \ 39 case 'd': \ 40 case 'e': \ 41 case 'f': \ 42 case 'g': \ 43 case 'h': \ 44 case 'i': \ 45 case 'j': \ 46 case 'k': \ 47 case 'l': \ 48 case 'm': \ 49 case 'n': \ 50 case 'o': \ 51 case 'p': \ 52 case 'q': \ 53 case 'r': \ 54 case 's': \ 55 case 't': \ 56 case 'u': \ 57 case 'v': \ 58 case 'w': \ 59 case 'x': \ 60 case 'y': \ 61 case 'z': \ 62 case 'A': \ 63 case 'B': \ 64 case 'C': \ 65 case 'D': \ 66 case 'E': \ 67 case 'F': \ 68 case 'G': \ 69 case 'H': \ 70 case 'I': \ 71 case 'J': \ 72 case 'K': \ 73 case 'L': \ 74 case 'M': \ 75 case 'N': \ 76 case 'O': \ 77 case 'P': \ 78 case 'Q': \ 79 case 'R': \ 80 case 'S': \ 81 case 'T': \ 82 case 'U': \ 83 case 'V': \ 84 case 'W': \ 85 case 'X': \ 86 case 'Y': \ 87 case 'Z': \ 88 case DIGIT 89 90 #define VALID_UNQUOTED \ 91 '-': \ 92 case '_': \ 93 case '#': \ 94 case '$': \ 95 case '%': \ 96 case '&': \ 97 case '<': \ 98 case '>': \ 99 case '=': \ 100 case '~': \ 101 case '|': \ 102 case '@': \ 103 case '?': \ 104 case ';': \ 105 case '.': \ 106 case '+': \ 107 case '*': \ 108 case '(': \ 109 case ')': \ 110 case ALPHANUMERIC 111 112 #define NUMBER_TERMINATOR \ 113 ',': \ 114 case WHITESPACE: \ 115 case ']': \ 116 case '}': \ 117 case '/' 118 119 static const unsigned int HEX_MULT[] = {4096, 256, 16, 1}; 120 121 /* static const char *STATE_NAMES[] = { */ 122 /* "LaxJsonStateValue", */ 123 /* "LaxJsonStateObject", */ 124 /* "LaxJsonStateArray", */ 125 /* "LaxJsonStateString", */ 126 /* "LaxJsonStateStringEscape", */ 127 /* "LaxJsonStateUnicodeEscape", */ 128 /* "LaxJsonStateBareProp", */ 129 /* "LaxJsonStateCommentBegin", */ 130 /* "LaxJsonStateCommentLine", */ 131 /* "LaxJsonStateCommentMultiLine", */ 132 /* "LaxJsonStateCommentMultiLineStar", */ 133 /* "LaxJsonStateExpect", */ 134 /* "LaxJsonStateEnd", */ 135 /* "LaxJsonStateColon", */ 136 /* "LaxJsonStateNumber", */ 137 /* "LaxJsonStateNumberDecimal", */ 138 /* "LaxJsonStateNumberExponent", */ 139 /* "LaxJsonStateNumberExponentSign" */ 140 /* }; */ 141 142 static enum LaxJsonError push_state(struct LaxJsonContext *context, enum LaxJsonState state) { 143 enum LaxJsonState *new_ptr; 144 145 /* fprintf(stderr, "push state %s\n", STATE_NAMES[state]); */ 146 if (context->state_stack_index >= context->state_stack_size) { 147 context->state_stack_size += 1024; 148 if (context->state_stack_size > context->max_state_stack_size) 149 return LaxJsonErrorExceededMaxStack; 150 new_ptr = realloc(context->state_stack, 151 (size_t)context->state_stack_size * sizeof(enum LaxJsonState)); 152 if (!new_ptr) 153 return LaxJsonErrorNoMem; 154 context->state_stack = new_ptr; 155 } 156 context->state_stack[context->state_stack_index] = state; 157 context->state_stack_index += 1; 158 return LaxJsonErrorNone; 159 } 160 161 struct LaxJsonContext *lax_json_create(void) { 162 struct LaxJsonContext *context = calloc(1, sizeof(struct LaxJsonContext)); 163 164 if (!context) 165 return NULL; 166 167 context->value_buffer_size = 1024; 168 context->value_buffer = malloc((size_t)context->value_buffer_size); 169 170 if (!context->value_buffer) { 171 lax_json_destroy(context); 172 return NULL; 173 } 174 175 context->state_stack_size = 1024; 176 context->state_stack = malloc((size_t)context->state_stack_size * sizeof(enum LaxJsonState)); 177 if (!context->state_stack) { 178 lax_json_destroy(context); 179 return NULL; 180 } 181 182 context->line = 1; 183 context->max_state_stack_size = 16384; 184 context->max_value_buffer_size = 1048576; /* 1 MB */ 185 186 push_state(context, LaxJsonStateEnd); 187 188 return context; 189 } 190 191 void lax_json_destroy(struct LaxJsonContext *context) { 192 free(context->state_stack); 193 free(context->value_buffer); 194 free(context); 195 } 196 197 static void pop_state(struct LaxJsonContext *context) { 198 context->state_stack_index -= 1; 199 context->state = context->state_stack[context->state_stack_index]; 200 assert(context->state_stack_index >= 0); 201 } 202 203 static enum LaxJsonError buffer_char(struct LaxJsonContext *context, char c) { 204 char *new_ptr; 205 if (context->value_buffer_index >= context->value_buffer_size) { 206 context->value_buffer_size += 16384; 207 if (context->value_buffer_size > context->max_value_buffer_size) 208 return LaxJsonErrorExceededMaxValueSize; 209 new_ptr = realloc(context->value_buffer, (size_t)context->value_buffer_size); 210 if (!new_ptr) 211 return LaxJsonErrorNoMem; 212 context->value_buffer = new_ptr; 213 } 214 context->value_buffer[context->value_buffer_index] = c; 215 context->value_buffer_index += 1; 216 return LaxJsonErrorNone; 217 } 218 219 enum LaxJsonError lax_json_feed(struct LaxJsonContext *context, int size, const char *data) { 220 #define PUSH_STATE(state) \ 221 err = push_state(context, state); \ 222 if (err) return err; 223 #define BUFFER_CHAR(c) \ 224 err = buffer_char(context, c); \ 225 if (err) return err; 226 227 enum LaxJsonError err = LaxJsonErrorNone; 228 unsigned int x; 229 const char *end; 230 char c; 231 unsigned char byte; 232 for (end = data + size; data < end; data += 1) { 233 c = *data; 234 if (c == '\n') { 235 //puts(STATE_NAMES[context->state]); 236 context->line += 1; 237 context->column = 0; 238 } else { 239 context->column += 1; 240 } 241 /* fprintf(stderr, "line %d col %d state %s char %c\n", context->line, context->column, 242 STATE_NAMES[context->state], c); */ 243 /* printf("line %d col %d state %s char %c\n", context->line, context->column, */ 244 /* STATE_NAMES[context->state], c); */ 245 switch (context->state) { 246 case LaxJsonStateEnd: 247 switch (c) { 248 case WHITESPACE: 249 /* ignore */ 250 break; 251 case '/': 252 context->state = LaxJsonStateCommentBegin; 253 PUSH_STATE(LaxJsonStateEnd); 254 break; 255 default: 256 return LaxJsonErrorExpectedEof; 257 } 258 break; 259 case LaxJsonStateObject: 260 switch (c) { 261 case WHITESPACE: 262 case ',': 263 /* do nothing except eat these characters */ 264 break; 265 case '/': 266 context->state = LaxJsonStateCommentBegin; 267 PUSH_STATE(LaxJsonStateObject); 268 break; 269 case '"': 270 case '\'': 271 context->state = LaxJsonStateString; 272 context->value_buffer_index = 0; 273 context->delim = c; 274 context->string_type = LaxJsonTypeProperty; 275 PUSH_STATE(LaxJsonStateColon); 276 break; 277 case VALID_UNQUOTED: 278 context->state = LaxJsonStateBareProp; 279 context->value_buffer[0] = c; 280 context->value_buffer_index = 1; 281 context->delim = 0; 282 break; 283 case '}': 284 if (context->end(context, LaxJsonTypeObject)) 285 return LaxJsonErrorAborted; 286 pop_state(context); 287 break; 288 default: 289 return LaxJsonErrorUnexpectedChar; 290 } 291 break; 292 case LaxJsonStateBareProp: 293 switch (c) { 294 case VALID_UNQUOTED: 295 BUFFER_CHAR(c); 296 break; 297 case WHITESPACE: 298 BUFFER_CHAR('\0'); 299 if (context->string(context, LaxJsonTypeProperty, context->value_buffer, 300 context->value_buffer_index - 1)) 301 { 302 return LaxJsonErrorAborted; 303 } 304 context->state = LaxJsonStateColon; 305 break; 306 case ':': 307 BUFFER_CHAR('\0'); 308 if (context->string(context, LaxJsonTypeProperty, context->value_buffer, 309 context->value_buffer_index - 1)) 310 { 311 return LaxJsonErrorAborted; 312 } 313 context->state = LaxJsonStateValue; 314 context->string_type = LaxJsonTypeString; 315 PUSH_STATE(LaxJsonStateObject); 316 break; 317 default: 318 return LaxJsonErrorUnexpectedChar; 319 } 320 break; 321 case LaxJsonStateString: 322 if (c == context->delim) { 323 BUFFER_CHAR('\0'); 324 if (context->string(context, context->string_type, context->value_buffer, 325 context->value_buffer_index - 1)) 326 { 327 return LaxJsonErrorAborted; 328 } 329 pop_state(context); 330 } else if (c == '\\') { 331 context->state = LaxJsonStateStringEscape; 332 } else { 333 BUFFER_CHAR(c); 334 } 335 break; 336 case LaxJsonStateStringEscape: 337 switch (c) { 338 case '\'': 339 case '"': 340 case '/': 341 case '\\': 342 BUFFER_CHAR(c); 343 context->state = LaxJsonStateString; 344 break; 345 case 'b': 346 BUFFER_CHAR('\b'); 347 context->state = LaxJsonStateString; 348 break; 349 case 'f': 350 BUFFER_CHAR('\f'); 351 context->state = LaxJsonStateString; 352 break; 353 case 'n': 354 BUFFER_CHAR('\n'); 355 context->state = LaxJsonStateString; 356 break; 357 case 'r': 358 BUFFER_CHAR('\r'); 359 context->state = LaxJsonStateString; 360 break; 361 case 't': 362 BUFFER_CHAR('\t'); 363 context->state = LaxJsonStateString; 364 break; 365 case 'u': 366 context->state = LaxJsonStateUnicodeEscape; 367 context->unicode_digit_index = 0; 368 context->unicode_point = 0; 369 break; 370 default:; 371 // "\" should be escaped 372 return LaxJsonErrorAborted; 373 } 374 break; 375 case LaxJsonStateUnicodeEscape: 376 switch (c) { 377 case '0': 378 x = 0; 379 break; 380 case '1': 381 x = 1; 382 break; 383 case '2': 384 x = 2; 385 break; 386 case '3': 387 x = 3; 388 break; 389 case '4': 390 x = 4; 391 break; 392 case '5': 393 x = 5; 394 break; 395 case '6': 396 x = 6; 397 break; 398 case '7': 399 x = 7; 400 break; 401 case '8': 402 x = 8; 403 break; 404 case '9': 405 x = 9; 406 break; 407 case 'a': 408 case 'A': 409 x = 10; 410 break; 411 case 'b': 412 case 'B': 413 x = 11; 414 break; 415 case 'c': 416 case 'C': 417 x = 12; 418 break; 419 case 'd': 420 case 'D': 421 x = 13; 422 break; 423 case 'e': 424 case 'E': 425 x = 14; 426 break; 427 case 'f': 428 case 'F': 429 x = 15; 430 break; 431 default: 432 return LaxJsonErrorInvalidHexDigit; 433 } 434 context->unicode_point += x * HEX_MULT[context->unicode_digit_index]; 435 context->unicode_digit_index += 1; 436 if (context->unicode_digit_index == 4) { 437 if (context->unicode_point <= 0x007f) { 438 /* 1 byte */ 439 BUFFER_CHAR((char)context->unicode_point); 440 context->state = LaxJsonStateString; 441 } else if (context->unicode_point <= 0x07ff) { 442 /* 2 bytes */ 443 byte = (unsigned char)(0xc0 | (context->unicode_point >> 6)); 444 BUFFER_CHAR(*(char *)(&byte)); 445 byte = (unsigned char)(0x80 | (context->unicode_point & 0x3f)); 446 BUFFER_CHAR(*(char *)(&byte)); 447 } else if (context->unicode_point <= 0xffff) { 448 /* 3 bytes */ 449 byte = (unsigned char)(0xe0 | (context->unicode_point >> 12)); 450 BUFFER_CHAR(*(char *)(&byte)); 451 byte = (unsigned char)(0x80 | ((context->unicode_point >> 6) & 0x3f)); 452 BUFFER_CHAR(*(char *)(&byte)); 453 byte = (unsigned char)(0x80 | (context->unicode_point & 0x3f)); 454 BUFFER_CHAR(*(char *)(&byte)); 455 } else if (context->unicode_point <= 0x1fffff) { 456 /* 4 bytes */ 457 byte = (unsigned char)(0xf0 | (context->unicode_point >> 18)); 458 BUFFER_CHAR(*(char *)(&byte)); 459 byte = (unsigned char)(0x80 | ((context->unicode_point >> 12) & 0x3f)); 460 BUFFER_CHAR(*(char *)(&byte)); 461 byte = (unsigned char)(0x80 | ((context->unicode_point >> 6) & 0x3f)); 462 BUFFER_CHAR(*(char *)(&byte)); 463 byte = (unsigned char)(0x80 | (context->unicode_point & 0x3f)); 464 BUFFER_CHAR(*(char *)(&byte)); 465 } else if (context->unicode_point <= 0x3ffffff) { 466 /* 5 bytes */ 467 byte = (unsigned char)(0xf8 | (context->unicode_point >> 24)); 468 BUFFER_CHAR(*(char *)(&byte)); 469 byte = (unsigned char)(0x80 | (context->unicode_point >> 18)); 470 BUFFER_CHAR(*(char *)(&byte)); 471 byte = (unsigned char)(0x80 | ((context->unicode_point >> 12) & 0x3f)); 472 BUFFER_CHAR(*(char *)(&byte)); 473 byte = (unsigned char)(0x80 | ((context->unicode_point >> 6) & 0x3f)); 474 BUFFER_CHAR(*(char *)(&byte)); 475 byte = (unsigned char)(0x80 | (context->unicode_point & 0x3f)); 476 BUFFER_CHAR(*(char *)(&byte)); 477 } else if (context->unicode_point <= 0x7fffffff) { 478 /* 6 bytes */ 479 byte = (unsigned char)(0xfc | (context->unicode_point >> 30)); 480 BUFFER_CHAR(*(char *)(&byte)); 481 byte = (unsigned char)(0x80 | ((context->unicode_point >> 24) & 0x3f)); 482 BUFFER_CHAR(*(char *)(&byte)); 483 byte = (unsigned char)(0x80 | ((context->unicode_point >> 18) & 0x3f)); 484 BUFFER_CHAR(*(char *)(&byte)); 485 byte = (unsigned char)(0x80 | ((context->unicode_point >> 12) & 0x3f)); 486 BUFFER_CHAR(*(char *)(&byte)); 487 byte = (unsigned char)(0x80 | ((context->unicode_point >> 6) & 0x3f)); 488 BUFFER_CHAR(*(char *)(&byte)); 489 byte = (unsigned char)(0x80 | (context->unicode_point & 0x3f)); 490 BUFFER_CHAR(*(char *)(&byte)); 491 } else { 492 return LaxJsonErrorInvalidUnicodePoint; 493 } 494 context->state = LaxJsonStateString; 495 } 496 break; 497 case LaxJsonStateColon: 498 switch (c) { 499 case WHITESPACE: 500 /* ignore it */ 501 break; 502 case '/': 503 context->state = LaxJsonStateCommentBegin; 504 PUSH_STATE(LaxJsonStateColon); 505 break; 506 case ':': 507 context->state = LaxJsonStateValue; 508 context->string_type = LaxJsonTypeString; 509 PUSH_STATE(LaxJsonStateObject); 510 break; 511 default: 512 return LaxJsonErrorExpectedColon; 513 } 514 break; 515 case LaxJsonStateValue: 516 switch (c) { 517 case WHITESPACE: 518 /* ignore */ 519 break; 520 case '/': 521 context->state = LaxJsonStateCommentBegin; 522 PUSH_STATE(LaxJsonStateValue); 523 break; 524 case '{': 525 if (context->begin(context, LaxJsonTypeObject)) 526 return LaxJsonErrorAborted; 527 context->state = LaxJsonStateObject; 528 break; 529 case '[': 530 if (context->begin(context, LaxJsonTypeArray)) 531 return LaxJsonErrorAborted; 532 context->state = LaxJsonStateArray; 533 break; 534 case '\'': 535 case '"': 536 context->state = LaxJsonStateString; 537 context->delim = c; 538 context->value_buffer_index = 0; 539 break; 540 case '-': 541 context->state = LaxJsonStateNumber; 542 context->value_buffer[0] = c; 543 context->value_buffer_index = 1; 544 break; 545 case '+': 546 context->state = LaxJsonStateNumber; 547 context->value_buffer_index = 0; 548 break; 549 case DIGIT: 550 context->state = LaxJsonStateNumber; 551 context->value_buffer_index = 1; 552 context->value_buffer[0] = c; 553 break; 554 case 't': 555 if (context->primitive(context, LaxJsonTypeTrue)) 556 return LaxJsonErrorAborted; 557 context->state = LaxJsonStateExpect; 558 context->expected = "rue"; 559 break; 560 case 'f': 561 if (context->primitive(context, LaxJsonTypeFalse)) 562 return LaxJsonErrorAborted; 563 context->state = LaxJsonStateExpect; 564 context->expected = "alse"; 565 break; 566 case 'n': 567 if (context->primitive(context, LaxJsonTypeNull)) 568 return LaxJsonErrorAborted; 569 context->state = LaxJsonStateExpect; 570 context->expected = "ull"; 571 break; 572 default: 573 return LaxJsonErrorUnexpectedChar; 574 } 575 break; 576 case LaxJsonStateArray: 577 switch (c) { 578 case WHITESPACE: 579 case ',': 580 /* ignore */ 581 break; 582 case '/': 583 context->state = LaxJsonStateCommentBegin; 584 PUSH_STATE(LaxJsonStateArray); 585 break; 586 case ']': 587 if (context->end(context, LaxJsonTypeArray)) 588 return LaxJsonErrorAborted; 589 pop_state(context); 590 break; 591 default: 592 context->state = LaxJsonStateValue; 593 PUSH_STATE(LaxJsonStateArray); 594 595 /* rewind 1 character */ 596 data -= 1; 597 context->column -= 1; 598 continue; 599 } 600 break; 601 case LaxJsonStateNumber: 602 switch (c) { 603 case DIGIT: 604 BUFFER_CHAR(c); 605 break; 606 case '.': 607 BUFFER_CHAR(c); 608 context->state = LaxJsonStateNumberDecimal; 609 break; 610 case NUMBER_TERMINATOR: 611 BUFFER_CHAR('\0'); 612 if (context->number(context, context->value_buffer)) 613 return LaxJsonErrorAborted; 614 pop_state(context); 615 616 /* rewind 1 */ 617 data -= 1; 618 context->column -= 1; 619 continue; 620 default: 621 return LaxJsonErrorUnexpectedChar; 622 } 623 break; 624 case LaxJsonStateNumberDecimal: 625 switch (c) { 626 case DIGIT: 627 BUFFER_CHAR(c); 628 break; 629 case 'e': 630 case 'E': 631 BUFFER_CHAR('e'); 632 context->state = LaxJsonStateNumberExponentSign; 633 break; 634 case NUMBER_TERMINATOR: 635 context->state = LaxJsonStateNumber; 636 /* rewind 1 */ 637 data -= 1; 638 context->column -= 1; 639 break; 640 default: 641 return LaxJsonErrorUnexpectedChar; 642 } 643 break; 644 case LaxJsonStateNumberExponentSign: 645 switch (c) { 646 case DIGIT: 647 case '+': 648 case '-': 649 BUFFER_CHAR(c); 650 context->state = LaxJsonStateNumberExponent; 651 break; 652 default: 653 return LaxJsonErrorUnexpectedChar; 654 } 655 break; 656 case LaxJsonStateNumberExponent: 657 switch (c) { 658 case DIGIT: 659 BUFFER_CHAR(c); 660 break; 661 case ',': 662 case WHITESPACE: 663 case ']': 664 case '}': 665 case '/': 666 BUFFER_CHAR('\0'); 667 if (context->number(context, context->value_buffer)) 668 return LaxJsonErrorAborted; 669 pop_state(context); 670 671 /* rewind 1 */ 672 data -= 1; 673 context->column -= 1; 674 continue; 675 default: 676 return LaxJsonErrorUnexpectedChar; 677 } 678 break; 679 case LaxJsonStateExpect: 680 if (c == *context->expected) { 681 context->expected += 1; 682 if (*context->expected == 0) { 683 pop_state(context); 684 } 685 } else { 686 return LaxJsonErrorUnexpectedChar; 687 } 688 break; 689 case LaxJsonStateCommentBegin: 690 switch (c) { 691 case '/': 692 context->state = LaxJsonStateCommentLine; 693 break; 694 case '*': 695 context->state = LaxJsonStateCommentMultiLine; 696 break; 697 default: 698 return LaxJsonErrorUnexpectedChar; 699 } 700 break; 701 case LaxJsonStateCommentLine: 702 if (c == '\n') 703 pop_state(context); 704 break; 705 case LaxJsonStateCommentMultiLine: 706 if (c == '*') 707 context->state = LaxJsonStateCommentMultiLineStar; 708 break; 709 case LaxJsonStateCommentMultiLineStar: 710 if (c == '/') 711 pop_state(context); 712 else 713 context->state = LaxJsonStateCommentMultiLine; 714 break; 715 default:; 716 // not reachable 717 logC("Check this function!"); 718 } 719 } 720 if (context->state != LaxJsonStateEnd) { 721 switch (context->state) { 722 case LaxJsonStateNumber: 723 case LaxJsonStateNumberDecimal: 724 BUFFER_CHAR('\0'); 725 if (context->number(context, context->value_buffer)) 726 return LaxJsonErrorAborted; 727 pop_state(context); 728 break; 729 default: 730 return LaxJsonErrorAborted; 731 } 732 err = lax_json_eof(context); 733 } 734 return err; 735 } 736 737 enum LaxJsonError lax_json_eof(struct LaxJsonContext *context) { 738 for (;;) { 739 switch (context->state) { 740 case LaxJsonStateEnd: 741 return LaxJsonErrorNone; 742 case LaxJsonStateCommentLine: 743 pop_state(context); 744 continue; 745 default: 746 return LaxJsonErrorUnexpectedEof; 747 } 748 } 749 } 750 751 const char *lax_json_str_err(enum LaxJsonError err) { 752 switch (err) { 753 case LaxJsonErrorNone: return "none"; 754 case LaxJsonErrorUnexpectedChar: return "unexpected character"; 755 case LaxJsonErrorExpectedEof: return "expected end of file"; 756 case LaxJsonErrorExceededMaxStack: return "exceeded max stack"; 757 case LaxJsonErrorNoMem: return "out of memory"; 758 case LaxJsonErrorExceededMaxValueSize: return "exceeded maximum value size"; 759 case LaxJsonErrorInvalidHexDigit: return "invalid hex digit"; 760 case LaxJsonErrorInvalidUnicodePoint: return "invalid unicode point"; 761 case LaxJsonErrorExpectedColon: return "expected colon"; 762 case LaxJsonErrorUnexpectedEof: return "unexpected end of file"; 763 case LaxJsonErrorAborted: return "aborted"; 764 default:; 765 // return invalid error code 766 } 767 return "invalid error code"; 768 }