libsheepy
ymlScanner.c
Go to the documentation of this file.
1 
2 /*
3  * Introduction
4  * ************
5  *
6  * The following notes assume that you are familiar with the YAML specification
7  * (http://yaml.org/spec/cvs/current.html). We mostly follow it, although in
8  * some cases we are less restrictive than it requires.
9  *
10  * The process of transforming a YAML stream into a sequence of events is
11  * divided into two steps: Scanning and Parsing.
12  *
13  * The Scanner transforms the input stream into a sequence of tokens, while the
14  * parser transforms the sequence of tokens produced by the Scanner into a
15  * sequence of parsing events.
16  *
17  * The Scanner is rather clever and complicated. The Parser, on the contrary,
18  * is a straightforward implementation of a recursive-descent parser (or,
19  * LL(1) parser, as it is usually called).
20  *
21  * Actually there are two issues of Scanning that might be called "clever", the
22  * rest is quite straightforward. The issues are "block collection start" and
23  * "simple keys". Both issues are explained below in detail.
24  *
25  * Here the Scanning step is explained and implemented. We start with the list
26  * of all the tokens produced by the Scanner together with short descriptions.
27  *
28  * Now, tokens:
29  *
30  * STREAM-START(encoding) # The stream start.
31  * STREAM-END # The stream end.
32  * VERSION-DIRECTIVE(major,minor) # The '%YAML' directive.
33  * TAG-DIRECTIVE(handle,prefix) # The '%TAG' directive.
34  * DOCUMENT-START # '---'
35  * DOCUMENT-END # '...'
36  * BLOCK-SEQUENCE-START # Indentation increase denoting a block
37  * BLOCK-MAPPING-START # sequence or a block mapping.
38  * BLOCK-END # Indentation decrease.
39  * FLOW-SEQUENCE-START # '['
40  * FLOW-SEQUENCE-END # ']'
41  * FLOW-MAPPING-START # '{'
42  * FLOW-MAPPING-END # '}'
43  * BLOCK-ENTRY # '-'
44  * FLOW-ENTRY # ','
45  * KEY # '?' or nothing (simple keys).
46  * VALUE # ':'
47  * ALIAS(anchor) # '*anchor'
48  * ANCHOR(anchor) # '&anchor'
49  * TAG(handle,suffix) # '!handle!suffix'
50  * SCALAR(value,style) # A scalar.
51  *
52  * The following two tokens are "virtual" tokens denoting the beginning and the
53  * end of the stream:
54  *
55  * STREAM-START(encoding)
56  * STREAM-END
57  *
58  * We pass the information about the input stream encoding with the
59  * STREAM-START token.
60  *
61  * The next two tokens are responsible for directives:
62  *
63  * VERSION-DIRECTIVE(major,minor)
64  * TAG-DIRECTIVE(handle,prefix)
65  *
66  * Example:
67  *
68  * %YAML 1.1
69  * %TAG ! !foo
70  * %TAG !yaml! tag:yaml.org,2002:
71  * ---
72  *
73  * The corresponding sequence of tokens:
74  *
75  * STREAM-START(utf-8)
76  * VERSION-DIRECTIVE(1,1)
77  * TAG-DIRECTIVE("!","!foo")
78  * TAG-DIRECTIVE("!yaml","tag:yaml.org,2002:")
79  * DOCUMENT-START
80  * STREAM-END
81  *
82  * Note that the VERSION-DIRECTIVE and TAG-DIRECTIVE tokens occupy a whole
83  * line.
84  *
85  * The document start and end indicators are represented by:
86  *
87  * DOCUMENT-START
88  * DOCUMENT-END
89  *
90  * Note that if a YAML stream contains an implicit document (without '---'
91  * and '...' indicators), no DOCUMENT-START and DOCUMENT-END tokens will be
92  * produced.
93  *
94  * In the following examples, we present whole documents together with the
95  * produced tokens.
96  *
97  * 1. An implicit document:
98  *
99  * 'a scalar'
100  *
101  * Tokens:
102  *
103  * STREAM-START(utf-8)
104  * SCALAR("a scalar",single-quoted)
105  * STREAM-END
106  *
107  * 2. An explicit document:
108  *
109  * ---
110  * 'a scalar'
111  * ...
112  *
113  * Tokens:
114  *
115  * STREAM-START(utf-8)
116  * DOCUMENT-START
117  * SCALAR("a scalar",single-quoted)
118  * DOCUMENT-END
119  * STREAM-END
120  *
121  * 3. Several documents in a stream:
122  *
123  * 'a scalar'
124  * ---
125  * 'another scalar'
126  * ---
127  * 'yet another scalar'
128  *
129  * Tokens:
130  *
131  * STREAM-START(utf-8)
132  * SCALAR("a scalar",single-quoted)
133  * DOCUMENT-START
134  * SCALAR("another scalar",single-quoted)
135  * DOCUMENT-START
136  * SCALAR("yet another scalar",single-quoted)
137  * STREAM-END
138  *
139  * We have already introduced the SCALAR token above. The following tokens are
140  * used to describe aliases, anchors, tag, and scalars:
141  *
142  * ALIAS(anchor)
143  * ANCHOR(anchor)
144  * TAG(handle,suffix)
145  * SCALAR(value,style)
146  *
147  * The following series of examples illustrate the usage of these tokens:
148  *
149  * 1. A recursive sequence:
150  *
151  * &A [ *A ]
152  *
153  * Tokens:
154  *
155  * STREAM-START(utf-8)
156  * ANCHOR("A")
157  * FLOW-SEQUENCE-START
158  * ALIAS("A")
159  * FLOW-SEQUENCE-END
160  * STREAM-END
161  *
162  * 2. A tagged scalar:
163  *
164  * !!float "3.14" # A good approximation.
165  *
166  * Tokens:
167  *
168  * STREAM-START(utf-8)
169  * TAG("!!","float")
170  * SCALAR("3.14",double-quoted)
171  * STREAM-END
172  *
173  * 3. Various scalar styles:
174  *
175  * --- # Implicit empty plain scalars do not produce tokens.
176  * --- a plain scalar
177  * --- 'a single-quoted scalar'
178  * --- "a double-quoted scalar"
179  * --- |-
180  * a literal scalar
181  * --- >-
182  * a folded
183  * scalar
184  *
185  * Tokens:
186  *
187  * STREAM-START(utf-8)
188  * DOCUMENT-START
189  * DOCUMENT-START
190  * SCALAR("a plain scalar",plain)
191  * DOCUMENT-START
192  * SCALAR("a single-quoted scalar",single-quoted)
193  * DOCUMENT-START
194  * SCALAR("a double-quoted scalar",double-quoted)
195  * DOCUMENT-START
196  * SCALAR("a literal scalar",literal)
197  * DOCUMENT-START
198  * SCALAR("a folded scalar",folded)
199  * STREAM-END
200  *
201  * Now it's time to review collection-related tokens. We will start with
202  * flow collections:
203  *
204  * FLOW-SEQUENCE-START
205  * FLOW-SEQUENCE-END
206  * FLOW-MAPPING-START
207  * FLOW-MAPPING-END
208  * FLOW-ENTRY
209  * KEY
210  * VALUE
211  *
212  * The tokens FLOW-SEQUENCE-START, FLOW-SEQUENCE-END, FLOW-MAPPING-START, and
213  * FLOW-MAPPING-END represent the indicators '[', ']', '{', and '}'
214  * correspondingly. FLOW-ENTRY represents the ',' indicator. Finally the
215  * indicators '?' and ':', which are used for denoting mapping keys and values,
216  * are represented by the KEY and VALUE tokens.
217  *
218  * The following examples show flow collections:
219  *
220  * 1. A flow sequence:
221  *
222  * [item 1, item 2, item 3]
223  *
224  * Tokens:
225  *
226  * STREAM-START(utf-8)
227  * FLOW-SEQUENCE-START
228  * SCALAR("item 1",plain)
229  * FLOW-ENTRY
230  * SCALAR("item 2",plain)
231  * FLOW-ENTRY
232  * SCALAR("item 3",plain)
233  * FLOW-SEQUENCE-END
234  * STREAM-END
235  *
236  * 2. A flow mapping:
237  *
238  * {
239  * a simple key: a value, # Note that the KEY token is produced.
240  * ? a complex key: another value,
241  * }
242  *
243  * Tokens:
244  *
245  * STREAM-START(utf-8)
246  * FLOW-MAPPING-START
247  * KEY
248  * SCALAR("a simple key",plain)
249  * VALUE
250  * SCALAR("a value",plain)
251  * FLOW-ENTRY
252  * KEY
253  * SCALAR("a complex key",plain)
254  * VALUE
255  * SCALAR("another value",plain)
256  * FLOW-ENTRY
257  * FLOW-MAPPING-END
258  * STREAM-END
259  *
260  * A simple key is a key which is not denoted by the '?' indicator. Note that
261  * the Scanner still produces the KEY token whenever it encounters a simple key.
262  *
263  * For scanning block collections, the following tokens are used (note that we
264  * repeat KEY and VALUE here):
265  *
266  * BLOCK-SEQUENCE-START
267  * BLOCK-MAPPING-START
268  * BLOCK-END
269  * BLOCK-ENTRY
270  * KEY
271  * VALUE
272  *
273  * The tokens BLOCK-SEQUENCE-START and BLOCK-MAPPING-START denote indentation
274  * increase that precedes a block collection (cf. the INDENT token in Python).
275  * The token BLOCK-END denotes indentation decrease that ends a block collection
276  * (cf. the DEDENT token in Python). However YAML has some syntax peculiarities
277  * that make detection of these tokens more complex.
278  *
279  * The tokens BLOCK-ENTRY, KEY, and VALUE are used to represent the indicators
280  * '-', '?', and ':' correspondingly.
281  *
282  * The following examples show how the tokens BLOCK-SEQUENCE-START,
283  * BLOCK-MAPPING-START, and BLOCK-END are emitted by the Scanner:
284  *
285  * 1. Block sequences:
286  *
287  * - item 1
288  * - item 2
289  * -
290  * - item 3.1
291  * - item 3.2
292  * -
293  * key 1: value 1
294  * key 2: value 2
295  *
296  * Tokens:
297  *
298  * STREAM-START(utf-8)
299  * BLOCK-SEQUENCE-START
300  * BLOCK-ENTRY
301  * SCALAR("item 1",plain)
302  * BLOCK-ENTRY
303  * SCALAR("item 2",plain)
304  * BLOCK-ENTRY
305  * BLOCK-SEQUENCE-START
306  * BLOCK-ENTRY
307  * SCALAR("item 3.1",plain)
308  * BLOCK-ENTRY
309  * SCALAR("item 3.2",plain)
310  * BLOCK-END
311  * BLOCK-ENTRY
312  * BLOCK-MAPPING-START
313  * KEY
314  * SCALAR("key 1",plain)
315  * VALUE
316  * SCALAR("value 1",plain)
317  * KEY
318  * SCALAR("key 2",plain)
319  * VALUE
320  * SCALAR("value 2",plain)
321  * BLOCK-END
322  * BLOCK-END
323  * STREAM-END
324  *
325  * 2. Block mappings:
326  *
327  * a simple key: a value # The KEY token is produced here.
328  * ? a complex key
329  * : another value
330  * a mapping:
331  * key 1: value 1
332  * key 2: value 2
333  * a sequence:
334  * - item 1
335  * - item 2
336  *
337  * Tokens:
338  *
339  * STREAM-START(utf-8)
340  * BLOCK-MAPPING-START
341  * KEY
342  * SCALAR("a simple key",plain)
343  * VALUE
344  * SCALAR("a value",plain)
345  * KEY
346  * SCALAR("a complex key",plain)
347  * VALUE
348  * SCALAR("another value",plain)
349  * KEY
350  * SCALAR("a mapping",plain)
351  * VALUE
352  * BLOCK-MAPPING-START
353  * KEY
354  * SCALAR("key 1",plain)
355  * VALUE
356  * SCALAR("value 1",plain)
357  * KEY
358  * SCALAR("key 2",plain)
359  * VALUE
360  * SCALAR("value 2",plain)
361  * BLOCK-END
362  * KEY
363  * SCALAR("a sequence",plain)
364  * VALUE
365  * BLOCK-SEQUENCE-START
366  * BLOCK-ENTRY
367  * SCALAR("item 1",plain)
368  * BLOCK-ENTRY
369  * SCALAR("item 2",plain)
370  * BLOCK-END
371  * BLOCK-END
372  * STREAM-END
373  *
374  * YAML does not always require a new block collection to start on a new
375  * line. If the current line contains only '-', '?', and ':' indicators, a new
376  * block collection may start at the current line. The following examples
377  * illustrate this case:
378  *
379  * 1. Collections in a sequence:
380  *
381  * - - item 1
382  * - item 2
383  * - key 1: value 1
384  * key 2: value 2
385  * - ? complex key
386  * : complex value
387  *
388  * Tokens:
389  *
390  * STREAM-START(utf-8)
391  * BLOCK-SEQUENCE-START
392  * BLOCK-ENTRY
393  * BLOCK-SEQUENCE-START
394  * BLOCK-ENTRY
395  * SCALAR("item 1",plain)
396  * BLOCK-ENTRY
397  * SCALAR("item 2",plain)
398  * BLOCK-END
399  * BLOCK-ENTRY
400  * BLOCK-MAPPING-START
401  * KEY
402  * SCALAR("key 1",plain)
403  * VALUE
404  * SCALAR("value 1",plain)
405  * KEY
406  * SCALAR("key 2",plain)
407  * VALUE
408  * SCALAR("value 2",plain)
409  * BLOCK-END
410  * BLOCK-ENTRY
411  * BLOCK-MAPPING-START
412  * KEY
413  * SCALAR("complex key",plain)
414  * VALUE
415  * SCALAR("complex value",plain)
416  * BLOCK-END
417  * BLOCK-END
418  * STREAM-END
419  *
420  * 2. Collections in a mapping:
421  *
422  * ? a sequence
423  * : - item 1
424  * - item 2
425  * ? a mapping
426  * : key 1: value 1
427  * key 2: value 2
428  *
429  * Tokens:
430  *
431  * STREAM-START(utf-8)
432  * BLOCK-MAPPING-START
433  * KEY
434  * SCALAR("a sequence",plain)
435  * VALUE
436  * BLOCK-SEQUENCE-START
437  * BLOCK-ENTRY
438  * SCALAR("item 1",plain)
439  * BLOCK-ENTRY
440  * SCALAR("item 2",plain)
441  * BLOCK-END
442  * KEY
443  * SCALAR("a mapping",plain)
444  * VALUE
445  * BLOCK-MAPPING-START
446  * KEY
447  * SCALAR("key 1",plain)
448  * VALUE
449  * SCALAR("value 1",plain)
450  * KEY
451  * SCALAR("key 2",plain)
452  * VALUE
453  * SCALAR("value 2",plain)
454  * BLOCK-END
455  * BLOCK-END
456  * STREAM-END
457  *
458  * YAML also permits non-indented sequences if they are nested in a block
459  * mapping. In this case, the token BLOCK-SEQUENCE-START is not produced:
460  *
461  * key:
462  * - item 1 # BLOCK-SEQUENCE-START is NOT produced here.
463  * - item 2
464  *
465  * Tokens:
466  *
467  * STREAM-START(utf-8)
468  * BLOCK-MAPPING-START
469  * KEY
470  * SCALAR("key",plain)
471  * VALUE
472  * BLOCK-ENTRY
473  * SCALAR("item 1",plain)
474  * BLOCK-ENTRY
475  * SCALAR("item 2",plain)
476  * BLOCK-END
477  */
478 
479 #include "yaml_private.h"
480 
/*
 * Ensure that the buffer contains the required number of characters.
 * Return 1 on success, 0 on failure (reader error or memory error).
 *
 * yaml_parser_update_buffer() is only called when fewer than `length`
 * characters are currently unread.  The `parser` argument is parenthesized
 * so that any pointer-valued expression (e.g. `&state`) can be passed.
 * Note that `parser` and `length` may be evaluated more than once.
 */

#define CACHE(parser,length)                                                    \
    ((parser)->unread >= (length)                                               \
        ? 1                                                                     \
        : yaml_parser_update_buffer((parser), (length)))
490 
/*
 * Advance the buffer pointer past the current character.
 *
 * Increments the index and column marks, decrements the unread counter and
 * moves the buffer pointer by WIDTH() of the current character.  The
 * `parser` argument is parenthesized so that any pointer-valued expression
 * can be passed; it is evaluated several times.
 */

#define SKIP(parser)                                                            \
    ((parser)->mark.index ++,                                                   \
     (parser)->mark.column ++,                                                  \
     (parser)->unread --,                                                       \
     (parser)->buffer.pointer += WIDTH((parser)->buffer))
500 
/*
 * Advance the buffer pointer past a line break.
 *
 * A CR LF pair is consumed as two characters; any other break recognised by
 * IS_BREAK() is consumed as one character of WIDTH() bytes.  In both cases
 * the line mark is incremented and the column mark is reset to 0.  If the
 * current character is not a line break nothing is changed and the
 * expression evaluates to 0.  The `parser` argument is parenthesized so that
 * any pointer-valued expression can be passed.
 */

#define SKIP_LINE(parser)                                                       \
    (IS_CRLF((parser)->buffer) ?                                                \
     ((parser)->mark.index += 2,                                                \
      (parser)->mark.column = 0,                                                \
      (parser)->mark.line ++,                                                   \
      (parser)->unread -= 2,                                                    \
      (parser)->buffer.pointer += 2) :                                          \
     IS_BREAK((parser)->buffer) ?                                               \
     ((parser)->mark.index ++,                                                  \
      (parser)->mark.column = 0,                                                \
      (parser)->mark.line ++,                                                   \
      (parser)->unread --,                                                      \
      (parser)->buffer.pointer += WIDTH((parser)->buffer)) : 0)
514 
/*
 * Copy a character to a string buffer and advance pointers.
 *
 * Evaluates to 1 after the character has been copied with COPY() and the
 * index/column marks and unread counter have been updated, or to 0 if
 * STRING_EXTEND() fails (nothing is consumed in that case).  The `parser`
 * argument is parenthesized so that any pointer-valued expression can be
 * passed.
 */

#define READ(parser,string)                                                     \
    (STRING_EXTEND((parser),string) ?                                           \
     (COPY(string,(parser)->buffer),                                            \
      (parser)->mark.index ++,                                                  \
      (parser)->mark.column ++,                                                 \
      (parser)->unread --,                                                      \
      1) : 0)
526 
/*
 * Copy a line break character to a string buffer and advance pointers.
 *
 * The break is normalized while copying:
 *
 *      CR LF, CR, LF, NEL (C2 85)         are written as a single LF;
 *      LS / PS (E2 80 A8 / E2 80 A9)      are copied unchanged (3 bytes).
 *
 * CR LF counts as two characters for the index mark and the unread counter;
 * every other break counts as one.  The line mark is incremented and the
 * column mark reset to 0.  Evaluates to 1 unless STRING_EXTEND() fails, in
 * which case it evaluates to 0 and nothing is consumed.  If the current
 * character is not a line break, nothing is copied but the result is still
 * 1.  The `parser` argument is parenthesized so that any pointer-valued
 * expression can be passed.
 */

#define READ_LINE(parser,string)                                                \
    (STRING_EXTEND((parser),string) ?                                           \
    (((CHECK_AT((parser)->buffer,'\r',0)                                        \
       && CHECK_AT((parser)->buffer,'\n',1)) ?      /* CR LF -> LF */           \
     (*((string).pointer++) = (yaml_char_t) '\n',                               \
      (parser)->buffer.pointer += 2,                                            \
      (parser)->mark.index += 2,                                                \
      (parser)->mark.column = 0,                                                \
      (parser)->mark.line ++,                                                   \
      (parser)->unread -= 2) :                                                  \
     (CHECK_AT((parser)->buffer,'\r',0)                                         \
      || CHECK_AT((parser)->buffer,'\n',0)) ?       /* CR|LF -> LF */           \
     (*((string).pointer++) = (yaml_char_t) '\n',                               \
      (parser)->buffer.pointer ++,                                              \
      (parser)->mark.index ++,                                                  \
      (parser)->mark.column = 0,                                                \
      (parser)->mark.line ++,                                                   \
      (parser)->unread --) :                                                    \
     (CHECK_AT((parser)->buffer,'\xC2',0)                                       \
      && CHECK_AT((parser)->buffer,'\x85',1)) ?     /* NEL -> LF */             \
     (*((string).pointer++) = (yaml_char_t) '\n',                               \
      (parser)->buffer.pointer += 2,                                            \
      (parser)->mark.index ++,                                                  \
      (parser)->mark.column = 0,                                                \
      (parser)->mark.line ++,                                                   \
      (parser)->unread --) :                                                    \
     (CHECK_AT((parser)->buffer,'\xE2',0) &&                                    \
      CHECK_AT((parser)->buffer,'\x80',1) &&                                    \
      (CHECK_AT((parser)->buffer,'\xA8',2) ||                                   \
       CHECK_AT((parser)->buffer,'\xA9',2))) ?      /* LS|PS -> LS|PS */        \
     (*((string).pointer++) = *((parser)->buffer.pointer++),                    \
      *((string).pointer++) = *((parser)->buffer.pointer++),                    \
      *((string).pointer++) = *((parser)->buffer.pointer++),                    \
      (parser)->mark.index ++,                                                  \
      (parser)->mark.column = 0,                                                \
      (parser)->mark.line ++,                                                   \
      (parser)->unread --) : 0),                                                \
     1) : 0)
569 
570 /*
571  * Public API declarations.
572  */
573 
574 YAML_DECLARE(int)
576 
577 /*
578  * Error handling.
579  */
580 
581 static int
582 yaml_parser_set_scanner_error(yaml_parser_t *parser, const char *context,
583  yaml_mark_t context_mark, const char *problem);
584 
585 /*
586  * High-level token API.
587  */
588 
589 YAML_DECLARE(int)
591 
592 static int
593 yaml_parser_fetch_next_token(yaml_parser_t *parser);
594 
595 /*
596  * Potential simple keys.
597  */
598 
599 static int
600 yaml_parser_stale_simple_keys(yaml_parser_t *parser);
601 
602 static int
603 yaml_parser_save_simple_key(yaml_parser_t *parser);
604 
605 static int
606 yaml_parser_remove_simple_key(yaml_parser_t *parser);
607 
608 static int
609 yaml_parser_increase_flow_level(yaml_parser_t *parser);
610 
611 static int
612 yaml_parser_decrease_flow_level(yaml_parser_t *parser);
613 
614 /*
615  * Indentation treatment.
616  */
617 
618 static int
619 yaml_parser_roll_indent(yaml_parser_t *parser, ptrdiff_t column,
620  ptrdiff_t number, yaml_token_type_t type, yaml_mark_t mark);
621 
622 static int
623 yaml_parser_unroll_indent(yaml_parser_t *parser, ptrdiff_t column);
624 
625 /*
626  * Token fetchers.
627  */
628 
629 static int
630 yaml_parser_fetch_stream_start(yaml_parser_t *parser);
631 
632 static int
633 yaml_parser_fetch_stream_end(yaml_parser_t *parser);
634 
635 static int
636 yaml_parser_fetch_directive(yaml_parser_t *parser);
637 
638 static int
639 yaml_parser_fetch_document_indicator(yaml_parser_t *parser,
640  yaml_token_type_t type);
641 
642 static int
643 yaml_parser_fetch_flow_collection_start(yaml_parser_t *parser,
644  yaml_token_type_t type);
645 
646 static int
647 yaml_parser_fetch_flow_collection_end(yaml_parser_t *parser,
648  yaml_token_type_t type);
649 
650 static int
651 yaml_parser_fetch_flow_entry(yaml_parser_t *parser);
652 
653 static int
654 yaml_parser_fetch_block_entry(yaml_parser_t *parser);
655 
656 static int
657 yaml_parser_fetch_key(yaml_parser_t *parser);
658 
659 static int
660 yaml_parser_fetch_value(yaml_parser_t *parser);
661 
662 static int
663 yaml_parser_fetch_anchor(yaml_parser_t *parser, yaml_token_type_t type);
664 
665 static int
666 yaml_parser_fetch_tag(yaml_parser_t *parser);
667 
668 static int
669 yaml_parser_fetch_block_scalar(yaml_parser_t *parser, int literal);
670 
671 static int
672 yaml_parser_fetch_flow_scalar(yaml_parser_t *parser, int single);
673 
674 static int
675 yaml_parser_fetch_plain_scalar(yaml_parser_t *parser);
676 
677 /*
678  * Token scanners.
679  */
680 
681 static int
682 yaml_parser_scan_to_next_token(yaml_parser_t *parser);
683 
684 static int
685 yaml_parser_scan_directive(yaml_parser_t *parser, yaml_token_t *token);
686 
687 static int
688 yaml_parser_scan_directive_name(yaml_parser_t *parser,
689  yaml_mark_t start_mark, yaml_char_t **name);
690 
691 static int
692 yaml_parser_scan_version_directive_value(yaml_parser_t *parser,
693  yaml_mark_t start_mark, int *major, int *minor);
694 
695 static int
696 yaml_parser_scan_version_directive_number(yaml_parser_t *parser,
697  yaml_mark_t start_mark, int *number);
698 
699 static int
700 yaml_parser_scan_tag_directive_value(yaml_parser_t *parser,
701  yaml_mark_t mark, yaml_char_t **handle, yaml_char_t **prefix);
702 
703 static int
704 yaml_parser_scan_anchor(yaml_parser_t *parser, yaml_token_t *token,
705  yaml_token_type_t type);
706 
707 static int
708 yaml_parser_scan_tag(yaml_parser_t *parser, yaml_token_t *token);
709 
710 static int
711 yaml_parser_scan_tag_handle(yaml_parser_t *parser, int directive,
712  yaml_mark_t start_mark, yaml_char_t **handle);
713 
714 static int
715 yaml_parser_scan_tag_uri(yaml_parser_t *parser, int uri_char, int directive,
716  yaml_char_t *head, yaml_mark_t start_mark, yaml_char_t **uri);
717 
718 static int
719 yaml_parser_scan_uri_escapes(yaml_parser_t *parser, int directive,
720  yaml_mark_t start_mark, yaml_string_t *string);
721 
722 static int
723 yaml_parser_scan_block_scalar(yaml_parser_t *parser, yaml_token_t *token,
724  int literal);
725 
726 static int
727 yaml_parser_scan_block_scalar_breaks(yaml_parser_t *parser,
728  int *indent, yaml_string_t *breaks,
729  yaml_mark_t start_mark, yaml_mark_t *end_mark);
730 
731 static int
732 yaml_parser_scan_flow_scalar(yaml_parser_t *parser, yaml_token_t *token,
733  int single);
734 
735 static int
736 yaml_parser_scan_plain_scalar(yaml_parser_t *parser, yaml_token_t *token);
737 
738 /*
739  * Get the next token.
740  */
741 
742 YAML_DECLARE(int)
744 {
745  assert(parser); /* Non-NULL parser object is expected. */
746  assert(token); /* Non-NULL token object is expected. */
747 
748  /* Erase the token object. */
749 
750  memset(token, 0, sizeof(yaml_token_t));
751 
752  /* No tokens after STREAM-END or error. */
753 
754  if (parser->stream_end_produced || parser->error) {
755  return 1;
756  }
757 
758  /* Ensure that the tokens queue contains enough tokens. */
759 
760  if (!parser->token_available) {
761  if (!yaml_parser_fetch_more_tokens(parser))
762  return 0;
763  }
764 
765  /* Fetch the next token from the queue. */
766 
767  *token = DEQUEUE(parser, parser->tokens);
768  parser->token_available = 0;
769  parser->tokens_parsed ++;
770 
771  if (token->type == YAML_STREAM_END_TOKEN) {
772  parser->stream_end_produced = 1;
773  }
774 
775  return 1;
776 }
777 
778 /*
779  * Set the scanner error and return 0.
780  */
781 
782 static int
783 yaml_parser_set_scanner_error(yaml_parser_t *parser, const char *context,
784  yaml_mark_t context_mark, const char *problem)
785 {
786  parser->error = YAML_SCANNER_ERROR;
787  parser->context = context;
788  parser->context_mark = context_mark;
789  parser->problem = problem;
790  parser->problem_mark = parser->mark;
791 
792  return 0;
793 }
794 
795 /*
796  * Ensure that the tokens queue contains at least one token which can be
797  * returned to the Parser.
798  */
799 
800 YAML_DECLARE(int)
802 {
803  int need_more_tokens;
804 
805  /* While we need more tokens to fetch, do it. */
806 
807  while (1)
808  {
809  /*
810  * Check if we really need to fetch more tokens.
811  */
812 
813  need_more_tokens = 0;
814 
815  if (parser->tokens.head == parser->tokens.tail)
816  {
817  /* Queue is empty. */
818 
819  need_more_tokens = 1;
820  }
821  else
822  {
823  yaml_simple_key_t *simple_key;
824 
825  /* Check if any potential simple key may occupy the head position. */
826 
827  if (!yaml_parser_stale_simple_keys(parser))
828  return 0;
829 
830  for (simple_key = parser->simple_keys.start;
831  simple_key != parser->simple_keys.top; simple_key++) {
832  if (simple_key->possible
833  && simple_key->token_number == parser->tokens_parsed) {
834  need_more_tokens = 1;
835  break;
836  }
837  }
838  }
839 
840  /* We are finished. */
841 
842  if (!need_more_tokens)
843  break;
844 
845  /* Fetch the next token. */
846 
847  if (!yaml_parser_fetch_next_token(parser))
848  return 0;
849  }
850 
851  parser->token_available = 1;
852 
853  return 1;
854 }
855 
856 /*
857  * The dispatcher for token fetchers.
858  */
859 
860 static int
861 yaml_parser_fetch_next_token(yaml_parser_t *parser)
862 {
863  /* Ensure that the buffer is initialized. */
864 
865  if (!CACHE(parser, 1))
866  return 0;
867 
868  /* Check if we just started scanning. Fetch STREAM-START then. */
869 
870  if (!parser->stream_start_produced)
871  return yaml_parser_fetch_stream_start(parser);
872 
873  /* Eat whitespaces and comments until we reach the next token. */
874 
875  if (!yaml_parser_scan_to_next_token(parser))
876  return 0;
877 
878  /* Remove obsolete potential simple keys. */
879 
880  if (!yaml_parser_stale_simple_keys(parser))
881  return 0;
882 
883  /* Check the indentation level against the current column. */
884 
885  if (!yaml_parser_unroll_indent(parser, parser->mark.column))
886  return 0;
887 
888  /*
889  * Ensure that the buffer contains at least 4 characters. 4 is the length
890  * of the longest indicators ('--- ' and '... ').
891  */
892 
893  if (!CACHE(parser, 4))
894  return 0;
895 
896  /* Is it the end of the stream? */
897 
898  if (IS_Z(parser->buffer))
899  return yaml_parser_fetch_stream_end(parser);
900 
901  /* Is it a directive? */
902 
903  if (parser->mark.column == 0 && CHECK(parser->buffer, '%'))
904  return yaml_parser_fetch_directive(parser);
905 
906  /* Is it the document start indicator? */
907 
908  if (parser->mark.column == 0
909  && CHECK_AT(parser->buffer, '-', 0)
910  && CHECK_AT(parser->buffer, '-', 1)
911  && CHECK_AT(parser->buffer, '-', 2)
912  && IS_BLANKZ_AT(parser->buffer, 3))
913  return yaml_parser_fetch_document_indicator(parser,
915 
916  /* Is it the document end indicator? */
917 
918  if (parser->mark.column == 0
919  && CHECK_AT(parser->buffer, '.', 0)
920  && CHECK_AT(parser->buffer, '.', 1)
921  && CHECK_AT(parser->buffer, '.', 2)
922  && IS_BLANKZ_AT(parser->buffer, 3))
923  return yaml_parser_fetch_document_indicator(parser,
925 
926  /* Is it the flow sequence start indicator? */
927 
928  if (CHECK(parser->buffer, '['))
929  return yaml_parser_fetch_flow_collection_start(parser,
931 
932  /* Is it the flow mapping start indicator? */
933 
934  if (CHECK(parser->buffer, '{'))
935  return yaml_parser_fetch_flow_collection_start(parser,
937 
938  /* Is it the flow sequence end indicator? */
939 
940  if (CHECK(parser->buffer, ']'))
941  return yaml_parser_fetch_flow_collection_end(parser,
943 
944  /* Is it the flow mapping end indicator? */
945 
946  if (CHECK(parser->buffer, '}'))
947  return yaml_parser_fetch_flow_collection_end(parser,
949 
950  /* Is it the flow entry indicator? */
951 
952  if (CHECK(parser->buffer, ','))
953  return yaml_parser_fetch_flow_entry(parser);
954 
955  /* Is it the block entry indicator? */
956 
957  if (CHECK(parser->buffer, '-') && IS_BLANKZ_AT(parser->buffer, 1))
958  return yaml_parser_fetch_block_entry(parser);
959 
960  /* Is it the key indicator? */
961 
962  if (CHECK(parser->buffer, '?')
963  && (parser->flow_level || IS_BLANKZ_AT(parser->buffer, 1)))
964  return yaml_parser_fetch_key(parser);
965 
966  /* Is it the value indicator? */
967 
968  if (CHECK(parser->buffer, ':')
969  && (parser->flow_level || IS_BLANKZ_AT(parser->buffer, 1)))
970  return yaml_parser_fetch_value(parser);
971 
972  /* Is it an alias? */
973 
974  if (CHECK(parser->buffer, '*'))
975  return yaml_parser_fetch_anchor(parser, YAML_ALIAS_TOKEN);
976 
977  /* Is it an anchor? */
978 
979  if (CHECK(parser->buffer, '&'))
980  return yaml_parser_fetch_anchor(parser, YAML_ANCHOR_TOKEN);
981 
982  /* Is it a tag? */
983 
984  if (CHECK(parser->buffer, '!'))
985  return yaml_parser_fetch_tag(parser);
986 
987  /* Is it a literal scalar? */
988 
989  if (CHECK(parser->buffer, '|') && !parser->flow_level)
990  return yaml_parser_fetch_block_scalar(parser, 1);
991 
992  /* Is it a folded scalar? */
993 
994  if (CHECK(parser->buffer, '>') && !parser->flow_level)
995  return yaml_parser_fetch_block_scalar(parser, 0);
996 
997  /* Is it a single-quoted scalar? */
998 
999  if (CHECK(parser->buffer, '\''))
1000  return yaml_parser_fetch_flow_scalar(parser, 1);
1001 
1002  /* Is it a double-quoted scalar? */
1003 
1004  if (CHECK(parser->buffer, '"'))
1005  return yaml_parser_fetch_flow_scalar(parser, 0);
1006 
1007  /*
1008  * Is it a plain scalar?
1009  *
1010  * A plain scalar may start with any non-blank characters except
1011  *
1012  * '-', '?', ':', ',', '[', ']', '{', '}',
1013  * '#', '&', '*', '!', '|', '>', '\'', '\"',
1014  * '%', '@', '`'.
1015  *
1016  * In the block context (and, for the '-' indicator, in the flow context
1017  * too), it may also start with the characters
1018  *
1019  * '-', '?', ':'
1020  *
1021  * if it is followed by a non-space character.
1022  *
1023  * The last rule is more restrictive than the specification requires.
1024  */
1025 
1026  if (!(IS_BLANKZ(parser->buffer) || CHECK(parser->buffer, '-')
1027  || CHECK(parser->buffer, '?') || CHECK(parser->buffer, ':')
1028  || CHECK(parser->buffer, ',') || CHECK(parser->buffer, '[')
1029  || CHECK(parser->buffer, ']') || CHECK(parser->buffer, '{')
1030  || CHECK(parser->buffer, '}') || CHECK(parser->buffer, '#')
1031  || CHECK(parser->buffer, '&') || CHECK(parser->buffer, '*')
1032  || CHECK(parser->buffer, '!') || CHECK(parser->buffer, '|')
1033  || CHECK(parser->buffer, '>') || CHECK(parser->buffer, '\'')
1034  || CHECK(parser->buffer, '"') || CHECK(parser->buffer, '%')
1035  || CHECK(parser->buffer, '@') || CHECK(parser->buffer, '`')) ||
1036  (CHECK(parser->buffer, '-') && !IS_BLANK_AT(parser->buffer, 1)) ||
1037  (!parser->flow_level &&
1038  (CHECK(parser->buffer, '?') || CHECK(parser->buffer, ':'))
1039  && !IS_BLANKZ_AT(parser->buffer, 1)))
1040  return yaml_parser_fetch_plain_scalar(parser);
1041 
1042  /*
1043  * If we don't determine the token type so far, it is an error.
1044  */
1045 
1046  return yaml_parser_set_scanner_error(parser,
1047  "while scanning for the next token", parser->mark,
1048  "found character that cannot start any token");
1049 }
1050 
1051 /*
1052  * Check the list of potential simple keys and remove the positions that
1053  * cannot contain simple keys anymore.
1054  */
1055 
1056 static int
1057 yaml_parser_stale_simple_keys(yaml_parser_t *parser)
1058 {
1059  yaml_simple_key_t *simple_key;
1060 
1061  /* Check for a potential simple key for each flow level. */
1062 
1063  for (simple_key = parser->simple_keys.start;
1064  simple_key != parser->simple_keys.top; simple_key ++)
1065  {
1066  /*
1067  * The specification requires that a simple key
1068  *
1069  * - is limited to a single line,
1070  * - is shorter than 1024 characters.
1071  */
1072 
1073  if (simple_key->possible
1074  && (simple_key->mark.line < parser->mark.line
1075  || simple_key->mark.index+1024 < parser->mark.index)) {
1076 
1077  /* Check if the potential simple key to be removed is required. */
1078 
1079  if (simple_key->required) {
1080  return yaml_parser_set_scanner_error(parser,
1081  "while scanning a simple key", simple_key->mark,
1082  "could not find expected ':'");
1083  }
1084 
1085  simple_key->possible = 0;
1086  }
1087  }
1088 
1089  return 1;
1090 }
1091 
1092 /*
1093  * Check if a simple key may start at the current position and add it if
1094  * needed.
1095  */
1096 
1097 static int
1098 yaml_parser_save_simple_key(yaml_parser_t *parser)
1099 {
1100  /*
1101  * A simple key is required at the current position if the scanner is in
1102  * the block context and the current column coincides with the indentation
1103  * level.
1104  */
1105 
1106  int required = (!parser->flow_level
1107  && parser->indent == (ptrdiff_t)parser->mark.column);
1108 
1109  /*
1110  * If the current position may start a simple key, save it.
1111  */
1112 
1113  if (parser->simple_key_allowed)
1114  {
1115  yaml_simple_key_t simple_key;
1116  simple_key.possible = 1;
1117  simple_key.required = required;
1118  simple_key.token_number =
1119  parser->tokens_parsed + (parser->tokens.tail - parser->tokens.head);
1120  simple_key.mark = parser->mark;
1121 
1122  if (!yaml_parser_remove_simple_key(parser)) return 0;
1123 
1124  *(parser->simple_keys.top-1) = simple_key;
1125  }
1126 
1127  return 1;
1128 }
1129 
1130 /*
1131  * Remove a potential simple key at the current flow level.
1132  */
1133 
1134 static int
1135 yaml_parser_remove_simple_key(yaml_parser_t *parser)
1136 {
1137  yaml_simple_key_t *simple_key = parser->simple_keys.top-1;
1138 
1139  if (simple_key->possible)
1140  {
1141  /* If the key is required, it is an error. */
1142 
1143  if (simple_key->required) {
1144  return yaml_parser_set_scanner_error(parser,
1145  "while scanning a simple key", simple_key->mark,
1146  "could not find expected ':'");
1147  }
1148  }
1149 
1150  /* Remove the key from the stack. */
1151 
1152  simple_key->possible = 0;
1153 
1154  return 1;
1155 }
1156 
1157 /*
1158  * Increase the flow level and resize the simple key list if needed.
1159  */
1160 
1161 static int
1162 yaml_parser_increase_flow_level(yaml_parser_t *parser)
1163 {
1164  yaml_simple_key_t empty_simple_key = { 0, 0, 0, { 0, 0, 0 } };
1165 
1166  /* Reset the simple key on the next level. */
1167 
1168  if (!PUSH(parser, parser->simple_keys, empty_simple_key))
1169  return 0;
1170 
1171  /* Increase the flow level. */
1172 
1173  if (parser->flow_level == INT_MAX) {
1174  parser->error = YAML_MEMORY_ERROR;
1175  return 0;
1176  }
1177 
1178  parser->flow_level++;
1179 
1180  return 1;
1181 }
1182 
1183 /*
1184  * Decrease the flow level.
1185  */
1186 
1187 static int
1188 yaml_parser_decrease_flow_level(yaml_parser_t *parser)
1189 {
1190  if (parser->flow_level) {
1191  parser->flow_level --;
1192  (void)POP(parser, parser->simple_keys);
1193  }
1194 
1195  return 1;
1196 }
1197 
1198 /*
1199  * Push the current indentation level to the stack and set the new level
1200  * the current column is greater than the indentation level. In this case,
1201  * append or insert the specified token into the token queue.
1202  *
1203  */
1204 
1205 static int
1206 yaml_parser_roll_indent(yaml_parser_t *parser, ptrdiff_t column,
1207  ptrdiff_t number, yaml_token_type_t type, yaml_mark_t mark)
1208 {
1209  yaml_token_t token;
1210 
1211  /* In the flow context, do nothing. */
1212 
1213  if (parser->flow_level)
1214  return 1;
1215 
1216  if (parser->indent < column)
1217  {
1218  /*
1219  * Push the current indentation level to the stack and set the new
1220  * indentation level.
1221  */
1222 
1223  if (!PUSH(parser, parser->indents, parser->indent))
1224  return 0;
1225 
1226  if (column > INT_MAX) {
1227  parser->error = YAML_MEMORY_ERROR;
1228  return 0;
1229  }
1230 
1231  parser->indent = column;
1232 
1233  /* Create a token and insert it into the queue. */
1234 
1235  TOKEN_INIT(token, type, mark, mark);
1236 
1237  if (number == -1) {
1238  if (!ENQUEUE(parser, parser->tokens, token))
1239  return 0;
1240  }
1241  else {
1242  if (!QUEUE_INSERT(parser,
1243  parser->tokens, number - parser->tokens_parsed, token))
1244  return 0;
1245  }
1246  }
1247 
1248  return 1;
1249 }
1250 
1251 /*
1252  * Pop indentation levels from the indents stack until the current level
1253  * becomes less or equal to the column. For each indentation level, append
1254  * the BLOCK-END token.
1255  */
1256 
1257 
1258 static int
1259 yaml_parser_unroll_indent(yaml_parser_t *parser, ptrdiff_t column)
1260 {
1261  yaml_token_t token;
1262 
1263  /* In the flow context, do nothing. */
1264 
1265  if (parser->flow_level)
1266  return 1;
1267 
1268  /* Loop through the indentation levels in the stack. */
1269 
1270  while (parser->indent > column)
1271  {
1272  /* Create a token and append it to the queue. */
1273 
1274  TOKEN_INIT(token, YAML_BLOCK_END_TOKEN, parser->mark, parser->mark);
1275 
1276  if (!ENQUEUE(parser, parser->tokens, token))
1277  return 0;
1278 
1279  /* Pop the indentation level. */
1280 
1281  parser->indent = POP(parser, parser->indents);
1282  }
1283 
1284  return 1;
1285 }
1286 
1287 /*
1288  * Initialize the scanner and produce the STREAM-START token.
1289  */
1290 
1291 static int
1292 yaml_parser_fetch_stream_start(yaml_parser_t *parser)
1293 {
1294  yaml_simple_key_t simple_key = { 0, 0, 0, { 0, 0, 0 } };
1295  yaml_token_t token;
1296 
1297  /* Set the initial indentation. */
1298 
1299  parser->indent = -1;
1300 
1301  /* Initialize the simple key stack. */
1302 
1303  if (!PUSH(parser, parser->simple_keys, simple_key))
1304  return 0;
1305 
1306  /* A simple key is allowed at the beginning of the stream. */
1307 
1308  parser->simple_key_allowed = 1;
1309 
1310  /* We have started. */
1311 
1312  parser->stream_start_produced = 1;
1313 
1314  /* Create the STREAM-START token and append it to the queue. */
1315 
1316  STREAM_START_TOKEN_INIT(token, parser->encoding,
1317  parser->mark, parser->mark);
1318 
1319  if (!ENQUEUE(parser, parser->tokens, token))
1320  return 0;
1321 
1322  return 1;
1323 }
1324 
1325 /*
1326  * Produce the STREAM-END token and shut down the scanner.
1327  */
1328 
1329 static int
1330 yaml_parser_fetch_stream_end(yaml_parser_t *parser)
1331 {
1332  yaml_token_t token;
1333 
1334  /* Force new line. */
1335 
1336  if (parser->mark.column != 0) {
1337  parser->mark.column = 0;
1338  parser->mark.line ++;
1339  }
1340 
1341  /* Reset the indentation level. */
1342 
1343  if (!yaml_parser_unroll_indent(parser, -1))
1344  return 0;
1345 
1346  /* Reset simple keys. */
1347 
1348  if (!yaml_parser_remove_simple_key(parser))
1349  return 0;
1350 
1351  parser->simple_key_allowed = 0;
1352 
1353  /* Create the STREAM-END token and append it to the queue. */
1354 
1355  STREAM_END_TOKEN_INIT(token, parser->mark, parser->mark);
1356 
1357  if (!ENQUEUE(parser, parser->tokens, token))
1358  return 0;
1359 
1360  return 1;
1361 }
1362 
1363 /*
1364  * Produce a VERSION-DIRECTIVE or TAG-DIRECTIVE token.
1365  */
1366 
1367 static int
1368 yaml_parser_fetch_directive(yaml_parser_t *parser)
1369 {
1370  yaml_token_t token;
1371 
1372  /* Reset the indentation level. */
1373 
1374  if (!yaml_parser_unroll_indent(parser, -1))
1375  return 0;
1376 
1377  /* Reset simple keys. */
1378 
1379  if (!yaml_parser_remove_simple_key(parser))
1380  return 0;
1381 
1382  parser->simple_key_allowed = 0;
1383 
1384  /* Create the YAML-DIRECTIVE or TAG-DIRECTIVE token. */
1385 
1386  if (!yaml_parser_scan_directive(parser, &token))
1387  return 0;
1388 
1389  /* Append the token to the queue. */
1390 
1391  if (!ENQUEUE(parser, parser->tokens, token)) {
1392  yaml_token_delete(&token);
1393  return 0;
1394  }
1395 
1396  return 1;
1397 }
1398 
1399 /*
1400  * Produce the DOCUMENT-START or DOCUMENT-END token.
1401  */
1402 
1403 static int
1404 yaml_parser_fetch_document_indicator(yaml_parser_t *parser,
1405  yaml_token_type_t type)
1406 {
1407  yaml_mark_t start_mark, end_mark;
1408  yaml_token_t token;
1409 
1410  /* Reset the indentation level. */
1411 
1412  if (!yaml_parser_unroll_indent(parser, -1))
1413  return 0;
1414 
1415  /* Reset simple keys. */
1416 
1417  if (!yaml_parser_remove_simple_key(parser))
1418  return 0;
1419 
1420  parser->simple_key_allowed = 0;
1421 
1422  /* Consume the token. */
1423 
1424  start_mark = parser->mark;
1425 
1426  SKIP(parser);
1427  SKIP(parser);
1428  SKIP(parser);
1429 
1430  end_mark = parser->mark;
1431 
1432  /* Create the DOCUMENT-START or DOCUMENT-END token. */
1433 
1434  TOKEN_INIT(token, type, start_mark, end_mark);
1435 
1436  /* Append the token to the queue. */
1437 
1438  if (!ENQUEUE(parser, parser->tokens, token))
1439  return 0;
1440 
1441  return 1;
1442 }
1443 
1444 /*
1445  * Produce the FLOW-SEQUENCE-START or FLOW-MAPPING-START token.
1446  */
1447 
1448 static int
1449 yaml_parser_fetch_flow_collection_start(yaml_parser_t *parser,
1450  yaml_token_type_t type)
1451 {
1452  yaml_mark_t start_mark, end_mark;
1453  yaml_token_t token;
1454 
1455  /* The indicators '[' and '{' may start a simple key. */
1456 
1457  if (!yaml_parser_save_simple_key(parser))
1458  return 0;
1459 
1460  /* Increase the flow level. */
1461 
1462  if (!yaml_parser_increase_flow_level(parser))
1463  return 0;
1464 
1465  /* A simple key may follow the indicators '[' and '{'. */
1466 
1467  parser->simple_key_allowed = 1;
1468 
1469  /* Consume the token. */
1470 
1471  start_mark = parser->mark;
1472  SKIP(parser);
1473  end_mark = parser->mark;
1474 
1475  /* Create the FLOW-SEQUENCE-START of FLOW-MAPPING-START token. */
1476 
1477  TOKEN_INIT(token, type, start_mark, end_mark);
1478 
1479  /* Append the token to the queue. */
1480 
1481  if (!ENQUEUE(parser, parser->tokens, token))
1482  return 0;
1483 
1484  return 1;
1485 }
1486 
1487 /*
1488  * Produce the FLOW-SEQUENCE-END or FLOW-MAPPING-END token.
1489  */
1490 
1491 static int
1492 yaml_parser_fetch_flow_collection_end(yaml_parser_t *parser,
1493  yaml_token_type_t type)
1494 {
1495  yaml_mark_t start_mark, end_mark;
1496  yaml_token_t token;
1497 
1498  /* Reset any potential simple key on the current flow level. */
1499 
1500  if (!yaml_parser_remove_simple_key(parser))
1501  return 0;
1502 
1503  /* Decrease the flow level. */
1504 
1505  if (!yaml_parser_decrease_flow_level(parser))
1506  return 0;
1507 
1508  /* No simple keys after the indicators ']' and '}'. */
1509 
1510  parser->simple_key_allowed = 0;
1511 
1512  /* Consume the token. */
1513 
1514  start_mark = parser->mark;
1515  SKIP(parser);
1516  end_mark = parser->mark;
1517 
1518  /* Create the FLOW-SEQUENCE-END of FLOW-MAPPING-END token. */
1519 
1520  TOKEN_INIT(token, type, start_mark, end_mark);
1521 
1522  /* Append the token to the queue. */
1523 
1524  if (!ENQUEUE(parser, parser->tokens, token))
1525  return 0;
1526 
1527  return 1;
1528 }
1529 
1530 /*
1531  * Produce the FLOW-ENTRY token.
1532  */
1533 
1534 static int
1535 yaml_parser_fetch_flow_entry(yaml_parser_t *parser)
1536 {
1537  yaml_mark_t start_mark, end_mark;
1538  yaml_token_t token;
1539 
1540  /* Reset any potential simple keys on the current flow level. */
1541 
1542  if (!yaml_parser_remove_simple_key(parser))
1543  return 0;
1544 
1545  /* Simple keys are allowed after ','. */
1546 
1547  parser->simple_key_allowed = 1;
1548 
1549  /* Consume the token. */
1550 
1551  start_mark = parser->mark;
1552  SKIP(parser);
1553  end_mark = parser->mark;
1554 
1555  /* Create the FLOW-ENTRY token and append it to the queue. */
1556 
1557  TOKEN_INIT(token, YAML_FLOW_ENTRY_TOKEN, start_mark, end_mark);
1558 
1559  if (!ENQUEUE(parser, parser->tokens, token))
1560  return 0;
1561 
1562  return 1;
1563 }
1564 
1565 /*
1566  * Produce the BLOCK-ENTRY token.
1567  */
1568 
1569 static int
1570 yaml_parser_fetch_block_entry(yaml_parser_t *parser)
1571 {
1572  yaml_mark_t start_mark, end_mark;
1573  yaml_token_t token;
1574 
1575  /* Check if the scanner is in the block context. */
1576 
1577  if (!parser->flow_level)
1578  {
1579  /* Check if we are allowed to start a new entry. */
1580 
1581  if (!parser->simple_key_allowed) {
1582  return yaml_parser_set_scanner_error(parser, NULL, parser->mark,
1583  "block sequence entries are not allowed in this context");
1584  }
1585 
1586  /* Add the BLOCK-SEQUENCE-START token if needed. */
1587 
1588  if (!yaml_parser_roll_indent(parser, parser->mark.column, -1,
1590  return 0;
1591  }
1592  else
1593  {
1594  /*
1595  * It is an error for the '-' indicator to occur in the flow context,
1596  * but we let the Parser detect and report about it because the Parser
1597  * is able to point to the context.
1598  */
1599  }
1600 
1601  /* Reset any potential simple keys on the current flow level. */
1602 
1603  if (!yaml_parser_remove_simple_key(parser))
1604  return 0;
1605 
1606  /* Simple keys are allowed after '-'. */
1607 
1608  parser->simple_key_allowed = 1;
1609 
1610  /* Consume the token. */
1611 
1612  start_mark = parser->mark;
1613  SKIP(parser);
1614  end_mark = parser->mark;
1615 
1616  /* Create the BLOCK-ENTRY token and append it to the queue. */
1617 
1618  TOKEN_INIT(token, YAML_BLOCK_ENTRY_TOKEN, start_mark, end_mark);
1619 
1620  if (!ENQUEUE(parser, parser->tokens, token))
1621  return 0;
1622 
1623  return 1;
1624 }
1625 
1626 /*
1627  * Produce the KEY token.
1628  */
1629 
1630 static int
1631 yaml_parser_fetch_key(yaml_parser_t *parser)
1632 {
1633  yaml_mark_t start_mark, end_mark;
1634  yaml_token_t token;
1635 
1636  /* In the block context, additional checks are required. */
1637 
1638  if (!parser->flow_level)
1639  {
1640  /* Check if we are allowed to start a new key (not necessary simple). */
1641 
1642  if (!parser->simple_key_allowed) {
1643  return yaml_parser_set_scanner_error(parser, NULL, parser->mark,
1644  "mapping keys are not allowed in this context");
1645  }
1646 
1647  /* Add the BLOCK-MAPPING-START token if needed. */
1648 
1649  if (!yaml_parser_roll_indent(parser, parser->mark.column, -1,
1651  return 0;
1652  }
1653 
1654  /* Reset any potential simple keys on the current flow level. */
1655 
1656  if (!yaml_parser_remove_simple_key(parser))
1657  return 0;
1658 
1659  /* Simple keys are allowed after '?' in the block context. */
1660 
1661  parser->simple_key_allowed = (!parser->flow_level);
1662 
1663  /* Consume the token. */
1664 
1665  start_mark = parser->mark;
1666  SKIP(parser);
1667  end_mark = parser->mark;
1668 
1669  /* Create the KEY token and append it to the queue. */
1670 
1671  TOKEN_INIT(token, YAML_KEY_TOKEN, start_mark, end_mark);
1672 
1673  if (!ENQUEUE(parser, parser->tokens, token))
1674  return 0;
1675 
1676  return 1;
1677 }
1678 
1679 /*
1680  * Produce the VALUE token.
1681  */
1682 
1683 static int
1684 yaml_parser_fetch_value(yaml_parser_t *parser)
1685 {
1686  yaml_mark_t start_mark, end_mark;
1687  yaml_token_t token;
1688  yaml_simple_key_t *simple_key = parser->simple_keys.top-1;
1689 
1690  /* Have we found a simple key? */
1691 
1692  if (simple_key->possible)
1693  {
1694 
1695  /* Create the KEY token and insert it into the queue. */
1696 
1697  TOKEN_INIT(token, YAML_KEY_TOKEN, simple_key->mark, simple_key->mark);
1698 
1699  if (!QUEUE_INSERT(parser, parser->tokens,
1700  simple_key->token_number - parser->tokens_parsed, token))
1701  return 0;
1702 
1703  /* In the block context, we may need to add the BLOCK-MAPPING-START token. */
1704 
1705  if (!yaml_parser_roll_indent(parser, simple_key->mark.column,
1706  simple_key->token_number,
1707  YAML_BLOCK_MAPPING_START_TOKEN, simple_key->mark))
1708  return 0;
1709 
1710  /* Remove the simple key. */
1711 
1712  simple_key->possible = 0;
1713 
1714  /* A simple key cannot follow another simple key. */
1715 
1716  parser->simple_key_allowed = 0;
1717  }
1718  else
1719  {
1720  /* The ':' indicator follows a complex key. */
1721 
1722  /* In the block context, extra checks are required. */
1723 
1724  if (!parser->flow_level)
1725  {
1726  /* Check if we are allowed to start a complex value. */
1727 
1728  if (!parser->simple_key_allowed) {
1729  return yaml_parser_set_scanner_error(parser, NULL, parser->mark,
1730  "mapping values are not allowed in this context");
1731  }
1732 
1733  /* Add the BLOCK-MAPPING-START token if needed. */
1734 
1735  if (!yaml_parser_roll_indent(parser, parser->mark.column, -1,
1737  return 0;
1738  }
1739 
1740  /* Simple keys after ':' are allowed in the block context. */
1741 
1742  parser->simple_key_allowed = (!parser->flow_level);
1743  }
1744 
1745  /* Consume the token. */
1746 
1747  start_mark = parser->mark;
1748  SKIP(parser);
1749  end_mark = parser->mark;
1750 
1751  /* Create the VALUE token and append it to the queue. */
1752 
1753  TOKEN_INIT(token, YAML_VALUE_TOKEN, start_mark, end_mark);
1754 
1755  if (!ENQUEUE(parser, parser->tokens, token))
1756  return 0;
1757 
1758  return 1;
1759 }
1760 
1761 /*
1762  * Produce the ALIAS or ANCHOR token.
1763  */
1764 
1765 static int
1766 yaml_parser_fetch_anchor(yaml_parser_t *parser, yaml_token_type_t type)
1767 {
1768  yaml_token_t token;
1769 
1770  /* An anchor or an alias could be a simple key. */
1771 
1772  if (!yaml_parser_save_simple_key(parser))
1773  return 0;
1774 
1775  /* A simple key cannot follow an anchor or an alias. */
1776 
1777  parser->simple_key_allowed = 0;
1778 
1779  /* Create the ALIAS or ANCHOR token and append it to the queue. */
1780 
1781  if (!yaml_parser_scan_anchor(parser, &token, type))
1782  return 0;
1783 
1784  if (!ENQUEUE(parser, parser->tokens, token)) {
1785  yaml_token_delete(&token);
1786  return 0;
1787  }
1788  return 1;
1789 }
1790 
1791 /*
1792  * Produce the TAG token.
1793  */
1794 
1795 static int
1796 yaml_parser_fetch_tag(yaml_parser_t *parser)
1797 {
1798  yaml_token_t token;
1799 
1800  /* A tag could be a simple key. */
1801 
1802  if (!yaml_parser_save_simple_key(parser))
1803  return 0;
1804 
1805  /* A simple key cannot follow a tag. */
1806 
1807  parser->simple_key_allowed = 0;
1808 
1809  /* Create the TAG token and append it to the queue. */
1810 
1811  if (!yaml_parser_scan_tag(parser, &token))
1812  return 0;
1813 
1814  if (!ENQUEUE(parser, parser->tokens, token)) {
1815  yaml_token_delete(&token);
1816  return 0;
1817  }
1818 
1819  return 1;
1820 }
1821 
1822 /*
1823  * Produce the SCALAR(...,literal) or SCALAR(...,folded) tokens.
1824  */
1825 
1826 static int
1827 yaml_parser_fetch_block_scalar(yaml_parser_t *parser, int literal)
1828 {
1829  yaml_token_t token;
1830 
1831  /* Remove any potential simple keys. */
1832 
1833  if (!yaml_parser_remove_simple_key(parser))
1834  return 0;
1835 
1836  /* A simple key may follow a block scalar. */
1837 
1838  parser->simple_key_allowed = 1;
1839 
1840  /* Create the SCALAR token and append it to the queue. */
1841 
1842  if (!yaml_parser_scan_block_scalar(parser, &token, literal))
1843  return 0;
1844 
1845  if (!ENQUEUE(parser, parser->tokens, token)) {
1846  yaml_token_delete(&token);
1847  return 0;
1848  }
1849 
1850  return 1;
1851 }
1852 
1853 /*
1854  * Produce the SCALAR(...,single-quoted) or SCALAR(...,double-quoted) tokens.
1855  */
1856 
1857 static int
1858 yaml_parser_fetch_flow_scalar(yaml_parser_t *parser, int single)
1859 {
1860  yaml_token_t token;
1861 
1862  /* A plain scalar could be a simple key. */
1863 
1864  if (!yaml_parser_save_simple_key(parser))
1865  return 0;
1866 
1867  /* A simple key cannot follow a flow scalar. */
1868 
1869  parser->simple_key_allowed = 0;
1870 
1871  /* Create the SCALAR token and append it to the queue. */
1872 
1873  if (!yaml_parser_scan_flow_scalar(parser, &token, single))
1874  return 0;
1875 
1876  if (!ENQUEUE(parser, parser->tokens, token)) {
1877  yaml_token_delete(&token);
1878  return 0;
1879  }
1880 
1881  return 1;
1882 }
1883 
1884 /*
1885  * Produce the SCALAR(...,plain) token.
1886  */
1887 
1888 static int
1889 yaml_parser_fetch_plain_scalar(yaml_parser_t *parser)
1890 {
1891  yaml_token_t token;
1892 
1893  /* A plain scalar could be a simple key. */
1894 
1895  if (!yaml_parser_save_simple_key(parser))
1896  return 0;
1897 
1898  /* A simple key cannot follow a flow scalar. */
1899 
1900  parser->simple_key_allowed = 0;
1901 
1902  /* Create the SCALAR token and append it to the queue. */
1903 
1904  if (!yaml_parser_scan_plain_scalar(parser, &token))
1905  return 0;
1906 
1907  if (!ENQUEUE(parser, parser->tokens, token)) {
1908  yaml_token_delete(&token);
1909  return 0;
1910  }
1911 
1912  return 1;
1913 }
1914 
1915 /*
1916  * Eat whitespaces and comments until the next token is found.
1917  */
1918 
1919 static int
1920 yaml_parser_scan_to_next_token(yaml_parser_t *parser)
1921 {
1922  /* Until the next token is not found. */
1923 
1924  while (1)
1925  {
1926  /* Allow the BOM mark to start a line. */
1927 
1928  if (!CACHE(parser, 1)) return 0;
1929 
1930  if (parser->mark.column == 0 && IS_BOM(parser->buffer))
1931  SKIP(parser);
1932 
1933  /*
1934  * Eat whitespaces.
1935  *
1936  * Tabs are allowed:
1937  *
1938  * - in the flow context;
1939  * - in the block context, but not at the beginning of the line or
1940  * after '-', '?', or ':' (complex value).
1941  */
1942 
1943  if (!CACHE(parser, 1)) return 0;
1944 
1945  while (CHECK(parser->buffer,' ') ||
1946  ((parser->flow_level || !parser->simple_key_allowed) &&
1947  CHECK(parser->buffer, '\t'))) {
1948  SKIP(parser);
1949  if (!CACHE(parser, 1)) return 0;
1950  }
1951 
1952  /* Eat a comment until a line break. */
1953 
1954  if (CHECK(parser->buffer, '#')) {
1955  while (!IS_BREAKZ(parser->buffer)) {
1956  SKIP(parser);
1957  if (!CACHE(parser, 1)) return 0;
1958  }
1959  }
1960 
1961  /* If it is a line break, eat it. */
1962 
1963  if (IS_BREAK(parser->buffer))
1964  {
1965  if (!CACHE(parser, 2)) return 0;
1966  SKIP_LINE(parser);
1967 
1968  /* In the block context, a new line may start a simple key. */
1969 
1970  if (!parser->flow_level) {
1971  parser->simple_key_allowed = 1;
1972  }
1973  }
1974  else
1975  {
1976  /* We have found a token. */
1977 
1978  break;
1979  }
1980  }
1981 
1982  return 1;
1983 }
1984 
1985 /*
1986  * Scan a YAML-DIRECTIVE or TAG-DIRECTIVE token.
1987  *
1988  * Scope:
1989  * %YAML 1.1 # a comment \n
1990  * ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1991  * %TAG !yaml! tag:yaml.org,2002: \n
1992  * ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1993  */
1994 
1995 int
1996 yaml_parser_scan_directive(yaml_parser_t *parser, yaml_token_t *token)
1997 {
1998  yaml_mark_t start_mark, end_mark;
1999  yaml_char_t *name = NULL;
2000  int major, minor;
2001  yaml_char_t *handle = NULL, *prefix = NULL;
2002 
2003  /* Eat '%'. */
2004 
2005  start_mark = parser->mark;
2006 
2007  SKIP(parser);
2008 
2009  /* Scan the directive name. */
2010 
2011  if (!yaml_parser_scan_directive_name(parser, start_mark, &name))
2012  goto error;
2013 
2014  /* Is it a YAML directive? */
2015 
2016  if (strcmp((char *)name, "YAML") == 0)
2017  {
2018  /* Scan the VERSION directive value. */
2019 
2020  if (!yaml_parser_scan_version_directive_value(parser, start_mark,
2021  &major, &minor))
2022  goto error;
2023 
2024  end_mark = parser->mark;
2025 
2026  /* Create a VERSION-DIRECTIVE token. */
2027 
2028  VERSION_DIRECTIVE_TOKEN_INIT(*token, major, minor,
2029  start_mark, end_mark);
2030  }
2031 
2032  /* Is it a TAG directive? */
2033 
2034  else if (strcmp((char *)name, "TAG") == 0)
2035  {
2036  /* Scan the TAG directive value. */
2037 
2038  if (!yaml_parser_scan_tag_directive_value(parser, start_mark,
2039  &handle, &prefix))
2040  goto error;
2041 
2042  end_mark = parser->mark;
2043 
2044  /* Create a TAG-DIRECTIVE token. */
2045 
2046  TAG_DIRECTIVE_TOKEN_INIT(*token, handle, prefix,
2047  start_mark, end_mark);
2048  }
2049 
2050  /* Unknown directive. */
2051 
2052  else
2053  {
2054  yaml_parser_set_scanner_error(parser, "while scanning a directive",
2055  start_mark, "found unknown directive name");
2056  goto error;
2057  }
2058 
2059  /* Eat the rest of the line including any comments. */
2060 
2061  if (!CACHE(parser, 1)) goto error;
2062 
2063  while (IS_BLANK(parser->buffer)) {
2064  SKIP(parser);
2065  if (!CACHE(parser, 1)) goto error;
2066  }
2067 
2068  if (CHECK(parser->buffer, '#')) {
2069  while (!IS_BREAKZ(parser->buffer)) {
2070  SKIP(parser);
2071  if (!CACHE(parser, 1)) goto error;
2072  }
2073  }
2074 
2075  /* Check if we are at the end of the line. */
2076 
2077  if (!IS_BREAKZ(parser->buffer)) {
2078  yaml_parser_set_scanner_error(parser, "while scanning a directive",
2079  start_mark, "did not find expected comment or line break");
2080  goto error;
2081  }
2082 
2083  /* Eat a line break. */
2084 
2085  if (IS_BREAK(parser->buffer)) {
2086  if (!CACHE(parser, 2)) goto error;
2087  SKIP_LINE(parser);
2088  }
2089 
2090  yaml_free(name);
2091 
2092  return 1;
2093 
2094 error:
2095  yaml_free(prefix);
2096  yaml_free(handle);
2097  yaml_free(name);
2098  return 0;
2099 }
2100 
2101 /*
2102  * Scan the directive name.
2103  *
2104  * Scope:
2105  * %YAML 1.1 # a comment \n
2106  * ^^^^
2107  * %TAG !yaml! tag:yaml.org,2002: \n
2108  * ^^^
2109  */
2110 
2111 static int
2112 yaml_parser_scan_directive_name(yaml_parser_t *parser,
2113  yaml_mark_t start_mark, yaml_char_t **name)
2114 {
2115  yaml_string_t string = NULL_STRING;
2116 
2117  if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
2118 
2119  /* Consume the directive name. */
2120 
2121  if (!CACHE(parser, 1)) goto error;
2122 
2123  while (IS_ALPHA(parser->buffer))
2124  {
2125  if (!READ(parser, string)) goto error;
2126  if (!CACHE(parser, 1)) goto error;
2127  }
2128 
2129  /* Check if the name is empty. */
2130 
2131  if (string.start == string.pointer) {
2132  yaml_parser_set_scanner_error(parser, "while scanning a directive",
2133  start_mark, "could not find expected directive name");
2134  goto error;
2135  }
2136 
2137  /* Check for an blank character after the name. */
2138 
2139  if (!IS_BLANKZ(parser->buffer)) {
2140  yaml_parser_set_scanner_error(parser, "while scanning a directive",
2141  start_mark, "found unexpected non-alphabetical character");
2142  goto error;
2143  }
2144 
2145  *name = string.start;
2146 
2147  return 1;
2148 
2149 error:
2150  STRING_DEL(parser, string);
2151  return 0;
2152 }
2153 
2154 /*
2155  * Scan the value of VERSION-DIRECTIVE.
2156  *
2157  * Scope:
2158  * %YAML 1.1 # a comment \n
2159  * ^^^^^^
2160  */
2161 
2162 static int
2163 yaml_parser_scan_version_directive_value(yaml_parser_t *parser,
2164  yaml_mark_t start_mark, int *major, int *minor)
2165 {
2166  /* Eat whitespaces. */
2167 
2168  if (!CACHE(parser, 1)) return 0;
2169 
2170  while (IS_BLANK(parser->buffer)) {
2171  SKIP(parser);
2172  if (!CACHE(parser, 1)) return 0;
2173  }
2174 
2175  /* Consume the major version number. */
2176 
2177  if (!yaml_parser_scan_version_directive_number(parser, start_mark, major))
2178  return 0;
2179 
2180  /* Eat '.'. */
2181 
2182  if (!CHECK(parser->buffer, '.')) {
2183  return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive",
2184  start_mark, "did not find expected digit or '.' character");
2185  }
2186 
2187  SKIP(parser);
2188 
2189  /* Consume the minor version number. */
2190 
2191  if (!yaml_parser_scan_version_directive_number(parser, start_mark, minor))
2192  return 0;
2193 
2194  return 1;
2195 }
2196 
2197 #define MAX_NUMBER_LENGTH 9
2198 
2199 /*
2200  * Scan the version number of VERSION-DIRECTIVE.
2201  *
2202  * Scope:
2203  * %YAML 1.1 # a comment \n
2204  * ^
2205  * %YAML 1.1 # a comment \n
2206  * ^
2207  */
2208 
2209 static int
2210 yaml_parser_scan_version_directive_number(yaml_parser_t *parser,
2211  yaml_mark_t start_mark, int *number)
2212 {
2213  int value = 0;
2214  size_t length = 0;
2215 
2216  /* Repeat while the next character is digit. */
2217 
2218  if (!CACHE(parser, 1)) return 0;
2219 
2220  while (IS_DIGIT(parser->buffer))
2221  {
2222  /* Check if the number is too long. */
2223 
2224  if (++length > MAX_NUMBER_LENGTH) {
2225  return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive",
2226  start_mark, "found extremely long version number");
2227  }
2228 
2229  value = value*10 + AS_DIGIT(parser->buffer);
2230 
2231  SKIP(parser);
2232 
2233  if (!CACHE(parser, 1)) return 0;
2234  }
2235 
2236  /* Check if the number was present. */
2237 
2238  if (!length) {
2239  return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive",
2240  start_mark, "did not find expected version number");
2241  }
2242 
2243  *number = value;
2244 
2245  return 1;
2246 }
2247 
2248 /*
2249  * Scan the value of a TAG-DIRECTIVE token.
2250  *
2251  * Scope:
2252  * %TAG !yaml! tag:yaml.org,2002: \n
2253  * ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
2254  */
2255 
2256 static int
2257 yaml_parser_scan_tag_directive_value(yaml_parser_t *parser,
2258  yaml_mark_t start_mark, yaml_char_t **handle, yaml_char_t **prefix)
2259 {
2260  yaml_char_t *handle_value = NULL;
2261  yaml_char_t *prefix_value = NULL;
2262 
2263  /* Eat whitespaces. */
2264 
2265  if (!CACHE(parser, 1)) goto error;
2266 
2267  while (IS_BLANK(parser->buffer)) {
2268  SKIP(parser);
2269  if (!CACHE(parser, 1)) goto error;
2270  }
2271 
2272  /* Scan a handle. */
2273 
2274  if (!yaml_parser_scan_tag_handle(parser, 1, start_mark, &handle_value))
2275  goto error;
2276 
2277  /* Expect a whitespace. */
2278 
2279  if (!CACHE(parser, 1)) goto error;
2280 
2281  if (!IS_BLANK(parser->buffer)) {
2282  yaml_parser_set_scanner_error(parser, "while scanning a %TAG directive",
2283  start_mark, "did not find expected whitespace");
2284  goto error;
2285  }
2286 
2287  /* Eat whitespaces. */
2288 
2289  while (IS_BLANK(parser->buffer)) {
2290  SKIP(parser);
2291  if (!CACHE(parser, 1)) goto error;
2292  }
2293 
2294  /* Scan a prefix. */
2295 
2296  if (!yaml_parser_scan_tag_uri(parser, 1, 1, NULL, start_mark, &prefix_value))
2297  goto error;
2298 
2299  /* Expect a whitespace or line break. */
2300 
2301  if (!CACHE(parser, 1)) goto error;
2302 
2303  if (!IS_BLANKZ(parser->buffer)) {
2304  yaml_parser_set_scanner_error(parser, "while scanning a %TAG directive",
2305  start_mark, "did not find expected whitespace or line break");
2306  goto error;
2307  }
2308 
2309  *handle = handle_value;
2310  *prefix = prefix_value;
2311 
2312  return 1;
2313 
2314 error:
2315  yaml_free(handle_value);
2316  yaml_free(prefix_value);
2317  return 0;
2318 }
2319 
2320 static int
2321 yaml_parser_scan_anchor(yaml_parser_t *parser, yaml_token_t *token,
2322  yaml_token_type_t type)
2323 {
2324  int length = 0;
2325  yaml_mark_t start_mark, end_mark;
2326  yaml_string_t string = NULL_STRING;
2327 
2328  if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
2329 
2330  /* Eat the indicator character. */
2331 
2332  start_mark = parser->mark;
2333 
2334  SKIP(parser);
2335 
2336  /* Consume the value. */
2337 
2338  if (!CACHE(parser, 1)) goto error;
2339 
2340  while (IS_ALPHA(parser->buffer)) {
2341  if (!READ(parser, string)) goto error;
2342  if (!CACHE(parser, 1)) goto error;
2343  length ++;
2344  }
2345 
2346  end_mark = parser->mark;
2347 
2348  /*
2349  * Check if length of the anchor is greater than 0 and it is followed by
2350  * a whitespace character or one of the indicators:
2351  *
2352  * '?', ':', ',', ']', '}', '%', '@', '`'.
2353  */
2354 
2355  if (!length || !(IS_BLANKZ(parser->buffer) || CHECK(parser->buffer, '?')
2356  || CHECK(parser->buffer, ':') || CHECK(parser->buffer, ',')
2357  || CHECK(parser->buffer, ']') || CHECK(parser->buffer, '}')
2358  || CHECK(parser->buffer, '%') || CHECK(parser->buffer, '@')
2359  || CHECK(parser->buffer, '`'))) {
2360  yaml_parser_set_scanner_error(parser, type == YAML_ANCHOR_TOKEN ?
2361  "while scanning an anchor" : "while scanning an alias", start_mark,
2362  "did not find expected alphabetic or numeric character");
2363  goto error;
2364  }
2365 
2366  /* Create a token. */
2367 
2368  if (type == YAML_ANCHOR_TOKEN) {
2369  ANCHOR_TOKEN_INIT(*token, string.start, start_mark, end_mark);
2370  }
2371  else {
2372  ALIAS_TOKEN_INIT(*token, string.start, start_mark, end_mark);
2373  }
2374 
2375  return 1;
2376 
2377 error:
2378  STRING_DEL(parser, string);
2379  return 0;
2380 }
2381 
2382 /*
2383  * Scan a TAG token.
2384  */
2385 
2386 static int
2387 yaml_parser_scan_tag(yaml_parser_t *parser, yaml_token_t *token)
2388 {
2389  yaml_char_t *handle = NULL;
2390  yaml_char_t *suffix = NULL;
2391  yaml_mark_t start_mark, end_mark;
2392 
2393  start_mark = parser->mark;
2394 
2395  /* Check if the tag is in the canonical form. */
2396 
2397  if (!CACHE(parser, 2)) goto error;
2398 
2399  if (CHECK_AT(parser->buffer, '<', 1))
2400  {
2401  /* Set the handle to '' */
2402 
2403  handle = YAML_MALLOC(1);
2404  if (!handle) goto error;
2405  handle[0] = '\0';
2406 
2407  /* Eat '!<' */
2408 
2409  SKIP(parser);
2410  SKIP(parser);
2411 
2412  /* Consume the tag value. */
2413 
2414  if (!yaml_parser_scan_tag_uri(parser, 1, 0, NULL, start_mark, &suffix))
2415  goto error;
2416 
2417  /* Check for '>' and eat it. */
2418 
2419  if (!CHECK(parser->buffer, '>')) {
2420  yaml_parser_set_scanner_error(parser, "while scanning a tag",
2421  start_mark, "did not find the expected '>'");
2422  goto error;
2423  }
2424 
2425  SKIP(parser);
2426  }
2427  else
2428  {
2429  /* The tag has either the '!suffix' or the '!handle!suffix' form. */
2430 
2431  /* First, try to scan a handle. */
2432 
2433  if (!yaml_parser_scan_tag_handle(parser, 0, start_mark, &handle))
2434  goto error;
2435 
2436  /* Check if it is, indeed, handle. */
2437 
2438  if (handle[0] == '!' && handle[1] != '\0' && handle[strlen((char *)handle)-1] == '!')
2439  {
2440  /* Scan the suffix now. */
2441 
2442  if (!yaml_parser_scan_tag_uri(parser, 0, 0, NULL, start_mark, &suffix))
2443  goto error;
2444  }
2445  else
2446  {
2447  /* It wasn't a handle after all. Scan the rest of the tag. */
2448 
2449  if (!yaml_parser_scan_tag_uri(parser, 0, 0, handle, start_mark, &suffix))
2450  goto error;
2451 
2452  /* Set the handle to '!'. */
2453 
2454  yaml_free(handle);
2455  handle = YAML_MALLOC(2);
2456  if (!handle) goto error;
2457  handle[0] = '!';
2458  handle[1] = '\0';
2459 
2460  /*
2461  * A special case: the '!' tag. Set the handle to '' and the
2462  * suffix to '!'.
2463  */
2464 
2465  if (suffix[0] == '\0') {
2466  yaml_char_t *tmp = handle;
2467  handle = suffix;
2468  suffix = tmp;
2469  }
2470  }
2471  }
2472 
2473  /* Check the character which ends the tag. */
2474 
2475  if (!CACHE(parser, 1)) goto error;
2476 
2477  if (!IS_BLANKZ(parser->buffer)) {
2478  if (!parser->flow_level || !CHECK(parser->buffer, ',') ) {
2479  yaml_parser_set_scanner_error(parser, "while scanning a tag",
2480  start_mark, "did not find expected whitespace or line break");
2481  goto error;
2482  }
2483  }
2484 
2485  end_mark = parser->mark;
2486 
2487  /* Create a token. */
2488 
2489  TAG_TOKEN_INIT(*token, handle, suffix, start_mark, end_mark);
2490 
2491  return 1;
2492 
2493 error:
2494  yaml_free(handle);
2495  yaml_free(suffix);
2496  return 0;
2497 }
2498 
2499 /*
2500  * Scan a tag handle.
2501  */
2502 
2503 static int
2504 yaml_parser_scan_tag_handle(yaml_parser_t *parser, int directive,
2505  yaml_mark_t start_mark, yaml_char_t **handle)
2506 {
2507  yaml_string_t string = NULL_STRING;
2508 
2509  if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
2510 
2511  /* Check the initial '!' character. */
2512 
2513  if (!CACHE(parser, 1)) goto error;
2514 
2515  if (!CHECK(parser->buffer, '!')) {
2516  yaml_parser_set_scanner_error(parser, directive ?
2517  "while scanning a tag directive" : "while scanning a tag",
2518  start_mark, "did not find expected '!'");
2519  goto error;
2520  }
2521 
2522  /* Copy the '!' character. */
2523 
2524  if (!READ(parser, string)) goto error;
2525 
2526  /* Copy all subsequent alphabetical and numerical characters. */
2527 
2528  if (!CACHE(parser, 1)) goto error;
2529 
2530  while (IS_ALPHA(parser->buffer))
2531  {
2532  if (!READ(parser, string)) goto error;
2533  if (!CACHE(parser, 1)) goto error;
2534  }
2535 
2536  /* Check if the trailing character is '!' and copy it. */
2537 
2538  if (CHECK(parser->buffer, '!'))
2539  {
2540  if (!READ(parser, string)) goto error;
2541  }
2542  else
2543  {
2544  /*
2545  * It's either the '!' tag or not really a tag handle. If it's a %TAG
2546  * directive, it's an error. If it's a tag token, it must be a part of
2547  * URI.
2548  */
2549 
2550  if (directive && !(string.start[0] == '!' && string.start[1] == '\0')) {
2551  yaml_parser_set_scanner_error(parser, "while parsing a tag directive",
2552  start_mark, "did not find expected '!'");
2553  goto error;
2554  }
2555  }
2556 
2557  *handle = string.start;
2558 
2559  return 1;
2560 
2561 error:
2562  STRING_DEL(parser, string);
2563  return 0;
2564 }
2565 
2566 /*
2567  * Scan a tag.
2568  */
2569 
2570 static int
2571 yaml_parser_scan_tag_uri(yaml_parser_t *parser, int uri_char, int directive,
2572  yaml_char_t *head, yaml_mark_t start_mark, yaml_char_t **uri)
2573 {
2574  size_t length = head ? strlen((char *)head) : 0;
2575  yaml_string_t string = NULL_STRING;
2576 
2577  if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
2578 
2579  /* Resize the string to include the head. */
2580 
2581  while ((size_t)(string.end - string.start) <= length) {
2582  if (!yaml_string_extend(&string.start, &string.pointer, &string.end)) {
2583  parser->error = YAML_MEMORY_ERROR;
2584  goto error;
2585  }
2586  }
2587 
2588  /*
2589  * Copy the head if needed.
2590  *
2591  * Note that we don't copy the leading '!' character.
2592  */
2593 
2594  if (length > 1) {
2595  memcpy(string.start, head+1, length-1);
2596  string.pointer += length-1;
2597  }
2598 
2599  /* Scan the tag. */
2600 
2601  if (!CACHE(parser, 1)) goto error;
2602 
2603  /*
2604  * The set of characters that may appear in URI is as follows:
2605  *
2606  * '0'-'9', 'A'-'Z', 'a'-'z', '_', '-', ';', '/', '?', ':', '@', '&',
2607  * '=', '+', '$', '.', '!', '~', '*', '\'', '(', ')', '%'.
2608  *
2609  * If we are inside a verbatim tag <...> (parameter uri_char is true)
2610  * then also the following flow indicators are allowed:
2611  * ',', '[', ']'
2612  */
2613 
2614  while (IS_ALPHA(parser->buffer) || CHECK(parser->buffer, ';')
2615  || CHECK(parser->buffer, '/') || CHECK(parser->buffer, '?')
2616  || CHECK(parser->buffer, ':') || CHECK(parser->buffer, '@')
2617  || CHECK(parser->buffer, '&') || CHECK(parser->buffer, '=')
2618  || CHECK(parser->buffer, '+') || CHECK(parser->buffer, '$')
2619  || CHECK(parser->buffer, '.') || CHECK(parser->buffer, '%')
2620  || CHECK(parser->buffer, '!') || CHECK(parser->buffer, '~')
2621  || CHECK(parser->buffer, '*') || CHECK(parser->buffer, '\'')
2622  || CHECK(parser->buffer, '(') || CHECK(parser->buffer, ')')
2623  || (uri_char && (
2624  CHECK(parser->buffer, ',')
2625  || CHECK(parser->buffer, '[') || CHECK(parser->buffer, ']')
2626  )
2627  ))
2628  {
2629  /* Check if it is a URI-escape sequence. */
2630 
2631  if (CHECK(parser->buffer, '%')) {
2632  if (!STRING_EXTEND(parser, string))
2633  goto error;
2634 
2635  if (!yaml_parser_scan_uri_escapes(parser,
2636  directive, start_mark, &string)) goto error;
2637  }
2638  else {
2639  if (!READ(parser, string)) goto error;
2640  }
2641 
2642  length ++;
2643  if (!CACHE(parser, 1)) goto error;
2644  }
2645 
2646  /* Check if the tag is non-empty. */
2647 
2648  if (!length) {
2649  if (!STRING_EXTEND(parser, string))
2650  goto error;
2651 
2652  yaml_parser_set_scanner_error(parser, directive ?
2653  "while parsing a %TAG directive" : "while parsing a tag",
2654  start_mark, "did not find expected tag URI");
2655  goto error;
2656  }
2657 
2658  *uri = string.start;
2659 
2660  return 1;
2661 
2662 error:
2663  STRING_DEL(parser, string);
2664  return 0;
2665 }
2666 
2667 /*
2668  * Decode an URI-escape sequence corresponding to a single UTF-8 character.
2669  */
2670 
2671 static int
2672 yaml_parser_scan_uri_escapes(yaml_parser_t *parser, int directive,
2673  yaml_mark_t start_mark, yaml_string_t *string)
2674 {
2675  int width = 0;
2676 
2677  /* Decode the required number of characters. */
2678 
2679  do {
2680 
2681  unsigned char octet = 0;
2682 
2683  /* Check for a URI-escaped octet. */
2684 
2685  if (!CACHE(parser, 3)) return 0;
2686 
2687  if (!(CHECK(parser->buffer, '%')
2688  && IS_HEX_AT(parser->buffer, 1)
2689  && IS_HEX_AT(parser->buffer, 2))) {
2690  return yaml_parser_set_scanner_error(parser, directive ?
2691  "while parsing a %TAG directive" : "while parsing a tag",
2692  start_mark, "did not find URI escaped octet");
2693  }
2694 
2695  /* Get the octet. */
2696 
2697  octet = (AS_HEX_AT(parser->buffer, 1) << 4) + AS_HEX_AT(parser->buffer, 2);
2698 
2699  /* If it is the leading octet, determine the length of the UTF-8 sequence. */
2700 
2701  if (!width)
2702  {
2703  width = (octet & 0x80) == 0x00 ? 1 :
2704  (octet & 0xE0) == 0xC0 ? 2 :
2705  (octet & 0xF0) == 0xE0 ? 3 :
2706  (octet & 0xF8) == 0xF0 ? 4 : 0;
2707  if (!width) {
2708  return yaml_parser_set_scanner_error(parser, directive ?
2709  "while parsing a %TAG directive" : "while parsing a tag",
2710  start_mark, "found an incorrect leading UTF-8 octet");
2711  }
2712  }
2713  else
2714  {
2715  /* Check if the trailing octet is correct. */
2716 
2717  if ((octet & 0xC0) != 0x80) {
2718  return yaml_parser_set_scanner_error(parser, directive ?
2719  "while parsing a %TAG directive" : "while parsing a tag",
2720  start_mark, "found an incorrect trailing UTF-8 octet");
2721  }
2722  }
2723 
2724  /* Copy the octet and move the pointers. */
2725 
2726  *(string->pointer++) = octet;
2727  SKIP(parser);
2728  SKIP(parser);
2729  SKIP(parser);
2730 
2731  } while (--width);
2732 
2733  return 1;
2734 }
2735 
2736 /*
2737  * Scan a block scalar.
2738  */
2739 
2740 static int
2741 yaml_parser_scan_block_scalar(yaml_parser_t *parser, yaml_token_t *token,
2742  int literal)
2743 {
2744  yaml_mark_t start_mark;
2745  yaml_mark_t end_mark;
2746  yaml_string_t string = NULL_STRING;
2747  yaml_string_t leading_break = NULL_STRING;
2748  yaml_string_t trailing_breaks = NULL_STRING;
2749  int chomping = 0;
2750  int increment = 0;
2751  int indent = 0;
2752  int leading_blank = 0;
2753  int trailing_blank = 0;
2754 
2755  if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
2756  if (!STRING_INIT(parser, leading_break, INITIAL_STRING_SIZE)) goto error;
2757  if (!STRING_INIT(parser, trailing_breaks, INITIAL_STRING_SIZE)) goto error;
2758 
2759  /* Eat the indicator '|' or '>'. */
2760 
2761  start_mark = parser->mark;
2762 
2763  SKIP(parser);
2764 
2765  /* Scan the additional block scalar indicators. */
2766 
2767  if (!CACHE(parser, 1)) goto error;
2768 
2769  /* Check for a chomping indicator. */
2770 
2771  if (CHECK(parser->buffer, '+') || CHECK(parser->buffer, '-'))
2772  {
2773  /* Set the chomping method and eat the indicator. */
2774 
2775  chomping = CHECK(parser->buffer, '+') ? +1 : -1;
2776 
2777  SKIP(parser);
2778 
2779  /* Check for an indentation indicator. */
2780 
2781  if (!CACHE(parser, 1)) goto error;
2782 
2783  if (IS_DIGIT(parser->buffer))
2784  {
2785  /* Check that the indentation is greater than 0. */
2786 
2787  if (CHECK(parser->buffer, '0')) {
2788  yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
2789  start_mark, "found an indentation indicator equal to 0");
2790  goto error;
2791  }
2792 
2793  /* Get the indentation level and eat the indicator. */
2794 
2795  increment = AS_DIGIT(parser->buffer);
2796 
2797  SKIP(parser);
2798  }
2799  }
2800 
2801  /* Do the same as above, but in the opposite order. */
2802 
2803  else if (IS_DIGIT(parser->buffer))
2804  {
2805  if (CHECK(parser->buffer, '0')) {
2806  yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
2807  start_mark, "found an indentation indicator equal to 0");
2808  goto error;
2809  }
2810 
2811  increment = AS_DIGIT(parser->buffer);
2812 
2813  SKIP(parser);
2814 
2815  if (!CACHE(parser, 1)) goto error;
2816 
2817  if (CHECK(parser->buffer, '+') || CHECK(parser->buffer, '-')) {
2818  chomping = CHECK(parser->buffer, '+') ? +1 : -1;
2819 
2820  SKIP(parser);
2821  }
2822  }
2823 
2824  /* Eat whitespaces and comments to the end of the line. */
2825 
2826  if (!CACHE(parser, 1)) goto error;
2827 
2828  while (IS_BLANK(parser->buffer)) {
2829  SKIP(parser);
2830  if (!CACHE(parser, 1)) goto error;
2831  }
2832 
2833  if (CHECK(parser->buffer, '#')) {
2834  while (!IS_BREAKZ(parser->buffer)) {
2835  SKIP(parser);
2836  if (!CACHE(parser, 1)) goto error;
2837  }
2838  }
2839 
2840  /* Check if we are at the end of the line. */
2841 
2842  if (!IS_BREAKZ(parser->buffer)) {
2843  yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
2844  start_mark, "did not find expected comment or line break");
2845  goto error;
2846  }
2847 
2848  /* Eat a line break. */
2849 
2850  if (IS_BREAK(parser->buffer)) {
2851  if (!CACHE(parser, 2)) goto error;
2852  SKIP_LINE(parser);
2853  }
2854 
2855  end_mark = parser->mark;
2856 
2857  /* Set the indentation level if it was specified. */
2858 
2859  if (increment) {
2860  indent = parser->indent >= 0 ? parser->indent+increment : increment;
2861  }
2862 
2863  /* Scan the leading line breaks and determine the indentation level if needed. */
2864 
2865  if (!yaml_parser_scan_block_scalar_breaks(parser, &indent, &trailing_breaks,
2866  start_mark, &end_mark)) goto error;
2867 
2868  /* Scan the block scalar content. */
2869 
2870  if (!CACHE(parser, 1)) goto error;
2871 
2872  while ((int)parser->mark.column == indent && !(IS_Z(parser->buffer)))
2873  {
2874  /*
2875  * We are at the beginning of a non-empty line.
2876  */
2877 
2878  /* Is it a trailing whitespace? */
2879 
2880  trailing_blank = IS_BLANK(parser->buffer);
2881 
2882  /* Check if we need to fold the leading line break. */
2883 
2884  if (!literal && (*leading_break.start == '\n')
2885  && !leading_blank && !trailing_blank)
2886  {
2887  /* Do we need to join the lines by space? */
2888 
2889  if (*trailing_breaks.start == '\0') {
2890  if (!STRING_EXTEND(parser, string)) goto error;
2891  *(string.pointer ++) = ' ';
2892  }
2893 
2894  CLEAR(parser, leading_break);
2895  }
2896  else {
2897  if (!JOIN(parser, string, leading_break)) goto error;
2898  CLEAR(parser, leading_break);
2899  }
2900 
2901  /* Append the remaining line breaks. */
2902 
2903  if (!JOIN(parser, string, trailing_breaks)) goto error;
2904  CLEAR(parser, trailing_breaks);
2905 
2906  /* Is it a leading whitespace? */
2907 
2908  leading_blank = IS_BLANK(parser->buffer);
2909 
2910  /* Consume the current line. */
2911 
2912  while (!IS_BREAKZ(parser->buffer)) {
2913  if (!READ(parser, string)) goto error;
2914  if (!CACHE(parser, 1)) goto error;
2915  }
2916 
2917  /* Consume the line break. */
2918 
2919  if (!CACHE(parser, 2)) goto error;
2920 
2921  if (!READ_LINE(parser, leading_break)) goto error;
2922 
2923  /* Eat the following indentation spaces and line breaks. */
2924 
2925  if (!yaml_parser_scan_block_scalar_breaks(parser,
2926  &indent, &trailing_breaks, start_mark, &end_mark)) goto error;
2927  }
2928 
2929  /* Chomp the tail. */
2930 
2931  if (chomping != -1) {
2932  if (!JOIN(parser, string, leading_break)) goto error;
2933  }
2934  if (chomping == 1) {
2935  if (!JOIN(parser, string, trailing_breaks)) goto error;
2936  }
2937 
2938  /* Create a token. */
2939 
2940  SCALAR_TOKEN_INIT(*token, string.start, string.pointer-string.start,
2942  start_mark, end_mark);
2943 
2944  STRING_DEL(parser, leading_break);
2945  STRING_DEL(parser, trailing_breaks);
2946 
2947  return 1;
2948 
2949 error:
2950  STRING_DEL(parser, string);
2951  STRING_DEL(parser, leading_break);
2952  STRING_DEL(parser, trailing_breaks);
2953 
2954  return 0;
2955 }
2956 
2957 /*
2958  * Scan indentation spaces and line breaks for a block scalar. Determine the
2959  * indentation level if needed.
2960  */
2961 
2962 static int
2963 yaml_parser_scan_block_scalar_breaks(yaml_parser_t *parser,
2964  int *indent, yaml_string_t *breaks,
2965  yaml_mark_t start_mark, yaml_mark_t *end_mark)
2966 {
2967  int max_indent = 0;
2968 
2969  *end_mark = parser->mark;
2970 
2971  /* Eat the indentation spaces and line breaks. */
2972 
2973  while (1)
2974  {
2975  /* Eat the indentation spaces. */
2976 
2977  if (!CACHE(parser, 1)) return 0;
2978 
2979  while ((!*indent || (int)parser->mark.column < *indent)
2980  && IS_SPACE(parser->buffer)) {
2981  SKIP(parser);
2982  if (!CACHE(parser, 1)) return 0;
2983  }
2984 
2985  if ((int)parser->mark.column > max_indent)
2986  max_indent = (int)parser->mark.column;
2987 
2988  /* Check for a tab character messing the indentation. */
2989 
2990  if ((!*indent || (int)parser->mark.column < *indent)
2991  && IS_TAB(parser->buffer)) {
2992  return yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
2993  start_mark, "found a tab character where an indentation space is expected");
2994  }
2995 
2996  /* Have we found a non-empty line? */
2997 
2998  if (!IS_BREAK(parser->buffer)) break;
2999 
3000  /* Consume the line break. */
3001 
3002  if (!CACHE(parser, 2)) return 0;
3003  if (!READ_LINE(parser, *breaks)) return 0;
3004  *end_mark = parser->mark;
3005  }
3006 
3007  /* Determine the indentation level if needed. */
3008 
3009  if (!*indent) {
3010  *indent = max_indent;
3011  if (*indent < parser->indent + 1)
3012  *indent = parser->indent + 1;
3013  if (*indent < 1)
3014  *indent = 1;
3015  }
3016 
3017  return 1;
3018 }
3019 
3020 /*
3021  * Scan a quoted scalar.
3022  */
3023 
3024 static int
3025 yaml_parser_scan_flow_scalar(yaml_parser_t *parser, yaml_token_t *token,
3026  int single)
3027 {
3028  yaml_mark_t start_mark;
3029  yaml_mark_t end_mark;
3030  yaml_string_t string = NULL_STRING;
3031  yaml_string_t leading_break = NULL_STRING;
3032  yaml_string_t trailing_breaks = NULL_STRING;
3033  yaml_string_t whitespaces = NULL_STRING;
3034  int leading_blanks;
3035 
3036  if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
3037  if (!STRING_INIT(parser, leading_break, INITIAL_STRING_SIZE)) goto error;
3038  if (!STRING_INIT(parser, trailing_breaks, INITIAL_STRING_SIZE)) goto error;
3039  if (!STRING_INIT(parser, whitespaces, INITIAL_STRING_SIZE)) goto error;
3040 
3041  /* Eat the left quote. */
3042 
3043  start_mark = parser->mark;
3044 
3045  SKIP(parser);
3046 
3047  /* Consume the content of the quoted scalar. */
3048 
3049  while (1)
3050  {
3051  /* Check that there are no document indicators at the beginning of the line. */
3052 
3053  if (!CACHE(parser, 4)) goto error;
3054 
3055  if (parser->mark.column == 0 &&
3056  ((CHECK_AT(parser->buffer, '-', 0) &&
3057  CHECK_AT(parser->buffer, '-', 1) &&
3058  CHECK_AT(parser->buffer, '-', 2)) ||
3059  (CHECK_AT(parser->buffer, '.', 0) &&
3060  CHECK_AT(parser->buffer, '.', 1) &&
3061  CHECK_AT(parser->buffer, '.', 2))) &&
3062  IS_BLANKZ_AT(parser->buffer, 3))
3063  {
3064  yaml_parser_set_scanner_error(parser, "while scanning a quoted scalar",
3065  start_mark, "found unexpected document indicator");
3066  goto error;
3067  }
3068 
3069  /* Check for EOF. */
3070 
3071  if (IS_Z(parser->buffer)) {
3072  yaml_parser_set_scanner_error(parser, "while scanning a quoted scalar",
3073  start_mark, "found unexpected end of stream");
3074  goto error;
3075  }
3076 
3077  /* Consume non-blank characters. */
3078 
3079  if (!CACHE(parser, 2)) goto error;
3080 
3081  leading_blanks = 0;
3082 
3083  while (!IS_BLANKZ(parser->buffer))
3084  {
3085  /* Check for an escaped single quote. */
3086 
3087  if (single && CHECK_AT(parser->buffer, '\'', 0)
3088  && CHECK_AT(parser->buffer, '\'', 1))
3089  {
3090  if (!STRING_EXTEND(parser, string)) goto error;
3091  *(string.pointer++) = '\'';
3092  SKIP(parser);
3093  SKIP(parser);
3094  }
3095 
3096  /* Check for the right quote. */
3097 
3098  else if (CHECK(parser->buffer, single ? '\'' : '"'))
3099  {
3100  break;
3101  }
3102 
3103  /* Check for an escaped line break. */
3104 
3105  else if (!single && CHECK(parser->buffer, '\\')
3106  && IS_BREAK_AT(parser->buffer, 1))
3107  {
3108  if (!CACHE(parser, 3)) goto error;
3109  SKIP(parser);
3110  SKIP_LINE(parser);
3111  leading_blanks = 1;
3112  break;
3113  }
3114 
3115  /* Check for an escape sequence. */
3116 
3117  else if (!single && CHECK(parser->buffer, '\\'))
3118  {
3119  size_t code_length = 0;
3120 
3121  if (!STRING_EXTEND(parser, string)) goto error;
3122 
3123  /* Check the escape character. */
3124 
3125  switch (parser->buffer.pointer[1])
3126  {
3127  case '0':
3128  *(string.pointer++) = '\0';
3129  break;
3130 
3131  case 'a':
3132  *(string.pointer++) = '\x07';
3133  break;
3134 
3135  case 'b':
3136  *(string.pointer++) = '\x08';
3137  break;
3138 
3139  case 't':
3140  case '\t':
3141  *(string.pointer++) = '\x09';
3142  break;
3143 
3144  case 'n':
3145  *(string.pointer++) = '\x0A';
3146  break;
3147 
3148  case 'v':
3149  *(string.pointer++) = '\x0B';
3150  break;
3151 
3152  case 'f':
3153  *(string.pointer++) = '\x0C';
3154  break;
3155 
3156  case 'r':
3157  *(string.pointer++) = '\x0D';
3158  break;
3159 
3160  case 'e':
3161  *(string.pointer++) = '\x1B';
3162  break;
3163 
3164  case ' ':
3165  *(string.pointer++) = '\x20';
3166  break;
3167 
3168  case '"':
3169  *(string.pointer++) = '"';
3170  break;
3171 
3172  case '/':
3173  *(string.pointer++) = '/';
3174  break;
3175 
3176  case '\\':
3177  *(string.pointer++) = '\\';
3178  break;
3179 
3180  case 'N': /* NEL (#x85) */
3181  *(string.pointer++) = '\xC2';
3182  *(string.pointer++) = '\x85';
3183  break;
3184 
3185  case '_': /* #xA0 */
3186  *(string.pointer++) = '\xC2';
3187  *(string.pointer++) = '\xA0';
3188  break;
3189 
3190  case 'L': /* LS (#x2028) */
3191  *(string.pointer++) = '\xE2';
3192  *(string.pointer++) = '\x80';
3193  *(string.pointer++) = '\xA8';
3194  break;
3195 
3196  case 'P': /* PS (#x2029) */
3197  *(string.pointer++) = '\xE2';
3198  *(string.pointer++) = '\x80';
3199  *(string.pointer++) = '\xA9';
3200  break;
3201 
3202  case 'x':
3203  code_length = 2;
3204  break;
3205 
3206  case 'u':
3207  code_length = 4;
3208  break;
3209 
3210  case 'U':
3211  code_length = 8;
3212  break;
3213 
3214  default:
3215  yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar",
3216  start_mark, "found unknown escape character");
3217  goto error;
3218  }
3219 
3220  SKIP(parser);
3221  SKIP(parser);
3222 
3223  /* Consume an arbitrary escape code. */
3224 
3225  if (code_length)
3226  {
3227  unsigned int value = 0;
3228  size_t k;
3229 
3230  /* Scan the character value. */
3231 
3232  if (!CACHE(parser, code_length)) goto error;
3233 
3234  for (k = 0; k < code_length; k ++) {
3235  if (!IS_HEX_AT(parser->buffer, k)) {
3236  yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar",
3237  start_mark, "did not find expected hexdecimal number");
3238  goto error;
3239  }
3240  value = (value << 4) + AS_HEX_AT(parser->buffer, k);
3241  }
3242 
3243  /* Check the value and write the character. */
3244 
3245  if ((value >= 0xD800 && value <= 0xDFFF) || value > 0x10FFFF) {
3246  yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar",
3247  start_mark, "found invalid Unicode character escape code");
3248  goto error;
3249  }
3250 
3251  if (value <= 0x7F) {
3252  *(string.pointer++) = value;
3253  }
3254  else if (value <= 0x7FF) {
3255  *(string.pointer++) = 0xC0 + (value >> 6);
3256  *(string.pointer++) = 0x80 + (value & 0x3F);
3257  }
3258  else if (value <= 0xFFFF) {
3259  *(string.pointer++) = 0xE0 + (value >> 12);
3260  *(string.pointer++) = 0x80 + ((value >> 6) & 0x3F);
3261  *(string.pointer++) = 0x80 + (value & 0x3F);
3262  }
3263  else {
3264  *(string.pointer++) = 0xF0 + (value >> 18);
3265  *(string.pointer++) = 0x80 + ((value >> 12) & 0x3F);
3266  *(string.pointer++) = 0x80 + ((value >> 6) & 0x3F);
3267  *(string.pointer++) = 0x80 + (value & 0x3F);
3268  }
3269 
3270  /* Advance the pointer. */
3271 
3272  for (k = 0; k < code_length; k ++) {
3273  SKIP(parser);
3274  }
3275  }
3276  }
3277 
3278  else
3279  {
3280  /* It is a non-escaped non-blank character. */
3281 
3282  if (!READ(parser, string)) goto error;
3283  }
3284 
3285  if (!CACHE(parser, 2)) goto error;
3286  }
3287 
3288  /* Check if we are at the end of the scalar. */
3289 
3290  /* Fix for crash unitialized value crash
3291  * Credit for the bug and input is to OSS Fuzz
3292  * Credit for the fix to Alex Gaynor
3293  */
3294  if (!CACHE(parser, 1)) goto error;
3295  if (CHECK(parser->buffer, single ? '\'' : '"'))
3296  break;
3297 
3298  /* Consume blank characters. */
3299 
3300  if (!CACHE(parser, 1)) goto error;
3301 
3302  while (IS_BLANK(parser->buffer) || IS_BREAK(parser->buffer))
3303  {
3304  if (IS_BLANK(parser->buffer))
3305  {
3306  /* Consume a space or a tab character. */
3307 
3308  if (!leading_blanks) {
3309  if (!READ(parser, whitespaces)) goto error;
3310  }
3311  else {
3312  SKIP(parser);
3313  }
3314  }
3315  else
3316  {
3317  if (!CACHE(parser, 2)) goto error;
3318 
3319  /* Check if it is a first line break. */
3320 
3321  if (!leading_blanks)
3322  {
3323  CLEAR(parser, whitespaces);
3324  if (!READ_LINE(parser, leading_break)) goto error;
3325  leading_blanks = 1;
3326  }
3327  else
3328  {
3329  if (!READ_LINE(parser, trailing_breaks)) goto error;
3330  }
3331  }
3332  if (!CACHE(parser, 1)) goto error;
3333  }
3334 
3335  /* Join the whitespaces or fold line breaks. */
3336 
3337  if (leading_blanks)
3338  {
3339  /* Do we need to fold line breaks? */
3340 
3341  if (leading_break.start[0] == '\n') {
3342  if (trailing_breaks.start[0] == '\0') {
3343  if (!STRING_EXTEND(parser, string)) goto error;
3344  *(string.pointer++) = ' ';
3345  }
3346  else {
3347  if (!JOIN(parser, string, trailing_breaks)) goto error;
3348  CLEAR(parser, trailing_breaks);
3349  }
3350  CLEAR(parser, leading_break);
3351  }
3352  else {
3353  if (!JOIN(parser, string, leading_break)) goto error;
3354  if (!JOIN(parser, string, trailing_breaks)) goto error;
3355  CLEAR(parser, leading_break);
3356  CLEAR(parser, trailing_breaks);
3357  }
3358  }
3359  else
3360  {
3361  if (!JOIN(parser, string, whitespaces)) goto error;
3362  CLEAR(parser, whitespaces);
3363  }
3364  }
3365 
3366  /* Eat the right quote. */
3367 
3368  SKIP(parser);
3369 
3370  end_mark = parser->mark;
3371 
3372  /* Create a token. */
3373 
3374  SCALAR_TOKEN_INIT(*token, string.start, string.pointer-string.start,
3376  start_mark, end_mark);
3377 
3378  STRING_DEL(parser, leading_break);
3379  STRING_DEL(parser, trailing_breaks);
3380  STRING_DEL(parser, whitespaces);
3381 
3382  return 1;
3383 
3384 error:
3385  STRING_DEL(parser, string);
3386  STRING_DEL(parser, leading_break);
3387  STRING_DEL(parser, trailing_breaks);
3388  STRING_DEL(parser, whitespaces);
3389 
3390  return 0;
3391 }
3392 
3393 /*
3394  * Scan a plain scalar.
3395  */
3396 
3397 static int
3398 yaml_parser_scan_plain_scalar(yaml_parser_t *parser, yaml_token_t *token)
3399 {
3400  yaml_mark_t start_mark;
3401  yaml_mark_t end_mark;
3402  yaml_string_t string = NULL_STRING;
3403  yaml_string_t leading_break = NULL_STRING;
3404  yaml_string_t trailing_breaks = NULL_STRING;
3405  yaml_string_t whitespaces = NULL_STRING;
3406  int leading_blanks = 0;
3407  int indent = parser->indent+1;
3408 
3409  if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
3410  if (!STRING_INIT(parser, leading_break, INITIAL_STRING_SIZE)) goto error;
3411  if (!STRING_INIT(parser, trailing_breaks, INITIAL_STRING_SIZE)) goto error;
3412  if (!STRING_INIT(parser, whitespaces, INITIAL_STRING_SIZE)) goto error;
3413 
3414  start_mark = end_mark = parser->mark;
3415 
3416  /* Consume the content of the plain scalar. */
3417 
3418  while (1)
3419  {
3420  /* Check for a document indicator. */
3421 
3422  if (!CACHE(parser, 4)) goto error;
3423 
3424  if (parser->mark.column == 0 &&
3425  ((CHECK_AT(parser->buffer, '-', 0) &&
3426  CHECK_AT(parser->buffer, '-', 1) &&
3427  CHECK_AT(parser->buffer, '-', 2)) ||
3428  (CHECK_AT(parser->buffer, '.', 0) &&
3429  CHECK_AT(parser->buffer, '.', 1) &&
3430  CHECK_AT(parser->buffer, '.', 2))) &&
3431  IS_BLANKZ_AT(parser->buffer, 3)) break;
3432 
3433  /* Check for a comment. */
3434 
3435  if (CHECK(parser->buffer, '#'))
3436  break;
3437 
3438  /* Consume non-blank characters. */
3439 
3440  while (!IS_BLANKZ(parser->buffer))
3441  {
3442  /* Check for "x:" + one of ',?[]{}' in the flow context. TODO: Fix the test "spec-08-13".
3443  * This is not completely according to the spec
3444  * See http://yaml.org/spec/1.1/#id907281 9.1.3. Plain
3445  */
3446 
3447  if (parser->flow_level
3448  && CHECK(parser->buffer, ':')
3449  && (
3450  CHECK_AT(parser->buffer, ',', 1)
3451  || CHECK_AT(parser->buffer, '?', 1)
3452  || CHECK_AT(parser->buffer, '[', 1)
3453  || CHECK_AT(parser->buffer, ']', 1)
3454  || CHECK_AT(parser->buffer, '{', 1)
3455  || CHECK_AT(parser->buffer, '}', 1)
3456  )
3457  ) {
3458  yaml_parser_set_scanner_error(parser, "while scanning a plain scalar",
3459  start_mark, "found unexpected ':'");
3460  goto error;
3461  }
3462 
3463  /* Check for indicators that may end a plain scalar. */
3464 
3465  if ((CHECK(parser->buffer, ':') && IS_BLANKZ_AT(parser->buffer, 1))
3466  || (parser->flow_level &&
3467  (CHECK(parser->buffer, ',')
3468  || CHECK(parser->buffer, '[')
3469  || CHECK(parser->buffer, ']') || CHECK(parser->buffer, '{')
3470  || CHECK(parser->buffer, '}'))))
3471  break;
3472 
3473  /* Check if we need to join whitespaces and breaks. */
3474 
3475  if (leading_blanks || whitespaces.start != whitespaces.pointer)
3476  {
3477  if (leading_blanks)
3478  {
3479  /* Do we need to fold line breaks? */
3480 
3481  if (leading_break.start[0] == '\n') {
3482  if (trailing_breaks.start[0] == '\0') {
3483  if (!STRING_EXTEND(parser, string)) goto error;
3484  *(string.pointer++) = ' ';
3485  }
3486  else {
3487  if (!JOIN(parser, string, trailing_breaks)) goto error;
3488  CLEAR(parser, trailing_breaks);
3489  }
3490  CLEAR(parser, leading_break);
3491  }
3492  else {
3493  if (!JOIN(parser, string, leading_break)) goto error;
3494  if (!JOIN(parser, string, trailing_breaks)) goto error;
3495  CLEAR(parser, leading_break);
3496  CLEAR(parser, trailing_breaks);
3497  }
3498 
3499  leading_blanks = 0;
3500  }
3501  else
3502  {
3503  if (!JOIN(parser, string, whitespaces)) goto error;
3504  CLEAR(parser, whitespaces);
3505  }
3506  }
3507 
3508  /* Copy the character. */
3509 
3510  if (!READ(parser, string)) goto error;
3511 
3512  end_mark = parser->mark;
3513 
3514  if (!CACHE(parser, 2)) goto error;
3515  }
3516 
3517  /* Is it the end? */
3518 
3519  if (!(IS_BLANK(parser->buffer) || IS_BREAK(parser->buffer)))
3520  break;
3521 
3522  /* Consume blank characters. */
3523 
3524  if (!CACHE(parser, 1)) goto error;
3525 
3526  while (IS_BLANK(parser->buffer) || IS_BREAK(parser->buffer))
3527  {
3528  if (IS_BLANK(parser->buffer))
3529  {
3530  /* Check for tab characters that abuse indentation. */
3531 
3532  if (leading_blanks && (int)parser->mark.column < indent
3533  && IS_TAB(parser->buffer)) {
3534  yaml_parser_set_scanner_error(parser, "while scanning a plain scalar",
3535  start_mark, "found a tab character that violates indentation");
3536  goto error;
3537  }
3538 
3539  /* Consume a space or a tab character. */
3540 
3541  if (!leading_blanks) {
3542  if (!READ(parser, whitespaces)) goto error;
3543  }
3544  else {
3545  SKIP(parser);
3546  }
3547  }
3548  else
3549  {
3550  if (!CACHE(parser, 2)) goto error;
3551 
3552  /* Check if it is a first line break. */
3553 
3554  if (!leading_blanks)
3555  {
3556  CLEAR(parser, whitespaces);
3557  if (!READ_LINE(parser, leading_break)) goto error;
3558  leading_blanks = 1;
3559  }
3560  else
3561  {
3562  if (!READ_LINE(parser, trailing_breaks)) goto error;
3563  }
3564  }
3565  if (!CACHE(parser, 1)) goto error;
3566  }
3567 
3568  /* Check indentation level. */
3569 
3570  if (!parser->flow_level && (int)parser->mark.column < indent)
3571  break;
3572  }
3573 
3574  /* Create a token. */
3575 
3576  SCALAR_TOKEN_INIT(*token, string.start, string.pointer-string.start,
3577  YAML_PLAIN_SCALAR_STYLE, start_mark, end_mark);
3578 
3579  /* Note that we change the 'simple_key_allowed' flag. */
3580 
3581  if (leading_blanks) {
3582  parser->simple_key_allowed = 1;
3583  }
3584 
3585  STRING_DEL(parser, leading_break);
3586  STRING_DEL(parser, trailing_breaks);
3587  STRING_DEL(parser, whitespaces);
3588 
3589  return 1;
3590 
3591 error:
3592  STRING_DEL(parser, string);
3593  STRING_DEL(parser, leading_break);
3594  STRING_DEL(parser, trailing_breaks);
3595  STRING_DEL(parser, whitespaces);
3596 
3597  return 0;
3598 }
The double-quoted scalar style.
Definition: yaml.h:176
#define READ(parser, string)
Definition: ymlScanner.c:519
A BLOCK-SEQUENCE-START token.
Definition: yaml.h:234
The pointer position.
Definition: yaml.h:147
yaml_token_t * tail
The tail of the tokens queue.
Definition: yaml.h:1206
A FLOW-SEQUENCE-START token.
Definition: yaml.h:241
#define NULL_STRING
Definition: yaml_private.h:121
A VALUE token.
Definition: yaml.h:256
#define IS_BOM(string)
Definition: yaml_private.h:296
Cannot allocate or reallocate a block of memory.
Definition: yaml.h:129
#define ALIAS_TOKEN_INIT(token, token_value, start_mark, end_mark)
Definition: yaml_private.h:510
A BLOCK-END token.
Definition: yaml.h:238
#define AS_DIGIT(string)
Definition: yaml_private.h:215
int stream_start_produced
Have we started to scan the input stream?
Definition: yaml.h:1189
yaml_encoding_t encoding
The input encoding.
Definition: yaml.h:1171
The parser structure.
Definition: yaml.h:1087
A BLOCK-MAPPING-START token.
Definition: yaml.h:236
struct yaml_parser_s::@37 tokens
The tokens queue.
A FLOW-ENTRY token.
Definition: yaml.h:252
unsigned char yaml_char_t
The character type (UTF-8 octet).
Definition: yaml.h:80
const unsigned char * start
The string start pointer.
Definition: yaml.h:1129
const char * context
The error context.
Definition: yaml.h:1105
yaml_mark_t mark
The position mark.
Definition: yaml.h:1006
#define IS_BREAK_AT(string, offset)
Definition: yaml_private.h:327
yaml_char_t * pointer
Definition: yaml_private.h:109
int indent
The current indentation level.
Definition: yaml.h:1226
#define IS_BLANK(string)
Definition: yaml_private.h:321
#define TAG_DIRECTIVE_TOKEN_INIT(token, token_handle, token_prefix, start_mark, end_mark)
Definition: yaml_private.h:534
#define PUSH(context, stack, value)
Definition: yaml_private.h:442
yaml_char_t * pointer
The current position of the buffer.
Definition: yaml.h:1150
An ALIAS token.
Definition: yaml.h:259
yaml_mark_t mark
The mark of the current position.
Definition: yaml.h:1177
#define IS_HEX_AT(string, offset)
Definition: yaml_private.h:221
yaml_char_t * start
Definition: yaml_private.h:107
#define DEQUEUE(context, queue)
Definition: yaml_private.h:478
A FLOW-SEQUENCE-END token.
Definition: yaml.h:243
int token_available
Does the tokens queue contain a token ready for dequeueing?
Definition: yaml.h:1213
#define INITIAL_STRING_SIZE
Definition: yaml_private.h:84
#define STRING_DEL(context, string)
Definition: yaml_private.h:139
#define IS_SPACE(string)
Definition: yaml_private.h:304
The folded scalar style.
Definition: yaml.h:181
#define AS_HEX_AT(string, offset)
Definition: yaml_private.h:235
int * top
The top of the stack.
Definition: yaml.h:1222
#define QUEUE_INSERT(context, queue, index, value)
Definition: yaml_private.h:481
int required
Is a simple key required?
Definition: yaml.h:1000
int simple_key_allowed
May a simple key occur at the current position?
Definition: yaml.h:1229
#define IS_TAB(string)
Definition: yaml_private.h:312
yaml_free(void *ptr)
Definition: ymlApi.c:51
#define IS_BREAKZ(string)
Definition: yaml_private.h:353
yaml_string_extend(yaml_char_t **start, yaml_char_t **pointer, yaml_char_t **end)
Definition: ymlApi.c:74
#define CHECK(string, octet)
Definition: yaml_private.h:179
yaml_parser_fetch_more_tokens(yaml_parser_t *parser)
Definition: ymlScanner.c:801
#define IS_ALPHA(string)
Definition: yaml_private.h:196
yaml_token_delete(yaml_token_t *token)
Free any memory allocated for a token object.
Definition: ymlApi.c:584
#define TOKEN_INIT(token, token_type, token_start_mark, token_end_mark)
Definition: yaml_private.h:497
#define YAML_DECLARE(type)
The public API declaration.
Definition: yaml.h:40
A FLOW-MAPPING-START token.
Definition: yaml.h:245
#define TAG_TOKEN_INIT(token, token_handle, token_suffix, start_mark, end_mark)
Definition: yaml_private.h:518
#define IS_BLANK_AT(string, offset)
Definition: yaml_private.h:318
struct yaml_parser_s::@39 simple_keys
The stack of simple keys.
yaml_error_type_t error
Error type.
Definition: yaml.h:1095
#define SKIP(parser)
Definition: ymlScanner.c:495
#define IS_DIGIT(string)
Definition: yaml_private.h:206
struct yaml_parser_s::@35 buffer
The working buffer.
#define STREAM_START_TOKEN_INIT(token, token_encoding, start_mark, end_mark)
Definition: yaml_private.h:503
#define CHECK_AT(string, octet, offset)
Definition: yaml_private.h:172
size_t token_number
The number of the token.
Definition: yaml.h:1003
#define CACHE(parser, length)
Definition: ymlScanner.c:486
#define MAX_NUMBER_LENGTH
Definition: ymlScanner.c:2197
A STREAM-END token.
Definition: yaml.h:222
enum yaml_token_type_e yaml_token_type_t
Token types.
yaml_token_t * head
The head of the tokens queue.
Definition: yaml.h:1204
#define ENQUEUE(context, queue, value)
Definition: yaml_private.h:469
An ANCHOR token.
Definition: yaml.h:261
#define JOIN(context, string_a, string_b)
Definition: yaml_private.h:155
#define ANCHOR_TOKEN_INIT(token, token_value, start_mark, end_mark)
Definition: yaml_private.h:514
#define SCALAR_TOKEN_INIT(token, token_value, token_length, token_style, start_mark, end_mark)
Definition: yaml_private.h:523
yaml_mark_t context_mark
The context position.
Definition: yaml.h:1107
#define READ_LINE(parser, string)
Definition: ymlScanner.c:531
A FLOW-MAPPING-END token.
Definition: yaml.h:247
yaml_mark_t problem_mark
The problem position.
Definition: yaml.h:1103
in value value
size_t tokens_parsed
The number of tokens fetched from the queue.
Definition: yaml.h:1210
#define STRING_EXTEND(context, string)
Definition: yaml_private.h:143
#define IS_BLANKZ_AT(string, offset)
Definition: yaml_private.h:368
A DOCUMENT-START token.
Definition: yaml.h:229
#define YAML_MALLOC(size)
Definition: yaml_private.h:682
#define STRING_INIT(context, string, size)
Definition: yaml_private.h:130
#define STREAM_END_TOKEN_INIT(token, start_mark, end_mark)
Definition: yaml_private.h:507
This structure holds information about a potential simple key.
Definition: yaml.h:995
The plain scalar style.
Definition: yaml.h:171
The literal scalar style.
Definition: yaml.h:179
size_t line
The position line.
Definition: yaml.h:152
#define POP(context, stack)
Definition: yaml_private.h:451
A DOCUMENT-END token.
Definition: yaml.h:231
#define SKIP_LINE(parser)
Definition: ymlScanner.c:501
#define IS_Z(string)
Definition: yaml_private.h:285
Cannot scan the input stream.
Definition: yaml.h:134
size_t index
The position index.
Definition: yaml.h:149
#define IS_BLANKZ(string)
Definition: yaml_private.h:371
yaml_parser_scan(yaml_parser_t *parser, yaml_token_t *token)
Scan the input stream and produce the next token.
Definition: ymlScanner.c:743
const char * problem
Error description.
Definition: yaml.h:1097
int possible
Is a simple key possible?
Definition: yaml.h:997
A BLOCK-ENTRY token.
Definition: yaml.h:250
#define VERSION_DIRECTIVE_TOKEN_INIT(token, token_major, token_minor, start_mark, end_mark)
Definition: yaml_private.h:529
#define IS_BREAK(string)
Definition: yaml_private.h:339
size_t column
The position column.
Definition: yaml.h:155
struct yaml_parser_s::@38 indents
The indentation levels stack.
A KEY token.
Definition: yaml.h:254
int flow_level
The number of unclosed '[' and '{' indicators.
Definition: yaml.h:1195
The token structure.
Definition: yaml.h:269
#define CLEAR(context, string)
Definition: yaml_private.h:151
The single-quoted scalar style.
Definition: yaml.h:174