1 /* Parser implementation */
  2 
  3 /* For a description, see the comments at end of this file */
  4 
  5 /* XXX To do: error recovery */
  6 
  7 #include "Python.h"
  8 #include "pgenheaders.h"
  9 #include "token.h"
 10 #include "grammar.h"
 11 #include "node.h"
 12 #include "parser.h"
 13 #include "errcode.h"
 14 
 15 
 16 #ifdef Py_DEBUG
 17 extern int Py_DebugFlag;
 18 #define D(x) if (!Py_DebugFlag); else x
 19 #else
 20 #define D(x)
 21 #endif
 22 
 23 
 24 /* STACK DATA TYPE */
 25 
 26 static void s_reset(stack *);
 27 
 28 static void
 29 s_reset(stack *s)
 30 {
 31     s->s_top = &s->s_base[MAXSTACK];
 32 }
 33 
 34 #define s_empty(s) ((s)->s_top == &(s)->s_base[MAXSTACK])
 35 
 36 static int
 37 s_push(register stack *s, dfa *d, node *parent)
 38 {
 39     register stackentry *top;
 40     if (s->s_top == s->s_base) {
 41         fprintf(stderr, "s_push: parser stack overflow\n");
 42         return E_NOMEM;
 43     }
 44     top = --s->s_top;
 45     top->s_dfa = d;
 46     top->s_parent = parent;
 47     top->s_state = 0;
 48     return 0;
 49 }
 50 
 51 #ifdef Py_DEBUG
 52 
 53 static void
 54 s_pop(register stack *s)
 55 {
 56     if (s_empty(s))
 57         Py_FatalError("s_pop: parser stack underflow -- FATAL");
 58     s->s_top++;
 59 }
 60 
 61 #else /* !Py_DEBUG */
 62 
 63 #define s_pop(s) (s)->s_top++
 64 
 65 #endif
 66 
 67 
 68 /* PARSER CREATION */
 69 
 70 parser_state *
 71 PyParser_New(grammar *g, int start)
 72 {
 73     parser_state *ps;
 74 
 75     if (!g->g_accel)
 76         PyGrammar_AddAccelerators(g);
 77     ps = (parser_state *)PyMem_MALLOC(sizeof(parser_state));
 78     if (ps == NULL)
 79         return NULL;
 80     ps->p_grammar = g;
 81 #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
 82     ps->p_flags = 0;
 83 #endif
 84     ps->p_tree = PyNode_New(start);
 85     if (ps->p_tree == NULL) {
 86         PyMem_FREE(ps);
 87         return NULL;
 88     }
 89     s_reset(&ps->p_stack);
 90     (void) s_push(&ps->p_stack, PyGrammar_FindDFA(g, start), ps->p_tree);
 91     return ps;
 92 }
 93 
 94 void
 95 PyParser_Delete(parser_state *ps)
 96 {
 97     /* NB If you want to save the parse tree,
 98        you must set p_tree to NULL before calling delparser! */
 99     PyNode_Free(ps->p_tree);
100     PyMem_FREE(ps);
101 }
102 
103 
104 /* PARSER STACK OPERATIONS */
105 
106 static int
107 shift(register stack *s, int type, char *str, int newstate, int lineno, int col_offset)
108 {
109     int err;
110     assert(!s_empty(s));
111     err = PyNode_AddChild(s->s_top->s_parent, type, str, lineno, col_offset);
112     if (err)
113         return err;
114     s->s_top->s_state = newstate;
115     return 0;
116 }
117 
118 static int
119 push(register stack *s, int type, dfa *d, int newstate, int lineno, int col_offset)
120 {
121     int err;
122     register node *n;
123     n = s->s_top->s_parent;
124     assert(!s_empty(s));
125     err = PyNode_AddChild(n, type, (char *)NULL, lineno, col_offset);
126     if (err)
127         return err;
128     s->s_top->s_state = newstate;
129     return s_push(s, d, CHILD(n, NCH(n)-1));
130 }
131 
132 
133 /* PARSER PROPER */
134 
135 static int
136 classify(parser_state *ps, int type, char *str)
137 {
138     grammar *g = ps->p_grammar;
139     register int n = g->g_ll.ll_nlabels;
140 
141     if (type == NAME) {
142         register char *s = str;
143         register label *l = g->g_ll.ll_label;
144         register int i;
145         for (i = n; i > 0; i--, l++) {
146             if (l->lb_type != NAME || l->lb_str == NULL ||
147                 l->lb_str[0] != s[0] ||
148                 strcmp(l->lb_str, s) != 0)
149                 continue;
150 #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
151             if (ps->p_flags & CO_FUTURE_PRINT_FUNCTION &&
152                 s[0] == 'p' && strcmp(s, "print") == 0) {
153                 break; /* no longer a keyword */
154             }
155 #endif
156             D(printf("It's a keyword\n"));
157             return n - i;
158         }
159     }
160 
161     {
162         register label *l = g->g_ll.ll_label;
163         register int i;
164         for (i = n; i > 0; i--, l++) {
165             if (l->lb_type == type && l->lb_str == NULL) {
166                 D(printf("It's a token we know\n"));
167                 return n - i;
168             }
169         }
170     }
171 
172     D(printf("Illegal token\n"));
173     return -1;
174 }
175 
176 #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
177 static void
178 future_hack(parser_state *ps)
179 {
180     node *n = ps->p_stack.s_top->s_parent;
181     node *ch, *cch;
182     int i;
183 
184     /* from __future__ import ..., must have at least 4 children */
185     n = CHILD(n, 0);
186     if (NCH(n) < 4)
187         return;
188     ch = CHILD(n, 0);
189     if (STR(ch) == NULL || strcmp(STR(ch), "from") != 0)
190         return;
191     ch = CHILD(n, 1);
192     if (NCH(ch) == 1 && STR(CHILD(ch, 0)) &&
193         strcmp(STR(CHILD(ch, 0)), "__future__") != 0)
194         return;
195     ch = CHILD(n, 3);
196     /* ch can be a star, a parenthesis or import_as_names */
197     if (TYPE(ch) == STAR)
198         return;
199     if (TYPE(ch) == LPAR)
200         ch = CHILD(n, 4);
201 
202     for (i = 0; i < NCH(ch); i += 2) {
203         cch = CHILD(ch, i);
204         if (NCH(cch) >= 1 && TYPE(CHILD(cch, 0)) == NAME) {
205             char *str_ch = STR(CHILD(cch, 0));
206             if (strcmp(str_ch, FUTURE_WITH_STATEMENT) == 0) {
207                 ps->p_flags |= CO_FUTURE_WITH_STATEMENT;
208             } else if (strcmp(str_ch, FUTURE_PRINT_FUNCTION) == 0) {
209                 ps->p_flags |= CO_FUTURE_PRINT_FUNCTION;
210             } else if (strcmp(str_ch, FUTURE_UNICODE_LITERALS) == 0) {
211                 ps->p_flags |= CO_FUTURE_UNICODE_LITERALS;
212             }
213         }
214     }
215 }
216 #endif /* future keyword */
217 
218 int
219 PyParser_AddToken(register parser_state *ps, register int type, char *str,
220                   int lineno, int col_offset, int *expected_ret)
221 {
222     register int ilabel;
223     int err;
224 
225     D(printf("Token %s/'%s' ... ", _PyParser_TokenNames[type], str));
226 
227     /* Find out which label this token is */
228     ilabel = classify(ps, type, str);
229     if (ilabel < 0)
230         return E_SYNTAX;
231 
232     /* Loop until the token is shifted or an error occurred */
233     for (;;) {
234         /* Fetch the current dfa and state */
235         register dfa *d = ps->p_stack.s_top->s_dfa;
236         register state *s = &d->d_state[ps->p_stack.s_top->s_state];
237 
238         D(printf(" DFA '%s', state %d:",
239             d->d_name, ps->p_stack.s_top->s_state));
240 
241         /* Check accelerator */
242         if (s->s_lower <= ilabel && ilabel < s->s_upper) {
243             register int x = s->s_accel[ilabel - s->s_lower];
244             if (x != -1) {
245                 if (x & (1<<7)) {
246                     /* Push non-terminal */
247                     int nt = (x >> 8) + NT_OFFSET;
248                     int arrow = x & ((1<<7)-1);
249                     dfa *d1 = PyGrammar_FindDFA(
250                         ps->p_grammar, nt);
251                     if ((err = push(&ps->p_stack, nt, d1,
252                         arrow, lineno, col_offset)) > 0) {
253                         D(printf(" MemError: push\n"));
254                         return err;
255                     }
256                     D(printf(" Push ...\n"));
257                     continue;
258                 }
259 
260                 /* Shift the token */
261                 if ((err = shift(&ps->p_stack, type, str,
262                                 x, lineno, col_offset)) > 0) {
263                     D(printf(" MemError: shift.\n"));
264                     return err;
265                 }
266                 D(printf(" Shift.\n"));
267                 /* Pop while we are in an accept-only state */
268                 while (s = &d->d_state
269                                 [ps->p_stack.s_top->s_state],
270                     s->s_accept && s->s_narcs == 1) {
271                     D(printf("  DFA '%s', state %d: "
272                              "Direct pop.\n",
273                              d->d_name,
274                              ps->p_stack.s_top->s_state));
275 #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
276                     if (d->d_name[0] == 'i' &&
277                         strcmp(d->d_name,
278                            "import_stmt") == 0)
279                         future_hack(ps);
280 #endif
281                     s_pop(&ps->p_stack);
282                     if (s_empty(&ps->p_stack)) {
283                         D(printf("  ACCEPT.\n"));
284                         return E_DONE;
285                     }
286                     d = ps->p_stack.s_top->s_dfa;
287                 }
288                 return E_OK;
289             }
290         }
291 
292         if (s->s_accept) {
293 #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
294             if (d->d_name[0] == 'i' &&
295                 strcmp(d->d_name, "import_stmt") == 0)
296                 future_hack(ps);
297 #endif
298             /* Pop this dfa and try again */
299             s_pop(&ps->p_stack);
300             D(printf(" Pop ...\n"));
301             if (s_empty(&ps->p_stack)) {
302                 D(printf(" Error: bottom of stack.\n"));
303                 return E_SYNTAX;
304             }
305             continue;
306         }
307 
308         /* Stuck, report syntax error */
309         D(printf(" Error.\n"));
310         if (expected_ret) {
311             if (s->s_lower == s->s_upper - 1) {
312                 /* Only one possible expected token */
313                 *expected_ret = ps->p_grammar->
314                     g_ll.ll_label[s->s_lower].lb_type;
315             }
316             else
317                 *expected_ret = -1;
318         }
319         return E_SYNTAX;
320     }
321 }
322 
323 
324 #ifdef Py_DEBUG
325 
326 /* DEBUG OUTPUT */
327 
328 void
329 dumptree(grammar *g, node *n)
330 {
331     int i;
332 
333     if (n == NULL)
334         printf("NIL");
335     else {
336         label l;
337         l.lb_type = TYPE(n);
338         l.lb_str = STR(n);
339         printf("%s", PyGrammar_LabelRepr(&l));
340         if (ISNONTERMINAL(TYPE(n))) {
341             printf("(");
342             for (i = 0; i < NCH(n); i++) {
343                 if (i > 0)
344                     printf(",");
345                 dumptree(g, CHILD(n, i));
346             }
347             printf(")");
348         }
349     }
350 }
351 
352 void
353 showtree(grammar *g, node *n)
354 {
355     int i;
356 
357     if (n == NULL)
358         return;
359     if (ISNONTERMINAL(TYPE(n))) {
360         for (i = 0; i < NCH(n); i++)
361             showtree(g, CHILD(n, i));
362     }
363     else if (ISTERMINAL(TYPE(n))) {
364         printf("%s", _PyParser_TokenNames[TYPE(n)]);
365         if (TYPE(n) == NUMBER || TYPE(n) == NAME)
366             printf("(%s)", STR(n));
367         printf(" ");
368     }
369     else
370         printf("? ");
371 }
372 
373 void
374 printtree(parser_state *ps)
375 {
376     if (Py_DebugFlag) {
377         printf("Parse tree:\n");
378         dumptree(ps->p_grammar, ps->p_tree);
379         printf("\n");
380         printf("Tokens:\n");
381         showtree(ps->p_grammar, ps->p_tree);
382         printf("\n");
383     }
384     printf("Listing:\n");
385     PyNode_ListTree(ps->p_tree);
386     printf("\n");
387 }
388 
389 #endif /* Py_DEBUG */
390 
391 /*
392 
393 Description
394 -----------
395 
396 The parser's interface is different than usual: the function addtoken()
397 must be called for each token in the input.  This makes it possible to
398 turn it into an incremental parsing system later.  The parsing system
399 constructs a parse tree as it goes.
400 
401 A parsing rule is represented as a Deterministic Finite-state Automaton
402 (DFA).  A node in a DFA represents a state of the parser; an arc represents
403 a transition.  Transitions are either labeled with terminal symbols or
404 with non-terminals.  When the parser decides to follow an arc labeled
405 with a non-terminal, it is invoked recursively with the DFA representing
406 the parsing rule for that as its initial state; when that DFA accepts,
407 the parser that invoked it continues.  The parse tree constructed by the
408 recursively called parser is inserted as a child in the current parse tree.
409 
410 The DFA's can be constructed automatically from a more conventional
411 language description.  An extended LL(1) grammar (ELL(1)) is suitable.
412 Certain restrictions make the parser's life easier: rules that can produce
413 the empty string should be outlawed (there are other ways to put loops
414 or optional parts in the language).  To avoid the need to construct
415 FIRST sets, we can require that all but the last alternative of a rule
416 (really: arc going out of a DFA's state) must begin with a terminal
417 symbol.
418 
419 As an example, consider this grammar:
420 
421 expr:   term (OP term)*
422 term:   CONSTANT | '(' expr ')'
423 
424 The DFA corresponding to the rule for expr is:
425 
426 ------->.---term-->.------->
427     ^          |
428     |          |
429     \----OP----/
430 
431 The parse tree generated for the input a+b is:
432 
433 (expr: (term: (NAME: a)), (OP: +), (term: (NAME: b)))
434 
435 */
Python-2.7.3/Parser/parser.c