No issues found
1 /* Parser implementation */
2
3 /* For a description, see the comments at end of this file */
4
5 /* XXX To do: error recovery */
6
7 #include "Python.h"
8 #include "pgenheaders.h"
9 #include "token.h"
10 #include "grammar.h"
11 #include "node.h"
12 #include "parser.h"
13 #include "errcode.h"
14
15
16 #ifdef Py_DEBUG
17 extern int Py_DebugFlag;
18 #define D(x) if (!Py_DebugFlag); else x
19 #else
20 #define D(x)
21 #endif
22
23
24 /* STACK DATA TYPE */
25
26 static void s_reset(stack *);
27
28 static void
29 s_reset(stack *s)
30 {
31 s->s_top = &s->s_base[MAXSTACK];
32 }
33
34 #define s_empty(s) ((s)->s_top == &(s)->s_base[MAXSTACK])
35
36 static int
37 s_push(register stack *s, dfa *d, node *parent)
38 {
39 register stackentry *top;
40 if (s->s_top == s->s_base) {
41 fprintf(stderr, "s_push: parser stack overflow\n");
42 return E_NOMEM;
43 }
44 top = --s->s_top;
45 top->s_dfa = d;
46 top->s_parent = parent;
47 top->s_state = 0;
48 return 0;
49 }
50
51 #ifdef Py_DEBUG
52
53 static void
54 s_pop(register stack *s)
55 {
56 if (s_empty(s))
57 Py_FatalError("s_pop: parser stack underflow -- FATAL");
58 s->s_top++;
59 }
60
61 #else /* !Py_DEBUG */
62
63 #define s_pop(s) (s)->s_top++
64
65 #endif
66
67
68 /* PARSER CREATION */
69
70 parser_state *
71 PyParser_New(grammar *g, int start)
72 {
73 parser_state *ps;
74
75 if (!g->g_accel)
76 PyGrammar_AddAccelerators(g);
77 ps = (parser_state *)PyMem_MALLOC(sizeof(parser_state));
78 if (ps == NULL)
79 return NULL;
80 ps->p_grammar = g;
81 #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
82 ps->p_flags = 0;
83 #endif
84 ps->p_tree = PyNode_New(start);
85 if (ps->p_tree == NULL) {
86 PyMem_FREE(ps);
87 return NULL;
88 }
89 s_reset(&ps->p_stack);
90 (void) s_push(&ps->p_stack, PyGrammar_FindDFA(g, start), ps->p_tree);
91 return ps;
92 }
93
94 void
95 PyParser_Delete(parser_state *ps)
96 {
97 /* NB If you want to save the parse tree,
98 you must set p_tree to NULL before calling delparser! */
99 PyNode_Free(ps->p_tree);
100 PyMem_FREE(ps);
101 }
102
103
104 /* PARSER STACK OPERATIONS */
105
106 static int
107 shift(register stack *s, int type, char *str, int newstate, int lineno, int col_offset)
108 {
109 int err;
110 assert(!s_empty(s));
111 err = PyNode_AddChild(s->s_top->s_parent, type, str, lineno, col_offset);
112 if (err)
113 return err;
114 s->s_top->s_state = newstate;
115 return 0;
116 }
117
118 static int
119 push(register stack *s, int type, dfa *d, int newstate, int lineno, int col_offset)
120 {
121 int err;
122 register node *n;
123 n = s->s_top->s_parent;
124 assert(!s_empty(s));
125 err = PyNode_AddChild(n, type, (char *)NULL, lineno, col_offset);
126 if (err)
127 return err;
128 s->s_top->s_state = newstate;
129 return s_push(s, d, CHILD(n, NCH(n)-1));
130 }
131
132
133 /* PARSER PROPER */
134
135 static int
136 classify(parser_state *ps, int type, char *str)
137 {
138 grammar *g = ps->p_grammar;
139 register int n = g->g_ll.ll_nlabels;
140
141 if (type == NAME) {
142 register char *s = str;
143 register label *l = g->g_ll.ll_label;
144 register int i;
145 for (i = n; i > 0; i--, l++) {
146 if (l->lb_type != NAME || l->lb_str == NULL ||
147 l->lb_str[0] != s[0] ||
148 strcmp(l->lb_str, s) != 0)
149 continue;
150 #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
151 if (ps->p_flags & CO_FUTURE_PRINT_FUNCTION &&
152 s[0] == 'p' && strcmp(s, "print") == 0) {
153 break; /* no longer a keyword */
154 }
155 #endif
156 D(printf("It's a keyword\n"));
157 return n - i;
158 }
159 }
160
161 {
162 register label *l = g->g_ll.ll_label;
163 register int i;
164 for (i = n; i > 0; i--, l++) {
165 if (l->lb_type == type && l->lb_str == NULL) {
166 D(printf("It's a token we know\n"));
167 return n - i;
168 }
169 }
170 }
171
172 D(printf("Illegal token\n"));
173 return -1;
174 }
175
176 #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
177 static void
178 future_hack(parser_state *ps)
179 {
180 node *n = ps->p_stack.s_top->s_parent;
181 node *ch, *cch;
182 int i;
183
184 /* from __future__ import ..., must have at least 4 children */
185 n = CHILD(n, 0);
186 if (NCH(n) < 4)
187 return;
188 ch = CHILD(n, 0);
189 if (STR(ch) == NULL || strcmp(STR(ch), "from") != 0)
190 return;
191 ch = CHILD(n, 1);
192 if (NCH(ch) == 1 && STR(CHILD(ch, 0)) &&
193 strcmp(STR(CHILD(ch, 0)), "__future__") != 0)
194 return;
195 ch = CHILD(n, 3);
196 /* ch can be a star, a parenthesis or import_as_names */
197 if (TYPE(ch) == STAR)
198 return;
199 if (TYPE(ch) == LPAR)
200 ch = CHILD(n, 4);
201
202 for (i = 0; i < NCH(ch); i += 2) {
203 cch = CHILD(ch, i);
204 if (NCH(cch) >= 1 && TYPE(CHILD(cch, 0)) == NAME) {
205 char *str_ch = STR(CHILD(cch, 0));
206 if (strcmp(str_ch, FUTURE_WITH_STATEMENT) == 0) {
207 ps->p_flags |= CO_FUTURE_WITH_STATEMENT;
208 } else if (strcmp(str_ch, FUTURE_PRINT_FUNCTION) == 0) {
209 ps->p_flags |= CO_FUTURE_PRINT_FUNCTION;
210 } else if (strcmp(str_ch, FUTURE_UNICODE_LITERALS) == 0) {
211 ps->p_flags |= CO_FUTURE_UNICODE_LITERALS;
212 }
213 }
214 }
215 }
216 #endif /* future keyword */
217
218 int
219 PyParser_AddToken(register parser_state *ps, register int type, char *str,
220 int lineno, int col_offset, int *expected_ret)
221 {
222 register int ilabel;
223 int err;
224
225 D(printf("Token %s/'%s' ... ", _PyParser_TokenNames[type], str));
226
227 /* Find out which label this token is */
228 ilabel = classify(ps, type, str);
229 if (ilabel < 0)
230 return E_SYNTAX;
231
232 /* Loop until the token is shifted or an error occurred */
233 for (;;) {
234 /* Fetch the current dfa and state */
235 register dfa *d = ps->p_stack.s_top->s_dfa;
236 register state *s = &d->d_state[ps->p_stack.s_top->s_state];
237
238 D(printf(" DFA '%s', state %d:",
239 d->d_name, ps->p_stack.s_top->s_state));
240
241 /* Check accelerator */
242 if (s->s_lower <= ilabel && ilabel < s->s_upper) {
243 register int x = s->s_accel[ilabel - s->s_lower];
244 if (x != -1) {
245 if (x & (1<<7)) {
246 /* Push non-terminal */
247 int nt = (x >> 8) + NT_OFFSET;
248 int arrow = x & ((1<<7)-1);
249 dfa *d1 = PyGrammar_FindDFA(
250 ps->p_grammar, nt);
251 if ((err = push(&ps->p_stack, nt, d1,
252 arrow, lineno, col_offset)) > 0) {
253 D(printf(" MemError: push\n"));
254 return err;
255 }
256 D(printf(" Push ...\n"));
257 continue;
258 }
259
260 /* Shift the token */
261 if ((err = shift(&ps->p_stack, type, str,
262 x, lineno, col_offset)) > 0) {
263 D(printf(" MemError: shift.\n"));
264 return err;
265 }
266 D(printf(" Shift.\n"));
267 /* Pop while we are in an accept-only state */
268 while (s = &d->d_state
269 [ps->p_stack.s_top->s_state],
270 s->s_accept && s->s_narcs == 1) {
271 D(printf(" DFA '%s', state %d: "
272 "Direct pop.\n",
273 d->d_name,
274 ps->p_stack.s_top->s_state));
275 #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
276 if (d->d_name[0] == 'i' &&
277 strcmp(d->d_name,
278 "import_stmt") == 0)
279 future_hack(ps);
280 #endif
281 s_pop(&ps->p_stack);
282 if (s_empty(&ps->p_stack)) {
283 D(printf(" ACCEPT.\n"));
284 return E_DONE;
285 }
286 d = ps->p_stack.s_top->s_dfa;
287 }
288 return E_OK;
289 }
290 }
291
292 if (s->s_accept) {
293 #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
294 if (d->d_name[0] == 'i' &&
295 strcmp(d->d_name, "import_stmt") == 0)
296 future_hack(ps);
297 #endif
298 /* Pop this dfa and try again */
299 s_pop(&ps->p_stack);
300 D(printf(" Pop ...\n"));
301 if (s_empty(&ps->p_stack)) {
302 D(printf(" Error: bottom of stack.\n"));
303 return E_SYNTAX;
304 }
305 continue;
306 }
307
308 /* Stuck, report syntax error */
309 D(printf(" Error.\n"));
310 if (expected_ret) {
311 if (s->s_lower == s->s_upper - 1) {
312 /* Only one possible expected token */
313 *expected_ret = ps->p_grammar->
314 g_ll.ll_label[s->s_lower].lb_type;
315 }
316 else
317 *expected_ret = -1;
318 }
319 return E_SYNTAX;
320 }
321 }
322
323
324 #ifdef Py_DEBUG
325
326 /* DEBUG OUTPUT */
327
328 void
329 dumptree(grammar *g, node *n)
330 {
331 int i;
332
333 if (n == NULL)
334 printf("NIL");
335 else {
336 label l;
337 l.lb_type = TYPE(n);
338 l.lb_str = STR(n);
339 printf("%s", PyGrammar_LabelRepr(&l));
340 if (ISNONTERMINAL(TYPE(n))) {
341 printf("(");
342 for (i = 0; i < NCH(n); i++) {
343 if (i > 0)
344 printf(",");
345 dumptree(g, CHILD(n, i));
346 }
347 printf(")");
348 }
349 }
350 }
351
352 void
353 showtree(grammar *g, node *n)
354 {
355 int i;
356
357 if (n == NULL)
358 return;
359 if (ISNONTERMINAL(TYPE(n))) {
360 for (i = 0; i < NCH(n); i++)
361 showtree(g, CHILD(n, i));
362 }
363 else if (ISTERMINAL(TYPE(n))) {
364 printf("%s", _PyParser_TokenNames[TYPE(n)]);
365 if (TYPE(n) == NUMBER || TYPE(n) == NAME)
366 printf("(%s)", STR(n));
367 printf(" ");
368 }
369 else
370 printf("? ");
371 }
372
373 void
374 printtree(parser_state *ps)
375 {
376 if (Py_DebugFlag) {
377 printf("Parse tree:\n");
378 dumptree(ps->p_grammar, ps->p_tree);
379 printf("\n");
380 printf("Tokens:\n");
381 showtree(ps->p_grammar, ps->p_tree);
382 printf("\n");
383 }
384 printf("Listing:\n");
385 PyNode_ListTree(ps->p_tree);
386 printf("\n");
387 }
388
389 #endif /* Py_DEBUG */
390
391 /*
392
393 Description
394 -----------
395
396 The parser's interface is different than usual: the function addtoken()
397 must be called for each token in the input. This makes it possible to
398 turn it into an incremental parsing system later. The parsing system
399 constructs a parse tree as it goes.
400
401 A parsing rule is represented as a Deterministic Finite-state Automaton
402 (DFA). A node in a DFA represents a state of the parser; an arc represents
403 a transition. Transitions are either labeled with terminal symbols or
404 with non-terminals. When the parser decides to follow an arc labeled
405 with a non-terminal, it is invoked recursively with the DFA representing
406 the parsing rule for that as its initial state; when that DFA accepts,
407 the parser that invoked it continues. The parse tree constructed by the
408 recursively called parser is inserted as a child in the current parse tree.
409
410 The DFA's can be constructed automatically from a more conventional
411 language description. An extended LL(1) grammar (ELL(1)) is suitable.
412 Certain restrictions make the parser's life easier: rules that can produce
413 the empty string should be outlawed (there are other ways to put loops
414 or optional parts in the language). To avoid the need to construct
415 FIRST sets, we can require that all but the last alternative of a rule
416 (really: arc going out of a DFA's state) must begin with a terminal
417 symbol.
418
419 As an example, consider this grammar:
420
421 expr: term (OP term)*
422 term: CONSTANT | '(' expr ')'
423
424 The DFA corresponding to the rule for expr is:
425
426 ------->.---term-->.------->
427 ^ |
428 | |
429 \----OP----/
430
431 The parse tree generated for the input a+b is:
432
433 (expr: (term: (NAME: a)), (OP: +), (term: (NAME: b)))
434
435 */