parsermodule.c revision c2683ddb29aa3639368058d20f79375a8e68e5a4
1/*  parsermodule.c
2 *
3 *  Copyright 1995-1996 by Fred L. Drake, Jr. and Virginia Polytechnic
4 *  Institute and State University, Blacksburg, Virginia, USA.
5 *  Portions copyright 1991-1995 by Stichting Mathematisch Centrum,
6 *  Amsterdam, The Netherlands.  Copying is permitted under the terms
7 *  associated with the main Python distribution, with the additional
8 *  restriction that this additional notice be included and maintained
9 *  on all distributed copies.
10 *
11 *  This module serves to replace the original parser module written
12 *  by Guido.  The functionality is not matched precisely, but the
13 *  original may be implemented on top of this.  This is desirable
14 *  since the source of the text to be parsed is now divorced from
15 *  this interface.
16 *
17 *  Unlike the prior interface, the ability to give a parse tree
18 *  produced by Python code as a tuple to the compiler is enabled by
19 *  this module.  See the documentation for more details.
20 *
21 *  I've added some annotations that help with the lint code-checking
22 *  program, but they're not complete by a long shot.  The real errors
23 *  that lint detects are gone, but there are still warnings with
24 *  Py_[X]DECREF() and Py_[X]INCREF() macros.  The lint annotations
25 *  look like "NOTE(...)".
26 */
27
28#include "Python.h"                     /* general Python API             */
29#include "graminit.h"                   /* symbols defined in the grammar */
30#include "node.h"                       /* internal parser structure      */
31#include "token.h"                      /* token definitions              */
32                                        /* ISTERMINAL() / ISNONTERMINAL() */
33#include "compile.h"                    /* PyNode_Compile()               */
34
35#ifdef lint
36#include <note.h>
37#else
38#define NOTE(x)
39#endif
40
41#ifdef macintosh
42char *strdup(char *);
43#endif
44
45/*  String constants used to initialize module attributes.
46 *
47 */
48static char*
49parser_copyright_string
50= "Copyright 1995-1996 by Virginia Polytechnic Institute & State\n\
51University, Blacksburg, Virginia, USA, and Fred L. Drake, Jr., Reston,\n\
52Virginia, USA.  Portions copyright 1991-1995 by Stichting Mathematisch\n\
53Centrum, Amsterdam, The Netherlands.";
54
55
56static char*
57parser_doc_string
58= "This is an interface to Python's internal parser.";
59
60static char*
61parser_version_string = "0.5";
62
63
64typedef PyObject* (*SeqMaker) (int length);
65typedef int (*SeqInserter) (PyObject* sequence,
66                            int index,
67                            PyObject* element);
68
69/*  The function below is copyrighted by Stichting Mathematisch Centrum.  The
70 *  original copyright statement is included below, and continues to apply
71 *  in full to the function immediately following.  All other material is
72 *  original, copyrighted by Fred L. Drake, Jr. and Virginia Polytechnic
73 *  Institute and State University.  Changes were made to comply with the
74 *  new naming conventions.  Added arguments to provide support for creating
75 *  lists as well as tuples, and optionally including the line numbers.
76 */
77
78
79static PyObject*
80node2tuple(node *n,                     /* node to convert               */
81           SeqMaker mkseq,              /* create sequence               */
82           SeqInserter addelem,         /* func. to add elem. in seq.    */
83           int lineno)                  /* include line numbers?         */
84{
85    if (n == NULL) {
86        Py_INCREF(Py_None);
87        return (Py_None);
88    }
89    if (ISNONTERMINAL(TYPE(n))) {
90        int i;
91        PyObject *v;
92        PyObject *w;
93
94        v = mkseq(1 + NCH(n));
95        if (v == NULL)
96            return (v);
97        w = PyInt_FromLong(TYPE(n));
98        if (w == NULL) {
99            Py_DECREF(v);
100            return ((PyObject*) NULL);
101        }
102        (void) addelem(v, 0, w);
103        for (i = 0; i < NCH(n); i++) {
104            w = node2tuple(CHILD(n, i), mkseq, addelem, lineno);
105            if (w == NULL) {
106                Py_DECREF(v);
107                return ((PyObject*) NULL);
108            }
109            (void) addelem(v, i+1, w);
110        }
111        return (v);
112    }
113    else if (ISTERMINAL(TYPE(n))) {
114        PyObject *result = mkseq(2 + lineno);
115        if (result != NULL) {
116            (void) addelem(result, 0, PyInt_FromLong(TYPE(n)));
117            (void) addelem(result, 1, PyString_FromString(STR(n)));
118            if (lineno == 1)
119                (void) addelem(result, 2, PyInt_FromLong(n->n_lineno));
120        }
121        return (result);
122    }
123    else {
124        PyErr_SetString(PyExc_SystemError,
125                        "unrecognized parse tree node type");
126        return ((PyObject*) NULL);
127    }
128}
129/*
130 *  End of material copyrighted by Stichting Mathematisch Centrum.
131 */
132
133
134
135/*  There are two types of intermediate objects we're interested in:
136 *  'eval' and 'exec' types.  These constants can be used in the st_type
137 *  field of the object type to identify which any given object represents.
138 *  These should probably go in an external header to allow other extensions
139 *  to use them, but then, we really should be using C++ too.  ;-)
140 */
141
142#define PyST_EXPR  1
143#define PyST_SUITE 2
144
145
146/*  These are the internal objects and definitions required to implement the
147 *  ST type.  Most of the internal names are more reminiscent of the 'old'
148 *  naming style, but the code uses the new naming convention.
149 */
150
151static PyObject*
152parser_error = 0;
153
154
155typedef struct {
156    PyObject_HEAD                       /* standard object header           */
157    node* st_node;                      /* the node* returned by the parser */
158    int   st_type;                      /* EXPR or SUITE ?                  */
159} PyST_Object;
160
161
162staticforward void
163parser_free(PyST_Object *st);
164
165staticforward int
166parser_compare(PyST_Object *left, PyST_Object *right);
167
168staticforward PyObject *
169parser_getattr(PyObject *self, char *name);
170
171
172static
173PyTypeObject PyST_Type = {
174    PyObject_HEAD_INIT(NULL)
175    0,
176    "st",                               /* tp_name              */
177    (int) sizeof(PyST_Object),          /* tp_basicsize         */
178    0,                                  /* tp_itemsize          */
179    (destructor)parser_free,            /* tp_dealloc           */
180    0,                                  /* tp_print             */
181    parser_getattr,                     /* tp_getattr           */
182    0,                                  /* tp_setattr           */
183    (cmpfunc)parser_compare,            /* tp_compare           */
184    0,                                  /* tp_repr              */
185    0,                                  /* tp_as_number         */
186    0,                                  /* tp_as_sequence       */
187    0,                                  /* tp_as_mapping        */
188    0,                                  /* tp_hash              */
189    0,                                  /* tp_call              */
190    0,                                  /* tp_str               */
191    0,                                  /* tp_getattro          */
192    0,                                  /* tp_setattro          */
193
194    /* Functions to access object as input/output buffer */
195    0,                                  /* tp_as_buffer         */
196
197    Py_TPFLAGS_DEFAULT,                 /* tp_flags             */
198
199    /* __doc__ */
200    "Intermediate representation of a Python parse tree."
201};  /* PyST_Type */
202
203
204static int
205parser_compare_nodes(node *left, node *right)
206{
207    int j;
208
209    if (TYPE(left) < TYPE(right))
210        return (-1);
211
212    if (TYPE(right) < TYPE(left))
213        return (1);
214
215    if (ISTERMINAL(TYPE(left)))
216        return (strcmp(STR(left), STR(right)));
217
218    if (NCH(left) < NCH(right))
219        return (-1);
220
221    if (NCH(right) < NCH(left))
222        return (1);
223
224    for (j = 0; j < NCH(left); ++j) {
225        int v = parser_compare_nodes(CHILD(left, j), CHILD(right, j));
226
227        if (v != 0)
228            return (v);
229    }
230    return (0);
231}
232
233
234/*  int parser_compare(PyST_Object* left, PyST_Object* right)
235 *
236 *  Comparison function used by the Python operators ==, !=, <, >, <=, >=
237 *  This really just wraps a call to parser_compare_nodes() with some easy
238 *  checks and protection code.
239 *
240 */
241static int
242parser_compare(PyST_Object *left, PyST_Object *right)
243{
244    if (left == right)
245        return (0);
246
247    if ((left == 0) || (right == 0))
248        return (-1);
249
250    return (parser_compare_nodes(left->st_node, right->st_node));
251}
252
253
254/*  parser_newstobject(node* st)
255 *
256 *  Allocates a new Python object representing an ST.  This is simply the
257 *  'wrapper' object that holds a node* and allows it to be passed around in
258 *  Python code.
259 *
260 */
261static PyObject*
262parser_newstobject(node *st, int type)
263{
264    PyST_Object* o = PyObject_New(PyST_Object, &PyST_Type);
265
266    if (o != 0) {
267        o->st_node = st;
268        o->st_type = type;
269    }
270    else {
271        PyNode_Free(st);
272    }
273    return ((PyObject*)o);
274}
275
276
277/*  void parser_free(PyST_Object* st)
278 *
279 *  This is called by a del statement that reduces the reference count to 0.
280 *
281 */
282static void
283parser_free(PyST_Object *st)
284{
285    PyNode_Free(st->st_node);
286    PyObject_Del(st);
287}
288
289
290/*  parser_st2tuple(PyObject* self, PyObject* args, PyObject* kw)
291 *
292 *  This provides conversion from a node* to a tuple object that can be
293 *  returned to the Python-level caller.  The ST object is not modified.
294 *
295 */
296static PyObject*
297parser_st2tuple(PyST_Object *self, PyObject *args, PyObject *kw)
298{
299    PyObject *line_option = 0;
300    PyObject *res = 0;
301    int ok;
302
303    static char *keywords[] = {"ast", "line_info", NULL};
304
305    if (self == NULL) {
306        ok = PyArg_ParseTupleAndKeywords(args, kw, "O!|O:st2tuple", keywords,
307                                         &PyST_Type, &self, &line_option);
308    }
309    else
310        ok = PyArg_ParseTupleAndKeywords(args, kw, "|O:totuple", &keywords[1],
311                                         &line_option);
312    if (ok != 0) {
313        int lineno = 0;
314        if (line_option != NULL) {
315            lineno = (PyObject_IsTrue(line_option) != 0) ? 1 : 0;
316        }
317        /*
318         *  Convert ST into a tuple representation.  Use Guido's function,
319         *  since it's known to work already.
320         */
321        res = node2tuple(((PyST_Object*)self)->st_node,
322                         PyTuple_New, PyTuple_SetItem, lineno);
323    }
324    return (res);
325}
326
327
328/*  parser_st2list(PyObject* self, PyObject* args, PyObject* kw)
329 *
330 *  This provides conversion from a node* to a list object that can be
331 *  returned to the Python-level caller.  The ST object is not modified.
332 *
333 */
334static PyObject*
335parser_st2list(PyST_Object *self, PyObject *args, PyObject *kw)
336{
337    PyObject *line_option = 0;
338    PyObject *res = 0;
339    int ok;
340
341    static char *keywords[] = {"ast", "line_info", NULL};
342
343    if (self == NULL)
344        ok = PyArg_ParseTupleAndKeywords(args, kw, "O!|O:st2list", keywords,
345                                         &PyST_Type, &self, &line_option);
346    else
347        ok = PyArg_ParseTupleAndKeywords(args, kw, "|O:tolist", &keywords[1],
348                                         &line_option);
349    if (ok) {
350        int lineno = 0;
351        if (line_option != 0) {
352            lineno = PyObject_IsTrue(line_option) ? 1 : 0;
353        }
354        /*
355         *  Convert ST into a tuple representation.  Use Guido's function,
356         *  since it's known to work already.
357         */
358        res = node2tuple(self->st_node,
359                         PyList_New, PyList_SetItem, lineno);
360    }
361    return (res);
362}
363
364
365/*  parser_compilest(PyObject* self, PyObject* args)
366 *
367 *  This function creates code objects from the parse tree represented by
368 *  the passed-in data object.  An optional file name is passed in as well.
369 *
370 */
371static PyObject*
372parser_compilest(PyST_Object *self, PyObject *args, PyObject *kw)
373{
374    PyObject*     res = 0;
375    char*         str = "<syntax-tree>";
376    int ok;
377
378    static char *keywords[] = {"ast", "filename", NULL};
379
380    if (self == NULL)
381        ok = PyArg_ParseTupleAndKeywords(args, kw, "O!|s:compilest", keywords,
382                                         &PyST_Type, &self, &str);
383    else
384        ok = PyArg_ParseTupleAndKeywords(args, kw, "|s:compile", &keywords[1],
385                                         &str);
386
387    if (ok)
388        res = (PyObject *)PyNode_Compile(self->st_node, str);
389
390    return (res);
391}
392
393
394/*  PyObject* parser_isexpr(PyObject* self, PyObject* args)
395 *  PyObject* parser_issuite(PyObject* self, PyObject* args)
396 *
397 *  Checks the passed-in ST object to determine if it is an expression or
398 *  a statement suite, respectively.  The return is a Python truth value.
399 *
400 */
401static PyObject*
402parser_isexpr(PyST_Object *self, PyObject *args, PyObject *kw)
403{
404    PyObject* res = 0;
405    int ok;
406
407    static char *keywords[] = {"ast", NULL};
408
409    if (self == NULL)
410        ok = PyArg_ParseTupleAndKeywords(args, kw, "O!:isexpr", keywords,
411                                         &PyST_Type, &self);
412    else
413        ok = PyArg_ParseTupleAndKeywords(args, kw, ":isexpr", &keywords[1]);
414
415    if (ok) {
416        /* Check to see if the ST represents an expression or not. */
417        res = (self->st_type == PyST_EXPR) ? Py_True : Py_False;
418        Py_INCREF(res);
419    }
420    return (res);
421}
422
423
424static PyObject*
425parser_issuite(PyST_Object *self, PyObject *args, PyObject *kw)
426{
427    PyObject* res = 0;
428    int ok;
429
430    static char *keywords[] = {"ast", NULL};
431
432    if (self == NULL)
433        ok = PyArg_ParseTupleAndKeywords(args, kw, "O!:issuite", keywords,
434                                         &PyST_Type, &self);
435    else
436        ok = PyArg_ParseTupleAndKeywords(args, kw, ":issuite", &keywords[1]);
437
438    if (ok) {
439        /* Check to see if the ST represents an expression or not. */
440        res = (self->st_type == PyST_EXPR) ? Py_False : Py_True;
441        Py_INCREF(res);
442    }
443    return (res);
444}
445
446
447#define PUBLIC_METHOD_TYPE (METH_VARARGS|METH_KEYWORDS)
448
449static PyMethodDef
450parser_methods[] = {
451    {"compile",         (PyCFunction)parser_compilest,  PUBLIC_METHOD_TYPE,
452        "Compile this ST object into a code object."},
453    {"isexpr",          (PyCFunction)parser_isexpr,     PUBLIC_METHOD_TYPE,
454        "Determines if this ST object was created from an expression."},
455    {"issuite",         (PyCFunction)parser_issuite,    PUBLIC_METHOD_TYPE,
456        "Determines if this ST object was created from a suite."},
457    {"tolist",          (PyCFunction)parser_st2list,    PUBLIC_METHOD_TYPE,
458        "Creates a list-tree representation of this ST."},
459    {"totuple",         (PyCFunction)parser_st2tuple,   PUBLIC_METHOD_TYPE,
460        "Creates a tuple-tree representation of this ST."},
461
462    {NULL, NULL, 0, NULL}
463};
464
465
466static PyObject*
467parser_getattr(PyObject *self, char *name)
468{
469    return (Py_FindMethod(parser_methods, self, name));
470}
471
472
473/*  err_string(char* message)
474 *
475 *  Sets the error string for an exception of type ParserError.
476 *
477 */
478static void
479err_string(char *message)
480{
481    PyErr_SetString(parser_error, message);
482}
483
484
485/*  PyObject* parser_do_parse(PyObject* args, int type)
486 *
487 *  Internal function to actually execute the parse and return the result if
488 *  successful, or set an exception if not.
489 *
490 */
491static PyObject*
492parser_do_parse(PyObject *args, PyObject *kw, char *argspec, int type)
493{
494    char*     string = 0;
495    PyObject* res    = 0;
496
497    static char *keywords[] = {"source", NULL};
498
499    if (PyArg_ParseTupleAndKeywords(args, kw, argspec, keywords, &string)) {
500        node* n = PyParser_SimpleParseString(string,
501                                             (type == PyST_EXPR)
502                                             ? eval_input : file_input);
503
504        if (n != 0)
505            res = parser_newstobject(n, type);
506        else
507            err_string("could not parse string");
508    }
509    return (res);
510}
511
512
513/*  PyObject* parser_expr(PyObject* self, PyObject* args)
514 *  PyObject* parser_suite(PyObject* self, PyObject* args)
515 *
516 *  External interfaces to the parser itself.  Which is called determines if
517 *  the parser attempts to recognize an expression ('eval' form) or statement
518 *  suite ('exec' form).  The real work is done by parser_do_parse() above.
519 *
520 */
521static PyObject*
522parser_expr(PyST_Object *self, PyObject *args, PyObject *kw)
523{
524    NOTE(ARGUNUSED(self))
525    return (parser_do_parse(args, kw, "s:expr", PyST_EXPR));
526}
527
528
529static PyObject*
530parser_suite(PyST_Object *self, PyObject *args, PyObject *kw)
531{
532    NOTE(ARGUNUSED(self))
533    return (parser_do_parse(args, kw, "s:suite", PyST_SUITE));
534}
535
536
537
538/*  This is the messy part of the code.  Conversion from a tuple to an ST
539 *  object requires that the input tuple be valid without having to rely on
540 *  catching an exception from the compiler.  This is done to allow the
541 *  compiler itself to remain fast, since most of its input will come from
542 *  the parser directly, and therefore be known to be syntactically correct.
543 *  This validation is done to ensure that we don't core dump the compile
544 *  phase, returning an exception instead.
545 *
546 *  Two aspects can be broken out in this code:  creating a node tree from
547 *  the tuple passed in, and verifying that it is indeed valid.  It may be
548 *  advantageous to expand the number of ST types to include funcdefs and
549 *  lambdadefs to take advantage of the optimizer, recognizing those STs
550 *  here.  They are not necessary, and not quite as useful in a raw form.
551 *  For now, let's get expressions and suites working reliably.
552 */
553
554
555staticforward node* build_node_tree(PyObject *tuple);
556staticforward int   validate_expr_tree(node *tree);
557staticforward int   validate_file_input(node *tree);
558
559
560/*  PyObject* parser_tuple2st(PyObject* self, PyObject* args)
561 *
562 *  This is the public function, called from the Python code.  It receives a
563 *  single tuple object from the caller, and creates an ST object if the
564 *  tuple can be validated.  It does this by checking the first code of the
565 *  tuple, and, if acceptable, builds the internal representation.  If this
566 *  step succeeds, the internal representation is validated as fully as
567 *  possible with the various validate_*() routines defined below.
568 *
569 *  This function must be changed if support is to be added for PyST_FRAGMENT
570 *  ST objects.
571 *
572 */
573static PyObject*
574parser_tuple2st(PyST_Object *self, PyObject *args, PyObject *kw)
575{
576    NOTE(ARGUNUSED(self))
577    PyObject *st = 0;
578    PyObject *tuple;
579    node *tree;
580
581    static char *keywords[] = {"sequence", NULL};
582
583    if (!PyArg_ParseTupleAndKeywords(args, kw, "O:sequence2st", keywords,
584                                     &tuple))
585        return (0);
586    if (!PySequence_Check(tuple)) {
587        PyErr_SetString(PyExc_ValueError,
588                        "sequence2st() requires a single sequence argument");
589        return (0);
590    }
591    /*
592     *  Convert the tree to the internal form before checking it.
593     */
594    tree = build_node_tree(tuple);
595    if (tree != 0) {
596        int start_sym = TYPE(tree);
597        if (start_sym == eval_input) {
598            /*  Might be an eval form.  */
599            if (validate_expr_tree(tree))
600                st = parser_newstobject(tree, PyST_EXPR);
601        }
602        else if (start_sym == file_input) {
603            /*  This looks like an exec form so far.  */
604            if (validate_file_input(tree))
605                st = parser_newstobject(tree, PyST_SUITE);
606        }
607        else {
608            /*  This is a fragment, at best. */
609            PyNode_Free(tree);
610            err_string("parse tree does not use a valid start symbol");
611        }
612    }
613    /*  Make sure we throw an exception on all errors.  We should never
614     *  get this, but we'd do well to be sure something is done.
615     */
616    if (st == NULL && !PyErr_Occurred())
617        err_string("unspecified ST error occurred");
618
619    return st;
620}
621
622
623/*  node* build_node_children()
624 *
625 *  Iterate across the children of the current non-terminal node and build
626 *  their structures.  If successful, return the root of this portion of
627 *  the tree, otherwise, 0.  Any required exception will be specified already,
628 *  and no memory will have been deallocated.
629 *
630 */
631static node*
632build_node_children(PyObject *tuple, node *root, int *line_num)
633{
634    int len = PyObject_Size(tuple);
635    int i;
636
637    for (i = 1; i < len; ++i) {
638        /* elem must always be a sequence, however simple */
639        PyObject* elem = PySequence_GetItem(tuple, i);
640        int ok = elem != NULL;
641        long  type = 0;
642        char *strn = 0;
643
644        if (ok)
645            ok = PySequence_Check(elem);
646        if (ok) {
647            PyObject *temp = PySequence_GetItem(elem, 0);
648            if (temp == NULL)
649                ok = 0;
650            else {
651                ok = PyInt_Check(temp);
652                if (ok)
653                    type = PyInt_AS_LONG(temp);
654                Py_DECREF(temp);
655            }
656        }
657        if (!ok) {
658            PyErr_SetObject(parser_error,
659                            Py_BuildValue("os", elem,
660                                          "Illegal node construct."));
661            Py_XDECREF(elem);
662            return (0);
663        }
664        if (ISTERMINAL(type)) {
665            int len = PyObject_Size(elem);
666            PyObject *temp;
667
668            if ((len != 2) && (len != 3)) {
669                err_string("terminal nodes must have 2 or 3 entries");
670                return 0;
671            }
672            temp = PySequence_GetItem(elem, 1);
673            if (temp == NULL)
674                return 0;
675            if (!PyString_Check(temp)) {
676                PyErr_Format(parser_error,
677                             "second item in terminal node must be a string,"
678                             " found %s",
679                             ((PyTypeObject*)PyObject_Type(temp))->tp_name);
680                Py_DECREF(temp);
681                return 0;
682            }
683            if (len == 3) {
684                PyObject *o = PySequence_GetItem(elem, 2);
685                if (o != NULL) {
686                    if (PyInt_Check(o))
687                        *line_num = PyInt_AS_LONG(o);
688                    else {
689                        PyErr_Format(parser_error,
690                                     "third item in terminal node must be an"
691                                     " integer, found %s",
692                                ((PyTypeObject*)PyObject_Type(temp))->tp_name);
693                        Py_DECREF(o);
694                        Py_DECREF(temp);
695                        return 0;
696                    }
697                    Py_DECREF(o);
698                }
699            }
700            len = PyString_GET_SIZE(temp) + 1;
701            strn = (char *)PyMem_MALLOC(len);
702            if (strn != NULL)
703                (void) memcpy(strn, PyString_AS_STRING(temp), len);
704            Py_DECREF(temp);
705        }
706        else if (!ISNONTERMINAL(type)) {
707            /*
708             *  It has to be one or the other; this is an error.
709             *  Throw an exception.
710             */
711            PyErr_SetObject(parser_error,
712                            Py_BuildValue("os", elem, "unknown node type."));
713            Py_XDECREF(elem);
714            return (0);
715        }
716        PyNode_AddChild(root, type, strn, *line_num);
717
718        if (ISNONTERMINAL(type)) {
719            node* new_child = CHILD(root, i - 1);
720
721            if (new_child != build_node_children(elem, new_child, line_num)) {
722                Py_XDECREF(elem);
723                return (0);
724            }
725        }
726        else if (type == NEWLINE) {     /* It's true:  we increment the     */
727            ++(*line_num);              /* line number *after* the newline! */
728        }
729        Py_XDECREF(elem);
730    }
731    return (root);
732}
733
734
735static node*
736build_node_tree(PyObject *tuple)
737{
738    node* res = 0;
739    PyObject *temp = PySequence_GetItem(tuple, 0);
740    long num = -1;
741
742    if (temp != NULL)
743        num = PyInt_AsLong(temp);
744    Py_XDECREF(temp);
745    if (ISTERMINAL(num)) {
746        /*
747         *  The tuple is simple, but it doesn't start with a start symbol.
748         *  Throw an exception now and be done with it.
749         */
750        tuple = Py_BuildValue("os", tuple,
751                    "Illegal syntax-tree; cannot start with terminal symbol.");
752        PyErr_SetObject(parser_error, tuple);
753    }
754    else if (ISNONTERMINAL(num)) {
755        /*
756         *  Not efficient, but that can be handled later.
757         */
758        int line_num = 0;
759
760        res = PyNode_New(num);
761        if (res != build_node_children(tuple, res, &line_num)) {
762            PyNode_Free(res);
763            res = 0;
764        }
765    }
766    else
767        /*  The tuple is illegal -- if the number is neither TERMINAL nor
768         *  NONTERMINAL, we can't use it.  Not sure the implementation
769         *  allows this condition, but the API doesn't preclude it.
770         */
771        PyErr_SetObject(parser_error,
772                        Py_BuildValue("os", tuple,
773                                      "Illegal component tuple."));
774
775    return (res);
776}
777
778
779/*
780 *  Validation routines used within the validation section:
781 */
782staticforward int validate_terminal(node *terminal, int type, char *string);
783
784#define validate_ampersand(ch)  validate_terminal(ch,      AMPER, "&")
785#define validate_circumflex(ch) validate_terminal(ch, CIRCUMFLEX, "^")
786#define validate_colon(ch)      validate_terminal(ch,      COLON, ":")
787#define validate_comma(ch)      validate_terminal(ch,      COMMA, ",")
788#define validate_dedent(ch)     validate_terminal(ch,     DEDENT, "")
789#define validate_equal(ch)      validate_terminal(ch,      EQUAL, "=")
790#define validate_indent(ch)     validate_terminal(ch,     INDENT, (char*)NULL)
791#define validate_lparen(ch)     validate_terminal(ch,       LPAR, "(")
792#define validate_newline(ch)    validate_terminal(ch,    NEWLINE, (char*)NULL)
793#define validate_rparen(ch)     validate_terminal(ch,       RPAR, ")")
794#define validate_semi(ch)       validate_terminal(ch,       SEMI, ";")
795#define validate_star(ch)       validate_terminal(ch,       STAR, "*")
796#define validate_vbar(ch)       validate_terminal(ch,       VBAR, "|")
797#define validate_doublestar(ch) validate_terminal(ch, DOUBLESTAR, "**")
798#define validate_dot(ch)        validate_terminal(ch,        DOT, ".")
799#define validate_name(ch, str)  validate_terminal(ch,       NAME, str)
800
801#define VALIDATER(n)    static int validate_##n(node *tree)
802
803VALIDATER(node);                VALIDATER(small_stmt);
804VALIDATER(class);               VALIDATER(node);
805VALIDATER(parameters);          VALIDATER(suite);
806VALIDATER(testlist);            VALIDATER(varargslist);
807VALIDATER(fpdef);               VALIDATER(fplist);
808VALIDATER(stmt);                VALIDATER(simple_stmt);
809VALIDATER(expr_stmt);           VALIDATER(power);
810VALIDATER(print_stmt);          VALIDATER(del_stmt);
811VALIDATER(return_stmt);         VALIDATER(list_iter);
812VALIDATER(raise_stmt);          VALIDATER(import_stmt);
813VALIDATER(global_stmt);         VALIDATER(list_if);
814VALIDATER(assert_stmt);         VALIDATER(list_for);
815VALIDATER(exec_stmt);           VALIDATER(compound_stmt);
816VALIDATER(while);               VALIDATER(for);
817VALIDATER(try);                 VALIDATER(except_clause);
818VALIDATER(test);                VALIDATER(and_test);
819VALIDATER(not_test);            VALIDATER(comparison);
820VALIDATER(comp_op);             VALIDATER(expr);
821VALIDATER(xor_expr);            VALIDATER(and_expr);
822VALIDATER(shift_expr);          VALIDATER(arith_expr);
823VALIDATER(term);                VALIDATER(factor);
824VALIDATER(atom);                VALIDATER(lambdef);
825VALIDATER(trailer);             VALIDATER(subscript);
826VALIDATER(subscriptlist);       VALIDATER(sliceop);
827VALIDATER(exprlist);            VALIDATER(dictmaker);
828VALIDATER(arglist);             VALIDATER(argument);
829VALIDATER(listmaker);           VALIDATER(yield_stmt);
830
831#undef VALIDATER
832
833#define is_even(n)      (((n) & 1) == 0)
834#define is_odd(n)       (((n) & 1) == 1)
835
836
837static int
838validate_ntype(node *n, int t)
839{
840    if (TYPE(n) != t) {
841        PyErr_Format(parser_error, "Expected node type %d, got %d.",
842                     t, TYPE(n));
843        return 0;
844    }
845    return 1;
846}
847
848
849/*  Verifies that the number of child nodes is exactly 'num', raising
850 *  an exception if it isn't.  The exception message does not indicate
851 *  the exact number of nodes, allowing this to be used to raise the
852 *  "right" exception when the wrong number of nodes is present in a
853 *  specific variant of a statement's syntax.  This is commonly used
854 *  in that fashion.
855 */
856static int
857validate_numnodes(node *n, int num, const char *const name)
858{
859    if (NCH(n) != num) {
860        PyErr_Format(parser_error,
861                     "Illegal number of children for %s node.", name);
862        return 0;
863    }
864    return 1;
865}
866
867
868static int
869validate_terminal(node *terminal, int type, char *string)
870{
871    int res = (validate_ntype(terminal, type)
872               && ((string == 0) || (strcmp(string, STR(terminal)) == 0)));
873
874    if (!res && !PyErr_Occurred()) {
875        PyErr_Format(parser_error,
876                     "Illegal terminal: expected \"%s\"", string);
877    }
878    return (res);
879}
880
881
882/*  X (',' X) [',']
883 */
884static int
885validate_repeating_list(node *tree, int ntype, int (*vfunc)(node *),
886                        const char *const name)
887{
888    int nch = NCH(tree);
889    int res = (nch && validate_ntype(tree, ntype)
890               && vfunc(CHILD(tree, 0)));
891
892    if (!res && !PyErr_Occurred())
893        (void) validate_numnodes(tree, 1, name);
894    else {
895        if (is_even(nch))
896            res = validate_comma(CHILD(tree, --nch));
897        if (res && nch > 1) {
898            int pos = 1;
899            for ( ; res && pos < nch; pos += 2)
900                res = (validate_comma(CHILD(tree, pos))
901                       && vfunc(CHILD(tree, pos + 1)));
902        }
903    }
904    return (res);
905}
906
907
908/*  validate_class()
909 *
910 *  classdef:
911 *      'class' NAME ['(' testlist ')'] ':' suite
912 */
913static int
914validate_class(node *tree)
915{
916    int nch = NCH(tree);
917    int res = validate_ntype(tree, classdef) && ((nch == 4) || (nch == 7));
918
919    if (res) {
920        res = (validate_name(CHILD(tree, 0), "class")
921               && validate_ntype(CHILD(tree, 1), NAME)
922               && validate_colon(CHILD(tree, nch - 2))
923               && validate_suite(CHILD(tree, nch - 1)));
924    }
925    else
926        (void) validate_numnodes(tree, 4, "class");
927    if (res && (nch == 7)) {
928        res = (validate_lparen(CHILD(tree, 2))
929               && validate_testlist(CHILD(tree, 3))
930               && validate_rparen(CHILD(tree, 4)));
931    }
932    return (res);
933}
934
935
936/*  if_stmt:
937 *      'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite]
938 */
939static int
940validate_if(node *tree)
941{
942    int nch = NCH(tree);
943    int res = (validate_ntype(tree, if_stmt)
944               && (nch >= 4)
945               && validate_name(CHILD(tree, 0), "if")
946               && validate_test(CHILD(tree, 1))
947               && validate_colon(CHILD(tree, 2))
948               && validate_suite(CHILD(tree, 3)));
949
950    if (res && ((nch % 4) == 3)) {
951        /*  ... 'else' ':' suite  */
952        res = (validate_name(CHILD(tree, nch - 3), "else")
953               && validate_colon(CHILD(tree, nch - 2))
954               && validate_suite(CHILD(tree, nch - 1)));
955        nch -= 3;
956    }
957    else if (!res && !PyErr_Occurred())
958        (void) validate_numnodes(tree, 4, "if");
959    if ((nch % 4) != 0)
960        /* Will catch the case for nch < 4 */
961        res = validate_numnodes(tree, 0, "if");
962    else if (res && (nch > 4)) {
963        /*  ... ('elif' test ':' suite)+ ...  */
964        int j = 4;
965        while ((j < nch) && res) {
966            res = (validate_name(CHILD(tree, j), "elif")
967                   && validate_colon(CHILD(tree, j + 2))
968                   && validate_test(CHILD(tree, j + 1))
969                   && validate_suite(CHILD(tree, j + 3)));
970            j += 4;
971        }
972    }
973    return (res);
974}
975
976
977/*  parameters:
978 *      '(' [varargslist] ')'
979 *
980 */
981static int
982validate_parameters(node *tree)
983{
984    int nch = NCH(tree);
985    int res = validate_ntype(tree, parameters) && ((nch == 2) || (nch == 3));
986
987    if (res) {
988        res = (validate_lparen(CHILD(tree, 0))
989               && validate_rparen(CHILD(tree, nch - 1)));
990        if (res && (nch == 3))
991            res = validate_varargslist(CHILD(tree, 1));
992    }
993    else {
994        (void) validate_numnodes(tree, 2, "parameters");
995    }
996    return (res);
997}
998
999
1000/*  validate_suite()
1001 *
1002 *  suite:
1003 *      simple_stmt
1004 *    | NEWLINE INDENT stmt+ DEDENT
1005 */
1006static int
1007validate_suite(node *tree)
1008{
1009    int nch = NCH(tree);
1010    int res = (validate_ntype(tree, suite) && ((nch == 1) || (nch >= 4)));
1011
1012    if (res && (nch == 1))
1013        res = validate_simple_stmt(CHILD(tree, 0));
1014    else if (res) {
1015        /*  NEWLINE INDENT stmt+ DEDENT  */
1016        res = (validate_newline(CHILD(tree, 0))
1017               && validate_indent(CHILD(tree, 1))
1018               && validate_stmt(CHILD(tree, 2))
1019               && validate_dedent(CHILD(tree, nch - 1)));
1020
1021        if (res && (nch > 4)) {
1022            int i = 3;
1023            --nch;                      /* forget the DEDENT     */
1024            for ( ; res && (i < nch); ++i)
1025                res = validate_stmt(CHILD(tree, i));
1026        }
1027        else if (nch < 4)
1028            res = validate_numnodes(tree, 4, "suite");
1029    }
1030    return (res);
1031}
1032
1033
1034static int
1035validate_testlist(node *tree)
1036{
1037    return (validate_repeating_list(tree, testlist,
1038                                    validate_test, "testlist"));
1039}
1040
1041
1042/* '*' NAME [',' '**' NAME] | '**' NAME
1043 */
1044static int
1045validate_varargslist_trailer(node *tree, int start)
1046{
1047    int nch = NCH(tree);
1048    int res = 0;
1049    int sym;
1050
1051    if (nch <= start) {
1052        err_string("expected variable argument trailer for varargslist");
1053        return 0;
1054    }
1055    sym = TYPE(CHILD(tree, start));
1056    if (sym == STAR) {
1057        /*
1058         *  ('*' NAME [',' '**' NAME]
1059         */
1060        if (nch-start == 2)
1061            res = validate_name(CHILD(tree, start+1), NULL);
1062        else if (nch-start == 5)
1063            res = (validate_name(CHILD(tree, start+1), NULL)
1064                   && validate_comma(CHILD(tree, start+2))
1065                   && validate_doublestar(CHILD(tree, start+3))
1066                   && validate_name(CHILD(tree, start+4), NULL));
1067    }
1068    else if (sym == DOUBLESTAR) {
1069        /*
1070         *  '**' NAME
1071         */
1072        if (nch-start == 2)
1073            res = validate_name(CHILD(tree, start+1), NULL);
1074    }
1075    if (!res)
1076        err_string("illegal variable argument trailer for varargslist");
1077    return res;
1078}
1079
1080
1081/*  validate_varargslist()
1082 *
1083 *  varargslist:
1084 *      (fpdef ['=' test] ',')*
1085 *           ('*' NAME [',' '**' NAME]
1086 *         | '**' NAME)
1087 *    | fpdef ['=' test] (',' fpdef ['=' test])* [',']
1088 *
1089 */
1090static int
1091validate_varargslist(node *tree)
1092{
1093    int nch = NCH(tree);
1094    int res = validate_ntype(tree, varargslist) && (nch != 0);
1095    int sym;
1096
1097    if (!res)
1098        return 0;
1099    if (nch < 1) {
1100        err_string("varargslist missing child nodes");
1101        return 0;
1102    }
1103    sym = TYPE(CHILD(tree, 0));
1104    if (sym == STAR || sym == DOUBLESTAR)
1105        /* whole thing matches:
1106         *      '*' NAME [',' '**' NAME] | '**' NAME
1107         */
1108        res = validate_varargslist_trailer(tree, 0);
1109    else if (sym == fpdef) {
1110        int i = 0;
1111
1112        sym = TYPE(CHILD(tree, nch-1));
1113        if (sym == NAME) {
1114            /*
1115             *   (fpdef ['=' test] ',')+
1116             *       ('*' NAME [',' '**' NAME]
1117             *     | '**' NAME)
1118             */
1119            /* skip over (fpdef ['=' test] ',')+ */
1120            while (res && (i+2 <= nch)) {
1121                res = validate_fpdef(CHILD(tree, i));
1122                ++i;
1123                if (res && TYPE(CHILD(tree, i)) == EQUAL && (i+2 <= nch)) {
1124                    res = (validate_equal(CHILD(tree, i))
1125                           && validate_test(CHILD(tree, i+1)));
1126                    if (res)
1127                        i += 2;
1128                }
1129                if (res && i < nch) {
1130                    res = validate_comma(CHILD(tree, i));
1131                    ++i;
1132                    if (res && i < nch
1133                        && (TYPE(CHILD(tree, i)) == DOUBLESTAR
1134                            || TYPE(CHILD(tree, i)) == STAR))
1135                        break;
1136                }
1137            }
1138            /* ... '*' NAME [',' '**' NAME] | '**' NAME
1139             * i --^^^
1140             */
1141            if (res)
1142                res = validate_varargslist_trailer(tree, i);
1143        }
1144        else {
1145            /*
1146             *  fpdef ['=' test] (',' fpdef ['=' test])* [',']
1147             */
1148            /* strip trailing comma node */
1149            if (sym == COMMA) {
1150                res = validate_comma(CHILD(tree, nch-1));
1151                if (!res)
1152                    return 0;
1153                --nch;
1154            }
1155            /*
1156             *  fpdef ['=' test] (',' fpdef ['=' test])*
1157             */
1158            res = validate_fpdef(CHILD(tree, 0));
1159            ++i;
1160            if (res && (i+2 <= nch) && TYPE(CHILD(tree, i)) == EQUAL) {
1161                res = (validate_equal(CHILD(tree, i))
1162                       && validate_test(CHILD(tree, i+1)));
1163                i += 2;
1164            }
1165            /*
1166             *  ... (',' fpdef ['=' test])*
1167             *  i ---^^^
1168             */
1169            while (res && (nch - i) >= 2) {
1170                res = (validate_comma(CHILD(tree, i))
1171                       && validate_fpdef(CHILD(tree, i+1)));
1172                i += 2;
1173                if (res && (nch - i) >= 2 && TYPE(CHILD(tree, i)) == EQUAL) {
1174                    res = (validate_equal(CHILD(tree, i))
1175                           && validate_test(CHILD(tree, i+1)));
1176                    i += 2;
1177                }
1178            }
1179            if (res && nch - i != 0) {
1180                res = 0;
1181                err_string("illegal formation for varargslist");
1182            }
1183        }
1184    }
1185    return res;
1186}
1187
1188
1189/*  list_iter:  list_for | list_if
1190 */
1191static int
1192validate_list_iter(node *tree)
1193{
1194    int res = (validate_ntype(tree, list_iter)
1195               && validate_numnodes(tree, 1, "list_iter"));
1196    if (res && TYPE(CHILD(tree, 0)) == list_for)
1197        res = validate_list_for(CHILD(tree, 0));
1198    else
1199        res = validate_list_if(CHILD(tree, 0));
1200
1201    return res;
1202}
1203
1204/*  list_for:  'for' exprlist 'in' testlist [list_iter]
1205 */
1206static int
1207validate_list_for(node *tree)
1208{
1209    int nch = NCH(tree);
1210    int res;
1211
1212    if (nch == 5)
1213        res = validate_list_iter(CHILD(tree, 4));
1214    else
1215        res = validate_numnodes(tree, 4, "list_for");
1216
1217    if (res)
1218        res = (validate_name(CHILD(tree, 0), "for")
1219               && validate_exprlist(CHILD(tree, 1))
1220               && validate_name(CHILD(tree, 2), "in")
1221               && validate_testlist(CHILD(tree, 3)));
1222
1223    return res;
1224}
1225
1226/*  list_if:  'if' test [list_iter]
1227 */
1228static int
1229validate_list_if(node *tree)
1230{
1231    int nch = NCH(tree);
1232    int res;
1233
1234    if (nch == 3)
1235        res = validate_list_iter(CHILD(tree, 2));
1236    else
1237        res = validate_numnodes(tree, 2, "list_if");
1238
1239    if (res)
1240        res = (validate_name(CHILD(tree, 0), "if")
1241               && validate_test(CHILD(tree, 1)));
1242
1243    return res;
1244}
1245
1246
1247/*  validate_fpdef()
1248 *
1249 *  fpdef:
1250 *      NAME
1251 *    | '(' fplist ')'
1252 */
1253static int
1254validate_fpdef(node *tree)
1255{
1256    int nch = NCH(tree);
1257    int res = validate_ntype(tree, fpdef);
1258
1259    if (res) {
1260        if (nch == 1)
1261            res = validate_ntype(CHILD(tree, 0), NAME);
1262        else if (nch == 3)
1263            res = (validate_lparen(CHILD(tree, 0))
1264                   && validate_fplist(CHILD(tree, 1))
1265                   && validate_rparen(CHILD(tree, 2)));
1266        else
1267            res = validate_numnodes(tree, 1, "fpdef");
1268    }
1269    return (res);
1270}
1271
1272
1273static int
1274validate_fplist(node *tree)
1275{
1276    return (validate_repeating_list(tree, fplist,
1277                                    validate_fpdef, "fplist"));
1278}
1279
1280
1281/*  simple_stmt | compound_stmt
1282 *
1283 */
1284static int
1285validate_stmt(node *tree)
1286{
1287    int res = (validate_ntype(tree, stmt)
1288               && validate_numnodes(tree, 1, "stmt"));
1289
1290    if (res) {
1291        tree = CHILD(tree, 0);
1292
1293        if (TYPE(tree) == simple_stmt)
1294            res = validate_simple_stmt(tree);
1295        else
1296            res = validate_compound_stmt(tree);
1297    }
1298    return (res);
1299}
1300
1301
1302/*  small_stmt (';' small_stmt)* [';'] NEWLINE
1303 *
1304 */
1305static int
1306validate_simple_stmt(node *tree)
1307{
1308    int nch = NCH(tree);
1309    int res = (validate_ntype(tree, simple_stmt)
1310               && (nch >= 2)
1311               && validate_small_stmt(CHILD(tree, 0))
1312               && validate_newline(CHILD(tree, nch - 1)));
1313
1314    if (nch < 2)
1315        res = validate_numnodes(tree, 2, "simple_stmt");
1316    --nch;                              /* forget the NEWLINE    */
1317    if (res && is_even(nch))
1318        res = validate_semi(CHILD(tree, --nch));
1319    if (res && (nch > 2)) {
1320        int i;
1321
1322        for (i = 1; res && (i < nch); i += 2)
1323            res = (validate_semi(CHILD(tree, i))
1324                   && validate_small_stmt(CHILD(tree, i + 1)));
1325    }
1326    return (res);
1327}
1328
1329
1330static int
1331validate_small_stmt(node *tree)
1332{
1333    int nch = NCH(tree);
1334    int res = validate_numnodes(tree, 1, "small_stmt");
1335
1336    if (res) {
1337        int ntype = TYPE(CHILD(tree, 0));
1338
1339        if (  (ntype == expr_stmt)
1340              || (ntype == print_stmt)
1341              || (ntype == del_stmt)
1342              || (ntype == pass_stmt)
1343              || (ntype == flow_stmt)
1344              || (ntype == import_stmt)
1345              || (ntype == global_stmt)
1346              || (ntype == assert_stmt)
1347              || (ntype == exec_stmt))
1348            res = validate_node(CHILD(tree, 0));
1349        else {
1350            res = 0;
1351            err_string("illegal small_stmt child type");
1352        }
1353    }
1354    else if (nch == 1) {
1355        res = 0;
1356        PyErr_Format(parser_error,
1357                     "Unrecognized child node of small_stmt: %d.",
1358                     TYPE(CHILD(tree, 0)));
1359    }
1360    return (res);
1361}
1362
1363
1364/*  compound_stmt:
1365 *      if_stmt | while_stmt | for_stmt | try_stmt | funcdef | classdef
1366 */
1367static int
1368validate_compound_stmt(node *tree)
1369{
1370    int res = (validate_ntype(tree, compound_stmt)
1371               && validate_numnodes(tree, 1, "compound_stmt"));
1372    int ntype;
1373
1374    if (!res)
1375        return (0);
1376
1377    tree = CHILD(tree, 0);
1378    ntype = TYPE(tree);
1379    if (  (ntype == if_stmt)
1380          || (ntype == while_stmt)
1381          || (ntype == for_stmt)
1382          || (ntype == try_stmt)
1383          || (ntype == funcdef)
1384          || (ntype == classdef))
1385        res = validate_node(tree);
1386    else {
1387        res = 0;
1388        PyErr_Format(parser_error,
1389                     "Illegal compound statement type: %d.", TYPE(tree));
1390    }
1391    return (res);
1392}
1393
1394
1395static int
1396validate_expr_stmt(node *tree)
1397{
1398    int j;
1399    int nch = NCH(tree);
1400    int res = (validate_ntype(tree, expr_stmt)
1401               && is_odd(nch)
1402               && validate_testlist(CHILD(tree, 0)));
1403
1404    if (res && nch == 3
1405        && TYPE(CHILD(tree, 1)) == augassign) {
1406        res = (validate_numnodes(CHILD(tree, 1), 1, "augassign")
1407               && validate_testlist(CHILD(tree, 2)));
1408
1409        if (res) {
1410            char *s = STR(CHILD(CHILD(tree, 1), 0));
1411
1412            res = (strcmp(s, "+=") == 0
1413                   || strcmp(s, "-=") == 0
1414                   || strcmp(s, "*=") == 0
1415                   || strcmp(s, "/=") == 0
1416                   || strcmp(s, "%=") == 0
1417                   || strcmp(s, "&=") == 0
1418                   || strcmp(s, "|=") == 0
1419                   || strcmp(s, "^=") == 0
1420                   || strcmp(s, "<<=") == 0
1421                   || strcmp(s, ">>=") == 0
1422                   || strcmp(s, "**=") == 0);
1423            if (!res)
1424                err_string("illegal augmmented assignment operator");
1425        }
1426    }
1427    else {
1428        for (j = 1; res && (j < nch); j += 2)
1429            res = (validate_equal(CHILD(tree, j))
1430                   && validate_testlist(CHILD(tree, j + 1)));
1431    }
1432    return (res);
1433}
1434
1435
1436/*  print_stmt:
1437 *
1438 *      'print' ( [ test (',' test)* [','] ]
1439 *              | '>>' test [ (',' test)+ [','] ] )
1440 */
1441static int
1442validate_print_stmt(node *tree)
1443{
1444    int nch = NCH(tree);
1445    int res = (validate_ntype(tree, print_stmt)
1446               && (nch > 0)
1447               && validate_name(CHILD(tree, 0), "print"));
1448
1449    if (res && nch > 1) {
1450        int sym = TYPE(CHILD(tree, 1));
1451        int i = 1;
1452        int allow_trailing_comma = 1;
1453
1454        if (sym == test)
1455            res = validate_test(CHILD(tree, i++));
1456        else {
1457            if (nch < 3)
1458                res = validate_numnodes(tree, 3, "print_stmt");
1459            else {
1460                res = (validate_ntype(CHILD(tree, i), RIGHTSHIFT)
1461                       && validate_test(CHILD(tree, i+1)));
1462                i += 2;
1463                allow_trailing_comma = 0;
1464            }
1465        }
1466        if (res) {
1467            /*  ... (',' test)* [',']  */
1468            while (res && i+2 <= nch) {
1469                res = (validate_comma(CHILD(tree, i))
1470                       && validate_test(CHILD(tree, i+1)));
1471                allow_trailing_comma = 1;
1472                i += 2;
1473            }
1474            if (res && !allow_trailing_comma)
1475                res = validate_numnodes(tree, i, "print_stmt");
1476            else if (res && i < nch)
1477                res = validate_comma(CHILD(tree, i));
1478        }
1479    }
1480    return (res);
1481}
1482
1483
1484static int
1485validate_del_stmt(node *tree)
1486{
1487    return (validate_numnodes(tree, 2, "del_stmt")
1488            && validate_name(CHILD(tree, 0), "del")
1489            && validate_exprlist(CHILD(tree, 1)));
1490}
1491
1492
1493static int
1494validate_return_stmt(node *tree)
1495{
1496    int nch = NCH(tree);
1497    int res = (validate_ntype(tree, return_stmt)
1498               && ((nch == 1) || (nch == 2))
1499               && validate_name(CHILD(tree, 0), "return"));
1500
1501    if (res && (nch == 2))
1502        res = validate_testlist(CHILD(tree, 1));
1503
1504    return (res);
1505}
1506
1507
1508static int
1509validate_raise_stmt(node *tree)
1510{
1511    int nch = NCH(tree);
1512    int res = (validate_ntype(tree, raise_stmt)
1513               && ((nch == 1) || (nch == 2) || (nch == 4) || (nch == 6)));
1514
1515    if (res) {
1516        res = validate_name(CHILD(tree, 0), "raise");
1517        if (res && (nch >= 2))
1518            res = validate_test(CHILD(tree, 1));
1519        if (res && nch > 2) {
1520            res = (validate_comma(CHILD(tree, 2))
1521                   && validate_test(CHILD(tree, 3)));
1522            if (res && (nch > 4))
1523                res = (validate_comma(CHILD(tree, 4))
1524                       && validate_test(CHILD(tree, 5)));
1525        }
1526    }
1527    else
1528        (void) validate_numnodes(tree, 2, "raise");
1529    if (res && (nch == 4))
1530        res = (validate_comma(CHILD(tree, 2))
1531               && validate_test(CHILD(tree, 3)));
1532
1533    return (res);
1534}
1535
1536
1537/* yield_stmt: 'yield' testlist
1538 */
1539static int
1540validate_yield_stmt(node *tree)
1541{
1542    return (validate_ntype(tree, yield_stmt)
1543            && validate_numnodes(tree, 2, "yield_stmt")
1544            && validate_name(CHILD(tree, 0), "yield")
1545            && validate_testlist(CHILD(tree, 1)));
1546}
1547
1548
1549static int
1550validate_import_as_name(node *tree)
1551{
1552    int nch = NCH(tree);
1553    int ok = validate_ntype(tree, import_as_name);
1554
1555    if (ok) {
1556        if (nch == 1)
1557            ok = validate_name(CHILD(tree, 0), NULL);
1558        else if (nch == 3)
1559            ok = (validate_name(CHILD(tree, 0), NULL)
1560                  && validate_name(CHILD(tree, 1), "as")
1561                  && validate_name(CHILD(tree, 2), NULL));
1562        else
1563            ok = validate_numnodes(tree, 3, "import_as_name");
1564    }
1565    return ok;
1566}
1567
1568
1569/* dotted_name:  NAME ("." NAME)*
1570 */
1571static int
1572validate_dotted_name(node *tree)
1573{
1574    int nch = NCH(tree);
1575    int res = (validate_ntype(tree, dotted_name)
1576               && is_odd(nch)
1577               && validate_name(CHILD(tree, 0), NULL));
1578    int i;
1579
1580    for (i = 1; res && (i < nch); i += 2) {
1581        res = (validate_dot(CHILD(tree, i))
1582               && validate_name(CHILD(tree, i+1), NULL));
1583    }
1584    return res;
1585}
1586
1587
1588/* dotted_as_name:  dotted_name [NAME NAME]
1589 */
1590static int
1591validate_dotted_as_name(node *tree)
1592{
1593    int nch = NCH(tree);
1594    int res = validate_ntype(tree, dotted_as_name);
1595
1596    if (res) {
1597        if (nch == 1)
1598            res = validate_dotted_name(CHILD(tree, 0));
1599        else if (nch == 3)
1600            res = (validate_dotted_name(CHILD(tree, 0))
1601                   && validate_name(CHILD(tree, 1), "as")
1602                   && validate_name(CHILD(tree, 2), NULL));
1603        else {
1604            res = 0;
1605            err_string("illegal number of children for dotted_as_name");
1606        }
1607    }
1608    return res;
1609}
1610
1611
1612/*  import_stmt:
1613 *
1614 *    'import' dotted_as_name (',' dotted_as_name)*
1615 *  | 'from' dotted_name 'import' ('*' | import_as_name (',' import_as_name)*)
1616 */
1617static int
1618validate_import_stmt(node *tree)
1619{
1620    int nch = NCH(tree);
1621    int res = (validate_ntype(tree, import_stmt)
1622               && (nch >= 2) && is_even(nch)
1623               && validate_ntype(CHILD(tree, 0), NAME));
1624
1625    if (res && (strcmp(STR(CHILD(tree, 0)), "import") == 0)) {
1626        int j;
1627
1628        res = validate_dotted_as_name(CHILD(tree, 1));
1629        for (j = 2; res && (j < nch); j += 2)
1630            res = (validate_comma(CHILD(tree, j))
1631                   && validate_dotted_as_name(CHILD(tree, j + 1)));
1632    }
1633    else if (res && (res = validate_name(CHILD(tree, 0), "from"))) {
1634        res = ((nch >= 4) && is_even(nch)
1635               && validate_dotted_name(CHILD(tree, 1))
1636               && validate_name(CHILD(tree, 2), "import"));
1637        if (nch == 4) {
1638            if (TYPE(CHILD(tree, 3)) == import_as_name)
1639                res = validate_import_as_name(CHILD(tree, 3));
1640            else
1641                res = validate_star(CHILD(tree, 3));
1642        }
1643        else {
1644            /*  'from' dotted_name 'import' import_as_name
1645             *      (',' import_as_name)+
1646             */
1647            int j;
1648            res = validate_import_as_name(CHILD(tree, 3));
1649            for (j = 4; res && (j < nch); j += 2)
1650                res = (validate_comma(CHILD(tree, j))
1651                       && validate_import_as_name(CHILD(tree, j + 1)));
1652        }
1653    }
1654    else
1655        res = 0;
1656
1657    return (res);
1658}
1659
1660
1661static int
1662validate_global_stmt(node *tree)
1663{
1664    int j;
1665    int nch = NCH(tree);
1666    int res = (validate_ntype(tree, global_stmt)
1667               && is_even(nch) && (nch >= 2));
1668
1669    if (res)
1670        res = (validate_name(CHILD(tree, 0), "global")
1671               && validate_ntype(CHILD(tree, 1), NAME));
1672    for (j = 2; res && (j < nch); j += 2)
1673        res = (validate_comma(CHILD(tree, j))
1674               && validate_ntype(CHILD(tree, j + 1), NAME));
1675
1676    return (res);
1677}
1678
1679
1680/*  exec_stmt:
1681 *
1682 *  'exec' expr ['in' test [',' test]]
1683 */
1684static int
1685validate_exec_stmt(node *tree)
1686{
1687    int nch = NCH(tree);
1688    int res = (validate_ntype(tree, exec_stmt)
1689               && ((nch == 2) || (nch == 4) || (nch == 6))
1690               && validate_name(CHILD(tree, 0), "exec")
1691               && validate_expr(CHILD(tree, 1)));
1692
1693    if (!res && !PyErr_Occurred())
1694        err_string("illegal exec statement");
1695    if (res && (nch > 2))
1696        res = (validate_name(CHILD(tree, 2), "in")
1697               && validate_test(CHILD(tree, 3)));
1698    if (res && (nch == 6))
1699        res = (validate_comma(CHILD(tree, 4))
1700               && validate_test(CHILD(tree, 5)));
1701
1702    return (res);
1703}
1704
1705
1706/*  assert_stmt:
1707 *
1708 *  'assert' test [',' test]
1709 */
1710static int
1711validate_assert_stmt(node *tree)
1712{
1713    int nch = NCH(tree);
1714    int res = (validate_ntype(tree, assert_stmt)
1715               && ((nch == 2) || (nch == 4))
1716               && (validate_name(CHILD(tree, 0), "__assert__") ||
1717                   validate_name(CHILD(tree, 0), "assert"))
1718               && validate_test(CHILD(tree, 1)));
1719
1720    if (!res && !PyErr_Occurred())
1721        err_string("illegal assert statement");
1722    if (res && (nch > 2))
1723        res = (validate_comma(CHILD(tree, 2))
1724               && validate_test(CHILD(tree, 3)));
1725
1726    return (res);
1727}
1728
1729
1730static int
1731validate_while(node *tree)
1732{
1733    int nch = NCH(tree);
1734    int res = (validate_ntype(tree, while_stmt)
1735               && ((nch == 4) || (nch == 7))
1736               && validate_name(CHILD(tree, 0), "while")
1737               && validate_test(CHILD(tree, 1))
1738               && validate_colon(CHILD(tree, 2))
1739               && validate_suite(CHILD(tree, 3)));
1740
1741    if (res && (nch == 7))
1742        res = (validate_name(CHILD(tree, 4), "else")
1743               && validate_colon(CHILD(tree, 5))
1744               && validate_suite(CHILD(tree, 6)));
1745
1746    return (res);
1747}
1748
1749
1750static int
1751validate_for(node *tree)
1752{
1753    int nch = NCH(tree);
1754    int res = (validate_ntype(tree, for_stmt)
1755               && ((nch == 6) || (nch == 9))
1756               && validate_name(CHILD(tree, 0), "for")
1757               && validate_exprlist(CHILD(tree, 1))
1758               && validate_name(CHILD(tree, 2), "in")
1759               && validate_testlist(CHILD(tree, 3))
1760               && validate_colon(CHILD(tree, 4))
1761               && validate_suite(CHILD(tree, 5)));
1762
1763    if (res && (nch == 9))
1764        res = (validate_name(CHILD(tree, 6), "else")
1765               && validate_colon(CHILD(tree, 7))
1766               && validate_suite(CHILD(tree, 8)));
1767
1768    return (res);
1769}
1770
1771
1772/*  try_stmt:
1773 *      'try' ':' suite (except_clause ':' suite)+ ['else' ':' suite]
1774 *    | 'try' ':' suite 'finally' ':' suite
1775 *
1776 */
1777static int
1778validate_try(node *tree)
1779{
1780    int nch = NCH(tree);
1781    int pos = 3;
1782    int res = (validate_ntype(tree, try_stmt)
1783               && (nch >= 6) && ((nch % 3) == 0));
1784
1785    if (res)
1786        res = (validate_name(CHILD(tree, 0), "try")
1787               && validate_colon(CHILD(tree, 1))
1788               && validate_suite(CHILD(tree, 2))
1789               && validate_colon(CHILD(tree, nch - 2))
1790               && validate_suite(CHILD(tree, nch - 1)));
1791    else if (!PyErr_Occurred()) {
1792        const char* name = "except";
1793        if (TYPE(CHILD(tree, nch - 3)) != except_clause)
1794            name = STR(CHILD(tree, nch - 3));
1795
1796        PyErr_Format(parser_error,
1797                     "Illegal number of children for try/%s node.", name);
1798    }
1799    /*  Skip past except_clause sections:  */
1800    while (res && (TYPE(CHILD(tree, pos)) == except_clause)) {
1801        res = (validate_except_clause(CHILD(tree, pos))
1802               && validate_colon(CHILD(tree, pos + 1))
1803               && validate_suite(CHILD(tree, pos + 2)));
1804        pos += 3;
1805    }
1806    if (res && (pos < nch)) {
1807        res = validate_ntype(CHILD(tree, pos), NAME);
1808        if (res && (strcmp(STR(CHILD(tree, pos)), "finally") == 0))
1809            res = (validate_numnodes(tree, 6, "try/finally")
1810                   && validate_colon(CHILD(tree, 4))
1811                   && validate_suite(CHILD(tree, 5)));
1812        else if (res) {
1813            if (nch == (pos + 3)) {
1814                res = ((strcmp(STR(CHILD(tree, pos)), "except") == 0)
1815                       || (strcmp(STR(CHILD(tree, pos)), "else") == 0));
1816                if (!res)
1817                    err_string("illegal trailing triple in try statement");
1818            }
1819            else if (nch == (pos + 6)) {
1820                res = (validate_name(CHILD(tree, pos), "except")
1821                       && validate_colon(CHILD(tree, pos + 1))
1822                       && validate_suite(CHILD(tree, pos + 2))
1823                       && validate_name(CHILD(tree, pos + 3), "else"));
1824            }
1825            else
1826                res = validate_numnodes(tree, pos + 3, "try/except");
1827        }
1828    }
1829    return (res);
1830}
1831
1832
1833static int
1834validate_except_clause(node *tree)
1835{
1836    int nch = NCH(tree);
1837    int res = (validate_ntype(tree, except_clause)
1838               && ((nch == 1) || (nch == 2) || (nch == 4))
1839               && validate_name(CHILD(tree, 0), "except"));
1840
1841    if (res && (nch > 1))
1842        res = validate_test(CHILD(tree, 1));
1843    if (res && (nch == 4))
1844        res = (validate_comma(CHILD(tree, 2))
1845               && validate_test(CHILD(tree, 3)));
1846
1847    return (res);
1848}
1849
1850
1851static int
1852validate_test(node *tree)
1853{
1854    int nch = NCH(tree);
1855    int res = validate_ntype(tree, test) && is_odd(nch);
1856
1857    if (res && (TYPE(CHILD(tree, 0)) == lambdef))
1858        res = ((nch == 1)
1859               && validate_lambdef(CHILD(tree, 0)));
1860    else if (res) {
1861        int pos;
1862        res = validate_and_test(CHILD(tree, 0));
1863        for (pos = 1; res && (pos < nch); pos += 2)
1864            res = (validate_name(CHILD(tree, pos), "or")
1865                   && validate_and_test(CHILD(tree, pos + 1)));
1866    }
1867    return (res);
1868}
1869
1870
1871static int
1872validate_and_test(node *tree)
1873{
1874    int pos;
1875    int nch = NCH(tree);
1876    int res = (validate_ntype(tree, and_test)
1877               && is_odd(nch)
1878               && validate_not_test(CHILD(tree, 0)));
1879
1880    for (pos = 1; res && (pos < nch); pos += 2)
1881        res = (validate_name(CHILD(tree, pos), "and")
1882               && validate_not_test(CHILD(tree, 0)));
1883
1884    return (res);
1885}
1886
1887
1888static int
1889validate_not_test(node *tree)
1890{
1891    int nch = NCH(tree);
1892    int res = validate_ntype(tree, not_test) && ((nch == 1) || (nch == 2));
1893
1894    if (res) {
1895        if (nch == 2)
1896            res = (validate_name(CHILD(tree, 0), "not")
1897                   && validate_not_test(CHILD(tree, 1)));
1898        else if (nch == 1)
1899            res = validate_comparison(CHILD(tree, 0));
1900    }
1901    return (res);
1902}
1903
1904
1905static int
1906validate_comparison(node *tree)
1907{
1908    int pos;
1909    int nch = NCH(tree);
1910    int res = (validate_ntype(tree, comparison)
1911               && is_odd(nch)
1912               && validate_expr(CHILD(tree, 0)));
1913
1914    for (pos = 1; res && (pos < nch); pos += 2)
1915        res = (validate_comp_op(CHILD(tree, pos))
1916               && validate_expr(CHILD(tree, pos + 1)));
1917
1918    return (res);
1919}
1920
1921
1922static int
1923validate_comp_op(node *tree)
1924{
1925    int res = 0;
1926    int nch = NCH(tree);
1927
1928    if (!validate_ntype(tree, comp_op))
1929        return (0);
1930    if (nch == 1) {
1931        /*
1932         *  Only child will be a terminal with a well-defined symbolic name
1933         *  or a NAME with a string of either 'is' or 'in'
1934         */
1935        tree = CHILD(tree, 0);
1936        switch (TYPE(tree)) {
1937            case LESS:
1938            case GREATER:
1939            case EQEQUAL:
1940            case EQUAL:
1941            case LESSEQUAL:
1942            case GREATEREQUAL:
1943            case NOTEQUAL:
1944              res = 1;
1945              break;
1946            case NAME:
1947              res = ((strcmp(STR(tree), "in") == 0)
1948                     || (strcmp(STR(tree), "is") == 0));
1949              if (!res) {
1950                  PyErr_Format(parser_error,
1951                               "illegal operator '%s'", STR(tree));
1952              }
1953              break;
1954          default:
1955              err_string("illegal comparison operator type");
1956              break;
1957        }
1958    }
1959    else if ((res = validate_numnodes(tree, 2, "comp_op")) != 0) {
1960        res = (validate_ntype(CHILD(tree, 0), NAME)
1961               && validate_ntype(CHILD(tree, 1), NAME)
1962               && (((strcmp(STR(CHILD(tree, 0)), "is") == 0)
1963                    && (strcmp(STR(CHILD(tree, 1)), "not") == 0))
1964                   || ((strcmp(STR(CHILD(tree, 0)), "not") == 0)
1965                       && (strcmp(STR(CHILD(tree, 1)), "in") == 0))));
1966        if (!res && !PyErr_Occurred())
1967            err_string("unknown comparison operator");
1968    }
1969    return (res);
1970}
1971
1972
1973static int
1974validate_expr(node *tree)
1975{
1976    int j;
1977    int nch = NCH(tree);
1978    int res = (validate_ntype(tree, expr)
1979               && is_odd(nch)
1980               && validate_xor_expr(CHILD(tree, 0)));
1981
1982    for (j = 2; res && (j < nch); j += 2)
1983        res = (validate_xor_expr(CHILD(tree, j))
1984               && validate_vbar(CHILD(tree, j - 1)));
1985
1986    return (res);
1987}
1988
1989
1990static int
1991validate_xor_expr(node *tree)
1992{
1993    int j;
1994    int nch = NCH(tree);
1995    int res = (validate_ntype(tree, xor_expr)
1996               && is_odd(nch)
1997               && validate_and_expr(CHILD(tree, 0)));
1998
1999    for (j = 2; res && (j < nch); j += 2)
2000        res = (validate_circumflex(CHILD(tree, j - 1))
2001               && validate_and_expr(CHILD(tree, j)));
2002
2003    return (res);
2004}
2005
2006
2007static int
2008validate_and_expr(node *tree)
2009{
2010    int pos;
2011    int nch = NCH(tree);
2012    int res = (validate_ntype(tree, and_expr)
2013               && is_odd(nch)
2014               && validate_shift_expr(CHILD(tree, 0)));
2015
2016    for (pos = 1; res && (pos < nch); pos += 2)
2017        res = (validate_ampersand(CHILD(tree, pos))
2018               && validate_shift_expr(CHILD(tree, pos + 1)));
2019
2020    return (res);
2021}
2022
2023
2024static int
2025validate_chain_two_ops(node *tree, int (*termvalid)(node *), int op1, int op2)
2026 {
2027    int pos = 1;
2028    int nch = NCH(tree);
2029    int res = (is_odd(nch)
2030               && (*termvalid)(CHILD(tree, 0)));
2031
2032    for ( ; res && (pos < nch); pos += 2) {
2033        if (TYPE(CHILD(tree, pos)) != op1)
2034            res = validate_ntype(CHILD(tree, pos), op2);
2035        if (res)
2036            res = (*termvalid)(CHILD(tree, pos + 1));
2037    }
2038    return (res);
2039}
2040
2041
2042static int
2043validate_shift_expr(node *tree)
2044{
2045    return (validate_ntype(tree, shift_expr)
2046            && validate_chain_two_ops(tree, validate_arith_expr,
2047                                      LEFTSHIFT, RIGHTSHIFT));
2048}
2049
2050
2051static int
2052validate_arith_expr(node *tree)
2053{
2054    return (validate_ntype(tree, arith_expr)
2055            && validate_chain_two_ops(tree, validate_term, PLUS, MINUS));
2056}
2057
2058
2059static int
2060validate_term(node *tree)
2061{
2062    int pos = 1;
2063    int nch = NCH(tree);
2064    int res = (validate_ntype(tree, term)
2065               && is_odd(nch)
2066               && validate_factor(CHILD(tree, 0)));
2067
2068    for ( ; res && (pos < nch); pos += 2)
2069        res = (((TYPE(CHILD(tree, pos)) == STAR)
2070               || (TYPE(CHILD(tree, pos)) == SLASH)
2071               || (TYPE(CHILD(tree, pos)) == PERCENT))
2072               && validate_factor(CHILD(tree, pos + 1)));
2073
2074    return (res);
2075}
2076
2077
2078/*  factor:
2079 *
2080 *  factor: ('+'|'-'|'~') factor | power
2081 */
2082static int
2083validate_factor(node *tree)
2084{
2085    int nch = NCH(tree);
2086    int res = (validate_ntype(tree, factor)
2087               && (((nch == 2)
2088                    && ((TYPE(CHILD(tree, 0)) == PLUS)
2089                        || (TYPE(CHILD(tree, 0)) == MINUS)
2090                        || (TYPE(CHILD(tree, 0)) == TILDE))
2091                    && validate_factor(CHILD(tree, 1)))
2092                   || ((nch == 1)
2093                       && validate_power(CHILD(tree, 0)))));
2094    return (res);
2095}
2096
2097
2098/*  power:
2099 *
2100 *  power: atom trailer* ('**' factor)*
2101 */
2102static int
2103validate_power(node *tree)
2104{
2105    int pos = 1;
2106    int nch = NCH(tree);
2107    int res = (validate_ntype(tree, power) && (nch >= 1)
2108               && validate_atom(CHILD(tree, 0)));
2109
2110    while (res && (pos < nch) && (TYPE(CHILD(tree, pos)) == trailer))
2111        res = validate_trailer(CHILD(tree, pos++));
2112    if (res && (pos < nch)) {
2113        if (!is_even(nch - pos)) {
2114            err_string("illegal number of nodes for 'power'");
2115            return (0);
2116        }
2117        for ( ; res && (pos < (nch - 1)); pos += 2)
2118            res = (validate_doublestar(CHILD(tree, pos))
2119                   && validate_factor(CHILD(tree, pos + 1)));
2120    }
2121    return (res);
2122}
2123
2124
2125static int
2126validate_atom(node *tree)
2127{
2128    int pos;
2129    int nch = NCH(tree);
2130    int res = validate_ntype(tree, atom);
2131
2132    if (res && nch < 1)
2133        res = validate_numnodes(tree, nch+1, "atom");
2134    if (res) {
2135        switch (TYPE(CHILD(tree, 0))) {
2136          case LPAR:
2137            res = ((nch <= 3)
2138                   && (validate_rparen(CHILD(tree, nch - 1))));
2139
2140            if (res && (nch == 3))
2141                res = validate_testlist(CHILD(tree, 1));
2142            break;
2143          case LSQB:
2144            if (nch == 2)
2145                res = validate_ntype(CHILD(tree, 1), RSQB);
2146            else if (nch == 3)
2147                res = (validate_listmaker(CHILD(tree, 1))
2148                       && validate_ntype(CHILD(tree, 2), RSQB));
2149            else {
2150                res = 0;
2151                err_string("illegal list display atom");
2152            }
2153            break;
2154          case LBRACE:
2155            res = ((nch <= 3)
2156                   && validate_ntype(CHILD(tree, nch - 1), RBRACE));
2157
2158            if (res && (nch == 3))
2159                res = validate_dictmaker(CHILD(tree, 1));
2160            break;
2161          case BACKQUOTE:
2162            res = ((nch == 3)
2163                   && validate_testlist(CHILD(tree, 1))
2164                   && validate_ntype(CHILD(tree, 2), BACKQUOTE));
2165            break;
2166          case NAME:
2167          case NUMBER:
2168            res = (nch == 1);
2169            break;
2170          case STRING:
2171            for (pos = 1; res && (pos < nch); ++pos)
2172                res = validate_ntype(CHILD(tree, pos), STRING);
2173            break;
2174          default:
2175            res = 0;
2176            break;
2177        }
2178    }
2179    return (res);
2180}
2181
2182
2183/*  listmaker:
2184 *    test ( list_for | (',' test)* [','] )
2185 */
2186static int
2187validate_listmaker(node *tree)
2188{
2189    int nch = NCH(tree);
2190    int ok = nch;
2191
2192    if (nch == 0)
2193        err_string("missing child nodes of listmaker");
2194    else
2195        ok = validate_test(CHILD(tree, 0));
2196
2197    /*
2198     *  list_iter | (',' test)* [',']
2199     */
2200    if (nch == 2 && TYPE(CHILD(tree, 1)) == list_for)
2201        ok = validate_list_for(CHILD(tree, 1));
2202    else {
2203        /*  (',' test)* [',']  */
2204        int i = 1;
2205        while (ok && nch - i >= 2) {
2206            ok = (validate_comma(CHILD(tree, i))
2207                  && validate_test(CHILD(tree, i+1)));
2208            i += 2;
2209        }
2210        if (ok && i == nch-1)
2211            ok = validate_comma(CHILD(tree, i));
2212        else if (i != nch) {
2213            ok = 0;
2214            err_string("illegal trailing nodes for listmaker");
2215        }
2216    }
2217    return ok;
2218}
2219
2220
2221/*  funcdef:
2222 *      'def' NAME parameters ':' suite
2223 *
2224 */
2225static int
2226validate_funcdef(node *tree)
2227{
2228    return (validate_ntype(tree, funcdef)
2229            && validate_numnodes(tree, 5, "funcdef")
2230            && validate_name(CHILD(tree, 0), "def")
2231            && validate_ntype(CHILD(tree, 1), NAME)
2232            && validate_colon(CHILD(tree, 3))
2233            && validate_parameters(CHILD(tree, 2))
2234            && validate_suite(CHILD(tree, 4)));
2235}
2236
2237
2238static int
2239validate_lambdef(node *tree)
2240{
2241    int nch = NCH(tree);
2242    int res = (validate_ntype(tree, lambdef)
2243               && ((nch == 3) || (nch == 4))
2244               && validate_name(CHILD(tree, 0), "lambda")
2245               && validate_colon(CHILD(tree, nch - 2))
2246               && validate_test(CHILD(tree, nch - 1)));
2247
2248    if (res && (nch == 4))
2249        res = validate_varargslist(CHILD(tree, 1));
2250    else if (!res && !PyErr_Occurred())
2251        (void) validate_numnodes(tree, 3, "lambdef");
2252
2253    return (res);
2254}
2255
2256
2257/*  arglist:
2258 *
2259 *  (argument ',')* (argument [','] | '*' test [',' '**' test] | '**' test)
2260 */
2261static int
2262validate_arglist(node *tree)
2263{
2264    int nch = NCH(tree);
2265    int i = 0;
2266    int ok = 1;
2267
2268    if (nch <= 0)
2269        /* raise the right error from having an invalid number of children */
2270        return validate_numnodes(tree, nch + 1, "arglist");
2271
2272    while (ok && nch-i >= 2) {
2273        /* skip leading (argument ',') */
2274        ok = (validate_argument(CHILD(tree, i))
2275              && validate_comma(CHILD(tree, i+1)));
2276        if (ok)
2277            i += 2;
2278        else
2279            PyErr_Clear();
2280    }
2281    ok = 1;
2282    if (nch-i > 0) {
2283        /*
2284         * argument | '*' test [',' '**' test] | '**' test
2285         */
2286        int sym = TYPE(CHILD(tree, i));
2287
2288        if (sym == argument) {
2289            ok = validate_argument(CHILD(tree, i));
2290            if (ok && i+1 != nch) {
2291                err_string("illegal arglist specification"
2292                           " (extra stuff on end)");
2293                ok = 0;
2294            }
2295        }
2296        else if (sym == STAR) {
2297            ok = validate_star(CHILD(tree, i));
2298            if (ok && (nch-i == 2))
2299                ok = validate_test(CHILD(tree, i+1));
2300            else if (ok && (nch-i == 5))
2301                ok = (validate_test(CHILD(tree, i+1))
2302                      && validate_comma(CHILD(tree, i+2))
2303                      && validate_doublestar(CHILD(tree, i+3))
2304                      && validate_test(CHILD(tree, i+4)));
2305            else {
2306                err_string("illegal use of '*' in arglist");
2307                ok = 0;
2308            }
2309        }
2310        else if (sym == DOUBLESTAR) {
2311            if (nch-i == 2)
2312                ok = (validate_doublestar(CHILD(tree, i))
2313                      && validate_test(CHILD(tree, i+1)));
2314            else {
2315                err_string("illegal use of '**' in arglist");
2316                ok = 0;
2317            }
2318        }
2319        else {
2320            err_string("illegal arglist specification");
2321            ok = 0;
2322        }
2323    }
2324    return (ok);
2325}
2326
2327
2328
2329/*  argument:
2330 *
2331 *  [test '='] test
2332 */
2333static int
2334validate_argument(node *tree)
2335{
2336    int nch = NCH(tree);
2337    int res = (validate_ntype(tree, argument)
2338               && ((nch == 1) || (nch == 3))
2339               && validate_test(CHILD(tree, 0)));
2340
2341    if (res && (nch == 3))
2342        res = (validate_equal(CHILD(tree, 1))
2343               && validate_test(CHILD(tree, 2)));
2344
2345    return (res);
2346}
2347
2348
2349
2350/*  trailer:
2351 *
2352 *  '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
2353 */
2354static int
2355validate_trailer(node *tree)
2356{
2357    int nch = NCH(tree);
2358    int res = validate_ntype(tree, trailer) && ((nch == 2) || (nch == 3));
2359
2360    if (res) {
2361        switch (TYPE(CHILD(tree, 0))) {
2362          case LPAR:
2363            res = validate_rparen(CHILD(tree, nch - 1));
2364            if (res && (nch == 3))
2365                res = validate_arglist(CHILD(tree, 1));
2366            break;
2367          case LSQB:
2368            res = (validate_numnodes(tree, 3, "trailer")
2369                   && validate_subscriptlist(CHILD(tree, 1))
2370                   && validate_ntype(CHILD(tree, 2), RSQB));
2371            break;
2372          case DOT:
2373            res = (validate_numnodes(tree, 2, "trailer")
2374                   && validate_ntype(CHILD(tree, 1), NAME));
2375            break;
2376          default:
2377            res = 0;
2378            break;
2379        }
2380    }
2381    else {
2382        (void) validate_numnodes(tree, 2, "trailer");
2383    }
2384    return (res);
2385}
2386
2387
2388/*  subscriptlist:
2389 *
2390 *  subscript (',' subscript)* [',']
2391 */
2392static int
2393validate_subscriptlist(node *tree)
2394{
2395    return (validate_repeating_list(tree, subscriptlist,
2396                                    validate_subscript, "subscriptlist"));
2397}
2398
2399
2400/*  subscript:
2401 *
2402 *  '.' '.' '.' | test | [test] ':' [test] [sliceop]
2403 */
2404static int
2405validate_subscript(node *tree)
2406{
2407    int offset = 0;
2408    int nch = NCH(tree);
2409    int res = validate_ntype(tree, subscript) && (nch >= 1) && (nch <= 4);
2410
2411    if (!res) {
2412        if (!PyErr_Occurred())
2413            err_string("invalid number of arguments for subscript node");
2414        return (0);
2415    }
2416    if (TYPE(CHILD(tree, 0)) == DOT)
2417        /* take care of ('.' '.' '.') possibility */
2418        return (validate_numnodes(tree, 3, "subscript")
2419                && validate_dot(CHILD(tree, 0))
2420                && validate_dot(CHILD(tree, 1))
2421                && validate_dot(CHILD(tree, 2)));
2422    if (nch == 1) {
2423        if (TYPE(CHILD(tree, 0)) == test)
2424            res = validate_test(CHILD(tree, 0));
2425        else
2426            res = validate_colon(CHILD(tree, 0));
2427        return (res);
2428    }
2429    /*  Must be [test] ':' [test] [sliceop],
2430     *  but at least one of the optional components will
2431     *  be present, but we don't know which yet.
2432     */
2433    if ((TYPE(CHILD(tree, 0)) != COLON) || (nch == 4)) {
2434        res = validate_test(CHILD(tree, 0));
2435        offset = 1;
2436    }
2437    if (res)
2438        res = validate_colon(CHILD(tree, offset));
2439    if (res) {
2440        int rem = nch - ++offset;
2441        if (rem) {
2442            if (TYPE(CHILD(tree, offset)) == test) {
2443                res = validate_test(CHILD(tree, offset));
2444                ++offset;
2445                --rem;
2446            }
2447            if (res && rem)
2448                res = validate_sliceop(CHILD(tree, offset));
2449        }
2450    }
2451    return (res);
2452}
2453
2454
2455static int
2456validate_sliceop(node *tree)
2457{
2458    int nch = NCH(tree);
2459    int res = ((nch == 1) || validate_numnodes(tree, 2, "sliceop"))
2460              && validate_ntype(tree, sliceop);
2461    if (!res && !PyErr_Occurred()) {
2462        res = validate_numnodes(tree, 1, "sliceop");
2463    }
2464    if (res)
2465        res = validate_colon(CHILD(tree, 0));
2466    if (res && (nch == 2))
2467        res = validate_test(CHILD(tree, 1));
2468
2469    return (res);
2470}
2471
2472
2473static int
2474validate_exprlist(node *tree)
2475{
2476    return (validate_repeating_list(tree, exprlist,
2477                                    validate_expr, "exprlist"));
2478}
2479
2480
2481static int
2482validate_dictmaker(node *tree)
2483{
2484    int nch = NCH(tree);
2485    int res = (validate_ntype(tree, dictmaker)
2486               && (nch >= 3)
2487               && validate_test(CHILD(tree, 0))
2488               && validate_colon(CHILD(tree, 1))
2489               && validate_test(CHILD(tree, 2)));
2490
2491    if (res && ((nch % 4) == 0))
2492        res = validate_comma(CHILD(tree, --nch));
2493    else if (res)
2494        res = ((nch % 4) == 3);
2495
2496    if (res && (nch > 3)) {
2497        int pos = 3;
2498        /*  ( ',' test ':' test )*  */
2499        while (res && (pos < nch)) {
2500            res = (validate_comma(CHILD(tree, pos))
2501                   && validate_test(CHILD(tree, pos + 1))
2502                   && validate_colon(CHILD(tree, pos + 2))
2503                   && validate_test(CHILD(tree, pos + 3)));
2504            pos += 4;
2505        }
2506    }
2507    return (res);
2508}
2509
2510
2511static int
2512validate_eval_input(node *tree)
2513{
2514    int pos;
2515    int nch = NCH(tree);
2516    int res = (validate_ntype(tree, eval_input)
2517               && (nch >= 2)
2518               && validate_testlist(CHILD(tree, 0))
2519               && validate_ntype(CHILD(tree, nch - 1), ENDMARKER));
2520
2521    for (pos = 1; res && (pos < (nch - 1)); ++pos)
2522        res = validate_ntype(CHILD(tree, pos), NEWLINE);
2523
2524    return (res);
2525}
2526
2527
2528static int
2529validate_node(node *tree)
2530{
2531    int   nch  = 0;                     /* num. children on current node  */
2532    int   res  = 1;                     /* result value                   */
2533    node* next = 0;                     /* node to process after this one */
2534
2535    while (res && (tree != 0)) {
2536        nch  = NCH(tree);
2537        next = 0;
2538        switch (TYPE(tree)) {
2539            /*
2540             *  Definition nodes.
2541             */
2542          case funcdef:
2543            res = validate_funcdef(tree);
2544            break;
2545          case classdef:
2546            res = validate_class(tree);
2547            break;
2548            /*
2549             *  "Trivial" parse tree nodes.
2550             *  (Why did I call these trivial?)
2551             */
2552          case stmt:
2553            res = validate_stmt(tree);
2554            break;
2555          case small_stmt:
2556            /*
2557             *  expr_stmt | print_stmt | del_stmt | pass_stmt | flow_stmt
2558             *  | import_stmt | global_stmt | exec_stmt | assert_stmt
2559             */
2560            res = validate_small_stmt(tree);
2561            break;
2562          case flow_stmt:
2563            res  = (validate_numnodes(tree, 1, "flow_stmt")
2564                    && ((TYPE(CHILD(tree, 0)) == break_stmt)
2565                        || (TYPE(CHILD(tree, 0)) == continue_stmt)
2566                        || (TYPE(CHILD(tree, 0)) == yield_stmt)
2567                        || (TYPE(CHILD(tree, 0)) == return_stmt)
2568                        || (TYPE(CHILD(tree, 0)) == raise_stmt)));
2569            if (res)
2570                next = CHILD(tree, 0);
2571            else if (nch == 1)
2572                err_string("illegal flow_stmt type");
2573            break;
2574          case yield_stmt:
2575            res = validate_yield_stmt(tree);
2576            break;
2577            /*
2578             *  Compound statements.
2579             */
2580          case simple_stmt:
2581            res = validate_simple_stmt(tree);
2582            break;
2583          case compound_stmt:
2584            res = validate_compound_stmt(tree);
2585            break;
2586            /*
2587             *  Fundamental statements.
2588             */
2589          case expr_stmt:
2590            res = validate_expr_stmt(tree);
2591            break;
2592          case print_stmt:
2593            res = validate_print_stmt(tree);
2594            break;
2595          case del_stmt:
2596            res = validate_del_stmt(tree);
2597            break;
2598          case pass_stmt:
2599            res = (validate_numnodes(tree, 1, "pass")
2600                   && validate_name(CHILD(tree, 0), "pass"));
2601            break;
2602          case break_stmt:
2603            res = (validate_numnodes(tree, 1, "break")
2604                   && validate_name(CHILD(tree, 0), "break"));
2605            break;
2606          case continue_stmt:
2607            res = (validate_numnodes(tree, 1, "continue")
2608                   && validate_name(CHILD(tree, 0), "continue"));
2609            break;
2610          case return_stmt:
2611            res = validate_return_stmt(tree);
2612            break;
2613          case raise_stmt:
2614            res = validate_raise_stmt(tree);
2615            break;
2616          case import_stmt:
2617            res = validate_import_stmt(tree);
2618            break;
2619          case global_stmt:
2620            res = validate_global_stmt(tree);
2621            break;
2622          case exec_stmt:
2623            res = validate_exec_stmt(tree);
2624            break;
2625          case assert_stmt:
2626            res = validate_assert_stmt(tree);
2627            break;
2628          case if_stmt:
2629            res = validate_if(tree);
2630            break;
2631          case while_stmt:
2632            res = validate_while(tree);
2633            break;
2634          case for_stmt:
2635            res = validate_for(tree);
2636            break;
2637          case try_stmt:
2638            res = validate_try(tree);
2639            break;
2640          case suite:
2641            res = validate_suite(tree);
2642            break;
2643            /*
2644             *  Expression nodes.
2645             */
2646          case testlist:
2647            res = validate_testlist(tree);
2648            break;
2649          case test:
2650            res = validate_test(tree);
2651            break;
2652          case and_test:
2653            res = validate_and_test(tree);
2654            break;
2655          case not_test:
2656            res = validate_not_test(tree);
2657            break;
2658          case comparison:
2659            res = validate_comparison(tree);
2660            break;
2661          case exprlist:
2662            res = validate_exprlist(tree);
2663            break;
2664          case comp_op:
2665            res = validate_comp_op(tree);
2666            break;
2667          case expr:
2668            res = validate_expr(tree);
2669            break;
2670          case xor_expr:
2671            res = validate_xor_expr(tree);
2672            break;
2673          case and_expr:
2674            res = validate_and_expr(tree);
2675            break;
2676          case shift_expr:
2677            res = validate_shift_expr(tree);
2678            break;
2679          case arith_expr:
2680            res = validate_arith_expr(tree);
2681            break;
2682          case term:
2683            res = validate_term(tree);
2684            break;
2685          case factor:
2686            res = validate_factor(tree);
2687            break;
2688          case power:
2689            res = validate_power(tree);
2690            break;
2691          case atom:
2692            res = validate_atom(tree);
2693            break;
2694
2695          default:
2696            /* Hopefully never reached! */
2697            err_string("unrecognized node type");
2698            res = 0;
2699            break;
2700        }
2701        tree = next;
2702    }
2703    return (res);
2704}
2705
2706
2707static int
2708validate_expr_tree(node *tree)
2709{
2710    int res = validate_eval_input(tree);
2711
2712    if (!res && !PyErr_Occurred())
2713        err_string("could not validate expression tuple");
2714
2715    return (res);
2716}
2717
2718
2719/*  file_input:
2720 *      (NEWLINE | stmt)* ENDMARKER
2721 */
2722static int
2723validate_file_input(node *tree)
2724{
2725    int j;
2726    int nch = NCH(tree) - 1;
2727    int res = ((nch >= 0)
2728               && validate_ntype(CHILD(tree, nch), ENDMARKER));
2729
2730    for (j = 0; res && (j < nch); ++j) {
2731        if (TYPE(CHILD(tree, j)) == stmt)
2732            res = validate_stmt(CHILD(tree, j));
2733        else
2734            res = validate_newline(CHILD(tree, j));
2735    }
2736    /*  This stays in to prevent any internal failures from getting to the
2737     *  user.  Hopefully, this won't be needed.  If a user reports getting
2738     *  this, we have some debugging to do.
2739     */
2740    if (!res && !PyErr_Occurred())
2741        err_string("VALIDATION FAILURE: report this to the maintainer!");
2742
2743    return (res);
2744}
2745
2746
2747static PyObject*
2748pickle_constructor = NULL;
2749
2750
2751static PyObject*
2752parser__pickler(PyObject *self, PyObject *args)
2753{
2754    NOTE(ARGUNUSED(self))
2755    PyObject *result = NULL;
2756    PyObject *st = NULL;
2757    PyObject *empty_dict = NULL;
2758
2759    if (PyArg_ParseTuple(args, "O!:_pickler", &PyST_Type, &st)) {
2760        PyObject *newargs;
2761        PyObject *tuple;
2762
2763        if ((empty_dict = PyDict_New()) == NULL)
2764            goto finally;
2765        if ((newargs = Py_BuildValue("Oi", st, 1)) == NULL)
2766            goto finally;
2767        tuple = parser_st2tuple((PyST_Object*)NULL, newargs, empty_dict);
2768        if (tuple != NULL) {
2769            result = Py_BuildValue("O(O)", pickle_constructor, tuple);
2770            Py_DECREF(tuple);
2771        }
2772        Py_DECREF(empty_dict);
2773        Py_DECREF(newargs);
2774    }
2775  finally:
2776    Py_XDECREF(empty_dict);
2777
2778    return (result);
2779}
2780
2781
2782/*  Functions exported by this module.  Most of this should probably
2783 *  be converted into an ST object with methods, but that is better
2784 *  done directly in Python, allowing subclasses to be created directly.
2785 *  We'd really have to write a wrapper around it all anyway to allow
2786 *  inheritance.
2787 */
2788static PyMethodDef parser_functions[] =  {
2789    {"ast2tuple",       (PyCFunction)parser_st2tuple,  PUBLIC_METHOD_TYPE,
2790        "Creates a tuple-tree representation of an ST."},
2791    {"ast2list",        (PyCFunction)parser_st2list,   PUBLIC_METHOD_TYPE,
2792        "Creates a list-tree representation of an ST."},
2793    {"compileast",      (PyCFunction)parser_compilest, PUBLIC_METHOD_TYPE,
2794        "Compiles an ST object into a code object."},
2795    {"compilest",      (PyCFunction)parser_compilest,  PUBLIC_METHOD_TYPE,
2796        "Compiles an ST object into a code object."},
2797    {"expr",            (PyCFunction)parser_expr,      PUBLIC_METHOD_TYPE,
2798        "Creates an ST object from an expression."},
2799    {"isexpr",          (PyCFunction)parser_isexpr,    PUBLIC_METHOD_TYPE,
2800        "Determines if an ST object was created from an expression."},
2801    {"issuite",         (PyCFunction)parser_issuite,   PUBLIC_METHOD_TYPE,
2802        "Determines if an ST object was created from a suite."},
2803    {"suite",           (PyCFunction)parser_suite,     PUBLIC_METHOD_TYPE,
2804        "Creates an ST object from a suite."},
2805    {"sequence2ast",    (PyCFunction)parser_tuple2st,  PUBLIC_METHOD_TYPE,
2806        "Creates an ST object from a tree representation."},
2807    {"sequence2st",     (PyCFunction)parser_tuple2st,  PUBLIC_METHOD_TYPE,
2808        "Creates an ST object from a tree representation."},
2809    {"st2tuple",        (PyCFunction)parser_st2tuple,  PUBLIC_METHOD_TYPE,
2810        "Creates a tuple-tree representation of an ST."},
2811    {"st2list",         (PyCFunction)parser_st2list,   PUBLIC_METHOD_TYPE,
2812        "Creates a list-tree representation of an ST."},
2813    {"tuple2ast",       (PyCFunction)parser_tuple2st,  PUBLIC_METHOD_TYPE,
2814        "Creates an ST object from a tree representation."},
2815    {"tuple2st",        (PyCFunction)parser_tuple2st,  PUBLIC_METHOD_TYPE,
2816        "Creates an ST object from a tree representation."},
2817
2818    /* private stuff: support pickle module */
2819    {"_pickler",        (PyCFunction)parser__pickler,   METH_VARARGS,
2820        "Returns the pickle magic to allow ST objects to be pickled."},
2821
2822    {NULL, NULL, 0, NULL}
2823    };
2824
2825
2826DL_EXPORT(void) initparser(void);  /* supply a prototype */
2827
2828DL_EXPORT(void)
2829initparser(void)
2830{
2831    PyObject* module;
2832    PyObject* dict;
2833
2834    PyST_Type.ob_type = &PyType_Type;
2835    module = Py_InitModule("parser", parser_functions);
2836    dict = PyModule_GetDict(module);
2837
2838    if (parser_error == 0)
2839        parser_error = PyErr_NewException("parser.ParserError", NULL, NULL);
2840
2841    if ((parser_error == 0)
2842        || (PyDict_SetItemString(dict, "ParserError", parser_error) != 0)) {
2843        /* caller will check PyErr_Occurred() */
2844        return;
2845    }
2846    Py_INCREF(&PyST_Type);
2847    PyDict_SetItemString(dict, "ASTType", (PyObject*)&PyST_Type);
2848    Py_INCREF(&PyST_Type);
2849    PyDict_SetItemString(dict, "STType", (PyObject*)&PyST_Type);
2850
2851    PyDict_SetItemString(dict, "__copyright__",
2852                         PyString_FromString(parser_copyright_string));
2853    PyDict_SetItemString(dict, "__doc__",
2854                         PyString_FromString(parser_doc_string));
2855    PyDict_SetItemString(dict, "__version__",
2856                         PyString_FromString(parser_version_string));
2857
2858    /* register to support pickling */
2859    module = PyImport_ImportModule("copy_reg");
2860    if (module != NULL) {
2861        PyObject *func, *pickler;
2862
2863        func = PyObject_GetAttrString(module, "pickle");
2864        pickle_constructor = PyDict_GetItemString(dict, "sequence2st");
2865        pickler = PyDict_GetItemString(dict, "_pickler");
2866        Py_XINCREF(pickle_constructor);
2867        if ((func != NULL) && (pickle_constructor != NULL)
2868            && (pickler != NULL)) {
2869            PyObject *res;
2870
2871            res = PyObject_CallFunction(func, "OOO", &PyST_Type, pickler,
2872                                        pickle_constructor);
2873            Py_XDECREF(res);
2874        }
2875        Py_XDECREF(func);
2876        Py_DECREF(module);
2877    }
2878}
2879