parsermodule.c revision fd71b9e9d496caa510dec56a9b69966558d6ba5d
1/*  parsermodule.c
2 *
3 *  Copyright 1995-1996 by Fred L. Drake, Jr. and Virginia Polytechnic
4 *  Institute and State University, Blacksburg, Virginia, USA.
5 *  Portions copyright 1991-1995 by Stichting Mathematisch Centrum,
6 *  Amsterdam, The Netherlands.  Copying is permitted under the terms
7 *  associated with the main Python distribution, with the additional
8 *  restriction that this additional notice be included and maintained
9 *  on all distributed copies.
10 *
11 *  This module serves to replace the original parser module written
12 *  by Guido.  The functionality is not matched precisely, but the
13 *  original may be implemented on top of this.  This is desirable
14 *  since the source of the text to be parsed is now divorced from
15 *  this interface.
16 *
17 *  Unlike the prior interface, the ability to give a parse tree
18 *  produced by Python code as a tuple to the compiler is enabled by
19 *  this module.  See the documentation for more details.
20 *
21 *  I've added some annotations that help with the lint code-checking
22 *  program, but they're not complete by a long shot.  The real errors
23 *  that lint detects are gone, but there are still warnings with
24 *  Py_[X]DECREF() and Py_[X]INCREF() macros.  The lint annotations
25 *  look like "NOTE(...)".
26 */
27
28#include "Python.h"                     /* general Python API             */
29#include "graminit.h"                   /* symbols defined in the grammar */
30#include "node.h"                       /* internal parser structure      */
31#include "token.h"                      /* token definitions              */
32                                        /* ISTERMINAL() / ISNONTERMINAL() */
33#include "compile.h"                    /* PyNode_Compile()               */
34
35#ifdef lint
36#include <note.h>
37#else
38#define NOTE(x)
39#endif
40
41#ifdef macintosh
42char *strdup(char *);
43#endif
44
45/*  String constants used to initialize module attributes.
46 *
47 */
48static char*
49parser_copyright_string
50= "Copyright 1995-1996 by Virginia Polytechnic Institute & State\n\
51University, Blacksburg, Virginia, USA, and Fred L. Drake, Jr., Reston,\n\
52Virginia, USA.  Portions copyright 1991-1995 by Stichting Mathematisch\n\
53Centrum, Amsterdam, The Netherlands.";
54
55
56static char*
57parser_doc_string
58= "This is an interface to Python's internal parser.";
59
60static char*
61parser_version_string = "0.4";
62
63
64typedef PyObject* (*SeqMaker) (int length);
65typedef int (*SeqInserter) (PyObject* sequence,
66                            int index,
67                            PyObject* element);
68
69/*  The function below is copyrigthed by Stichting Mathematisch Centrum.  The
70 *  original copyright statement is included below, and continues to apply
71 *  in full to the function immediately following.  All other material is
72 *  original, copyrighted by Fred L. Drake, Jr. and Virginia Polytechnic
73 *  Institute and State University.  Changes were made to comply with the
74 *  new naming conventions.  Added arguments to provide support for creating
75 *  lists as well as tuples, and optionally including the line numbers.
76 */
77
78/***********************************************************
79Copyright 1991-1995 by Stichting Mathematisch Centrum, Amsterdam,
80The Netherlands.
81
82                        All Rights Reserved
83
84Copyright (c) 2000, BeOpen.com.
85Copyright (c) 1995-2000, Corporation for National Research Initiatives.
86Copyright (c) 1990-1995, Stichting Mathematisch Centrum.
87All rights reserved.
88
89See the file "Misc/COPYRIGHT" for information on usage and
90redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES.
91
92******************************************************************/
93
94static PyObject*
95node2tuple(node *n,                     /* node to convert               */
96           SeqMaker mkseq,              /* create sequence               */
97           SeqInserter addelem,         /* func. to add elem. in seq.    */
98           int lineno)                  /* include line numbers?         */
99{
100    if (n == NULL) {
101        Py_INCREF(Py_None);
102        return (Py_None);
103    }
104    if (ISNONTERMINAL(TYPE(n))) {
105        int i;
106        PyObject *v;
107        PyObject *w;
108
109        v = mkseq(1 + NCH(n));
110        if (v == NULL)
111            return (v);
112        w = PyInt_FromLong(TYPE(n));
113        if (w == NULL) {
114            Py_DECREF(v);
115            return ((PyObject*) NULL);
116        }
117        (void) addelem(v, 0, w);
118        for (i = 0; i < NCH(n); i++) {
119            w = node2tuple(CHILD(n, i), mkseq, addelem, lineno);
120            if (w == NULL) {
121                Py_DECREF(v);
122                return ((PyObject*) NULL);
123            }
124            (void) addelem(v, i+1, w);
125        }
126        return (v);
127    }
128    else if (ISTERMINAL(TYPE(n))) {
129        PyObject *result = mkseq(2 + lineno);
130        if (result != NULL) {
131            (void) addelem(result, 0, PyInt_FromLong(TYPE(n)));
132            (void) addelem(result, 1, PyString_FromString(STR(n)));
133            if (lineno == 1)
134                (void) addelem(result, 2, PyInt_FromLong(n->n_lineno));
135        }
136        return (result);
137    }
138    else {
139        PyErr_SetString(PyExc_SystemError,
140                        "unrecognized parse tree node type");
141        return ((PyObject*) NULL);
142    }
143}
144/*
145 *  End of material copyrighted by Stichting Mathematisch Centrum.
146 */
147
148
149
150/*  There are two types of intermediate objects we're interested in:
151 *  'eval' and 'exec' types.  These constants can be used in the ast_type
152 *  field of the object type to identify which any given object represents.
153 *  These should probably go in an external header to allow other extensions
154 *  to use them, but then, we really should be using C++ too.  ;-)
155 *
156 *  The PyAST_FRAGMENT type is not currently supported.  Maybe not useful?
157 *  Haven't decided yet.
158 */
159
160#define PyAST_EXPR      1
161#define PyAST_SUITE     2
162#define PyAST_FRAGMENT  3
163
164
165/*  These are the internal objects and definitions required to implement the
166 *  AST type.  Most of the internal names are more reminiscent of the 'old'
167 *  naming style, but the code uses the new naming convention.
168 */
169
170static PyObject*
171parser_error = 0;
172
173
174typedef struct _PyAST_Object {
175    PyObject_HEAD                       /* standard object header           */
176    node* ast_node;                     /* the node* returned by the parser */
177    int   ast_type;                     /* EXPR or SUITE ?                  */
178} PyAST_Object;
179
180
181staticforward void
182parser_free(PyAST_Object *ast);
183
184staticforward int
185parser_compare(PyAST_Object *left, PyAST_Object *right);
186
187staticforward PyObject *
188parser_getattr(PyObject *self, char *name);
189
190
191static
192PyTypeObject PyAST_Type = {
193    PyObject_HEAD_INIT(NULL)
194    0,
195    "ast",                              /* tp_name              */
196    (int) sizeof(PyAST_Object),         /* tp_basicsize         */
197    0,                                  /* tp_itemsize          */
198    (destructor)parser_free,            /* tp_dealloc           */
199    0,                                  /* tp_print             */
200    parser_getattr,                     /* tp_getattr           */
201    0,                                  /* tp_setattr           */
202    (cmpfunc)parser_compare,            /* tp_compare           */
203    0,                                  /* tp_repr              */
204    0,                                  /* tp_as_number         */
205    0,                                  /* tp_as_sequence       */
206    0,                                  /* tp_as_mapping        */
207    0,                                  /* tp_hash              */
208    0,                                  /* tp_call              */
209    0,                                  /* tp_str               */
210    0,                                  /* tp_getattro          */
211    0,                                  /* tp_setattro          */
212
213    /* Functions to access object as input/output buffer */
214    0,                                  /* tp_as_buffer         */
215
216    Py_TPFLAGS_DEFAULT,                 /* tp_flags             */
217
218    /* __doc__ */
219    "Intermediate representation of a Python parse tree."
220};  /* PyAST_Type */
221
222
223static int
224parser_compare_nodes(node *left, node *right)
225{
226    int j;
227
228    if (TYPE(left) < TYPE(right))
229        return (-1);
230
231    if (TYPE(right) < TYPE(left))
232        return (1);
233
234    if (ISTERMINAL(TYPE(left)))
235        return (strcmp(STR(left), STR(right)));
236
237    if (NCH(left) < NCH(right))
238        return (-1);
239
240    if (NCH(right) < NCH(left))
241        return (1);
242
243    for (j = 0; j < NCH(left); ++j) {
244        int v = parser_compare_nodes(CHILD(left, j), CHILD(right, j));
245
246        if (v != 0)
247            return (v);
248    }
249    return (0);
250}
251
252
253/*  int parser_compare(PyAST_Object* left, PyAST_Object* right)
254 *
255 *  Comparison function used by the Python operators ==, !=, <, >, <=, >=
256 *  This really just wraps a call to parser_compare_nodes() with some easy
257 *  checks and protection code.
258 *
259 */
260static int
261parser_compare(PyAST_Object *left, PyAST_Object *right)
262{
263    if (left == right)
264        return (0);
265
266    if ((left == 0) || (right == 0))
267        return (-1);
268
269    return (parser_compare_nodes(left->ast_node, right->ast_node));
270}
271
272
273/*  parser_newastobject(node* ast)
274 *
275 *  Allocates a new Python object representing an AST.  This is simply the
276 *  'wrapper' object that holds a node* and allows it to be passed around in
277 *  Python code.
278 *
279 */
280static PyObject*
281parser_newastobject(node *ast, int type)
282{
283    PyAST_Object* o = PyObject_New(PyAST_Object, &PyAST_Type);
284
285    if (o != 0) {
286        o->ast_node = ast;
287        o->ast_type = type;
288    }
289    else {
290        PyNode_Free(ast);
291    }
292    return ((PyObject*)o);
293}
294
295
296/*  void parser_free(PyAST_Object* ast)
297 *
298 *  This is called by a del statement that reduces the reference count to 0.
299 *
300 */
301static void
302parser_free(PyAST_Object *ast)
303{
304    PyNode_Free(ast->ast_node);
305    PyObject_Del(ast);
306}
307
308
309/*  parser_ast2tuple(PyObject* self, PyObject* args, PyObject* kw)
310 *
311 *  This provides conversion from a node* to a tuple object that can be
312 *  returned to the Python-level caller.  The AST object is not modified.
313 *
314 */
315static PyObject*
316parser_ast2tuple(PyAST_Object *self, PyObject *args, PyObject *kw)
317{
318    PyObject *line_option = 0;
319    PyObject *res = 0;
320    int ok;
321
322    static char *keywords[] = {"ast", "line_info", NULL};
323
324    if (self == NULL) {
325        ok = PyArg_ParseTupleAndKeywords(args, kw, "O!|O:ast2tuple", keywords,
326                                         &PyAST_Type, &self, &line_option);
327    }
328    else
329        ok = PyArg_ParseTupleAndKeywords(args, kw, "|O:totuple", &keywords[1],
330                                         &line_option);
331    if (ok != 0) {
332        int lineno = 0;
333        if (line_option != NULL) {
334            lineno = (PyObject_IsTrue(line_option) != 0) ? 1 : 0;
335        }
336        /*
337         *  Convert AST into a tuple representation.  Use Guido's function,
338         *  since it's known to work already.
339         */
340        res = node2tuple(((PyAST_Object*)self)->ast_node,
341                         PyTuple_New, PyTuple_SetItem, lineno);
342    }
343    return (res);
344}
345
346
347/*  parser_ast2list(PyObject* self, PyObject* args, PyObject* kw)
348 *
349 *  This provides conversion from a node* to a list object that can be
350 *  returned to the Python-level caller.  The AST object is not modified.
351 *
352 */
353static PyObject*
354parser_ast2list(PyAST_Object *self, PyObject *args, PyObject *kw)
355{
356    PyObject *line_option = 0;
357    PyObject *res = 0;
358    int ok;
359
360    static char *keywords[] = {"ast", "line_info", NULL};
361
362    if (self == NULL)
363        ok = PyArg_ParseTupleAndKeywords(args, kw, "O!|O:ast2list", keywords,
364                                         &PyAST_Type, &self, &line_option);
365    else
366        ok = PyArg_ParseTupleAndKeywords(args, kw, "|O:tolist", &keywords[1],
367                                         &line_option);
368    if (ok) {
369        int lineno = 0;
370        if (line_option != 0) {
371            lineno = PyObject_IsTrue(line_option) ? 1 : 0;
372        }
373        /*
374         *  Convert AST into a tuple representation.  Use Guido's function,
375         *  since it's known to work already.
376         */
377        res = node2tuple(self->ast_node,
378                         PyList_New, PyList_SetItem, lineno);
379    }
380    return (res);
381}
382
383
384/*  parser_compileast(PyObject* self, PyObject* args)
385 *
386 *  This function creates code objects from the parse tree represented by
387 *  the passed-in data object.  An optional file name is passed in as well.
388 *
389 */
390static PyObject*
391parser_compileast(PyAST_Object *self, PyObject *args, PyObject *kw)
392{
393    PyObject*     res = 0;
394    char*         str = "<ast>";
395    int ok;
396
397    static char *keywords[] = {"ast", "filename", NULL};
398
399    if (self == NULL)
400        ok = PyArg_ParseTupleAndKeywords(args, kw, "O!|s:compileast", keywords,
401                                         &PyAST_Type, &self, &str);
402    else
403        ok = PyArg_ParseTupleAndKeywords(args, kw, "|s:compile", &keywords[1],
404                                         &str);
405
406    if (ok)
407        res = (PyObject *)PyNode_Compile(self->ast_node, str);
408
409    return (res);
410}
411
412
413/*  PyObject* parser_isexpr(PyObject* self, PyObject* args)
414 *  PyObject* parser_issuite(PyObject* self, PyObject* args)
415 *
416 *  Checks the passed-in AST object to determine if it is an expression or
417 *  a statement suite, respectively.  The return is a Python truth value.
418 *
419 */
420static PyObject*
421parser_isexpr(PyAST_Object *self, PyObject *args, PyObject *kw)
422{
423    PyObject* res = 0;
424    int ok;
425
426    static char *keywords[] = {"ast", NULL};
427
428    if (self == NULL)
429        ok = PyArg_ParseTupleAndKeywords(args, kw, "O!:isexpr", keywords,
430                                         &PyAST_Type, &self);
431    else
432        ok = PyArg_ParseTupleAndKeywords(args, kw, ":isexpr", &keywords[1]);
433
434    if (ok) {
435        /* Check to see if the AST represents an expression or not. */
436        res = (self->ast_type == PyAST_EXPR) ? Py_True : Py_False;
437        Py_INCREF(res);
438    }
439    return (res);
440}
441
442
443static PyObject*
444parser_issuite(PyAST_Object *self, PyObject *args, PyObject *kw)
445{
446    PyObject* res = 0;
447    int ok;
448
449    static char *keywords[] = {"ast", NULL};
450
451    if (self == NULL)
452        ok = PyArg_ParseTupleAndKeywords(args, kw, "O!:issuite", keywords,
453                                         &PyAST_Type, &self);
454    else
455        ok = PyArg_ParseTupleAndKeywords(args, kw, ":issuite", &keywords[1]);
456
457    if (ok) {
458        /* Check to see if the AST represents an expression or not. */
459        res = (self->ast_type == PyAST_EXPR) ? Py_False : Py_True;
460        Py_INCREF(res);
461    }
462    return (res);
463}
464
465
466#define PUBLIC_METHOD_TYPE (METH_VARARGS|METH_KEYWORDS)
467
468static PyMethodDef
469parser_methods[] = {
470    {"compile",         (PyCFunction)parser_compileast, PUBLIC_METHOD_TYPE,
471        "Compile this AST object into a code object."},
472    {"isexpr",          (PyCFunction)parser_isexpr,     PUBLIC_METHOD_TYPE,
473        "Determines if this AST object was created from an expression."},
474    {"issuite",         (PyCFunction)parser_issuite,    PUBLIC_METHOD_TYPE,
475        "Determines if this AST object was created from a suite."},
476    {"tolist",          (PyCFunction)parser_ast2list,   PUBLIC_METHOD_TYPE,
477        "Creates a list-tree representation of this AST."},
478    {"totuple",         (PyCFunction)parser_ast2tuple,  PUBLIC_METHOD_TYPE,
479        "Creates a tuple-tree representation of this AST."},
480
481    {NULL, NULL, 0, NULL}
482};
483
484
485static PyObject*
486parser_getattr(self, name)
487     PyObject *self;
488     char *name;
489{
490    return (Py_FindMethod(parser_methods, self, name));
491}
492
493
494/*  err_string(char* message)
495 *
496 *  Sets the error string for an exception of type ParserError.
497 *
498 */
499static void
500err_string(message)
501     char *message;
502{
503    PyErr_SetString(parser_error, message);
504}
505
506
507/*  PyObject* parser_do_parse(PyObject* args, int type)
508 *
509 *  Internal function to actually execute the parse and return the result if
510 *  successful, or set an exception if not.
511 *
512 */
513static PyObject*
514parser_do_parse(PyObject *args, PyObject *kw, char *argspec, int type)
515{
516    char*     string = 0;
517    PyObject* res    = 0;
518
519    static char *keywords[] = {"source", NULL};
520
521    if (PyArg_ParseTupleAndKeywords(args, kw, argspec, keywords, &string)) {
522        node* n = PyParser_SimpleParseString(string,
523                                             (type == PyAST_EXPR)
524                                             ? eval_input : file_input);
525
526        if (n != 0)
527            res = parser_newastobject(n, type);
528        else
529            err_string("Could not parse string.");
530    }
531    return (res);
532}
533
534
535/*  PyObject* parser_expr(PyObject* self, PyObject* args)
536 *  PyObject* parser_suite(PyObject* self, PyObject* args)
537 *
538 *  External interfaces to the parser itself.  Which is called determines if
539 *  the parser attempts to recognize an expression ('eval' form) or statement
540 *  suite ('exec' form).  The real work is done by parser_do_parse() above.
541 *
542 */
543static PyObject*
544parser_expr(PyAST_Object *self, PyObject *args, PyObject *kw)
545{
546    NOTE(ARGUNUSED(self))
547    return (parser_do_parse(args, kw, "s:expr", PyAST_EXPR));
548}
549
550
551static PyObject*
552parser_suite(PyAST_Object *self, PyObject *args, PyObject *kw)
553{
554    NOTE(ARGUNUSED(self))
555    return (parser_do_parse(args, kw, "s:suite", PyAST_SUITE));
556}
557
558
559
560/*  This is the messy part of the code.  Conversion from a tuple to an AST
561 *  object requires that the input tuple be valid without having to rely on
562 *  catching an exception from the compiler.  This is done to allow the
563 *  compiler itself to remain fast, since most of its input will come from
564 *  the parser directly, and therefore be known to be syntactically correct.
565 *  This validation is done to ensure that we don't core dump the compile
566 *  phase, returning an exception instead.
567 *
568 *  Two aspects can be broken out in this code:  creating a node tree from
569 *  the tuple passed in, and verifying that it is indeed valid.  It may be
570 *  advantageous to expand the number of AST types to include funcdefs and
571 *  lambdadefs to take advantage of the optimizer, recognizing those ASTs
572 *  here.  They are not necessary, and not quite as useful in a raw form.
573 *  For now, let's get expressions and suites working reliably.
574 */
575
576
577staticforward node* build_node_tree(PyObject *tuple);
578staticforward int   validate_expr_tree(node *tree);
579staticforward int   validate_file_input(node *tree);
580
581
582/*  PyObject* parser_tuple2ast(PyObject* self, PyObject* args)
583 *
584 *  This is the public function, called from the Python code.  It receives a
585 *  single tuple object from the caller, and creates an AST object if the
586 *  tuple can be validated.  It does this by checking the first code of the
587 *  tuple, and, if acceptable, builds the internal representation.  If this
588 *  step succeeds, the internal representation is validated as fully as
589 *  possible with the various validate_*() routines defined below.
590 *
591 *  This function must be changed if support is to be added for PyAST_FRAGMENT
592 *  AST objects.
593 *
594 */
595static PyObject*
596parser_tuple2ast(PyAST_Object *self, PyObject *args, PyObject *kw)
597{
598    NOTE(ARGUNUSED(self))
599    PyObject *ast = 0;
600    PyObject *tuple = 0;
601    PyObject *temp = 0;
602    int ok;
603    int start_sym = 0;
604
605    static char *keywords[] = {"sequence", NULL};
606
607    if (!PyArg_ParseTupleAndKeywords(args, kw, "O:tuple2ast", keywords,
608                                     &tuple))
609        return (0);
610    if (!PySequence_Check(tuple)) {
611        PyErr_SetString(PyExc_ValueError,
612                        "tuple2ast() requires a single sequence argument");
613        return (0);
614    }
615    /*
616     *  This mess of tests is written this way so we can use the abstract
617     *  object interface (AOI).  Unfortunately, the AOI increments reference
618     *  counts, which requires that we store a pointer to retrieved object
619     *  so we can DECREF it after the check.  But we really should accept
620     *  lists as well as tuples at the very least.
621     */
622    ok = PyObject_Length(tuple) >= 2;
623    if (ok) {
624        temp = PySequence_GetItem(tuple, 0);
625        ok = (temp != NULL) && PyInt_Check(temp);
626        if (ok)
627            /* this is used after the initial checks: */
628            start_sym = PyInt_AS_LONG(temp);
629        Py_XDECREF(temp);
630    }
631    if (ok) {
632        temp = PySequence_GetItem(tuple, 1);
633        ok = (temp != NULL) && PySequence_Check(temp);
634        Py_XDECREF(temp);
635    }
636    if (ok) {
637        temp = PySequence_GetItem(tuple, 1);
638        ok = (temp != NULL) && PyObject_Length(temp) >= 2;
639        if (ok) {
640            PyObject *temp2 = PySequence_GetItem(temp, 0);
641            if (temp2 != NULL) {
642                ok = PyInt_Check(temp2);
643                Py_DECREF(temp2);
644            }
645        }
646        Py_XDECREF(temp);
647    }
648    /* If we've failed at some point, get out of here. */
649    if (!ok) {
650        err_string("malformed sequence for tuple2ast()");
651        return (0);
652    }
653    /*
654     *  This might be a valid parse tree, but let's do a quick check
655     *  before we jump the gun.
656     */
657    if (start_sym == eval_input) {
658        /*  Might be an eval form.  */
659        node* expression = build_node_tree(tuple);
660
661        if ((expression != 0) && validate_expr_tree(expression))
662            ast = parser_newastobject(expression, PyAST_EXPR);
663    }
664    else if (start_sym == file_input) {
665        /*  This looks like an exec form so far.  */
666        node* suite_tree = build_node_tree(tuple);
667
668        if ((suite_tree != 0) && validate_file_input(suite_tree))
669            ast = parser_newastobject(suite_tree, PyAST_SUITE);
670    }
671    else
672        /*  This is a fragment, and is not yet supported.  Maybe they
673         *  will be if I find a use for them.
674         */
675        err_string("Fragmentary parse trees not supported.");
676
677    /*  Make sure we throw an exception on all errors.  We should never
678     *  get this, but we'd do well to be sure something is done.
679     */
680    if ((ast == 0) && !PyErr_Occurred())
681        err_string("Unspecified ast error occurred.");
682
683    return (ast);
684}
685
686
687/*  int check_terminal_tuple()
688 *
689 *  Check a tuple to determine that it is indeed a valid terminal
690 *  node.  The node is known to be required as a terminal, so we throw
691 *  an exception if there is a failure.
692 *
693 *  The format of an acceptable terminal tuple is "(is[i])": the fact
694 *  that elem is a tuple and the integer is a valid terminal symbol
695 *  has been established before this function is called.  We must
696 *  check the length of the tuple and the type of the second element
697 *  and optional third element.  We do *NOT* check the actual text of
698 *  the string element, which we could do in many cases.  This is done
699 *  by the validate_*() functions which operate on the internal
700 *  representation.
701 */
702static int
703check_terminal_tuple(PyObject *elem)
704{
705    int   len = PyObject_Length(elem);
706    int   res = 1;
707    char* str = "Illegal terminal symbol; bad node length.";
708
709    if ((len == 2) || (len == 3)) {
710        PyObject *temp = PySequence_GetItem(elem, 1);
711        res = PyString_Check(temp);
712        str = "Illegal terminal symbol; expected a string.";
713        if (res && (len == 3)) {
714            PyObject* third = PySequence_GetItem(elem, 2);
715            res = PyInt_Check(third);
716            str = "Invalid third element of terminal node.";
717            Py_XDECREF(third);
718        }
719        Py_XDECREF(temp);
720    }
721    else {
722        res = 0;
723    }
724    if (!res) {
725        elem = Py_BuildValue("(os)", elem, str);
726        PyErr_SetObject(parser_error, elem);
727    }
728    return (res);
729}
730
731
732/*  node* build_node_children()
733 *
734 *  Iterate across the children of the current non-terminal node and build
735 *  their structures.  If successful, return the root of this portion of
736 *  the tree, otherwise, 0.  Any required exception will be specified already,
737 *  and no memory will have been deallocated.
738 *
739 */
740static node*
741build_node_children(PyObject *tuple, node *root, int *line_num)
742{
743    int len = PyObject_Length(tuple);
744    int i;
745
746    for (i = 1; i < len; ++i) {
747        /* elem must always be a tuple, however simple */
748        PyObject* elem = PySequence_GetItem(tuple, i);
749        int ok = elem != NULL;
750        long  type = 0;
751        char *strn = 0;
752
753        if (ok)
754            ok = PySequence_Check(elem);
755        if (ok) {
756            PyObject *temp = PySequence_GetItem(elem, 0);
757            if (temp == NULL)
758                ok = 0;
759            else {
760                ok = PyInt_Check(temp);
761                if (ok)
762                    type = PyInt_AS_LONG(temp);
763                Py_DECREF(temp);
764            }
765        }
766        if (!ok) {
767            PyErr_SetObject(parser_error,
768                            Py_BuildValue("(os)", elem,
769                                          "Illegal node construct."));
770            Py_XDECREF(elem);
771            return (0);
772        }
773        if (ISTERMINAL(type)) {
774            if (check_terminal_tuple(elem)) {
775                PyObject *temp = PySequence_GetItem(elem, 1);
776
777                /* check_terminal_tuple() already verified it's a string */
778                strn = (char *)PyMem_MALLOC(PyString_GET_SIZE(temp) + 1);
779                if (strn != NULL)
780                    (void) strcpy(strn, PyString_AS_STRING(temp));
781                Py_DECREF(temp);
782
783                if (PyObject_Length(elem) == 3) {
784                    PyObject* temp = PySequence_GetItem(elem, 2);
785                    *line_num = PyInt_AsLong(temp);
786                    Py_DECREF(temp);
787                }
788            }
789            else {
790                Py_XDECREF(elem);
791                return (0);
792            }
793        }
794        else if (!ISNONTERMINAL(type)) {
795            /*
796             *  It has to be one or the other; this is an error.
797             *  Throw an exception.
798             */
799            PyErr_SetObject(parser_error,
800                            Py_BuildValue("(os)", elem,
801                                          "Unknown node type."));
802            Py_XDECREF(elem);
803            return (0);
804        }
805        PyNode_AddChild(root, type, strn, *line_num);
806
807        if (ISNONTERMINAL(type)) {
808            node* new_child = CHILD(root, i - 1);
809
810            if (new_child != build_node_children(elem, new_child, line_num)) {
811                Py_XDECREF(elem);
812                return (0);
813            }
814        }
815        else if (type == NEWLINE) {     /* It's true:  we increment the     */
816            ++(*line_num);              /* line number *after* the newline! */
817        }
818        Py_XDECREF(elem);
819    }
820    return (root);
821}
822
823
824static node*
825build_node_tree(PyObject *tuple)
826{
827    node* res = 0;
828    PyObject *temp = PySequence_GetItem(tuple, 0);
829    long  num = -1;
830
831    if (temp != NULL)
832        num = PyInt_AsLong(temp);
833    Py_XDECREF(temp);
834    if (ISTERMINAL(num)) {
835        /*
836         *  The tuple is simple, but it doesn't start with a start symbol.
837         *  Throw an exception now and be done with it.
838         */
839        tuple = Py_BuildValue("(os)", tuple,
840                    "Illegal ast tuple; cannot start with terminal symbol.");
841        PyErr_SetObject(parser_error, tuple);
842    }
843    else if (ISNONTERMINAL(num)) {
844        /*
845         *  Not efficient, but that can be handled later.
846         */
847        int line_num = 0;
848
849        res = PyNode_New(num);
850        if (res != build_node_children(tuple, res, &line_num)) {
851            PyNode_Free(res);
852            res = 0;
853        }
854    }
855    else
856        /*  The tuple is illegal -- if the number is neither TERMINAL nor
857         *  NONTERMINAL, we can't use it.
858         */
859        PyErr_SetObject(parser_error,
860                        Py_BuildValue("(os)", tuple,
861                                      "Illegal component tuple."));
862
863    return (res);
864}
865
866
867#ifdef HAVE_OLD_CPP
868#define VALIDATER(n)    static int validate_/**/n(node *tree)
869#else
870#define VALIDATER(n)    static int validate_##n(node *tree)
871#endif
872
873
874/*
875 *  Validation routines used within the validation section:
876 */
877staticforward int validate_terminal(node *terminal, int type, char *string);
878
879#define validate_ampersand(ch)  validate_terminal(ch,      AMPER, "&")
880#define validate_circumflex(ch) validate_terminal(ch, CIRCUMFLEX, "^")
881#define validate_colon(ch)      validate_terminal(ch,      COLON, ":")
882#define validate_comma(ch)      validate_terminal(ch,      COMMA, ",")
883#define validate_dedent(ch)     validate_terminal(ch,     DEDENT, "")
884#define validate_equal(ch)      validate_terminal(ch,      EQUAL, "=")
885#define validate_indent(ch)     validate_terminal(ch,     INDENT, (char*)NULL)
886#define validate_lparen(ch)     validate_terminal(ch,       LPAR, "(")
887#define validate_newline(ch)    validate_terminal(ch,    NEWLINE, (char*)NULL)
888#define validate_rparen(ch)     validate_terminal(ch,       RPAR, ")")
889#define validate_semi(ch)       validate_terminal(ch,       SEMI, ";")
890#define validate_star(ch)       validate_terminal(ch,       STAR, "*")
891#define validate_vbar(ch)       validate_terminal(ch,       VBAR, "|")
892#define validate_doublestar(ch) validate_terminal(ch, DOUBLESTAR, "**")
893#define validate_dot(ch)        validate_terminal(ch,        DOT, ".")
894#define validate_name(ch, str)  validate_terminal(ch,       NAME, str)
895
896VALIDATER(node);                VALIDATER(small_stmt);
897VALIDATER(class);               VALIDATER(node);
898VALIDATER(parameters);          VALIDATER(suite);
899VALIDATER(testlist);            VALIDATER(varargslist);
900VALIDATER(fpdef);               VALIDATER(fplist);
901VALIDATER(stmt);                VALIDATER(simple_stmt);
902VALIDATER(expr_stmt);           VALIDATER(power);
903VALIDATER(print_stmt);          VALIDATER(del_stmt);
904VALIDATER(return_stmt);
905VALIDATER(raise_stmt);          VALIDATER(import_stmt);
906VALIDATER(global_stmt);
907VALIDATER(assert_stmt);
908VALIDATER(exec_stmt);           VALIDATER(compound_stmt);
909VALIDATER(while);               VALIDATER(for);
910VALIDATER(try);                 VALIDATER(except_clause);
911VALIDATER(test);                VALIDATER(and_test);
912VALIDATER(not_test);            VALIDATER(comparison);
913VALIDATER(comp_op);             VALIDATER(expr);
914VALIDATER(xor_expr);            VALIDATER(and_expr);
915VALIDATER(shift_expr);          VALIDATER(arith_expr);
916VALIDATER(term);                VALIDATER(factor);
917VALIDATER(atom);                VALIDATER(lambdef);
918VALIDATER(trailer);             VALIDATER(subscript);
919VALIDATER(subscriptlist);       VALIDATER(sliceop);
920VALIDATER(exprlist);            VALIDATER(dictmaker);
921VALIDATER(arglist);             VALIDATER(argument);
922
923
924#define is_even(n)      (((n) & 1) == 0)
925#define is_odd(n)       (((n) & 1) == 1)
926
927
928static int
929validate_ntype(node *n, int t)
930{
931    int res = (TYPE(n) == t);
932
933    if (!res) {
934        char buffer[128];
935        (void) sprintf(buffer, "Expected node type %d, got %d.", t, TYPE(n));
936        err_string(buffer);
937    }
938    return (res);
939}
940
941
942/*  Verifies that the number of child nodes is exactly 'num', raising
943 *  an exception if it isn't.  The exception message does not indicate
944 *  the exact number of nodes, allowing this to be used to raise the
945 *  "right" exception when the wrong number of nodes is present in a
946 *  specific variant of a statement's syntax.  This is commonly used
947 *  in that fashion.
948 */
949static int
950validate_numnodes(node *n, int num, const char *const name)
951{
952    if (NCH(n) != num) {
953        char buff[60];
954        (void) sprintf(buff, "Illegal number of children for %s node.", name);
955        err_string(buff);
956    }
957    return (NCH(n) == num);
958}
959
960
961static int
962validate_terminal(node *terminal, int type, char *string)
963{
964    int res = (validate_ntype(terminal, type)
965               && ((string == 0) || (strcmp(string, STR(terminal)) == 0)));
966
967    if (!res && !PyErr_Occurred()) {
968        char buffer[60];
969        (void) sprintf(buffer, "Illegal terminal: expected \"%s\"", string);
970        err_string(buffer);
971    }
972    return (res);
973}
974
975
976/*  X (',' X) [',']
977 */
978static int
979validate_repeating_list(node *tree, int ntype, int (*vfunc)(),
980                        const char *const name)
981{
982    int nch = NCH(tree);
983    int res = (nch && validate_ntype(tree, ntype)
984               && vfunc(CHILD(tree, 0)));
985
986    if (!res && !PyErr_Occurred())
987        (void) validate_numnodes(tree, 1, name);
988    else {
989        if (is_even(nch))
990            res = validate_comma(CHILD(tree, --nch));
991        if (res && nch > 1) {
992            int pos = 1;
993            for ( ; res && pos < nch; pos += 2)
994                res = (validate_comma(CHILD(tree, pos))
995                       && vfunc(CHILD(tree, pos + 1)));
996        }
997    }
998    return (res);
999}
1000
1001
1002/*  VALIDATE(class)
1003 *
1004 *  classdef:
1005 *      'class' NAME ['(' testlist ')'] ':' suite
1006 */
1007static int
1008validate_class(node *tree)
1009{
1010    int nch = NCH(tree);
1011    int res = validate_ntype(tree, classdef) && ((nch == 4) || (nch == 7));
1012
1013    if (res) {
1014        res = (validate_name(CHILD(tree, 0), "class")
1015               && validate_ntype(CHILD(tree, 1), NAME)
1016               && validate_colon(CHILD(tree, nch - 2))
1017               && validate_suite(CHILD(tree, nch - 1)));
1018    }
1019    else
1020        (void) validate_numnodes(tree, 4, "class");
1021    if (res && (nch == 7)) {
1022        res = (validate_lparen(CHILD(tree, 2))
1023               && validate_testlist(CHILD(tree, 3))
1024               && validate_rparen(CHILD(tree, 4)));
1025    }
1026    return (res);
1027}
1028
1029
1030/*  if_stmt:
1031 *      'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite]
1032 */
1033static int
1034validate_if(node *tree)
1035{
1036    int nch = NCH(tree);
1037    int res = (validate_ntype(tree, if_stmt)
1038               && (nch >= 4)
1039               && validate_name(CHILD(tree, 0), "if")
1040               && validate_test(CHILD(tree, 1))
1041               && validate_colon(CHILD(tree, 2))
1042               && validate_suite(CHILD(tree, 3)));
1043
1044    if (res && ((nch % 4) == 3)) {
1045        /*  ... 'else' ':' suite  */
1046        res = (validate_name(CHILD(tree, nch - 3), "else")
1047               && validate_colon(CHILD(tree, nch - 2))
1048               && validate_suite(CHILD(tree, nch - 1)));
1049        nch -= 3;
1050    }
1051    else if (!res && !PyErr_Occurred())
1052        (void) validate_numnodes(tree, 4, "if");
1053    if ((nch % 4) != 0)
1054        /* Will catch the case for nch < 4 */
1055        res = validate_numnodes(tree, 0, "if");
1056    else if (res && (nch > 4)) {
1057        /*  ... ('elif' test ':' suite)+ ...  */
1058        int j = 4;
1059        while ((j < nch) && res) {
1060            res = (validate_name(CHILD(tree, j), "elif")
1061                   && validate_colon(CHILD(tree, j + 2))
1062                   && validate_test(CHILD(tree, j + 1))
1063                   && validate_suite(CHILD(tree, j + 3)));
1064            j += 4;
1065        }
1066    }
1067    return (res);
1068}
1069
1070
1071/*  parameters:
1072 *      '(' [varargslist] ')'
1073 *
1074 */
1075static int
1076validate_parameters(node *tree)
1077{
1078    int nch = NCH(tree);
1079    int res = validate_ntype(tree, parameters) && ((nch == 2) || (nch == 3));
1080
1081    if (res) {
1082        res = (validate_lparen(CHILD(tree, 0))
1083               && validate_rparen(CHILD(tree, nch - 1)));
1084        if (res && (nch == 3))
1085            res = validate_varargslist(CHILD(tree, 1));
1086    }
1087    else {
1088        (void) validate_numnodes(tree, 2, "parameters");
1089    }
1090    return (res);
1091}
1092
1093
1094/*  VALIDATE(suite)
1095 *
1096 *  suite:
1097 *      simple_stmt
1098 *    | NEWLINE INDENT stmt+ DEDENT
1099 */
1100static int
1101validate_suite(node *tree)
1102{
1103    int nch = NCH(tree);
1104    int res = (validate_ntype(tree, suite) && ((nch == 1) || (nch >= 4)));
1105
1106    if (res && (nch == 1))
1107        res = validate_simple_stmt(CHILD(tree, 0));
1108    else if (res) {
1109        /*  NEWLINE INDENT stmt+ DEDENT  */
1110        res = (validate_newline(CHILD(tree, 0))
1111               && validate_indent(CHILD(tree, 1))
1112               && validate_stmt(CHILD(tree, 2))
1113               && validate_dedent(CHILD(tree, nch - 1)));
1114
1115        if (res && (nch > 4)) {
1116            int i = 3;
1117            --nch;                      /* forget the DEDENT     */
1118            for ( ; res && (i < nch); ++i)
1119                res = validate_stmt(CHILD(tree, i));
1120        }
1121        else if (nch < 4)
1122            res = validate_numnodes(tree, 4, "suite");
1123    }
1124    return (res);
1125}
1126
1127
1128static int
1129validate_testlist(node *tree)
1130{
1131    return (validate_repeating_list(tree, testlist,
1132                                    validate_test, "testlist"));
1133}
1134
1135
1136/*  VALIDATE(varargslist)
1137 *
1138 *  varargslist:
1139 *      (fpdef ['=' test] ',')* ('*' NAME [',' '*' '*' NAME] | '*' '*' NAME)
1140 *    | fpdef ['=' test] (',' fpdef ['=' test])* [',']
1141 *
1142 *      (fpdef ['=' test] ',')*
1143 *           ('*' NAME [',' ('**'|'*' '*') NAME]
1144 *         | ('**'|'*' '*') NAME)
1145 *    | fpdef ['=' test] (',' fpdef ['=' test])* [',']
1146 *
1147 */
1148static int
1149validate_varargslist(node *tree)
1150{
1151    int nch = NCH(tree);
1152    int res = validate_ntype(tree, varargslist) && (nch != 0);
1153
1154    if (res && (nch >= 2) && (TYPE(CHILD(tree, nch - 1)) == NAME)) {
1155        /*  (fpdef ['=' test] ',')*
1156         *  ('*' NAME [',' '*' '*' NAME] | '*' '*' NAME)
1157         */
1158        int pos = 0;
1159        int remaining = nch;
1160
1161        while (res && (TYPE(CHILD(tree, pos)) == fpdef)) {
1162            res = validate_fpdef(CHILD(tree, pos));
1163            if (res) {
1164                if (TYPE(CHILD(tree, pos + 1)) == EQUAL) {
1165                    res = validate_test(CHILD(tree, pos + 2));
1166                    pos += 2;
1167                }
1168                res = res && validate_comma(CHILD(tree, pos + 1));
1169                pos += 2;
1170            }
1171        }
1172        if (res) {
1173            remaining = nch - pos;
1174            res = ((remaining == 2) || (remaining == 3)
1175                   || (remaining == 5) || (remaining == 6));
1176            if (!res)
1177                (void) validate_numnodes(tree, 2, "varargslist");
1178            else if (TYPE(CHILD(tree, pos)) == DOUBLESTAR)
1179                return ((remaining == 2)
1180                        && validate_ntype(CHILD(tree, pos+1), NAME));
1181            else {
1182                res = validate_star(CHILD(tree, pos++));
1183                --remaining;
1184            }
1185        }
1186        if (res) {
1187            if (remaining == 2) {
1188                res = (validate_star(CHILD(tree, pos))
1189                       && validate_ntype(CHILD(tree, pos + 1), NAME));
1190            }
1191            else {
1192                res = validate_ntype(CHILD(tree, pos++), NAME);
1193                if (res && (remaining >= 4)) {
1194                    res = validate_comma(CHILD(tree, pos));
1195                    if (--remaining == 3)
1196                        res = (validate_star(CHILD(tree, pos + 1))
1197                               && validate_star(CHILD(tree, pos + 2)));
1198                    else
1199                        res = validate_ntype(CHILD(tree, pos + 1), DOUBLESTAR);
1200                }
1201            }
1202        }
1203        if (!res && !PyErr_Occurred())
1204            err_string("Incorrect validation of variable arguments list.");
1205    }
1206    else if (res) {
1207        /*  fpdef ['=' test] (',' fpdef ['=' test])* [',']  */
1208        if (TYPE(CHILD(tree, nch - 1)) == COMMA)
1209            --nch;
1210
1211        /*  fpdef ['=' test] (',' fpdef ['=' test])*  */
1212        res = (is_odd(nch)
1213               && validate_fpdef(CHILD(tree, 0)));
1214
1215        if (res && (nch > 1)) {
1216            int pos = 1;
1217            if (TYPE(CHILD(tree, 1)) == EQUAL) {
1218                res = validate_test(CHILD(tree, 2));
1219                pos += 2;
1220            }
1221            /*  ... (',' fpdef ['=' test])*  */
1222            for ( ; res && (pos < nch); pos += 2) {
1223                /* ',' fpdef */
1224                res = (validate_comma(CHILD(tree, pos))
1225                       && validate_fpdef(CHILD(tree, pos + 1)));
1226                if (res
1227                    && ((nch - pos) > 2)
1228                    && (TYPE(CHILD(tree, pos + 2)) == EQUAL)) {
1229                    /* ['=' test] */
1230                    res = validate_test(CHILD(tree, pos + 3));
1231                    pos += 2;
1232                }
1233            }
1234        }
1235    }
1236    else {
1237        err_string("Improperly formed argument list.");
1238    }
1239    return (res);
1240}
1241
1242
1243/*  VALIDATE(fpdef)
1244 *
1245 *  fpdef:
1246 *      NAME
1247 *    | '(' fplist ')'
1248 */
1249static int
1250validate_fpdef(node *tree)
1251{
1252    int nch = NCH(tree);
1253    int res = validate_ntype(tree, fpdef);
1254
1255    if (res) {
1256        if (nch == 1)
1257            res = validate_ntype(CHILD(tree, 0), NAME);
1258        else if (nch == 3)
1259            res = (validate_lparen(CHILD(tree, 0))
1260                   && validate_fplist(CHILD(tree, 1))
1261                   && validate_rparen(CHILD(tree, 2)));
1262        else
1263            res = validate_numnodes(tree, 1, "fpdef");
1264    }
1265    return (res);
1266}
1267
1268
1269static int
1270validate_fplist(node *tree)
1271{
1272    return (validate_repeating_list(tree, fplist,
1273                                    validate_fpdef, "fplist"));
1274}
1275
1276
1277/*  simple_stmt | compound_stmt
1278 *
1279 */
1280static int
1281validate_stmt(node *tree)
1282{
1283    int res = (validate_ntype(tree, stmt)
1284               && validate_numnodes(tree, 1, "stmt"));
1285
1286    if (res) {
1287        tree = CHILD(tree, 0);
1288
1289        if (TYPE(tree) == simple_stmt)
1290            res = validate_simple_stmt(tree);
1291        else
1292            res = validate_compound_stmt(tree);
1293    }
1294    return (res);
1295}
1296
1297
1298/*  small_stmt (';' small_stmt)* [';'] NEWLINE
1299 *
1300 */
1301static int
1302validate_simple_stmt(node *tree)
1303{
1304    int nch = NCH(tree);
1305    int res = (validate_ntype(tree, simple_stmt)
1306               && (nch >= 2)
1307               && validate_small_stmt(CHILD(tree, 0))
1308               && validate_newline(CHILD(tree, nch - 1)));
1309
1310    if (nch < 2)
1311        res = validate_numnodes(tree, 2, "simple_stmt");
1312    --nch;                              /* forget the NEWLINE    */
1313    if (res && is_even(nch))
1314        res = validate_semi(CHILD(tree, --nch));
1315    if (res && (nch > 2)) {
1316        int i;
1317
1318        for (i = 1; res && (i < nch); i += 2)
1319            res = (validate_semi(CHILD(tree, i))
1320                   && validate_small_stmt(CHILD(tree, i + 1)));
1321    }
1322    return (res);
1323}
1324
1325
1326static int
1327validate_small_stmt(node *tree)
1328{
1329    int nch = NCH(tree);
1330    int res = (validate_numnodes(tree, 1, "small_stmt")
1331               && ((TYPE(CHILD(tree, 0)) == expr_stmt)
1332                   || (TYPE(CHILD(tree, 0)) == print_stmt)
1333                   || (TYPE(CHILD(tree, 0)) == del_stmt)
1334                   || (TYPE(CHILD(tree, 0)) == pass_stmt)
1335                   || (TYPE(CHILD(tree, 0)) == flow_stmt)
1336                   || (TYPE(CHILD(tree, 0)) == import_stmt)
1337                   || (TYPE(CHILD(tree, 0)) == global_stmt)
1338                   || (TYPE(CHILD(tree, 0)) == assert_stmt)
1339                   || (TYPE(CHILD(tree, 0)) == exec_stmt)));
1340
1341    if (res)
1342        res = validate_node(CHILD(tree, 0));
1343    else if (nch == 1) {
1344        char buffer[60];
1345        (void) sprintf(buffer, "Unrecognized child node of small_stmt: %d.",
1346                       TYPE(CHILD(tree, 0)));
1347        err_string(buffer);
1348    }
1349    return (res);
1350}
1351
1352
1353/*  compound_stmt:
1354 *      if_stmt | while_stmt | for_stmt | try_stmt | funcdef | classdef
1355 */
1356static int
1357validate_compound_stmt(node *tree)
1358{
1359    int res = (validate_ntype(tree, compound_stmt)
1360               && validate_numnodes(tree, 1, "compound_stmt"));
1361
1362    if (!res)
1363        return (0);
1364
1365    tree = CHILD(tree, 0);
1366    res = ((TYPE(tree) == if_stmt)
1367           || (TYPE(tree) == while_stmt)
1368           || (TYPE(tree) == for_stmt)
1369           || (TYPE(tree) == try_stmt)
1370           || (TYPE(tree) == funcdef)
1371           || (TYPE(tree) == classdef));
1372    if (res)
1373        res = validate_node(tree);
1374    else {
1375        char buffer[60];
1376        (void) sprintf(buffer, "Illegal compound statement type: %d.",
1377                       TYPE(tree));
1378        err_string(buffer);
1379    }
1380    return (res);
1381}
1382
1383
1384static int
1385validate_expr_stmt(node *tree)
1386{
1387    int j;
1388    int nch = NCH(tree);
1389    int res = (validate_ntype(tree, expr_stmt)
1390               && is_odd(nch)
1391               && validate_testlist(CHILD(tree, 0)));
1392
1393    for (j = 1; res && (j < nch); j += 2)
1394        res = (validate_equal(CHILD(tree, j))
1395               && validate_testlist(CHILD(tree, j + 1)));
1396
1397    return (res);
1398}
1399
1400
1401/*  print_stmt:
1402 *
1403 *      'print' (test ',')* [test]
1404 *
1405 */
1406static int
1407validate_print_stmt(node *tree)
1408{
1409    int j;
1410    int nch = NCH(tree);
1411    int res = (validate_ntype(tree, print_stmt)
1412               && (nch != 0)
1413               && validate_name(CHILD(tree, 0), "print"));
1414
1415    if (res && is_even(nch)) {
1416        res = validate_test(CHILD(tree, nch - 1));
1417        --nch;
1418    }
1419    else if (!res && !PyErr_Occurred())
1420        (void) validate_numnodes(tree, 1, "print_stmt");
1421    for (j = 1; res && (j < nch); j += 2)
1422        res = (validate_test(CHILD(tree, j))
1423               && validate_ntype(CHILD(tree, j + 1), COMMA));
1424
1425    return (res);
1426}
1427
1428
1429static int
1430validate_del_stmt(node *tree)
1431{
1432    return (validate_numnodes(tree, 2, "del_stmt")
1433            && validate_name(CHILD(tree, 0), "del")
1434            && validate_exprlist(CHILD(tree, 1)));
1435}
1436
1437
1438static int
1439validate_return_stmt(node *tree)
1440{
1441    int nch = NCH(tree);
1442    int res = (validate_ntype(tree, return_stmt)
1443               && ((nch == 1) || (nch == 2))
1444               && validate_name(CHILD(tree, 0), "return"));
1445
1446    if (res && (nch == 2))
1447        res = validate_testlist(CHILD(tree, 1));
1448
1449    return (res);
1450}
1451
1452
1453static int
1454validate_raise_stmt(node *tree)
1455{
1456    int nch = NCH(tree);
1457    int res = (validate_ntype(tree, raise_stmt)
1458               && ((nch == 1) || (nch == 2) || (nch == 4) || (nch == 6)));
1459
1460    if (res) {
1461        res = validate_name(CHILD(tree, 0), "raise");
1462        if (res && (nch >= 2))
1463            res = validate_test(CHILD(tree, 1));
1464        if (res && nch > 2) {
1465            res = (validate_comma(CHILD(tree, 2))
1466                   && validate_test(CHILD(tree, 3)));
1467            if (res && (nch > 4))
1468                res = (validate_comma(CHILD(tree, 4))
1469                       && validate_test(CHILD(tree, 5)));
1470        }
1471    }
1472    else
1473        (void) validate_numnodes(tree, 2, "raise");
1474    if (res && (nch == 4))
1475        res = (validate_comma(CHILD(tree, 2))
1476               && validate_test(CHILD(tree, 3)));
1477
1478    return (res);
1479}
1480
1481
1482/*  import_stmt:
1483 *
1484 *    'import' dotted_name (',' dotted_name)*
1485 *  | 'from' dotted_name 'import' ('*' | NAME (',' NAME)*)
1486 */
1487static int
1488validate_import_stmt(node *tree)
1489{
1490    int nch = NCH(tree);
1491    int res = (validate_ntype(tree, import_stmt)
1492               && (nch >= 2) && is_even(nch)
1493               && validate_ntype(CHILD(tree, 0), NAME)
1494               && validate_ntype(CHILD(tree, 1), dotted_name));
1495
1496    if (res && (strcmp(STR(CHILD(tree, 0)), "import") == 0)) {
1497        int j;
1498
1499        for (j = 2; res && (j < nch); j += 2)
1500            res = (validate_comma(CHILD(tree, j))
1501                   && validate_ntype(CHILD(tree, j + 1), dotted_name));
1502    }
1503    else if (res && validate_name(CHILD(tree, 0), "from")) {
1504        res = ((nch >= 4) && is_even(nch)
1505               && validate_name(CHILD(tree, 2), "import"));
1506        if (nch == 4) {
1507            res = ((TYPE(CHILD(tree, 3)) == NAME)
1508                   || (TYPE(CHILD(tree, 3)) == STAR));
1509            if (!res)
1510                err_string("Illegal import statement.");
1511        }
1512        else {
1513            /*  'from' NAME 'import' NAME (',' NAME)+  */
1514            int j;
1515            res = validate_ntype(CHILD(tree, 3), NAME);
1516            for (j = 4; res && (j < nch); j += 2)
1517                res = (validate_comma(CHILD(tree, j))
1518                       && validate_ntype(CHILD(tree, j + 1), NAME));
1519        }
1520    }
1521    else
1522        res = 0;
1523
1524    return (res);
1525}
1526
1527
1528static int
1529validate_global_stmt(node *tree)
1530{
1531    int j;
1532    int nch = NCH(tree);
1533    int res = (validate_ntype(tree, global_stmt)
1534               && is_even(nch) && (nch >= 2));
1535
1536    if (res)
1537        res = (validate_name(CHILD(tree, 0), "global")
1538               && validate_ntype(CHILD(tree, 1), NAME));
1539    for (j = 2; res && (j < nch); j += 2)
1540        res = (validate_comma(CHILD(tree, j))
1541               && validate_ntype(CHILD(tree, j + 1), NAME));
1542
1543    return (res);
1544}
1545
1546
1547/*  exec_stmt:
1548 *
1549 *  'exec' expr ['in' test [',' test]]
1550 */
1551static int
1552validate_exec_stmt(node *tree)
1553{
1554    int nch = NCH(tree);
1555    int res = (validate_ntype(tree, exec_stmt)
1556               && ((nch == 2) || (nch == 4) || (nch == 6))
1557               && validate_name(CHILD(tree, 0), "exec")
1558               && validate_expr(CHILD(tree, 1)));
1559
1560    if (!res && !PyErr_Occurred())
1561        err_string("Illegal exec statement.");
1562    if (res && (nch > 2))
1563        res = (validate_name(CHILD(tree, 2), "in")
1564               && validate_test(CHILD(tree, 3)));
1565    if (res && (nch == 6))
1566        res = (validate_comma(CHILD(tree, 4))
1567               && validate_test(CHILD(tree, 5)));
1568
1569    return (res);
1570}
1571
1572
1573/*  assert_stmt:
1574 *
1575 *  'assert' test [',' test]
1576 */
1577static int
1578validate_assert_stmt(node *tree)
1579{
1580    int nch = NCH(tree);
1581    int res = (validate_ntype(tree, assert_stmt)
1582               && ((nch == 2) || (nch == 4))
1583               && (validate_name(CHILD(tree, 0), "__assert__") ||
1584                   validate_name(CHILD(tree, 0), "assert"))
1585               && validate_test(CHILD(tree, 1)));
1586
1587    if (!res && !PyErr_Occurred())
1588        err_string("Illegal assert statement.");
1589    if (res && (nch > 2))
1590        res = (validate_comma(CHILD(tree, 2))
1591               && validate_test(CHILD(tree, 3)));
1592
1593    return (res);
1594}
1595
1596
1597static int
1598validate_while(node *tree)
1599{
1600    int nch = NCH(tree);
1601    int res = (validate_ntype(tree, while_stmt)
1602               && ((nch == 4) || (nch == 7))
1603               && validate_name(CHILD(tree, 0), "while")
1604               && validate_test(CHILD(tree, 1))
1605               && validate_colon(CHILD(tree, 2))
1606               && validate_suite(CHILD(tree, 3)));
1607
1608    if (res && (nch == 7))
1609        res = (validate_name(CHILD(tree, 4), "else")
1610               && validate_colon(CHILD(tree, 5))
1611               && validate_suite(CHILD(tree, 6)));
1612
1613    return (res);
1614}
1615
1616
1617static int
1618validate_for(node *tree)
1619{
1620    int nch = NCH(tree);
1621    int res = (validate_ntype(tree, for_stmt)
1622               && ((nch == 6) || (nch == 9))
1623               && validate_name(CHILD(tree, 0), "for")
1624               && validate_exprlist(CHILD(tree, 1))
1625               && validate_name(CHILD(tree, 2), "in")
1626               && validate_testlist(CHILD(tree, 3))
1627               && validate_colon(CHILD(tree, 4))
1628               && validate_suite(CHILD(tree, 5)));
1629
1630    if (res && (nch == 9))
1631        res = (validate_name(CHILD(tree, 6), "else")
1632               && validate_colon(CHILD(tree, 7))
1633               && validate_suite(CHILD(tree, 8)));
1634
1635    return (res);
1636}
1637
1638
1639/*  try_stmt:
1640 *      'try' ':' suite (except_clause ':' suite)+ ['else' ':' suite]
1641 *    | 'try' ':' suite 'finally' ':' suite
1642 *
1643 */
1644static int
1645validate_try(tree)
1646    node *tree;
1647{
1648    int nch = NCH(tree);
1649    int pos = 3;
1650    int res = (validate_ntype(tree, try_stmt)
1651               && (nch >= 6) && ((nch % 3) == 0));
1652
1653    if (res)
1654        res = (validate_name(CHILD(tree, 0), "try")
1655               && validate_colon(CHILD(tree, 1))
1656               && validate_suite(CHILD(tree, 2))
1657               && validate_colon(CHILD(tree, nch - 2))
1658               && validate_suite(CHILD(tree, nch - 1)));
1659    else {
1660        const char* name = "execpt";
1661        char buffer[60];
1662        if (TYPE(CHILD(tree, nch - 3)) != except_clause)
1663            name = STR(CHILD(tree, nch - 3));
1664        (void) sprintf(buffer,
1665                       "Illegal number of children for try/%s node.", name);
1666        err_string(buffer);
1667    }
1668    /*  Skip past except_clause sections:  */
1669    while (res && (TYPE(CHILD(tree, pos)) == except_clause)) {
1670        res = (validate_except_clause(CHILD(tree, pos))
1671               && validate_colon(CHILD(tree, pos + 1))
1672               && validate_suite(CHILD(tree, pos + 2)));
1673        pos += 3;
1674    }
1675    if (res && (pos < nch)) {
1676        res = validate_ntype(CHILD(tree, pos), NAME);
1677        if (res && (strcmp(STR(CHILD(tree, pos)), "finally") == 0))
1678            res = (validate_numnodes(tree, 6, "try/finally")
1679                   && validate_colon(CHILD(tree, 4))
1680                   && validate_suite(CHILD(tree, 5)));
1681        else if (res) {
1682            if (nch == (pos + 3)) {
1683                res = ((strcmp(STR(CHILD(tree, pos)), "except") == 0)
1684                       || (strcmp(STR(CHILD(tree, pos)), "else") == 0));
1685                if (!res)
1686                    err_string("Illegal trailing triple in try statement.");
1687            }
1688            else if (nch == (pos + 6)) {
1689                res = (validate_name(CHILD(tree, pos), "except")
1690                       && validate_colon(CHILD(tree, pos + 1))
1691                       && validate_suite(CHILD(tree, pos + 2))
1692                       && validate_name(CHILD(tree, pos + 3), "else"));
1693            }
1694            else
1695                res = validate_numnodes(tree, pos + 3, "try/except");
1696        }
1697    }
1698    return (res);
1699}
1700
1701
1702static int
1703validate_except_clause(node *tree)
1704{
1705    int nch = NCH(tree);
1706    int res = (validate_ntype(tree, except_clause)
1707               && ((nch == 1) || (nch == 2) || (nch == 4))
1708               && validate_name(CHILD(tree, 0), "except"));
1709
1710    if (res && (nch > 1))
1711        res = validate_test(CHILD(tree, 1));
1712    if (res && (nch == 4))
1713        res = (validate_comma(CHILD(tree, 2))
1714               && validate_test(CHILD(tree, 3)));
1715
1716    return (res);
1717}
1718
1719
1720static int
1721validate_test(node *tree)
1722{
1723    int nch = NCH(tree);
1724    int res = validate_ntype(tree, test) && is_odd(nch);
1725
1726    if (res && (TYPE(CHILD(tree, 0)) == lambdef))
1727        res = ((nch == 1)
1728               && validate_lambdef(CHILD(tree, 0)));
1729    else if (res) {
1730        int pos;
1731        res = validate_and_test(CHILD(tree, 0));
1732        for (pos = 1; res && (pos < nch); pos += 2)
1733            res = (validate_name(CHILD(tree, pos), "or")
1734                   && validate_and_test(CHILD(tree, pos + 1)));
1735    }
1736    return (res);
1737}
1738
1739
1740static int
1741validate_and_test(node *tree)
1742{
1743    int pos;
1744    int nch = NCH(tree);
1745    int res = (validate_ntype(tree, and_test)
1746               && is_odd(nch)
1747               && validate_not_test(CHILD(tree, 0)));
1748
1749    for (pos = 1; res && (pos < nch); pos += 2)
1750        res = (validate_name(CHILD(tree, pos), "and")
1751               && validate_not_test(CHILD(tree, 0)));
1752
1753    return (res);
1754}
1755
1756
1757static int
1758validate_not_test(node *tree)
1759{
1760    int nch = NCH(tree);
1761    int res = validate_ntype(tree, not_test) && ((nch == 1) || (nch == 2));
1762
1763    if (res) {
1764        if (nch == 2)
1765            res = (validate_name(CHILD(tree, 0), "not")
1766                   && validate_not_test(CHILD(tree, 1)));
1767        else if (nch == 1)
1768            res = validate_comparison(CHILD(tree, 0));
1769    }
1770    return (res);
1771}
1772
1773
1774static int
1775validate_comparison(node *tree)
1776{
1777    int pos;
1778    int nch = NCH(tree);
1779    int res = (validate_ntype(tree, comparison)
1780               && is_odd(nch)
1781               && validate_expr(CHILD(tree, 0)));
1782
1783    for (pos = 1; res && (pos < nch); pos += 2)
1784        res = (validate_comp_op(CHILD(tree, pos))
1785               && validate_expr(CHILD(tree, pos + 1)));
1786
1787    return (res);
1788}
1789
1790
1791static int
1792validate_comp_op(node *tree)
1793{
1794    int res = 0;
1795    int nch = NCH(tree);
1796
1797    if (!validate_ntype(tree, comp_op))
1798        return (0);
1799    if (nch == 1) {
1800        /*
1801         *  Only child will be a terminal with a well-defined symbolic name
1802         *  or a NAME with a string of either 'is' or 'in'
1803         */
1804        tree = CHILD(tree, 0);
1805        switch (TYPE(tree)) {
1806            case LESS:
1807            case GREATER:
1808            case EQEQUAL:
1809            case EQUAL:
1810            case LESSEQUAL:
1811            case GREATEREQUAL:
1812            case NOTEQUAL:
1813              res = 1;
1814              break;
1815            case NAME:
1816              res = ((strcmp(STR(tree), "in") == 0)
1817                     || (strcmp(STR(tree), "is") == 0));
1818              if (!res) {
1819                  char buff[128];
1820                  (void) sprintf(buff, "Illegal operator: '%s'.", STR(tree));
1821                  err_string(buff);
1822              }
1823              break;
1824          default:
1825              err_string("Illegal comparison operator type.");
1826              break;
1827        }
1828    }
1829    else if ((res = validate_numnodes(tree, 2, "comp_op")) != 0) {
1830        res = (validate_ntype(CHILD(tree, 0), NAME)
1831               && validate_ntype(CHILD(tree, 1), NAME)
1832               && (((strcmp(STR(CHILD(tree, 0)), "is") == 0)
1833                    && (strcmp(STR(CHILD(tree, 1)), "not") == 0))
1834                   || ((strcmp(STR(CHILD(tree, 0)), "not") == 0)
1835                       && (strcmp(STR(CHILD(tree, 1)), "in") == 0))));
1836        if (!res && !PyErr_Occurred())
1837            err_string("Unknown comparison operator.");
1838    }
1839    return (res);
1840}
1841
1842
1843static int
1844validate_expr(node *tree)
1845{
1846    int j;
1847    int nch = NCH(tree);
1848    int res = (validate_ntype(tree, expr)
1849               && is_odd(nch)
1850               && validate_xor_expr(CHILD(tree, 0)));
1851
1852    for (j = 2; res && (j < nch); j += 2)
1853        res = (validate_xor_expr(CHILD(tree, j))
1854               && validate_vbar(CHILD(tree, j - 1)));
1855
1856    return (res);
1857}
1858
1859
1860static int
1861validate_xor_expr(node *tree)
1862{
1863    int j;
1864    int nch = NCH(tree);
1865    int res = (validate_ntype(tree, xor_expr)
1866               && is_odd(nch)
1867               && validate_and_expr(CHILD(tree, 0)));
1868
1869    for (j = 2; res && (j < nch); j += 2)
1870        res = (validate_circumflex(CHILD(tree, j - 1))
1871               && validate_and_expr(CHILD(tree, j)));
1872
1873    return (res);
1874}
1875
1876
1877static int
1878validate_and_expr(node *tree)
1879{
1880    int pos;
1881    int nch = NCH(tree);
1882    int res = (validate_ntype(tree, and_expr)
1883               && is_odd(nch)
1884               && validate_shift_expr(CHILD(tree, 0)));
1885
1886    for (pos = 1; res && (pos < nch); pos += 2)
1887        res = (validate_ampersand(CHILD(tree, pos))
1888               && validate_shift_expr(CHILD(tree, pos + 1)));
1889
1890    return (res);
1891}
1892
1893
1894static int
1895validate_chain_two_ops(tree, termvalid, op1, op2)
1896     node *tree;
1897     int (*termvalid)();
1898     int op1;
1899     int op2;
1900 {
1901    int pos = 1;
1902    int nch = NCH(tree);
1903    int res = (is_odd(nch)
1904               && (*termvalid)(CHILD(tree, 0)));
1905
1906    for ( ; res && (pos < nch); pos += 2) {
1907        if (TYPE(CHILD(tree, pos)) != op1)
1908            res = validate_ntype(CHILD(tree, pos), op2);
1909        if (res)
1910            res = (*termvalid)(CHILD(tree, pos + 1));
1911    }
1912    return (res);
1913}
1914
1915
1916static int
1917validate_shift_expr(node *tree)
1918{
1919    return (validate_ntype(tree, shift_expr)
1920            && validate_chain_two_ops(tree, validate_arith_expr,
1921                                      LEFTSHIFT, RIGHTSHIFT));
1922}
1923
1924
1925static int
1926validate_arith_expr(node *tree)
1927{
1928    return (validate_ntype(tree, arith_expr)
1929            && validate_chain_two_ops(tree, validate_term, PLUS, MINUS));
1930}
1931
1932
1933static int
1934validate_term(node *tree)
1935{
1936    int pos = 1;
1937    int nch = NCH(tree);
1938    int res = (validate_ntype(tree, term)
1939               && is_odd(nch)
1940               && validate_factor(CHILD(tree, 0)));
1941
1942    for ( ; res && (pos < nch); pos += 2)
1943        res = (((TYPE(CHILD(tree, pos)) == STAR)
1944               || (TYPE(CHILD(tree, pos)) == SLASH)
1945               || (TYPE(CHILD(tree, pos)) == PERCENT))
1946               && validate_factor(CHILD(tree, pos + 1)));
1947
1948    return (res);
1949}
1950
1951
1952/*  factor:
1953 *
1954 *  factor: ('+'|'-'|'~') factor | power
1955 */
1956static int
1957validate_factor(node *tree)
1958{
1959    int nch = NCH(tree);
1960    int res = (validate_ntype(tree, factor)
1961               && (((nch == 2)
1962                    && ((TYPE(CHILD(tree, 0)) == PLUS)
1963                        || (TYPE(CHILD(tree, 0)) == MINUS)
1964                        || (TYPE(CHILD(tree, 0)) == TILDE))
1965                    && validate_factor(CHILD(tree, 1)))
1966                   || ((nch == 1)
1967                       && validate_power(CHILD(tree, 0)))));
1968    return (res);
1969}
1970
1971
1972/*  power:
1973 *
1974 *  power: atom trailer* ('**' factor)*
1975 */
1976static int
1977validate_power(node *tree)
1978{
1979    int pos = 1;
1980    int nch = NCH(tree);
1981    int res = (validate_ntype(tree, power) && (nch >= 1)
1982               && validate_atom(CHILD(tree, 0)));
1983
1984    while (res && (pos < nch) && (TYPE(CHILD(tree, pos)) == trailer))
1985        res = validate_trailer(CHILD(tree, pos++));
1986    if (res && (pos < nch)) {
1987        if (!is_even(nch - pos)) {
1988            err_string("Illegal number of nodes for 'power'.");
1989            return (0);
1990        }
1991        for ( ; res && (pos < (nch - 1)); pos += 2)
1992            res = (validate_doublestar(CHILD(tree, pos))
1993                   && validate_factor(CHILD(tree, pos + 1)));
1994    }
1995    return (res);
1996}
1997
1998
1999static int
2000validate_atom(node *tree)
2001{
2002    int pos;
2003    int nch = NCH(tree);
2004    int res = validate_ntype(tree, atom) && (nch >= 1);
2005
2006    if (res) {
2007        switch (TYPE(CHILD(tree, 0))) {
2008          case LPAR:
2009            res = ((nch <= 3)
2010                   && (validate_rparen(CHILD(tree, nch - 1))));
2011
2012            if (res && (nch == 3))
2013                res = validate_testlist(CHILD(tree, 1));
2014            break;
2015          case LSQB:
2016            res = ((nch <= 3)
2017                   && validate_ntype(CHILD(tree, nch - 1), RSQB));
2018
2019            if (res && (nch == 3))
2020                res = validate_testlist(CHILD(tree, 1));
2021            break;
2022          case LBRACE:
2023            res = ((nch <= 3)
2024                   && validate_ntype(CHILD(tree, nch - 1), RBRACE));
2025
2026            if (res && (nch == 3))
2027                res = validate_dictmaker(CHILD(tree, 1));
2028            break;
2029          case BACKQUOTE:
2030            res = ((nch == 3)
2031                   && validate_testlist(CHILD(tree, 1))
2032                   && validate_ntype(CHILD(tree, 2), BACKQUOTE));
2033            break;
2034          case NAME:
2035          case NUMBER:
2036            res = (nch == 1);
2037            break;
2038          case STRING:
2039            for (pos = 1; res && (pos < nch); ++pos)
2040                res = validate_ntype(CHILD(tree, pos), STRING);
2041            break;
2042          default:
2043            res = 0;
2044            break;
2045        }
2046    }
2047    return (res);
2048}
2049
2050
2051/*  funcdef:
2052 *      'def' NAME parameters ':' suite
2053 *
2054 */
2055static int
2056validate_funcdef(node *tree)
2057{
2058    return (validate_ntype(tree, funcdef)
2059            && validate_numnodes(tree, 5, "funcdef")
2060            && validate_name(CHILD(tree, 0), "def")
2061            && validate_ntype(CHILD(tree, 1), NAME)
2062            && validate_colon(CHILD(tree, 3))
2063            && validate_parameters(CHILD(tree, 2))
2064            && validate_suite(CHILD(tree, 4)));
2065}
2066
2067
2068static int
2069validate_lambdef(node *tree)
2070{
2071    int nch = NCH(tree);
2072    int res = (validate_ntype(tree, lambdef)
2073               && ((nch == 3) || (nch == 4))
2074               && validate_name(CHILD(tree, 0), "lambda")
2075               && validate_colon(CHILD(tree, nch - 2))
2076               && validate_test(CHILD(tree, nch - 1)));
2077
2078    if (res && (nch == 4))
2079        res = validate_varargslist(CHILD(tree, 1));
2080    else if (!res && !PyErr_Occurred())
2081        (void) validate_numnodes(tree, 3, "lambdef");
2082
2083    return (res);
2084}
2085
2086
2087/*  arglist:
2088 *
2089 *  (argument ',')* (argument* [','] | '*' test [',' '**' test] | '**' test)
2090 */
2091static int
2092validate_arglist(node *tree)
2093{
2094    int nch = NCH(tree);
2095    int i, ok;
2096    node *last;
2097
2098    if (nch <= 0)
2099        /* raise the right error from having an invalid number of children */
2100        return validate_numnodes(tree, nch + 1, "arglist");
2101
2102    last = CHILD(tree, nch - 1);
2103    if (TYPE(last) == test) {
2104        /* Extended call syntax introduced in Python 1.6 has been used;
2105         * validate and strip that off and continue;
2106         * adjust nch to perform the cut, and ensure resulting nch is even
2107         * (validation of the first part doesn't require that).
2108         */
2109        if (nch < 2) {
2110            validate_numnodes(tree, nch + 1, "arglist");
2111            return 0;
2112        }
2113        ok = validate_test(last);
2114        if (ok) {
2115            node *prev = CHILD(tree, nch - 2);
2116            /* next must be '*' or '**' */
2117            if (validate_doublestar(prev)) {
2118                nch -= 2;
2119                if (nch >= 3) {
2120                    /* may include:  '*' test ',' */
2121                    last = CHILD(tree, nch - 1);
2122                    prev = CHILD(tree, nch - 2);
2123                    if (TYPE(prev) == test) {
2124                        ok = validate_comma(last)
2125                             && validate_test(prev)
2126                             && validate_star(CHILD(tree, nch - 3));
2127                        if (ok)
2128                            nch -= 3;
2129                    }
2130                    /* otherwise, nothing special */
2131                }
2132            }
2133            else {
2134                /* must be only:  '*' test */
2135                PyErr_Clear();
2136                ok = validate_star(prev);
2137                nch -= 2;
2138            }
2139            if (ok && is_odd(nch)) {
2140                /* Illegal number of nodes before extended call syntax;
2141                 * validation of the "normal" arguments does not require
2142                 * a trailing comma, but requiring an even number of
2143                 * children will effect the same requirement.
2144                 */
2145                return validate_numnodes(tree, nch + 1, "arglist");
2146            }
2147        }
2148    }
2149    /* what remains must be:  (argument ",")* [argument [","]] */
2150    i = 0;
2151    while (ok && nch - i >= 2) {
2152        ok = validate_argument(CHILD(tree, i))
2153             && validate_comma(CHILD(tree, i + 1));
2154        i += 2;
2155    }
2156    if (ok && i < nch) {
2157        ok = validate_comma(CHILD(tree, i));
2158        ++i;
2159    }
2160    if (i != nch) {
2161        /* internal error! */
2162        ok = 0;
2163        err_string("arglist: internal error; nch != i");
2164    }
2165    return (ok);
2166}
2167
2168
2169
2170/*  argument:
2171 *
2172 *  [test '='] test
2173 */
2174static int
2175validate_argument(node *tree)
2176{
2177    int nch = NCH(tree);
2178    int res = (validate_ntype(tree, argument)
2179               && ((nch == 1) || (nch == 3))
2180               && validate_test(CHILD(tree, 0)));
2181
2182    if (res && (nch == 3))
2183        res = (validate_equal(CHILD(tree, 1))
2184               && validate_test(CHILD(tree, 2)));
2185
2186    return (res);
2187}
2188
2189
2190
2191/*  trailer:
2192 *
2193 *  '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
2194 */
2195static int
2196validate_trailer(node *tree)
2197{
2198    int nch = NCH(tree);
2199    int res = validate_ntype(tree, trailer) && ((nch == 2) || (nch == 3));
2200
2201    if (res) {
2202        switch (TYPE(CHILD(tree, 0))) {
2203          case LPAR:
2204            res = validate_rparen(CHILD(tree, nch - 1));
2205            if (res && (nch == 3))
2206                res = validate_arglist(CHILD(tree, 1));
2207            break;
2208          case LSQB:
2209            res = (validate_numnodes(tree, 3, "trailer")
2210                   && validate_subscriptlist(CHILD(tree, 1))
2211                   && validate_ntype(CHILD(tree, 2), RSQB));
2212            break;
2213          case DOT:
2214            res = (validate_numnodes(tree, 2, "trailer")
2215                   && validate_ntype(CHILD(tree, 1), NAME));
2216            break;
2217          default:
2218            res = 0;
2219            break;
2220        }
2221    }
2222    else {
2223        (void) validate_numnodes(tree, 2, "trailer");
2224    }
2225    return (res);
2226}
2227
2228
2229/*  subscriptlist:
2230 *
2231 *  subscript (',' subscript)* [',']
2232 */
2233static int
2234validate_subscriptlist(node *tree)
2235{
2236    return (validate_repeating_list(tree, subscriptlist,
2237                                    validate_subscript, "subscriptlist"));
2238}
2239
2240
2241/*  subscript:
2242 *
2243 *  '.' '.' '.' | test | [test] ':' [test] [sliceop]
2244 */
2245static int
2246validate_subscript(node *tree)
2247{
2248    int offset = 0;
2249    int nch = NCH(tree);
2250    int res = validate_ntype(tree, subscript) && (nch >= 1) && (nch <= 4);
2251
2252    if (!res) {
2253        if (!PyErr_Occurred())
2254            err_string("invalid number of arguments for subscript node");
2255        return (0);
2256    }
2257    if (TYPE(CHILD(tree, 0)) == DOT)
2258        /* take care of ('.' '.' '.') possibility */
2259        return (validate_numnodes(tree, 3, "subscript")
2260                && validate_dot(CHILD(tree, 0))
2261                && validate_dot(CHILD(tree, 1))
2262                && validate_dot(CHILD(tree, 2)));
2263    if (nch == 1) {
2264        if (TYPE(CHILD(tree, 0)) == test)
2265            res = validate_test(CHILD(tree, 0));
2266        else
2267            res = validate_colon(CHILD(tree, 0));
2268        return (res);
2269    }
2270    /*  Must be [test] ':' [test] [sliceop],
2271     *  but at least one of the optional components will
2272     *  be present, but we don't know which yet.
2273     */
2274    if ((TYPE(CHILD(tree, 0)) != COLON) || (nch == 4)) {
2275        res = validate_test(CHILD(tree, 0));
2276        offset = 1;
2277    }
2278    if (res)
2279        res = validate_colon(CHILD(tree, offset));
2280    if (res) {
2281        int rem = nch - ++offset;
2282        if (rem) {
2283            if (TYPE(CHILD(tree, offset)) == test) {
2284                res = validate_test(CHILD(tree, offset));
2285                ++offset;
2286                --rem;
2287            }
2288            if (res && rem)
2289                res = validate_sliceop(CHILD(tree, offset));
2290        }
2291    }
2292    return (res);
2293}
2294
2295
2296static int
2297validate_sliceop(node *tree)
2298{
2299    int nch = NCH(tree);
2300    int res = ((nch == 1) || validate_numnodes(tree, 2, "sliceop"))
2301              && validate_ntype(tree, sliceop);
2302    if (!res && !PyErr_Occurred()) {
2303        res = validate_numnodes(tree, 1, "sliceop");
2304    }
2305    if (res)
2306        res = validate_colon(CHILD(tree, 0));
2307    if (res && (nch == 2))
2308        res = validate_test(CHILD(tree, 1));
2309
2310    return (res);
2311}
2312
2313
2314static int
2315validate_exprlist(node *tree)
2316{
2317    return (validate_repeating_list(tree, exprlist,
2318                                    validate_expr, "exprlist"));
2319}
2320
2321
2322static int
2323validate_dictmaker(node *tree)
2324{
2325    int nch = NCH(tree);
2326    int res = (validate_ntype(tree, dictmaker)
2327               && (nch >= 3)
2328               && validate_test(CHILD(tree, 0))
2329               && validate_colon(CHILD(tree, 1))
2330               && validate_test(CHILD(tree, 2)));
2331
2332    if (res && ((nch % 4) == 0))
2333        res = validate_comma(CHILD(tree, --nch));
2334    else if (res)
2335        res = ((nch % 4) == 3);
2336
2337    if (res && (nch > 3)) {
2338        int pos = 3;
2339        /*  ( ',' test ':' test )*  */
2340        while (res && (pos < nch)) {
2341            res = (validate_comma(CHILD(tree, pos))
2342                   && validate_test(CHILD(tree, pos + 1))
2343                   && validate_colon(CHILD(tree, pos + 2))
2344                   && validate_test(CHILD(tree, pos + 3)));
2345            pos += 4;
2346        }
2347    }
2348    return (res);
2349}
2350
2351
2352static int
2353validate_eval_input(node *tree)
2354{
2355    int pos;
2356    int nch = NCH(tree);
2357    int res = (validate_ntype(tree, eval_input)
2358               && (nch >= 2)
2359               && validate_testlist(CHILD(tree, 0))
2360               && validate_ntype(CHILD(tree, nch - 1), ENDMARKER));
2361
2362    for (pos = 1; res && (pos < (nch - 1)); ++pos)
2363        res = validate_ntype(CHILD(tree, pos), NEWLINE);
2364
2365    return (res);
2366}
2367
2368
2369static int
2370validate_node(node *tree)
2371{
2372    int   nch  = 0;                     /* num. children on current node  */
2373    int   res  = 1;                     /* result value                   */
2374    node* next = 0;                     /* node to process after this one */
2375
2376    while (res & (tree != 0)) {
2377        nch  = NCH(tree);
2378        next = 0;
2379        switch (TYPE(tree)) {
2380            /*
2381             *  Definition nodes.
2382             */
2383          case funcdef:
2384            res = validate_funcdef(tree);
2385            break;
2386          case classdef:
2387            res = validate_class(tree);
2388            break;
2389            /*
2390             *  "Trivial" parse tree nodes.
2391             *  (Why did I call these trivial?)
2392             */
2393          case stmt:
2394            res = validate_stmt(tree);
2395            break;
2396          case small_stmt:
2397            /*
2398             *  expr_stmt | print_stmt  | del_stmt | pass_stmt | flow_stmt
2399             *  | import_stmt | global_stmt | exec_stmt | assert_stmt
2400             */
2401            res = validate_small_stmt(tree);
2402            break;
2403          case flow_stmt:
2404            res  = (validate_numnodes(tree, 1, "flow_stmt")
2405                    && ((TYPE(CHILD(tree, 0)) == break_stmt)
2406                        || (TYPE(CHILD(tree, 0)) == continue_stmt)
2407                        || (TYPE(CHILD(tree, 0)) == return_stmt)
2408                        || (TYPE(CHILD(tree, 0)) == raise_stmt)));
2409            if (res)
2410                next = CHILD(tree, 0);
2411            else if (nch == 1)
2412                err_string("Illegal flow_stmt type.");
2413            break;
2414            /*
2415             *  Compound statements.
2416             */
2417          case simple_stmt:
2418            res = validate_simple_stmt(tree);
2419            break;
2420          case compound_stmt:
2421            res = validate_compound_stmt(tree);
2422            break;
2423            /*
2424             *  Fundemental statements.
2425             */
2426          case expr_stmt:
2427            res = validate_expr_stmt(tree);
2428            break;
2429          case print_stmt:
2430            res = validate_print_stmt(tree);
2431            break;
2432          case del_stmt:
2433            res = validate_del_stmt(tree);
2434            break;
2435          case pass_stmt:
2436            res = (validate_numnodes(tree, 1, "pass")
2437                   && validate_name(CHILD(tree, 0), "pass"));
2438            break;
2439          case break_stmt:
2440            res = (validate_numnodes(tree, 1, "break")
2441                   && validate_name(CHILD(tree, 0), "break"));
2442            break;
2443          case continue_stmt:
2444            res = (validate_numnodes(tree, 1, "continue")
2445                   && validate_name(CHILD(tree, 0), "continue"));
2446            break;
2447          case return_stmt:
2448            res = validate_return_stmt(tree);
2449            break;
2450          case raise_stmt:
2451            res = validate_raise_stmt(tree);
2452            break;
2453          case import_stmt:
2454            res = validate_import_stmt(tree);
2455            break;
2456          case global_stmt:
2457            res = validate_global_stmt(tree);
2458            break;
2459          case exec_stmt:
2460            res = validate_exec_stmt(tree);
2461            break;
2462          case assert_stmt:
2463            res = validate_assert_stmt(tree);
2464            break;
2465          case if_stmt:
2466            res = validate_if(tree);
2467            break;
2468          case while_stmt:
2469            res = validate_while(tree);
2470            break;
2471          case for_stmt:
2472            res = validate_for(tree);
2473            break;
2474          case try_stmt:
2475            res = validate_try(tree);
2476            break;
2477          case suite:
2478            res = validate_suite(tree);
2479            break;
2480            /*
2481             *  Expression nodes.
2482             */
2483          case testlist:
2484            res = validate_testlist(tree);
2485            break;
2486          case test:
2487            res = validate_test(tree);
2488            break;
2489          case and_test:
2490            res = validate_and_test(tree);
2491            break;
2492          case not_test:
2493            res = validate_not_test(tree);
2494            break;
2495          case comparison:
2496            res = validate_comparison(tree);
2497            break;
2498          case exprlist:
2499            res = validate_exprlist(tree);
2500            break;
2501          case comp_op:
2502            res = validate_comp_op(tree);
2503            break;
2504          case expr:
2505            res = validate_expr(tree);
2506            break;
2507          case xor_expr:
2508            res = validate_xor_expr(tree);
2509            break;
2510          case and_expr:
2511            res = validate_and_expr(tree);
2512            break;
2513          case shift_expr:
2514            res = validate_shift_expr(tree);
2515            break;
2516          case arith_expr:
2517            res = validate_arith_expr(tree);
2518            break;
2519          case term:
2520            res = validate_term(tree);
2521            break;
2522          case factor:
2523            res = validate_factor(tree);
2524            break;
2525          case power:
2526            res = validate_power(tree);
2527            break;
2528          case atom:
2529            res = validate_atom(tree);
2530            break;
2531
2532          default:
2533            /* Hopefully never reached! */
2534            err_string("Unrecogniged node type.");
2535            res = 0;
2536            break;
2537        }
2538        tree = next;
2539    }
2540    return (res);
2541}
2542
2543
2544static int
2545validate_expr_tree(node *tree)
2546{
2547    int res = validate_eval_input(tree);
2548
2549    if (!res && !PyErr_Occurred())
2550        err_string("Could not validate expression tuple.");
2551
2552    return (res);
2553}
2554
2555
2556/*  file_input:
2557 *      (NEWLINE | stmt)* ENDMARKER
2558 */
2559static int
2560validate_file_input(node *tree)
2561{
2562    int j   = 0;
2563    int nch = NCH(tree) - 1;
2564    int res = ((nch >= 0)
2565               && validate_ntype(CHILD(tree, nch), ENDMARKER));
2566
2567    for ( ; res && (j < nch); ++j) {
2568        if (TYPE(CHILD(tree, j)) == stmt)
2569            res = validate_stmt(CHILD(tree, j));
2570        else
2571            res = validate_newline(CHILD(tree, j));
2572    }
2573    /*  This stays in to prevent any internal failues from getting to the
2574     *  user.  Hopefully, this won't be needed.  If a user reports getting
2575     *  this, we have some debugging to do.
2576     */
2577    if (!res && !PyErr_Occurred())
2578        err_string("VALIDATION FAILURE: report this to the maintainer!.");
2579
2580    return (res);
2581}
2582
2583
2584static PyObject*
2585pickle_constructor = NULL;
2586
2587
2588static PyObject*
2589parser__pickler(PyObject *self, PyObject *args)
2590{
2591    NOTE(ARGUNUSED(self))
2592    PyObject *result = NULL;
2593    PyObject *ast = NULL;
2594    PyObject *empty_dict = NULL;
2595
2596    if (PyArg_ParseTuple(args, "O!:_pickler", &PyAST_Type, &ast)) {
2597        PyObject *newargs;
2598        PyObject *tuple;
2599
2600        if ((empty_dict = PyDict_New()) == NULL)
2601            goto finally;
2602        if ((newargs = Py_BuildValue("Oi", ast, 1)) == NULL)
2603            goto finally;
2604        tuple = parser_ast2tuple((PyAST_Object*)NULL, newargs, empty_dict);
2605        if (tuple != NULL) {
2606            result = Py_BuildValue("O(O)", pickle_constructor, tuple);
2607            Py_DECREF(tuple);
2608        }
2609        Py_DECREF(empty_dict);
2610        Py_DECREF(newargs);
2611    }
2612  finally:
2613    Py_XDECREF(empty_dict);
2614
2615    return (result);
2616}
2617
2618
2619/*  Functions exported by this module.  Most of this should probably
2620 *  be converted into an AST object with methods, but that is better
2621 *  done directly in Python, allowing subclasses to be created directly.
2622 *  We'd really have to write a wrapper around it all anyway to allow
2623 *  inheritance.
2624 */
2625static PyMethodDef parser_functions[] =  {
2626    {"ast2tuple",       (PyCFunction)parser_ast2tuple,  PUBLIC_METHOD_TYPE,
2627        "Creates a tuple-tree representation of an AST."},
2628    {"ast2list",        (PyCFunction)parser_ast2list,   PUBLIC_METHOD_TYPE,
2629        "Creates a list-tree representation of an AST."},
2630    {"compileast",      (PyCFunction)parser_compileast, PUBLIC_METHOD_TYPE,
2631        "Compiles an AST object into a code object."},
2632    {"expr",            (PyCFunction)parser_expr,       PUBLIC_METHOD_TYPE,
2633        "Creates an AST object from an expression."},
2634    {"isexpr",          (PyCFunction)parser_isexpr,     PUBLIC_METHOD_TYPE,
2635        "Determines if an AST object was created from an expression."},
2636    {"issuite",         (PyCFunction)parser_issuite,    PUBLIC_METHOD_TYPE,
2637        "Determines if an AST object was created from a suite."},
2638    {"suite",           (PyCFunction)parser_suite,      PUBLIC_METHOD_TYPE,
2639        "Creates an AST object from a suite."},
2640    {"sequence2ast",    (PyCFunction)parser_tuple2ast,  PUBLIC_METHOD_TYPE,
2641        "Creates an AST object from a tree representation."},
2642    {"tuple2ast",       (PyCFunction)parser_tuple2ast,  PUBLIC_METHOD_TYPE,
2643        "Creates an AST object from a tree representation."},
2644
2645    /* private stuff: support pickle module */
2646    {"_pickler",        (PyCFunction)parser__pickler,   METH_VARARGS,
2647        "Returns the pickle magic to allow ast objects to be pickled."},
2648
2649    {NULL, NULL, 0, NULL}
2650    };
2651
2652
2653DL_EXPORT(void)
2654initparser()
2655 {
2656    PyObject* module;
2657    PyObject* dict;
2658
2659    PyAST_Type.ob_type = &PyType_Type;
2660    module = Py_InitModule("parser", parser_functions);
2661    dict = PyModule_GetDict(module);
2662
2663    if (parser_error == 0)
2664        parser_error = PyErr_NewException("parser.ParserError", NULL, NULL);
2665    else
2666        puts("parser module initialized more than once!");
2667
2668    if ((parser_error == 0)
2669        || (PyDict_SetItemString(dict, "ParserError", parser_error) != 0)) {
2670        /*
2671         *  This is serious.
2672         */
2673        Py_FatalError("can't define parser.ParserError");
2674    }
2675    /*
2676     *  Nice to have, but don't cry if we fail.
2677     */
2678    Py_INCREF(&PyAST_Type);
2679    PyDict_SetItemString(dict, "ASTType", (PyObject*)&PyAST_Type);
2680
2681    PyDict_SetItemString(dict, "__copyright__",
2682                         PyString_FromString(parser_copyright_string));
2683    PyDict_SetItemString(dict, "__doc__",
2684                         PyString_FromString(parser_doc_string));
2685    PyDict_SetItemString(dict, "__version__",
2686                         PyString_FromString(parser_version_string));
2687
2688    /* register to support pickling */
2689    module = PyImport_ImportModule("copy_reg");
2690    if (module != NULL) {
2691        PyObject *func, *pickler;
2692
2693        func = PyObject_GetAttrString(module, "pickle");
2694        pickle_constructor = PyDict_GetItemString(dict, "sequence2ast");
2695        pickler = PyDict_GetItemString(dict, "_pickler");
2696        Py_XINCREF(pickle_constructor);
2697        if ((func != NULL) && (pickle_constructor != NULL)
2698            && (pickler != NULL)) {
2699            PyObject *res;
2700
2701            res = PyObject_CallFunction(
2702                    func, "OOO", &PyAST_Type, pickler, pickle_constructor);
2703            Py_XDECREF(res);
2704        }
2705        Py_XDECREF(func);
2706        Py_DECREF(module);
2707    }
2708}
2709