idl_parser.py revision f2477e01787aa58f445919b809d89e252beef54f
1#!/usr/bin/env python
2# Copyright (c) 2012 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6""" Parser for PPAPI IDL """
7
8#
9# IDL Parser
10#
# The parser uses the PLY yacc library to build a set of parsing rules based
# on WebIDL.
13#
14# WebIDL, and WebIDL regular expressions can be found at:
15#   http://dev.w3.org/2006/webapi/WebIDL/
16# PLY can be found at:
17#   http://www.dabeaz.com/ply/
18#
19# The parser generates a tree by recursively matching sets of items against
20# defined patterns.  When a match is made, that set of items is reduced
21# to a new item.   The new item can provide a match for parent patterns.
22# In this way an AST is built (reduced) depth first.
23
24
25import getopt
26import glob
27import os.path
28import re
29import sys
30import time
31
32from idl_ast import IDLAst
33from idl_log import ErrOut, InfoOut, WarnOut
34from idl_lexer import IDLLexer
35from idl_node import IDLAttribute, IDLFile, IDLNode
36from idl_option import GetOption, Option, ParseOptions
37from idl_lint import Lint
38
39from ply import lex
40from ply import yacc
41
# Options declared by this module.  build_debug, parse_debug and token_debug
# are read back with GetOption() in IDLParser.__init__ and srcroot is read in
# IDLParser.ParseFile; dump_tree and include_private are not read by the code
# visible in this part of the file.
Option('build_debug', 'Debug tree building.')
Option('parse_debug', 'Debug parse reduction steps.')
Option('token_debug', 'Debug token generation.')
Option('dump_tree', 'Dump the tree.')
Option('srcroot', 'Working directory.', default=os.path.join('..', 'api'))
Option('include_private', 'Include private IDL directory in default API paths.')
48
49#
50# ERROR_REMAP
51#
52# Maps the standard error formula into a more friendly error message.
53#
# Keys must match, character for character (including the trailing period),
# the messages assembled in IDLParser.p_error from TokenTypeName(); on a hit
# the message is replaced by the mapped value before it is logged.
ERROR_REMAP = {
  'Unexpected ")" after "(".' : 'Empty argument list.',
  'Unexpected ")" after ",".' : 'Missing argument.',
  'Unexpected "}" after ",".' : 'Trailing comma in block.',
  'Unexpected "}" after "{".' : 'Unexpected empty block.',
  'Unexpected comment after "}".' : 'Unexpected trailing comment.',
  'Unexpected "{" after keyword "enum".' : 'Enum missing name.',
  'Unexpected "{" after keyword "struct".' : 'Struct missing name.',
  'Unexpected "{" after keyword "interface".' : 'Interface missing name.',
}
64
65# DumpReduction
66#
67# Prints out the set of items which matched a particular pattern and the
68# new item or set it was reduced to.
def DumpReduction(cls, p):
  """Log the items matched by a grammar rule and what they were reduced to.

  cls - Label identifying the rule that was reduced.
  p - The Yacc production; p[0] is the result and p[1:] the matched items.
  """
  count = len(p)
  if p[0] is not None:
    # One " >item< " chunk per matched item, in order.
    matched = "".join(" >%s< " % str(p[pos]) for pos in range(1, count))
    InfoOut.Log("OBJ: %s(%d) - %s : %s\n"  % (cls, count, str(p[0]), matched))
    return

  # Nothing was produced: report None followed by the list of inputs.
  InfoOut.Log("OBJ: %s(%d) - None\n" % (cls, count))
  InfoOut.Log("  [%s]\n" % [str(x) for x in p[1:]])
78
79
80# CopyToList
81#
82# Takes an input item, list, or None, and returns a new list of that set.
def CopyToList(item):
  """Return a new list built from an item, a list of items, or nothing.

  Any false value (None, an empty list, 0, '') yields an empty list.  An
  object whose type is exactly list is shallow-copied so the caller may
  modify the result.  Anything else is wrapped in a one-element list.
  """
  if not item:
    return []
  if type(item) is list:
    return list(item)
  return [item]
92
93
94
95# ListFromConcat
96#
# Generate a new list by joining any number of inputs, each of which can be
# an individual item, a list of items, or None.
def ListFromConcat(*items):
  """Flatten the arguments, one level deep, into a single new list.

  Each argument is normalized with CopyToList, so empty values contribute
  nothing, lists contribute their elements and other objects contribute
  themselves.
  """
  return [entry for item in items for entry in CopyToList(item)]
106
107
108# TokenTypeName
109#
110# Generate a string which has the type and value of the token.
def TokenTypeName(t):
  """Describe token t as a string made from its type and value."""
  kind = t.type
  text = t.value
  if kind == 'SYMBOL':
    return 'symbol %s' % text
  if kind in ['HEX', 'INT', 'OCT', 'FLOAT']:
    return 'value %s' % text
  if kind == 'STRING':
    return 'string "%s"' % text
  if kind == 'COMMENT':
    return 'comment'
  # Tokens whose type equals their text are reported as the quoted text alone.
  if kind == text:
    return '"%s"' % text
  # Everything else is reported as a keyword.
  return 'keyword "%s"' % text
119
120
121#
122# IDL Parser
123#
# The Parser inherits from the Lexer to provide PLY with the tokenizing
# definitions.  Parsing patterns are encoded as functions where p_<name> is
# called any time a pattern matching the function documentation is found.
# Patterns are expressed in the form of:
128# """ <new item> : <item> ....
129#                | <item> ...."""
130#
131# Where new item is the result of a match against one or more sets of items
132# separated by the "|".
133#
134# The function is called with an object 'p' where p[0] is the output object
135# and p[n] is the set of inputs for positive values of 'n'.  Len(p) can be
136# used to distinguish between multiple item sets in the pattern.
137#
138# For more details on parsing refer to the PLY documentation at
139#    http://www.dabeaz.com/ply/
140#
141#
142# The parser uses the following conventions:
143#   a <type>_block defines a block of <type> definitions in the form of:
144#       [comment] [ext_attr_block] <type> <name> '{' <type>_list '}' ';'
145#   A block is reduced by returning an object of <type> with a name of <name>
146#   which in turn has <type>_list as children.
147#
#   A [comment] is an optional C style comment block enclosed in /* ... */ which
#   is appended to the adjacent node as a child.
150#
151#   A [ext_attr_block] is an optional list of Extended Attributes which is
152#   appended to the adjacent node as a child.
153#
154#   a <type>_list defines a list of <type> items which will be passed as a
155#   list of children to the parent pattern.  A list is in the form of:
156#       [comment] [ext_attr_block] <...DEF...> ';' <type>_list | (empty)
157# or
158#       [comment] [ext_attr_block] <...DEF...> <type>_cont
159#
160#   In the first form, the list is reduced recursively, where the right side
161#   <type>_list is first reduced then joined with pattern currently being
#   matched.  The list is terminated when the (empty) pattern is matched.
163#
164#   In the second form the list is reduced recursively, where the right side
165#   <type>_cont is first reduced then joined with the pattern currently being
#   matched.  The <type>_cont is in the form of:
167#       ',' <type>_list | (empty)
168#   The <type>_cont form is used to consume the ',' which only occurs when
169#   there is more than one object in the list.  The <type>_cont also provides
170#   the terminating (empty) definition.
171#
172
173
174class IDLParser(IDLLexer):
175# TOP
176#
# This pattern defines the top of the parse tree.  The parse tree is in the
# form of:
179#
180# top
181#   *modifiers
182#     *comments
183#     *ext_attr_block
184#       ext_attr_list
185#          attr_arg_list
186#   *integer, value
187#   *param_list
188#   *typeref
189#
190#   top_list
191#     describe_block
192#       describe_list
193#     enum_block
194#       enum_item
195#     interface_block
196#       member
197#     label_block
198#       label_item
199#     struct_block
200#       member
201#     typedef_decl
202#       typedef_data
203#       typedef_func
204#
205# (* sub matches found at multiple levels and are not truly children of top)
206#
# Input files start with a Copyright comment, normally followed by the file
# comment, then an optional set of file wide Extended Attributes, and finally
# the file level patterns.
210#
211  # Find the Copyright, File comment, and optional file wide attributes.  We
212  # use a match with COMMENT instead of comments to force the token to be
213  # present.  The extended attributes and the top_list become siblings which
214  # in turn are children of the file object created from the results of top.
215  def p_top(self, p):
216    """top : COMMENT COMMENT ext_attr_block top_list"""
217
218    Copyright = self.BuildComment('Copyright', p, 1)
219    Filedoc = self.BuildComment('Comment', p, 2)
220
221    p[0] = ListFromConcat(Copyright, Filedoc, p[3], p[4])
222    if self.parse_debug: DumpReduction('top', p)
223
224  def p_top_short(self, p):
225    """top : COMMENT ext_attr_block top_list"""
226    Copyright = self.BuildComment('Copyright', p, 1)
227    Filedoc = IDLNode('Comment', self.lexobj.filename, p.lineno(2)-1,
228        p.lexpos(2)-1, [self.BuildAttribute('NAME', ''),
229          self.BuildAttribute('FORM', 'cc')])
230    p[0] = ListFromConcat(Copyright, Filedoc, p[2], p[3])
231    if self.parse_debug: DumpReduction('top', p)
232
233  # Build a list of top level items.
234  def p_top_list(self, p):
235    """top_list : callback_decl top_list
236                | describe_block top_list
237                | dictionary_block top_list
238                | enum_block top_list
239                | inline top_list
240                | interface_block top_list
241                | label_block top_list
242                | namespace top_list
243                | struct_block top_list
244                | typedef_decl top_list
245                | bad_decl top_list
246                | """
247    if len(p) > 2:
248      p[0] = ListFromConcat(p[1], p[2])
249    if self.parse_debug: DumpReduction('top_list', p)
250
251  # Recover from error and continue parsing at the next top match.
252  def p_top_error(self, p):
253    """top_list : error top_list"""
254    p[0] = p[2]
255
256  # Recover from error and continue parsing at the next top match.
257  def p_bad_decl(self, p):
258    """bad_decl : modifiers SYMBOL error '}' ';'"""
259    p[0] = []
260
261#
262# Modifier List
263#
264#
265  def p_modifiers(self, p):
266    """modifiers : comments ext_attr_block"""
267    p[0] = ListFromConcat(p[1], p[2])
268    if self.parse_debug: DumpReduction('modifiers', p)
269
270#
271# Comments
272#
273# Comments are optional list of C style comment objects.  Comments are returned
274# as a list or None.
275#
276  def p_comments(self, p):
277    """comments : COMMENT comments
278                | """
279    if len(p) > 1:
280      child = self.BuildComment('Comment', p, 1)
281      p[0] = ListFromConcat(child, p[2])
282      if self.parse_debug: DumpReduction('comments', p)
283    else:
284      if self.parse_debug: DumpReduction('no comments', p)
285
286
287#
288# Namespace
289#
290# A namespace provides a named scope to an enclosed top_list.
291#
292  def p_namespace(self, p):
293    """namespace : modifiers NAMESPACE namespace_name '{' top_list '}' ';'"""
294    children = ListFromConcat(p[1], p[5])
295    p[0] = self.BuildNamed('Namespace', p, 3, children)
296
297  # We allow namespace names of the form foo.bar.baz.
298  def p_namespace_name(self, p):
299    """namespace_name : SYMBOL
300                      | SYMBOL '.' namespace_name"""
301    p[0] = "".join(p[1:])
302
303
304#
305# Dictionary
306#
307# A dictionary is a named list of optional and required members.
308#
309  def p_dictionary_block(self, p):
310    """dictionary_block : modifiers DICTIONARY SYMBOL '{' struct_list '}' ';'"""
311    p[0] = self.BuildNamed('Dictionary', p, 3, ListFromConcat(p[1], p[5]))
312
313#
314# Callback
315#
316# A callback is essentially a single function declaration (outside of an
317# Interface).
318#
319  def p_callback_decl(self, p):
320    """callback_decl : modifiers CALLBACK SYMBOL '=' SYMBOL param_list ';'"""
321    children = ListFromConcat(p[1], p[6])
322    p[0] = self.BuildNamed('Callback', p, 3, children)
323
324
325#
326# Inline
327#
328# Inline blocks define option code to be emitted based on language tag,
329# in the form of:
330# #inline <LANGUAGE>
331# <CODE>
332# #endinl
333#
334  def p_inline(self, p):
335    """inline : modifiers INLINE"""
336    words = p[2].split()
337    name = self.BuildAttribute('NAME', words[1])
338    lines = p[2].split('\n')
339    value = self.BuildAttribute('VALUE', '\n'.join(lines[1:-1]) + '\n')
340    children = ListFromConcat(name, value, p[1])
341    p[0] = self.BuildProduction('Inline', p, 2, children)
342    if self.parse_debug: DumpReduction('inline', p)
343
344# Extended Attributes
345#
346# Extended Attributes denote properties which will be applied to a node in the
347# AST.  A list of extended attributes are denoted by a brackets '[' ... ']'
348# enclosing a comma separated list of extended attributes in the form of:
349#
350#  Name
351#  Name=HEX | INT | OCT | FLOAT
352#  Name="STRING"
353#  Name=Function(arg ...)
354#  TODO(noelallen) -Not currently supported:
355#  ** Name(arg ...) ...
356#  ** Name=Scope::Value
357#
358# Extended Attributes are returned as a list or None.
359
360  def p_ext_attr_block(self, p):
361    """ext_attr_block : '[' ext_attr_list ']'
362                  | """
363    if len(p) > 1:
364      p[0] = p[2]
365      if self.parse_debug: DumpReduction('ext_attr_block', p)
366    else:
367      if self.parse_debug: DumpReduction('no ext_attr_block', p)
368
369  def p_ext_attr_list(self, p):
370    """ext_attr_list : SYMBOL '=' SYMBOL ext_attr_cont
371                     | SYMBOL '=' value ext_attr_cont
372                     | SYMBOL '=' SYMBOL param_list ext_attr_cont
373                     | SYMBOL ext_attr_cont"""
374    # If there are 4 tokens plus a return slot, this must be in the form
375    # SYMBOL = SYMBOL|value ext_attr_cont
376    if len(p) == 5:
377      p[0] = ListFromConcat(self.BuildAttribute(p[1], p[3]), p[4])
378    # If there are 5 tokens plus a return slot, this must be in the form
379    # SYMBOL = SYMBOL (param_list) ext_attr_cont
380    elif len(p) == 6:
381      member = self.BuildNamed('Member', p, 3, [p[4]])
382      p[0] = ListFromConcat(self.BuildAttribute(p[1], member), p[5])
383    # Otherwise, this must be: SYMBOL ext_attr_cont
384    else:
385      p[0] = ListFromConcat(self.BuildAttribute(p[1], 'True'), p[2])
386    if self.parse_debug: DumpReduction('ext_attribute_list', p)
387
388  def p_ext_attr_list_values(self, p):
389    """ext_attr_list : SYMBOL '=' '(' values ')' ext_attr_cont
390                     | SYMBOL '=' '(' symbols ')' ext_attr_cont"""
391    p[0] = ListFromConcat(self.BuildAttribute(p[1], p[4]), p[6])
392
393  def p_values(self, p):
394    """values : value values_cont"""
395    p[0] = ListFromConcat(p[1], p[2])
396
397  def p_symbols(self, p):
398    """symbols : SYMBOL symbols_cont"""
399    p[0] = ListFromConcat(p[1], p[2])
400
401  def p_symbols_cont(self, p):
402    """symbols_cont : ',' SYMBOL symbols_cont
403                    | """
404    if len(p) > 1: p[0] = ListFromConcat(p[2], p[3])
405
406  def p_values_cont(self, p):
407    """values_cont : ',' value values_cont
408                   | """
409    if len(p) > 1: p[0] = ListFromConcat(p[2], p[3])
410
411  def p_ext_attr_cont(self, p):
412    """ext_attr_cont : ',' ext_attr_list
413                     |"""
414    if len(p) > 1: p[0] = p[2]
415    if self.parse_debug: DumpReduction('ext_attribute_cont', p)
416
417  def p_ext_attr_func(self, p):
418    """ext_attr_list : SYMBOL '(' attr_arg_list ')' ext_attr_cont"""
419    p[0] = ListFromConcat(self.BuildAttribute(p[1] + '()', p[3]), p[5])
420    if self.parse_debug: DumpReduction('attr_arg_func', p)
421
422  def p_ext_attr_arg_list(self, p):
423    """attr_arg_list : SYMBOL attr_arg_cont
424                     | value attr_arg_cont"""
425    p[0] = ListFromConcat(p[1], p[2])
426
427  def p_attr_arg_cont(self, p):
428    """attr_arg_cont : ',' attr_arg_list
429                     | """
430    if self.parse_debug: DumpReduction('attr_arg_cont', p)
431    if len(p) > 1: p[0] = p[2]
432
433  def p_attr_arg_error(self, p):
434    """attr_arg_cont : error attr_arg_cont"""
435    p[0] = p[2]
436    if self.parse_debug: DumpReduction('attr_arg_error', p)
437
438
439#
440# Describe
441#
442# A describe block is defined at the top level.  It provides a mechanism for
443# attributing a group of ext_attr to a describe_list.  Members of the
444# describe list are language specific 'Type' declarations
445#
446  def p_describe_block(self, p):
447    """describe_block : modifiers DESCRIBE '{' describe_list '}' ';'"""
448    children = ListFromConcat(p[1], p[4])
449    p[0] = self.BuildProduction('Describe', p, 2, children)
450    if self.parse_debug: DumpReduction('describe_block', p)
451
452  # Recover from describe error and continue parsing at the next top match.
453  def p_describe_error(self, p):
454    """describe_list : error describe_list"""
455    p[0] = []
456
457  def p_describe_list(self, p):
458    """describe_list : modifiers SYMBOL ';' describe_list
459                     | modifiers ENUM ';' describe_list
460                     | modifiers STRUCT ';' describe_list
461                     | modifiers TYPEDEF ';' describe_list
462                     | """
463    if len(p) > 1:
464      Type = self.BuildNamed('Type', p, 2, p[1])
465      p[0] = ListFromConcat(Type, p[4])
466
467#
468# Constant Values (integer, value)
469#
# Constant values can be found at various levels.  A Constant value is
# returned as the string value after being validated against a FLOAT, HEX,
# INT, OCT or STRING pattern as appropriate.
473#
474  def p_value(self, p):
475    """value : FLOAT
476             | HEX
477             | INT
478             | OCT
479             | STRING"""
480    p[0] = p[1]
481    if self.parse_debug: DumpReduction('value', p)
482
483  def p_value_lshift(self, p):
484    """value : integer LSHIFT INT"""
485    p[0] = "%s << %s" % (p[1], p[3])
486    if self.parse_debug: DumpReduction('value', p)
487
488# Integers are numbers which may not be floats used in cases like array sizes.
489  def p_integer(self, p):
490    """integer : HEX
491               | INT
492               | OCT"""
493    p[0] = p[1]
494    if self.parse_debug: DumpReduction('integer', p)
495
496#
497# Expression
498#
499# A simple arithmetic expression.
500#
  # Operator precedence table read by PLY when building the parser.  Rows are
  # listed from lowest to highest precedence, each with its associativity.
  # UMINUS is not a token in the input: it is a name referenced with %prec in
  # p_expression_unop to give the unary '-' its own precedence.
  precedence = (
    ('left','|','&','^'),
    ('left','LSHIFT','RSHIFT'),
    ('left','+','-'),
    ('left','*','/'),
    ('right','UMINUS','~'),
    )
508
509  def p_expression_binop(self, p):
510    """expression : expression LSHIFT expression
511                  | expression RSHIFT expression
512                  | expression '|' expression
513                  | expression '&' expression
514                  | expression '^' expression
515                  | expression '+' expression
516                  | expression '-' expression
517                  | expression '*' expression
518                  | expression '/' expression"""
519    p[0] = "%s %s %s" % (str(p[1]), str(p[2]), str(p[3]))
520    if self.parse_debug: DumpReduction('expression_binop', p)
521
522  def p_expression_unop(self, p):
523    """expression : '-' expression %prec UMINUS
524                  | '~' expression %prec '~'"""
525    p[0] = "%s%s" % (str(p[1]), str(p[2]))
526    if self.parse_debug: DumpReduction('expression_unop', p)
527
528  def p_expression_term(self, p):
529    "expression : '(' expression ')'"
530    p[0] = "%s%s%s" % (str(p[1]), str(p[2]), str(p[3]))
531    if self.parse_debug: DumpReduction('expression_term', p)
532
533  def p_expression_symbol(self, p):
534    "expression : SYMBOL"
535    p[0] = p[1]
536    if self.parse_debug: DumpReduction('expression_symbol', p)
537
538  def p_expression_integer(self, p):
539    "expression : integer"
540    p[0] = p[1]
541    if self.parse_debug: DumpReduction('expression_integer', p)
542
543#
544# Array List
545#
# Defines a list of array sizes (if any).
547#
548  def p_arrays(self, p):
549    """arrays : '[' ']' arrays
550              | '[' integer ']' arrays
551              | """
552    # If there are 3 tokens plus a return slot it is an unsized array
553    if len(p) == 4:
554      array = self.BuildProduction('Array', p, 1)
555      p[0] = ListFromConcat(array, p[3])
556    # If there are 4 tokens plus a return slot it is a fixed array
557    elif len(p) == 5:
558      count = self.BuildAttribute('FIXED', p[2])
559      array = self.BuildProduction('Array', p, 2, [count])
560      p[0] = ListFromConcat(array, p[4])
561    # If there is only a return slot, do not fill it for this terminator.
562    elif len(p) == 1: return
563    if self.parse_debug: DumpReduction('arrays', p)
564
565
566# An identifier is a legal value for a parameter or attribute name. Lots of
567# existing IDL files use "callback" as a parameter/attribute name, so we allow
568# a SYMBOL or the CALLBACK keyword.
569  def p_identifier(self, p):
570    """identifier : SYMBOL
571                  | CALLBACK"""
572    p[0] = p[1]
573    # Save the line number of the underlying token (otherwise it gets
574    # discarded), since we use it in the productions with an identifier in
575    # them.
576    p.set_lineno(0, p.lineno(1))
577
578#
579# Parameter List
580#
581# A parameter list is a collection of arguments which are passed to a
582# function.
583#
584  def p_param_list(self, p):
585    """param_list : '(' param_item param_cont ')'
586                  | '(' ')' """
587    if len(p) > 3:
588      args = ListFromConcat(p[2], p[3])
589    else:
590      args = []
591    p[0] = self.BuildProduction('Callspec', p, 1, args)
592    if self.parse_debug: DumpReduction('param_list', p)
593
594  def p_param_item(self, p):
595    """param_item : modifiers optional SYMBOL arrays identifier"""
596    typeref = self.BuildAttribute('TYPEREF', p[3])
597    children = ListFromConcat(p[1], p[2], typeref, p[4])
598    p[0] = self.BuildNamed('Param', p, 5, children)
599    if self.parse_debug: DumpReduction('param_item', p)
600
601  def p_optional(self, p):
602    """optional : OPTIONAL
603                | """
604    if len(p) == 2:
605      p[0] = self.BuildAttribute('OPTIONAL', True)
606
607
608  def p_param_cont(self, p):
609    """param_cont : ',' param_item param_cont
610                  | """
611    if len(p) > 1:
612      p[0] = ListFromConcat(p[2], p[3])
613      if self.parse_debug: DumpReduction('param_cont', p)
614
615  def p_param_error(self, p):
616    """param_cont : error param_cont"""
617    p[0] = p[2]
618
619
620#
621# Typedef
622#
623# A typedef creates a new referencable type.  The typedef can specify an array
624# definition as well as a function declaration.
625#
626  def p_typedef_data(self, p):
627    """typedef_decl : modifiers TYPEDEF SYMBOL SYMBOL ';' """
628    typeref = self.BuildAttribute('TYPEREF', p[3])
629    children = ListFromConcat(p[1], typeref)
630    p[0] = self.BuildNamed('Typedef', p, 4, children)
631    if self.parse_debug: DumpReduction('typedef_data', p)
632
633  def p_typedef_array(self, p):
634    """typedef_decl : modifiers TYPEDEF SYMBOL arrays SYMBOL ';' """
635    typeref = self.BuildAttribute('TYPEREF', p[3])
636    children = ListFromConcat(p[1], typeref, p[4])
637    p[0] = self.BuildNamed('Typedef', p, 5, children)
638    if self.parse_debug: DumpReduction('typedef_array', p)
639
640  def p_typedef_func(self, p):
641    """typedef_decl : modifiers TYPEDEF SYMBOL SYMBOL param_list ';' """
642    typeref = self.BuildAttribute('TYPEREF', p[3])
643    children = ListFromConcat(p[1], typeref, p[5])
644    p[0] = self.BuildNamed('Typedef', p, 4, children)
645    if self.parse_debug: DumpReduction('typedef_func', p)
646
647#
648# Enumeration
649#
650# An enumeration is a set of named integer constants.  An enumeration
651# is valid type which can be referenced in other definitions.
652#
653  def p_enum_block(self, p):
654    """enum_block : modifiers ENUM SYMBOL '{' enum_list '}' ';'"""
655    p[0] = self.BuildNamed('Enum', p, 3, ListFromConcat(p[1], p[5]))
656    if self.parse_debug: DumpReduction('enum_block', p)
657
658  # Recover from enum error and continue parsing at the next top match.
659  def p_enum_errorA(self, p):
660    """enum_block : modifiers ENUM error '{' enum_list '}' ';'"""
661    p[0] = []
662
663  def p_enum_errorB(self, p):
664    """enum_block : modifiers ENUM error ';'"""
665    p[0] = []
666
667  def p_enum_list(self, p):
668    """enum_list : modifiers SYMBOL '=' expression enum_cont
669                 | modifiers SYMBOL enum_cont"""
670    if len(p) > 4:
671      val  = self.BuildAttribute('VALUE', p[4])
672      enum = self.BuildNamed('EnumItem', p, 2, ListFromConcat(val, p[1]))
673      p[0] = ListFromConcat(enum, p[5])
674    else:
675      enum = self.BuildNamed('EnumItem', p, 2, p[1])
676      p[0] = ListFromConcat(enum, p[3])
677    if self.parse_debug: DumpReduction('enum_list', p)
678
679  def p_enum_cont(self, p):
680    """enum_cont : ',' enum_list
681                 |"""
682    if len(p) > 1: p[0] = p[2]
683    if self.parse_debug: DumpReduction('enum_cont', p)
684
685  def p_enum_cont_error(self, p):
686    """enum_cont : error enum_cont"""
687    p[0] = p[2]
688    if self.parse_debug: DumpReduction('enum_error', p)
689
690
691#
692# Label
693#
694# A label is a special kind of enumeration which allows us to go from a
695# set of labels
696#
697  def p_label_block(self, p):
698    """label_block : modifiers LABEL SYMBOL '{' label_list '}' ';'"""
699    p[0] = self.BuildNamed('Label', p, 3, ListFromConcat(p[1], p[5]))
700    if self.parse_debug: DumpReduction('label_block', p)
701
702  def p_label_list(self, p):
703    """label_list : modifiers SYMBOL '=' FLOAT label_cont"""
704    val  = self.BuildAttribute('VALUE', p[4])
705    label = self.BuildNamed('LabelItem', p, 2, ListFromConcat(val, p[1]))
706    p[0] = ListFromConcat(label, p[5])
707    if self.parse_debug: DumpReduction('label_list', p)
708
709  def p_label_cont(self, p):
710    """label_cont : ',' label_list
711                 |"""
712    if len(p) > 1: p[0] = p[2]
713    if self.parse_debug: DumpReduction('label_cont', p)
714
715  def p_label_cont_error(self, p):
716    """label_cont : error label_cont"""
717    p[0] = p[2]
718    if self.parse_debug: DumpReduction('label_error', p)
719
720
721#
722# Members
723#
724# A member attribute or function of a struct or interface.
725#
726  def p_member_attribute(self, p):
727    """member_attribute : modifiers SYMBOL arrays questionmark identifier"""
728    typeref = self.BuildAttribute('TYPEREF', p[2])
729    children = ListFromConcat(p[1], typeref, p[3], p[4])
730    p[0] = self.BuildNamed('Member', p, 5, children)
731    if self.parse_debug: DumpReduction('attribute', p)
732
733  def p_member_function(self, p):
734    """member_function : modifiers static SYMBOL arrays SYMBOL param_list"""
735    typeref = self.BuildAttribute('TYPEREF', p[3])
736    children = ListFromConcat(p[1], p[2], typeref, p[4], p[6])
737    p[0] = self.BuildNamed('Member', p, 5, children)
738    if self.parse_debug: DumpReduction('function', p)
739
740  def p_static(self, p):
741    """static : STATIC
742              | """
743    if len(p) == 2:
744      p[0] = self.BuildAttribute('STATIC', True)
745
746  def p_questionmark(self, p):
747    """questionmark : '?'
748                    | """
749    if len(p) == 2:
750      p[0] = self.BuildAttribute('OPTIONAL', True)
751
752#
753# Interface
754#
755# An interface is a named collection of functions.
756#
757  def p_interface_block(self, p):
758    """interface_block : modifiers INTERFACE SYMBOL '{' interface_list '}' ';'"""
759    p[0] = self.BuildNamed('Interface', p, 3, ListFromConcat(p[1], p[5]))
760    if self.parse_debug: DumpReduction('interface_block', p)
761
762  def p_interface_error(self, p):
763    """interface_block : modifiers INTERFACE error '{' interface_list '}' ';'"""
764    p[0] = []
765
766  def p_interface_list(self, p):
767    """interface_list : member_function ';' interface_list
768                      | """
769    if len(p) > 1 :
770      p[0] = ListFromConcat(p[1], p[3])
771      if self.parse_debug: DumpReduction('interface_list', p)
772
773
774#
775# Struct
776#
777# A struct is a named collection of members which in turn reference other
778# types.  The struct is a referencable type.
779#
780  def p_struct_block(self, p):
781    """struct_block : modifiers STRUCT SYMBOL '{' struct_list '}' ';'"""
782    children = ListFromConcat(p[1], p[5])
783    p[0] = self.BuildNamed('Struct', p, 3, children)
784    if self.parse_debug: DumpReduction('struct_block', p)
785
786  # Recover from struct error and continue parsing at the next top match.
787  def p_struct_error(self, p):
788    """enum_block : modifiers STRUCT error '{' struct_list '}' ';'"""
789    p[0] = []
790
791  def p_struct_list(self, p):
792    """struct_list : member_attribute ';' struct_list
793                   | member_function ';' struct_list
794                   |"""
795    if len(p) > 1: p[0] = ListFromConcat(p[1], p[3])
796
797
798#
799# Parser Errors
800#
# p_error is called whenever the parser can not find a pattern match for
# a set of items from the current state.  The p_error function defined here
# logs an error, and parsing recovery happens as the p_<type>_error functions
# defined above are called.  This allows the parser to continue so as to
# capture more than one error per file.
806#
807  def p_error(self, t):
808    filename = self.lexobj.filename
809    self.parse_errors += 1
810    if t:
811      lineno = t.lineno
812      pos = t.lexpos
813      prev = self.yaccobj.symstack[-1]
814      if type(prev) == lex.LexToken:
815        msg = "Unexpected %s after %s." % (
816            TokenTypeName(t), TokenTypeName(prev))
817      else:
818        msg = "Unexpected %s." % (t.value)
819    else:
820      lineno = self.last.lineno
821      pos = self.last.lexpos
822      msg = "Unexpected end of file after %s." % TokenTypeName(self.last)
823      self.yaccobj.restart()
824
825    # Attempt to remap the error to a friendlier form
826    if msg in ERROR_REMAP:
827      msg = ERROR_REMAP[msg]
828
829    # Log the error
830    ErrOut.LogLine(filename, lineno, pos, msg)
831
832  def Warn(self, node, msg):
833    WarnOut.LogLine(node.filename, node.lineno, node.pos, msg)
834    self.parse_warnings += 1
835
836  def __init__(self):
837    IDLLexer.__init__(self)
838    self.yaccobj = yacc.yacc(module=self, tabmodule=None, debug=False,
839                             optimize=0, write_tables=0)
840
841    self.build_debug = GetOption('build_debug')
842    self.parse_debug = GetOption('parse_debug')
843    self.token_debug = GetOption('token_debug')
844    self.verbose = GetOption('verbose')
845    self.parse_errors = 0
846
847#
848# Tokenizer
849#
# The token function returns the next token provided by IDLLexer for matching
# against the leaf patterns.
852#
853  def token(self):
854    tok = self.lexobj.token()
855    if tok:
856      self.last = tok
857      if self.token_debug:
858        InfoOut.Log("TOKEN %s(%s)" % (tok.type, tok.value))
859    return tok
860
861#
862# BuildProduction
863#
864# Production is the set of items sent to a grammar rule resulting in a new
865# item being returned.
866#
867# p - Is the Yacc production object containing the stack of items
868# index - Index into the production of the name for the item being produced.
# cls - The type of item being produced
870# childlist - The children of the new item
871  def BuildProduction(self, cls, p, index, childlist=None):
872    if not childlist: childlist = []
873    filename = self.lexobj.filename
874    lineno = p.lineno(index)
875    pos = p.lexpos(index)
876    out = IDLNode(cls, filename, lineno, pos, childlist)
877    if self.build_debug:
878      InfoOut.Log("Building %s" % out)
879    return out
880
881  def BuildNamed(self, cls, p, index, childlist=None):
882    if not childlist: childlist = []
883    childlist.append(self.BuildAttribute('NAME', p[index]))
884    return self.BuildProduction(cls, p, index, childlist)
885
886  def BuildComment(self, cls, p, index):
887    name = p[index]
888
889    # Remove comment markers
890    lines = []
891    if name[:2] == '//':
892      # For C++ style, remove any leading whitespace and the '//' marker from
893      # each line.
894      form = 'cc'
895      for line in name.split('\n'):
896        start = line.find('//')
897        lines.append(line[start+2:])
898    else:
899      # For C style, remove ending '*/''
900      form = 'c'
901      for line in name[:-2].split('\n'):
902        # Remove characters until start marker for this line '*' if found
903        # otherwise it should be blank.
904        offs = line.find('*')
905        if offs >= 0:
906          line = line[offs + 1:].rstrip()
907        else:
908          line = ''
909        lines.append(line)
910    name = '\n'.join(lines)
911
912    childlist = [self.BuildAttribute('NAME', name),
913                 self.BuildAttribute('FORM', form)]
914    return self.BuildProduction(cls, p, index, childlist)
915
916#
917# BuildAttribute
918#
919# An ExtendedAttribute is a special production that results in a property
920# which is applied to the adjacent item.  Attributes have no children and
921# instead represent key/value pairs.
922#
923  def BuildAttribute(self, key, val):
924    return IDLAttribute(key, val)
925
926
927#
928# ParseData
929#
930# Attempts to parse the current data loaded in the lexer.
931#
932  def ParseData(self, data, filename='<Internal>'):
933    self.SetData(filename, data)
934    try:
935      self.parse_errors = 0
936      self.parse_warnings = 0
937      return self.yaccobj.parse(lexer=self)
938
939    except lex.LexError as le:
940      ErrOut.Log(str(le))
941      return []
942
943#
944# ParseFile
945#
# Loads a new file into the lexer and attempts to parse it.
947#
948  def ParseFile(self, filename):
949    date = time.ctime(os.path.getmtime(filename))
950    data = open(filename).read()
951    if self.verbose:
952      InfoOut.Log("Parsing %s" % filename)
953    try:
954      out = self.ParseData(data, filename)
955
956      # If we have a src root specified, remove it from the path
957      srcroot = GetOption('srcroot')
958      if srcroot and filename.find(srcroot) == 0:
959        filename = filename[len(srcroot) + 1:]
960      filenode = IDLFile(filename, out, self.parse_errors + self.lex_errors)
961      filenode.SetProperty('DATETIME', date)
962      return filenode
963
964    except Exception as e:
965      ErrOut.LogLine(filename, self.last.lineno, self.last.lexpos,
966                     'Internal parsing error - %s.' % str(e))
967      raise
968
969
970
971#
972# Flatten Tree
973#
974# Flattens the tree of IDLNodes for use in testing.
975#
def FlattenTree(node):
  """Flatten the tree below node into a list of node strings.

  A node contributes str(node) only when at least one of its direct
  children is a 'Comment'; its entry precedes those of its descendants.
  Comment children themselves are not descended into.
  """
  flattened = []
  has_comment = False
  for kid in node.children:
    if not kid.IsA('Comment'):
      flattened += FlattenTree(kid)
      continue
    has_comment = True

  if has_comment:
    flattened.insert(0, str(node))
  return flattened
988
989
def TestErrors(filename, filenode):
  """Check a parsed test file against the expectations in its comments.

  The file is lexed again to collect comments whose first word is OK or
  FAIL.  OK comments are compared, in order, with the flattened node strings
  of filenode's children; FAIL comments are compared, in order, with the
  errors drained from ErrOut.

  Args:
    filename: Path of the test file; re-read here to find the comments.
    filenode: Node produced by parsing that file.

  Returns:
    The number of mismatches found; 0 means the file behaved as expected.
  """
  nodelist = filenode.GetChildren()

  lexer = IDLLexer()
  # Close the file as soon as it has been read instead of leaking the handle.
  with open(filename) as srcfile:
    data = srcfile.read()
  lexer.SetData(filename, data)

  pass_comments = []
  fail_comments = []
  while True:
    tok = lexer.lexobj.token()
    if tok is None: break
    if tok.type != 'COMMENT': continue
    # Drop three characters from each end of the comment text and split the
    # remainder into words.
    args = tok.value[3:-3].split()
    # A comment with no words carries no expectation; skipping it also
    # avoids indexing into an empty list.
    if not args: continue
    expectation = (tok.lineno, ' '.join(args[1:]))
    if args[0] == 'OK':
      pass_comments.append(expectation)
    elif args[0] == 'FAIL':
      fail_comments.append(expectation)

  obj_list = []
  for node in nodelist:
    obj_list.extend(FlattenTree(node))

  errors = 0

  #
  # Check for expected successes
  #
  obj_cnt = len(obj_list)
  pass_cnt = len(pass_comments)
  if obj_cnt != pass_cnt:
    InfoOut.Log("Mismatched pass (%d) vs. nodes built (%d)."
        % (pass_cnt, obj_cnt))
    InfoOut.Log("PASS: %s" % [x[1] for x in pass_comments])
    InfoOut.Log("OBJS: %s" % obj_list)
    errors += 1

  # zip() stops at the shorter list, so a count mismatch (already reported
  # above) cannot cause an out-of-range access here.
  for (line, comment), obj in zip(pass_comments, obj_list):
    if obj != comment:
      ErrOut.LogLine(filename, line, None, "OBJ %s : EXPECTED %s\n" %
                     (obj, comment))
      errors += 1

  #
  # Check for expected errors
  #
  err_list = ErrOut.DrainLog()
  err_cnt = len(err_list)
  fail_cnt = len(fail_comments)
  if err_cnt != fail_cnt:
    InfoOut.Log("Mismatched fail (%d) vs. errors seen (%d)."
        % (fail_cnt, err_cnt))
    InfoOut.Log("FAIL: %s" % [x[1] for x in fail_comments])
    InfoOut.Log("ERRS: %s" % err_list)
    errors += 1

  for (line, comment), raw_err in zip(fail_comments, err_list):
    # Compare the whitespace-stripped error: the expected text is rebuilt
    # from split words and so never has surrounding whitespace itself.
    err = raw_err.strip()
    if err != comment:
      ErrOut.Log("%s(%d) Error\n\tERROR : %s\n\tEXPECT: %s" % (
        filename, line, err, comment))
      errors += 1

  return errors
1061
1062
def TestFile(parser, filename):
  """Parse one test file with errors captured, then verify the outcome.

  Args:
    parser: Object whose ParseFile method is used to parse the file.
    filename: Path of the test file.

  Returns:
    The mismatch count reported by TestErrors for this file.
  """
  # Capture errors instead of reporting them so we can compare them
  # with the expected errors.
  ErrOut.SetConsole(False)
  ErrOut.SetCapture(True)
  try:
    filenode = parser.ParseFile(filename)
  finally:
    # Re-enable output even when parsing raises, so a failure here does not
    # leave ErrOut silenced for the rest of the run.
    ErrOut.SetConsole(True)
    ErrOut.SetCapture(False)

  # Compare captured errors
  return TestErrors(filename, filenode)
1077
1078
def TestErrorFiles(filter):
  """Run the parser tests over test_parser/*.idl next to this script.

  Args:
    filter: Optional collection of filenames; when non-empty, only files
      contained in it are tested.

  Returns:
    The total number of errors reported by TestFile across all files.
  """
  script_dir = os.path.dirname(sys.argv[0])
  candidates = glob.glob(os.path.join(script_dir, 'test_parser', '*.idl'))
  parser = IDLParser()
  failures = 0
  for idl_path in candidates:
    if not filter or idl_path in filter:
      errs = TestFile(parser, idl_path)
      if errs:
        ErrOut.Log("%s test failed with %d error(s)." % (idl_path, errs))
        failures += errs

  if not failures:
    InfoOut.Log("Passed parsing test.")
  else:
    ErrOut.Log("Failed parsing test.")
  return failures
1097
1098
def TestNamespaceFiles(filter):
  """Run the namespace test over test_namespace/*.idl next to this script.

  Args:
    filter: Optional collection of filenames; when non-empty, only files
      contained in it are tested.

  Returns:
    0 when there is nothing to test, otherwise the 'ERRORS' property of the
    AST built from the selected files.
  """
  script_dir = os.path.dirname(sys.argv[0])
  pattern = os.path.join(script_dir, 'test_namespace', '*.idl')
  testnames = [path for path in glob.glob(pattern)
               if not filter or path in filter]

  # If we have no files to test, then skip this test
  if not testnames:
    InfoOut.Log('No files to test for namespace.')
    return 0

  # Informational output is silenced while the files are parsed.
  InfoOut.SetConsole(False)
  ast = ParseFiles(testnames)
  InfoOut.SetConsole(True)

  errs = ast.GetProperty('ERRORS')
  if not errs:
    InfoOut.Log("Passed namespace test.")
  else:
    ErrOut.Log("Failed namespace test.")
  return errs
1124
1125
1126
def FindVersionError(releases, node):
  """Count release mismatches in node and everything below it.

  For each 'Interface' or 'Struct' node, the space-separated words after a
  'REL:' prefix in its Comment (an empty list when there is no such
  comment) are compared with the sorted, de-duplicated first_release values
  of the node for the given releases.  A difference is reported through
  node.Error and counted.

  Args:
    releases: Releases used to index each node's first_release mapping.
    node: Root of the subtree to check.

  Returns:
    The number of mismatching nodes found in the subtree.
  """
  mismatches = 0
  if node.IsA('Interface', 'Struct'):
    expected = []
    comment = node.GetOneOf('Comment')
    if comment and comment.GetName()[:4] == 'REL:':
      expected = comment.GetName()[5:].strip().split(' ')

    actual = sorted(set(node.first_release[rel] for rel in releases))
    if actual != expected:
      node.Error("Mismatch in releases: %s vs %s." % (
          expected, actual))
      mismatches += 1

  return mismatches + sum(FindVersionError(releases, child)
                          for child in node.GetChildren())
1145
1146
def TestVersionFiles(filter):
  """Run the version test over test_version/*.idl next to this script.

  Args:
    filter: Optional collection of filenames; when non-empty, only files
      contained in it are tested.

  Returns:
    0 when there is nothing to test, otherwise the number of release
    mismatches found plus the AST's own error count.
  """
  script_dir = os.path.dirname(sys.argv[0])
  pattern = os.path.join(script_dir, 'test_version', '*.idl')
  testnames = [path for path in glob.glob(pattern)
               if not filter or path in filter]

  # If we have no files to test, then skip this test
  if not testnames:
    InfoOut.Log('No files to test for version.')
    return 0

  ast = ParseFiles(testnames)
  errs = FindVersionError(ast.releases, ast)
  errs += ast.errors

  if not errs:
    InfoOut.Log("Passed version test.")
  else:
    ErrOut.Log("Failed version test.")
  return errs
1171
1172
# Directories, relative to the 'srcroot' option, that are searched for *.idl
# files when ParseFiles is called without explicit filenames.
default_dirs = ['.', 'trusted', 'dev', 'private', 'extensions',
                'extensions/dev']
def ParseFiles(filenames):
  """Parse a set of IDL files into a single linted AST.

  Args:
    filenames: Paths of the files to parse.  When empty, every *.idl file
      in the default directories under the 'srcroot' option is used.

  Returns:
    The IDLAst built from the parsed files, after Lint has been run on it.
  """
  parser = IDLParser()
  filenodes = []

  if not filenames:
    filenames = []
    srcroot = GetOption('srcroot')
    # Work on a copy: extending the module-level list in place would make it
    # grow on every call.  Skip 'private' when it is already listed so that
    # the same files are not globbed and parsed twice.
    dirs = list(default_dirs)
    if GetOption('include_private') and 'private' not in dirs:
      dirs.append('private')
    for dirname in dirs:
      srcdir = os.path.join(srcroot, dirname, '*.idl')
      srcdir = os.path.normpath(srcdir)
      filenames += sorted(glob.glob(srcdir))

  if not filenames:
    ErrOut.Log('No sources provided.')

  for filename in filenames:
    filenode = parser.ParseFile(filename)
    filenodes.append(filenode)

  ast = IDLAst(filenodes)
  if GetOption('dump_tree'): ast.Dump(0)

  Lint(ast)
  return ast
1202
1203
def Main(args):
  """Entry point: run the self tests or parse the given IDL files.

  Args:
    args: Command-line arguments, without the program name.

  Returns:
    In test mode (the 'test' option), -1 if any test suite reported errors
    and 0 otherwise.  In normal mode, the 'ERRORS' property of the AST that
    was built.
  """
  filenames = ParseOptions(args)

  # If testing...
  if GetOption('test'):
    # Add the results together so a failure in any suite fails the run;
    # plain assignment would let the last suite's result hide earlier ones.
    # 'or 0' guards against a suite returning None instead of a count.
    errs = TestErrorFiles(filenames) or 0
    errs += TestNamespaceFiles(filenames) or 0
    errs += TestVersionFiles(filenames) or 0
    if errs:
      ErrOut.Log("Parser failed with %d errors." % errs)
      return  -1
    return 0

  # Otherwise, build the AST
  ast = ParseFiles(filenames)
  errs = ast.GetProperty('ERRORS')
  if errs:
    ErrOut.Log('Found %d error(s).' % errs)
  InfoOut.Log("%d files processed." % len(filenames))
  return errs
1224
1225
if __name__ == '__main__':
  # Run Main on the command-line arguments (without the program name) and
  # use its result as the process exit status.
  exit_status = Main(sys.argv[1:])
  sys.exit(exit_status)
1228
1229