1# Copyright (C) 2013 Google Inc. All rights reserved.
2#
3# Redistribution and use in source and binary forms, with or without
4# modification, are permitted provided that the following conditions are
5# met:
6#
7#     * Redistributions of source code must retain the above copyright
8# notice, this list of conditions and the following disclaimer.
9#     * Redistributions in binary form must reproduce the above
10# copyright notice, this list of conditions and the following disclaimer
11# in the documentation and/or other materials provided with the
12# distribution.
13#     * Neither the name of Google Inc. nor the names of its
14# contributors may be used to endorse or promote products derived from
15# this software without specific prior written permission.
16#
17# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
29"""Lexer for Blink IDL.
30
31The lexer uses the PLY (Python Lex-Yacc) library to build a tokenizer which
32understands the Blink dialect of Web IDL and produces a token stream suitable
33for the Blink IDL parser.
34
35Blink IDL is identical to Web IDL at the token level, but the base lexer
36does not discard comments. We need to override (and not include comments in
37the token stream), as otherwise comments must be explicitly included in the
38phrase grammar of the parser.
39
FIXME: Change base lexer to discard comments, and simply use the base
lexer, eliminating this separate lexer.
42
43Web IDL:
44    http://www.w3.org/TR/WebIDL/
45Web IDL Grammar:
46    http://www.w3.org/TR/WebIDL/#idl-grammar
47PLY:
48    http://www.dabeaz.com/ply/
49
50Design doc:
51http://www.chromium.org/developers/design-documents/idl-compiler#TOC-Front-end
52"""
53
54# Disable attribute validation, as lint can't import parent class to check
55# pylint: disable=E1101
56
57import os.path
58import sys
59
# PLY is in Chromium src/third_party/ply
# The directory four levels above this file's directory is put on sys.path so
# that the "from ply import lex" below can be resolved from it.
module_path, module_name = os.path.split(__file__)
third_party = os.path.join(module_path, os.pardir, os.pardir, os.pardir, os.pardir)
# Insert at front to override system libraries, and after path[0] == script dir
sys.path.insert(1, third_party)
from ply import lex

# Base lexer is in Chromium src/tools/idl_parser
# tools_dir is appended (not inserted), so it is searched after every entry
# already on sys.path.
tools_dir = os.path.join(third_party, os.pardir, 'tools')
sys.path.append(tools_dir)
from idl_parser.idl_lexer import IDLLexer

# Name handed to lex.lex() as lextab=...; also the base file name of the cached
# table files (LEXTAB + '.py' / '.pyc' / '.pyo') deleted on rewrite_tables.
LEXTAB = 'lextab'
# Token names stripped from the inherited token list, since comments are
# ignored by this lexer rather than emitted.
REMOVE_TOKENS = ['COMMENT']
74
75
class BlinkIDLLexer(IDLLexer):
    """Lexer for Blink IDL that ignores comments instead of emitting them.

    Overrides the COMMENT rule of the base IDLLexer so that it yields no
    token, and removes 'COMMENT' from the token list before the PLY lexer
    object is built.
    """

    # ignore comments
    # NOTE: the raw string that opens the function body is the PLY regex for
    # this rule, not documentation. It must remain the first statement and
    # must not be reworded. The function returns nothing, so no COMMENT token
    # reaches the token stream.
    def t_COMMENT(self, t):
        r'(/\*(.|\n)*?\*/)|(//.*(\n[ \t]*//.*)*)'
        # Report the newlines swallowed by the comment to the base lexer
        # (presumably so that line numbers stay accurate -- see AddLines).
        self.AddLines(t.value.count('\n'))

    # Analogs to _AddToken/_AddTokens in base lexer
    # Needed to remove COMMENT token, since comments ignored
    def _RemoveToken(self, token):
        """Remove |token| from self.tokens; a missing token is not an error."""
        # NOTE(review): this mutates self.tokens in place. Whether that list is
        # per-instance or shared with the base class is decided by IDLLexer --
        # confirm there before relying on either.
        if token in self.tokens:
            self.tokens.remove(token)

    def _RemoveTokens(self, tokens):
        """Remove every token name in the iterable |tokens| from self.tokens."""
        for token in tokens:
            self._RemoveToken(token)

    def __init__(self, debug=False, optimize=True, outputdir=None,
                 rewrite_tables=False):
        """Build the PLY lexer object, optionally backed by a cached lex table.

        Args:
            debug: passed to lex.lex(); when true it also forces
                optimize=False and outputdir=None (no caching).
            optimize: passed to lex.lex() as its optimize flag.
            outputdir: directory passed to lex.lex() for the cached lex table.
                When set, it is also added to sys.path so the table can be
                imported as a module.
            rewrite_tables: when true and outputdir is set, any existing
                cached table files (LEXTAB + .py/.pyc/.pyo) in outputdir are
                deleted first so the table is regenerated.
        """
        if debug:
            # Turn off optimization and caching to help debugging
            optimize = False
            outputdir = None
        if outputdir:
            # Need outputdir in path because lex imports the cached lex table
            # as a Python module. Only add it once, so that building several
            # lexers does not keep growing sys.path with duplicate entries.
            if outputdir not in sys.path:
                sys.path.append(outputdir)

            if rewrite_tables:
                tablefile_root = os.path.join(outputdir, LEXTAB)
                # Also remove the .pyc/.pyo files, or they'll be used even if
                # the .py file doesn't exist.
                for ext in ('.py', '.pyc', '.pyo'):
                    try:
                        os.unlink(tablefile_root + ext)
                    except OSError:
                        # Best effort: the file usually just does not exist.
                        pass

        IDLLexer.__init__(self)
        # Overrides to parent class
        self._RemoveTokens(REMOVE_TOKENS)
        # Optimized mode substantially decreases startup time (by disabling
        # error checking), and also allows use of Python's optimized mode.
        # See: Optimized Mode
        # http://www.dabeaz.com/ply/ply.html#ply_nn15
        self._lexobj = lex.lex(object=self,
                               debug=debug,
                               optimize=optimize,
                               lextab=LEXTAB,
                               outputdir=outputdir)
125
126
127################################################################################
128
def main(argv):
    """Build and cache the lex table when this file is executed directly.

    Args:
        argv: command-line argument list; argv[0] is the program name and
            argv[1] is the output directory for the cached lex table.

    Returns:
        1 if the output directory argument is missing (a usage message is
        printed first). Otherwise nothing is returned explicitly, which
        sys.exit() treats as success.
    """
    if len(argv) < 2:
        # argv may be completely empty (e.g. called programmatically); fall
        # back to this file's path for the usage line instead of raising.
        program = argv[0] if argv else __file__
        # Parenthesised form prints the same text under Python 2's print
        # statement and is also valid as a Python 3 function call.
        print('Usage: %s OUTPUT_DIR' % program)
        return 1
    # Important: rewrite_tables=True causes the cache file to be deleted if it
    # exists, thus making sure that PLY doesn't load it instead of regenerating
    # the lex table.
    # The instance is built only for this side effect, so it is not kept.
    BlinkIDLLexer(outputdir=argv[1], rewrite_tables=True)
140
141
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == '__main__':
    sys.exit(main(sys.argv))
144