15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#!/usr/bin/env python
25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# Copyright (c) 2012 The Chromium Authors. All rights reserved.
35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# Use of this source code is governed by a BSD-style license that can be
45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# found in the LICENSE file.
55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)""" Lexer for PPAPI IDL """
75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#
95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# IDL Lexer
105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#
# The lexer uses the PLY lex library to build a tokenizer which understands
125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# WebIDL tokens.
135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#
145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# WebIDL, and WebIDL regular expressions can be found at:
155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#   http://dev.w3.org/2006/webapi/WebIDL/
165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# PLY can be found at:
175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#   http://www.dabeaz.com/ply/
185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)import os.path
205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)import re
215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)import sys
225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#
245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# Try to load the ply module, if not, then assume it is in the third_party
255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# directory, relative to ppapi
265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#
try:
  from ply import lex
except ImportError:
  # Only a failed import should trigger the fallback; a bare 'except' would
  # also swallow KeyboardInterrupt/SystemExit and hide unrelated errors.
  # Fall back to the 'third_party' directory two levels above this file.
  module_path, module_name = os.path.split(__file__)
  third_party = os.path.join(module_path, '..', '..', 'third_party')
  sys.path.append(third_party)
  from ply import lex
345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)from idl_option import GetOption, Option, ParseOptions
365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
# Declare the 'output' option; its value is read back with
# GetOption('output') in TestSame and Main.  NOTE(review): presumably Option()
# registers the flag with idl_option at import time -- confirm in idl_option.
Option('output', 'Generate output.')
395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#
415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# IDL Lexer
425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#
class IDLLexer(object):
  """Tokenizer for PPAPI IDL text, built on the PLY lex library.

  The instance hands itself to lex.lex() (see __init__), which reads the
  'tokens', 'literals' and 't_*' attributes below to build the lexer stored in
  self.lexobj.  Call SetData() before pulling tokens from self.lexobj.

  Note: the leading string of each t_* function is the regular expression for
  that rule, not documentation, so those strings must not be edited or
  preceded by a docstring.
  """

  # 'tokens' is a value required by lex which specifies the complete list
  # of valid token types.
  tokens = [
    # Symbol and keywords types
      'COMMENT',
      'DESCRIBE',
      'ENUM',
      'LABEL',
      'SYMBOL',
      'INLINE',
      'INTERFACE',
      'STRUCT',
      'TYPEDEF',
      'OR',

    # Extra WebIDL keywords
      'CALLBACK',
      'DICTIONARY',
      'OPTIONAL',
      'STATIC',

    # Invented for apps use
      'NAMESPACE',

    # Data types
      'FLOAT',
      'OCT',
      'INT',
      'HEX',
      'STRING',

    # Operators
      'LSHIFT',
      'RSHIFT'
  ]

  # 'keywords' is a map of string to token type.  All SYMBOL tokens are
  # matched against keywords, to determine if the token is actually a keyword.
  #
  # NOTE(review): 'readonly' maps to 'READONLY', which is not listed in
  # 'tokens' above -- confirm whether that token type is meant to be declared.
  keywords = {
    'describe' : 'DESCRIBE',
    'enum'  : 'ENUM',
    'label' : 'LABEL',
    'interface' : 'INTERFACE',
    'readonly' : 'READONLY',
    'struct' : 'STRUCT',
    'typedef' : 'TYPEDEF',

    'callback' : 'CALLBACK',
    'dictionary' : 'DICTIONARY',
    'optional' : 'OPTIONAL',
    'static' : 'STATIC',
    'namespace' : 'NAMESPACE',

    'or' : 'OR',
  }

  # 'literals' is a value expected by lex which specifies a list of valid
  # literal tokens, meaning the token type and token value are identical.
  literals = '"*.(){}[],;:=+-/~|&^?'

  # Token definitions
  #
  # Lex assumes any value or function in the form of 't_<TYPE>' represents a
  # regular expression where a match will emit a token of type <TYPE>.  In the
  # case of a function, the function is called when a match is made. These
  # definitions come from WebIDL.

  # 't_ignore' is a special match of items to ignore
  t_ignore = ' \t'

  # Constant values
  t_FLOAT = r'-?(\d+\.\d*|\d*\.\d+)([Ee][+-]?\d+)?|-?\d+[Ee][+-]?\d+'
  t_INT = r'-?[0-9]+[uU]?'
  t_OCT = r'-?0[0-7]+'
  t_HEX = r'-?0[Xx][0-9A-Fa-f]+'
  t_LSHIFT = r'<<'
  t_RSHIFT = r'>>'

  # A line ending '\n', we use this to increment the line number.
  # Nothing is returned, so the match only updates the line bookkeeping.
  def t_LINE_END(self, t):
    r'\n+'
    self.AddLines(len(t.value))

  # We do not process escapes in the IDL strings.  Strings are exclusively
  # used for attributes, and not used as typical 'C' constants.
  # The surrounding quotes are stripped from the token value, and any newlines
  # inside the string are counted so later line numbers stay correct.
  def t_STRING(self, t):
    r'"[^"]*"'
    t.value = t.value[1:-1]
    self.AddLines(t.value.count('\n'))
    return t

  # A C or C++ style comment:  /* xxx */ or //
  # Consecutive '//' lines (optionally indented) are matched as one token.
  def t_COMMENT(self, t):
    r'(/\*(.|\n)*?\*/)|(//.*(\n[ \t]*//.*)*)'
    self.AddLines(t.value.count('\n'))
    return t

  # Return a "preprocessor" inline block: everything from '#inline ' through
  # the rest of the line containing '#endinl'.
  def t_INLINE(self, t):
    r'\#inline (.|\n)*?\#endinl.*'
    self.AddLines(t.value.count('\n'))
    return t

  # A symbol or keyword.
  def t_KEYWORD_SYMBOL(self, t):
    r'_?[A-Za-z][A-Za-z_0-9]*'

    # All non-keywords are assumed to be symbols.  The lookup uses the raw
    # text, so an underscore-prefixed name never matches a keyword.
    t.type = self.keywords.get(t.value, 'SYMBOL')

    # We strip leading underscores so that you can specify symbols with the same
    # value as a keywords (E.g. a dictionary named 'interface').
    if t.value[0] == '_':
      t.value = t.value[1:]
    return t

  # Error hook: writes a message for the offending input to stderr and bumps
  # self.lex_errors.  It does not advance the lexer position itself.
  #
  # NOTE(review): SetData starts lineno at 1 with one entry in self.index, and
  # AddLines grows both in step, so 'line >= len(self.index)' looks like it
  # holds on the first error in every case -- confirm the intended condition.
  def t_ANY_error(self, t):
    msg = "Unrecognized input"
    line = self.lexobj.lineno

    # If that line has not been accounted for, then we must have hit
    # EoF, so compute the beginning of the line that caused the problem.
    if line >= len(self.index):
      # Find the offset in the line of the first word causing the issue
      word = t.value.split()[0]
      offs = self.lines[line - 1].find(word)
      # Add the computed line's starting position
      self.index.append(self.lexobj.lexpos - offs)
      msg = "Unexpected EoF reached after"

    # Column of the error relative to the recorded start of the line.
    pos = self.lexobj.lexpos - self.index[line]
    file = self.lexobj.filename
    out = self.ErrorMessage(file, line, pos, msg)
    sys.stderr.write(out + '\n')
    self.lex_errors += 1


  # Advance the lexer's line counter by 'count' and record the current lexer
  # position once per added line in self.index.
  def AddLines(self, count):
    # Set the lexer position for the beginning of the next line.  In the case
    # of multiple lines, tokens can not exist on any of the lines except the
    # last one, so the recorded value for previous lines are unused.  We still
    # fill the array however, to make sure the line count is correct.
    self.lexobj.lineno += count
    for i in range(count):
      self.index.append(self.lexobj.lexpos)

  # Format 'msg' as "<file>(<line + 1>) : <msg>", or as "<BuiltIn> : <msg>"
  # when no file name is given.
  # NOTE(review): this adds 1 to 'line' while SourceLine subtracts 1 from the
  # same value -- confirm which line base callers are expected to pass.
  def FileLineMsg(self, file, line, msg):
    if file:  return "%s(%d) : %s" % (file, line + 1, msg)
    return "<BuiltIn> : %s" % msg

  # Return the source text of 'line' followed by a second line holding a '^'
  # placed 'pos' columns in.  The 'file' argument is not used here.
  def SourceLine(self, file, line, pos):
    # Expanding a single tab with a tab size of 'pos' yields the padding
    # in front of the caret.
    caret = '\t^'.expandtabs(pos)
    # We decrement the line number since the array is 0 based while the
    # line numbers are 1 based.
    return "%s\n%s" % (self.lines[line - 1], caret)

  # Build the full error text: a blank line, the file/line message, then the
  # offending source line with its caret marker.
  def ErrorMessage(self, file, line, pos, msg):
    return "\n%s\n%s" % (
        self.FileLineMsg(file, line, msg),
        self.SourceLine(file, line, pos))

  # Load 'data' (the text of 'filename') into the lexer and reset the line
  # tracking state (lineno, lines, index) and the error counter.
  def SetData(self, filename, data):
    # Start with line 1, not zero
    self.lexobj.lineno = 1
    self.lexobj.filename = filename
    self.lines = data.split('\n')
    self.index = [0]
    self.lexobj.input(data)
    self.lex_errors = 0

  # Build the PLY lexer from the rules defined on this object.
  def __init__(self):
    self.lexobj = lex.lex(object=self, lextab=None, optimize=0)
2175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#
2205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# FilesToTokens
2215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#
2225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# From a set of source file names, generate a list of tokens.
2235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#
def FilesToTokens(filenames, verbose=False):
  """Lex every named file and return all of the tokens in a single list.

  Args:
    filenames: iterable of paths to the source files, lexed in order.
    verbose: when true, write a '  Loaded <file>...' line to stdout per file.

  Returns:
    A list of the token objects produced by the lexer across all files.
  """
  lexer = IDLLexer()
  outlist = []
  for filename in filenames:
    # Read through a context manager so the handle is closed right away
    # instead of being left for the garbage collector.
    with open(filename) as source:
      data = source.read()
    lexer.SetData(filename, data)
    if verbose: sys.stdout.write('  Loaded %s...\n' % filename)
    # token() returns None once the current input is exhausted.
    while 1:
      t = lexer.lexobj.token()
      if t is None: break
      outlist.append(t)
  return outlist
2365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
def TokensFromText(text):
  """Lex |text| (reported as file 'unknown') and return the token values."""
  lexer = IDLLexer()
  lexer.SetData('unknown', text)
  # token() hands back None at end of input, which stops the iteration.
  return [tok.value for tok in iter(lexer.lexobj.token, None)]
2475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#
2495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# TextToTokens
2505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#
2515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# From a block of text, generate a list of tokens
2525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#
def TextToTokens(source):
  """Lex |source| (reported as file 'AUTO') and return the token values."""
  lexer = IDLLexer()
  values = []
  lexer.SetData('AUTO', source)
  next_token = lexer.lexobj.token
  while True:
    tok = next_token()
    if tok is None:
      # End of input reached.
      return values
    values.append(tok.value)
2625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#
2655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# TestSame
2665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#
# From a set of token values, generate a new source text by joining with a
# newline.  The new source is then tokenized and compared against the
2695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# old set.
2705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#
def TestSame(values1):
  """Check that re-lexing the joined token values gives the same values.

  Args:
    values1: list of token value strings from a previous lexer run.

  Returns:
    0 when the re-lexed values equal |values1|, -1 otherwise.  Differences are
    written to stdout.  When the 'output' option is set, both texts are also
    written to original.txt and tokenized.txt in the current directory.
  """
  # Recreate the source from the tokens.  We use newline instead of whitespace
  # since the '//' and #inline regex are line sensitive.
  src1 = '\n'.join(values1)
  values2 = TextToTokens(src1)
  # The text rebuilt from the second pass; used for the dump files and for the
  # failure report below (previously referenced without being defined).
  src2 = '\n'.join(values2)

  count1 = len(values1)
  count2 = len(values2)
  if count1 != count2:
    sys.stdout.write("Size mismatch original %d vs %d\n\n" % (count1, count2))

  # zip() stops at the shorter list, so only positions present in both lists
  # are compared.
  for i, (before, after) in enumerate(zip(values1, values2)):
    if before != after:
      sys.stdout.write("%d >>%s<< >>%s<<\n" % (i, before, after))

  if GetOption('output'):
    sys.stdout.write('Generating original.txt and tokenized.txt\n')
    with open('original.txt', 'w') as original:
      original.write(src1)
    with open('tokenized.txt', 'w') as tokenized:
      tokenized.write(src2)

  if values1 == values2:
    sys.stdout.write('Same: Pass\n')
    return 0

  sys.stdout.write("****************\n%s\n%s***************\n\n" % (src1, src2))
  sys.stdout.write('Same: Failed\n')
  return -1
2995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#
3025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# TestExpect
3035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#
# From a set of token pairs, verify the type field of the second matches
3055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# the value of the first, so that:
3065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# INT 123 FLOAT 1.1
3075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# will generate a passing test, where the first token is the SYMBOL INT,
3085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# and the second token is the INT 123, third token is the SYMBOL FLOAT and
3095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)# the fourth is the FLOAT 1.1, etc...
def TestExpect(tokens):
  """Verify (expected-type, token) pairs taken from a flat token list.

  Tokens are consumed two at a time: the value of the first names the type
  that the second token must have.

  Args:
    tokens: sequence of token objects exposing 'type' and 'value' attributes.

  Returns:
    0 when every pair matches, -1 otherwise.  Each mismatch is written to
    stderr.  A trailing token without a partner counts as a mismatch instead
    of raising IndexError.
  """
  count = len(tokens)
  errors = 0

  # Walk complete pairs only; a dangling last token is handled below.
  for index in range(0, count - 1, 2):
    expected = tokens[index].value
    token = tokens[index + 1]

    if expected != token.type:
      sys.stderr.write('Mismatch:  Expected %s, but got %s = %s.\n' %
                       (expected, token.type, token.value))
      errors += 1

  if count % 2:
    sys.stderr.write('Mismatch:  Expected %s, but got end of input.\n' %
                     tokens[-1].value)
    errors += 1

  if not errors:
    sys.stdout.write('Expect: Pass\n')
    return 0

  sys.stdout.write('Expect: Failed\n')
  return -1
3305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
def Main(args):
  """Lex the files named on the command line and optionally self-test.

  Args:
    args: command-line arguments, without the program name.

  Returns:
    0 on success; -1 when a self-test fails or the lexer raises LexError.
  """
  sources = ParseOptions(args)

  try:
    tokens = FilesToTokens(sources, GetOption('verbose'))
    values = [tok.value for tok in tokens]
    if GetOption('output'):
      sys.stdout.write(' <> '.join(values) + '\n')
    # Both checks return non-zero on failure; TestExpect only runs when
    # TestSame passed.
    if GetOption('test') and (TestSame(values) or TestExpect(tokens)):
      return -1
    return 0
  except lex.LexError as le:
    sys.stderr.write('%s\n' % str(le))
    return -1
3495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
# Script entry point: run Main on the command-line arguments and use its
# return value as the process exit status.
if __name__ == '__main__':
  exit_status = Main(sys.argv[1:])
  sys.exit(exit_status)
353