1324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver#!/usr/bin/ruby
2324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver# encoding: utf-8
3324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
4324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver=begin LICENSE
5324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
6324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver[The "BSD licence"]
7324c4644fee44b9898524c09511bd33c3f12e2dfBen GruverCopyright (c) 2009-2010 Kyle Yetter
8324c4644fee44b9898524c09511bd33c3f12e2dfBen GruverAll rights reserved.
9324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
10324c4644fee44b9898524c09511bd33c3f12e2dfBen GruverRedistribution and use in source and binary forms, with or without
11324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruvermodification, are permitted provided that the following conditions
12324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverare met:
13324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
14324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1. Redistributions of source code must retain the above copyright
15324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    notice, this list of conditions and the following disclaimer.
16324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 2. Redistributions in binary form must reproduce the above copyright
17324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    notice, this list of conditions and the following disclaimer in the
18324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    documentation and/or other materials provided with the distribution.
19324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 3. The name of the author may not be used to endorse or promote products
20324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    derived from this software without specific prior written permission.
21324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
22324c4644fee44b9898524c09511bd33c3f12e2dfBen GruverTHIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
23324c4644fee44b9898524c09511bd33c3f12e2dfBen GruverIMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
24324c4644fee44b9898524c09511bd33c3f12e2dfBen GruverOF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
25324c4644fee44b9898524c09511bd33c3f12e2dfBen GruverIN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
26324c4644fee44b9898524c09511bd33c3f12e2dfBen GruverINCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
27324c4644fee44b9898524c09511bd33c3f12e2dfBen GruverNOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28324c4644fee44b9898524c09511bd33c3f12e2dfBen GruverDATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29324c4644fee44b9898524c09511bd33c3f12e2dfBen GruverTHEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
31324c4644fee44b9898524c09511bd33c3f12e2dfBen GruverTHIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
33324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver=end
34324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
35324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruvermodule ANTLR3
36324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverunless const_defined?( :RecognizerSharedState )
37324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
RecognizerSharedState = Struct.new(
  :following,
  :error_recovery,
  :last_error_index,
  :backtracking,
  :rule_memory,
  :syntax_errors,
  :token,
  :token_start_position,
  :token_start_line,
  :token_start_column,
  :channel,
  :type,
  :text
)

=begin rdoc ANTLR3::RecognizerSharedState

A big Struct-based class containing most of the data that makes up a
recognizer's state. These attributes are externalized from the recognizer itself
so that recognizer delegation (which occurs when you import other grammars into
your grammar) can function; multiple recognizers can share a common state.

== Structure Attributes

following::
  a stack that tracks follow sets for error recovery
error_recovery::
  a flag indicating whether or not the recognizer is in error recovery mode
last_error_index::
  the index in the input stream of the last error
backtracking::
  tracks the backtracking depth
rule_memory::
  if a grammar is compiled with the memoization option, this will be
  set to a hash mapping previously parsed rules to cached indices
syntax_errors::
  tracks the number of syntax errors seen so far
token::
  holds newly constructed tokens for lexer rules
token_start_position::
  the input stream index at which the token starts
token_start_line::
  the input stream line number at which the token starts
token_start_column::
  the input stream column at which the token starts
channel::
  the channel value of the target token
type::
  the type value of the target token
text::
  the text of the target token

=end

class RecognizerSharedState
  # creates a state object populated with the initial default values;
  # the constructor takes no arguments
  def initialize
    # positional arguments follow the member order of the struct:
    #   following             = []
    #   error_recovery        = false
    #   last_error_index      = -1
    #   backtracking          = 0
    #   rule_memory           = nil
    #   syntax_errors         = 0
    #   token                 = nil
    #   token_start_position  = -1
    # the remaining members are left as nil
    super( [], false, -1, 0, nil, 0, nil, -1 )
  end


  # restores all of the state variables to their respective
  # initial default values. The +following+ stack and the
  # +rule_memory+ hash (when present) are emptied in place rather
  # than replaced, so other references to them see the reset too.
  def reset!
    following.clear
    self.error_recovery = false
    self.last_error_index = -1
    self.backtracking = 0
    rule_memory.clear if rule_memory
    self.syntax_errors = 0
    self.token = nil
    self.token_start_position = -1
    self.token_start_line = nil
    self.token_start_column = nil
    self.channel = nil
    self.type = nil
    self.text = nil
  end
end
124324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
125324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverend # unless const_defined?( :RecognizerSharedState )
126324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
=begin rdoc ANTLR3::Scope

= Scope

Scope is used to represent instances of ANTLR's various attribute scopes.
It is identical to Ruby's built-in Struct class, but it takes string
attribute declarations from the ANTLR grammar as parameters, and overrides
the #initialize method to set the default values if any are present in
the scope declaration.

  Block = Scope.new( "name", "depth = 0", "variables = {}" )
  Block.new                    # => #<struct Block name=nil, depth=0, variables={}>
  Block.new( "function" )      # => #<struct Block name="function", depth=0, variables={}>
  Block.new( 'a', 1, :x => 3 ) # => #<struct Block name="a", depth=1, variables={ :x => 3 }>

=end
143324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
class Scope < ::Struct
  # Builds a new struct class from ANTLR attribute declarations.
  #
  # Each declaration is converted to a string of the form "name" or
  # "name = default". The text after the first "=" is spliced, as Ruby
  # source, into the generated #initialize as the default value for that
  # member. An optional block is evaluated in the context of the new
  # class, as with Struct.new.
  #
  # NOTE: default expressions are evaluated as Ruby code via class_eval,
  # so declarations must only come from trusted sources.
  def self.new( *declarations, &body )
    names = []
    defaults = {}
    declarations.each do | decl |
      # surrounding whitespace is dropped so that " depth = 0" still
      # yields the member name :depth
      name, default = decl.to_s.strip.split( /\s*=\s*/, 2 )
      name = name.to_sym
      names << name
      defaults[ name ] = default if default
    end

    super( *names ) do
      # If no defaults, leave the initialize method the same as
      # the struct's default initialize for speed. Otherwise,
      # overwrite the initialize to populate with default values.
      unless defaults.empty?
        parameters = names.map do | name |
          "#{ name } = #{ defaults.fetch( name, 'nil' ) }"
        end.join( ', ' )

        # file and line are passed along so that errors raised by a
        # default expression point back at this template
        class_eval( <<-END, __FILE__, __LINE__ + 1 )
          def initialize( #{ parameters } )
            super( #{ names.join( ', ' ) } )
          end
        END
      end

      class_eval( &body ) if body
    end
  end
end
173324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
=begin rdoc ANTLR3::Recognizer

= Recognizer

As the base class of all ANTLR-generated recognizers, Recognizer provides
much of the shared functionality and structure used in the recognition process.
For all effective purposes, the class and its immediate subclasses Lexer,
Parser, and TreeParser are abstract classes. They can be instantiated, but
they're pretty useless on their own. Instead, to make useful code, you write an
ANTLR grammar and ANTLR will generate classes which inherit from one of the
recognizer base classes, providing the implementation of the grammar rules
themselves. The generated classes rely on this group of base classes to
implement the necessary tasks. Recognizer defines methods related to:

* token and character matching
* prediction and recognition strategy
* recovering from errors
* reporting errors
* memoization
* simple rule tracing and debugging

=end
196324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
197324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverclass Recognizer
198324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  include Constants
199324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  include Error
200324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  include TokenFactory
201324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  extend ClassMacros
202324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
203324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  @rules = {}
204324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
205324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # inherited class methods and hooks
class << self
  attr_reader :grammar_file_name,
              :antlr_version,
              :antlr_version_string,
              :library_version_string,
              :grammar_home

  attr_accessor :token_scheme

  # only the writer is generated here; the reader is defined
  # further down so it can fall back to the first rule
  attr_writer :default_rule

  # generated recognizer code uses this method to stamp
  # the code with the name of the grammar file and
  # the current version of ANTLR being used to generate
  # the code
  #
  # +antlr_version+ must start with "MAJOR.MINOR", optionally followed
  # by ".PATCH" and "bN"; missing components are recorded as 0.
  # Any text after the version number is ignored.
  # Raises a RuntimeError if the version string cannot be parsed.
  #
  # NOTE(review): this stores @library_version, while the reader declared
  # above is +library_version_string+ -- confirm which one is intended.
  def generated_using( grammar_file, antlr_version, library_version = nil )
    @grammar_file_name = grammar_file.freeze
    @antlr_version_string = antlr_version.freeze
    @library_version = Util.parse_version( library_version )

    unless @antlr_version_string =~ /^(\d+)\.(\d+)(?:\.(\d+)(?:b(\d+))?)?(.*)$/
      raise "bad version string: %p" % @antlr_version_string
    end
    @antlr_version = [ $1, $2, $3, $4 ].map! { | str | str.to_i }
  end

  # this method is used to generate return-value structures for
  # rules with multiple return values. To avoid generating
  # a special class for every rule in AST parsers and such
  # (where most rules have the same default set of return values),
  # each recognizer gets a default return value structure
  # assigned to the constant +Return+. Rules which don't
  # require additional custom members will have a rule-return
  # name constant that just points to the generic return
  # value.
  def define_return_scope( *members )
    return generic_return_scope if members.empty?
    Struct.new( *( members + return_scope_members ) )
  end

  # used as a hook to add additional default members
  # to default return value structures
  # For example, all AST-building parsers override
  # this method to add an extra +:tree+ field to
  # all rule return structures.
  def return_scope_members
    [ :start, :stop ]
  end

  # sets up and returns the generic rule return
  # scope for a recognizer; the struct is created once,
  # stored in the constant +Return+, and cached
  def generic_return_scope
    @generic_return_scope ||=
      const_set( :Return, Struct.new( *return_scope_members ) )
  end

  # the set of grammar names registered through +imports+
  def imported_grammars
    @imported_grammars ||= Set.new
  end

  # the list of grammar names registered through +masters+
  def master_grammars
    @master_grammars ||= []
  end

  # the most recently registered master grammar name, or nil
  def master
    master_grammars.last
  end

  # registers each name in +grammar_names+ as a master grammar and
  # defines an instance reader named after its snake-cased form;
  # names that are already registered are skipped
  def masters( *grammar_names )
    grammar_names.each do | grammar |
      next if master_grammars.include?( grammar )
      master_grammars << grammar
      attr_reader( Util.snake_case( grammar ) )
    end
  end
  private :masters

  # registers each name in +grammar_names+ as an imported grammar and
  # defines an instance reader named after its snake-cased form.
  # Returns the full set of imported grammars.
  def imports( *grammar_names )
    grammar_names.each do | grammar |
      # Set#add? returns nil when the name was already present
      if imported_grammars.add?( grammar.to_sym )
        attr_reader( Util.snake_case( grammar ) )
      end
    end
    imported_grammars
  end
  private :imports

  # a copy of the recognizer's RULE_METHODS constant, or an
  # empty array if the constant cannot be read
  def rules
    self::RULE_METHODS.dup
  rescue StandardError
    []
  end

  # the explicitly assigned default rule, falling back
  # to the first entry of +rules+
  def default_rule
    @default_rule ||= rules.first
  end

  def debug?
    false
  end

  def profile?
    false
  end

  # shortcut for Scope.new
  def Scope( *declarations, &body )
    Scope.new( *declarations, &body )
  end

  # the token class used by the recognizer: its own +Token+ constant
  # if that can be read, otherwise the superclass's token class,
  # otherwise ANTLR3::CommonToken. The result is cached.
  def token_class
    @token_class ||=
      begin
        self::Token
      rescue StandardError
        begin
          superclass.token_class
        rescue StandardError
          ANTLR3::CommonToken
        end
      end
  end
  private :generated_using
end
327324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
328324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  @grammar_file_name = nil
329324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  @antlr_version = ANTLR3::ANTLR_VERSION
330324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  @antlr_version_string = ANTLR3::ANTLR_VERSION_STRING
331324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
# instance-level shortcuts to the grammar metadata
# recorded on the recognizer's class

# the grammar file name recorded on the recognizer's class
def grammar_file_name
  self.class.grammar_file_name
end

# the ANTLR version recorded on the recognizer's class
def antlr_version
  self.class.antlr_version
end

# the ANTLR version string recorded on the recognizer's class
def antlr_version_string
  self.class.antlr_version_string
end
343324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
344324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  attr_accessor :input
345324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  attr_reader :state
346324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
# yields each delegate recognizer: for every grammar name in the
# class's +imported_grammars+, the reader named after the snake-cased
# grammar name is called and its value is yielded unless it is
# nil or false. Returns an enumerator when no block is given.
def each_delegate
  return enum_for( __method__ ) unless block_given?
  self.class.imported_grammars.each do | grammar |
    delegate = __send__( Util.snake_case( grammar ) )
    yield( delegate ) if delegate
  end
end
354324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
355324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # Create a new recognizer. The constructor simply ensures that
356324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # all recognizers are initialized with a shared state object.
357324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # See the main recognizer subclasses for more specific
358324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # information about creating recognizer objects like
359324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # lexers and parsers.
def initialize( options = {} )
  # options[ :state ]         - shared state object to reuse; a new
  #                             RecognizerSharedState is created when absent
  # options[ :error_output ]  - stored in @error_output; defaults to $stderr
  @state = options[ :state ] || RecognizerSharedState.new
  @error_output = options.fetch( :error_output, $stderr )

  # leave @input alone if it was assigned before this constructor ran
  @input = nil unless defined?( @input )
  initialize_dfas
end
366324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
367324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # Resets the recognizer's state data to initial values.
368324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # As a result, all error tracking and error recovery
369324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # data accumulated in the current state will be cleared.
370324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # It will also attempt to reset the input stream
371324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # via input.reset, but it ignores any errors received
  # from doing so. Thus the input stream is not guaranteed
373324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # to be rewound to its initial position
def reset
  @state.reset! if @state
  begin
    # best effort: errors raised while resetting the input are ignored
    @input && @input.reset
  rescue StandardError
    nil
  end
end
378324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
  # Attempt to match the current input symbol against the token type
380324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # specified by +type+. If the symbol matches the type,
381324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # consume the current symbol and return its value. If
382324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # the symbol doesn't match, attempt to use the follow-set
383324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # data provided by +follow+ to recover from the mismatched
384324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # token. 
385324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  def match( type, follow )
386324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    matched_symbol = current_symbol
387324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    if @input.peek == type
388324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver      @input.consume
389324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver      @state.error_recovery = false
390324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver      return matched_symbol
391324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    end
392324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    raise( BacktrackingFailed ) if @state.backtracking > 0
393324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    return recover_from_mismatched_token( type, follow )
394324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  end
395324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
396324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # match anything -- i.e. wildcard match. Simply consume
397324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # the current symbol from the input stream. 
398324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  def match_any
399324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    @state.error_recovery = false
400324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    @input.consume
401324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  end
402324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
403324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  ##############################################################################################
404324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  ###################################### Error Reporting #######################################
405324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  ##############################################################################################
406324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  ##############################################################################################
407324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
408324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # When a recognition error occurs, this method is the main
409324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # hook for carrying out the error reporting process. The
410324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # default implementation calls +display_recognition_error+
411324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # to display the error info on $stderr. 
412324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  def report_error( e = $! )
413324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    @state.error_recovery and return
414324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    @state.syntax_errors += 1
415324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    @state.error_recovery = true
416324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    display_recognition_error( e )
417324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  end
418324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
419324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # error reporting hook for presenting the information
420324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # The default implementation builds appropriate error
421324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # message text using +error_header+ and +error_message+,
422324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # and calls +emit_error_message+ to write the error
423324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # message out to some source
424324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  def display_recognition_error( e = $! )
425324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    header = error_header( e )
426324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    message = error_message( e )
427324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    emit_error_message( "#{ header } #{ message }" )
428324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  end
429324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
430324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # used to construct an appropriate error message
431324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # based on the specific type of error and the
432324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # error's attributes
433324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  def error_message( e = $! )
434324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    case e
435324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    when UnwantedToken
436324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver      token_name = token_name( e.expecting )
437324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver      "extraneous input #{ token_error_display( e.unexpected_token ) } expecting #{ token_name }"
438324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    when MissingToken
439324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver      token_name = token_name( e.expecting )
440324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver      "missing #{ token_name } at #{ token_error_display( e.symbol ) }"
441324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    when MismatchedToken
442324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver      token_name = token_name( e.expecting )
443324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver      "mismatched input #{ token_error_display( e.symbol ) } expecting #{ token_name }"
444324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    when MismatchedTreeNode
445324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver      token_name = token_name( e.expecting )
446324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver      "mismatched tree node: #{ e.symbol } expecting #{ token_name }"
447324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    when NoViableAlternative
448324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver      "no viable alternative at input " << token_error_display( e.symbol )
449324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    when MismatchedSet
450324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver      "mismatched input %s expecting set %s" %
451324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        [ token_error_display( e.symbol ), e.expecting.inspect ]
452324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    when MismatchedNotSet
453324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver      "mismatched input %s expecting set %s" %
454324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        [ token_error_display( e.symbol ), e.expecting.inspect ]
455324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    when FailedPredicate
456324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver      "rule %s failed predicate: { %s }?" % [ e.rule_name, e.predicate_text ]
457324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    else e.message
458324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    end
459324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  end
460324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
461324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # 
462324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # used to add a tag to the error message that indicates
463324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # the location of the input stream when the error
464324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # occurred
465324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # 
466324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  def error_header( e = $! )
467324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    e.location
468324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  end
469324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
470324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # 
471324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # formats a token object appropriately for inspection
472324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # within an error message
473324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # 
474324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  def token_error_display( token )
475324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    unless text = token.text || ( token.source_text rescue nil )
476324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver      text =
477324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        case
478324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        when token.type == EOF then '<EOF>'
479324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        when name = token_name( token.type ) rescue nil then "<#{ name }>"
480324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        when token.respond_to?( :name ) then "<#{ token.name }>"
481324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        else "<#{ token.type }>"
482324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        end
483324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    end
484324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    return text.inspect
485324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  end
486324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
487324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # 
488324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # Write the error report data out to some source. By default,
489324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # the error message is written to $stderr
490324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # 
491324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  def emit_error_message( message )
492324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    @error_output.puts( message ) if @error_output
493324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  end
494324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
495324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  ##############################################################################################
496324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  ###################################### Error Recovery ########################################
497324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  ##############################################################################################
498324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
499324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  def recover( error = $! )
500324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    @state.last_error_index == @input.index and @input.consume
501324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    @state.last_error_index = @input.index
502324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    
503324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    follow_set = compute_error_recovery_set
504324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    
505324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    resync { consume_until( follow_set ) }
506324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  end
507324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
508324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  def resync
509324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    begin_resync
510324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    return( yield )
511324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  ensure
512324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    end_resync
513324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  end
514324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
515324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # overridable hook method that is executed at the start of the
516324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # resyncing procedure in recover
517324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  #
518324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # by default, it does nothing
519324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  def begin_resync
520324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    # do nothing
521324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  end
522324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
  # overridable hook method that is executed after the resyncing procedure has completed
524324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  #
525324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # by default, it does nothing
526324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  def end_resync
527324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    # do nothing
528324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  end
529324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
530324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # (The following explanation has been lifted directly from the
531324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  #  source code documentation of the ANTLR Java runtime library)
532324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # 
533324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # Compute the error recovery set for the current rule.  During
534324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # rule invocation, the parser pushes the set of tokens that can
535324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # follow that rule reference on the stack; this amounts to
536324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # computing FIRST of what follows the rule reference in the
537324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # enclosing rule. This local follow set only includes tokens
538324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # from within the rule; i.e., the FIRST computation done by
539324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # ANTLR stops at the end of a rule.
540324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # 
541324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # EXAMPLE
542324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # 
543324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # When you find a "no viable alt exception", the input is not
544324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # consistent with any of the alternatives for rule r.  The best
545324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # thing to do is to consume tokens until you see something that
546324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # can legally follow a call to r *or* any rule that called r.
547324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # You don't want the exact set of viable next tokens because the
548324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # input might just be missing a token--you might consume the
549324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # rest of the input looking for one of the missing tokens.
550324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # 
551324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # Consider grammar:
552324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # 
553324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  #   a : '[' b ']'
554324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  #     | '(' b ')'
555324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  #     ;
556324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  #   b : c '^' INT ;
557324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  #   c : ID
558324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  #     | INT
559324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  #     ;
560324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # 
561324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # At each rule invocation, the set of tokens that could follow
562324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # that rule is pushed on a stack.  Here are the various "local"
563324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # follow sets:
564324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # 
565324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  #   FOLLOW( b1_in_a ) = FIRST( ']' ) = ']'
566324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  #   FOLLOW( b2_in_a ) = FIRST( ')' ) = ')'
567324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  #   FOLLOW( c_in_b ) = FIRST( '^' ) = '^'
568324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # 
569324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # Upon erroneous input "[]", the call chain is
570324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # 
571324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  #   a -> b -> c
572324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # 
573324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # and, hence, the follow context stack is:
574324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # 
575324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  #   depth  local follow set     after call to rule
576324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  #     0         \<EOF>                   a (from main( ) )
577324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  #     1          ']'                     b
  #     2          '^'                     c
579324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # 
580324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # Notice that <tt>')'</tt> is not included, because b would have to have
581324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # been called from a different context in rule a for ')' to be
582324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # included.
583324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # 
584324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # For error recovery, we cannot consider FOLLOW(c)
585324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # (context-sensitive or otherwise).  We need the combined set of
586324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # all context-sensitive FOLLOW sets--the set of all tokens that
587324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # could follow any reference in the call chain.  We need to
588324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # resync to one of those tokens.  Note that FOLLOW(c)='^' and if
589324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # we resync'd to that token, we'd consume until EOF.  We need to
590324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}.
591324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # In this case, for input "[]", LA(1) is in this set so we would
592324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # not consume anything and after printing an error rule c would
593324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # return normally.  It would not find the required '^' though.
594324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # At this point, it gets a mismatched token error and throws an
595324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # exception (since LA(1) is not in the viable following token
596324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # set).  The rule exception handler tries to recover, but finds
597324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # the same recovery set and doesn't consume anything.  Rule b
598324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # exits normally returning to rule a.  Now it finds the ']' (and
599324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # with the successful match exits errorRecovery mode).
600324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # 
  # So, you can see that the parser walks up the call chain looking
602324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # for the token that was a member of the recovery set.
603324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # 
604324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # Errors are not generated in errorRecovery mode.
605324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # 
606324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # ANTLR's error recovery mechanism is based upon original ideas:
607324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # 
608324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # "Algorithms + Data Structures = Programs" by Niklaus Wirth
609324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # 
610324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # and
611324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # 
612324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # "A note on error recovery in recursive descent parsers":
613324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # http://portal.acm.org/citation.cfm?id=947902.947905
614324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # 
615324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # Later, Josef Grosch had some good ideas:
616324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # 
617324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # "Efficient and Comfortable Error Recovery in Recursive Descent
618324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # Parsers":
619324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip
620324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # 
621324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # Like Grosch I implemented local FOLLOW sets that are combined
622324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # at run-time upon error to avoid overhead during parsing.
623324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  def compute_error_recovery_set
624324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    combine_follows( false )
625324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  end
626324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
627324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  def recover_from_mismatched_token( type, follow )
628324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    if mismatch_is_unwanted_token?( type )
629324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver      err = UnwantedToken( type )
630324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver      resync { @input.consume }
631324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver      report_error( err )
632324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver      
633324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver      return @input.consume
634324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    end
635324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    
636324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    if mismatch_is_missing_token?( follow )
637324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver      inserted = missing_symbol( nil, type, follow )
638324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver      report_error( MissingToken( type, inserted ) )
639324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver      return inserted
640324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    end
641324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    
642324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    raise MismatchedToken( type )
643324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  end
644324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
645324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  def recover_from_mismatched_set( e, follow )
646324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    if mismatch_is_missing_token?( follow )
647324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver      report_error( e )
648324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver      return missing_symbol( e, INVALID_TOKEN_TYPE, follow )
649324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    end
650324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    raise e
651324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  end
652324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
653324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  def recover_from_mismatched_element( e, follow )
654324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    follow.nil? and return false
655324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    if follow.include?( EOR_TOKEN_TYPE )
656324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver      viable_tokens = compute_context_sensitive_rule_follow
657324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver      follow = ( follow | viable_tokens ) - Set[ EOR_TOKEN_TYPE ]
658324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    end
659324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    if follow.include?( @input.peek )
660324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver      report_error( e )
661324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver      return true
662324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    end
663324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    return false
664324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  end
665324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
666324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # Conjure up a missing token during error recovery.
667324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # 
668324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # The recognizer attempts to recover from single missing
669324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # symbols. But, actions might refer to that missing symbol.
670324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # For example, x=ID {f($x);}. The action clearly assumes
671324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # that there has been an identifier matched previously and that
672324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # $x points at that token. If that token is missing, but
673324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # the next token in the stream is what we want we assume that
674324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # this token is missing and we keep going. Because we
675324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # have to return some token to replace the missing token,
676324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # we have to conjure one up. This method gives the user control
677324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # over the tokens returned for missing tokens. Mostly,
678324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # you will want to create something special for identifier
679324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # tokens. For literals such as '{' and ',', the default
680324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # action in the parser or tree parser works. It simply creates
681324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # a CommonToken of the appropriate type. The text will be the token.
682324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # If you change what tokens must be created by the lexer,
683324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # override this method to create the appropriate tokens.
684324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  def missing_symbol( error, expected_token_type, follow )
685324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    return nil
686324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  end
687324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
688324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  def mismatch_is_unwanted_token?( type )
689324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    @input.peek( 2 ) == type
690324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  end
691324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
692324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  def mismatch_is_missing_token?( follow )
693324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    follow.nil? and return false
694324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    if follow.include?( EOR_TOKEN_TYPE )
695324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver      viable_tokens = compute_context_sensitive_rule_follow
696324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver      follow = follow | viable_tokens
697324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver      
698324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver      follow.delete( EOR_TOKEN_TYPE ) unless @state.following.empty?
699324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    end
700324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    if follow.include?( @input.peek ) or follow.include?( EOR_TOKEN_TYPE )
701324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver      return true
702324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    end
703324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    return false
704324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  end
705324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
706324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  def syntax_errors?
707324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    ( error_count = @state.syntax_errors ) > 0 and return( error_count )
708324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  end
709324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
710324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # factor out what to do upon token mismatch so
711324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # tree parsers can behave differently.
712324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  #
713324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # * override this method in your parser to do things
714324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  #	  like bailing out after the first error
715324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  #	* just raise the exception instead of
716324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  #	  calling the recovery method.
717324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  #
718324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  def number_of_syntax_errors
719324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    @state.syntax_errors
720324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  end
721324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
722324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # 
723324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # Compute the context-sensitive +FOLLOW+ set for current rule.
724324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # This is set of token types that can follow a specific rule
725324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # reference given a specific call chain.  You get the set of
726324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # viable tokens that can possibly come next (look depth 1)
727324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # given the current call chain.  Contrast this with the
728324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # definition of plain FOLLOW for rule r:
729324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # 
730324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  #    FOLLOW(r)={x | S=>*alpha r beta in G and x in FIRST(beta)}
731324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # 
732324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # where x in T* and alpha, beta in V*; T is set of terminals and
733324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # V is the set of terminals and nonterminals.  In other words,
734324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # FOLLOW(r) is the set of all tokens that can possibly follow
735324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # references to r in *any* sentential form (context).  At
736324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # runtime, however, we know precisely which context applies as
737324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # we have the call chain.  We may compute the exact (rather
738324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # than covering superset) set of following tokens.
739324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # 
740324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # For example, consider grammar:
741324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # 
742324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  #   stat : ID '=' expr ';'      // FOLLOW(stat)=={EOF}
743324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  #        | "return" expr '.'
744324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  #        ;
745324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  #   expr : atom ('+' atom)* ;   // FOLLOW(expr)=={';','.',')'}
746324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  #   atom : INT                  // FOLLOW(atom)=={'+',')',';','.'}
747324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  #        | '(' expr ')'
748324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  #        ;
749324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # 
750324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # The FOLLOW sets are all inclusive whereas context-sensitive
751324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # FOLLOW sets are precisely what could follow a rule reference.
  # For input "i=(3);", here is the derivation:
753324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # 
754324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  #   stat => ID '=' expr ';'
755324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  #        => ID '=' atom ('+' atom)* ';'
756324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  #        => ID '=' '(' expr ')' ('+' atom)* ';'
757324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  #        => ID '=' '(' atom ')' ('+' atom)* ';'
758324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  #        => ID '=' '(' INT ')' ('+' atom)* ';'
759324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  #        => ID '=' '(' INT ')' ';'
760324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # 
761324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # At the "3" token, you'd have a call chain of
762324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # 
763324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  #   stat -> expr -> atom -> expr -> atom
764324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # 
765324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # What can follow that specific nested ref to atom?  Exactly ')'
766324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # as you can see by looking at the derivation of this specific
767324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # input.  Contrast this with the FOLLOW(atom)={'+',')',';','.'}.
768324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # 
769324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # You want the exact viable token set when recovering from a
770324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # token mismatch.  Upon token mismatch, if LA(1) is member of
771324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # the viable next token set, then you know there is most likely
772324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # a missing token in the input stream.  "Insert" one by just not
773324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # throwing an exception.
774324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # 
775324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  def compute_context_sensitive_rule_follow
776324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    combine_follows true
777324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  end
778324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
779324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  def combine_follows( exact )
780324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    follow_set = Set.new
781324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    @state.following.each_with_index.reverse_each do |local_follow_set, index|
782324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver      follow_set |= local_follow_set
783324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver      if exact
784324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        if local_follow_set.include?( EOR_TOKEN_TYPE )
785324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver          follow_set.delete( EOR_TOKEN_TYPE ) if index > 0
786324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        else
787324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver          break
788324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        end
789324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver      end
790324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    end
791324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    return follow_set
792324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  end
793324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
794324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # 
795324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # Match needs to return the current input symbol, which gets put
796324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # into the label for the associated token ref; e.g., x=ID.  Token
797324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # and tree parsers need to return different objects. Rather than test
798324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # for input stream type or change the IntStream interface, I use
799324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # a simple method to ask the recognizer to tell me what the current
800324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # input symbol is.
801324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # 
802324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # This is ignored for lexers.
803324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # 
804324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  def current_symbol
805324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    @input.look
806324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  end
807324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
808324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # 
809324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # Consume input symbols until one matches a type within types
810324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # 
811324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # types can be a single symbol type or a set of symbol types
812324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # 
813324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  def consume_until( types )
814324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    types.is_a?( Set ) or types = Set[ *types ]
815324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    type = @input.peek
816324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    until type == EOF or types.include?( type )
817324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver      @input.consume
818324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver      type = @input.peek
819324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    end
820324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    return( type )
821324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  end
822324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
823324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # 
824324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # Returns true if the recognizer is currently in a decision for which
825324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # backtracking has been enabled
826324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # 
827324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  def backtracking?
828324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    @state.backtracking > 0
829324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  end
830324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
831324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  def backtracking_level
832324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    @state.backtracking
833324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  end
834324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
835324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  def backtracking_level=( n )
836324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    @state.backtracking = n
837324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  end
838324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
839324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  def backtrack
840324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    @state.backtracking += 1
841324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    start = @input.mark
842324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    success =
843324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver      begin yield
844324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver      rescue BacktrackingFailed then false
845324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver      else true
846324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver      end
847324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    return success
848324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  ensure
849324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    @input.rewind( start )
850324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    @state.backtracking -= 1
851324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  end
852324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
853324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  def syntactic_predicate?( name )
854324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    backtrack { send name }
855324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  end
856324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
857324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  alias backtracking backtracking_level
858324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  alias backtracking= backtracking_level=
859324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
860324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  def rule_memoization( rule, start_index )
861324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    @state.rule_memory.fetch( rule ) do
862324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver      @state.rule_memory[ rule ] = Hash.new( MEMO_RULE_UNKNOWN )
863324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    end[ start_index ]
864324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  end
865324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
866324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  def already_parsed_rule?( rule )
867324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    stop_index = rule_memoization( rule, @input.index )
868324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    case stop_index
869324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    when MEMO_RULE_UNKNOWN then return false
870324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    when MEMO_RULE_FAILED
871324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver      raise BacktrackingFailed
872324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    else
873324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver      @input.seek( stop_index + 1 )
874324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    end
875324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    return true
876324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  end
877324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
878324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  def memoize( rule, start_index, success )
879324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    stop_index = success ? @input.index - 1 : MEMO_RULE_FAILED
880324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    memo = @state.rule_memory[ rule ] and memo[ start_index ] = stop_index
881324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  end
882324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
883324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  def trace_in( rule_name, rule_index, input_symbol )
884324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    @error_output.printf( "--> enter %s on %s", rule_name, input_symbol )
885324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    @state.backtracking > 0 and @error_output.printf( 
886324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver      " (in backtracking mode: depth = %s)", @state.backtracking
887324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    )
888324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    @error_output.print( "\n" )
889324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  end
890324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
891324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  def trace_out( rule_name, rule_index, input_symbol )
892324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    @error_output.printf( "<-- exit %s on %s", rule_name, input_symbol )
893324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    @state.backtracking > 0 and @error_output.printf( 
894324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver      " (in backtracking mode: depth = %s)", @state.backtracking
895324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    )
896324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    @error_output.print( "\n" )
897324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  end
898324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
899324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverprivate
900324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
901324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  def initialize_dfas
902324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    # do nothing
903324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  end
904324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverend
905324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
906324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
907324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver# constant alias for compatibility with older versions of the
908324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver# runtime library
909324c4644fee44b9898524c09511bd33c3f12e2dfBen GruverBaseRecognizer = Recognizer
910324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
911324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver=begin rdoc ANTLR3::Lexer
912324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
913324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver= Lexer
914324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
915324c4644fee44b9898524c09511bd33c3f12e2dfBen GruverLexer is the default superclass of all lexers generated by ANTLR. The class
916324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruvertailors the core functionality provided by Recognizer to the task of
917324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruvermatching patterns in the text input and breaking the input into tokens.
918324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
919324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver== About Lexers
920324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
921324c4644fee44b9898524c09511bd33c3f12e2dfBen GruverA lexer's job is to take input text and break it up into _tokens_ -- objects
922324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverthat encapsulate a piece of text, a type label (such as ID or INTEGER), and the
923324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverposition of the text with respect to the input. Thus, a lexer is essentially a
924324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruvercomplicated iterator that steps through an input stream and produces a sequence
of tokens. Sometimes a lexer is enough to carry out a goal on its own, as in
tasks like source code highlighting and simple code analysis. Usually, however,
927324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverthe lexer converts text into tokens for use by a parser, which recognizes larger
928324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstructures within the text.
929324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
930324c4644fee44b9898524c09511bd33c3f12e2dfBen GruverANTLR parsers have a variety of entry points specified by parser rules, each of
931324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverwhich defines the structure of a specific type of sentence in a grammar. Lexers,
932324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverhowever, are primarily intended to have a single entry point. It looks at the
933324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruvercharacters starting at the current input position, decides if the chunk of text
934324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruvermatches one of a number of possible token type definitions, wraps the chunk into
935324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruvera token with information on its type and location, and advances the input stream
936324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverto the next place.
937324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
938324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver== ANTLR Lexers and the Lexer API
939324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
940324c4644fee44b9898524c09511bd33c3f12e2dfBen GruverANTLR-generated lexers will subclass this class, unless specified otherwise
941324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverwithin a grammar file. The generated class will provide an implementation of
942324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruvereach lexer rule as a method of the same name. The subclass will also provide an
implementation for the abstract method #token!, the purpose of which is to
multiplex the token type definitions and predict what rule definition to execute
to fetch a token. The primary method in the lexer API, #next_token, uses
#token! to fetch the next token and drive the iteration.
947324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
948324c4644fee44b9898524c09511bd33c3f12e2dfBen GruverIf the lexer is preparing tokens for use by an ANTLR generated parser, the lexer
949324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverwill generally be used to build a TokenStream object. The following code example
950324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverdemonstrates the typical setup for using ANTLR parsers and lexers in Ruby.
951324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
952324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # in HypotheticalLexer.rb
953324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  module Hypothetical
954324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  class Lexer < ANTLR3::Lexer
955324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    # ...
956324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    # ANTLR generated code
957324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    # ...
958324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  end
959324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  end
960324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
961324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # in HypotheticalParser.rb
962324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  module Hypothetical
963324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  class Parser < ANTLR3::Parser
964324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    # ...
965324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    # more ANTLR generated code
966324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    # ...
967324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  end
968324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  end
969324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
970324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # to take hypothetical source code and prepare it for parsing,
971324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # there is generally a four-step construction process
972324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
973324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  source = "some hypothetical source code"
974324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  input = ANTLR3::StringStream.new(source, :file => 'blah-de-blah.hyp')
975324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  lexer = Hypothetical::Lexer.new( input )
976324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  tokens = ANTLR3::CommonTokenStream.new( lexer )
977324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  parser = Hypothetical::Parser.new( tokens )
978324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
979324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # if you're using the standard streams, ANTLR3::StringStream and
980324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # ANTLR3::CommonTokenStream, you can write the same process 
981324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  # shown above more succinctly:
982324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
983324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  lexer  = Hypothetical::Lexer.new("some hypothetical source code", :file => 'blah-de-blah.hyp')
984324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  parser = Hypothetical::Parser.new( lexer )
985324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
986324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver=end
987324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverclass Lexer < Recognizer
988324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  include TokenSource
989324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  @token_class = CommonToken
990324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
991324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  def self.default_rule
992324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    @default_rule ||= :token!
993324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  end
994324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
995324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  def self.main( argv = ARGV, options = {} )
996324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    if argv.is_a?( ::Hash ) then argv, options = ARGV, argv end
997324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    main = ANTLR3::Main::LexerMain.new( self, options )
998324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    block_given? ? yield( main ) : main.execute( argv )
999324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  end
1000324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
1001324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  def self.associated_parser
1002324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    @associated_parser ||= begin
1003324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver      @grammar_home and @grammar_home::Parser
1004324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    rescue NameError
1005324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver      grammar_name = @grammar_home.name.split( "::" ).last
1006324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver      begin
1007324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        require "#{ grammar_name }Parser"
1008324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        @grammar_home::Parser
1009324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver      rescue LoadError, NameError
1010324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver      end
1011324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    end
1012324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  end
1013324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1014324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  def initialize( input, options = {} )
1015324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    super( options )
1016324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    @input = cast_input( input, options )
1017324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  end
1018324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
1019324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  def current_symbol
1020324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    nil
1021324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  end
1022324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
1023324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  def next_token
1024324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    loop do
1025324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver      @state.token = nil
1026324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver      @state.channel = DEFAULT_CHANNEL
1027324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver      @state.token_start_position = @input.index
1028324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver      @state.token_start_column = @input.column
1029324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver      @state.token_start_line = @input.line
1030324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver      @state.text = nil
1031324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver      @input.peek == EOF and return EOF_TOKEN
1032324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver      begin
1033324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        token!
1034324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        
1035324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        case token = @state.token
1036324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        when nil then return( emit )
1037324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        when SKIP_TOKEN then next
1038324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        else
1039324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver          return token
1040324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        end
1041324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver      rescue NoViableAlternative => re
1042324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        report_error( re )
1043324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        recover( re )
1044324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver      rescue Error::RecognitionError => re
1045324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        report_error( re )
1046324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver      end
1047324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    end
1048324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  end
1049324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
1050324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  def skip
1051324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    @state.token = SKIP_TOKEN
1052324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  end
1053324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
1054324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  abstract :token!
1055324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
1056324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  def exhaust
1057324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    self.to_a
1058324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  end
1059324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
1060324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  def char_stream=( input )
1061324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    @input = nil
1062324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    reset()
1063324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    @input = input
1064324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  end
1065324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
1066324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  def source_name
1067324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    @input.source_name
1068324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  end
1069324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
1070324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  def emit( token = @state.token )
1071324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    token ||= create_token
1072324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    @state.token = token
1073324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    return token
1074324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  end
1075324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
1076324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  def match( expected )
1077324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    case expected
1078324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    when String
1079324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver      expected.each_byte do |char|
1080324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        unless @input.peek == char
1081324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver          @state.backtracking > 0 and raise BacktrackingFailed
1082324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver          error = MismatchedToken( char )
1083324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver          recover( error )
1084324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver          raise error
1085324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        end
1086324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        @input.consume()
1087324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver      end
1088324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    else # single integer character
1089324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver      unless @input.peek == expected
1090324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        @state.backtracking > 0 and raise BacktrackingFailed
1091324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        error = MismatchedToken( expected )
1092324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        recover( error )
1093324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        raise error
1094324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver      end
1095324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver      @input.consume
1096324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    end
1097324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    return true
1098324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  end
1099324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    
1100324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  def match_any
1101324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    @input.consume
1102324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  end
1103324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
1104324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  def match_range( min, max )
1105324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    char = @input.peek
1106324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    if char.between?( min, max ) then @input.consume
1107324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    else
1108324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver      @state.backtracking > 0 and raise BacktrackingFailed
1109324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver      error = MismatchedRange( min.chr, max.chr )
1110324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver      recover( error )
1111324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver      raise( error )
1112324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    end
1113324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    return true
1114324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  end
1115324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
1116324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  def line
1117324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    @input.line
1118324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  end
1119324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
1120324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  def column
1121324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    @input.column
1122324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  end
1123324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
1124324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  def character_index
1125324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    @input.index
1126324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  end
1127324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
1128324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  def text
1129324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    @state.text and return @state.text
1130324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    @input.substring( @state.token_start_position, character_index - 1 )
1131324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  end
1132324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
1133324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  def text=( text )
1134324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    @state.text = text
1135324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  end
1136324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
1137324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  def report_error( e )
1138324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    display_recognition_error( e )
1139324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  end
1140324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
# Builds the human-readable message for a recognition error raised while
# lexing. The offending character is rendered with #character_error_display;
# error classes not handled here are deferred to the superclass.
def error_message( e )
  # best effort: if the symbol cannot be read or rendered, the message is
  # still produced, just without the character
  char =
    begin
      character_error_display( e.symbol )
    rescue StandardError
      nil
    end

  case e
  when Error::MismatchedToken
    wanted = character_error_display( e.expecting )
    "mismatched character #{ char }; expecting #{ wanted }"
  when Error::NoViableAlternative
    "no viable alternative at character #{ char }"
  when Error::EarlyExit
    "required ( ... )+ loop did not match anything at character #{ char }"
  when Error::MismatchedNotSet, Error::MismatchedSet
    # both set-style errors share one message format
    "mismatched character #{ char }; expecting set #{ e.expecting.inspect }"
  when Error::MismatchedRange
    low = character_error_display( e.min )
    high = character_error_display( e.max )
    "mismatched character #{ char }; expecting set #{ low }..#{ high }"
  else
    super
  end
end
1162324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
# Renders a lexer symbol for use inside an error message.
#
# char:: the symbol to display -- EOF, an Integer character code, or any
#        other object
#
# Returns '<EOF>' for EOF, the inspected character for an Integer code, and
# <tt>char.inspect</tt> for everything else. Never raises for Integer input.
def character_error_display( char )
  case char
  when EOF then '<EOF>'
  when Integer
    begin
      char.chr.inspect
    rescue RangeError
      # Integer#chr without an encoding only covers 0..255, so a wider code
      # point would blow up while an error message is being built; render it
      # as UTF-8 instead, or as the bare number if it is no code point at all
      begin
        [ char ].pack( 'U' ).inspect
      rescue RangeError
        char.inspect
      end
    end
  else char.inspect
  end
end
1170324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
# Error recovery for the lexer: consumes one symbol from the input stream.
# The error object +re+ is not consulted.
def recover( re )
  @input.consume
end
1174324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
# make the char_stream= writer available under the name input= as well
alias input= char_stream=
1176324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
1177324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverprivate
1178324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
# Converts +input+ into something the lexer can read characters from.
#
# * a CharacterStream is returned untouched
# * a String is wrapped in a StringStream
# * an IO (or ARGF) is wrapped in a FileStream
# * anything else is returned as given
#
# +options+ is forwarded to whichever stream class gets built.
def cast_input( input, options )
  if CharacterStream === input
    input
  elsif ::String === input
    StringStream.new( input, options )
  elsif ::IO === input or ARGF === input
    FileStream.new( input, options )
  else
    input
  end
end
1187324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
# Rule-entry trace hook. Describes the current lookahead symbol together with
# the line and column, then hands that description to the superclass
# implementation as its third argument.
def trace_in( rule_name, rule_index )
  symbol = @input.look
  # a missing symbol is displayed the same way as EOF
  shown = ( symbol && symbol != EOF ) ? symbol.inspect : '<EOF>'
  location = "#{ shown } @ line #{ line } / col #{ column }"
  super( rule_name, rule_index, location )
end
1194324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
# Rule-exit trace hook. Describes the current lookahead symbol together with
# the line and column, then hands that description to the superclass
# implementation as its third argument.
def trace_out( rule_name, rule_index )
  symbol = @input.look
  # a missing symbol is displayed the same way as EOF
  shown = ( symbol && symbol != EOF ) ? symbol.inspect : '<EOF>'
  location = "#{ shown } @ line #{ line } / col #{ column }"
  super( rule_name, rule_index, location )
end
1201324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver  
# Creates a token through the superclass implementation.
#
# With a block, the block is passed along unchanged. Without one, the new
# token is filled in from the lexer's current state: input stream, type,
# channel, start/stop positions, line, text and column.
def create_token( &b )
  return super( &b ) if block_given?

  super do |token|
    token.input = @input
    token.type = @state.type
    token.channel = @state.channel
    token.start = @state.token_start_position
    # the stream index is one past the last character of the token
    token.stop = @input.index - 1
    token.line = @state.token_start_line
    token.text = self.text
    token.column = @state.token_start_column
  end
end
1217324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverend
1218324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1219324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1220324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver=begin rdoc ANTLR3::Parser
1221324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1222324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver= Parser
1223324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1224324c4644fee44b9898524c09511bd33c3f12e2dfBen GruverParser is the default base class of ANTLR-generated parser classes. The class
1225324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruvertailors the functionality provided by Recognizer to the task of parsing.
1226324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1227324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver== About Parsing
1228324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
This is just a loose overview of parsing. For considerably more in-depth coverage
1230324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverof the topic, read the ANTLR documentation or check out the ANTLR website
1231324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver(http://www.antlr.org).
1232324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1233324c4644fee44b9898524c09511bd33c3f12e2dfBen GruverA grammar defines the vocabulary and the sentence structure of a language. While
1234324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruvera lexer concerns the basic vocabulary symbols of the language, a parser's
1235324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverprimary task is to implement the sentence structure.
1236324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1237324c4644fee44b9898524c09511bd33c3f12e2dfBen GruverParsers are set up by providing a stream of tokens, which is usually created by
1238324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruvera corresponding lexer. Then, the user requests a specific sentence-structure
1239324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverwithin the grammar, such as "class_definition" or "xml_node", from the parser.
1240324c4644fee44b9898524c09511bd33c3f12e2dfBen GruverIt iterates through the tokens, verifying the syntax of the sentence and
1241324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverperforming actions specified by the grammar. It stops when it encounters an
1242324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruvererror or when it has matched the full sentence according to its defined
1243324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstructure.
1244324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1245324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver== ANTLR Parsers and the Parser API
1246324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1247324c4644fee44b9898524c09511bd33c3f12e2dfBen GruverPlain ANTLR-generated parsers directly subclass this class, unless specified
1248324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverotherwise within the grammar options. The generated code will provide a method
1249324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverfor each parser rule defined in the ANTLR grammar, as well as any other
1250324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruvercustomized member attributes and methods specified in the source grammar.
1251324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1252324c4644fee44b9898524c09511bd33c3f12e2dfBen GruverThis class does not override much of the functionality in Recognizer, and
1253324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverthus the API closely mirrors Recognizer.
1254324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1255324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver=end
class Parser < Recognizer
  # Command-line style entry point for a parser class.
  #
  # Builds an ANTLR3::Main::ParserMain around this class and either yields it
  # to the given block or executes it with +argv+. When a Hash is passed in
  # the first position it is taken as +options+ and ARGV supplies the
  # arguments.
  def self.main( argv = ARGV, options = {} )
    if argv.is_a?( ::Hash ) then argv, options = ARGV, argv end
    main = ANTLR3::Main::ParserMain.new( self, options )
    block_given? ? yield( main ) : main.execute( argv )
  end
  
  # The lexer class that goes with this parser, memoized once found.
  #
  # Looks for a +Lexer+ constant under <tt>@grammar_home</tt>. If that lookup
  # raises NameError, tries to require "<GrammarName>Lexer" (the last segment
  # of the grammar home's name) and looks again. Returns nil when there is no
  # grammar home or when the lexer cannot be loaded.
  def self.associated_lexer
    @associated_lexer ||= begin
      @grammar_home and @grammar_home::Lexer
    rescue NameError
      grammar_name = @grammar_home.name.split( "::" ).last
      begin
        require "#{ grammar_name }Lexer"
        @grammar_home::Lexer
      rescue LoadError, NameError
        # no lexer available -- fall through and return nil
      end
    end
  end
  
  
  # input::   a token stream, or anything #cast_input can turn into one
  # options:: passed to the superclass initializer and to #cast_input
  #
  # Raises ArgumentError (from #cast_input) when +input+ cannot be used.
  def initialize( input, options = {} )
    super( options )
    # reset runs while @input is nil; the stream is attached afterwards
    @input = nil
    reset
    @input = cast_input( input, options )
  end
  
  # Builds a placeholder token of +expected_type+.
  #
  # The placeholder starts out as a clone of the current lookahead token (or
  # of the previous token when the lookahead is the EOF token). Failing that
  # it is a new instance of the stream's token class, and as a last resort
  # the result of +create_token+ or a bare CommonToken. Its text is set to
  # "<missing NAME>" and its channel to DEFAULT_CHANNEL.
  #
  # +error+ and +follow+ are accepted but not used by this implementation.
  def missing_symbol( error, expected_type, follow )
    current = @input.look
    current = @input.look( -1 ) if current == ANTLR3::EOF_TOKEN
    t =
      case
      when current && current != ANTLR3::EOF_TOKEN then current.clone
      when @input.token_class then @input.token_class.new
      else ( create_token rescue CommonToken.new )
      end
    
    t.type = expected_type
    # strip the angle brackets surrounding the token name, if any
    name = t.name.gsub( /(^<)|(>$)/,'' )
    t.text = "<missing #{ name }>"
    t.channel = DEFAULT_CHANNEL
    return( t )
  end
  
  # Replaces the parser's token stream. As in #initialize, +reset+ runs while
  # @input is nil and the new stream is attached afterwards. Unlike
  # #initialize, +input+ is stored as given, without going through
  # #cast_input.
  def token_stream=( input )
    @input = nil
    reset
    @input = input
  end
  alias token_stream input
  
  # Name of the source, as reported by the token stream.
  def source_name
    @input.source_name
  end
  
  
private
  
  # Rule-entry trace hook: passes the inspected lookahead token to the
  # superclass implementation.
  def trace_in( rule_name, rule_index )
    super( rule_name, rule_index, @input.look.inspect )
  end
  
  # Rule-exit trace hook: passes the inspected lookahead token to the
  # superclass implementation.
  def trace_out( rule_name, rule_index )
    super( rule_name, rule_index, @input.look.inspect )
  end
  
  # Converts +input+ into a token stream.
  #
  # * a TokenStream is returned untouched
  # * a TokenSource is wrapped in a CommonTokenStream
  # * raw text sources (IO, ARGF, String, CharacterStream) are run through
  #   the associated lexer class and wrapped in a CommonTokenStream
  # * any other object is accepted if it responds to +peek+ and +consume+
  #
  # Raises ArgumentError when a raw text source is given but no associated
  # lexer class can be found, or when the object does not look like a stream.
  def cast_input( input, options )
    case input
    when TokenStream then input
    when TokenSource then CommonTokenStream.new( input, options )
    # ARGF is listed explicitly: it is not an IO, yet Lexer#cast_input
    # accepts it, so the parser should hand it to the lexer as well
    when IO, ARGF, String, CharacterStream
      if lexer_class = self.class.associated_lexer
        CommonTokenStream.new( lexer_class.new( input, options ), options )
      else
        raise ArgumentError, Util.tidy( <<-END, true )
        | unable to automatically convert input #{ input.inspect }
        | to a ANTLR3::TokenStream object as #{ self.class }
        | does not appear to have an associated lexer class
        END
      end
    else
      # assume it's a stream if it at least implements peek and consume
      unless input.respond_to?( :peek ) and input.respond_to?( :consume )
        raise ArgumentError, Util.tidy( <<-END, true )
        | #{ self.class } requires a token stream as input, but
        | #{ input.inspect } was provided
        END
      end
      input
    end
  end
  
end
1350324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1351324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverend
1352