1324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver#!/usr/bin/ruby 2324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver# encoding: utf-8 3324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 4324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver=begin LICENSE 5324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 6324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver[The "BSD licence"] 7324c4644fee44b9898524c09511bd33c3f12e2dfBen GruverCopyright (c) 2009-2010 Kyle Yetter 8324c4644fee44b9898524c09511bd33c3f12e2dfBen GruverAll rights reserved. 9324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 10324c4644fee44b9898524c09511bd33c3f12e2dfBen GruverRedistribution and use in source and binary forms, with or without 11324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruvermodification, are permitted provided that the following conditions 12324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverare met: 13324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 14324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1. Redistributions of source code must retain the above copyright 15324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver notice, this list of conditions and the following disclaimer. 16324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 2. Redistributions in binary form must reproduce the above copyright 17324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver notice, this list of conditions and the following disclaimer in the 18324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver documentation and/or other materials provided with the distribution. 19324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 3. The name of the author may not be used to endorse or promote products 20324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver derived from this software without specific prior written permission. 
21324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 22324c4644fee44b9898524c09511bd33c3f12e2dfBen GruverTHIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 23324c4644fee44b9898524c09511bd33c3f12e2dfBen GruverIMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 24324c4644fee44b9898524c09511bd33c3f12e2dfBen GruverOF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 25324c4644fee44b9898524c09511bd33c3f12e2dfBen GruverIN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 26324c4644fee44b9898524c09511bd33c3f12e2dfBen GruverINCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 27324c4644fee44b9898524c09511bd33c3f12e2dfBen GruverNOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28324c4644fee44b9898524c09511bd33c3f12e2dfBen GruverDATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29324c4644fee44b9898524c09511bd33c3f12e2dfBen GruverTHEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 31324c4644fee44b9898524c09511bd33c3f12e2dfBen GruverTHIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
32324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 33324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver=end 34324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 35324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruvermodule ANTLR3 36324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverunless const_defined?( :RecognizerSharedState ) 37324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 38324c4644fee44b9898524c09511bd33c3f12e2dfBen GruverRecognizerSharedState = Struct.new( 39324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver :following, 40324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver :error_recovery, 41324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver :last_error_index, 42324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver :backtracking, 43324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver :rule_memory, 44324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver :syntax_errors, 45324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver :token, 46324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver :token_start_position, 47324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver :token_start_line, 48324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver :token_start_column, 49324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver :channel, 50324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver :type, 51324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver :text 52324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver) 53324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 54324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver=begin rdoc ANTLR3::RecognizerSharedState 55324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 56324c4644fee44b9898524c09511bd33c3f12e2dfBen GruverA big Struct-based class containing most of the data that makes up a 57324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverrecognizer's state. 
These attributes are externalized from the recognizer itself 58324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverso that recognizer delegation (which occurs when you import other grammars into 59324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruveryour grammar) can function; multiple recognizers can share a common state. 60324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 61324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver== Structure Attributes 62324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 63324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverfollowing:: 64324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver a stack that tracks follow sets for error recovery 65324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruvererror_recovery:: 66324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver a flag indicating whether or not the recognizer is in error recovery mode 67324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverlast_error_index:: 68324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver the index in the input stream of the last error 69324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverbacktracking:: 70324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver tracks the backtracking depth 71324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverrule_memory:: 72324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if a grammar is compiled with the memoization option, this will be 73324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver set to a hash mapping previously parsed rules to cached indices 74324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruversyntax_errors:: 75324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver tracks the number of syntax errors seen so far 76324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruvertoken:: 77324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver holds newly constructed tokens for lexer rules 78324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruvertoken_start_position:: 79324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver the input stream index at which the token starts 
80324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruvertoken_start_line:: 81324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver the input stream line number at which the token starts 82324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruvertoken_start_column:: 83324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver the input stream column at which the token starts 84324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverchannel:: 85324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver the channel value of the target token 86324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruvertype:: 87324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver the type value of the target token 88324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruvertext:: 89324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver the text of the target token 90324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 91324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver=end 92324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 93324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverclass RecognizerSharedState 94324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def initialize 95324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver super( [], false, -1, 0, nil, 0, nil, -1 ) 96324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver # ^-- same as this --v 97324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver # self.following = [] 98324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver # self.error_recovery = false 99324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver # self.last_error_index = -1 100324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver # self.backtracking = 0 101324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver # self.syntax_errors = 0 102324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver # self.token_start_position = -1 103324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver end 104324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 105324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 106324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver # restores all 
of the state variables to their respective 107324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver # initial default values 108324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def reset! 109324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self.following.clear 110324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self.error_recovery = false 111324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self.last_error_index = -1 112324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self.backtracking = 0 113324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self.rule_memory and rule_memory.clear 114324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self.syntax_errors = 0 115324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self.token = nil 116324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self.token_start_position = -1 117324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self.token_start_line = nil 118324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self.token_start_column = nil 119324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self.channel = nil 120324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self.type = nil 121324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self.text = nil 122324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver end 123324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverend 124324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 125324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverend # unless const_defined?( :RecognizerSharedState ) 126324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 127324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver=begin rdoc ANTLR3::Recognizer 128324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 129324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver= Scope 130324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 131324c4644fee44b9898524c09511bd33c3f12e2dfBen GruverScope is used to represent instances of ANTLR's various attribute scopes. 
132324c4644fee44b9898524c09511bd33c3f12e2dfBen GruverIt is identical to Ruby's built-in Struct class, but it takes string 133324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverattribute declarations from the ANTLR grammar as parameters, and overrides 134324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverthe #initialize method to set the default values if any are present in 135324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverthe scope declaration. 136324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 137324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver Block = Scope.new( "name", "depth = 0", "variables = {}" ) 138324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver Block.new # => #<struct Block name=nil, depth=0, variables={}> 139324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver Block.new( "function" ) # => #<struct Block name="function", depth=0, variables={}> 140324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver Block.new( 'a', 1, :x => 3 ) # => #<struct Block name="a", depth=1, variables={ :x => 3 }> 141324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 142324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver=end 143324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 144324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverclass Scope < ::Struct 145324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def self.new( *declarations, &body ) 146324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver names = [] 147324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver defaults = {} 148324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver for decl in declarations 149324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver name, default = decl.to_s.split( /\s*=\s*/, 2 ) 150324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver names << ( name = name.to_sym ) 151324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver default and defaults[ name ] = default 152324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver end 153324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver super( *names ) do 
154324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 155324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver # If no defaults, leave the initialize method the same as 156324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver # the struct's default initialize for speed. Otherwise, 157324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver # overwrite the initialize to populate with default values. 158324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver unless defaults.empty? 159324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver parameters = names.map do | name | 160324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver "#{ name } = " << defaults.fetch( name, 'nil' ) 161324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver end.join( ', ' ) 162324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver class_eval( <<-END ) 163324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def initialize( #{ parameters } ) 164324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver super( #{ names.join( ', ' ) } ) 165324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver end 166324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver END 167324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver end 168324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 169324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver body and class_eval( &body ) 170324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver end 171324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver end 172324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverend 173324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 174324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver=begin rdoc ANTLR3::Recognizer 175324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 176324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver= Recognizer 177324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 178324c4644fee44b9898524c09511bd33c3f12e2dfBen GruverAs the base class of all ANTLR-generated recognizers, Recognizer provides 179324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruvermuch of the shared functionality and 
structure used in the recognition process. 180324c4644fee44b9898524c09511bd33c3f12e2dfBen GruverFor all effective purposes, the class and its immediate subclasses Lexer, 181324c4644fee44b9898524c09511bd33c3f12e2dfBen GruverParser, and TreeParser are abstract classes. They can be instantiated, but 182324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverthey're pretty useless on their own. Instead, to make useful code, you write an 183324c4644fee44b9898524c09511bd33c3f12e2dfBen GruverANTLR grammar and ANTLR will generate classes which inherit from one of the 184324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverrecognizer base classes, providing the implementation of the grammar rules 185324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruveritself. this group of classes to implement necessary tasks. Recognizer 186324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverdefines methods related to: 187324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 188324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver* token and character matching 189324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver* prediction and recognition strategy 190324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver* recovering from errors 191324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver* reporting errors 192324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver* memoization 193324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver* simple rule tracing and debugging 194324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 195324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver=end 196324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 197324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverclass Recognizer 198324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver include Constants 199324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver include Error 200324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver include TokenFactory 201324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver extend ClassMacros 202324c4644fee44b9898524c09511bd33c3f12e2dfBen 
Gruver 203324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver @rules = {} 204324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 205324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver # inherited class methods and hooks 206324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver class << self 207324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver attr_reader :grammar_file_name, 208324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver :antlr_version, 209324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver :antlr_version_string, 210324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver :library_version_string, 211324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver :grammar_home 212324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 213324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver attr_accessor :token_scheme, :default_rule 214324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 215324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver # generated recognizer code uses this method to stamp 216324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver # the code with the name of the grammar file and 217324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver # the current version of ANTLR being used to generate 218324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver # the code 219324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def generated_using( grammar_file, antlr_version, library_version = nil ) 220324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver @grammar_file_name = grammar_file.freeze 221324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver @antlr_version_string = antlr_version.freeze 222324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver @library_version = Util.parse_version( library_version ) 223324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if @antlr_version_string =~ /^(\d+)\.(\d+)(?:\.(\d+)(?:b(\d+))?)?(.*)$/ 224324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver @antlr_version = [ $1, $2, $3, $4 ].map! 
{ |str| str.to_i } 225324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver timestamp = $5.strip 226324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver #@antlr_release_time = $5.empty? ? nil : Time.parse($5) 227324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver else 228324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver raise "bad version string: %p" % version_string 229324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver end 230324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver end 231324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 232324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver # this method is used to generate return-value structures for 233324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver # rules with multiple return values. To avoid generating 234324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver # a special class for ever rule in AST parsers and such 235324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver # (where most rules have the same default set of return values), 236324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver # each recognizer gets a default return value structure 237324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver # assigned to the constant +Return+. Rules which don't 238324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver # require additional custom members will have a rule-return 239324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver # name constant that just points to the generic return 240324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver # value. 241324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def define_return_scope( *members ) 242324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if members.empty? 
then generic_return_scope 243324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver else 244324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver members += return_scope_members 245324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver Struct.new( *members ) 246324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver end 247324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver end 248324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 249324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver # used as a hook to add additional default members 250324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver # to default return value structures 251324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver # For example, all AST-building parsers override 252324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver # this method to add an extra +:tree+ field to 253324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver # all rule return structures. 254324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def return_scope_members 255324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver [ :start, :stop ] 256324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver end 257324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 258324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver # sets up and returns the generic rule return 259324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver # scope for a recognizer 260324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def generic_return_scope 261324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver @generic_return_scope ||= begin 262324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver struct = Struct.new( *return_scope_members ) 263324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver const_set( :Return, struct ) 264324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver end 265324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver end 266324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 267324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def imported_grammars 268324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 
@imported_grammars ||= Set.new 269324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver end 270324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 271324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def master_grammars 272324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver @master_grammars ||= [] 273324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver end 274324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 275324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def master 276324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver master_grammars.last 277324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver end 278324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 279324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def masters( *grammar_names ) 280324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver for grammar in grammar_names 281324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver unless master_grammars.include?( grammar ) 282324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver master_grammars << grammar 283324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver attr_reader( Util.snake_case( grammar ) ) 284324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver end 285324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver end 286324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver end 287324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver private :masters 288324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 289324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def imports( *grammar_names ) 290324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver for grammar in grammar_names 291324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver imported_grammars.add?( grammar.to_sym ) and 292324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver attr_reader( Util.snake_case( grammar ) ) 293324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver end 294324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return imported_grammars 295324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver end 
296324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver private :imports 297324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 298324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def rules 299324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self::RULE_METHODS.dup rescue [] 300324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver end 301324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 302324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def default_rule 303324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver @default_rule ||= rules.first 304324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver end 305324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 306324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def debug? 307324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return false 308324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver end 309324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 310324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def profile? 311324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return false 312324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver end 313324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 314324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def Scope( *declarations, &body ) 315324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver Scope.new( *declarations, &body ) 316324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver end 317324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 318324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def token_class 319324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver @token_class ||= begin 320324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self::Token rescue 321324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver superclass.token_class rescue 322324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ANTLR3::CommonToken 323324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver end 324324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver end 325324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver private 
:generated_using 326324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver end 327324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 328324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver @grammar_file_name = nil 329324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver @antlr_version = ANTLR3::ANTLR_VERSION 330324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver @antlr_version_string = ANTLR3::ANTLR_VERSION_STRING 331324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 332324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def grammar_file_name 333324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self.class.grammar_file_name 334324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver end 335324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 336324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def antlr_version 337324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self.class.antlr_version 338324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver end 339324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 340324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def antlr_version_string 341324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver self.class.antlr_version_string 342324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver end 343324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 344324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver attr_accessor :input 345324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver attr_reader :state 346324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 347324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def each_delegate 348324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver block_given? 
or return enum_for( __method__ ) 349324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver for grammar in self.class.imported_grammars 350324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver del = __send__( Util.snake_case( grammar ) ) and 351324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver yield( del ) 352324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver end 353324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver end 354324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 355324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver # Create a new recognizer. The constructor simply ensures that 356324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver # all recognizers are initialized with a shared state object. 357324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver # See the main recognizer subclasses for more specific 358324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver # information about creating recognizer objects like 359324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver # lexers and parsers. 360324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def initialize( options = {} ) 361324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver @state = options[ :state ] || RecognizerSharedState.new 362324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver @error_output = options.fetch( :error_output, $stderr ) 363324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver defined?( @input ) or @input = nil 364324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver initialize_dfas 365324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver end 366324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 367324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver # Resets the recognizer's state data to initial values. 368324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver # As a result, all error tracking and error recovery 369324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver # data accumulated in the current state will be cleared. 
# It will also attempt to reset the input stream
# via input.reset, but it ignores any errors received
# from doing so. Thus the input stream is not guaranteed
# to be rewound to its initial position
def reset
  @state.reset! if @state
  # nothing to rewind when no input stream has been attached
  return @input unless @input
  begin
    @input.reset
  rescue StandardError
    # best effort only: a stream that fails to reset is left as it is
    nil
  end
end

# Attempt to match the current input symbol against the token type
# specified by +type+. If the symbol matches the type,
# consume the current symbol and return its value. If
# the symbol doesn't match, attempt to use the follow-set
# data provided by +follow+ to recover from the mismatched
# token.
def match( type, follow )
  # grab the current symbol before the stream is advanced
  symbol = current_symbol
  unless @input.peek == type
    # mismatch: fail fast while backtracking, otherwise try to recover
    @state.backtracking > 0 and raise( BacktrackingFailed )
    return recover_from_mismatched_token( type, follow )
  end
  @input.consume
  @state.error_recovery = false
  symbol
end

# match anything -- i.e. wildcard match. Simply consume
# the current symbol from the input stream.
def match_any
  # any symbol is acceptable, so this always counts as a successful match
  @state.error_recovery = false
  @input.consume
end

##############################################################################################
###################################### Error Reporting #######################################
##############################################################################################
##############################################################################################

# When a recognition error occurs, this method is the main
# hook for carrying out the error reporting process. The
# default implementation calls +display_recognition_error+
# to display the error info.
#
# Nothing is reported while the recognizer is already in error
# recovery mode, so a single bad region of input produces one
# message. Otherwise the syntax error count is incremented and
# error recovery mode is switched on before the error is displayed.
def report_error( e = $! )
  return if @state.error_recovery
  @state.syntax_errors += 1
  @state.error_recovery = true
  display_recognition_error( e )
end

# error reporting hook for presenting the information
# The default implementation builds appropriate error
# message text using +error_header+ and +error_message+,
# and calls +emit_error_message+ to write the error
# message out to some source
def display_recognition_error( e = $! )
  header  = error_header( e )
  message = error_message( e )
  emit_error_message( "#{ header } #{ message }" )
end

# used to construct an appropriate error message
# based on the specific type of error and the
# error's attributes
#
# Errors that are not one of the recognition error classes
# listed below fall back on their own +message+.
def error_message( e = $! )
  case e
  when UnwantedToken
    expected = token_name( e.expecting )
    "extraneous input #{ token_error_display( e.unexpected_token ) } expecting #{ expected }"
  when MissingToken
    expected = token_name( e.expecting )
    "missing #{ expected } at #{ token_error_display( e.symbol ) }"
  when MismatchedToken
    expected = token_name( e.expecting )
    "mismatched input #{ token_error_display( e.symbol ) } expecting #{ expected }"
  when MismatchedTreeNode
    expected = token_name( e.expecting )
    "mismatched tree node: #{ e.symbol } expecting #{ expected }"
  when NoViableAlternative
    # built by interpolation rather than by appending to a string
    # literal, which would raise if string literals are frozen
    "no viable alternative at input #{ token_error_display( e.symbol ) }"
  when MismatchedSet, MismatchedNotSet
    "mismatched input %s expecting set %s" %
      [ token_error_display( e.symbol ), e.expecting.inspect ]
  when FailedPredicate
    "rule %s failed predicate: { %s }?" % [ e.rule_name, e.predicate_text ]
  else e.message
  end
end

#
# used to add a tag to the error message that indicates
# the location of the input stream when the error
# occurred
#
def error_header( e = $! )
  e.location
end

#
# formats a token object appropriately for inspection
# within an error message
#
# The token's +text+ is preferred, then its +source_text+ (if it
# has one). Failing both, a bracketed placeholder is built from the
# token type. The result is always passed through +inspect+.
#
def token_error_display( token )
  text = token.text || ( token.source_text rescue nil )
  unless text
    text =
      case
      when token.type == EOF then '<EOF>'
      # a failing token_name lookup just falls through to the next option
      when name = ( token_name( token.type ) rescue nil ) then "<#{ name }>"
      when token.respond_to?( :name ) then "<#{ token.name }>"
      else "<#{ token.type }>"
      end
  end
  return text.inspect
end

#
# Write the error report data out to some source. The message is
# written with +puts+ to @error_output; nothing is written if
# @error_output is not set
#
def emit_error_message( message )
  @error_output.puts( message ) if @error_output
end

##############################################################################################
###################################### Error Recovery ########################################
##############################################################################################

# Resynchronize the input after a recognition error by consuming
# symbols until one in the current error recovery set (or EOF) is
# the next symbol.
def recover( error = $! )
  # if the previous error occurred at this very index, no progress has
  # been made since then; force one symbol to be consumed so that
  # recovery cannot loop forever on the same spot
  @input.consume if @state.last_error_index == @input.index
  @state.last_error_index = @input.index

  follow_set = compute_error_recovery_set

  resync { consume_until( follow_set ) }
end

# Run the given block between the +begin_resync+ and +end_resync+
# hooks and return the block's value. +end_resync+ is run even if
# the block raises.
def resync
  begin_resync
  yield
ensure
  end_resync
end

# overridable hook method that is executed at the start of the
# resyncing procedure in recover
#
# by default, it does nothing
def begin_resync
  # do nothing
end

# overridable hook method that is executed after the resyncing
# procedure has completed
#
# by default, it does nothing
def end_resync
  # do nothing
end

# (The following explanation has been lifted directly from the
# source code documentation of the ANTLR Java runtime library)
#
# Compute the error recovery set for the current rule. During
# rule invocation, the parser pushes the set of tokens that can
# follow that rule reference on the stack; this amounts to
# computing FIRST of what follows the rule reference in the
# enclosing rule. This local follow set only includes tokens
# from within the rule; i.e., the FIRST computation done by
# ANTLR stops at the end of a rule.
#
# EXAMPLE
#
# When you find a "no viable alt exception", the input is not
# consistent with any of the alternatives for rule r. The best
# thing to do is to consume tokens until you see something that
# can legally follow a call to r *or* any rule that called r.
# You don't want the exact set of viable next tokens because the
# input might just be missing a token--you might consume the
# rest of the input looking for one of the missing tokens.
#
# Consider grammar:
#
#   a : '[' b ']'
#     | '(' b ')'
#     ;
#   b : c '^' INT ;
#   c : ID
#     | INT
#     ;
#
# At each rule invocation, the set of tokens that could follow
# that rule is pushed on a stack.
# Here are the various "local"
# follow sets:
#
#   FOLLOW( b1_in_a ) = FIRST( ']' ) = ']'
#   FOLLOW( b2_in_a ) = FIRST( ')' ) = ')'
#   FOLLOW( c_in_b ) = FIRST( '^' ) = '^'
#
# Upon erroneous input "[]", the call chain is
#
#   a -> b -> c
#
# and, hence, the follow context stack is:
#
#   depth  local follow set  after call to rule
#     0    \<EOF>            a (from main( ) )
#     1    ']'               b
#     2    '^'               c
#
# Notice that <tt>')'</tt> is not included, because b would have to have
# been called from a different context in rule a for ')' to be
# included.
#
# For error recovery, we cannot consider FOLLOW(c)
# (context-sensitive or otherwise).
# We need the combined set of
# all context-sensitive FOLLOW sets--the set of all tokens that
# could follow any reference in the call chain. We need to
# resync to one of those tokens. Note that FOLLOW(c)='^' and if
# we resync'd to that token, we'd consume until EOF. We need to
# sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}.
# In this case, for input "[]", LA(1) is in this set so we would
# not consume anything and after printing an error rule c would
# return normally. It would not find the required '^' though.
# At this point, it gets a mismatched token error and throws an
# exception (since LA(1) is not in the viable following token
# set). The rule exception handler tries to recover, but finds
# the same recovery set and doesn't consume anything. Rule b
# exits normally returning to rule a. Now it finds the ']' (and
# with the successful match exits errorRecovery mode).
#
# So, you can see that the parser walks up the call chain looking
# for the token that was a member of the recovery set.
#
# Errors are not generated in errorRecovery mode.
#
# ANTLR's error recovery mechanism is based upon original ideas:
#
# "Algorithms + Data Structures = Programs" by Niklaus Wirth
#
# and
#
# "A note on error recovery in recursive descent parsers":
# http://portal.acm.org/citation.cfm?id=947902.947905
#
# Later, Josef Grosch had some good ideas:
#
# "Efficient and Comfortable Error Recovery in Recursive Descent
# Parsers":
# ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip
#
# Like Grosch I implemented local FOLLOW sets that are combined
# at run-time upon error to avoid overhead during parsing.
def compute_error_recovery_set
  # inexact combination: the union of every local follow set
  # currently on the follow stack
  combine_follows( false )
end

# Try to recover from a symbol that did not match the expected
# token +type+, using single-token deletion or insertion.
#
# * if the symbol *after* the current one is of the expected type,
#   the current symbol is treated as extraneous: it is skipped, an
#   UnwantedToken error is reported, and the expected symbol is
#   then consumed and the value of that +consume+ call returned
# * if the current symbol is one that may legally follow the
#   expected token, the expected token is treated as missing: a
#   MissingToken error is reported and the stand-in produced by
#   +missing_symbol+ is returned
# * otherwise a MismatchedToken error is raised
def recover_from_mismatched_token( type, follow )
  if mismatch_is_unwanted_token?( type )
    # build the error before skipping, while the unwanted symbol
    # is still the current one
    err = UnwantedToken( type )
    resync { @input.consume }
    report_error( err )

    return @input.consume
  end

  if mismatch_is_missing_token?( follow )
    inserted = missing_symbol( nil, type, follow )
    report_error( MissingToken( type, inserted ) )
    return inserted
  end

  raise MismatchedToken( type )
end

# Try to recover from the set-mismatch error +e+. If the current
# symbol may legally follow, the error is reported and a stand-in
# symbol from +missing_symbol+ is returned; otherwise +e+ is raised.
def recover_from_mismatched_set( e, follow )
  raise e unless mismatch_is_missing_token?( follow )
  report_error( e )
  return missing_symbol( e, INVALID_TOKEN_TYPE, follow )
end

# Returns true (after reporting +e+) if the current input symbol is
# a member of +follow+, and false otherwise. A +follow+ of nil
# always gives false.
def recover_from_mismatched_element( e, follow )
  return false if follow.nil?

  if follow.include?( EOR_TOKEN_TYPE )
    # the end-of-rule marker stands for "whatever may follow the
    # current rule": swap it for the context-sensitive follow set
    viable_tokens = compute_context_sensitive_rule_follow
    follow = ( follow | viable_tokens ) - Set[ EOR_TOKEN_TYPE ]
  end

  return false unless follow.include?( @input.peek )
  report_error( e )
  return true
end

# Conjure up a missing token during error recovery.
#
# The recognizer attempts to recover from single missing
# symbols. But, actions might refer to that missing symbol.
# For example, x=ID {f($x);}. The action clearly assumes
# that there has been an identifier matched previously and that
# $x points at that token.
# If that token is missing, but
# the next token in the stream is what we want we assume that
# this token is missing and we keep going. Because we
# have to return some token to replace the missing token,
# we have to conjure one up. This method gives the user control
# over the tokens returned for missing tokens. Mostly,
# you will want to create something special for identifier
# tokens. For literals such as '{' and ',', the default
# action in the parser or tree parser works. It simply creates
# a CommonToken of the appropriate type. The text will be the token.
# If you change what tokens must be created by the lexer,
# override this method to create the appropriate tokens.
def missing_symbol( error, expected_token_type, follow )
  # this base implementation conjures nothing up
  return nil
end

# A mismatch looks like a single extraneous symbol when the symbol
# *after* the current one is of the expected +type+.
def mismatch_is_unwanted_token?( type )
  @input.peek( 2 ) == type
end

# A mismatch looks like a single missing symbol when the current
# input symbol is one that may legally follow the expected one.
#
# Returns false when +follow+ is nil. The set passed in by the
# caller is never modified.
def mismatch_is_missing_token?( follow )
  return false if follow.nil?

  if follow.include?( EOR_TOKEN_TYPE )
    # the end-of-rule marker stands for "whatever may follow the
    # current rule": widen the set with the context-sensitive follow
    # (Set#| builds a new set, so the caller's copy is left intact)
    viable_tokens = compute_context_sensitive_rule_follow
    follow = follow | viable_tokens

    # the marker is only kept when the follow stack is empty
    follow.delete( EOR_TOKEN_TYPE ) unless @state.following.empty?
  end

  return true if follow.include?( @input.peek ) or follow.include?( EOR_TOKEN_TYPE )
  return false
end

# Returns the number of syntax errors recorded so far if there
# have been any, and false otherwise.
def syntax_errors?
  error_count = @state.syntax_errors
  error_count > 0 and error_count
end

#
# Returns the number of syntax errors recorded in the recognizer
# state so far.
#
def number_of_syntax_errors
  @state.syntax_errors
end

#
# Compute the context-sensitive +FOLLOW+ set for current rule.
# This is set of token types that can follow a specific rule
# reference given a specific call chain. You get the set of
# viable tokens that can possibly come next (look depth 1)
# given the current call chain.
# Contrast this with the
# definition of plain FOLLOW for rule r:
#
#   FOLLOW(r)={x | S=>*alpha r beta in G and x in FIRST(beta)}
#
# where x in T* and alpha, beta in V*; T is set of terminals and
# V is the set of terminals and nonterminals. In other words,
# FOLLOW(r) is the set of all tokens that can possibly follow
# references to r in *any* sentential form (context). At
# runtime, however, we know precisely which context applies as
# we have the call chain. We may compute the exact (rather
# than covering superset) set of following tokens.
#
# For example, consider grammar:
#
#   stat : ID '=' expr ';' // FOLLOW(stat)=={EOF}
#        | "return" expr '.'
#        ;
#   expr : atom ('+' atom)* ; // FOLLOW(expr)=={';','.',')'}
#   atom : INT // FOLLOW(atom)=={'+',')',';','.'}
#        | '(' expr ')'
#        ;
#
# The FOLLOW sets are all inclusive whereas context-sensitive
# FOLLOW sets are precisely what could follow a rule reference.
# For input "i=(3);", here is the derivation:
#
#   stat => ID '=' expr ';'
#        => ID '=' atom ('+' atom)* ';'
#        => ID '=' '(' expr ')' ('+' atom)* ';'
#        => ID '=' '(' atom ')' ('+' atom)* ';'
#        => ID '=' '(' INT ')' ('+' atom)* ';'
#        => ID '=' '(' INT ')' ';'
#
# At the "3" token, you'd have a call chain of
#
#   stat -> expr -> atom -> expr -> atom
#
# What can follow that specific nested ref to atom?
# Exactly ')'
# as you can see by looking at the derivation of this specific
# input. Contrast this with the FOLLOW(atom)={'+',')',';','.'}.
#
# You want the exact viable token set when recovering from a
# token mismatch. Upon token mismatch, if LA(1) is member of
# the viable next token set, then you know there is most likely
# a missing token in the input stream. "Insert" one by just not
# throwing an exception.
#
def compute_context_sensitive_rule_follow
  combine_follows true
end

# Combine the local follow sets on the follow stack
# (<tt>@state.following</tt>) into a single new Set, walking from
# the most recently pushed set down to the first.
#
# When +exact+ is false, the result is the plain union of every
# set on the stack.
#
# When +exact+ is true, the walk stops at the first local set that
# does not contain EOR_TOKEN_TYPE, and EOR_TOKEN_TYPE is removed
# from the result after each set that does contain it, except for
# the set at the bottom of the stack (index 0).
def combine_follows( exact )
  follow_set = Set.new
  @state.following.each_with_index.reverse_each do | local_follow_set, index |
    # merge in place rather than allocating a fresh set per stack frame
    follow_set.merge( local_follow_set )
    next unless exact

    # without the end-of-rule marker, nothing from further down the
    # call chain can follow here
    break unless local_follow_set.include?( EOR_TOKEN_TYPE )
    follow_set.delete( EOR_TOKEN_TYPE ) if index > 0
  end
  return follow_set
end

#
# Match needs to return the current input symbol, which gets put
# into the label for the associated token ref; e.g., x=ID. Token
# and tree parsers need to return different objects. Rather than test
# for input stream type or change the IntStream interface, I use
# a simple method to ask the recognizer to tell me what the current
# input symbol is.
#
# This is ignored for lexers.
#
  def current_symbol
    @input.look
  end

  #
  # Consume input symbols until one matches a type within types
  #
  # types can be a single symbol type or a set of symbol types
  #
  # Stops without consuming when the lookahead type is EOF or is a member
  # of +types+, and returns that lookahead type.
  #
  def consume_until( types )
    # normalise a single type (or any non-Set list of types) into a Set
    types.is_a?( Set ) or types = Set[ *types ]
    type = @input.peek
    until type == EOF or types.include?( type )
      @input.consume
      type = @input.peek
    end
    return( type )
  end

  #
  # Returns true if the recognizer is currently in a decision for which
  # backtracking has been enabled
  #
  def backtracking?
    @state.backtracking > 0
  end

  # Returns the current value of @state.backtracking.
  def backtracking_level
    @state.backtracking
  end

  # Overwrites @state.backtracking with +n+.
  def backtracking_level=( n )
    @state.backtracking = n
  end

  #
  # Runs the given block speculatively: the backtracking level is raised
  # by one and the input position is marked before yielding.
  #
  # Returns false when the block raises BacktrackingFailed and true when
  # it completes normally; any other exception propagates.  In every case
  # the input is rewound to the mark and the backtracking level is
  # lowered again before control leaves the method.
  #
  def backtrack
    @state.backtracking += 1
    start = @input.mark
    success =
      begin yield
      rescue BacktrackingFailed then false
      else true
      end
    return success
  ensure
    @input.rewind( start )
    @state.backtracking -= 1
  end

  # Invokes the method called +name+ on this recognizer inside #backtrack
  # and returns the resulting true / false.
  def syntactic_predicate?( name )
    backtrack { send name }
  end

# short aliases for the backtracking level accessors
  alias backtracking backtracking_level
  alias backtracking= backtracking_level=

  #
  # Looks up the memoized result recorded for +rule+ at +start_index+.
  #
  # If @state.rule_memory has no table for +rule+ yet, one is created
  # whose default value is MEMO_RULE_UNKNOWN, so an unrecorded start
  # index yields MEMO_RULE_UNKNOWN.
  #
  def rule_memoization( rule, start_index )
    @state.rule_memory.fetch( rule ) do
      @state.rule_memory[ rule ] = Hash.new( MEMO_RULE_UNKNOWN )
    end[ start_index ]
  end

  #
  # Checks the memo table for +rule+ at the current input index.
  #
  # Returns false when nothing is recorded.  Raises BacktrackingFailed
  # when the recorded value is MEMO_RULE_FAILED.  Otherwise seeks the
  # input to one past the recorded stop index and returns true.
  #
  def already_parsed_rule?( rule )
    stop_index = rule_memoization( rule, @input.index )
    case stop_index
    when MEMO_RULE_UNKNOWN then return false
    when MEMO_RULE_FAILED
      raise BacktrackingFailed
    else
      @input.seek( stop_index + 1 )
    end
    return true
  end

  #
  # Records the outcome of +rule+ started at +start_index+: the index of
  # the last consumed symbol (@input.index - 1) when +success+ is truthy,
  # MEMO_RULE_FAILED otherwise.  Nothing is stored when
  # @state.rule_memory[ rule ] is nil or false.
  #
  def memoize( rule, start_index, success )
    stop_index = success ? @input.index - 1 : MEMO_RULE_FAILED
    memo = @state.rule_memory[ rule ] and memo[ start_index ] = stop_index
  end

  #
  # Writes a "--> enter" trace line for +rule_name+ to @error_output,
  # noting the backtracking depth when it is above zero.  +rule_index+
  # is accepted but not used here.
  #
  def trace_in( rule_name, rule_index, input_symbol )
    @error_output.printf( "--> enter %s on %s", rule_name, input_symbol )
    @state.backtracking > 0 and @error_output.printf(
      " (in backtracking mode: depth = %s)", @state.backtracking
    )
    @error_output.print( "\n" )
  end

  #
  # Writes a "<-- exit" trace line for +rule_name+ to @error_output,
  # noting the backtracking depth when it is above zero.  +rule_index+
  # is accepted but not used here.
  #
  def trace_out( rule_name, rule_index, input_symbol )
    @error_output.printf( "<-- exit %s on %s", rule_name, input_symbol )
    @state.backtracking > 0 and @error_output.printf(
      " (in backtracking mode: depth = %s)", @state.backtracking
    )
    @error_output.print( "\n" )
  end

private

  # Hook with an intentionally empty body in this class.
  def initialize_dfas
    # do nothing
  end
end


# constant alias for compatibility with older versions of the
# runtime library
BaseRecognizer = Recognizer

=begin rdoc ANTLR3::Lexer

= Lexer

Lexer is the default superclass of all lexers generated by ANTLR. The class
tailors the core functionality provided by Recognizer to the task of
matching patterns in the text input and breaking the input into tokens.

== About Lexers

A lexer's job is to take input text and break it up into _tokens_ -- objects
that encapsulate a piece of text, a type label (such as ID or INTEGER), and the
position of the text with respect to the input. Thus, a lexer is essentially a
complicated iterator that steps through an input stream and produces a sequence
of tokens. Sometimes lexers are enough to carry out a goal on their own, such as
source code highlighting and simple code analysis. Usually, however,
the lexer converts text into tokens for use by a parser, which recognizes larger
structures within the text.

ANTLR parsers have a variety of entry points specified by parser rules, each of
which defines the structure of a specific type of sentence in a grammar. Lexers,
however, are primarily intended to have a single entry point. A lexer looks at the
characters starting at the current input position, decides if the chunk of text
matches one of a number of possible token type definitions, wraps the chunk into
a token with information on its type and location, and advances the input stream
to the next place.

== ANTLR Lexers and the Lexer API

ANTLR-generated lexers will subclass this class, unless specified otherwise
within a grammar file. The generated class will provide an implementation of
each lexer rule as a method of the same name. The subclass will also provide an
implementation for the abstract method #token!, the purpose of which is to
multiplex the token type definitions and predict what rule definition to execute
to fetch a token. The primary method in the lexer API, #next_token, uses
#token! to fetch the next token and drive the iteration.

If the lexer is preparing tokens for use by an ANTLR generated parser, the lexer
will generally be used to build a TokenStream object. The following code example
demonstrates the typical setup for using ANTLR parsers and lexers in Ruby.

  # in HypotheticalLexer.rb
  module Hypothetical
  class Lexer < ANTLR3::Lexer
    # ...
    # ANTLR generated code
    # ...
  end
  end

  # in HypotheticalParser.rb
  module Hypothetical
  class Parser < ANTLR3::Parser
    # ...
    # more ANTLR generated code
    # ...
  end
  end

  # to take hypothetical source code and prepare it for parsing,
  # there is generally a four-step construction process

  source = "some hypothetical source code"
  input = ANTLR3::StringStream.new(source, :file => 'blah-de-blah.hyp')
  lexer = Hypothetical::Lexer.new( input )
  tokens = ANTLR3::CommonTokenStream.new( lexer )
  parser = Hypothetical::Parser.new( tokens )

  # if you're using the standard streams, ANTLR3::StringStream and
  # ANTLR3::CommonTokenStream, you can write the same process
  # shown above more succinctly:

  lexer  = Hypothetical::Lexer.new("some hypothetical source code", :file => 'blah-de-blah.hyp')
  parser = Hypothetical::Parser.new( lexer )

=end
class Lexer < Recognizer
# class-level setup and helpers
  include TokenSource
  @token_class = CommonToken

  # Name of the rule used by default; memoized as :token! on first use.
  def self.default_rule
    @default_rule ||= :token!
  end

  #
  # Builds an ANTLR3::Main::LexerMain for this lexer class.
  #
  # +argv+ defaults to ARGV.  If a Hash is passed in the first position
  # it is taken as +options+ and ARGV is used for the arguments.  When a
  # block is given the main object is yielded to it; otherwise it is
  # executed with +argv+.
  #
  def self.main( argv = ARGV, options = {} )
    if argv.is_a?( ::Hash ) then argv, options = ARGV, argv end
    main = ANTLR3::Main::LexerMain.new( self, options )
    block_given? ? yield( main ) : main.execute( argv )
  end

  #
  # Returns the Parser constant found under @grammar_home, memoizing a
  # successful lookup.  If the constant is missing, tries to require
  # "<GrammarName>Parser" (the last segment of @grammar_home's name) and
  # looks again.  Evaluates to nil when @grammar_home is unset or the
  # parser still cannot be loaded.
  #
  def self.associated_parser
    @associated_parser ||= begin
      @grammar_home and @grammar_home::Parser
    rescue NameError
      grammar_name = @grammar_home.name.split( "::" ).last
      begin
        require "#{ grammar_name }Parser"
        @grammar_home::Parser
      rescue LoadError, NameError
        # deliberately best-effort: fall through with nil
      end
    end
  end

# construction and the token-producing loop
  #
  # +input+ is passed through cast_input together with +options+ and the
  # result is stored in @input; +options+ is also handed to the
  # superclass initializer.
  #
  def initialize( input, options = {} )
    super( options )
    @input = cast_input( input, options )
  end

  # Always nil for lexers.
  def current_symbol
    nil
  end

  #
  # Produces the next token.
  #
  # Each pass of the loop clears the per-token fields of @state and
  # records the current input index / column / line as the token start.
  # EOF_TOKEN is returned when the lookahead is EOF.  Otherwise token! is
  # called and then:
  #
  # * if the rule left @state.token nil, the result of #emit is returned
  # * if it is SKIP_TOKEN, the loop starts over
  # * any other value is returned as the token
  #
  # A NoViableAlternative error is reported and passed to #recover; any
  # other Error::RecognitionError is only reported.  In both cases the
  # loop then continues with the next attempt.
  #
  def next_token
    loop do
      @state.token = nil
      @state.channel = DEFAULT_CHANNEL
      @state.token_start_position = @input.index
      @state.token_start_column = @input.column
      @state.token_start_line = @input.line
      @state.text = nil
      @input.peek == EOF and return EOF_TOKEN
      begin
        token!

        case token = @state.token
        when nil then return( emit )
        when SKIP_TOKEN then next
        else
          return token
        end
      rescue NoViableAlternative => re
        report_error( re )
        recover( re )
      rescue Error::RecognitionError => re
        report_error( re )
      end
    end
  end

  # Marks the token being matched as one to discard (see #next_token).
  def skip
    @state.token = SKIP_TOKEN
  end

  abstract :token!
# stream access, character matching and error messages

  # Returns self.to_a.
  def exhaust
    self.to_a
  end

  #
  # Replaces the input stream.  @input is cleared before reset() runs and
  # is assigned +input+ afterwards.
  #
  def char_stream=( input )
    @input = nil
    reset()
    @input = input
  end

  # Delegates to @input.source_name.
  def source_name
    @input.source_name
  end

  #
  # Stores +token+ in @state.token and returns it.  With no argument the
  # current @state.token is used; if that is nil a token is built with
  # create_token.
  #
  def emit( token = @state.token )
    token ||= create_token
    @state.token = token
    return token
  end

  #
  # Matches +expected+ against the input and consumes it.
  #
  # A String is matched byte by byte; any other value is compared with
  # the single lookahead character.  On a mismatch BacktrackingFailed is
  # raised while backtracking; otherwise a MismatchedToken error is
  # built, passed to #recover and raised.  Returns true on success.
  #
  def match( expected )
    case expected
    when String
      expected.each_byte do |char|
        unless @input.peek == char
          @state.backtracking > 0 and raise BacktrackingFailed
          error = MismatchedToken( char )
          recover( error )
          raise error
        end
        @input.consume()
      end
    else # single integer character
      unless @input.peek == expected
        @state.backtracking > 0 and raise BacktrackingFailed
        error = MismatchedToken( expected )
        recover( error )
        raise error
      end
      @input.consume
    end
    return true
  end

  # Consumes one input symbol unconditionally.
  def match_any
    @input.consume
  end

  #
  # Consumes the lookahead character when it lies between +min+ and
  # +max+ (inclusive) and returns true.  Otherwise raises
  # BacktrackingFailed while backtracking, or builds a MismatchedRange
  # error, passes it to #recover and raises it.
  #
  def match_range( min, max )
    char = @input.peek
    if char.between?( min, max ) then @input.consume
    else
      @state.backtracking > 0 and raise BacktrackingFailed
      error = MismatchedRange( min.chr, max.chr )
      recover( error )
      raise( error )
    end
    return true
  end

  # Delegates to @input.line.
  def line
    @input.line
  end

  # Delegates to @input.column.
  def column
    @input.column
  end

  # Delegates to @input.index.
  def character_index
    @input.index
  end

  #
  # Text of the token being matched: @state.text when it has been set,
  # otherwise the input substring from the token start position up to
  # the character before the current index.
  #
  def text
    @state.text and return @state.text
    @input.substring( @state.token_start_position, character_index - 1 )
  end

  # Overrides the text reported by #text.
  def text=( text )
    @state.text = text
  end

  # Hands the error to display_recognition_error.
  def report_error( e )
    display_recognition_error( e )
  end

  #
  # Builds a character-oriented message for the recognition error +e+;
  # error classes not listed below are left to the superclass.
  #
  def error_message( e )
    # the offending symbol may be missing or undisplayable; show nil then
    char = character_error_display( e.symbol ) rescue nil
    case e
    when Error::MismatchedToken
      expecting = character_error_display( e.expecting )
      "mismatched character #{ char }; expecting #{ expecting }"
    when Error::NoViableAlternative
      "no viable alternative at character #{ char }"
    when Error::EarlyExit
      "required ( ... )+ loop did not match anything at character #{ char }"
    when Error::MismatchedNotSet
      "mismatched character %s; expecting set %p" % [ char, e.expecting ]
    when Error::MismatchedSet
      "mismatched character %s; expecting set %p" % [ char, e.expecting ]
    when Error::MismatchedRange
      a = character_error_display( e.min )
      b = character_error_display( e.max )
      "mismatched character %s; expecting set %s..%s" % [ char, a, b ]
    else super
    end
  end
1162324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1163324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def character_error_display( char ) 1164324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver case char 1165324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver when EOF then '<EOF>' 1166324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver when Integer then char.chr.inspect 1167324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver else char.inspect 1168324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver end 1169324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver end 1170324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1171324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def recover( re ) 1172324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver @input.consume 1173324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver end 1174324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1175324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver alias input= char_stream= 1176324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1177324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverprivate 1178324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1179324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def cast_input( input, options ) 1180324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver case input 1181324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver when CharacterStream then input 1182324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver when ::String then StringStream.new( input, options ) 1183324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver when ::IO, ARGF then FileStream.new( input, options ) 1184324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver else input 1185324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver end 1186324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver end 1187324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1188324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def trace_in( rule_name, rule_index ) 1189324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if symbol = 
@input.look and symbol != EOF then symbol = symbol.inspect 1190324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver else symbol = '<EOF>' end 1191324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input_symbol = "#{ symbol } @ line #{ line } / col #{ column }" 1192324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver super( rule_name, rule_index, input_symbol ) 1193324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver end 1194324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1195324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def trace_out( rule_name, rule_index ) 1196324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if symbol = @input.look and symbol != EOF then symbol = symbol.inspect 1197324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver else symbol = '<EOF>' end 1198324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input_symbol = "#{ symbol } @ line #{ line } / col #{ column }" 1199324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver super( rule_name, rule_index, input_symbol ) 1200324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver end 1201324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1202324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def create_token( &b ) 1203324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if block_given? 
then super( &b ) 1204324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver else 1205324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver super do |t| 1206324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver t.input = @input 1207324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver t.type = @state.type 1208324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver t.channel = @state.channel 1209324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver t.start = @state.token_start_position 1210324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver t.stop = @input.index - 1 1211324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver t.line = @state.token_start_line 1212324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver t.text = self.text 1213324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver t.column = @state.token_start_column 1214324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver end 1215324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver end 1216324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver end 1217324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverend 1218324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1219324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1220324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver=begin rdoc ANTLR3::Parser 1221324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1222324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver= Parser 1223324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1224324c4644fee44b9898524c09511bd33c3f12e2dfBen GruverParser is the default base class of ANTLR-generated parser classes. The class 1225324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruvertailors the functionality provided by Recognizer to the task of parsing. 1226324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1227324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver== About Parsing 1228324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1229324c4644fee44b9898524c09511bd33c3f12e2dfBen GruverThis is just a lose overview of parsing. 
For considerably more in-depth coverage 1230324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverof the topic, read the ANTLR documentation or check out the ANTLR website 1231324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver(http://www.antlr.org). 1232324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1233324c4644fee44b9898524c09511bd33c3f12e2dfBen GruverA grammar defines the vocabulary and the sentence structure of a language. While 1234324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruvera lexer concerns the basic vocabulary symbols of the language, a parser's 1235324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverprimary task is to implement the sentence structure. 1236324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1237324c4644fee44b9898524c09511bd33c3f12e2dfBen GruverParsers are set up by providing a stream of tokens, which is usually created by 1238324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruvera corresponding lexer. Then, the user requests a specific sentence-structure 1239324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverwithin the grammar, such as "class_definition" or "xml_node", from the parser. 1240324c4644fee44b9898524c09511bd33c3f12e2dfBen GruverIt iterates through the tokens, verifying the syntax of the sentence and 1241324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverperforming actions specified by the grammar. It stops when it encounters an 1242324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruvererror or when it has matched the full sentence according to its defined 1243324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstructure. 1244324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1245324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver== ANTLR Parsers and the Parser API 1246324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1247324c4644fee44b9898524c09511bd33c3f12e2dfBen GruverPlain ANTLR-generated parsers directly subclass this class, unless specified 1248324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverotherwise within the grammar options. 
The generated code will provide a method 1249324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverfor each parser rule defined in the ANTLR grammar, as well as any other 1250324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruvercustomized member attributes and methods specified in the source grammar. 1251324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1252324c4644fee44b9898524c09511bd33c3f12e2dfBen GruverThis class does not override much of the functionality in Recognizer, and 1253324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverthus the API closely mirrors Recognizer. 1254324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1255324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver=end 1256324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverclass Parser < Recognizer 1257324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def self.main( argv = ARGV, options = {} ) 1258324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if argv.is_a?( ::Hash ) then argv, options = ARGV, argv end 1259324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver main = ANTLR3::Main::ParserMain.new( self, options ) 1260324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver block_given? ? 
yield( main ) : main.execute( argv ) 1261324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver end 1262324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1263324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def self.associated_lexer 1264324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver @associated_lexer ||= begin 1265324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver @grammar_home and @grammar_home::Lexer 1266324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver rescue NameError 1267324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver grammar_name = @grammar_home.name.split( "::" ).last 1268324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver begin 1269324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver require "#{ grammar_name }Lexer" 1270324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver @grammar_home::Lexer 1271324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver rescue LoadError, NameError 1272324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver end 1273324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver end 1274324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver end 1275324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1276324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1277324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def initialize( input, options = {} ) 1278324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver super( options ) 1279324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver @input = nil 1280324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver reset 1281324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver @input = cast_input( input, options ) 1282324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver end 1283324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1284324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def missing_symbol( error, expected_type, follow ) 1285324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver current = @input.look 1286324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver current = @input.look( -1 ) if current == ANTLR3::EOF_TOKEN 
1287324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver t = 1288324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver case 1289324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver when current && current != ANTLR3::EOF_TOKEN then current.clone 1290324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver when @input.token_class then @input.token_class.new 1291324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver else ( create_token rescue CommonToken.new ) 1292324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver end 1293324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1294324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver t.type = expected_type 1295324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver name = t.name.gsub( /(^<)|(>$)/,'' ) 1296324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver t.text = "<missing #{ name }>" 1297324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver t.channel = DEFAULT_CHANNEL 1298324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return( t ) 1299324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver end 1300324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1301324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def token_stream=( input ) 1302324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver @input = nil 1303324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver reset 1304324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver @input = input 1305324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver end 1306324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver alias token_stream input 1307324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1308324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def source_name 1309324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver @input.source_name 1310324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver end 1311324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1312324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1313324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverprivate 1314324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 
1315324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def trace_in( rule_name, rule_index ) 1316324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver super( rule_name, rule_index, @input.look.inspect ) 1317324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver end 1318324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1319324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def trace_out( rule_name, rule_index ) 1320324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver super( rule_name, rule_index, @input.look.inspect ) 1321324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver end 1322324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1323324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver def cast_input( input, options ) 1324324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver case input 1325324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver when TokenStream then input 1326324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver when TokenSource then CommonTokenStream.new( input, options ) 1327324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver when IO, String, CharacterStream 1328324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if lexer_class = self.class.associated_lexer 1329324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver CommonTokenStream.new( lexer_class.new( input, options ), options ) 1330324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver else 1331324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver raise ArgumentError, Util.tidy( <<-END, true ) 1332324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver | unable to automatically convert input #{ input.inspect } 1333324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver | to a ANTLR3::TokenStream object as #{ self.class } 1334324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver | does not appear to have an associated lexer class 1335324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver END 1336324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver end 1337324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver else 
1338324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver # assume it's a stream if it at least implements peek and consume 1339324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver unless input.respond_to?( :peek ) and input.respond_to?( :consume ) 1340324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver raise ArgumentError, Util.tidy( <<-END, true ) 1341324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver | #{ self.class } requires a token stream as input, but 1342324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver | #{ input.inspect } was provided 1343324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver END 1344324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver end 1345324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input 1346324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver end 1347324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver end 1348324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1349324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverend 1350324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1351324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverend 1352