1#!/usr/bin/ruby 2# encoding: utf-8 3 4=begin LICENSE 5 6[The "BSD licence"] 7Copyright (c) 2009-2010 Kyle Yetter 8All rights reserved. 9 10Redistribution and use in source and binary forms, with or without 11modification, are permitted provided that the following conditions 12are met: 13 14 1. Redistributions of source code must retain the above copyright 15 notice, this list of conditions and the following disclaimer. 16 2. Redistributions in binary form must reproduce the above copyright 17 notice, this list of conditions and the following disclaimer in the 18 documentation and/or other materials provided with the distribution. 19 3. The name of the author may not be used to endorse or promote products 20 derived from this software without specific prior written permission. 21 22THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 23IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 24OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 25IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 26INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 27NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 31THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 33=end 34 35module ANTLR3 36 37=begin rdoc ANTLR3::DFA 38 39DFA is a class that implements a finite state machine that chooses between 40alternatives in a rule based upon lookahead symbols from an input stream. 41 42Deterministic Finite Automata (DFA) are finite state machines that are capable 43of recognizing <i>regular languages</i>. For more background on the subject, 44check out http://en.wikipedia.org/wiki/Deterministic_finite-state_machine or 45check out general ANTLR documentation at http://www.antlr.org 46 47ANTLR implements most of its decision logic directly using code branching 48structures in methods. However, for certain types of decisions, ANTLR will 49generate a special DFA class definition to implement a decision. 50 51Conceptually, these state machines are defined by a number of states, each state 52represented by an integer indexed upward from zero. State number +0+ is the 53<i>start state</i> of the machine; every prediction will begin in state +0+. At 54each step, the machine examines the next symbol on the input stream, checks the 55value against the transition parameters associated with the current state, and 56either moves to a new state number to repeat the process or decides that the 57machine cannot transition any further. If the machine cannot transition any 58further and the current state is defined as an <i>accept state</i>, an 59alternative has been chosen successfully and the prediction procedure ends. If 60the current state is not an <i>accept state</i>, the prediction has failed and 61there is <i>no viable alternative</i>. 62 63In generated code, ANTLR defines DFA states using seven parameters, each defined 64as a member of seven seperate array constants -- +MIN+, +MAX+, +EOT+, +EOF+, 65+SPECIAL+, +ACCEPT+, and +TRANSITION+. The parameters that characterize state 66+s+ are defined by the value of these lists at index +s+. 67 68MIN[s]:: 69 The smallest value of the next input symbol that has 70 a transition for state +s+ 71MAX[s]:: 72 The largest value of the next input symbol that has 73 a transition for state +s+ 74TRANSITION[s]:: 75 A list that defines the next state number based upon 76 the current input symbol. 77EOT[s]:: 78 If positive, it specifies a state transition in 79 situations where a non-matching input symbol does 80 not indicate failure. 81SPECIAL[s]:: 82 If positive, it indicates that the prediction 83 algorithm must defer to a special code block 84 to determine the next state. The value is used 85 by the special state code to determine the next 86 state. 87ACCEPT[s]:: 88 If positive and there are no possible state 89 transitions, this is the alternative number 90 that has been predicted 91EOF[s]:: 92 If positive and the input stream has been exhausted, 93 this is the alternative number that has been predicted. 94 95For more information on the prediction algorithm, check out the #predict method 96below. 97 98=end 99 100class DFA 101 include Constants 102 include Error 103 104 attr_reader :recognizer, :decision_number, :eot, :eof, :min, :max, 105 :accept, :special, :transition, :special_block 106 107 class << self 108 attr_reader :decision, :eot, :eof, :min, :max, 109 :accept, :special, :transition 110 111 def unpack( *data ) 112 data.empty? and return [].freeze 113 114 n = data.length / 2 115 size = 0 116 n.times { |i| size += data[ 2*i ] } 117 if size > 1024 118 values = Hash.new( 0 ) 119 data.each_slice( 2 ) do |count, value| 120 values[ value ] += count 121 end 122 default = values.keys.max_by { |v| values[ v ] } 123 124 unpacked = Hash.new( default ) 125 position = 0 126 data.each_slice( 2 ) do |count, value| 127 unless value == default 128 count.times { |i| unpacked[ position + i ] = value } 129 end 130 position += count 131 end 132 else 133 unpacked = [] 134 data.each_slice( 2 ) do |count, value| 135 unpacked.fill( value, unpacked.length, count ) 136 end 137 end 138 139 return unpacked 140 end 141 142 end 143 144 def initialize( recognizer, decision_number = nil, 145 eot = nil, eof = nil, min = nil, max = nil, 146 accept = nil, special = nil, 147 transition = nil, &special_block ) 148 @recognizer = recognizer 149 @decision_number = decision_number || self.class.decision 150 @eot = eot || self.class::EOT #.eot 151 @eof = eof || self.class::EOF #.eof 152 @min = min || self.class::MIN #.min 153 @max = max || self.class::MAX #.max 154 @accept = accept || self.class::ACCEPT #.accept 155 @special = special || self.class::SPECIAL #.special 156 @transition = transition || self.class::TRANSITION #.transition 157 @special_block = special_block 158 rescue NameError => e 159 raise unless e.message =~ /uninitialized constant/ 160 constant = e.name 161 message = Util.tidy( <<-END ) 162 | No #{ constant } information provided. 163 | DFA cannot be instantiated without providing state array information. 164 | When DFAs are generated by ANTLR, this information should already be 165 | provided in the DFA subclass constants. 166 END 167 end 168 169 if RUBY_VERSION =~ /^1\.9/ 170 171 def predict( input ) 172 mark = input.mark 173 state = 0 174 175 50000.times do 176 special_state = @special[ state ] 177 if special_state >= 0 178 state = @special_block.call( special_state ) 179 if state == -1 180 no_viable_alternative( state, input ) 181 return 0 182 end 183 input.consume 184 next 185 end 186 @accept[ state ] >= 1 and return @accept[ state ] 187 188 # look for a normal char transition 189 190 c = input.peek.ord 191 # the @min and @max arrays contain the bounds of the character (or token type) 192 # ranges for the transition decisions 193 if c.between?( @min[ state ], @max[ state ] ) 194 # c - @min[state] is the position of the character within the range 195 # so for a range like ?a..?z, a match of ?a would be 0, 196 # ?c would be 2, and ?z would be 25 197 next_state = @transition[ state ][ c - @min[ state ] ] 198 if next_state < 0 199 if @eot[ state ] >= 0 200 state = @eot[ state ] 201 input.consume 202 next 203 end 204 no_viable_alternative( state, input ) 205 return 0 206 end 207 208 state = next_state 209 input.consume 210 next 211 end 212 213 if @eot[ state ] >= 0 214 state = @eot[ state ] 215 input.consume() 216 next 217 end 218 219 ( c == EOF && @eof[ state ] >= 0 ) and return @accept[ @eof[ state ] ] 220 no_viable_alternative( state, input ) 221 return 0 222 end 223 224 ANTLR3.bug!( Util.tidy( <<-END ) ) 225 | DFA BANG! 226 | The prediction loop has exceeded a maximum limit of 50000 iterations 227 | ---- 228 | decision: #@decision_number 229 | description: #{ description } 230 END 231 ensure 232 input.rewind( mark ) 233 end 234 235 else 236 237 def predict( input ) 238 mark = input.mark 239 state = 0 240 241 50000.times do 242 special_state = @special[ state ] 243 if special_state >= 0 244 state = @special_block.call( special_state ) 245 if state == -1 246 no_viable_alternative( state, input ) 247 return 0 248 end 249 input.consume 250 next 251 end 252 @accept[ state ] >= 1 and return @accept[ state ] 253 254 # look for a normal char transition 255 256 c = input.peek 257 # the @min and @max arrays contain the bounds of the character (or token type) 258 # ranges for the transition decisions 259 if c.between?( @min[ state ], @max[ state ] ) 260 # c - @min[state] is the position of the character within the range 261 # so for a range like ?a..?z, a match of ?a would be 0, 262 # ?c would be 2, and ?z would be 25 263 next_state = @transition[ state ][ c - @min[ state ] ] 264 if next_state < 0 265 if @eot[ state ] >= 0 266 state = @eot[ state ] 267 input.consume 268 next 269 end 270 no_viable_alternative( state, input ) 271 return 0 272 end 273 274 state = next_state 275 input.consume() 276 next 277 end 278 if @eot[ state ] >= 0 279 state = @eot[ state ] 280 input.consume() 281 next 282 end 283 ( c == EOF && @eof[ state ] >= 0 ) and return @accept[ @eof[ state ] ] 284 no_viable_alternative( state, input ) 285 return 0 286 end 287 288 ANTLR3.bug!( Util.tidy( <<-END ) ) 289 | DFA BANG! 290 | The prediction loop has exceeded a maximum limit of 50000 iterations 291 | ---- 292 | decision: #@decision_number 293 | description: #{ description } 294 END 295 ensure 296 input.rewind( mark ) 297 end 298 299 end 300 301 def no_viable_alternative( state, input ) 302 raise( BacktrackingFailed ) if @recognizer.state.backtracking > 0 303 except = NoViableAlternative.new( description, @decision_number, state, input ) 304 error( except ) 305 raise( except ) 306 end 307 308 def error( except ) 309 # overridable debugging hook 310 end 311 312 def special_state_transition( state, input ) 313 return -1 314 end 315 316 def description 317 return "n/a" 318 end 319end 320end 321