dfa.rb revision 324c4644fee44b9898524c09511bd33c3f12e2df
1#!/usr/bin/ruby
2# encoding: utf-8
3
4=begin LICENSE
5
6[The "BSD licence"]
7Copyright (c) 2009-2010 Kyle Yetter
8All rights reserved.
9
10Redistribution and use in source and binary forms, with or without
11modification, are permitted provided that the following conditions
12are met:
13
14 1. Redistributions of source code must retain the above copyright
15    notice, this list of conditions and the following disclaimer.
16 2. Redistributions in binary form must reproduce the above copyright
17    notice, this list of conditions and the following disclaimer in the
18    documentation and/or other materials provided with the distribution.
19 3. The name of the author may not be used to endorse or promote products
20    derived from this software without specific prior written permission.
21
22THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
23IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
24OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
25IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
26INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
27NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
31THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32
33=end
34
35module ANTLR3
36
37=begin rdoc ANTLR3::DFA
38
39DFA is a class that implements a finite state machine that chooses between
40alternatives in a rule based upon lookahead symbols from an input stream.
41
42Deterministic Finite Automata (DFA) are finite state machines that are capable
43of recognizing <i>regular languages</i>. For more background on the subject,
44check out http://en.wikipedia.org/wiki/Deterministic_finite-state_machine or
45check out general ANTLR documentation at http://www.antlr.org
46
47ANTLR implements most of its decision logic directly using code branching
48structures in methods. However, for certain types of decisions, ANTLR will
49generate a special DFA class definition to implement a decision.
50
51Conceptually, these state machines are defined by a number of states, each state
52represented by an integer indexed upward from zero. State number +0+ is the
53<i>start state</i> of the machine; every prediction will begin in state +0+. At
54each step, the machine examines the next symbol on the input stream, checks the
55value against the transition parameters associated with the current state, and
56either moves to a new state number to repeat the process or decides that the
57machine cannot transition any further. If the machine cannot transition any
58further and the current state is defined as an <i>accept state</i>, an
59alternative has been chosen successfully and the prediction procedure ends. If
60the current state is not an <i>accept state</i>, the prediction has failed and
61there is <i>no viable alternative</i>.
62
63In generated code, ANTLR defines DFA states using seven parameters, each defined
64as a member of seven seperate array constants -- +MIN+, +MAX+, +EOT+, +EOF+,
65+SPECIAL+, +ACCEPT+, and +TRANSITION+. The parameters that characterize state
66+s+ are defined by the value of these lists at index +s+.
67
68MIN[s]::
69  The smallest value of the next input symbol that has 
70  a transition for state +s+
71MAX[s]::
72  The largest value of the next input symbol that has 
73  a transition for state +s+
74TRANSITION[s]::
75  A list that defines the next state number based upon
76  the current input symbol.
77EOT[s]::
78  If positive, it specifies a state transition in 
79  situations where a non-matching input symbol does
80  not indicate failure.
81SPECIAL[s]::
82  If positive, it indicates that the prediction 
83  algorithm must defer to a special code block 
84  to determine the next state. The value is used
85  by the special state code to determine the next
86  state.
87ACCEPT[s]::
88  If positive and there are no possible state
89  transitions, this is the alternative number
90  that has been predicted
91EOF[s]::
92  If positive and the input stream has been exhausted,
93  this is the alternative number that has been predicted.
94
95For more information on the prediction algorithm, check out the #predict method
96below.
97
98=end
99
100class DFA
101  include Constants
102  include Error
103  
104  attr_reader :recognizer, :decision_number, :eot, :eof, :min, :max,
105              :accept, :special, :transition, :special_block
106  
107  class << self
108    attr_reader :decision, :eot, :eof, :min, :max,
109                :accept, :special, :transition
110    
111    def unpack( *data )
112      data.empty? and return [].freeze
113      
114      n = data.length / 2
115      size = 0
116      n.times { |i| size += data[ 2*i ] }
117      if size > 1024
118        values = Hash.new( 0 )
119        data.each_slice( 2 ) do |count, value|
120          values[ value ] += count
121        end
122        default = values.keys.max_by { |v| values[ v ] }
123        
124        unpacked = Hash.new( default )
125        position = 0
126        data.each_slice( 2 ) do |count, value|
127          unless value == default
128            count.times { |i| unpacked[ position + i ] = value }
129          end
130          position += count
131        end
132      else
133        unpacked = []
134        data.each_slice( 2 ) do |count, value|
135          unpacked.fill( value, unpacked.length, count )
136        end
137      end
138      
139      return unpacked
140    end
141    
142  end
143  
144  def initialize( recognizer, decision_number = nil,
145                 eot = nil, eof = nil, min = nil, max = nil,
146                 accept = nil, special = nil,
147                 transition = nil, &special_block )
148    @recognizer = recognizer
149    @decision_number = decision_number || self.class.decision
150    @eot = eot || self.class::EOT #.eot
151    @eof = eof || self.class::EOF #.eof
152    @min = min || self.class::MIN #.min
153    @max = max || self.class::MAX #.max
154    @accept = accept || self.class::ACCEPT #.accept
155    @special = special || self.class::SPECIAL #.special
156    @transition = transition || self.class::TRANSITION #.transition
157    @special_block = special_block
158  rescue NameError => e
159    raise unless e.message =~ /uninitialized constant/
160    constant = e.name
161    message = Util.tidy( <<-END )
162    | No #{ constant } information provided.
163    | DFA cannot be instantiated without providing state array information.
164    | When DFAs are generated by ANTLR, this information should already be
165    | provided in the DFA subclass constants.
166    END
167  end
168  
169  if RUBY_VERSION =~ /^1\.9/
170    
171    def predict( input )
172      mark = input.mark
173      state = 0
174      
175      50000.times do
176        special_state = @special[ state ]
177        if special_state >= 0
178          state = @special_block.call( special_state )
179          if state == -1
180            no_viable_alternative( state, input )
181            return 0
182          end
183          input.consume
184          next
185        end
186        @accept[ state ] >= 1 and return @accept[ state ]
187        
188        # look for a normal char transition
189        
190        c = input.peek.ord
191        # the @min and @max arrays contain the bounds of the character (or token type)
192        # ranges for the transition decisions
193        if c.between?( @min[ state ], @max[ state ] )
194          # c - @min[state] is the position of the character within the range
195          # so for a range like ?a..?z, a match of ?a would be 0,
196          # ?c would be 2, and ?z would be 25
197          next_state = @transition[ state ][ c - @min[ state ] ]
198          if next_state < 0
199            if @eot[ state ] >= 0
200              state = @eot[ state ]
201              input.consume
202              next
203            end
204            no_viable_alternative( state, input )
205            return 0
206          end
207          
208          state = next_state
209          input.consume
210          next
211        end
212        
213        if @eot[ state ] >= 0
214          state = @eot[ state ]
215          input.consume()
216          next
217        end
218        
219        ( c == EOF && @eof[ state ] >= 0 ) and return @accept[ @eof[ state ] ]
220        no_viable_alternative( state, input )
221        return 0
222      end
223      
224      ANTLR3.bug!( Util.tidy( <<-END ) )
225      | DFA BANG!
226      |   The prediction loop has exceeded a maximum limit of 50000 iterations
227      | ----
228      | decision: #@decision_number
229      | description: #{ description }
230      END
231    ensure
232      input.rewind( mark )
233    end
234    
235  else
236    
237    def predict( input )
238      mark = input.mark
239      state = 0
240      
241      50000.times do
242        special_state = @special[ state ]
243        if special_state >= 0
244          state = @special_block.call( special_state )
245          if state == -1
246            no_viable_alternative( state, input )
247            return 0
248          end
249          input.consume
250          next
251        end
252        @accept[ state ] >= 1 and return @accept[ state ]
253        
254        # look for a normal char transition
255        
256        c = input.peek
257        # the @min and @max arrays contain the bounds of the character (or token type)
258        # ranges for the transition decisions
259        if c.between?( @min[ state ], @max[ state ] )
260          # c - @min[state] is the position of the character within the range
261          # so for a range like ?a..?z, a match of ?a would be 0,
262          # ?c would be 2, and ?z would be 25
263          next_state = @transition[ state ][ c - @min[ state ] ]
264          if next_state < 0
265            if @eot[ state ] >= 0
266              state = @eot[ state ]
267              input.consume
268              next
269            end
270            no_viable_alternative( state, input )
271            return 0
272          end
273          
274          state = next_state
275          input.consume()
276          next
277        end
278        if @eot[ state ] >= 0
279          state = @eot[ state ]
280          input.consume()
281          next
282        end
283        ( c == EOF && @eof[ state ] >= 0 ) and return @accept[ @eof[ state ] ]
284        no_viable_alternative( state, input )
285        return 0
286      end
287      
288      ANTLR3.bug!( Util.tidy( <<-END ) )
289      | DFA BANG!
290      |   The prediction loop has exceeded a maximum limit of 50000 iterations
291      | ----
292      | decision: #@decision_number
293      | description: #{ description }
294      END
295    ensure
296      input.rewind( mark )
297    end
298    
299  end
300  
301  def no_viable_alternative( state, input )
302    raise( BacktrackingFailed ) if @recognizer.state.backtracking > 0
303    except = NoViableAlternative.new( description, @decision_number, state, input )
304    error( except )
305    raise( except )
306  end
307  
308  def error( except )
309    # overridable debugging hook
310  end
311  
312  def special_state_transition( state, input )
313    return -1
314  end
315  
316  def description
317    return "n/a"
318  end
319end
320end
321