1#!/usr/bin/ruby
2# encoding: utf-8
3
4require 'antlr3'
5
6=begin LICENSE
7
8[The "BSD licence"]
9Copyright (c) 2009-2010 Kyle Yetter
10All rights reserved.
11
12Redistribution and use in source and binary forms, with or without
13modification, are permitted provided that the following conditions
14are met:
15
16 1. Redistributions of source code must retain the above copyright
17    notice, this list of conditions and the following disclaimer.
18 2. Redistributions in binary form must reproduce the above copyright
19    notice, this list of conditions and the following disclaimer in the
20    documentation and/or other materials provided with the distribution.
21 3. The name of the author may not be used to endorse or promote products
22    derived from this software without specific prior written permission.
23
24THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
25IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
28INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
29NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
30DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
33THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34
35=end
36
37module ANTLR3
38  
39=begin rdoc ANTLR3::Debug
40
41Namespace for all debugging-related class and module definitions.
42
43=end
44
45module Debug
46
# NOTE(review): presumably the port the debug event socket uses when no other
# port is requested -- confirm against antlr3/debug/socket
DEFAULT_PORT = 49100

# The debug-mode section of the antlr3 runtime library has many components,
# most of which are never used at the same time. debug.rb therefore only
# contains the base of the debug library; the various listeners and the
# tree-related code are autoloaded on demand. Each entry below pairs a
# feature path with the constants that are registered to load from it.
{
  'antlr3/debug/socket'                => [ :EventSocketProxy, :RemoteEventSocketListener ],
  'antlr3/debug/trace-event-listener'  => [ :TraceEventListener ],
  'antlr3/debug/record-event-listener' => [ :RecordEventListener ],
  'antlr3/debug/rule-tracer'           => [ :RuleTracer ],
  'antlr3/debug/event-hub'             => [ :EventHub ],
  'antlr3/tree/debug'                  => [ :TreeAdaptor, :TreeNodeStream ]
}.each do | feature, constant_names |
  constant_names.each { | constant_name | autoload( constant_name, feature ) }
end
62
RecognizerSharedState = Struct.new( 
  # a flat stack describing the rules currently being parsed; the parser
  # pushes two entries (grammar file, rule name) per rule invocation
  :rule_invocation_stack,
  # a boolean flag to indicate whether or not the current decision is cyclic
  :cyclic_decision,
  # a stack that tracks follow sets for error recovery
  :following,
  # a flag indicating whether or not the recognizer is in error recovery mode
  :error_recovery,
  # the index in the input stream of the last error
  :last_error_index,
  # tracks the backtracking depth
  :backtracking,
  # if a grammar is compiled with the memoization option, this will
  # be set to a hash mapping previously parsed rules to cached indices
  :rule_memory,
  # tracks the number of syntax errors seen so far
  :syntax_errors,
  # holds newly constructed tokens for lexer rules
  :token,
  # the input stream index at which the token starts
  :token_start_position,
  # the input stream line number at which the token starts
  :token_start_line,
  # the input stream column at which the token starts
  :token_start_column,
  # the channel value of the target token
  :channel,
  # the type value of the target token
  :type,
  # the text of the target token
  :text
)

=begin rdoc ANTLR3::Debug::RecognizerSharedState

ANTLR3::Debug::RecognizerSharedState is identical to
ANTLR3::RecognizerSharedState, but adds additional fields used for recognizers
generated in debug or profiling mode.

=end
class RecognizerSharedState
  # Builds a state object with every field at its starting value. The fields
  # are assigned by name rather than through a long positional +super+ call so
  # that the defaults cannot silently drift out of step with the field list.
  # Any field not assigned here starts out as +nil+.
  def initialize
    super()
    self.rule_invocation_stack = []
    self.cyclic_decision       = false
    self.following             = []
    self.error_recovery        = false
    self.last_error_index      = -1
    self.backtracking          = 0
    self.syntax_errors         = 0
    self.token_start_position  = -1
  end
  
  # Returns the state to the values a freshly constructed object has. The
  # +following+ and +rule_invocation_stack+ arrays, and the +rule_memory+
  # table when one is present, are emptied in place rather than replaced, so
  # other references to them see the cleared contents.
  #
  # Returns the (now empty) rule invocation stack.
  def reset!
    following.clear
    # the cyclic flag describes the decision in progress; a reset state has none
    self.cyclic_decision = false
    self.error_recovery = false
    self.last_error_index = -1
    self.backtracking = 0
    # rule_memory is only set when the grammar uses memoization
    rule_memory.clear if rule_memory
    self.syntax_errors = 0
    self.token = nil
    self.token_start_position = -1
    self.token_start_line = nil
    self.token_start_column = nil
    self.channel = nil
    self.type = nil
    self.text = nil
    rule_invocation_stack.clear
  end
  
end
135
136=begin rdoc ANTLR3::Debug::ParserEvents
137
138ParserEvents adds debugging event hook methods and functionality that is
139required by the code ANTLR generated when called with the <tt>-debug</tt>
140switch.
141
142=end
module ParserEvents
  include ANTLR3::Error
  
  # Hook run whenever this module is mixed in. Classes (but not modules) that
  # include ParserEvents gain a +debug?+ class method that returns +true+.
  def self.included( klass )
    super
    if klass.is_a?( ::Class )
      def klass.debug?
        true
      end
    end
  end
  
  
  # the listener object that receives this parser's debug events
  attr_reader :debug_listener
  
  # Sets up the debug listener and the shared state, then defers to the
  # including class's own constructor.
  #
  # +stream+::  the parser's input, passed on to +super+ unchanged
  # +options+:: option hash passed on to +super+. <tt>:debug_listener</tt>
  #             supplies the listener; when it is absent an EventSocketProxy
  #             is created and its +handshake+ result is used.
  #             <tt>:state</tt> defaults to a new Debug::RecognizerSharedState.
  #             Both defaults are written back into the hash.
  #
  # NOTE(review): relies on +super+ assigning <tt>@input</tt> -- confirm
  # against the base recognizer.
  def initialize( stream, options = {} )
    @debug_listener = options[ :debug_listener ] ||= begin
      EventSocketProxy.new( self, options ).handshake
    end
    options[ :state ] ||= Debug::RecognizerSharedState.new
    super( stream, options )
    # make sure the input reports to a listener: keep a listener an existing
    # debug stream already has, otherwise decorate the plain stream
    if @input.is_a?( Debug::TokenStream )
      @input.debug_listener ||= @debug_listener
    else
      @input = Debug::TokenStream.wrap( @input, @debug_listener )
    end
  end
  
  # The number of rules currently being parsed. The invocation stack stores
  # two entries (grammar file and rule name) per rule -- see +in_rule+ -- so
  # the depth is half of its length.
  def rule_level
    @state.rule_invocation_stack.length / 2
  end
  
  # the current value of the shared state's cyclic decision flag
  def cyclic_decision?
    @state.cyclic_decision
  end
  
  # sets the shared state's cyclic decision flag
  def cyclic_decision=( flag )
    @state.cyclic_decision = flag
  end
  
  # custom attribute writer for debug_listener
  # propagates the change in listener to the
  # parser's debugging input stream, provided the
  # input accepts a listener at all
  def debug_listener=( dbg )
    @debug_listener = dbg
    # an explicit capability check instead of a blanket rescue, so that a
    # genuine failure inside the stream's writer is not silently discarded
    @input.debug_listener = dbg if @input.respond_to?( :debug_listener= )
  end
  
  # notifies the listener that resynchronization is starting, then runs the
  # inherited behavior
  def begin_resync
    @debug_listener.begin_resync
    super
  end
  
  # notifies the listener that resynchronization is over, then runs the
  # inherited behavior
  def end_resync
    @debug_listener.end_resync
    super
  end
  
  # Runs the block, which is handed the parser itself, between +begin_resync+
  # and +end_resync+. +end_resync+ runs even if the block raises.
  # TO-DO: is this pointless?
  def resync
    begin_resync
    yield( self )
  ensure
    end_resync
  end
  
  # reports the start of a backtracking attempt at the current depth
  def begin_backtrack
    @debug_listener.begin_backtrack( @state.backtracking )
  end
  
  # reports the end of a backtracking attempt at the current depth;
  # +successful+ is passed on to the listener as given
  def end_backtrack( successful )
    @debug_listener.end_backtrack( @state.backtracking, successful )
  end
  
  # Speculatively runs the block. The backtracking depth is raised for the
  # duration of the block, the input position is marked beforehand and
  # restored afterwards, and the listener is told about both ends of the
  # attempt.
  #
  # Returns +true+ when the block completes and +false+ when it raises
  # BacktrackingFailed. Any other exception propagates to the caller; in that
  # case the listener's +end_backtrack+ receives +nil+ as the outcome.
  def backtrack
    @state.backtracking += 1
    marked = false
    @debug_listener.begin_backtrack( @state.backtracking )
    start = @input.mark
    marked = true
    success =
      begin yield
      rescue BacktrackingFailed then false
      else true
      end
    return success
  ensure
    # only rewind to a marker that was really obtained; if the listener or
    # the stream failed before that point there is nothing to rewind to
    marked and @input.rewind( start )
    @debug_listener.end_backtrack( @state.backtracking, success )
    @state.backtracking -= 1
  end
  
  # forwards recognition errors to the listener before the inherited error
  # reporting runs; other kinds of exception go straight to +super+
  def report_error( exc )
    if ANTLR3::RecognitionError === exc
      @debug_listener.recognition_exception( exc )
    end
    super
  end
  
  # obtains the placeholder symbol from +super+ and reports it to the
  # listener as a consumed node before returning it
  def missing_symbol( error, expected_type, follow )
    symbol = super
    @debug_listener.consume_node( symbol )
    return( symbol )
  end
  
  # Wraps the body of a rule method.
  #
  # Fires +commence+ when this is the outermost rule, then +enter_rule+, and
  # records the rule on the invocation stack. After the block finishes --
  # normally or through an exception -- the rule is taken off the stack,
  # +exit_rule+ fires, and +terminate+ fires if the stack is empty again.
  #
  # Returns the value of the block.
  def in_rule( grammar_file, rule_name )
    @state.rule_invocation_stack.empty? and @debug_listener.commence
    @debug_listener.enter_rule( grammar_file, rule_name )
    @state.rule_invocation_stack.push( grammar_file, rule_name )
    # the cleanup is scoped to the part that runs after the push, so that a
    # failure while announcing the rule cannot pop the caller's entries
    begin
      yield
    ensure
      @state.rule_invocation_stack.pop( 2 )
      @debug_listener.exit_rule( grammar_file, rule_name )
      @state.rule_invocation_stack.empty? and @debug_listener.terminate
    end
  end
  
  # the rules currently being parsed as an array of
  # <tt>[ grammar_file, rule_name ]</tt> pairs, outermost rule first
  def rule_invocation_stack
    @state.rule_invocation_stack.each_slice( 2 ).to_a
  end
  
  # evaluates the block as a semantic predicate, reports its result along
  # with +description+ to the listener, and returns the result
  def predicate?( description )
    result = yield
    @debug_listener.semantic_predicate( result, description )
    return result
  end
  
  # reports that alternative number +alt_number+ was entered; unlike
  # +in_subrule+ and +in_decision+ this does not take a block
  def in_alternative( alt_number )
    @debug_listener.enter_alternative( alt_number )
  end
  
  # runs the block between the listener's +enter_subrule+ and +exit_subrule+
  # events; the exit event fires even if the block raises
  def in_subrule( decision_number )
    @debug_listener.enter_subrule( decision_number )
    yield
  ensure
    @debug_listener.exit_subrule( decision_number )
  end
  
  # runs the block between the listener's +enter_decision+ and
  # +exit_decision+ events; the exit event fires even if the block raises
  def in_decision( decision_number )
    @debug_listener.enter_decision( decision_number )
    yield
  ensure
    @debug_listener.exit_decision( decision_number )
  end
end
284
285
286=begin rdoc ANTLR3::Debug::TokenStream
287
288A module that wraps token stream methods with debugging event code. A debuggable
289parser will <tt>extend</tt> its input stream with this module if the stream is
290not already a Debug::TokenStream.
291
292=end
module TokenStream
  
  # Turns +stream+ into a debugging token stream by extending it with this
  # module and initializing the bookkeeping the wrapped methods rely on.
  #
  # +stream+::         the token stream object to decorate (modified in place)
  # +debug_listener+:: the listener to report to; may be +nil+, in which case
  #                    the stream works normally but reports nothing until a
  #                    listener is assigned
  #
  # Returns +stream+.
  def self.wrap( stream, debug_listener = nil )
    stream.extend( self )
    stream.instance_eval do
      @initial_stream_state = true
      @debug_listener = debug_listener
      @last_marker = nil
    end
    return( stream )
  end
  
  # the marker returned by the most recent call to +mark+
  attr_reader :last_marker
  # the listener that receives this stream's debug events
  attr_accessor :debug_listener
  
  # Consumes one token through the underlying stream and reports it, followed
  # by any hidden tokens the underlying stream skipped along the way.
  #
  # Returns whatever the underlying +consume+ returned.
  def consume
    @initial_stream_state and consume_initial_hidden_tokens
    expected = index + 1 # the next position IF there are no hidden tokens in between
    token = super
    actual = index       # the actual position after consuming
    
    if @debug_listener
      @debug_listener.consume_token( token )
      # if actual > expected, report the consumption of hidden tokens
      ( expected...actual ).each do | i |
        @debug_listener.consume_hidden_token( at( i ) )
      end
    end
    return( token )
  end
  
  
  # after a token stream fills up its buffer
  # by exhausting its token source, it may
  # skip to an initial position beyond the first
  # actual token, if there are hidden tokens
  # at the beginning of the stream.
  #
  # This private method is used to
  # figure out if any hidden tokens
  # were skipped initially, and then
  # report their consumption to
  # the debug listener. It runs at most once
  # per wrapped stream; without a listener
  # nothing is reported.
  def consume_initial_hidden_tokens
    if @debug_listener
      # every token before the current index was skipped over
      index.times do | i |
        @debug_listener.consume_hidden_token( at( i ) )
      end
    end
    @initial_stream_state = false
  end
  
  private :consume_initial_hidden_tokens
  
  ############################################################################################
  ###################################### Stream Methods ######################################
  ############################################################################################
  
  # looks +steps+ tokens ahead through the underlying stream, reports the
  # token that was seen to the listener, and returns it
  def look( steps = 1 )
    @initial_stream_state and consume_initial_hidden_tokens
    token = super( steps )
    @debug_listener.look( steps, token ) if @debug_listener
    return token
  end
  
  # the type of the token +steps+ ahead; goes through +look+, so it produces
  # a look event as well
  def peek( steps = 1 )
    look( steps ).type
  end
  
  # marks the current position through the underlying stream, remembers the
  # marker in +last_marker+, reports it to the listener, and returns it
  def mark
    @last_marker = super
    @debug_listener.mark( @last_marker ) if @debug_listener
    return @last_marker
  end
  
  # reports the rewind to the listener and then rewinds the underlying
  # stream; both arguments are forwarded to the underlying +rewind+ as given
  #
  # NOTE(review): when called without arguments the underlying stream
  # receives an explicit +nil+ marker rather than its own default -- confirm
  # that the wrapped stream classes treat +nil+ as "the last marker".
  def rewind( marker = nil, release = true )
    @debug_listener.rewind( marker ) if @debug_listener
    super
  end
end
368
369=begin rdoc ANTLR3::Debug::EventListener
370
A mixin that defines the complete set of debugging event hooks as methods that
do nothing. A listener can include this module and override only the events it
is interested in; the +on+ method attaches a handler for a single event to an
individual listener object.
374
375=end
module EventListener
  PROTOCOL_VERSION = '2'
  
  # The parser has just entered a rule. No decision has been made about
  # which alt is predicted.  This is fired AFTER init actions have been
  # executed.  Attributes are defined and available etc...
  # The grammar file name allows composite grammars to jump around among
  # multiple grammar files.
  
  def enter_rule( grammar_file, rule_name )
    # do nothing
  end
  
  # Because rules can have lots of alternatives, it is very useful to
  # know which alt you are entering.  This is 1..n for n alts.
  
  def enter_alternative( alt )
    # do nothing
  end
  
  # This is the last thing executed before leaving a rule.  It is
  # executed even if an exception is thrown.  This is triggered after
  # error reporting and recovery have occurred (unless the exception is
  # not caught in this rule).  This implies an "exitAlt" event.
  # The grammar file name allows composite grammars to jump around among
  # multiple grammar files.
  
  def exit_rule( grammar_file, rule_name )
    # do nothing
  end
  
  # Track entry into any (...) subrule or other EBNF construct
  
  def enter_subrule( decision_number )
    # do nothing
  end
  
  # Track exit from any (...) subrule or other EBNF construct
  
  def exit_subrule( decision_number )
    # do nothing
  end
  
  # Every decision, fixed k or arbitrary, has an enter/exit event
  # so that a GUI can easily track what look/consume events are
  # associated with prediction.  You will see a single enter/exit
  # subrule but multiple enter/exit decision events, one for each
  # loop iteration.
  
  def enter_decision( decision_number )
    # do nothing
  end
  
  def exit_decision( decision_number )
    # do nothing
  end
  
  # An input token was consumed; matched by any kind of element.
  # Trigger after the token was matched by things like match(), matchAny().
  # The +tree+ argument is the token that was consumed.
  
  def consume_token( tree )
    # do nothing
  end
  
  # An off-channel input token was consumed.
  # Trigger after the token was matched by things like match(), matchAny().
  # (unless of course the hidden token is first stuff in the input stream).
  # The +tree+ argument is the hidden token that was consumed.
  
  def consume_hidden_token( tree )
    # do nothing
  end
  
  # Somebody (anybody) looked ahead.  Note that this actually gets
  # triggered by both peek and look calls.  The debugger will want to know
  # which Token object was examined.  Like consume_token, this indicates
  # what token was seen at that depth.  A remote debugger cannot look
  # ahead into a file it doesn't have so look events must pass the token
  # even if the info is redundant.
  # +i+ is the lookahead depth and +tree+ the token found there.
  
  def look( i, tree )
    # do nothing
  end
  
  # The parser is going to look arbitrarily ahead; mark this location,
  # the token stream's marker is sent in case you need it.
  
  def mark( marker )
    # do nothing
  end
  
  # After an arbitrarily long look as with a cyclic DFA (or with
  # any backtrack), this informs the debugger that stream should be
  # rewound to the position associated with marker.
  
  def rewind( marker = nil )
    # do nothing
  end
  
  # A backtracking attempt at nesting depth +level+ is starting.
  
  def begin_backtrack( level )
    # do nothing
  end
  
  # The backtracking attempt at nesting depth +level+ is over;
  # +successful+ tells whether the speculative parse matched.
  
  def end_backtrack( level, successful )
    # do nothing
  end
  
  # Convenience wrapper: fires begin_backtrack, runs the block (which is
  # handed this listener) and fires end_backtrack with the block's result.
  # The end event fires even when the block raises, in which case the
  # outcome it reports is +nil+.  Returns the block's result.
  
  def backtrack( level )
    begin_backtrack( level )
    begin
      successful = yield( self )
    ensure
      end_backtrack( level, successful )
    end
  end
  
  # To watch a parser move through the grammar, the parser needs to
  # inform the debugger what line/charPos it is passing in the grammar.
  # For now, this does not know how to switch from one grammar to the
  # other and back for island grammars etc...
  # This should also allow breakpoints because the debugger can stop
  # the parser whenever it hits this line/pos.
  
  def location( line, position )
    # do nothing
  end
  
  # A recognition exception occurred such as NoViableAltError.  I made
  # this a generic event so that I can alter the exception hierarchy later
  # without having to alter all the debug objects.
  # Upon error, the stack of enter rule/subrule must be properly unwound.
  # If no viable alt occurs it is within an enter/exit decision, which
  # also must be rewound.  Even the rewind for each mark must be unwound.
  # In the Java target this is pretty easy using try/finally, if a bit
  # ugly in the generated code.  The rewind is generated in DFA.predict()
  # actually so no code needs to be generated for that.  For languages
  # w/o this "finally" feature (C++?), the target implementor will have
  # to build an event stack or something.
  # Across a socket for remote debugging, only the RecognitionError
  # data fields are transmitted.  The token object or whatever that
  # caused the problem was the last object referenced by look.  The
  # immediately preceding look event should hold the unexpected Token or
  # char.
  # Here is a sample event trace for grammar:
  # b : C ({;}A|B) // {;} is there to prevent A|B becoming a set
  # | D
  # ;
  # The sequence for this rule (with no viable alt in the subrule) for
  # input 'c c' (there are 3 tokens) is:
  # commence
  # look
  # enterRule b
  # location 7 1
  # enter decision 3
  # look
  # exit decision 3
  # enterAlt1
  # location 7 5
  # look
  # consumeToken [c/<4>,1:0]
  # location 7 7
  # enterSubRule 2
  # enter decision 2
  # look
  # look
  # recognitionError NoViableAltError 2 1 2
  # exit decision 2
  # exitSubRule 2
  # beginResync
  # look
  # consumeToken [c/<4>,1:1]
  # look
  # endResync
  # look(-1)
  # exitRule b
  # terminate
  
  def recognition_exception( exception )
    # do nothing
  end
  
  # Indicates the recognizer is about to consume tokens to resynchronize
  # the parser.  Any consume events from here until the recovered event
  # are not part of the parse--they are dead tokens.
  
  def begin_resync
    # do nothing
  end
  
  # Indicates that the recognizer has finished consuming tokens in order
  # to resynchronize.  There may be multiple beginResync/endResync pairs
  # before the recognizer comes out of errorRecovery mode (in which
  # multiple errors are suppressed).  This will be useful
  # in a gui where you want to probably grey out tokens that are consumed
  # but not matched to anything in grammar.  Anything between
  # a beginResync/endResync pair was tossed out by the parser.
  
  def end_resync
    # do nothing
  end
  
  # Convenience wrapper: runs the block (which is handed this listener)
  # between begin_resync and end_resync.  The end event fires even when
  # the block raises.  Returns the block's result.
  
  def resync
    begin_resync
    begin
      yield( self )
    ensure
      end_resync
    end
  end
  
  # A semantic predicate was evaluated with this result and action text
  
  def semantic_predicate( result, predicate )
    # do nothing
  end
  
  # Announce that parsing has begun.  Not technically useful except for
  # sending events over a socket.  A GUI for example will launch a thread
  # to connect and communicate with a remote parser.  The thread will want
  # to notify the GUI when a connection is made.  ANTLR parsers
  # trigger this upon entry to the first rule (the ruleLevel is used to
  # figure this out).
  
  def commence
    # do nothing
  end
  
  # Parsing is over; successfully or not.  Mostly useful for telling
  # remote debugging listeners that it's time to quit.  When the rule
  # invocation level goes to zero at the end of a rule, we are done
  # parsing.
  
  def terminate
    # do nothing
  end
  
  # Input for a tree parser is an AST, but we know nothing for sure
  # about a node except its type and text (obtained from the adaptor).
  # This is the analog of the consume_token method.  Again, the ID is
  # the hashCode usually of the node so it only works if hashCode is
  # not implemented.  If the type is UP or DOWN, then
  # the ID is not really meaningful as it's fixed--there is
  # just one UP node and one DOWN navigation node.
  
  def consume_node( tree )
    # do nothing
  end
  
  # A nil was created (even nil nodes have a unique ID...
  # they are not "null" per se).  As of 4/28/2006, this
  # seems to be uniquely triggered when starting a new subtree
  # such as when entering a subrule in automatic mode and when
  # building a tree in rewrite mode.
  # If you are receiving this event over a socket via
  # RemoteDebugEventSocketListener then only tree.ID is set.
  
  def flat_node( tree )
    # do nothing
  end
  
  # Upon syntax error, recognizers bracket the error with an error node
  # if they are building ASTs.
  
  def error_node( tree )
    # do nothing
  end
  
  # Announce a new node built from token elements such as type etc...
  # If you are receiving this event over a socket via
  # RemoteDebugEventSocketListener then only tree.ID, type, text are
  # set.
  
  def create_node( node, token = nil )
    # do nothing
  end
  
  # Make a node the new root of an existing root.
  # Note: the newRootID parameter is possibly different
  # than the TreeAdaptor.becomeRoot() newRoot parameter.
  # In our case, it will always be the result of calling
  # TreeAdaptor.becomeRoot() and not root_n or whatever.
  # The listener should assume that this event occurs
  # only when the current subrule (or rule) subtree is
  # being reset to newRootID.
  # If you are receiving this event over a socket via
  # RemoteDebugEventSocketListener then only IDs are set.
  # @see antlr3.tree.TreeAdaptor.becomeRoot()
  
  def become_root( new_root, old_root )
    # do nothing
  end
  
  # Make childID a child of rootID.
  # If you are receiving this event over a socket via
  # RemoteDebugEventSocketListener then only IDs are set.
  # @see antlr3.tree.TreeAdaptor.addChild()
  
  def add_child( root, child )
    # do nothing
  end
  
  # Set the token start/stop token index for a subtree root or node.
  # If you are receiving this event over a socket via
  # RemoteDebugEventSocketListener then only tree.ID is set.
  
  def set_token_boundaries( tree, token_start_index, token_stop_index )
    # do nothing
  end
  
  # NOTE(review): presumably fired when a rule's memoization table is
  # consulted -- confirm against the code that triggers it.
  
  def examine_rule_memoization( rule )
    # do nothing
  end
  
  # Attaches a handler for a single event to this listener object only:
  # defines a singleton method named +event_name+ whose body is +block+,
  # leaving other listeners untouched.
  
  def on( event_name, &block )
    sclass = class << self; self; end
    sclass.send( :define_method, event_name, &block )
  end
  
  # Names of event methods this module defines.  Every entry corresponds
  # to a method above, so the list can be used to enumerate or forward
  # events.
  EVENTS = [ 
    :add_child, :backtrack, :become_root, :begin_backtrack,
    :begin_resync, :commence, :consume_hidden_token,
    :consume_node, :consume_token, :create_node, :end_backtrack,
    :end_resync, :enter_alternative, :enter_decision, :enter_rule,
    :enter_subrule, :error_node, :exit_decision, :exit_rule,
    :exit_subrule, :flat_node, :location, :look, :mark,
    :recognition_exception, :resync, :rewind,
    :semantic_predicate, :set_token_boundaries, :terminate
  ].freeze
  
end
696end
697end
698