#!/usr/bin/ruby
# encoding: utf-8

require 'antlr3'

=begin LICENSE

[The "BSD licence"]
Copyright (c) 2009-2010 Kyle Yetter
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:

 1. Redistributions of source code must retain the above copyright
    notice, this list of conditions and the following disclaimer.
 2. Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.
 3. The name of the author may not be used to endorse or promote products
    derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=end

module ANTLR3

=begin rdoc ANTLR3::Debug

Namespace for all debugging-related class and module definitions.

=end

module Debug

# NOTE(review): presumably the default TCP port for the socket-based
# listeners defined in antlr3/debug/socket -- confirm against that file.
DEFAULT_PORT = 49100

# since there are many components to the debug-mode
# section of the antlr3 runtime library, most of which
# are not used simultaneously, debug.rb contains the
# base of the debug library and the various listeners
# and tree-related code are autoloaded on-demand
autoload :EventSocketProxy, 'antlr3/debug/socket'
autoload :RemoteEventSocketListener, 'antlr3/debug/socket'
autoload :TraceEventListener, 'antlr3/debug/trace-event-listener'
autoload :RecordEventListener, 'antlr3/debug/record-event-listener'
autoload :RuleTracer, 'antlr3/debug/rule-tracer'
autoload :EventHub, 'antlr3/debug/event-hub'
autoload :TreeAdaptor, 'antlr3/tree/debug'
autoload :TreeNodeStream, 'antlr3/tree/debug'

RecognizerSharedState = Struct.new( 
  # a flat stack of grammar-file / rule-name entries; ParserEvents#in_rule
  # pushes and pops them two at a time, so the rule depth is length / 2
  :rule_invocation_stack,
  # a boolean flag to indicate whether or not the current decision is cyclic
  :cyclic_decision,
  # a stack that tracks follow sets for error recovery
  :following,
  # a flag indicating whether or not the recognizer is in error recovery mode
  :error_recovery,
  # the index in the input stream of the last error
  :last_error_index,
  # tracks the backtracking depth
  :backtracking,
  # if a grammar is compiled with the memoization option, this will
  # be set to a hash mapping previously parsed rules to cached indices
  :rule_memory,
  # tracks the number of syntax errors seen so far
  :syntax_errors,
  # holds newly constructed tokens for lexer rules
  :token,
  # the input stream index at which the token starts
  :token_start_position,
  # the input stream line number at which the token starts
  :token_start_line,
  # the input stream column at which the token starts
  :token_start_column,
  # the channel value of the target token
  :channel,
  # the type value of the target token
  :type,
  # the text of the target token
  :text
)

=begin rdoc ANTLR3::Debug::RecognizerSharedState

ANTLR3::Debug::RecognizerSharedState is identical to
ANTLR3::RecognizerSharedState, but adds additional fields used for recognizers
generated in debug or profiling mode.

=end
class RecognizerSharedState
  # Builds a state object with every field at its initial value. Fields
  # past +token_start_position+ are left unset (nil).
  def initialize
    super( [], false, [], false, -1, 0, nil, 0, nil, -1 )
    # ^-- same as this --v 
    # self.rule_invocation_stack = []
    # self.cyclic_decision = false
    # self.following = []
    # self.error_recovery = false
    # self.last_error_index = -1
    # self.backtracking = 0
    # self.rule_memory = nil
    # self.syntax_errors = 0
    # self.token = nil
    # self.token_start_position = -1
  end
  
  # Restores the state to the values set by #initialize. The +following+,
  # +rule_memory+ and +rule_invocation_stack+ collections are emptied in
  # place rather than replaced, so outside references to them stay valid.
  #
  # Returns the (now empty) rule invocation stack.
  def reset!
    following.clear
    # a fresh state starts with cyclic_decision == false, so a reset one must too
    self.cyclic_decision = false
    self.error_recovery = false
    self.last_error_index = -1
    self.backtracking = 0
    # rule_memory is nil unless memoization is in use
    rule_memory.clear if rule_memory
    self.syntax_errors = 0
    self.token = nil
    self.token_start_position = -1
    self.token_start_line = nil
    self.token_start_column = nil
    self.channel = nil
    self.type = nil
    self.text = nil
    rule_invocation_stack.clear
  end
  
end

=begin rdoc ANTLR3::Debug::ParserEvents

ParserEvents adds debugging event hook methods and functionality that is
required by the code ANTLR generated when called with the <tt>-debug</tt>
switch.

=end
module ParserEvents
  include ANTLR3::Error
  
  # Hook run when the module is mixed in; gives every including class
  # (but not an including module) a <tt>debug?</tt> class method that
  # returns true.
  def self.included( klass )
    super
    if klass.is_a?( ::Class )
      def klass.debug?
        true
      end
    end
  end
  
  
  attr_reader :debug_listener
  
  # Sets up the debug listener and debug-aware shared state, then hands
  # off to the recognizer's own initializer.
  #
  # stream::  the input stream, passed through to +super+
  # options:: option hash; <tt>:debug_listener</tt> and <tt>:state</tt> are
  #           filled in here when absent (the hash is modified in place
  #           so that +super+ sees the same values)
  #
  # When no listener is supplied, an EventSocketProxy is created and its
  # +handshake+ result is used as the listener.
  def initialize( stream, options = {} )
    @debug_listener = options[ :debug_listener ] ||= begin
      EventSocketProxy.new( self, options ).handshake
    end
    options[ :state ] ||= Debug::RecognizerSharedState.new
    super( stream, options )
    # make sure the input stream reports its activity to the listener
    if @input.is_a?( Debug::TokenStream )
      @input.debug_listener ||= @debug_listener
    else
      @input = Debug::TokenStream.wrap( @input, @debug_listener )
    end
  end
  
  # The current rule nesting level, as the raw length of the invocation
  # stack (which holds two entries per rule -- see #in_rule).
  def rule_level
    @state.rule_invocation_stack.length
  end
  
  # Reads the shared state's cyclic-decision flag.
  def cyclic_decision?
    @state.cyclic_decision
  end
  
  # Writes the shared state's cyclic-decision flag.
  def cyclic_decision=( flag )
    @state.cyclic_decision = flag
  end
  
  # custom attribute writer for debug_listener
  # propagates the change in listener to the
  # parser's debugging input stream
  #
  # The propagation is best-effort: an input stream that has no
  # +debug_listener=+ writer is simply left alone. (The earlier
  # <tt>... = dbg rescue nil</tt> form only rescued the right-hand side,
  # so it never actually protected the call.)
  def debug_listener=( dbg )
    @debug_listener = dbg
    @input.debug_listener = dbg if @input.respond_to?( :debug_listener= )
  end
  
  # Notifies the listener that resynchronization is starting, then
  # defers to the recognizer's own implementation.
  def begin_resync
    @debug_listener.begin_resync
    super
  end
  
  # Notifies the listener that resynchronization has finished, then
  # defers to the recognizer's own implementation.
  def end_resync
    @debug_listener.end_resync
    super
  end
  
  # TO-DO: is this pointless?
  #
  # Brackets the given block with #begin_resync / #end_resync; the closing
  # call is made even if the block raises. Yields the parser itself.
  def resync
    begin_resync
    yield( self )
  ensure
    end_resync
  end
  
  # Reports the start of a backtracking attempt at the current depth.
  def begin_backtrack
    @debug_listener.begin_backtrack( @state.backtracking )
  end
  
  # Reports the end of a backtracking attempt at the current depth.
  def end_backtrack( successful )
    @debug_listener.end_backtrack( @state.backtracking, successful )
  end
  
  # Runs the block speculatively: the backtracking depth is raised, the
  # input is marked, and after the block finishes the input is rewound
  # and the depth restored, whatever the outcome.
  #
  # Returns true if the block completed, false if it raised
  # BacktrackingFailed. Any other exception propagates after the cleanup,
  # in which case the listener's +end_backtrack+ receives nil as the
  # success flag.
  def backtrack
    @state.backtracking += 1
    @debug_listener.begin_backtrack( @state.backtracking )
    start = @input.mark
    success =
      begin
        yield
      rescue BacktrackingFailed
        false
      else
        true
      end
    return success
  ensure
    @input.rewind( start )
    # success is still nil here if the block raised something unexpected
    @debug_listener.end_backtrack( @state.backtracking, success )
    @state.backtracking -= 1
  end
  
  # Forwards recognition errors to the listener before the regular error
  # reporting takes place. Other kinds of exceptions are not forwarded.
  def report_error( exc )
    @debug_listener.recognition_exception( exc ) if ANTLR3::RecognitionError === exc
    super
  end
  
  # Lets the recognizer conjure up the missing symbol, reports it to the
  # listener as a consumed node, and returns it.
  def missing_symbol( error, expected_type, follow )
    symbol = super
    @debug_listener.consume_node( symbol )
    return( symbol )
  end
  
  # Wraps the body of a rule method. Fires +commence+ when entering the
  # outermost rule, +enter_rule+ / +exit_rule+ around the block, and
  # +terminate+ once the outermost rule has been left. The exit events
  # are fired from an +ensure+ clause, so they occur even when the block
  # raises.
  #
  # grammar_file:: name of the grammar file defining the rule
  # rule_name::    name of the rule being entered
  def in_rule( grammar_file, rule_name )
    @debug_listener.commence if @state.rule_invocation_stack.empty?
    @debug_listener.enter_rule( grammar_file, rule_name )
    # the stack is flat: two entries per rule invocation
    @state.rule_invocation_stack.push( grammar_file, rule_name )
    yield
  ensure
    @state.rule_invocation_stack.pop( 2 )
    @debug_listener.exit_rule( grammar_file, rule_name )
    @debug_listener.terminate if @state.rule_invocation_stack.empty?
  end
  
  # The invocation stack regrouped as an array of
  # <tt>[ grammar_file, rule_name ]</tt> pairs, outermost rule first.
  def rule_invocation_stack
    @state.rule_invocation_stack.each_slice( 2 ).to_a
  end
  
  # Evaluates a semantic predicate (the block), reports its outcome and
  # +description+ to the listener, and returns the outcome.
  def predicate?( description )
    result = yield
    @debug_listener.semantic_predicate( result, description )
    return result
  end
  
  # Reports entry into alternative number +alt_number+.
  def in_alternative( alt_number )
    @debug_listener.enter_alternative( alt_number )
  end
  
  # Brackets the block with +enter_subrule+ / +exit_subrule+ events; the
  # exit event is fired even if the block raises.
  def in_subrule( decision_number )
    @debug_listener.enter_subrule( decision_number )
    yield
  ensure
    @debug_listener.exit_subrule( decision_number )
  end
  
  # Brackets the block with +enter_decision+ / +exit_decision+ events; the
  # exit event is fired even if the block raises.
  def in_decision( decision_number )
    @debug_listener.enter_decision( decision_number )
    yield
  ensure
    @debug_listener.exit_decision( decision_number )
  end
end


=begin rdoc ANTLR3::Debug::TokenStream

A module that wraps token stream methods with debugging event code. A debuggable
parser will <tt>extend</tt> its input stream with this module if the stream is
not already a Debug::TokenStream.

Since TokenStream.wrap allows the listener to be omitted, every event emitted
from this module is skipped while no listener is attached.

=end
module TokenStream
  
  # Extends +stream+ with this module and initializes the instance
  # variables the wrapped methods rely on.
  #
  # stream::         the token stream to instrument (modified in place)
  # debug_listener:: listener to receive the stream events, or nil
  #
  # Returns +stream+.
  def self.wrap( stream, debug_listener = nil )
    stream.extend( self )
    stream.instance_eval do
      @initial_stream_state = true
      @debug_listener = debug_listener
      @last_marker = nil
    end
    return( stream )
  end
  attr_reader :last_marker
  attr_accessor :debug_listener
  
  # Consumes one token via the underlying stream and reports it, along
  # with any hidden tokens that were skipped over in the process.
  #
  # Returns whatever the underlying +consume+ returned, so that the debug
  # wrapper stays transparent to callers.
  def consume
    consume_initial_hidden_tokens if @initial_stream_state
    expected = index + 1 # the next position IF there are no hidden tokens in between
    token = super
    actual = index       # the actual position after consuming
    
    if @debug_listener
      @debug_listener.consume_token( token )
      # if actual > expected, report the consumption of hidden tokens
      ( expected...actual ).each do |i|
        @debug_listener.consume_hidden_token( at( i ) )
      end
    end
    return( token )
  end
  
  
  # after a token stream fills up its buffer
  # by exhausting its token source, it may
  # skip to an initial position beyond the first
  # actual token, if there are hidden tokens
  # at the beginning of the stream.
  #
  # This private method is used to
  # figure out if any hidden tokens
  # were skipped initially, and then
  # report their consumption to
  # the debug listener
  def consume_initial_hidden_tokens
    if @debug_listener
      # every position before the current index was skipped over
      index.times do |position|
        @debug_listener.consume_hidden_token( at( position ) )
      end
    end
    @initial_stream_state = false
  end
  
  private :consume_initial_hidden_tokens
  
  ############################################################################################
  ###################################### Stream Methods ######################################
  ############################################################################################
  
  # Looks +steps+ tokens ahead via the underlying stream, reports the
  # lookahead to the listener, and returns the token.
  def look( steps = 1 )
    consume_initial_hidden_tokens if @initial_stream_state
    token = super( steps )
    @debug_listener.look( steps, token ) if @debug_listener
    return token
  end
  
  # The type of the token +steps+ ahead. Goes through #look, so a look
  # event is reported for every peek.
  def peek( steps = 1 )
    look( steps ).type
  end
  
  # Marks the current stream position, remembers the marker in
  # +last_marker+, reports it to the listener, and returns it.
  def mark
    @last_marker = super
    @debug_listener.mark( @last_marker ) if @debug_listener
    return @last_marker
  end
  
  # Reports the rewind to the listener and then rewinds the underlying
  # stream with the same arguments.
  def rewind( marker = nil, release = true )
    @debug_listener.rewind( marker ) if @debug_listener
    super
  end
end

=begin rdoc ANTLR3::Debug::EventListener

Defines the complete set of debugging event hooks as methods that do nothing.
A listener mixes this module in and overrides only the events it cares about;
every other event is then silently ignored. Individual hooks can also be
attached to a single listener object at runtime with #on.

=end
module EventListener
  # NOTE(review): presumably the debug protocol version announced to a
  # remote debugger -- confirm against antlr3/debug/socket.
  PROTOCOL_VERSION = '2'
  
  # The parser has just entered a rule. No decision has been made about
  # which alt is predicted.  This is fired AFTER init actions have been
  # executed.  Attributes are defined and available etc...
  # The grammarFileName allows composite grammars to jump around among
  # multiple grammar files.
  
  def enter_rule( grammar_file, rule_name )
    # do nothing
  end
  
  # Because rules can have lots of alternatives, it is very useful to
  # know which alt you are entering.  This is 1..n for n alts.
  
  def enter_alternative( alt )
    # do nothing
  end
  
  # This is the last thing executed before leaving a rule.  It is
  # executed even if an exception is thrown.  This is triggered after
  # error reporting and recovery have occurred (unless the exception is
  # not caught in this rule).  This implies an "exitAlt" event.
  # The grammarFileName allows composite grammars to jump around among
  # multiple grammar files.
  
  def exit_rule( grammar_file, rule_name )
    # do nothing
  end
  
  # Track entry into any (...) subrule or other EBNF construct
  
  def enter_subrule( decision_number )
    # do nothing
  end
  
  def exit_subrule( decision_number )
    # do nothing
  end
  
  # Every decision, fixed k or arbitrary, has an enter/exit event
  # so that a GUI can easily track what look/consume events are
  # associated with prediction.  You will see a single enter/exit
  # subrule but multiple enter/exit decision events, one for each
  # loop iteration.
  
  def enter_decision( decision_number )
    # do nothing
  end
  
  def exit_decision( decision_number )
    # do nothing
  end
  
  # An input token was consumed; matched by any kind of element.
  # Trigger after the token was matched by things like match(), matchAny().
  
  def consume_token( tree )
    # do nothing
  end
  
  # An off-channel input token was consumed.
  # Trigger after the token was matched by things like match(), matchAny().
  # (unless of course the hidden token is first stuff in the input stream).
  
  def consume_hidden_token( tree )
    # do nothing
  end
  
  # Somebody (anybody) looked ahead.  Note that this actually gets
  # triggered by both peek and look calls.  The debugger will want to know
  # which Token object was examined.  Like consumeToken, this indicates
  # what token was seen at that depth.  A remote debugger cannot look
  # ahead into a file it doesn't have so look events must pass the token
  # even if the info is redundant.
  
  def look( i, tree )
    # do nothing
  end
  
  # The parser is going to look arbitrarily ahead; mark this location,
  # the token stream's marker is sent in case you need it.
  
  def mark( marker )
    # do nothing
  end
  
  # After an arbitrarily long look as with a cyclic DFA (or with
  # any backtrack), this informs the debugger that stream should be
  # rewound to the position associated with marker.
  
  def rewind( marker = nil )
    # do nothing
  end
  
  def begin_backtrack( level )
    # do nothing
  end
  
  def end_backtrack( level, successful )
    # do nothing
  end
  
  # Brackets the block with begin_backtrack / end_backtrack events for
  # the given level. The block receives the listener and its value is
  # taken as the success flag. The closing event is fired even when the
  # block raises (with nil as the success flag), so the two events always
  # stay paired. Returns the block's value.
  
  def backtrack( level )
    begin_backtrack( level )
    successful = yield( self )
  ensure
    end_backtrack( level, successful )
  end
  
  # To watch a parser move through the grammar, the parser needs to
  # inform the debugger what line/charPos it is passing in the grammar.
  # For now, this does not know how to switch from one grammar to the
  # other and back for island grammars etc...
  # This should also allow breakpoints because the debugger can stop
  # the parser whenever it hits this line/pos.
  
  def location( line, position )
    # do nothing
  end
  
  # A recognition exception occurred such as NoViableAltError.  I made
  # this a generic event so that I can alter the exception hierarchy later
  # without having to alter all the debug objects.
  # Upon error, the stack of enter rule/subrule must be properly unwound.
  # If no viable alt occurs it is within an enter/exit decision, which
  # also must be rewound.  Even the rewind for each mark must be unwound.
  # In the Java target this is pretty easy using try/finally, if a bit
  # ugly in the generated code.  The rewind is generated in DFA.predict()
  # actually so no code needs to be generated for that.  For languages
  # w/o this "finally" feature (C++?), the target implementor will have
  # to build an event stack or something.
  # Across a socket for remote debugging, only the RecognitionError
  # data fields are transmitted.  The token object or whatever that
  # caused the problem was the last object referenced by look.  The
  # immediately preceding look event should hold the unexpected Token or
  # char.
  # Here is a sample event trace for grammar:
  # b : C ({;}A|B) // {;} is there to prevent A|B becoming a set
  #   | D
  #   ;
  # The sequence for this rule (with no viable alt in the subrule) for
  # input 'c c' (there are 3 tokens) is:
  #     commence
  #     look
  #     enterRule b
  #     location 7 1
  #     enter decision 3
  #     look
  #     exit decision 3
  #     enterAlt1
  #     location 7 5
  #     look
  #     consumeToken [c/<4>,1:0]
  #     location 7 7
  #     enterSubRule 2
  #     enter decision 2
  #     look
  #     look
  #     recognitionError NoViableAltError 2 1 2
  #     exit decision 2
  #     exitSubRule 2
  #     beginResync
  #     look
  #     consumeToken [c/<4>,1:1]
  #     look
  #     endResync
  #     look(-1)
  #     exitRule b
  #     terminate
  
  def recognition_exception( exception )
    # do nothing
  end
  
  # Indicates the recognizer is about to consume tokens to resynchronize
  # the parser.  Any consume events from here until the recovered event
  # are not part of the parse--they are dead tokens.
  
  def begin_resync
    # do nothing
  end
  
  # Indicates that the recognizer has finished consuming tokens in order
  # to resynchronize.  There may be multiple beginResync/endResync pairs
  # before the recognizer comes out of errorRecovery mode (in which
  # multiple errors are suppressed).  This will be useful
  # in a gui where you want to probably grey out tokens that are consumed
  # but not matched to anything in grammar.  Anything between
  # a beginResync/endResync pair was tossed out by the parser.
  
  def end_resync
    # do nothing
  end
  
  # Brackets the block with begin_resync / end_resync events. The block
  # receives the listener. The closing event is fired even when the block
  # raises, matching ParserEvents#resync. Returns the block's value.
  
  def resync
    begin_resync
    yield( self )
  ensure
    end_resync
  end
  
  # A semantic predicate was evaluated with this result and action text
  
  def semantic_predicate( result, predicate )
    # do nothing
  end
  
  # Announce that parsing has begun.  Not technically useful except for
  # sending events over a socket.  A GUI for example will launch a thread
  # to connect and communicate with a remote parser.  The thread will want
  # to notify the GUI when a connection is made.  ANTLR parsers
  # trigger this upon entry to the first rule (the ruleLevel is used to
  # figure this out).
  
  def commence
    # do nothing
  end
  
  # Parsing is over; successfully or not.  Mostly useful for telling
  # remote debugging listeners that it's time to quit.  When the rule
  # invocation level goes to zero at the end of a rule, we are done
  # parsing.
  
  def terminate
    # do nothing
  end
  
  # Input for a tree parser is an AST, but we know nothing for sure
  # about a node except its type and text (obtained from the adaptor).
  # This is the analog of the consumeToken method.  Again, the ID is
  # the hashCode usually of the node so it only works if hashCode is
  # not implemented.  If the type is UP or DOWN, then
  # the ID is not really meaningful as it's fixed--there is
  # just one UP node and one DOWN navigation node.
  
  def consume_node( tree )
    # do nothing
  end
  
  # A nil was created (even nil nodes have a unique ID...
  # they are not "null" per se).  As of 4/28/2006, this
  # seems to be uniquely triggered when starting a new subtree
  # such as when entering a subrule in automatic mode and when
  # building a tree in rewrite mode.
  # If you are receiving this event over a socket via
  # RemoteDebugEventSocketListener then only tree.ID is set.
  
  def flat_node( tree )
    # do nothing
  end
  
  # Upon syntax error, recognizers bracket the error with an error node
  # if they are building ASTs.
  
  def error_node( tree )
    # do nothing
  end
  
  # Announce a new node built from token elements such as type etc...
  # If you are receiving this event over a socket via
  # RemoteDebugEventSocketListener then only tree.ID, type, text are
  # set.
  
  def create_node( node, token = nil )
    # do nothing
  end
  
  # Make a node the new root of an existing root.
  # Note: the newRootID parameter is possibly different
  # than the TreeAdaptor.becomeRoot() newRoot parameter.
  # In our case, it will always be the result of calling
  # TreeAdaptor.becomeRoot() and not root_n or whatever.
  # The listener should assume that this event occurs
  # only when the current subrule (or rule) subtree is
  # being reset to newRootID.
  # If you are receiving this event over a socket via
  # RemoteDebugEventSocketListener then only IDs are set.
  # @see antlr3.tree.TreeAdaptor.becomeRoot()
  
  def become_root( new_root, old_root )
    # do nothing
  end
  
  # Make childID a child of rootID.
  # If you are receiving this event over a socket via
  # RemoteDebugEventSocketListener then only IDs are set.
  # @see antlr3.tree.TreeAdaptor.addChild()
  
  def add_child( root, child )
    # do nothing
  end
  
  # Set the token start/stop token index for a subtree root or node.
  # If you are receiving this event over a socket via
  # RemoteDebugEventSocketListener then only tree.ID is set.
  
  def set_token_boundaries( tree, token_start_index, token_stop_index )
    # do nothing
  end
  
  def examine_rule_memoization( rule )
    # do nothing
  end
  
  # Installs +block+ as the handler for +event_name+ on this one listener
  # object only (it becomes a singleton method), leaving other listeners
  # untouched.
  #
  #   listener.on( :enter_rule ) { |file, rule| puts( rule ) }
  
  def on( event_name, &block )
    define_singleton_method( event_name, &block )
  end
  
  # Names of the event hooks defined above. The subrule entries are
  # spelled to match the enter_subrule / exit_subrule methods that
  # ParserEvents actually invokes.
  EVENTS = [ 
    :add_child, :backtrack, :become_root, :begin_backtrack,
    :begin_resync, :commence, :consume_hidden_token,
    :consume_node, :consume_token, :create_node, :end_backtrack,
    :end_resync, :enter_alternative, :enter_decision, :enter_rule,
    :enter_subrule, :error_node, :exit_decision, :exit_rule,
    :exit_subrule, :flat_node, :location, :look, :mark,
    :recognition_exception, :resync, :rewind,
    :semantic_predicate, :set_token_boundaries, :terminate
  ].freeze

end
end
end