1package ANTLR::Runtime::BaseRecognizer;
2
3use Readonly;
4use Carp;
5
6use ANTLR::Runtime::RecognizerSharedState;
7use ANTLR::Runtime::Token;
8use ANTLR::Runtime::UnwantedTokenException;
9use ANTLR::Runtime::MissingTokenException;
10use ANTLR::Runtime::MismatchedTokenException;
11
12use Moose;
13
14Readonly my $MEMO_RULE_FAILED => -2;
15sub MEMO_RULE_FAILED { $MEMO_RULE_FAILED }
16
17Readonly my $MEMO_RULE_UNKNOWN => -1;
18sub MEMO_RULE_UNKNOWN { $MEMO_RULE_UNKNOWN }
19
20Readonly my $INITIAL_FOLLOW_STACK_SIZE => 100;
21sub INITIAL_FOLLOW_STACK_SIZE { $INITIAL_FOLLOW_STACK_SIZE }
22
23# copies from Token object for convenience in actions
24Readonly my $DEFAULT_TOKEN_CHANNEL => ANTLR::Runtime::Token->DEFAULT_CHANNEL;
25sub DEFAULT_TOKEN_CHANNEL { $DEFAULT_TOKEN_CHANNEL }
26
27Readonly my $HIDDEN => ANTLR::Runtime::Token->HIDDEN_CHANNEL;
28sub HIDDEN { $HIDDEN }
29
30Readonly my $NEXT_TOKEN_RULE_NAME => 'next_token';
31sub NEXT_TOKEN_RULE_NAME { $NEXT_TOKEN_RULE_NAME }
32
33# State of a lexer, parser, or tree parser are collected into a state
34# object so the state can be shared.  This sharing is needed to
35# have one grammar import others and share same error variables
36# and other state variables.  It's a kind of explicit multiple
37# inheritance via delegation of methods and shared state.
38has 'state' => (
39    is  => 'rw',
40    isa => 'ANTLR::Runtime::RecognizerSharedState',
41    default => sub { ANTLR::Runtime::RecognizerSharedState->new() },
42);
43
44sub reset {
45    my ($self) = @_;
46
47    if (!defined $self->state) {
48        return;
49    }
50
51    my $state = $self->state;
52    $state->_fsp(-1);
53    $state->error_recovery(0);
54    $state->last_error_index(-1);
55    $state->failed(0);
56    $state->syntax_errors(0);
57
58    # wack everything related to backtracking and memoization
59    $state->backtracking(0);
60    # wipe cache
61    $state->rule_memo([]);
62}
63
64sub match {
65    Readonly my $usage => 'void match(IntStream input, int ttype, BitSet follow)';
66    croak $usage if @_ != 4;
67    my ($self, $input, $ttype, $follow) = @_;
68
69    my $matched_symbol = $self->get_current_input_symbol($input);
70    if ($input->LA(1) eq $ttype) {
71        $input->consume();
72        $self->state->error_recovery(0);
73        $self->state->failed(0);
74        return $matched_symbol;
75    }
76
77    if ($self->state->backtracking > 0) {
78        $self->state->failed(1);
79        return $matched_symbol;
80    }
81
82    return $self->recover_from_mismatched_token($input, $ttype, $follow);
83}
84
85sub match_any {
86    Readonly my $usage => 'void match_any(IntStream input)';
87    croak $usage if @_ != 2;
88    my ($self, $input) = @_;
89
90    $self->state->error_recovery(0);
91    $self->state->failed(0);
92    $input->consume();
93}
94
95sub mismatch_is_unwanted_token {
96    my ($self, $input, $ttype) = @_;
97    return $input->LA(2) == $ttype;
98}
99
100sub mismatch_is_missing_token {
101    my ($self, $input, $follow) = @_;
102
103    if (!defined $follow) {
104        return 0;
105    }
106
107    if ($follow->member(ANTLR::Runtime::Token->EOR_TOKEN_TYPE)) {
108        my $viable_tokens_following_this_rule = $self->compute_context_sensitive_rule_FOLLOW();
109        $follow = $follow->or($viable_tokens_following_this_rule);
110        if ($self->state->_fsp >= 0) {
111            $follow->remove(ANTLR::Runtime::Token->EOR_TOKEN_TYPE);
112        }
113    }
114
115    if ($follow->member($input->LA(1)) || $follow->member(ANTLR::Runtime::Token->EOR_TOKEN_TYPE)) {
116        return 1;
117    }
118    return 0;
119}
120
121sub mismatch {
122    Readonly my $usage => 'void mismatch(IntStream input, int ttype, BitSet follow)';
123    croak $usage if @_ != 4;
124    my ($self, $input, $ttype, $follow) = @_;
125
126    if ($self->mismatch_is_unwanted_token($input, $ttype)) {
127        ANTLR::Runtime::UnwantedTokenException->new({
128            expecting => $ttype,
129            input => $input
130        })->throw();
131    }
132    elsif ($self->mismatch_is_missing_token($input, $follow)) {
133        ANTLR::Runtime::MissingTokenException->new({
134            expecting => $ttype,
135            input => $input
136        })->throw();
137    }
138    else {
139        ANTLR::Runtime::MismatchedTokenException->new({
140            expecting => $ttype,
141            input => $input
142        })->throw();
143    }
144}
145
146sub report_error {
147    Readonly my $usage => 'void report_error(RecognitionException e)';
148    croak $usage if @_ != 2;
149    my ($self, $e) = @_;
150
151    if ($self->state->error_recovery) {
152        return;
153    }
154    $self->state->syntax_errors($self->state->syntax_errors + 1);
155    $self->state->error_recovery(1);
156
157    $self->display_recognition_error($self->get_token_names(), $e);
158    return;
159}
160
161sub display_recognition_error {
162    Readonly my $usage => 'void display_recognition_error(String[] token_names, RecognitionException e)';
163    croak $usage if @_ != 3;
164    my ($self, $token_names, $e) = @_;
165
166    my $hdr = $self->get_error_header($e);
167    my $msg = $self->get_error_message($e, $token_names);
168    $self->emit_error_message("$hdr $msg");
169}
170
171sub get_error_message {
172    Readonly my $usage => 'String get_error_message(RecognitionException e, String[] token_names)';
173    croak $usage if @_ != 3;
174    my ($self, $e, $token_names) = @_;
175
176    if ($e->isa('ANTLR::Runtime::MismatchedTokenException')) {
177        my $token_name;
178        if ($e->get_expecting == ANTLR::Runtime::Token->EOF) {
179            $token_name = 'EOF';
180        } else {
181            $token_name = $token_names->[$e->get_expecting];
182        }
183
184        return 'mismatched input ' . $self->get_token_error_display($e->get_token)
185            . ' expecting ' . $token_name;
186    } elsif ($e->isa('ANTLR::Runtime::MismatchedTreeNodeException')) {
187        my $token_name;
188        if ($e->get_expecting == ANTLR::Runtime::Token->EOF) {
189            $token_name = 'EOF';
190        } else {
191            $token_name = $token_names->[$e->get_expecting];
192        }
193
194        return 'mismatched tree node: ' . $e->node
195            . ' expecting ' . $token_name;
196    } elsif ($e->isa('ANTLR::Runtime::NoViableAltException')) {
197        return 'no viable alternative at input ' . $self->get_token_error_display($e->get_token);
198    } elsif ($e->isa('ANTLR::Runtime::EarlyExitException')) {
199        return 'required (...)+ loop did not match anything at input '
200            . get_token_error_display($e->get_token);
201    } elsif ($e->isa('ANTLR::Runtime::MismatchedSetException')) {
202        return 'mismatched input ' . $self->get_token_error_display($e->get_token)
203            . ' expecting set ' . $e->get_expecting;
204    } elsif ($e->isa('ANTLR::Runtime::MismatchedNotSetException')) {
205        return 'mismatched input ' . $self->get_token_error_display($e->get_token)
206            . ' expecting set ' . $e->get_expecting;
207    } elsif ($e->isa('ANTLR::Runtime::FailedPredicateException')) {
208        return 'rule ' . $e->rule_name . ' failed predicate: {'
209            . $e->predicate_text . '}?';
210    } else {
211        return undef;
212    }
213}
214
215sub get_number_of_syntax_errors {
216    my ($self) = @_;
217    return $self->state->syntax_errors;
218}
219
220sub get_error_header {
221    Readonly my $usage => 'String get_error_header(RecognitionException e)';
222    croak $usage if @_ != 2;
223    my ($self, $e) = @_;
224
225    my $line = $e->get_line();
226    my $col = $e->get_char_position_in_line();
227
228    return "line $line:$col";
229}
230
231sub get_token_error_display {
232    Readonly my $usage => 'String get_token_error_display(Token t)';
233    croak $usage if @_ != 2;
234    my ($self, $t) = @_;
235
236    my $s = $t->get_text();
237    if (!defined $s) {
238        if ($t->get_type() == ANTLR::Runtime::Token->EOF) {
239            $s = '<EOF>';
240        } else {
241            my $ttype = $t->get_type();
242            $s = "<$ttype>";
243        }
244    }
245
246    $s =~ s/\n/\\\\n/g;
247    $s =~ s/\r/\\\\r/g;
248    $s =~ s/\t/\\\\t/g;
249
250    return "'$s'";
251}
252
253sub emit_error_message {
254    Readonly my $usage => 'void emit_error_message(String msg)';
255    croak $usage if @_ != 2;
256    my ($self, $msg) = @_;
257
258    print STDERR $msg, "\n";
259}
260
261sub recover {
262    Readonly my $usage => 'void recover(IntStream input, RecognitionException re)';
263    croak $usage if @_ != 3;
264    my ($self, $input, $re) = @_;
265
266    if ($self->state->last_error_index == $input->index()) {
267	# uh oh, another error at same token index; must be a case
268	# where LT(1) is in the recovery token set so nothing is
269	# consumed; consume a single token so at least to prevent
270	# an infinite loop; this is a failsafe.
271        $input->consume();
272    }
273
274    $self->state->last_error_index($input->index());
275    my $follow_set = $self->compute_error_recovery_set();
276    $self->begin_resync();
277    $self->consume_until($input, $follow_set);
278    $self->end_resync();
279}
280
281sub begin_resync {
282}
283
284sub end_resync {
285}
286
287sub compute_error_recovery_set {
288    Readonly my $usage => 'void compute_error_recovery_set()';
289    croak $usage if @_ != 1;
290    my ($self) = @_;
291
292    $self->combine_follows(0);
293}
294
295sub compute_context_sensitive_rule_FOLLOW {
296    Readonly my $usage => 'void compute_context_sensitive_rule_FOLLOW()';
297    croak $usage if @_ != 1;
298    my ($self) = @_;
299
300    $self->combine_follows(1);
301}
302
303sub combine_follows {
304    Readonly my $usage => 'BitSet combine_follows(boolean exact)';
305    croak $usage if @_ != 2;
306    my ($self, $exact) = @_;
307
308    my $top = $self->state->_fsp;
309    my $follow_set = ANTLR::Runtime::BitSet->new();
310
311    foreach my $local_follow_set (reverse @{$self->state->following}) {
312        $follow_set |= $local_follow_set;
313        if ($exact && $local_follow_set->member(ANTLR::Runtime::Token->EOR_TOKEN_TYPE)) {
314            last;
315        }
316    }
317    #$follow_set->remove(ANTLR::Runtime::Token->EOR_TOKEN_TYPE);
318    return $follow_set;
319}
320
321sub recover_from_mismatched_token {
322    Readonly my $usage => 'void recover_from_mismatched_token(IntStream input, int ttype, BitSet follow)';
323    croak $usage if @_ != 4;
324    my ($self, $input, $ttype, $follow) = @_;
325
326    if ($self->mismatch_is_unwanted_token($input, $ttype)) {
327        my $ex = ANTLR::Runtime::UnwantedTokenException->new({
328            expecting => $ttype,
329            input => $input
330        });
331
332        $self->begin_resync();
333        $input->consume();
334        $self->end_resync();
335        $self->report_error($ex);
336
337        my $matched_symbol = $self->get_current_input_symbol($input);
338        $input->consume();
339        return $matched_symbol;
340    }
341
342    if ($self->mismatch_is_missing_token($input, $follow)) {
343        my $inserted = $self->get_missing_symbol({
344                input => $input,
345                expected_token_type => $ttype,
346                follow => $follow,
347        });
348        my $ex = ANTLR::Runtime::MissingTokenException({
349            expecting => $ttype,
350            input => $input,
351            inserted => $inserted,
352        });
353        $self->report_error($ex);
354        return $inserted;
355    }
356
357    ANTLR::Runtime::MismatchedTokenException->new({
358        expecting => $ttype,
359        input => $input,
360    })->throw();
361}
362
363sub recover_from_mismatched_set {
364    Readonly my $usage => 'void recover_from_mismatched_set(IntStream input, RecognitionException e, BitSet follow)';
365    croak $usage if @_ != 4;
366    my ($self, $input, $e, $follow) = @_;
367
368    if ($self->mismatch_is_missing_token($input, $follow)) {
369        $self->report_error($e);
370        return $self->get_missing_symbol({
371                input => $input,
372                exception => $e,
373                expected_token_type => ANTLR::Runtime::Token->INVALID_TOKEN_TYPE,
374                follow => $follow,
375            });
376    }
377
378    $e->throw();
379}
380
381sub recover_from_mismatched_element {
382    Readonly my $usage => 'boolean recover_from_mismatched_element(IntStream input, RecognitionException e, BitSet follow)';
383    croak $usage if @_ != 4;
384    my ($self, $input, $e, $follow) = @_;
385
386    return 0 if (!defined $follow);
387
388    if ($follow->member(ANTLR::Runtime::Token->EOR_TOKEN_TYPE)) {
389        my $viable_tokens_following_this_rule = $self->compute_context_sensitive_rule_FOLLOW();
390        $follow |= $viable_tokens_following_this_rule;
391        $follow->remove(ANTLR::Runtime::Token->EOR_TOKEN_TYPE);
392    }
393
394    if ($follow->member($input->LA(1))) {
395        $self->report_error($e);
396        return 1;
397    }
398
399    return 0;
400}
401
402sub get_current_input_symbol {
403    my ($self, $input) = @_;
404    return undef;
405}
406
407sub get_missing_symbol {
408    my ($self, $arg_ref) = @_;
409    my $input = $arg_ref->{input};
410    my $exception = $arg_ref->{exception};
411    my $expected_token_type = $arg_ref->{expected_token_type};
412    my $follow = $arg_ref->{follow};
413
414    return undef;
415}
416
417sub consume_until {
418    Readonly my $usage => 'void consume_until(IntStream input, (int token_type | BitSet set))';
419    croak $usage if @_ != 3;
420
421    if ($_[2]->isa('ANTLR::Runtime::BitSet')) {
422        my ($self, $input, $set) = @_;
423
424        my $ttype = $input->LA(1);
425        while ($ttype != ANTLR::Runtime::Token->EOF && !$set->member($ttype)) {
426            $input->consume();
427            $ttype = $input->LA(1);
428        }
429    } else {
430        my ($self, $input, $token_type) = @_;
431
432        my $ttype = $input->LA(1);
433        while ($ttype != ANTLR::Runtime::Token->EOF && $ttype != $token_type) {
434            $input->consume();
435            $ttype = $input->LA(1);
436        }
437    }
438}
439
440sub push_follow {
441    Readonly my $usage => 'void push_follow(BitSet fset)';
442    croak $usage if @_ != 2;
443    my ($self, $fset) = @_;
444
445    push @{$self->state->following}, $fset;
446    $self->state->_fsp($self->state->_fsp + 1);
447}
448
449sub get_rule_invocation_stack {
450    Readonly my $usage => 'List get_rule_invocation_stack()';
451    croak $usage if @_ != 1;
452    my ($self) = @_;
453
454    my $rules = [];
455    for (my $i = 0; ; ++$i) {
456        my @frame = caller $i;
457        last if !@frame;
458
459        my ($package, $filename, $line, $subroutine) = @frame;
460
461        if ($package =~ /^ANTLR::Runtime::/) {
462            next;
463        }
464
465        if ($subroutine eq NEXT_TOKEN_RULE_NAME) {
466            next;
467        }
468
469        if ($package ne ref $self) {
470            next;
471        }
472
473        push @{$rules}, $subroutine;
474    }
475}
476
477sub get_backtracking_level {
478    Readonly my $usage => 'int get_backtracking_level()';
479    croak $usage if @_ != 1;
480    my ($self) = @_;
481
482    return $self->state->backtracking;
483}
484
485sub set_backtracking_level {
486    my ($self, $n) = @_;
487    $self->state->backtracking($n);
488}
489
490sub failed {
491    my ($self) = @_;
492    return $self->state->failed;
493}
494
495sub get_token_names {
496    return undef;
497}
498
499sub get_grammar_file_name {
500    return undef;
501}
502
503sub to_strings {
504    Readonly my $usage => 'List to_strings(List tokens)';
505    croak $usage if @_ != 2;
506    my ($self, $tokens) = @_;
507
508    if (!defined $tokens) {
509        return undef;
510    }
511
512    return map { $_->get_text() } @{$tokens};
513}
514
515sub get_rule_memoization {
516    Readonly my $usage => 'int get_rule_memoization(int rule_index, int rule_start_index)';
517    croak $usage if @_ != 3;
518    my ($self, $rule_index, $rule_start_index) = @_;
519
520    if (!defined $self->rule_memo->[$rule_index]) {
521        $self->rule_memo->[$rule_index] = {};
522    }
523
524    my $stop_index = $self->state->rule_memo->[$rule_index]->{$rule_start_index};
525    if (!defined $stop_index) {
526        return $self->MEMO_RULE_UNKNOWN;
527    }
528    return $stop_index;
529}
530
531sub alredy_parsed_rule {
532    Readonly my $usage => 'boolean alredy_parsed_rule(IntStream input, int rule_index)';
533    croak $usage if @_ != 3;
534    my ($self, $input, $rule_index) = @_;
535
536    my $stop_index = $self->get_rule_memoization($rule_index, $input->index());
537    if ($stop_index == $self->MEMO_RULE_UNKNOWN) {
538        return 0;
539    }
540
541    if ($stop_index == $self->MEMO_RULE_FAILED) {
542        $self->state->failed(1);
543    } else {
544        $input->seek($stop_index + 1);
545    }
546    return 1;
547}
548
549sub memoize {
550    Readonly my $usage => 'void memoize(IntStream input, int rule_index, int rule_start_index)';
551    croak $usage if @_ != 4;
552    my ($self, $input, $rule_index, $rule_start_index) = @_;
553
554    my $stop_token_index = $self->state->failed ? $self->MEMO_RULE_FAILED : $input->index() - 1;
555    if (defined $self->state->rule_memo->[$rule_index]) {
556        $self->state->rule_memo->[$rule_index]->{$rule_start_index} = $stop_token_index;
557    }
558}
559
560sub get_rule_memoization_cache_size {
561    Readonly my $usage => 'int get_rule_memoization_cache_size()';
562    croak $usage if @_ != 1;
563    my ($self) = @_;
564
565    my $n = 0;
566    foreach my $m (@{$self->state->rule_memo}) {
567        $n += keys %{$m} if defined $m;
568    }
569
570    return $n;
571}
572
573sub trace_in {
574    Readonly my $usage => 'void trace_in(String rule_name, int rule_index, input_symbol)';
575    croak $usage if @_ != 4;
576    my ($self, $rule_name, $rule_index, $input_symbol) = @_;
577
578    print "enter $rule_name $input_symbol";
579    if ($self->state->failed) {
580        print ' failed=', $self->state->failed;
581    }
582    if ($self->state->backtracking > 0) {
583        print ' backtracking=', $self->state->backtracking;
584    }
585    print "\n";
586}
587
588sub trace_out {
589    Readonly my $usage => 'void trace_out(String rule_name, int rule_index, input_symbol)';
590    croak $usage if @_ != 4;
591    my ($self, $rule_name, $rule_index, $input_symbol) = @_;
592
593    print "exit $rule_name $input_symbol";
594    if ($self->state->failed) {
595        print ' failed=', $self->state->failed;
596    }
597    if ($self->state->backtracking > 0) {
598        print ' backtracking=', $self->state->backtracking;
599    }
600    print "\n";
601}
602
603no Moose;
604__PACKAGE__->meta->make_immutable();
6051;
606__END__
607
608=head1 NAME
609
610ANTLR::Runtime::BaseRecognizer
611
612=head1 DESCRIPTION
613
614A generic recognizer that can handle recognizers generated from
615lexer, parser, and tree grammars.  This is all the parsing
616support code essentially; most of it is error recovery stuff and
617backtracking.
618