1unit Antlr.Runtime;
2(*
3[The "BSD licence"]
4Copyright (c) 2008 Erik van Bilsen
5Copyright (c) 2005-2007 Kunle Odutola
6All rights reserved.
7
8Redistribution and use in source and binary forms, with or without
9modification, are permitted provided that the following conditions
10are met:
111. Redistributions of source code MUST RETAIN the above copyright
12   notice, this list of conditions and the following disclaimer.
132. Redistributions in binary form MUST REPRODUCE the above copyright
14   notice, this list of conditions and the following disclaimer in 
15   the documentation and/or other materials provided with the 
16   distribution.
173. The name of the author may not be used to endorse or promote products
18   derived from this software without specific prior WRITTEN permission.
194. Unless explicitly state otherwise, any contribution intentionally 
20   submitted for inclusion in this work to the copyright owner or licensor
21   shall be under the terms and conditions of this license, without any 
22   additional terms or conditions.
23
24THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
25IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
28INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
29NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
30DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
33THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34*)
35
36interface
37
38{$IF CompilerVersion < 20}
39{$MESSAGE ERROR 'You need Delphi 2009 or higher to use the Antlr runtime'}
40{$IFEND}
41
42uses
43  SysUtils,
44  Classes,
45  Generics.Defaults,
46  Generics.Collections,
47  Antlr.Runtime.Tools,
48  Antlr.Runtime.Collections;
49
50type
51  TCharStreamConstants = (cscEOF = -1);
52
53type
54  ERecognitionException = class;
55  ENoViableAltException = class;
56
57  /// <summary>
58  /// A simple stream of integers. This is useful when all we care about is the char
59  /// or token type sequence (such as for interpretation).
60  /// </summary>
61  IIntStream = interface(IANTLRInterface)
62  ['{6B851BDB-DD9C-422B-AD1E-567E52D2654F}']
63    { Property accessors }
64    function GetSourceName: String;
65
66    { Methods }
67    /// <summary>
68    /// Advances the read position of the stream. Updates line and column state
69    /// </summary>
70    procedure Consume;
71
72    /// <summary>
73    /// Get int at current input pointer + I ahead (where I=1 is next int)
74    /// Negative indexes are allowed.  LA(-1) is previous token (token just matched).
75    /// LA(-i) where i is before first token should yield -1, invalid char or EOF.
76    /// </summary>
77    function LA(I: Integer): Integer;
78    function LAChar(I: Integer): Char;
79
80    /// <summary>Tell the stream to start buffering if it hasn't already.</summary>
81    /// <remarks>
82    /// Executing Rewind(Mark()) on a stream should not affect the input position.
83    /// The Lexer tracks line/col info as well as input index so its markers are
84    /// not pure input indexes.  Same for tree node streams.                          */
85    /// </remarks>
86    /// <returns>Return a marker that can be passed to
87    /// <see cref="IIntStream.Rewind(Integer)"/> to return to the current position.
88    /// This could be the current input position, a value return from
89    /// <see cref="IIntStream.Index"/>, or some other marker.</returns>
90    function Mark: Integer;
91
92    /// <summary>
93    /// Return the current input symbol index 0..N where N indicates the
94    /// last symbol has been read. The index is the symbol about to be
95    /// read not the most recently read symbol.
96    /// </summary>
97    function Index: Integer;
98
99    /// <summary>
100    /// Resets the stream so that the next call to
101    /// <see cref="IIntStream.Index"/> would  return marker.
102    /// </summary>
103    /// <remarks>
104    /// The marker will usually be <see cref="IIntStream.Index"/> but
105    /// it doesn't have to be.  It's just a marker to indicate what
106    /// state the stream was in.  This is essentially calling
107    /// <see cref="IIntStream.Release"/> and <see cref="IIntStream.Seek"/>.
108    /// If there are other markers created after the specified marker,
109    /// this routine must unroll them like a stack.  Assumes the state the
110    /// stream was in when this marker was created.
111    /// </remarks>
112    procedure Rewind(const Marker: Integer); overload;
113
114    /// <summary>
115    /// Rewind to the input position of the last marker.
116    /// </summary>
117    /// <remarks>
118    /// Used currently only after a cyclic DFA and just before starting
119    /// a sem/syn predicate to get the input position back to the start
120    /// of the decision. Do not "pop" the marker off the state.  Mark(I)
121    /// and Rewind(I) should balance still. It is like invoking
122    /// Rewind(last marker) but it should not "pop" the marker off.
123    /// It's like Seek(last marker's input position).
124    /// </remarks>
125    procedure Rewind; overload;
126
127    /// <summary>
128    /// You may want to commit to a backtrack but don't want to force the
129    /// stream to keep bookkeeping objects around for a marker that is
130    /// no longer necessary.  This will have the same behavior as
131    /// <see cref="IIntStream.Rewind(Integer)"/> except it releases resources without
132    /// the backward seek.
133    /// </summary>
134    /// <remarks>
135    /// This must throw away resources for all markers back to the marker
136    /// argument. So if you're nested 5 levels of Mark(), and then Release(2)
137    /// you have to release resources for depths 2..5.
138    /// </remarks>
139    procedure Release(const Marker: Integer);
140
141    /// <summary>
142    /// Set the input cursor to the position indicated by index.  This is
143    /// normally used to seek ahead in the input stream.
144    /// </summary>
145    /// <remarks>
146    /// No buffering is required to do this unless you know your stream
147    /// will use seek to move backwards such as when backtracking.
148    ///
149    /// This is different from rewind in its multi-directional requirement
150    /// and in that its argument is strictly an input cursor (index).
151    ///
152    /// For char streams, seeking forward must update the stream state such
153    /// as line number.  For seeking backwards, you will be presumably
154    /// backtracking using the
155    /// <see cref="IIntStream.Mark"/>/<see cref="IIntStream.Rewind(Integer)"/>
156    /// mechanism that restores state and so this method does not need to
157    /// update state when seeking backwards.
158    ///
159    /// Currently, this method is only used for efficient backtracking using
160    /// memoization, but in the future it may be used for incremental parsing.
161    ///
162    /// The index is 0..N-1. A seek to position i means that LA(1) will return
163    /// the ith symbol.  So, seeking to 0 means LA(1) will return the first
164    /// element in the stream.
165    /// </remarks>
166    procedure Seek(const Index: Integer);
167
168    /// <summary>Returns the size of the entire stream.</summary>
169    /// <remarks>
170    /// Only makes sense for streams that buffer everything up probably,
171    /// but might be useful to display the entire stream or for testing.
172    /// This value includes a single EOF.
173    /// </remarks>
174    function Size: Integer;
175
176    { Properties }
177
178    /// <summary>
179    /// Where are you getting symbols from?  Normally, implementations will
180    /// pass the buck all the way to the lexer who can ask its input stream
181    /// for the file name or whatever.
182    /// </summary>
183    property SourceName: String read GetSourceName;
184  end;
185
186  /// <summary>A source of characters for an ANTLR lexer </summary>
187  ICharStream = interface(IIntStream)
188  ['{C30EF0DB-F4BD-4CBC-8C8F-828DABB6FF36}']
189    { Property accessors }
190    function GetLine: Integer;
191    procedure SetLine(const Value: Integer);
192    function GetCharPositionInLine: Integer;
193    procedure SetCharPositionInLine(const Value: Integer);
194
195    { Methods }
196
197    /// <summary>
198    /// Get the ith character of lookahead.  This is usually the same as
199    /// LA(I).  This will be used for labels in the generated lexer code.
200    /// I'd prefer to return a char here type-wise, but it's probably
201    /// better to be 32-bit clean and be consistent with LA.
202    /// </summary>
203    function LT(const I: Integer): Integer;
204
205    /// <summary>
206    /// This primarily a useful interface for action code (just make sure
207    /// actions don't use this on streams that don't support it).
208    /// For infinite streams, you don't need this.
209    /// </summary>
210    function Substring(const Start, Stop: Integer): String;
211
212    { Properties }
213
214    /// <summary>
215    /// The current line in the character stream (ANTLR tracks the
216    /// line information automatically. To support rewinding character
217    /// streams, we are able to [re-]set the line.
218    /// </summary>
219    property Line: Integer read GetLine write SetLine;
220
221    /// <summary>
222    /// The index of the character relative to the beginning of the
223    /// line (0..N-1). To support rewinding character streams, we are
224    /// able to [re-]set the character position.
225    /// </summary>
226    property CharPositionInLine: Integer read GetCharPositionInLine write SetCharPositionInLine;
227  end;
228
229  IToken = interface(IANTLRInterface)
230  ['{73BF129C-2F45-4C68-838E-BF5D3536AC6D}']
231    { Property accessors }
232    function GetTokenType: Integer;
233    procedure SetTokenType(const Value: Integer);
234    function GetLine: Integer;
235    procedure SetLine(const Value: Integer);
236    function GetCharPositionInLine: Integer;
237    procedure SetCharPositionInLine(const Value: Integer);
238    function GetChannel: Integer;
239    procedure SetChannel(const Value: Integer);
240    function GetTokenIndex: Integer;
241    procedure SetTokenIndex(const Value: Integer);
242    function GetText: String;
243    procedure SetText(const Value: String);
244
245    { Properties }
246    property TokenType: Integer read GetTokenType write SetTokenType;
247
248    /// <summary>The line number on which this token was matched; line=1..N</summary>
249    property Line: Integer read GetLine write SetLine;
250
251    /// <summary>
252    /// The index of the first character relative to the beginning of the line 0..N-1
253    /// </summary>
254    property CharPositionInLine: Integer read GetCharPositionInLine write SetCharPositionInLine;
255
256    /// <summary>The line number on which this token was matched; line=1..N</summary>
257    property Channel: Integer read GetChannel write SetChannel;
258
259    /// <summary>
260    /// An index from 0..N-1 of the token object in the input stream
261    /// </summary>
262    /// <remarks>
263    /// This must be valid in order to use the ANTLRWorks debugger.
264    /// </remarks>
265    property TokenIndex: Integer read GetTokenIndex write SetTokenIndex;
266
267    /// <summary>The text of the token</summary>
268    /// <remarks>
269    /// When setting the text, it might be a NOP such as for the CommonToken,
270    /// which doesn't have string pointers, just indexes into a char buffer.
271    /// </remarks>
272    property Text: String read GetText write SetText;
273  end;
274
275  /// <summary>
276  /// A source of tokens must provide a sequence of tokens via NextToken()
277  /// and also must reveal it's source of characters; CommonToken's text is
278  /// computed from a CharStream; it only store indices into the char stream.
279  ///
280  /// Errors from the lexer are never passed to the parser.  Either you want
281  /// to keep going or you do not upon token recognition error.  If you do not
282  /// want to continue lexing then you do not want to continue parsing.  Just
283  /// throw an exception not under RecognitionException and Delphi will naturally
284  /// toss you all the way out of the recognizers.  If you want to continue
285  /// lexing then you should not throw an exception to the parser--it has already
286  /// requested a token.  Keep lexing until you get a valid one.  Just report
287  /// errors and keep going, looking for a valid token.
288  /// </summary>
289  ITokenSource = interface(IANTLRInterface)
290  ['{2C71FAD0-AEEE-417D-B576-4059F7C4CEB4}']
291    { Property accessors }
292    function GetSourceName: String;
293
294    { Methods }
295
296    /// <summary>
297    /// Returns a Token object from the input stream (usually a CharStream).
298    /// Does not fail/return upon lexing error; just keeps chewing on the
299    /// characters until it gets a good one; errors are not passed through
300    /// to the parser.
301    /// </summary>
302    function NextToken: IToken;
303
304    { Properties }
305
306    /// <summary>
307    /// Where are you getting tokens from? normally the implication will simply
308    /// ask lexers input stream.
309    /// </summary>
310    property SourceName: String read GetSourceName;
311  end;
312
313  /// <summary>A stream of tokens accessing tokens from a TokenSource </summary>
314  ITokenStream = interface(IIntStream)
315  ['{59E5B39D-31A6-496D-9FA9-AC75CC584B68}']
316    { Property accessors }
317    function GetTokenSource: ITokenSource;
318    procedure SetTokenSource(const Value: ITokenSource);
319
320    { Methods }
321
322    /// <summary>
323    /// Get Token at current input pointer + I ahead (where I=1 is next
324    /// Token).
325    /// I &lt; 0 indicates tokens in the past.  So -1 is previous token and -2 is
326    /// two tokens ago. LT(0) is undefined.  For I>=N, return Token.EOFToken.
327    /// Return null for LT(0) and any index that results in an absolute address
328    /// that is negative.
329    /// </summary>
330    function LT(const K: Integer): IToken;
331
332    /// <summary>
333    /// Get a token at an absolute index I; 0..N-1.  This is really only
334    /// needed for profiling and debugging and token stream rewriting.
335    /// If you don't want to buffer up tokens, then this method makes no
336    /// sense for you.  Naturally you can't use the rewrite stream feature.
337    /// I believe DebugTokenStream can easily be altered to not use
338    /// this method, removing the dependency.
339    /// </summary>
340    function Get(const I: Integer): IToken;
341
342    /// <summary>Return the text of all tokens from start to stop, inclusive.
343    /// If the stream does not buffer all the tokens then it can just
344    /// return '';  Users should not access $ruleLabel.text in
345    /// an action of course in that case.
346    /// </summary>
347    function ToString(const Start, Stop: Integer): String; overload;
348
349    /// <summary>Because the user is not required to use a token with an index stored
350    /// in it, we must provide a means for two token objects themselves to
351    /// indicate the start/end location.  Most often this will just delegate
352    /// to the other ToString(Integer,Integer).  This is also parallel with
353    /// the TreeNodeStream.ToString(Object,Object).
354    /// </summary>
355    function ToString(const Start, Stop: IToken): String; overload;
356
357    { Properties }
358    property TokenSource: ITokenSource read GetTokenSource write SetTokenSource;
359  end;
360
361  /// <summary>
362  /// This is the complete state of a stream.
363  ///
364  /// When walking ahead with cyclic DFA for syntactic predicates, we
365  /// need to record the state of the input stream (char index, line,
366  /// etc...) so that we can rewind the state after scanning ahead.
367  /// </summary>
368  ICharStreamState = interface(IANTLRInterface)
369  ['{62D2A1CD-ED3A-4C95-A366-AB8F2E54060B}']
370    { Property accessors }
371    function GetP: Integer;
372    procedure SetP(const Value: Integer);
373    function GetLine: Integer;
374    procedure SetLine(const Value: Integer);
375    function GetCharPositionInLine: Integer;
376    procedure SetCharPositionInLine(const Value: Integer);
377
378    { Properties }
379    /// <summary>Index into the char stream of next lookahead char </summary>
380    property P: Integer read GetP write SetP;
381
382    /// <summary>What line number is the scanner at before processing buffer[P]? </summary>
383    property Line: Integer read GetLine write SetLine;
384
385    /// <summary>What char position 0..N-1 in line is scanner before processing buffer[P]? </summary>
386    property CharPositionInLine: Integer read GetCharPositionInLine write SetCharPositionInLine;
387  end;
388
389  /// <summary>
390  /// A pretty quick <see cref="ICharStream"/> that uses a character array
391  /// directly as it's underlying source.
392  /// </summary>
393  IANTLRStringStream = interface(ICharStream)
394  ['{2FA24299-FF97-4AB6-8CA6-5D3DA13C4AB2}']
395    { Methods }
396
397    /// <summary>
398    /// Resets the stream so that it is in the same state it was
399    /// when the object was created *except* the data array is not
400    /// touched.
401    /// </summary>
402    procedure Reset;
403
404  end;
405
406  /// <summary>
407  /// A character stream - an <see cref="ICharStream"/> - that loads
408  /// and caches the contents of it's underlying file fully during
409  /// object construction
410  /// </summary>
411  /// <remarks>
412  /// This looks very much like an ANTLReaderStream or an ANTLRInputStream
413  /// but, it is a special case. Since we know the exact size of the file to
414  /// load, we can avoid lots of data copying and buffer resizing.
415  /// </remarks>
416  IANTLRFileStream = interface(IANTLRStringStream)
417  ['{2B0145DB-2DAA-48A0-8316-B47A69EDDD1A}']
418    { Methods }
419
420    /// <summary>
421    /// Loads and buffers the specified file to be used as this
422    /// ANTLRFileStream's source
423    /// </summary>
424    /// <param name="FileName">File to load</param>
425    /// <param name="Encoding">Encoding to apply to file</param>
426    procedure Load(const FileName: String; const Encoding: TEncoding);
427  end;
428
429  /// <summary>
430  /// A stripped-down version of org.antlr.misc.BitSet that is just
431  /// good enough to handle runtime requirements such as FOLLOW sets
432  /// for automatic error recovery.
433  /// </summary>
434  IBitSet = interface(IANTLRInterface)
435  ['{F2045045-FC46-4779-A65D-56C65D257A8E}']
436    { Property accessors }
437    function GetIsNil: Boolean;
438
439    { Methods }
440
441    /// <summary>return "this or a" in a new set </summary>
442    function BitSetOr(const A: IBitSet): IBitSet;
443
444    /// <summary>Or this element into this set (grow as necessary to accommodate)</summary>
445    procedure Add(const El: Integer);
446
447    /// <summary> Grows the set to a larger number of bits.</summary>
448    /// <param name="bit">element that must fit in set
449    /// </param>
450    procedure GrowToInclude(const Bit: Integer);
451
452    procedure OrInPlace(const A: IBitSet);
453    function Size: Integer;
454    function Member(const El: Integer): Boolean;
455
456    // remove this element from this set
457    procedure Remove(const El: Integer);
458
459    function NumBits: Integer;
460
461    /// <summary>return how much space is being used by the bits array not
462    /// how many actually have member bits on.
463    /// </summary>
464    function LengthInLongWords: Integer;
465
466    function ToArray: TIntegerArray;
467    function ToPackedArray: TUInt64Array;
468
469    function ToString: String; overload;
470    function ToString(const TokenNames: TStringArray): String; overload;
471    function Equals(Obj: TObject): Boolean;
472
473    { Properties }
474    property IsNil: Boolean read GetIsNil;
475  end;
476  TBitSetArray = array of IBitSet;
477
478  /// <summary>
479  /// The set of fields needed by an abstract recognizer to recognize input
480  /// and recover from errors
481  /// </summary>
482  /// <remarks>
483  /// As a separate state object, it can be shared among multiple grammars;
484  /// e.g., when one grammar imports another.
485  /// These fields are publicly visible but the actual state pointer per
486  /// parser is protected.
487  /// </remarks>
488  IRecognizerSharedState = interface(IANTLRInterface)
489  ['{6CB6E17A-0B01-4AA7-8D49-5742A3CB8901}']
490    { Property accessors }
491    function GetFollowing: TBitSetArray;
492    procedure SetFollowing(const Value: TBitSetArray);
493    function GetFollowingStackPointer: Integer;
494    procedure SetFollowingStackPointer(const Value: Integer);
495    function GetErrorRecovery: Boolean;
496    procedure SetErrorRecovery(const Value: Boolean);
497    function GetLastErrorIndex: Integer;
498    procedure SetLastErrorIndex(const Value: Integer);
499    function GetFailed: Boolean;
500    procedure SetFailed(const Value: Boolean);
501    function GetSyntaxErrors: Integer;
502    procedure SetSyntaxErrors(const Value: Integer);
503    function GetBacktracking: Integer;
504    procedure SetBacktracking(const Value: Integer);
505    function GetRuleMemo: TDictionaryArray<Integer, Integer>;
506    function GetRuleMemoCount: Integer;
507    procedure SetRuleMemoCount(const Value: Integer);
508    function GetToken: IToken;
509    procedure SetToken(const Value: IToken);
510    function GetTokenStartCharIndex: Integer;
511    procedure SetTokenStartCharIndex(const Value: Integer);
512    function GetTokenStartLine: Integer;
513    procedure SetTokenStartLine(const Value: Integer);
514    function GetTokenStartCharPositionInLine: Integer;
515    procedure SetTokenStartCharPositionInLine(const Value: Integer);
516    function GetChannel: Integer;
517    procedure SetChannel(const Value: Integer);
518    function GetTokenType: Integer;
519    procedure SetTokenType(const Value: Integer);
520    function GetText: String;
521    procedure SetText(const Value: String);
522
523    { Properties }
524
525    /// <summary>
526    /// Tracks the set of token types that can follow any rule invocation.
527    /// Stack grows upwards.  When it hits the max, it grows 2x in size
528    /// and keeps going.
529    /// </summary>
530    property Following: TBitSetArray read GetFollowing write SetFollowing;
531    property FollowingStackPointer: Integer read GetFollowingStackPointer write SetFollowingStackPointer;
532
533    /// <summary>
534    /// This is true when we see an error and before having successfully
535    /// matched a token.  Prevents generation of more than one error message
536    /// per error.
537    /// </summary>
538    property ErrorRecovery: Boolean read GetErrorRecovery write SetErrorRecovery;
539
540    /// <summary>
541    /// The index into the input stream where the last error occurred.
542    /// </summary>
543    /// <remarks>
544    /// This is used to prevent infinite loops where an error is found
545    /// but no token is consumed during recovery...another error is found,
546    /// ad naseum.  This is a failsafe mechanism to guarantee that at least
547    /// one token/tree node is consumed for two errors.
548    /// </remarks>
549    property LastErrorIndex: Integer read GetLastErrorIndex write SetLastErrorIndex;
550
551    /// <summary>
552    /// In lieu of a return value, this indicates that a rule or token
553    /// has failed to match.  Reset to false upon valid token match.
554    /// </summary>
555    property Failed: Boolean read GetFailed write SetFailed;
556
557    /// <summary>
558    /// Did the recognizer encounter a syntax error?  Track how many.
559    /// </summary>
560    property SyntaxErrors: Integer read GetSyntaxErrors write SetSyntaxErrors;
561
562    /// <summary>
563    /// If 0, no backtracking is going on.  Safe to exec actions etc...
564    /// If >0 then it's the level of backtracking.
565    /// </summary>
566    property Backtracking: Integer read GetBacktracking write SetBacktracking;
567
568    /// <summary>
569    /// An array[size num rules] of Map&lt;Integer,Integer&gt; that tracks
570    /// the stop token index for each rule.
571    /// </summary>
572    /// <remarks>
573    ///  RuleMemo[RuleIndex] is the memoization table for RuleIndex.
574    ///  For key RuleStartIndex, you get back the stop token for
575    ///  associated rule or MEMO_RULE_FAILED.
576    ///
577    ///  This is only used if rule memoization is on (which it is by default).
578    ///  </remarks>
579    property RuleMemo: TDictionaryArray<Integer, Integer> read GetRuleMemo;
580    property RuleMemoCount: Integer read GetRuleMemoCount write SetRuleMemoCount;
581
582    // Lexer Specific Members
583    // LEXER FIELDS (must be in same state object to avoid casting
584    //               constantly in generated code and Lexer object) :(
585
586    /// <summary>
587    /// Token object normally returned by NextToken() after matching lexer rules.
588    /// </summary>
589    /// <remarks>
590    /// The goal of all lexer rules/methods is to create a token object.
591    /// This is an instance variable as multiple rules may collaborate to
592    /// create a single token.  NextToken will return this object after
593    /// matching lexer rule(s).  If you subclass to allow multiple token
594    /// emissions, then set this to the last token to be matched or
595    /// something nonnull so that the auto token emit mechanism will not
596    /// emit another token.
597    /// </remarks>
598    property Token: IToken read GetToken write SetToken;
599
600    /// <summary>
601    /// What character index in the stream did the current token start at?
602    /// </summary>
603    /// <remarks>
604    /// Needed, for example, to get the text for current token.  Set at
605    /// the start of nextToken.
606    /// </remarks>
607    property TokenStartCharIndex: Integer read GetTokenStartCharIndex write SetTokenStartCharIndex;
608
609    /// <summary>
610    /// The line on which the first character of the token resides
611    /// </summary>
612    property TokenStartLine: Integer read GetTokenStartLine write SetTokenStartLine;
613
614    /// <summary>The character position of first character within the line</summary>
615    property TokenStartCharPositionInLine: Integer read GetTokenStartCharPositionInLine write SetTokenStartCharPositionInLine;
616
617    /// <summary>The channel number for the current token</summary>
618    property Channel: Integer read GetChannel write SetChannel;
619
620    /// <summary>The token type for the current token</summary>
621    property TokenType: Integer read GetTokenType write SetTokenType;
622
623    /// <summary>
624    /// You can set the text for the current token to override what is in
625    /// the input char buffer.  Use setText() or can set this instance var.
626    /// </summary>
627    property Text: String read GetText write SetText;
628  end;
629
630  ICommonToken = interface(IToken)
631  ['{06B1B0C3-2A0D-477A-AE30-414F51ACE8A0}']
632    { Property accessors }
633    function GetStartIndex: Integer;
634    procedure SetStartIndex(const Value: Integer);
635    function GetStopIndex: Integer;
636    procedure SetStopIndex(const Value: Integer);
637    function GetInputStream: ICharStream;
638    procedure SetInputStream(const Value: ICharStream);
639
640    { Methods }
641    function ToString: String;
642
643    { Properties }
644    property StartIndex: Integer read GetStartIndex write SetStartIndex;
645    property StopIndex: Integer read GetStopIndex write SetStopIndex;
646    property InputStream: ICharStream read GetInputStream write SetInputStream;
647  end;
648
649  /// <summary>
650  /// A Token object like we'd use in ANTLR 2.x; has an actual string created
651  /// and associated with this object.  These objects are needed for imaginary
652  /// tree nodes that have payload objects.  We need to create a Token object
653  /// that has a string; the tree node will point at this token.  CommonToken
654  /// has indexes into a char stream and hence cannot be used to introduce
655  /// new strings.
656  /// </summary>
657  IClassicToken = interface(IToken)
658    { Property accessors }
659    function GetTokenType: Integer;
660    procedure SetTokenType(const Value: Integer);
661    function GetLine: Integer;
662    procedure SetLine(const Value: Integer);
663    function GetCharPositionInLine: Integer;
664    procedure SetCharPositionInLine(const Value: Integer);
665    function GetChannel: Integer;
666    procedure SetChannel(const Value: Integer);
667    function GetTokenIndex: Integer;
668    procedure SetTokenIndex(const Value: Integer);
669    function GetText: String;
670    procedure SetText(const Value: String);
671    function GetInputStream: ICharStream;
672    procedure SetInputStream(const Value: ICharStream);
673
674    { Properties }
675    property TokenType: Integer read GetTokenType write SetTokenType;
676    property Line: Integer read GetLine write SetLine;
677    property CharPositionInLine: Integer read GetCharPositionInLine write SetCharPositionInLine;
678    property Channel: Integer read GetChannel write SetChannel;
679    property TokenIndex: Integer read GetTokenIndex write SetTokenIndex;
680    property Text: String read GetText write SetText;
681    property InputStream: ICharStream read GetInputStream write SetInputStream;
682  end;
683
684  /// <summary>
685  /// A generic recognizer that can handle recognizers generated from
686  /// lexer, parser, and tree grammars.  This is all the parsing
687  /// support code essentially; most of it is error recovery stuff and
688  /// backtracking.
689  /// </summary>
690  IBaseRecognizer = interface(IANTLRObject)
691  ['{90813CE2-614B-4773-A26E-936E7DE7E9E9}']
692    { Property accessors }
693    function GetInput: IIntStream;
694    function GetBacktrackingLevel: Integer;
695    function GetState: IRecognizerSharedState;
696    function GetNumberOfSyntaxErrors: Integer;
697    function GetGrammarFileName: String;
698    function GetSourceName: String;
699    function GetTokenNames: TStringArray;
700
701    { Methods }
702    procedure BeginBacktrack(const Level: Integer);
703    procedure EndBacktrack(const Level: Integer; const Successful: Boolean);
704
705    /// <summary>Reset the parser's state. Subclasses must rewind the input stream.</summary>
706    procedure Reset;
707
708    /// <summary>
709    /// Match current input symbol against ttype.  Attempt
710    /// single token insertion or deletion error recovery.  If
711    /// that fails, throw EMismatchedTokenException.
712    /// </summary>
713    /// <remarks>
714    /// To turn off single token insertion or deletion error
715    /// recovery, override MismatchRecover() and have it call
716    /// plain Mismatch(), which does not recover. Then any error
717    /// in a rule will cause an exception and immediate exit from
718    /// rule. Rule would recover by resynchronizing to the set of
719    /// symbols that can follow rule ref.
720    /// </remarks>
721    function Match(const Input: IIntStream; const TokenType: Integer;
722      const Follow: IBitSet): IANTLRInterface;
723
724    function MismatchIsUnwantedToken(const Input: IIntStream;
725      const TokenType: Integer): Boolean;
726
727    function MismatchIsMissingToken(const Input: IIntStream;
728      const Follow: IBitSet): Boolean;
729
730    /// <summary>A hook to listen in on the token consumption during error recovery.
731    /// The DebugParser subclasses this to fire events to the listenter.
732    /// </summary>
733    procedure BeginResync;
734    procedure EndResync;
735
736    /// <summary>
737    /// Report a recognition problem.
738    /// </summary>
739    /// <remarks>
740    /// This method sets errorRecovery to indicate the parser is recovering
741    /// not parsing.  Once in recovery mode, no errors are generated.
742    /// To get out of recovery mode, the parser must successfully Match
743    /// a token (after a resync).  So it will go:
744    ///
745    /// 1. error occurs
746    /// 2. enter recovery mode, report error
747    /// 3. consume until token found in resynch set
748    /// 4. try to resume parsing
749    /// 5. next Match() will reset errorRecovery mode
750    ///
751    /// If you override, make sure to update syntaxErrors if you care about that.
752    /// </remarks>
753    procedure ReportError(const E: ERecognitionException);
754
755    /// <summary> Match the wildcard: in a symbol</summary>
756    procedure MatchAny(const Input: IIntStream);
757
758    procedure DisplayRecognitionError(const TokenNames: TStringArray;
759      const E: ERecognitionException);
760
761    /// <summary>
762    /// What error message should be generated for the various exception types?
763    ///
764    /// Not very object-oriented code, but I like having all error message generation
765    /// within one method rather than spread among all of the exception classes. This
766    /// also makes it much easier for the exception handling because the exception
767    /// classes do not have to have pointers back to this object to access utility
768    /// routines and so on. Also, changing the message for an exception type would be
769    /// difficult because you would have to subclassing exception, but then somehow get
770    /// ANTLR to make those kinds of exception objects instead of the default.
771    ///
772    /// This looks weird, but trust me--it makes the most sense in terms of flexibility.
773    ///
774    /// For grammar debugging, you will want to override this to add more information
775    /// such as the stack frame with GetRuleInvocationStack(e, this.GetType().Fullname)
776    /// and, for no viable alts, the decision description and state etc...
777    ///
778    /// Override this to change the message generated for one or more exception types.
779    /// </summary>
780    function GetErrorMessage(const E: ERecognitionException;
781      const TokenNames: TStringArray): String;
782
783    /// <summary>
784    /// What is the error header, normally line/character position information?
785    /// </summary>
786    function GetErrorHeader(const E: ERecognitionException): String;
787
788    /// <summary>
789    /// How should a token be displayed in an error message? The default
790    /// is to display just the text, but during development you might
791    /// want to have a lot of information spit out.  Override in that case
792    /// to use t.ToString() (which, for CommonToken, dumps everything about
793    /// the token). This is better than forcing you to override a method in
794    /// your token objects because you don't have to go modify your lexer
795    /// so that it creates a new type.
796    /// </summary>
797    function GetTokenErrorDisplay(const T: IToken): String;
798
799    /// <summary>
800    /// Override this method to change where error messages go
801    /// </summary>
802    procedure EmitErrorMessage(const Msg: String);
803
804    /// <summary>
805    /// Recover from an error found on the input stream.  This is
806    /// for NoViableAlt and mismatched symbol exceptions.  If you enable
807    /// single token insertion and deletion, this will usually not
808    /// handle mismatched symbol exceptions but there could be a mismatched
809    /// token that the Match() routine could not recover from.
810    /// </summary>
811    procedure Recover(const Input: IIntStream; const RE: ERecognitionException);
812
813    // Not currently used
814    function RecoverFromMismatchedSet(const Input: IIntStream;
815      const E: ERecognitionException; const Follow: IBitSet): IANTLRInterface;
816
817    procedure ConsumeUntil(const Input: IIntStream; const TokenType: Integer); overload;
818
819    /// <summary>Consume tokens until one matches the given token set </summary>
820    procedure ConsumeUntil(const Input: IIntStream; const BitSet: IBitSet); overload;
821
822    /// <summary>
823    /// Returns List &lt;String&gt; of the rules in your parser instance
824    /// leading up to a call to this method.  You could override if
825    /// you want more details such as the file/line info of where
826    /// in the parser source code a rule is invoked.
827    /// </summary>
828    /// <remarks>
829    /// NOT IMPLEMENTED IN THE DELPHI VERSION YET
830    /// This is very useful for error messages and for context-sensitive
831    /// error recovery.
832    /// </remarks>
833    //function GetRuleInvocationStack: IList<IANTLRInterface>; overload;
834
835    /// <summary>
836    /// A more general version of GetRuleInvocationStack where you can
837    /// pass in, for example, a RecognitionException to get it's rule
838    /// stack trace.  This routine is shared with all recognizers, hence,
839    /// static.
840    ///
841    /// TODO: move to a utility class or something; weird having lexer call this
842    /// </summary>
843    /// <remarks>
844    /// NOT IMPLEMENTED IN THE DELPHI VERSION YET
845    /// </remarks>
846    //function GetRuleInvocationStack(const E: Exception;
847    //  const RecognizerClassName: String): IList<IANTLRInterface>; overload;
848
849    /// <summary>A convenience method for use most often with template rewrites.
850    /// Convert a List&lt;Token&gt; to List&lt;String&gt;
851    /// </summary>
852    function ToStrings(const Tokens: IList<IToken>): IList<String>;
853
854    /// <summary>
855    /// Given a rule number and a start token index number, return
856    /// MEMO_RULE_UNKNOWN if the rule has not parsed input starting from
857    /// start index.  If this rule has parsed input starting from the
858    /// start index before, then return where the rule stopped parsing.
859    /// It returns the index of the last token matched by the rule.
860    /// </summary>
861    /// <remarks>
862    /// For now we use a hashtable and just the slow Object-based one.
863    /// Later, we can make a special one for ints and also one that
864    /// tosses out data after we commit past input position i.
865    /// </remarks>
866    function GetRuleMemoization(const RuleIndex, RuleStartIndex: Integer): Integer;
867
868    /// <summary>
869    /// Has this rule already parsed input at the current index in the
870    /// input stream?  Return the stop token index or MEMO_RULE_UNKNOWN.
871    /// If we attempted but failed to parse properly before, return
872    /// MEMO_RULE_FAILED.
873    ///
874    /// This method has a side-effect: if we have seen this input for
875    /// this rule and successfully parsed before, then seek ahead to
876    /// 1 past the stop token matched for this rule last time.
877    /// </summary>
878    function AlreadyParsedRule(const Input: IIntStream;
879      const RuleIndex: Integer): Boolean;
880
881    /// <summary>
882    /// Record whether or not this rule parsed the input at this position
883    /// successfully.  Use a standard hashtable for now.
884    /// </summary>
885    procedure Memoize(const Input: IIntStream; const RuleIndex,
886      RuleStartIndex: Integer);
887
888    /// <summary>
889    /// Return how many rule/input-index pairs there are in total.
890    ///  TODO: this includes synpreds. :(
891    /// </summary>
892    /// <returns></returns>
893    function GetRuleMemoizationChaceSize: Integer;
894
895    procedure TraceIn(const RuleName: String; const RuleIndex: Integer;
896      const InputSymbol: String);
897    procedure TraceOut(const RuleName: String; const RuleIndex: Integer;
898      const InputSymbol: String);
899
900    { Properties }
901    property Input: IIntStream read GetInput;
902    property BacktrackingLevel: Integer read GetBacktrackingLevel;
903    property State: IRecognizerSharedState read GetState;
904
905    /// <summary>
906    /// Get number of recognition errors (lexer, parser, tree parser).  Each
907    /// recognizer tracks its own number.  So parser and lexer each have
908    /// separate count.  Does not count the spurious errors found between
909    /// an error and next valid token match
910    ///
911    /// See also ReportError()
912    /// </summary>
913    property NumberOfSyntaxErrors: Integer read GetNumberOfSyntaxErrors;
914
915    /// <summary>
916    /// For debugging and other purposes, might want the grammar name.
917    /// Have ANTLR generate an implementation for this property.
918    /// </summary>
919    /// <returns></returns>
920    property GrammarFileName: String read GetGrammarFileName;
921
922    /// <summary>
923    /// For debugging and other purposes, might want the source name.
924    /// Have ANTLR provide a hook for this property.
925    /// </summary>
926    /// <returns>The source name</returns>
927    property SourceName: String read GetSourceName;
928
929    /// <summary>
930    /// Used to print out token names like ID during debugging and
931    /// error reporting.  The generated parsers implement a method
932    /// that overrides this to point to their string[] tokenNames.
933    /// </summary>
934    property TokenNames: TStringArray read GetTokenNames;
935  end;
936
937  /// <summary>
938  /// The most common stream of tokens is one where every token is buffered up
939  /// and tokens are prefiltered for a certain channel (the parser will only
940  /// see these tokens and cannot change the filter channel number during the
941  /// parse).
942  ///
943  /// TODO: how to access the full token stream?  How to track all tokens matched per rule?
944  /// </summary>
945  ICommonTokenStream = interface(ITokenStream)
946    { Methods }
947
948    /// <summary>
949    /// A simple filter mechanism whereby you can tell this token stream
950    /// to force all tokens of type TType to be on Channel.
951    /// </summary>
952    ///
953    /// <remarks>
954    /// For example,
955    /// when interpreting, we cannot exec actions so we need to tell
956    /// the stream to force all WS and NEWLINE to be a different, ignored
957    /// channel.
958    /// </remarks>
959    procedure SetTokenTypeChannel(const TType, Channel: Integer);
960
961    procedure DiscardTokenType(const TType: Integer);
962
963    procedure DiscardOffChannelTokens(const Discard: Boolean);
964
965    function GetTokens: IList<IToken>; overload;
966    function GetTokens(const Start, Stop: Integer): IList<IToken>; overload;
967
968    /// <summary>Given a start and stop index, return a List of all tokens in
969    /// the token type BitSet.  Return null if no tokens were found.  This
970    /// method looks at both on and off channel tokens.
971    /// </summary>
972    function GetTokens(const Start, Stop: Integer;
973      const Types: IBitSet): IList<IToken>; overload;
974
975    function GetTokens(const Start, Stop: Integer;
976      const Types: IList<Integer>): IList<IToken>; overload;
977
978    function GetTokens(const Start, Stop,
979      TokenType: Integer): IList<IToken>; overload;
980
981    procedure Reset;
982  end;
983
984  IDFA = interface;
985
986  TSpecialStateTransitionHandler = function(const DFA: IDFA; S: Integer;
987    const Input: IIntStream): Integer of Object;
988
989  /// <summary>
990  ///  A DFA implemented as a set of transition tables.
991  /// </summary>
992  /// <remarks>
993  /// <para>
994  /// Any state that has a semantic predicate edge is special; those states are
995  /// generated with if-then-else structures in a SpecialStateTransition()
996  /// which is generated by cyclicDFA template.
997  /// </para>
998  /// <para>
999  /// There are at most 32767 states (16-bit signed short). Could get away with byte
1000  /// sometimes but would have to generate different types and the simulation code too.
1001  /// </para>
1002  /// <para>
1003  /// As a point of reference, the Tokens rule DFA for the lexer in the Java grammar
1004  /// sample has approximately 326 states.
1005  /// </para>
1006  /// </remarks>
1007  IDFA = interface(IANTLRInterface)
1008  ['{36312B59-B718-48EF-A0EC-4529DE70F4C2}']
1009    { Property accessors }
1010    function GetSpecialStateTransitionHandler: TSpecialStateTransitionHandler;
1011    procedure SetSpecialStateTransitionHandler(const Value: TSpecialStateTransitionHandler);
1012
1013    { Methods }
1014
1015    /// <summary>
1016    /// From the input stream, predict what alternative will succeed using this
1017    /// DFA (representing the covering regular approximation to the underlying CFL).
1018    /// </summary>
1019    /// <param name="Input">Input stream</param>
1020    /// <returns>Return an alternative number 1..N.  Throw an exception upon error.</returns>
1021    function Predict(const Input: IIntStream): Integer;
1022
1023    /// <summary>
1024    /// A hook for debugging interface
1025    /// </summary>
1026    /// <param name="NVAE"></param>
1027    procedure Error(const NVAE: ENoViableAltException);
1028
1029    function SpecialStateTransition(const S: Integer; const Input: IIntStream): Integer;
1030
1031    function Description: String;
1032
1033    function SpecialTransition(const State, Symbol: Integer): Integer;
1034
1035    { Properties }
1036    property SpecialStateTransitionHandler: TSpecialStateTransitionHandler read GetSpecialStateTransitionHandler write SetSpecialStateTransitionHandler;
1037  end;
1038
1039  /// <summary>
1040  /// A lexer is recognizer that draws input symbols from a character stream.
1041  /// lexer grammars result in a subclass of this object. A Lexer object
1042  /// uses simplified Match() and error recovery mechanisms in the interest
1043  /// of speed.
1044  /// </summary>
1045  ILexer = interface(IBaseRecognizer)
1046  ['{331AAB49-E7CD-40E7-AEF5-427F7D6577AD}']
1047    { Property accessors }
1048    function GetCharStream: ICharStream;
1049    procedure SetCharStream(const Value: ICharStream);
1050    function GetLine: Integer;
1051    function GetCharPositionInLine: Integer;
1052    function GetCharIndex: Integer;
1053    function GetText: String;
1054    procedure SetText(const Value: String);
1055
1056    { Methods }
1057
1058    /// <summary>
1059    /// Return a token from this source; i.e., Match a token on the char stream.
1060    /// </summary>
1061    function NextToken: IToken;
1062
1063    /// <summary>
1064    /// Instruct the lexer to skip creating a token for current lexer rule and
1065    /// look for another token.  NextToken() knows to keep looking when a lexer
1066    /// rule finishes with token set to SKIP_TOKEN.  Recall that if token==null
1067    /// at end of any token rule, it creates one for you and emits it.
1068    /// </summary>
1069    procedure Skip;
1070
1071    /// <summary>This is the lexer entry point that sets instance var 'token' </summary>
1072    procedure DoTokens;
1073
1074    /// <summary>
1075    /// Currently does not support multiple emits per nextToken invocation
1076    /// for efficiency reasons.  Subclass and override this method and
1077    /// NextToken (to push tokens into a list and pull from that list rather
1078    /// than a single variable as this implementation does).
1079    /// </summary>
1080    procedure Emit(const Token: IToken); overload;
1081
1082    /// <summary>
1083    /// The standard method called to automatically emit a token at the
1084    /// outermost lexical rule.  The token object should point into the
1085    /// char buffer start..stop.  If there is a text override in 'text',
1086    /// use that to set the token's text.
1087    /// </summary>
1088    /// <remarks><para>Override this method to emit custom Token objects.</para>
1089    /// <para>If you are building trees, then you should also override
1090    /// Parser or TreeParser.GetMissingSymbol().</para>
1091    ///</remarks>
1092    function Emit: IToken; overload;
1093
1094    procedure Match(const S: String); overload;
1095    procedure Match(const C: Integer); overload;
1096    procedure MatchAny;
1097    procedure MatchRange(const A, B: Integer);
1098
1099    /// <summary>
1100    /// Lexers can normally Match any char in it's vocabulary after matching
1101    /// a token, so do the easy thing and just kill a character and hope
1102    /// it all works out.  You can instead use the rule invocation stack
1103    /// to do sophisticated error recovery if you are in a Fragment rule.
1104    /// </summary>
1105    procedure Recover(const RE: ERecognitionException);
1106
1107    function GetCharErrorDisplay(const C: Integer): String;
1108
1109    procedure TraceIn(const RuleName: String; const RuleIndex: Integer);
1110    procedure TraceOut(const RuleName: String; const RuleIndex: Integer);
1111
1112    { Properties }
1113
1114    /// <summary>Set the char stream and reset the lexer </summary>
1115    property CharStream: ICharStream read GetCharStream write SetCharStream;
1116    property Line: Integer read GetLine;
1117    property CharPositionInLine: Integer read GetCharPositionInLine;
1118
1119    /// <summary>What is the index of the current character of lookahead? </summary>
1120    property CharIndex: Integer read GetCharIndex;
1121
1122    /// <summary>
1123    /// Gets or sets the 'lexeme' for the current token.
1124    /// </summary>
1125    /// <remarks>
1126    /// <para>
1127    /// The getter returns the text matched so far for the current token or any
1128    /// text override.
1129    /// </para>
1130    /// <para>
1131    /// The setter sets the complete text of this token. It overrides/wipes any
1132    /// previous changes to the text.
1133    /// </para>
1134    /// </remarks>
1135    property Text: String read GetText write SetText;
1136  end;
1137
1138  /// <summary>A parser for TokenStreams.  Parser grammars result in a subclass
1139  /// of this.
1140  /// </summary>
1141  IParser = interface(IBaseRecognizer)
1142  ['{7420879A-5D1F-43CA-BD49-2264D7514501}']
1143    { Property accessors }
1144    function GetTokenStream: ITokenStream;
1145    procedure SetTokenStream(const Value: ITokenStream);
1146
1147    { Methods }
1148    procedure TraceIn(const RuleName: String; const RuleIndex: Integer);
1149    procedure TraceOut(const RuleName: String; const RuleIndex: Integer);
1150
1151    { Properties }
1152
1153    /// <summary>Set the token stream and reset the parser </summary>
1154    property TokenStream: ITokenStream read GetTokenStream write SetTokenStream;
1155  end;
1156
1157  /// <summary>
1158  /// Rules can return start/stop info as well as possible trees and templates
1159  /// </summary>
1160  IRuleReturnScope = interface(IANTLRInterface)
1161  ['{E9870056-BF6D-4CB2-B71C-10B80797C0B4}']
1162    { Property accessors }
1163    function GetStart: IANTLRInterface;
1164    procedure SetStart(const Value: IANTLRInterface);
1165    function GetStop: IANTLRInterface;
1166    procedure SetStop(const Value: IANTLRInterface);
1167    function GetTree: IANTLRInterface;
1168    procedure SetTree(const Value: IANTLRInterface);
1169    function GetTemplate: IANTLRInterface;
1170
1171    { Properties }
1172
1173    /// <summary>Return the start token or tree </summary>
1174    property Start: IANTLRInterface read GetStart write SetStart;
1175
1176    /// <summary>Return the stop token or tree </summary>
1177    property Stop: IANTLRInterface read GetStop write SetStop;
1178
1179    /// <summary>Has a value potentially if output=AST; </summary>
1180    property Tree: IANTLRInterface read GetTree write SetTree;
1181
1182    /// <summary>
1183    /// Has a value potentially if output=template;
1184    /// Don't use StringTemplate type to avoid dependency on ST assembly
1185    /// </summary>
1186    property Template: IANTLRInterface read GetTemplate;
1187  end;
1188
1189  /// <summary>
1190  /// Rules that return more than a single value must return an object
1191  /// containing all the values.  Besides the properties defined in
1192  /// RuleLabelScope.PredefinedRulePropertiesScope there may be user-defined
1193  /// return values.  This class simply defines the minimum properties that
1194  /// are always defined and methods to access the others that might be
1195  /// available depending on output option such as template and tree.
1196  ///
1197  /// Note text is not an actual property of the return value, it is computed
1198  /// from start and stop using the input stream's ToString() method.  I
1199  /// could add a ctor to this so that we can pass in and store the input
1200  /// stream, but I'm not sure we want to do that.  It would seem to be undefined
1201  /// to get the .text property anyway if the rule matches tokens from multiple
1202  /// input streams.
1203  ///
1204  /// I do not use getters for fields of objects that are used simply to
1205  /// group values such as this aggregate.
1206  /// </summary>
1207  IParserRuleReturnScope = interface(IRuleReturnScope)
1208  ['{9FB62050-E23B-4FE4-87D5-2C1EE67AEC3E}']
1209  end;
1210
1211  /// <summary>Useful for dumping out the input stream after doing some
1212  /// augmentation or other manipulations.
1213  /// </summary>
1214  ///
1215  /// <remarks>
1216  /// You can insert stuff, Replace, and delete chunks.  Note that the
1217  /// operations are done lazily--only if you convert the buffer to a
1218  /// String.  This is very efficient because you are not moving data around
1219  /// all the time.  As the buffer of tokens is converted to strings, the
1220  /// ToString() method(s) check to see if there is an operation at the
1221  /// current index.  If so, the operation is done and then normal String
1222  /// rendering continues on the buffer.  This is like having multiple Turing
1223  /// machine instruction streams (programs) operating on a single input tape. :)
1224  ///
1225  /// Since the operations are done lazily at ToString-time, operations do not
1226  /// screw up the token index values.  That is, an insert operation at token
1227  /// index I does not change the index values for tokens I+1..N-1.
1228  ///
1229  /// Because operations never actually alter the buffer, you may always get
1230  /// the original token stream back without undoing anything.  Since
1231  /// the instructions are queued up, you can easily simulate transactions and
1232  /// roll back any changes if there is an error just by removing instructions.
1233  /// For example,
1234  ///
1235  /// var
1236  ///   Input: ICharStream;
1237  ///   Lex: ILexer;
1238  ///   Tokens: ITokenRewriteStream;
1239  ///   Parser: IParser;
1240  /// Input := TANTLRFileStream.Create('input');
1241  /// Lex := TLexer.Create(Input);
1242  /// Tokens := TTokenRewriteStream.Create(Lex);
1243  /// Parser := TParser.Create(tokens);
1244  /// Parser.startRule();
1245  ///
1246  /// Then in the rules, you can execute
1247  /// var
1248  ///   t,u: IToken;
1249  /// ...
1250  /// Input.InsertAfter(t, 'text to put after t');
1251  /// Input.InsertAfter(u, 'text after u');
1252  /// WriteLn(Tokens.ToString());
1253  ///
1254  /// Actually, you have to cast the 'input' to a TokenRewriteStream. :(
1255  ///
1256  /// You can also have multiple "instruction streams" and get multiple
1257  /// rewrites from a single pass over the input.  Just name the instruction
1258  /// streams and use that name again when printing the buffer.  This could be
1259  /// useful for generating a C file and also its header file--all from the
1260  /// same buffer:
1261  ///
1262  /// Tokens.InsertAfter('pass1', t, 'text to put after t');
1263  /// Tokens.InsertAfter('pass2', u, 'text after u');
1264  /// WriteLn(Tokens.ToString('pass1'));
1265  /// WriteLn(Tokens.ToString('pass2'));
1266  ///
1267  /// If you don't use named rewrite streams, a "default" stream is used as
1268  /// the first example shows.
1269  /// </remarks>
1270  ITokenRewriteStream = interface(ICommonTokenStream)
1271  ['{7B49CBB6-9395-4781-B616-F201889EEA13}']
1272    { Methods }
1273    procedure Rollback(const InstructionIndex: Integer); overload;
1274
1275    /// <summary>Rollback the instruction stream for a program so that
1276    /// the indicated instruction (via instructionIndex) is no
1277    /// longer in the stream.  UNTESTED!
1278    /// </summary>
1279    procedure Rollback(const ProgramName: String;
1280      const InstructionIndex: Integer); overload;
1281
1282    procedure DeleteProgram; overload;
1283
1284    /// <summary>Reset the program so that no instructions exist </summary>
1285    procedure DeleteProgram(const ProgramName: String); overload;
1286
1287    procedure InsertAfter(const T: IToken; const Text: IANTLRInterface); overload;
1288    procedure InsertAfter(const Index: Integer; const Text: IANTLRInterface); overload;
1289    procedure InsertAfter(const ProgramName: String; const T: IToken;
1290      const Text: IANTLRInterface); overload;
1291    procedure InsertAfter(const ProgramName: String; const Index: Integer;
1292      const Text: IANTLRInterface); overload;
1293    procedure InsertAfter(const T: IToken; const Text: String); overload;
1294    procedure InsertAfter(const Index: Integer; const Text: String); overload;
1295    procedure InsertAfter(const ProgramName: String; const T: IToken;
1296      const Text: String); overload;
1297    procedure InsertAfter(const ProgramName: String; const Index: Integer;
1298      const Text: String); overload;
1299
1300    procedure InsertBefore(const T: IToken; const Text: IANTLRInterface); overload;
1301    procedure InsertBefore(const Index: Integer; const Text: IANTLRInterface); overload;
1302    procedure InsertBefore(const ProgramName: String; const T: IToken;
1303      const Text: IANTLRInterface); overload;
1304    procedure InsertBefore(const ProgramName: String; const Index: Integer;
1305      const Text: IANTLRInterface); overload;
1306    procedure InsertBefore(const T: IToken; const Text: String); overload;
1307    procedure InsertBefore(const Index: Integer; const Text: String); overload;
1308    procedure InsertBefore(const ProgramName: String; const T: IToken;
1309      const Text: String); overload;
1310    procedure InsertBefore(const ProgramName: String; const Index: Integer;
1311      const Text: String); overload;
1312
1313    procedure Replace(const Index: Integer; const Text: IANTLRInterface); overload;
1314    procedure Replace(const Start, Stop: Integer; const Text: IANTLRInterface); overload;
1315    procedure Replace(const IndexT: IToken; const Text: IANTLRInterface); overload;
1316    procedure Replace(const Start, Stop: IToken; const Text: IANTLRInterface); overload;
1317    procedure Replace(const ProgramName: String; const Start, Stop: Integer;
1318      const Text: IANTLRInterface); overload;
1319    procedure Replace(const ProgramName: String; const Start, Stop: IToken;
1320      const Text: IANTLRInterface); overload;
1321    procedure Replace(const Index: Integer; const Text: String); overload;
1322    procedure Replace(const Start, Stop: Integer; const Text: String); overload;
1323    procedure Replace(const IndexT: IToken; const Text: String); overload;
1324    procedure Replace(const Start, Stop: IToken; const Text: String); overload;
1325    procedure Replace(const ProgramName: String; const Start, Stop: Integer;
1326      const Text: String); overload;
1327    procedure Replace(const ProgramName: String; const Start, Stop: IToken;
1328      const Text: String); overload;
1329
1330    procedure Delete(const Index: Integer); overload;
1331    procedure Delete(const Start, Stop: Integer); overload;
1332    procedure Delete(const IndexT: IToken); overload;
1333    procedure Delete(const Start, Stop: IToken); overload;
1334    procedure Delete(const ProgramName: String; const Start, Stop: Integer); overload;
1335    procedure Delete(const ProgramName: String; const Start, Stop: IToken); overload;
1336
1337    function GetLastRewriteTokenIndex: Integer;
1338
1339    function ToOriginalString: String; overload;
1340    function ToOriginalString(const Start, Stop: Integer): String; overload;
1341
1342    function ToString(const ProgramName: String): String; overload;
1343    function ToString(const ProgramName: String;
1344      const Start, Stop: Integer): String; overload;
1345
1346    function ToDebugString: String; overload;
1347    function ToDebugString(const Start, Stop: Integer): String; overload;
1348  end;
1349
1350  /// <summary>The root of the ANTLR exception hierarchy.</summary>
1351  /// <remarks>
1352  /// To avoid English-only error messages and to generally make things
1353  /// as flexible as possible, these exceptions are not created with strings,
1354  /// but rather the information necessary to generate an error.  Then
1355  /// the various reporting methods in Parser and Lexer can be overridden
1356  /// to generate a localized error message.  For example, MismatchedToken
1357  /// exceptions are built with the expected token type.
1358  /// So, don't expect getMessage() to return anything.
1359  ///
1360  /// You can access the stack trace, which means that you can compute the
1361  /// complete trace of rules from the start symbol. This gives you considerable
1362  /// context information with which to generate useful error messages.
1363  ///
1364  /// ANTLR generates code that throws exceptions upon recognition error and
1365  /// also generates code to catch these exceptions in each rule.  If you
1366  /// want to quit upon first error, you can turn off the automatic error
1367  /// handling mechanism using rulecatch action, but you still need to
1368  /// override methods mismatch and recoverFromMismatchSet.
1369  ///
1370  /// In general, the recognition exceptions can track where in a grammar a
1371  /// problem occurred and/or what was the expected input.  While the parser
1372  /// knows its state (such as current input symbol and line info) that
1373  /// state can change before the exception is reported so current token index
1374  /// is computed and stored at exception time.  From this info, you can
1375  /// perhaps print an entire line of input not just a single token, for example.
1376  /// Better to just say the recognizer had a problem and then let the parser
1377  /// figure out a fancy report.
1378  /// </remarks>
1379  ERecognitionException = class(Exception)
1380  strict private
1381    FApproximateLineInfo: Boolean;
1382  strict protected
1383    /// <summary>What input stream did the error occur in? </summary>
1384    FInput: IIntStream;
1385
1386    /// <summary>
1387    /// What is index of token/char were we looking at when the error occurred?
1388    /// </summary>
1389    FIndex: Integer;
1390
1391    /// <summary>
1392    /// The current Token when an error occurred.  Since not all streams
1393    /// can retrieve the ith Token, we have to track the Token object.
1394    /// </summary>
1395    FToken: IToken;
1396
1397    /// <summary>[Tree parser] Node with the problem.</summary>
1398    FNode: IANTLRInterface;
1399
1400    /// <summary>The current char when an error occurred. For lexers. </summary>
1401    FC: Integer;
1402
1403    /// <summary>Track the line at which the error occurred in case this is
1404    /// generated from a lexer.  We need to track this since the
1405    /// unexpected char doesn't carry the line info.
1406    /// </summary>
1407    FLine: Integer;
1408    FCharPositionInLine: Integer;
1409  strict protected
1410    procedure ExtractInformationFromTreeNodeStream(const Input: IIntStream);
1411    function GetUnexpectedType: Integer; virtual;
1412  public
1413    /// <summary>Used for remote debugger deserialization </summary>
1414    constructor Create; overload;
1415    constructor Create(const AMessage: String); overload;
1416    constructor Create(const AInput: IIntStream); overload;
1417    constructor Create(const AMessage: String; const AInput: IIntStream); overload;
1418
1419    /// <summary>
1420    /// If you are parsing a tree node stream, you will encounter some
1421    /// imaginary nodes w/o line/col info.  We now search backwards looking
1422    /// for most recent token with line/col info, but notify getErrorHeader()
1423    /// that info is approximate.
1424    /// </summary>
1425    property ApproximateLineInfo: Boolean read FApproximateLineInfo write FApproximateLineInfo;
1426
1427    /// <summary>
1428    /// Returns the current Token when the error occurred (for parsers
1429    /// although a tree parser might also set the token)
1430    /// </summary>
1431    property Token: IToken read FToken write FToken;
1432
1433    /// <summary>
1434    /// Returns the [tree parser] node where the error occured (for tree parsers).
1435    /// </summary>
1436    property Node: IANTLRInterface read FNode write FNode;
1437
1438    /// <summary>
1439    /// Returns the line at which the error occurred (for lexers)
1440    /// </summary>
1441    property Line: Integer read FLine write FLine;
1442
1443    /// <summary>
1444    /// Returns the character position in the line when the error
1445    /// occurred (for lexers)
1446    /// </summary>
1447    property CharPositionInLine: Integer read FCharPositionInLine write FCharPositionInLine;
1448
1449    /// <summary>Returns the input stream in which the error occurred</summary>
1450    property Input: IIntStream read FInput write FInput;
1451
1452    /// <summary>
1453    /// Returns the token type or char of the unexpected input element
1454    /// </summary>
1455    property UnexpectedType: Integer read GetUnexpectedType;
1456
1457    /// <summary>
1458    /// Returns the current char when the error occurred (for lexers)
1459    /// </summary>
1460    property Character: Integer read FC write FC;
1461
1462    /// <summary>
1463    /// Returns the token/char index in the stream when the error occurred
1464    /// </summary>
1465    property Index: Integer read FIndex write FIndex;
1466  end;
1467
1468  /// <summary>
1469  /// A mismatched char or Token or tree node.
1470  /// </summary>
1471  EMismatchedTokenException = class(ERecognitionException)
1472  strict private
1473    FExpecting: Integer;
1474  public
1475    constructor Create(const AExpecting: Integer; const AInput: IIntStream);
1476
1477    function ToString: String; override;
1478
1479    property Expecting: Integer read FExpecting write FExpecting;
1480  end;
1481
1482  EUnwantedTokenException = class(EMismatchedTokenException)
1483  strict private
1484    function GetUnexpectedToken: IToken;
1485  public
1486    property UnexpectedToken: IToken read GetUnexpectedToken;
1487
1488    function ToString: String; override;
1489  end;
1490
1491  /// <summary>
1492  /// We were expecting a token but it's not found. The current token
1493  /// is actually what we wanted next. Used for tree node errors too.
1494  /// </summary>
1495  EMissingTokenException = class(EMismatchedTokenException)
1496  strict private
1497    FInserted: IANTLRInterface;
1498    function GetMissingType: Integer;
1499  public
1500    constructor Create(const AExpecting: Integer; const AInput: IIntStream;
1501      const AInserted: IANTLRInterface);
1502
1503    function ToString: String; override;
1504
1505    property MissingType: Integer read GetMissingType;
1506    property Inserted: IANTLRInterface read FInserted write FInserted;
1507  end;
1508
1509  EMismatchedTreeNodeException = class(ERecognitionException)
1510  strict private
1511    FExpecting: Integer;
1512  public
1513    constructor Create(const AExpecting: Integer; const AInput: IIntStream);
1514
1515    function ToString: String; override;
1516
1517    property Expecting: Integer read FExpecting write FExpecting;
1518  end;
1519
1520  ENoViableAltException = class(ERecognitionException)
1521  strict private
1522    FGrammarDecisionDescription: String;
1523    FDecisionNumber: Integer;
1524    FStateNumber: Integer;
1525  public
1526    constructor Create(const AGrammarDecisionDescription: String;
1527      const ADecisionNumber, AStateNumber: Integer; const AInput: IIntStream);
1528
1529    function ToString: String; override;
1530
1531    property GrammarDecisionDescription: String read FGrammarDecisionDescription;
1532    property DecisionNumber: Integer read FDecisionNumber;
1533    property StateNumber: Integer read FStateNumber;
1534  end;
1535
1536  EEarlyExitException = class(ERecognitionException)
1537  strict private
1538    FDecisionNumber: Integer;
1539  public
1540    constructor Create(const ADecisionNumber: Integer; const AInput: IIntStream);
1541
1542    property DecisionNumber: Integer read FDecisionNumber;
1543  end;
1544
1545  EMismatchedSetException = class(ERecognitionException)
1546  strict private
1547    FExpecting: IBitSet;
1548  public
1549    constructor Create(const AExpecting: IBitSet; const AInput: IIntStream);
1550
1551    function ToString: String; override;
1552
1553    property Expecting: IBitSet read FExpecting write FExpecting;
1554  end;
1555
1556  EMismatchedNotSetException = class(EMismatchedSetException)
1557
1558  public
1559    function ToString: String; override;
1560  end;
1561
1562  EFailedPredicateException = class(ERecognitionException)
1563  strict private
1564    FRuleName: String;
1565    FPredicateText: String;
1566  public
1567    constructor Create(const AInput: IIntStream; const ARuleName,
1568      APredicateText: String);
1569
1570    function ToString: String; override;
1571
1572    property RuleName: String read FRuleName write FRuleName;
1573    property PredicateText: String read FPredicateText write FPredicateText;
1574  end;
1575
1576  EMismatchedRangeException = class(ERecognitionException)
1577  strict private
1578    FA: Integer;
1579    FB: Integer;
1580  public
1581    constructor Create(const AA, AB: Integer; const AInput: IIntStream);
1582
1583    function ToString: String; override;
1584
1585    property A: Integer read FA write FA;
1586    property B: Integer read FB write FB;
1587  end;
1588
1589type
1590  TCharStreamState = class(TANTLRObject, ICharStreamState)
1591  strict private
1592    FP: Integer;
1593    FLine: Integer;
1594    FCharPositionInLine: Integer;
1595  protected
1596    { ICharStreamState }
1597    function GetP: Integer;
1598    procedure SetP(const Value: Integer);
1599    function GetLine: Integer;
1600    procedure SetLine(const Value: Integer);
1601    function GetCharPositionInLine: Integer;
1602    procedure SetCharPositionInLine(const Value: Integer);
1603  end;
1604
1605type
1606  TANTLRStringStream = class(TANTLRObject, IANTLRStringStream, ICharStream)
1607  private
1608    FData: PChar;
1609    FOwnsData: Boolean;
1610
1611    /// <summary>How many characters are actually in the buffer?</summary>
1612    FN: Integer;
1613
1614    /// <summary>Current line number within the input (1..n )</summary>
1615    FLine: Integer;
1616
1617    /// <summary>Index in our array for the next char (0..n-1)</summary>
1618    FP: Integer;
1619
1620    /// <summary>
1621    /// The index of the character relative to the beginning of the
1622    /// line (0..n-1)
1623    /// </summary>
1624    FCharPositionInLine: Integer;
1625
1626    /// <summary>
1627    /// Tracks the depth of nested <see cref="IIntStream.Mark"/> calls
1628    /// </summary>
1629    FMarkDepth: Integer;
1630
1631    /// <summary>
1632    /// A list of CharStreamState objects that tracks the stream state
1633    /// (i.e. line, charPositionInLine, and p) that can change as you
1634    /// move through the input stream.  Indexed from 1..markDepth.
1635    /// A null is kept @ index 0.  Create upon first call to Mark().
1636    /// </summary>
1637    FMarkers: IList<ICharStreamState>;
1638
1639    /// <summary>
1640    /// Track the last Mark() call result value for use in Rewind().
1641    /// </summary>
1642    FLastMarker: Integer;
1643    /// <summary>
1644    /// What is name or source of this char stream?
1645    /// </summary>
1646    FName: String;
1647  protected
1648    { IIntStream }
1649    function GetSourceName: String; virtual;
1650
1651    procedure Consume; virtual;
1652    function LA(I: Integer): Integer; virtual;
1653    function LAChar(I: Integer): Char;
1654    function Index: Integer;
1655    function Size: Integer;
1656    function Mark: Integer; virtual;
1657    procedure Rewind(const Marker: Integer); overload; virtual;
1658    procedure Rewind; overload; virtual;
1659    procedure Release(const Marker: Integer); virtual;
1660    procedure Seek(const Index: Integer); virtual;
1661
1662    property SourceName: String read GetSourceName write FName;
1663  protected
1664    { ICharStream }
1665    function GetLine: Integer; virtual;
1666    procedure SetLine(const Value: Integer); virtual;
1667    function GetCharPositionInLine: Integer; virtual;
1668    procedure SetCharPositionInLine(const Value: Integer); virtual;
1669    function LT(const I: Integer): Integer; virtual;
1670    function Substring(const Start, Stop: Integer): String; virtual;
1671  protected
1672    { IANTLRStringStream }
1673    procedure Reset; virtual;
1674  public
1675    constructor Create; overload;
1676
1677    /// <summary>
1678    /// Initializes a new instance of the ANTLRStringStream class for the
1679    /// specified string. This copies data from the string to a local
1680    /// character array
1681    /// </summary>
1682    constructor Create(const AInput: String); overload;
1683
1684    /// <summary>
1685    /// Initializes a new instance of the ANTLRStringStream class for the
1686    /// specified character array. This is the preferred constructor as
1687    /// no data is copied
1688    /// </summary>
1689    constructor Create(const AData: PChar;
1690      const ANumberOfActualCharsInArray: Integer); overload;
1691
1692    destructor Destroy; override;
1693  end;
1694
1695  TANTLRFileStream = class(TANTLRStringStream, IANTLRFileStream)
1696  strict private
1697    /// <summary>Fully qualified name of the stream's underlying file</summary>
1698    FFileName: String;
1699  protected
1700    { IIntStream }
1701    function GetSourceName: String; override;
1702  protected
1703    { IANTLRFileStream }
1704
1705    procedure Load(const FileName: String; const Encoding: TEncoding); virtual;
1706  public
1707    /// <summary>
1708    /// Initializes a new instance of the ANTLRFileStream class for the
1709    /// specified file name
1710    /// </summary>
1711    constructor Create(const AFileName: String); overload;
1712
1713    /// <summary>
1714    /// Initializes a new instance of the ANTLRFileStream class for the
1715    /// specified file name and encoding
1716    /// </summary>
1717    constructor Create(const AFileName: String; const AEncoding: TEncoding); overload;
1718  end;
1719
1720  TBitSet = class(TANTLRObject, IBitSet, ICloneable)
1721  strict private
1722    const
1723      BITS = 64; // number of bits / ulong
1724      LOG_BITS = 6; // 2 shl 6 = 64
1725
1726      ///<summary> We will often need to do a mod operator (i mod nbits).
1727      /// Its turns out that, for powers of two, this mod operation is
1728      ///  same as <![CDATA[(I and (nbits-1))]]>.  Since mod is slow, we use a precomputed
1729      /// mod mask to do the mod instead.
1730      /// </summary>
1731      MOD_MASK = BITS - 1;
1732  strict private
1733    /// <summary>The actual data bits </summary>
1734    FBits: TUInt64Array;
1735  strict private
1736    class function WordNumber(const Bit: Integer): Integer; static;
1737    class function BitMask(const BitNumber: Integer): UInt64; static;
1738    class function NumWordsToHold(const El: Integer): Integer; static;
1739  protected
1740    { ICloneable }
1741    function Clone: IANTLRInterface; virtual;
1742  protected
1743    { IBitSet }
1744    function GetIsNil: Boolean; virtual;
1745    function BitSetOr(const A: IBitSet): IBitSet; virtual;
1746    procedure Add(const El: Integer); virtual;
1747    procedure GrowToInclude(const Bit: Integer); virtual;
1748    procedure OrInPlace(const A: IBitSet); virtual;
1749    function Size: Integer; virtual;
1750    function Member(const El: Integer): Boolean; virtual;
1751    procedure Remove(const El: Integer); virtual;
1752    function NumBits: Integer; virtual;
1753    function LengthInLongWords: Integer; virtual;
1754    function ToArray: TIntegerArray; virtual;
1755    function ToPackedArray: TUInt64Array; virtual;
1756    function ToString(const TokenNames: TStringArray): String; reintroduce; overload; virtual;
1757  public
1758    /// <summary>Construct a bitset of size one word (64 bits) </summary>
1759    constructor Create; overload;
1760
1761    /// <summary>Construction from a static array of ulongs </summary>
1762    constructor Create(const ABits: array of UInt64); overload;
1763
1764    /// <summary>Construction from a list of integers </summary>
1765    constructor Create(const AItems: IList<Integer>); overload;
1766
1767    /// <summary>Construct a bitset given the size</summary>
1768    /// <param name="nbits">The size of the bitset in bits</param>
1769    constructor Create(const ANBits: Integer); overload;
1770
1771    class function BitSetOf(const El: Integer): IBitSet; overload; static;
1772    class function BitSetOf(const A, B: Integer): IBitSet; overload; static;
1773    class function BitSetOf(const A, B, C: Integer): IBitSet; overload; static;
1774    class function BitSetOf(const A, B, C, D: Integer): IBitSet; overload; static;
1775
1776    function ToString: String; overload; override;
1777    function Equals(Obj: TObject): Boolean; override;
1778  end;
1779
1780  TRecognizerSharedState = class(TANTLRObject, IRecognizerSharedState)
1781  strict private
1782    FFollowing: TBitSetArray;
1783    FFollowingStackPointer: Integer;
1784    FErrorRecovery: Boolean;
1785    FLastErrorIndex: Integer;
1786    FFailed: Boolean;
1787    FSyntaxErrors: Integer;
1788    FBacktracking: Integer;
1789    FRuleMemo: TDictionaryArray<Integer, Integer>;
1790    FToken: IToken;
1791    FTokenStartCharIndex: Integer;
1792    FTokenStartLine: Integer;
1793    FTokenStartCharPositionInLine: Integer;
1794    FChannel: Integer;
1795    FTokenType: Integer;
1796    FText: String;
1797  protected
1798    { IRecognizerSharedState }
1799    function GetFollowing: TBitSetArray;
1800    procedure SetFollowing(const Value: TBitSetArray);
1801    function GetFollowingStackPointer: Integer;
1802    procedure SetFollowingStackPointer(const Value: Integer);
1803    function GetErrorRecovery: Boolean;
1804    procedure SetErrorRecovery(const Value: Boolean);
1805    function GetLastErrorIndex: Integer;
1806    procedure SetLastErrorIndex(const Value: Integer);
1807    function GetFailed: Boolean;
1808    procedure SetFailed(const Value: Boolean);
1809    function GetSyntaxErrors: Integer;
1810    procedure SetSyntaxErrors(const Value: Integer);
1811    function GetBacktracking: Integer;
1812    procedure SetBacktracking(const Value: Integer);
1813    function GetRuleMemo: TDictionaryArray<Integer, Integer>;
1814    function GetRuleMemoCount: Integer;
1815    procedure SetRuleMemoCount(const Value: Integer);
1816    function GetToken: IToken;
1817    procedure SetToken(const Value: IToken);
1818    function GetTokenStartCharIndex: Integer;
1819    procedure SetTokenStartCharIndex(const Value: Integer);
1820    function GetTokenStartLine: Integer;
1821    procedure SetTokenStartLine(const Value: Integer);
1822    function GetTokenStartCharPositionInLine: Integer;
1823    procedure SetTokenStartCharPositionInLine(const Value: Integer);
1824    function GetChannel: Integer;
1825    procedure SetChannel(const Value: Integer);
1826    function GetTokenType: Integer;
1827    procedure SetTokenType(const Value: Integer);
1828    function GetText: String;
1829    procedure SetText(const Value: String);
1830  public
1831    constructor Create;
1832  end;
1833
1834  TCommonToken = class(TANTLRObject, ICommonToken, IToken)
1835  strict protected
1836    FTokenType: Integer;
1837    FLine: Integer;
1838    FCharPositionInLine: Integer;
1839    FChannel: Integer;
1840    FInput: ICharStream;
1841
1842    /// <summary>We need to be able to change the text once in a while.  If
1843    /// this is non-null, then getText should return this.  Note that
1844    /// start/stop are not affected by changing this.
1845    /// </summary>
1846    FText: String;
1847
1848    /// <summary>What token number is this from 0..n-1 tokens; &lt; 0 implies invalid index </summary>
1849    FIndex: Integer;
1850
1851    /// <summary>The char position into the input buffer where this token starts </summary>
1852    FStart: Integer;
1853
1854    /// <summary>The char position into the input buffer where this token stops </summary>
1855    FStop: Integer;
1856  protected
1857    { IToken }
1858    function GetTokenType: Integer; virtual;
1859    procedure SetTokenType(const Value: Integer); virtual;
1860    function GetLine: Integer; virtual;
1861    procedure SetLine(const Value: Integer); virtual;
1862    function GetCharPositionInLine: Integer; virtual;
1863    procedure SetCharPositionInLine(const Value: Integer); virtual;
1864    function GetChannel: Integer; virtual;
1865    procedure SetChannel(const Value: Integer); virtual;
1866    function GetTokenIndex: Integer; virtual;
1867    procedure SetTokenIndex(const Value: Integer); virtual;
1868    function GetText: String; virtual;
1869    procedure SetText(const Value: String); virtual;
1870  protected
1871    { ICommonToken }
1872    function GetStartIndex: Integer;
1873    procedure SetStartIndex(const Value: Integer);
1874    function GetStopIndex: Integer;
1875    procedure SetStopIndex(const Value: Integer);
1876    function GetInputStream: ICharStream;
1877    procedure SetInputStream(const Value: ICharStream);
1878  protected
1879    constructor Create; overload;
1880  public
1881    constructor Create(const ATokenType: Integer); overload;
1882    constructor Create(const AInput: ICharStream; const ATokenType, AChannel,
1883      AStart, AStop: Integer); overload;
1884    constructor Create(const ATokenType: Integer; const AText: String); overload;
1885    constructor Create(const AOldToken: IToken); overload;
1886
1887    function ToString: String; override;
1888  end;
1889
1890  TClassicToken = class(TANTLRObject, IClassicToken, IToken)
1891  strict private
1892    FText: String;
1893    FTokenType: Integer;
1894    FLine: Integer;
1895    FCharPositionInLine: Integer;
1896    FChannel: Integer;
1897
1898    /// <summary>What token number is this from 0..n-1 tokens </summary>
1899    FIndex: Integer;
1900  protected
1901    { IClassicToken }
1902    function GetTokenType: Integer; virtual;
1903    procedure SetTokenType(const Value: Integer); virtual;
1904    function GetLine: Integer; virtual;
1905    procedure SetLine(const Value: Integer); virtual;
1906    function GetCharPositionInLine: Integer; virtual;
1907    procedure SetCharPositionInLine(const Value: Integer); virtual;
1908    function GetChannel: Integer; virtual;
1909    procedure SetChannel(const Value: Integer); virtual;
1910    function GetTokenIndex: Integer; virtual;
1911    procedure SetTokenIndex(const Value: Integer); virtual;
1912    function GetText: String; virtual;
1913    procedure SetText(const Value: String); virtual;
1914    function GetInputStream: ICharStream; virtual;
1915    procedure SetInputStream(const Value: ICharStream); virtual;
1916  public
1917    constructor Create(const ATokenType: Integer); overload;
1918    constructor Create(const AOldToken: IToken); overload;
1919    constructor Create(const ATokenType: Integer; const AText: String); overload;
1920    constructor Create(const ATokenType: Integer; const AText: String;
1921      const AChannel: Integer); overload;
1922
1923    function ToString: String; override;
1924  end;
1925
1926  TToken = class sealed
1927  public
1928    const
1929      EOR_TOKEN_TYPE = 1;
1930
1931      /// <summary>imaginary tree navigation type; traverse "get child" link </summary>
1932      DOWN = 2;
1933
1934      /// <summary>imaginary tree navigation type; finish with a child list </summary>
1935      UP = 3;
1936
1937      MIN_TOKEN_TYPE = UP + 1;
1938      EOF = Integer(cscEOF);
1939      INVALID_TOKEN_TYPE = 0;
1940
1941      /// <summary>
1942      /// All tokens go to the parser (unless skip() is called in that rule)
1943      /// on a particular "channel".  The parser tunes to a particular channel
1944      /// so that whitespace etc... can go to the parser on a "hidden" channel.
1945      /// </summary>
1946      DEFAULT_CHANNEL = 0;
1947
1948      /// <summary>
1949      /// Anything on different channel than DEFAULT_CHANNEL is not parsed by parser.
1950      /// </summary>
1951      HIDDEN_CHANNEL = 99;
1952  public
1953    class var
1954      EOF_TOKEN: IToken;
1955      INVALID_TOKEN: IToken;
1956      /// <summary>
1957      /// In an action, a lexer rule can set token to this SKIP_TOKEN and ANTLR
1958      /// will avoid creating a token for this symbol and try to fetch another.
1959      /// </summary>
1960      SKIP_TOKEN: IToken;
1961  private
1962    class procedure Initialize; static;
1963  end;
1964
1965  /// <summary>
1966    /// Global constants
1967  /// </summary>
1968  TConstants = class sealed
1969  public
1970    const
1971      VERSION = '3.1b1';
1972
1973      // Moved to version 2 for v3.1: added grammar name to enter/exit Rule
1974      DEBUG_PROTOCOL_VERSION = '2';
1975
1976      ANTLRWORKS_DIR = 'antlrworks';
1977  end;
1978
1979  TBaseRecognizer = class abstract(TANTLRObject, IBaseRecognizer)
1980  public
1981    const
1982      MEMO_RULE_FAILED = -2;
1983      MEMO_RULE_UNKNOWN = -1;
1984      INITIAL_FOLLOW_STACK_SIZE = 100;
1985      NEXT_TOKEN_RULE_NAME = 'nextToken';
1986      // copies from Token object for convenience in actions
1987      DEFAULT_TOKEN_CHANNEL = TToken.DEFAULT_CHANNEL;
1988      HIDDEN = TToken.HIDDEN_CHANNEL;
1989  strict protected
1990    /// <summary>
1991    /// An externalized representation of the - shareable - internal state of
1992    /// this lexer, parser or tree parser.
1993    /// </summary>
1994    /// <remarks>
1995    /// The state of a lexer, parser, or tree parser are collected into
1996    /// external state objects so that the state can be shared. This sharing
1997    /// is needed to have one grammar import others and share same error
1998    /// variables and other state variables.  It's a kind of explicit multiple
1999    /// inheritance via delegation of methods and shared state.
2000    /// </remarks>
2001    FState: IRecognizerSharedState;
2002
2003    property State: IRecognizerSharedState read FState;
2004  strict protected
2005    /// <summary>
2006    /// Match needs to return the current input symbol, which gets put
2007    /// into the label for the associated token ref; e.g., x=ID.  Token
2008    /// and tree parsers need to return different objects. Rather than test
2009    /// for input stream type or change the IntStream interface, I use
2010    /// a simple method to ask the recognizer to tell me what the current
2011    /// input symbol is.
2012    /// </summary>
2013    /// <remarks>This is ignored for lexers.</remarks>
2014    function GetCurrentInputSymbol(const Input: IIntStream): IANTLRInterface; virtual;
2015
2016    /// <summary>
2017    /// Factor out what to do upon token mismatch so tree parsers can behave
2018    /// differently.  Override and call MismatchRecover(input, ttype, follow)
2019    /// to get single token insertion and deletion. Use this to turn off
2020    /// single token insertion and deletion. Override mismatchRecover
2021    /// to call this instead.
2022    /// </summary>
2023    procedure Mismatch(const Input: IIntStream; const TokenType: Integer;
2024      const Follow: IBitSet); virtual;
2025
2026    /// <summary>
2027    /// Attempt to Recover from a single missing or extra token.
2028    /// </summary>
2029    /// <remarks>
2030    /// EXTRA TOKEN
2031    ///
2032    /// LA(1) is not what we are looking for.  If LA(2) has the right token,
2033    /// however, then assume LA(1) is some extra spurious token.  Delete it
2034    /// and LA(2) as if we were doing a normal Match(), which advances the
2035    /// input.
2036    ///
2037    /// MISSING TOKEN
2038    ///
2039    /// If current token is consistent with what could come after
2040    /// ttype then it is ok to "insert" the missing token, else throw
2041    /// exception For example, Input "i=(3;" is clearly missing the
2042    /// ')'.  When the parser returns from the nested call to expr, it
2043    /// will have call chain:
2044    ///
2045    /// stat -> expr -> atom
2046    ///
2047    /// and it will be trying to Match the ')' at this point in the
2048    /// derivation:
2049    ///
2050    /// => ID '=' '(' INT ')' ('+' atom)* ';'
2051    /// ^
2052    /// Match() will see that ';' doesn't Match ')' and report a
2053    /// mismatched token error.  To Recover, it sees that LA(1)==';'
2054    /// is in the set of tokens that can follow the ')' token
2055    /// reference in rule atom.  It can assume that you forgot the ')'.
2056    /// </remarks>
2057    function RecoverFromMismatchedToken(const Input: IIntStream;
2058      const TokenType: Integer; const Follow: IBitSet): IANTLRInterface; virtual;
2059
2060    /// <summary>
2061    /// Conjure up a missing token during error recovery.
2062    /// </summary>
2063    /// <remarks>
2064    /// The recognizer attempts to recover from single missing
2065    /// symbols. But, actions might refer to that missing symbol.
2066    /// For example, x=ID {f($x);}. The action clearly assumes
2067    /// that there has been an identifier matched previously and that
2068    /// $x points at that token. If that token is missing, but
2069    /// the next token in the stream is what we want we assume that
2070    /// this token is missing and we keep going. Because we
2071    /// have to return some token to replace the missing token,
2072    /// we have to conjure one up. This method gives the user control
2073    /// over the tokens returned for missing tokens. Mostly,
2074    /// you will want to create something special for identifier
2075    /// tokens. For literals such as '{' and ',', the default
2076    /// action in the parser or tree parser works. It simply creates
2077    /// a CommonToken of the appropriate type. The text will be the token.
2078    /// If you change what tokens must be created by the lexer,
2079    /// override this method to create the appropriate tokens.
2080    /// </remarks>
2081    function GetMissingSymbol(const Input: IIntStream;
2082      const E: ERecognitionException; const ExpectedTokenType: Integer;
2083      const Follow: IBitSet): IANTLRInterface; virtual;
2084
2085    /// <summary>
2086    /// Push a rule's follow set using our own hardcoded stack
2087    /// </summary>
2088    /// <param name="fset"></param>
2089    procedure PushFollow(const FSet: IBitSet);
2090
2091    /// <summary>Compute the context-sensitive FOLLOW set for current rule.
2092    /// This is set of token types that can follow a specific rule
2093    /// reference given a specific call chain.  You get the set of
2094    /// viable tokens that can possibly come next (lookahead depth 1)
2095    /// given the current call chain.  Contrast this with the
2096    /// definition of plain FOLLOW for rule r:
2097    ///
2098    /// FOLLOW(r)={x | S=>*alpha r beta in G and x in FIRST(beta)}
2099    ///
2100    /// where x in T* and alpha, beta in V*; T is set of terminals and
2101    /// V is the set of terminals and nonterminals.  In other words,
2102    /// FOLLOW(r) is the set of all tokens that can possibly follow
2103    /// references to r in *any* sentential form (context).  At
2104    /// runtime, however, we know precisely which context applies as
2105    /// we have the call chain.  We may compute the exact (rather
2106    /// than covering superset) set of following tokens.
2107    ///
2108    /// For example, consider grammar:
2109    ///
2110    /// stat : ID '=' expr ';'      // FOLLOW(stat)=={EOF}
2111    /// | "return" expr '.'
2112    /// ;
2113    /// expr : atom ('+' atom)* ;   // FOLLOW(expr)=={';','.',')'}
2114    /// atom : INT                  // FOLLOW(atom)=={'+',')',';','.'}
2115    /// | '(' expr ')'
2116    /// ;
2117    ///
2118    /// The FOLLOW sets are all inclusive whereas context-sensitive
2119    /// FOLLOW sets are precisely what could follow a rule reference.
2120    /// For input input "i=(3);", here is the derivation:
2121    ///
2122    /// stat => ID '=' expr ';'
2123    /// => ID '=' atom ('+' atom)* ';'
2124    /// => ID '=' '(' expr ')' ('+' atom)* ';'
2125    /// => ID '=' '(' atom ')' ('+' atom)* ';'
2126    /// => ID '=' '(' INT ')' ('+' atom)* ';'
2127    /// => ID '=' '(' INT ')' ';'
2128    ///
2129    /// At the "3" token, you'd have a call chain of
2130    ///
2131    /// stat -> expr -> atom -> expr -> atom
2132    ///
2133    /// What can follow that specific nested ref to atom?  Exactly ')'
2134    /// as you can see by looking at the derivation of this specific
2135    /// input.  Contrast this with the FOLLOW(atom)={'+',')',';','.'}.
2136    ///
2137    /// You want the exact viable token set when recovering from a
2138    /// token mismatch.  Upon token mismatch, if LA(1) is member of
2139    /// the viable next token set, then you know there is most likely
2140    /// a missing token in the input stream.  "Insert" one by just not
2141    /// throwing an exception.
2142    /// </summary>
2143    function ComputeContextSensitiveRuleFOLLOW: IBitSet; virtual;
2144
2145    (*  Compute the error recovery set for the current rule.  During
2146    *  rule invocation, the parser pushes the set of tokens that can
2147    *  follow that rule reference on the stack; this amounts to
2148    *  computing FIRST of what follows the rule reference in the
2149    *  enclosing rule. This local follow set only includes tokens
2150    *  from within the rule; i.e., the FIRST computation done by
2151    *  ANTLR stops at the end of a rule.
2152    *
2153    *  EXAMPLE
2154    *
2155    *  When you find a "no viable alt exception", the input is not
2156    *  consistent with any of the alternatives for rule r.  The best
2157    *  thing to do is to consume tokens until you see something that
2158    *  can legally follow a call to r *or* any rule that called r.
2159    *  You don't want the exact set of viable next tokens because the
2160    *  input might just be missing a token--you might consume the
2161    *  rest of the input looking for one of the missing tokens.
2162    *
2163    *  Consider grammar:
2164    *
2165    *  a : '[' b ']'
2166    *    | '(' b ')'
2167    *    ;
2168    *  b : c '^' INT ;
2169    *  c : ID
2170    *    | INT
2171    *    ;
2172    *
2173    *  At each rule invocation, the set of tokens that could follow
2174    *  that rule is pushed on a stack.  Here are the various "local"
2175    *  follow sets:
2176    *
2177    *  FOLLOW(b1_in_a) = FIRST(']') = ']'
2178    *  FOLLOW(b2_in_a) = FIRST(')') = ')'
2179    *  FOLLOW(c_in_b) = FIRST('^') = '^'
2180    *
2181    *  Upon erroneous input "[]", the call chain is
2182    *
2183    *  a -> b -> c
2184    *
2185    *  and, hence, the follow context stack is:
2186    *
2187    *  depth  local follow set     after call to rule
2188    *    0         <EOF>                    a (from main())
2189    *    1          ']'                     b
2190    *    3          '^'                     c
2191    *
2192    *  Notice that ')' is not included, because b would have to have
2193    *  been called from a different context in rule a for ')' to be
2194    *  included.
2195    *
2196    *  For error recovery, we cannot consider FOLLOW(c)
2197    *  (context-sensitive or otherwise).  We need the combined set of
2198    *  all context-sensitive FOLLOW sets--the set of all tokens that
2199    *  could follow any reference in the call chain.  We need to
2200    *  resync to one of those tokens.  Note that FOLLOW(c)='^' and if
2201    *  we resync'd to that token, we'd consume until EOF.  We need to
2202    *  sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}.
2203    *  In this case, for input "[]", LA(1) is in this set so we would
2204    *  not consume anything and after printing an error rule c would
2205    *  return normally.  It would not find the required '^' though.
2206    *  At this point, it gets a mismatched token error and throws an
2207    *  exception (since LA(1) is not in the viable following token
2208    *  set).  The rule exception handler tries to Recover, but finds
2209    *  the same recovery set and doesn't consume anything.  Rule b
2210    *  exits normally returning to rule a.  Now it finds the ']' (and
2211    *  with the successful Match exits errorRecovery mode).
2212    *
2213    *  So, you cna see that the parser walks up call chain looking
2214    *  for the token that was a member of the recovery set.
2215    *
2216    *  Errors are not generated in errorRecovery mode.
2217    *
2218    *  ANTLR's error recovery mechanism is based upon original ideas:
2219    *
2220    *  "Algorithms + Data Structures = Programs" by Niklaus Wirth
2221    *
2222    *  and
2223    *
2224    *  "A note on error recovery in recursive descent parsers":
2225    *  http://portal.acm.org/citation.cfm?id=947902.947905
2226    *
2227    *  Later, Josef Grosch had some good ideas:
2228    *
2229    *  "Efficient and Comfortable Error Recovery in Recursive Descent
2230    *  Parsers":
2231    *  ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip
2232    *
2233    *  Like Grosch I implemented local FOLLOW sets that are combined
2234    *  at run-time upon error to avoid overhead during parsing.
2235    *)
2236    function ComputeErrorRecoverySet: IBitSet; virtual;
2237
2238    function CombineFollows(const Exact: Boolean): IBitSet;
2239  protected
2240    { IBaseRecognizer }
2241    function GetInput: IIntStream; virtual; abstract;
2242    function GetBacktrackingLevel: Integer;
2243    function GetState: IRecognizerSharedState;
2244    function GetNumberOfSyntaxErrors: Integer;
2245    function GetGrammarFileName: String; virtual;
2246    function GetSourceName: String; virtual; abstract;
2247    function GetTokenNames: TStringArray; virtual;
2248
2249    procedure BeginBacktrack(const Level: Integer); virtual;
2250    procedure EndBacktrack(const Level: Integer; const Successful: Boolean); virtual;
2251    procedure Reset; virtual;
2252    function Match(const Input: IIntStream; const TokenType: Integer;
2253      const Follow: IBitSet): IANTLRInterface; virtual;
2254    function MismatchIsUnwantedToken(const Input: IIntStream;
2255      const TokenType: Integer): Boolean;
2256    function MismatchIsMissingToken(const Input: IIntStream;
2257      const Follow: IBitSet): Boolean;
2258    procedure BeginResync; virtual;
2259    procedure EndResync; virtual;
2260    procedure ReportError(const E: ERecognitionException); virtual;
2261    procedure MatchAny(const Input: IIntStream); virtual;
2262    procedure DisplayRecognitionError(const TokenNames: TStringArray;
2263      const E: ERecognitionException); virtual;
2264    function GetErrorMessage(const E: ERecognitionException;
2265      const TokenNames: TStringArray): String; virtual;
2266    function GetErrorHeader(const E: ERecognitionException): String; virtual;
2267    function GetTokenErrorDisplay(const T: IToken): String; virtual;
2268    procedure EmitErrorMessage(const Msg: String); virtual;
2269    procedure Recover(const Input: IIntStream; const RE: ERecognitionException); virtual;
2270    function RecoverFromMismatchedSet(const Input: IIntStream;
2271      const E: ERecognitionException; const Follow: IBitSet): IANTLRInterface; virtual;
2272    procedure ConsumeUntil(const Input: IIntStream; const TokenType: Integer); overload; virtual;
2273    procedure ConsumeUntil(const Input: IIntStream; const BitSet: IBitSet); overload; virtual;
2274    //function GetRuleInvocationStack: IList<IANTLRInterface>; overload; virtual;
2275    //function GetRuleInvocationStack(const E: Exception;
2276    //  const RecognizerClassName: String): IList<IANTLRInterface>; overload;
2277    function ToStrings(const Tokens: IList<IToken>): IList<String>; virtual;
2278    function GetRuleMemoization(const RuleIndex, RuleStartIndex: Integer): Integer; virtual;
2279    function AlreadyParsedRule(const Input: IIntStream;
2280      const RuleIndex: Integer): Boolean; virtual;
2281    procedure Memoize(const Input: IIntStream; const RuleIndex,
2282      RuleStartIndex: Integer); virtual;
2283    function GetRuleMemoizationChaceSize: Integer;
2284
2285    procedure TraceIn(const RuleName: String; const RuleIndex: Integer;
2286      const InputSymbol: String); virtual;
2287    procedure TraceOut(const RuleName: String; const RuleIndex: Integer;
2288      const InputSymbol: String); virtual;
2289
2290    property Input: IIntStream read GetInput;
2291  public
2292    constructor Create; overload;
2293    constructor Create(const AState: IRecognizerSharedState); overload;
2294  end;
2295
2296  TCommonTokenStream = class(TANTLRObject, ICommonTokenStream, ITokenStream)
2297  strict private
2298    FTokenSource: ITokenSource;
2299
2300    /// <summary>Record every single token pulled from the source so we can reproduce
2301    /// chunks of it later.
2302    /// </summary>
2303    FTokens: IList<IToken>;
2304
2305    /// <summary><![CDATA[Map<tokentype, channel>]]> to override some Tokens' channel numbers </summary>
2306    FChannelOverrideMap: IDictionary<Integer, Integer>;
2307
2308    /// <summary><![CDATA[Set<tokentype>;]]> discard any tokens with this type </summary>
2309    FDiscardSet: IHashList<Integer, Integer>;
2310
2311    /// <summary>Skip tokens on any channel but this one; this is how we skip whitespace... </summary>
2312    FChannel: Integer;
2313
2314    /// <summary>By default, track all incoming tokens </summary>
2315    FDiscardOffChannelTokens: Boolean;
2316
2317    /// <summary>Track the last Mark() call result value for use in Rewind().</summary>
2318    FLastMarker: Integer;
2319
2320    /// <summary>
2321    /// The index into the tokens list of the current token (next token
2322    /// to consume).  p==-1 indicates that the tokens list is empty
2323    /// </summary>
2324    FP: Integer;
2325  strict protected
2326    /// <summary>Load all tokens from the token source and put in tokens.
2327    /// This is done upon first LT request because you might want to
2328    /// set some token type / channel overrides before filling buffer.
2329    /// </summary>
2330    procedure FillBuffer; virtual;
2331
2332    /// <summary>Look backwards k tokens on-channel tokens </summary>
2333    function LB(const K: Integer): IToken; virtual;
2334
2335    /// <summary>Given a starting index, return the index of the first on-channel
2336    /// token.
2337    /// </summary>
2338    function SkipOffTokenChannels(const I: Integer): Integer; virtual;
2339    function SkipOffTokenChannelsReverse(const I: Integer): Integer; virtual;
2340  protected
2341    { IIntStream }
2342    function GetSourceName: String; virtual;
2343
2344    procedure Consume; virtual;
2345    function LA(I: Integer): Integer; virtual;
2346    function LAChar(I: Integer): Char;
2347    function Mark: Integer; virtual;
2348    function Index: Integer; virtual;
2349    procedure Rewind(const Marker: Integer); overload; virtual;
2350    procedure Rewind; overload; virtual;
2351    procedure Release(const Marker: Integer); virtual;
2352    procedure Seek(const Index: Integer); virtual;
2353    function Size: Integer; virtual;
2354  protected
2355    { ITokenStream }
2356    function GetTokenSource: ITokenSource; virtual;
2357    procedure SetTokenSource(const Value: ITokenSource); virtual;
2358
2359    function LT(const K: Integer): IToken; virtual;
2360    function Get(const I: Integer): IToken; virtual;
2361    function ToString(const Start, Stop: Integer): String; reintroduce; overload; virtual;
2362    function ToString(const Start, Stop: IToken): String; reintroduce; overload; virtual;
2363  protected
2364    { ICommonTokenStream }
2365    procedure SetTokenTypeChannel(const TType, Channel: Integer);
2366    procedure DiscardTokenType(const TType: Integer);
2367    procedure DiscardOffChannelTokens(const Discard: Boolean);
2368    function GetTokens: IList<IToken>; overload;
2369    function GetTokens(const Start, Stop: Integer): IList<IToken>; overload;
2370    function GetTokens(const Start, Stop: Integer;
2371      const Types: IBitSet): IList<IToken>; overload;
2372    function GetTokens(const Start, Stop: Integer;
2373      const Types: IList<Integer>): IList<IToken>; overload;
2374    function GetTokens(const Start, Stop,
2375      TokenType: Integer): IList<IToken>; overload;
2376    procedure Reset; virtual;
2377  public
2378    constructor Create; overload;
2379    constructor Create(const ATokenSource: ITokenSource); overload;
2380    constructor Create(const ATokenSource: ITokenSource;
2381      const AChannel: Integer); overload;
2382    constructor Create(const ALexer: ILexer); overload;
2383    constructor Create(const ALexer: ILexer;
2384      const AChannel: Integer); overload;
2385
2386    function ToString: String; overload; override;
2387  end;
2388
2389  TDFA = class abstract(TANTLRObject, IDFA)
2390  strict private
2391    FSpecialStateTransitionHandler: TSpecialStateTransitionHandler;
2392    FEOT: TSmallintArray;
2393    FEOF: TSmallintArray;
2394    FMin: TCharArray;
2395    FMax: TCharArray;
2396    FAccept: TSmallintArray;
2397    FSpecial: TSmallintArray;
2398    FTransition: TSmallintMatrix;
2399    FDecisionNumber: Integer;
2400    FRecognizer: Pointer; { IBaseRecognizer }
2401    function GetRecognizer: IBaseRecognizer;
2402    procedure SetRecognizer(const Value: IBaseRecognizer);
2403  strict protected
2404    procedure NoViableAlt(const S: Integer; const Input: IIntStream);
2405
2406    property Recognizer: IBaseRecognizer read GetRecognizer write SetRecognizer;
2407    property DecisionNumber: Integer read FDecisionNumber write FDecisionNumber;
2408    property EOT: TSmallintArray read FEOT write FEOT;
2409    property EOF: TSmallintArray read FEOF write FEOF;
2410    property Min: TCharArray read FMin write FMin;
2411    property Max: TCharArray read FMax write FMax;
2412    property Accept: TSmallintArray read FAccept write FAccept;
2413    property Special: TSmallintArray read FSpecial write FSpecial;
2414    property Transition: TSmallintMatrix read FTransition write FTransition;
2415  protected
2416    { IDFA }
2417    function GetSpecialStateTransitionHandler: TSpecialStateTransitionHandler;
2418    procedure SetSpecialStateTransitionHandler(const Value: TSpecialStateTransitionHandler);
2419
2420    function Predict(const Input: IIntStream): Integer;
2421    procedure Error(const NVAE: ENoViableAltException); virtual;
2422    function SpecialStateTransition(const S: Integer;
2423      const Input: IIntStream): Integer; virtual;
2424    function Description: String; virtual;
2425    function SpecialTransition(const State, Symbol: Integer): Integer;
2426  public
2427    class function UnpackEncodedString(const EncodedString: String): TSmallintArray; static;
2428    class function UnpackEncodedStringArray(const EncodedStrings: TStringArray): TSmallintMatrix; overload; static;
2429    class function UnpackEncodedStringArray(const EncodedStrings: array of String): TSmallintMatrix; overload; static;
2430    class function UnpackEncodedStringToUnsignedChars(const EncodedString: String): TCharArray; static;
2431  end;
2432
2433  TLexer = class abstract(TBaseRecognizer, ILexer, ITokenSource)
2434  strict private
2435    const
2436      TOKEN_dot_EOF = Ord(cscEOF);
2437  strict private
2438    /// <summary>Where is the lexer drawing characters from? </summary>
2439    FInput: ICharStream;
2440  protected
2441    { IBaseRecognizer }
2442    function GetSourceName: String; override;
2443    function GetInput: IIntStream; override;
2444    procedure Reset; override;
2445    procedure ReportError(const E: ERecognitionException); override;
2446    function GetErrorMessage(const E: ERecognitionException;
2447      const TokenNames: TStringArray): String; override;
2448  protected
2449    { ILexer }
2450    function GetCharStream: ICharStream; virtual;
2451    procedure SetCharStream(const Value: ICharStream); virtual;
2452    function GetLine: Integer; virtual;
2453    function GetCharPositionInLine: Integer; virtual;
2454    function GetCharIndex: Integer; virtual;
2455    function GetText: String; virtual;
2456    procedure SetText(const Value: String); virtual;
2457
2458    function NextToken: IToken; virtual;
2459    procedure Skip;
2460    procedure DoTokens; virtual; abstract;
2461    procedure Emit(const Token: IToken); overload; virtual;
2462    function Emit: IToken; overload; virtual;
2463    procedure Match(const S: String); reintroduce; overload; virtual;
2464    procedure Match(const C: Integer); reintroduce; overload; virtual;
2465    procedure MatchAny; reintroduce; overload; virtual;
2466    procedure MatchRange(const A, B: Integer); virtual;
2467    procedure Recover(const RE: ERecognitionException); reintroduce; overload; virtual;
2468    function GetCharErrorDisplay(const C: Integer): String;
2469    procedure TraceIn(const RuleName: String; const RuleIndex: Integer); reintroduce; overload; virtual;
2470    procedure TraceOut(const RuleName: String; const RuleIndex: Integer); reintroduce; overload; virtual;
2471  strict protected
2472    property Input: ICharStream read FInput;
2473    property CharIndex: Integer read GetCharIndex;
2474    property Text: String read GetText write SetText;
2475  public
2476    constructor Create; overload;
2477    constructor Create(const AInput: ICharStream); overload;
2478    constructor Create(const AInput: ICharStream;
2479      const AState: IRecognizerSharedState); overload;
2480  end;
2481
2482  TParser = class(TBaseRecognizer, IParser)
2483  strict private
2484    FInput: ITokenStream;
2485  protected
2486    property Input: ITokenStream read FInput;
2487  protected
2488    { IBaseRecognizer }
2489    procedure Reset; override;
2490    function GetCurrentInputSymbol(const Input: IIntStream): IANTLRInterface; override;
2491    function GetMissingSymbol(const Input: IIntStream;
2492      const E: ERecognitionException; const ExpectedTokenType: Integer;
2493      const Follow: IBitSet): IANTLRInterface; override;
2494    function GetSourceName: String; override;
2495    function GetInput: IIntStream; override;
2496  protected
2497    { IParser }
2498    function GetTokenStream: ITokenStream; virtual;
2499    procedure SetTokenStream(const Value: ITokenStream); virtual;
2500
2501    procedure TraceIn(const RuleName: String; const RuleIndex: Integer); reintroduce; overload;
2502    procedure TraceOut(const RuleName: String; const RuleIndex: Integer); reintroduce; overload;
2503  public
2504    constructor Create(const AInput: ITokenStream); overload;
2505    constructor Create(const AInput: ITokenStream;
2506      const AState: IRecognizerSharedState); overload;
2507  end;
2508
2509  TRuleReturnScope = class(TANTLRObject, IRuleReturnScope)
2510  protected
2511    { IRuleReturnScope }
2512    function GetStart: IANTLRInterface; virtual;
2513    procedure SetStart(const Value: IANTLRInterface); virtual;
2514    function GetStop: IANTLRInterface; virtual;
2515    procedure SetStop(const Value: IANTLRInterface); virtual;
2516    function GetTree: IANTLRInterface; virtual;
2517    procedure SetTree(const Value: IANTLRInterface); virtual;
2518    function GetTemplate: IANTLRInterface; virtual;
2519  end;
2520
2521  TParserRuleReturnScope = class(TRuleReturnScope, IParserRuleReturnScope)
2522  strict private
2523    FStart: IToken;
2524    FStop: IToken;
2525  protected
2526    { IRuleReturnScope }
2527    function GetStart: IANTLRInterface; override;
2528    procedure SetStart(const Value: IANTLRInterface); override;
2529    function GetStop: IANTLRInterface; override;
2530    procedure SetStop(const Value: IANTLRInterface); override;
2531  end;
2532
2533  TTokenRewriteStream = class(TCommonTokenStream, ITokenRewriteStream)
2534  public
2535    const
2536      DEFAULT_PROGRAM_NAME = 'default';
2537      PROGRAM_INIT_SIZE = 100;
2538      MIN_TOKEN_INDEX = 0;
2539  strict protected
2540    // Define the rewrite operation hierarchy
2541    type
2542      IRewriteOperation = interface(IANTLRInterface)
2543      ['{285A54ED-58FF-44B1-A268-2686476D4419}']
2544        { Property accessors }
2545        function GetInstructionIndex: Integer;
2546        procedure SetInstructionIndex(const Value: Integer);
2547        function GetIndex: Integer;
2548        procedure SetIndex(const Value: Integer);
2549        function GetText: IANTLRInterface;
2550        procedure SetText(const Value: IANTLRInterface);
2551        function GetParent: ITokenRewriteStream;
2552        procedure SetParent(const Value: ITokenRewriteStream);
2553
2554        { Methods }
2555
2556        /// <summary>Execute the rewrite operation by possibly adding to the buffer.
2557        /// Return the index of the next token to operate on.
2558        /// </summary>
2559        function Execute(const Buf: TStringBuilder): Integer;
2560
2561        { Properties }
2562        property InstructionIndex: Integer read GetInstructionIndex write SetInstructionIndex;
2563        property Index: Integer read GetIndex write SetIndex;
2564        property Text: IANTLRInterface read GetText write SetText;
2565        property Parent: ITokenRewriteStream read GetParent write SetParent;
2566      end;
2567
2568      TRewriteOperation = class(TANTLRObject, IRewriteOperation)
2569      strict private
2570        // What index into rewrites List are we?
2571        FInstructionIndex: Integer;
2572        // Token buffer index
2573        FIndex: Integer;
2574        FText: IANTLRInterface;
2575        FParent: Pointer; {ITokenRewriteStream;}
2576      protected
2577        { IRewriteOperation }
2578        function GetInstructionIndex: Integer;
2579        procedure SetInstructionIndex(const Value: Integer);
2580        function GetIndex: Integer;
2581        procedure SetIndex(const Value: Integer);
2582        function GetText: IANTLRInterface;
2583        procedure SetText(const Value: IANTLRInterface);
2584        function GetParent: ITokenRewriteStream;
2585        procedure SetParent(const Value: ITokenRewriteStream);
2586
2587        function Execute(const Buf: TStringBuilder): Integer; virtual;
2588      protected
2589        constructor Create(const AIndex: Integer; const AText: IANTLRInterface;
2590          const AParent: ITokenRewriteStream);
2591
2592        property Index: Integer read FIndex write FIndex;
2593        property Text: IANTLRInterface read FText write FText;
2594        property Parent: ITokenRewriteStream read GetParent write SetParent;
2595      public
2596        function ToString: String; override;
2597      end;
2598
2599      IInsertBeforeOp = interface(IRewriteOperation)
2600      ['{BFB732E2-BE6A-4691-AE3B-5C8013DE924E}']
2601      end;
2602
2603      TInsertBeforeOp = class(TRewriteOperation, IInsertBeforeOp)
2604      protected
2605        { IRewriteOperation }
2606        function Execute(const Buf: TStringBuilder): Integer; override;
2607      end;
2608
2609      /// <summary>I'm going to try replacing range from x..y with (y-x)+1 ReplaceOp
2610      /// instructions.
2611      /// </summary>
2612      IReplaceOp = interface(IRewriteOperation)
2613      ['{630C434A-99EA-4589-A65D-64A7B3DAC407}']
2614        { Property accessors }
2615        function GetLastIndex: Integer;
2616        procedure SetLastIndex(const Value: Integer);
2617
2618        { Properties }
2619        property LastIndex: Integer read GetLastIndex write SetLastIndex;
2620      end;
2621
2622      TReplaceOp = class(TRewriteOperation, IReplaceOp)
2623      private
2624        FLastIndex: Integer;
2625      protected
2626        { IRewriteOperation }
2627        function Execute(const Buf: TStringBuilder): Integer; override;
2628      protected
2629        { IReplaceOp }
2630        function GetLastIndex: Integer;
2631        procedure SetLastIndex(const Value: Integer);
2632      public
2633        constructor Create(const AStart, AStop: Integer;
2634          const AText: IANTLRInterface; const AParent: ITokenRewriteStream);
2635
2636        function ToString: String; override;
2637      end;
2638
2639      IDeleteOp = interface(IRewriteOperation)
2640      ['{C39345BC-F170-4C3A-A989-65E6B9F0712B}']
2641      end;
2642
2643      TDeleteOp = class(TReplaceOp)
2644      public
2645        function ToString: String; override;
2646      end;
2647  strict private
2648    type
2649      TRewriteOpComparer<T: IRewriteOperation> = class(TComparer<T>)
2650      public
2651        function Compare(const Left, Right: T): Integer; override;
2652      end;
2653  strict private
2654    /// <summary>You may have multiple, named streams of rewrite operations.
2655    /// I'm calling these things "programs."
2656    /// Maps String (name) -> rewrite (IList)
2657    /// </summary>
2658    FPrograms: IDictionary<String, IList<IRewriteOperation>>;
2659
2660    /// <summary>Map String (program name) -> Integer index </summary>
2661    FLastRewriteTokenIndexes: IDictionary<String, Integer>;
2662  strict private
2663    function InitializeProgram(const Name: String): IList<IRewriteOperation>;
2664  protected
2665    { ITokenRewriteStream }
2666    procedure Rollback(const InstructionIndex: Integer); overload; virtual;
2667    procedure Rollback(const ProgramName: String;
2668      const InstructionIndex: Integer); overload; virtual;
2669
2670    procedure DeleteProgram; overload; virtual;
2671    procedure DeleteProgram(const ProgramName: String); overload; virtual;
2672
2673    procedure InsertAfter(const T: IToken; const Text: IANTLRInterface); overload; virtual;
2674    procedure InsertAfter(const Index: Integer; const Text: IANTLRInterface); overload; virtual;
2675    procedure InsertAfter(const ProgramName: String; const T: IToken;
2676      const Text: IANTLRInterface); overload; virtual;
2677    procedure InsertAfter(const ProgramName: String; const Index: Integer;
2678      const Text: IANTLRInterface); overload; virtual;
2679    procedure InsertAfter(const T: IToken; const Text: String); overload;
2680    procedure InsertAfter(const Index: Integer; const Text: String); overload;
2681    procedure InsertAfter(const ProgramName: String; const T: IToken;
2682      const Text: String); overload;
2683    procedure InsertAfter(const ProgramName: String; const Index: Integer;
2684      const Text: String); overload;
2685
2686    procedure InsertBefore(const T: IToken; const Text: IANTLRInterface); overload; virtual;
2687    procedure InsertBefore(const Index: Integer; const Text: IANTLRInterface); overload; virtual;
2688    procedure InsertBefore(const ProgramName: String; const T: IToken;
2689      const Text: IANTLRInterface); overload; virtual;
2690    procedure InsertBefore(const ProgramName: String; const Index: Integer;
2691      const Text: IANTLRInterface); overload; virtual;
2692    procedure InsertBefore(const T: IToken; const Text: String); overload;
2693    procedure InsertBefore(const Index: Integer; const Text: String); overload;
2694    procedure InsertBefore(const ProgramName: String; const T: IToken;
2695      const Text: String); overload;
2696    procedure InsertBefore(const ProgramName: String; const Index: Integer;
2697      const Text: String); overload;
2698
2699    procedure Replace(const Index: Integer; const Text: IANTLRInterface); overload; virtual;
2700    procedure Replace(const Start, Stop: Integer; const Text: IANTLRInterface); overload; virtual;
2701    procedure Replace(const IndexT: IToken; const Text: IANTLRInterface); overload; virtual;
2702    procedure Replace(const Start, Stop: IToken; const Text: IANTLRInterface); overload; virtual;
2703    procedure Replace(const ProgramName: String; const Start, Stop: Integer;
2704      const Text: IANTLRInterface); overload; virtual;
2705    procedure Replace(const ProgramName: String; const Start, Stop: IToken;
2706      const Text: IANTLRInterface); overload; virtual;
2707    procedure Replace(const Index: Integer; const Text: String); overload;
2708    procedure Replace(const Start, Stop: Integer; const Text: String); overload;
2709    procedure Replace(const IndexT: IToken; const Text: String); overload;
2710    procedure Replace(const Start, Stop: IToken; const Text: String); overload;
2711    procedure Replace(const ProgramName: String; const Start, Stop: Integer;
2712      const Text: String); overload;
2713    procedure Replace(const ProgramName: String; const Start, Stop: IToken;
2714      const Text: String); overload;
2715
2716    procedure Delete(const Index: Integer); overload; virtual;
2717    procedure Delete(const Start, Stop: Integer); overload; virtual;
2718    procedure Delete(const IndexT: IToken); overload; virtual;
2719    procedure Delete(const Start, Stop: IToken); overload; virtual;
2720    procedure Delete(const ProgramName: String; const Start, Stop: Integer); overload; virtual;
2721    procedure Delete(const ProgramName: String; const Start, Stop: IToken); overload; virtual;
2722
2723    function GetLastRewriteTokenIndex: Integer; overload; virtual;
2724
2725    function ToOriginalString: String; overload; virtual;
2726    function ToOriginalString(const Start, Stop: Integer): String; overload; virtual;
2727
2728    function ToString(const ProgramName: String): String; overload; virtual;
2729    function ToString(const ProgramName: String;
2730      const Start, Stop: Integer): String; overload; virtual;
2731
2732    function ToDebugString: String; overload; virtual;
2733    function ToDebugString(const Start, Stop: Integer): String; overload; virtual;
2734  protected
2735    { ITokenStream }
2736    function ToString(const Start, Stop: Integer): String; overload; override;
2737  strict protected
2738    procedure Init; virtual;
2739    function GetProgram(const Name: String): IList<IRewriteOperation>; virtual;
2740    function GetLastRewriteTokenIndex(const ProgramName: String): Integer; overload; virtual;
2741    procedure SetLastRewriteTokenIndex(const ProgramName: String; const I: Integer); overload; virtual;
2742
2743    /// <summary>
2744    /// Return a map from token index to operation.
2745    /// </summary>
2746    /// <remarks>We need to combine operations and report invalid operations (like
2747    /// overlapping replaces that are not completed nested).  Inserts to
2748    /// same index need to be combined etc...   Here are the cases:
2749    ///
2750    /// I.i.u I.j.v               leave alone, nonoverlapping
2751    /// I.i.u I.i.v               combine: Iivu
2752    ///
2753    /// R.i-j.u R.x-y.v | i-j in x-y      delete first R
2754    /// R.i-j.u R.i-j.v             delete first R
2755    /// R.i-j.u R.x-y.v | x-y in i-j      ERROR
2756    /// R.i-j.u R.x-y.v | boundaries overlap  ERROR
2757    ///
2758    /// I.i.u R.x-y.v | i in x-y        delete I
2759    /// I.i.u R.x-y.v | i not in x-y      leave alone, nonoverlapping
2760    /// R.x-y.v I.i.u | i in x-y        ERROR
2761    /// R.x-y.v I.x.u               R.x-y.uv (combine, delete I)
2762    /// R.x-y.v I.i.u | i not in x-y      leave alone, nonoverlapping
2763    ///
2764    /// I.i.u = insert u before op @ index i
2765    /// R.x-y.u = replace x-y indexed tokens with u
2766    ///
2767    /// First we need to examine replaces.  For any replace op:
2768    ///
2769    ///   1. wipe out any insertions before op within that range.
2770    ///   2. Drop any replace op before that is contained completely within
2771    ///        that range.
2772    ///   3. Throw exception upon boundary overlap with any previous replace.
2773    ///
2774    /// Then we can deal with inserts:
2775    ///
2776    ///   1. for any inserts to same index, combine even if not adjacent.
2777    ///   2. for any prior replace with same left boundary, combine this
2778    ///        insert with replace and delete this replace.
2779    ///   3. throw exception if index in same range as previous replace
2780    ///
2781    /// Don't actually delete; make op null in list. Easier to walk list.
2782    /// Later we can throw as we add to index -> op map.
2783    ///
2784    /// Note that I.2 R.2-2 will wipe out I.2 even though, technically, the
2785    /// inserted stuff would be before the replace range.  But, if you
2786    /// add tokens in front of a method body '{' and then delete the method
2787    /// body, I think the stuff before the '{' you added should disappear too.
2788    /// </remarks>
2789    function ReduceToSingleOperationPerIndex(
2790      const Rewrites: IList<IRewriteOperation>): IDictionary<Integer, IRewriteOperation>;
2791
2792    function GetKindOfOps(const Rewrites: IList<IRewriteOperation>;
2793      const Kind: TGUID): IList<IRewriteOperation>; overload;
2794    /// <summary>
2795    /// Get all operations before an index of a particular kind
2796    /// </summary>
2797    function GetKindOfOps(const Rewrites: IList<IRewriteOperation>;
2798      const Kind: TGUID; const Before: Integer): IList<IRewriteOperation>; overload;
2799
2800    function CatOpText(const A, B: IANTLRInterface): IANTLRInterface;
2801  public
2802    constructor Create; overload;
2803    constructor Create(const ATokenSource: ITokenSource); overload;
2804    constructor Create(const ATokenSource: ITokenSource;
2805      const AChannel: Integer); overload;
2806    constructor Create(const ALexer: ILexer); overload;
2807    constructor Create(const ALexer: ILexer;
2808      const AChannel: Integer); overload;
2809
2810    function ToString: String; overload; override;
2811  end;
2812
2813{ These functions return X or, if X = nil, an empty default instance }
2814function Def(const X: IToken): IToken; overload;
2815function Def(const X: IRuleReturnScope): IRuleReturnScope; overload;
2816
2817implementation
2818
2819uses
2820  StrUtils,
2821  Math,
2822  Antlr.Runtime.Tree;
2823
2824{ ERecognitionException }
2825
2826constructor ERecognitionException.Create;
2827begin
2828  Create('', nil);
2829end;
2830
2831constructor ERecognitionException.Create(const AMessage: String);
2832begin
2833  Create(AMessage, nil);
2834end;
2835
2836constructor ERecognitionException.Create(const AInput: IIntStream);
2837begin
2838  Create('', AInput);
2839end;
2840
2841constructor ERecognitionException.Create(const AMessage: String;
2842  const AInput: IIntStream);
2843var
2844  TokenStream: ITokenStream;
2845  CharStream: ICharStream;
2846begin
2847  inherited Create(AMessage);
2848  FInput := AInput;
2849  FIndex := AInput.Index;
2850
2851  if Supports(AInput, ITokenStream, TokenStream) then
2852  begin
2853    FToken := TokenStream.LT(1);
2854    FLine := FToken.Line;
2855    FCharPositionInLine := FToken.CharPositionInLine;
2856  end;
2857
2858  if Supports(AInput, ITreeNodeStream) then
2859    ExtractInformationFromTreeNodeStream(AInput)
2860  else
2861  begin
2862    if Supports(AInput, ICharStream, CharStream) then
2863    begin
2864      FC := AInput.LA(1);
2865      FLine := CharStream.Line;
2866      FCharPositionInLine := CharStream.CharPositionInLine;
2867    end
2868    else
2869      FC := AInput.LA(1);
2870  end;
2871end;
2872
2873procedure ERecognitionException.ExtractInformationFromTreeNodeStream(
2874  const Input: IIntStream);
2875var
2876  Nodes: ITreeNodeStream;
2877  Adaptor: ITreeAdaptor;
2878  Payload, PriorPayload: IToken;
2879  I, NodeType: Integer;
2880  PriorNode: IANTLRInterface;
2881  Tree: ITree;
2882  Text: String;
2883  CommonTree: ICommonTree;
2884begin
2885  Nodes := Input as ITreeNodeStream;
2886  FNode := Nodes.LT(1);
2887  Adaptor := Nodes.TreeAdaptor;
2888  Payload := Adaptor.GetToken(FNode);
2889
2890  if Assigned(Payload) then
2891  begin
2892    FToken := Payload;
2893    if (Payload.Line <= 0) then
2894    begin
2895      // imaginary node; no line/pos info; scan backwards
2896      I := -1;
2897      PriorNode := Nodes.LT(I);
2898      while Assigned(PriorNode) do
2899      begin
2900        PriorPayload := Adaptor.GetToken(PriorNode);
2901        if Assigned(PriorPayload) and (PriorPayload.Line > 0) then
2902        begin
2903          // we found the most recent real line / pos info
2904          FLine := PriorPayload.Line;
2905          FCharPositionInLine := PriorPayload.CharPositionInLine;
2906          FApproximateLineInfo := True;
2907          Break;
2908        end;
2909        Dec(I);
2910        PriorNode := Nodes.LT(I)
2911      end;
2912    end
2913    else
2914    begin
2915      // node created from real token
2916      FLine := Payload.Line;
2917      FCharPositionInLine := Payload.CharPositionInLine;
2918    end;
2919  end else
2920    if Supports(FNode, ITree, Tree) then
2921    begin
2922      FLine := Tree.Line;
2923      FCharPositionInLine := Tree.CharPositionInLine;
2924      if Supports(FNode, ICommonTree, CommonTree) then
2925        FToken := CommonTree.Token;
2926    end
2927    else
2928    begin
2929      NodeType := Adaptor.GetNodeType(FNode);
2930      Text := Adaptor.GetNodeText(FNode);
2931      FToken := TCommonToken.Create(NodeType, Text);
2932    end;
2933end;
2934
2935function ERecognitionException.GetUnexpectedType: Integer;
2936var
2937  Nodes: ITreeNodeStream;
2938  Adaptor: ITreeAdaptor;
2939begin
2940  if Supports(FInput, ITokenStream) then
2941    Result := FToken.TokenType
2942  else
2943    if Supports(FInput, ITreeNodeStream, Nodes) then
2944    begin
2945      Adaptor := Nodes.TreeAdaptor;
2946      Result := Adaptor.GetNodeType(FNode);
2947    end else
2948      Result := FC;
2949end;
2950
2951{ EMismatchedTokenException }
2952
2953constructor EMismatchedTokenException.Create(const AExpecting: Integer;
2954  const AInput: IIntStream);
2955begin
2956  inherited Create(AInput);
2957  FExpecting := AExpecting;
2958end;
2959
2960function EMismatchedTokenException.ToString: String;
2961begin
2962  Result := 'MismatchedTokenException(' + IntToStr(UnexpectedType)
2963    + '!=' + IntToStr(Expecting) + ')';
2964
2965end;
2966
2967{ EUnwantedTokenException }
2968
2969function EUnwantedTokenException.GetUnexpectedToken: IToken;
2970begin
2971  Result := FToken;
2972end;
2973
2974function EUnwantedTokenException.ToString: String;
2975var
2976  Exp: String;
2977begin
2978  if (Expecting = TToken.INVALID_TOKEN_TYPE) then
2979    Exp := ''
2980  else
2981    Exp := ', expected ' + IntToStr(Expecting);
2982  if (Token = nil) then
2983    Result := 'UnwantedTokenException(found=nil' + Exp + ')'
2984  else
2985    Result := 'UnwantedTokenException(found=' + Token.Text + Exp + ')'
2986end;
2987
2988{ EMissingTokenException }
2989
2990constructor EMissingTokenException.Create(const AExpecting: Integer;
2991  const AInput: IIntStream; const AInserted: IANTLRInterface);
2992begin
2993  inherited Create(AExpecting, AInput);
2994  FInserted := AInserted;
2995end;
2996
2997function EMissingTokenException.GetMissingType: Integer;
2998begin
2999  Result := Expecting;
3000end;
3001
3002function EMissingTokenException.ToString: String;
3003begin
3004  if Assigned(FInserted) and Assigned(FToken) then
3005    Result := 'MissingTokenException(inserted ' + FInserted.ToString
3006      + ' at ' + FToken.Text + ')'
3007  else
3008    if Assigned(FToken) then
3009      Result := 'MissingTokenException(at ' + FToken.Text + ')'
3010    else
3011      Result := 'MissingTokenException';
3012end;
3013
3014{ EMismatchedTreeNodeException }
3015
3016constructor EMismatchedTreeNodeException.Create(const AExpecting: Integer;
3017  const AInput: IIntStream);
3018begin
3019  inherited Create(AInput);
3020  FExpecting := AExpecting;
3021end;
3022
3023function EMismatchedTreeNodeException.ToString: String;
3024begin
3025  Result := 'MismatchedTreeNodeException(' + IntToStr(UnexpectedType)
3026    + '!=' + IntToStr(Expecting) + ')';
3027end;
3028
3029{ ENoViableAltException }
3030
3031constructor ENoViableAltException.Create(
3032  const AGrammarDecisionDescription: String; const ADecisionNumber,
3033  AStateNumber: Integer; const AInput: IIntStream);
3034begin
3035  inherited Create(AInput);
3036  FGrammarDecisionDescription := AGrammarDecisionDescription;
3037  FDecisionNumber := ADecisionNumber;
3038  FStateNumber := AStateNumber;
3039end;
3040
3041function ENoViableAltException.ToString: String;
3042begin
3043  if Supports(Input, ICharStream) then
3044    Result := 'NoViableAltException(''' + Char(UnexpectedType) + '''@['
3045      + FGrammarDecisionDescription + '])'
3046  else
3047    Result := 'NoViableAltException(''' + IntToStr(UnexpectedType) + '''@['
3048      + FGrammarDecisionDescription + '])'
3049end;
3050
3051{ EEarlyExitException }
3052
3053constructor EEarlyExitException.Create(const ADecisionNumber: Integer;
3054  const AInput: IIntStream);
3055begin
3056  inherited Create(AInput);
3057  FDecisionNumber := ADecisionNumber;
3058end;
3059
3060{ EMismatchedSetException }
3061
3062constructor EMismatchedSetException.Create(const AExpecting: IBitSet;
3063  const AInput: IIntStream);
3064begin
3065  inherited Create(AInput);
3066  FExpecting := AExpecting;
3067end;
3068
3069function EMismatchedSetException.ToString: String;
3070begin
3071  Result := 'MismatchedSetException(' + IntToStr(UnexpectedType)
3072    + '!=' + Expecting.ToString + ')';
3073end;
3074
3075{ EMismatchedNotSetException }
3076
3077function EMismatchedNotSetException.ToString: String;
3078begin
3079  Result := 'MismatchedNotSetException(' + IntToStr(UnexpectedType)
3080    + '!=' + Expecting.ToString + ')';
3081end;
3082
3083{ EFailedPredicateException }
3084
3085constructor EFailedPredicateException.Create(const AInput: IIntStream;
3086  const ARuleName, APredicateText: String);
3087begin
3088  inherited Create(AInput);
3089  FRuleName := ARuleName;
3090  FPredicateText := APredicateText;
3091end;
3092
3093function EFailedPredicateException.ToString: String;
3094begin
3095  Result := 'FailedPredicateException(' + FRuleName + ',{' + FPredicateText + '}?)';
3096end;
3097
3098{ EMismatchedRangeException }
3099
3100constructor EMismatchedRangeException.Create(const AA, AB: Integer;
3101  const AInput: IIntStream);
3102begin
3103  inherited Create(FInput);
3104  FA := AA;
3105  FB := AB;
3106end;
3107
3108function EMismatchedRangeException.ToString: String;
3109begin
3110  Result := 'MismatchedNotSetException(' + IntToStr(UnexpectedType)
3111    + ' not in [' + IntToStr(FA)+ ',' + IntToStr(FB) + '])';
3112end;
3113
3114{ TCharStreamState }
3115
3116function TCharStreamState.GetCharPositionInLine: Integer;
3117begin
3118  Result := FCharPositionInLine;
3119end;
3120
3121function TCharStreamState.GetLine: Integer;
3122begin
3123  Result := FLine;
3124end;
3125
3126function TCharStreamState.GetP: Integer;
3127begin
3128  Result := FP;
3129end;
3130
3131procedure TCharStreamState.SetCharPositionInLine(const Value: Integer);
3132begin
3133  FCharPositionInLine := Value;
3134end;
3135
3136procedure TCharStreamState.SetLine(const Value: Integer);
3137begin
3138  FLine := Value;
3139end;
3140
3141procedure TCharStreamState.SetP(const Value: Integer);
3142begin
3143  FP := Value;
3144end;
3145
3146{ TANTLRStringStream }
3147
3148constructor TANTLRStringStream.Create(const AInput: String);
3149begin
3150  inherited Create;
3151  FLine := 1;
3152  FOwnsData := True;
3153  FN := Length(AInput);
3154  if (FN > 0) then
3155  begin
3156    GetMem(FData,FN * SizeOf(Char));
3157    Move(AInput[1],FData^,FN * SizeOf(Char));
3158  end;
3159end;
3160
3161procedure TANTLRStringStream.Consume;
3162begin
3163  if (FP < FN) then
3164  begin
3165    Inc(FCharPositionInLine);
3166    if (FData[FP] = #10) then
3167    begin
3168      Inc(FLine);
3169      FCharPositionInLine := 0;
3170    end;
3171    Inc(FP);
3172  end;
3173end;
3174
3175constructor TANTLRStringStream.Create(const AData: PChar;
3176  const ANumberOfActualCharsInArray: Integer);
3177begin
3178  inherited Create;
3179  FLine := 1;
3180  FOwnsData := False;
3181  FData := AData;
3182  FN := ANumberOfActualCharsInArray;
3183end;
3184
3185constructor TANTLRStringStream.Create;
3186begin
3187  inherited Create;
3188  FLine := 1;
3189end;
3190
3191destructor TANTLRStringStream.Destroy;
3192begin
3193  if (FOwnsData) then
3194    FreeMem(FData);
3195  inherited;
3196end;
3197
3198function TANTLRStringStream.GetCharPositionInLine: Integer;
3199begin
3200  Result := FCharPositionInLine;
3201end;
3202
3203function TANTLRStringStream.GetLine: Integer;
3204begin
3205  Result := FLine;
3206end;
3207
3208function TANTLRStringStream.GetSourceName: String;
3209begin
3210  Result := FName;
3211end;
3212
3213function TANTLRStringStream.Index: Integer;
3214begin
3215  Result := FP;
3216end;
3217
3218function TANTLRStringStream.LA(I: Integer): Integer;
3219begin
3220  if (I = 0) then
3221    Result := 0 // undefined
3222  else begin
3223    if (I < 0) then
3224    begin
3225      Inc(I); // e.g., translate LA(-1) to use offset i=0; then data[p+0-1]
3226      if ((FP + I - 1) < 0) then
3227      begin
3228        Result := Integer(cscEOF);
3229        Exit;
3230      end;
3231    end;
3232
3233    if ((FP + I - 1) >= FN) then
3234      Result := Integer(cscEOF)
3235    else
3236      Result := Integer(FData[FP + I - 1]);
3237  end;
3238end;
3239
3240function TANTLRStringStream.LAChar(I: Integer): Char;
3241begin
3242  Result := Char(LA(I));
3243end;
3244
3245function TANTLRStringStream.LT(const I: Integer): Integer;
3246begin
3247  Result := LA(I);
3248end;
3249
3250function TANTLRStringStream.Mark: Integer;
3251var
3252  State: ICharStreamState;
3253begin
3254  if (FMarkers = nil) then
3255  begin
3256    FMarkers := TList<ICharStreamState>.Create;
3257    FMarkers.Add(nil);  // depth 0 means no backtracking, leave blank
3258  end;
3259
3260  Inc(FMarkDepth);
3261  if (FMarkDepth >= FMarkers.Count) then
3262  begin
3263    State := TCharStreamState.Create;
3264    FMarkers.Add(State);
3265  end
3266  else
3267    State := FMarkers[FMarkDepth];
3268
3269  State.P := FP;
3270  State.Line := FLine;
3271  State.CharPositionInLine := FCharPositionInLine;
3272  FLastMarker := FMarkDepth;
3273  Result := FMarkDepth;
3274end;
3275
3276procedure TANTLRStringStream.Release(const Marker: Integer);
3277begin
3278  // unwind any other markers made after m and release m
3279  FMarkDepth := Marker;
3280  // release this marker
3281  Dec(FMarkDepth);
3282end;
3283
3284procedure TANTLRStringStream.Reset;
3285begin
3286  FP := 0;
3287  FLine := 1;
3288  FCharPositionInLine := 0;
3289  FMarkDepth := 0;
3290end;
3291
3292procedure TANTLRStringStream.Rewind(const Marker: Integer);
3293var
3294  State: ICharStreamState;
3295begin
3296  State := FMarkers[Marker];
3297  // restore stream state
3298  Seek(State.P);
3299  FLine := State.Line;
3300  FCharPositionInLine := State.CharPositionInLine;
3301  Release(Marker);
3302end;
3303
3304procedure TANTLRStringStream.Rewind;
3305begin
3306  Rewind(FLastMarker);
3307end;
3308
3309procedure TANTLRStringStream.Seek(const Index: Integer);
3310begin
3311  if (Index <= FP) then
3312    FP := Index // just jump; don't update stream state (line, ...)
3313  else begin
3314    // seek forward, consume until p hits index
3315    while (FP < Index) do
3316      Consume;
3317  end;
3318end;
3319
3320procedure TANTLRStringStream.SetCharPositionInLine(const Value: Integer);
3321begin
3322  FCharPositionInLine := Value;
3323end;
3324
3325procedure TANTLRStringStream.SetLine(const Value: Integer);
3326begin
3327  FLine := Value;
3328end;
3329
3330function TANTLRStringStream.Size: Integer;
3331begin
3332  Result := FN;
3333end;
3334
3335function TANTLRStringStream.Substring(const Start, Stop: Integer): String;
3336begin
3337  Result := Copy(FData, Start + 1, Stop - Start + 1);
3338end;
3339
3340{ TANTLRFileStream }
3341
3342constructor TANTLRFileStream.Create(const AFileName: String);
3343begin
3344  Create(AFilename,TEncoding.Default);
3345end;
3346
3347constructor TANTLRFileStream.Create(const AFileName: String;
3348  const AEncoding: TEncoding);
3349begin
3350  inherited Create;
3351  FFileName := AFileName;
3352  Load(FFileName, AEncoding);
3353end;
3354
3355function TANTLRFileStream.GetSourceName: String;
3356begin
3357  Result := FFileName;
3358end;
3359
3360procedure TANTLRFileStream.Load(const FileName: String;
3361  const Encoding: TEncoding);
3362var
3363  FR: TStreamReader;
3364  S: String;
3365begin
3366  if (FFileName <> '') then
3367  begin
3368    if (Encoding = nil) then
3369      FR := TStreamReader.Create(FileName,TEncoding.Default)
3370    else
3371      FR := TStreamReader.Create(FileName,Encoding);
3372
3373    try
3374      if (FOwnsData) then
3375      begin
3376        FreeMem(FData);
3377        FData := nil;
3378      end;
3379
3380      FOwnsData := True;
3381      S := FR.ReadToEnd;
3382      FN := Length(S);
3383      if (FN > 0) then
3384      begin
3385        GetMem(FData,FN * SizeOf(Char));
3386        Move(S[1],FData^,FN * SizeOf(Char));
3387      end;
3388    finally
3389      FR.Free;
3390    end;
3391  end;
3392end;
3393
3394{ TBitSet }
3395
3396class function TBitSet.BitSetOf(const El: Integer): IBitSet;
3397begin
3398  Result := TBitSet.Create(El + 1);
3399  Result.Add(El);
3400end;
3401
3402class function TBitSet.BitSetOf(const A, B: Integer): IBitSet;
3403begin
3404  Result := TBitSet.Create(Max(A,B) + 1);
3405  Result.Add(A);
3406  Result.Add(B);
3407end;
3408
3409class function TBitSet.BitSetOf(const A, B, C: Integer): IBitSet;
3410begin
3411  Result := TBitSet.Create;
3412  Result.Add(A);
3413  Result.Add(B);
3414  Result.Add(C);
3415end;
3416
3417class function TBitSet.BitSetOf(const A, B, C, D: Integer): IBitSet;
3418begin
3419  Result := TBitSet.Create;
3420  Result.Add(A);
3421  Result.Add(B);
3422  Result.Add(C);
3423  Result.Add(D);
3424end;
3425
3426procedure TBitSet.Add(const El: Integer);
3427var
3428  N: Integer;
3429begin
3430  N := WordNumber(El);
3431  if (N >= Length(FBits)) then
3432    GrowToInclude(El);
3433  FBits[N] := FBits[N] or BitMask(El);
3434end;
3435
3436class function TBitSet.BitMask(const BitNumber: Integer): UInt64;
3437var
3438  BitPosition: Integer;
3439begin
3440  BitPosition := BitNumber and MOD_MASK;
3441  Result := UInt64(1) shl BitPosition;
3442end;
3443
3444function TBitSet.BitSetOr(const A: IBitSet): IBitSet;
3445begin
3446  Result := Clone as IBitSet;
3447  Result.OrInPlace(A);
3448end;
3449
3450function TBitSet.Clone: IANTLRInterface;
3451var
3452  BS: TBitSet;
3453begin
3454  BS := TBitSet.Create;
3455  Result := BS;
3456  SetLength(BS.FBits,Length(FBits));
3457  if (Length(FBits) > 0) then
3458    Move(FBits[0],BS.FBits[0],Length(FBits) * SizeOf(UInt64));
3459end;
3460
3461constructor TBitSet.Create;
3462begin
3463  Create(BITS);
3464end;
3465
3466constructor TBitSet.Create(const ABits: array of UInt64);
3467begin
3468  inherited Create;
3469  SetLength(FBits, Length(ABits));
3470  if (Length(ABits) > 0) then
3471    Move(ABits[0], FBits[0], Length(ABits) * SizeOf(UInt64));
3472end;
3473
3474constructor TBitSet.Create(const AItems: IList<Integer>);
3475var
3476  V: Integer;
3477begin
3478  Create(BITS);
3479  for V in AItems do
3480    Add(V);
3481end;
3482
3483constructor TBitSet.Create(const ANBits: Integer);
3484begin
3485  inherited Create;
3486  SetLength(FBits,((ANBits - 1) shr LOG_BITS) + 1);
3487end;
3488
3489function TBitSet.Equals(Obj: TObject): Boolean;
3490var
3491  OtherSet: TBitSet absolute Obj;
3492  I, N: Integer;
3493begin
3494  Result := False;
3495  if (Obj = nil) or (not (Obj is TBitSet)) then
3496    Exit;
3497
3498  N := Min(Length(FBits), Length(OtherSet.FBits));
3499
3500  // for any bits in common, compare
3501  for I := 0 to N - 1 do
3502  begin
3503    if (FBits[I] <> OtherSet.FBits[I]) then
3504      Exit;
3505  end;
3506
3507  // make sure any extra bits are off
3508  if (Length(FBits) > N) then
3509  begin
3510    for I := N + 1 to Length(FBits) - 1 do
3511    begin
3512      if (FBits[I] <> 0) then
3513        Exit;
3514    end;
3515  end
3516  else
3517    if (Length(OtherSet.FBits) > N) then
3518    begin
3519      for I := N + 1 to Length(OtherSet.FBits) - 1 do
3520      begin
3521        if (OtherSet.FBits[I] <> 0) then
3522          Exit;
3523      end;
3524    end;
3525
3526  Result := True;
3527end;
3528
3529function TBitSet.GetIsNil: Boolean;
3530var
3531  I: Integer;
3532begin
3533  for I := Length(FBits) - 1 downto 0 do
3534    if (FBits[I] <> 0) then
3535    begin
3536      Result := False;
3537      Exit;
3538    end;
3539  Result := True;
3540end;
3541
3542procedure TBitSet.GrowToInclude(const Bit: Integer);
3543var
3544  NewSize: Integer;
3545begin
3546  NewSize := Max(Length(FBits) shl 1,NumWordsToHold(Bit));
3547  SetLength(FBits,NewSize);
3548end;
3549
3550function TBitSet.LengthInLongWords: Integer;
3551begin
3552  Result := Length(FBits);
3553end;
3554
3555function TBitSet.Member(const El: Integer): Boolean;
3556var
3557  N: Integer;
3558begin
3559  if (El < 0) then
3560    Result := False
3561  else
3562  begin
3563    N := WordNumber(El);
3564    if (N >= Length(FBits)) then
3565      Result := False
3566    else
3567      Result := ((FBits[N] and BitMask(El)) <> 0);
3568  end;
3569end;
3570
3571function TBitSet.NumBits: Integer;
3572begin
3573  Result := Length(FBits) shl LOG_BITS;
3574end;
3575
3576class function TBitSet.NumWordsToHold(const El: Integer): Integer;
3577begin
3578  Result := (El shr LOG_BITS) + 1;
3579end;
3580
3581procedure TBitSet.OrInPlace(const A: IBitSet);
3582var
3583  I, M: Integer;
3584  ABits: TUInt64Array;
3585begin
3586  if Assigned(A) then
3587  begin
3588    // If this is smaller than a, grow this first
3589    if (A.LengthInLongWords > Length(FBits)) then
3590      SetLength(FBits,A.LengthInLongWords);
3591    M := Min(Length(FBits), A.LengthInLongWords);
3592    ABits := A.ToPackedArray;
3593    for I := M - 1 downto 0 do
3594      FBits[I] := FBits[I] or ABits[I];
3595  end;
3596end;
3597
3598procedure TBitSet.Remove(const El: Integer);
3599var
3600  N: Integer;
3601begin
3602  N := WordNumber(El);
3603  if (N < Length(FBits)) then
3604    FBits[N] := (FBits[N] and not BitMask(El));
3605end;
3606
3607function TBitSet.Size: Integer;
3608var
3609  I, Bit: Integer;
3610  W: UInt64;
3611begin
3612  Result := 0;
3613  for I := Length(FBits) - 1 downto 0 do
3614  begin
3615    W := FBits[I];
3616    if (W <> 0) then
3617    begin
3618      for Bit := BITS - 1 downto 0 do
3619      begin
3620        if ((W and (UInt64(1) shl Bit)) <> 0) then
3621          Inc(Result);
3622      end;
3623    end;
3624  end;
3625end;
3626
3627function TBitSet.ToArray: TIntegerArray;
3628var
3629  I, En: Integer;
3630begin
3631  SetLength(Result,Size);
3632  En := 0;
3633  for I := 0 to (Length(FBits) shl LOG_BITS) - 1 do
3634  begin
3635    if Member(I) then
3636    begin
3637      Result[En] := I;
3638      Inc(En);
3639    end;
3640  end;
3641end;
3642
3643function TBitSet.ToPackedArray: TUInt64Array;
3644begin
3645  Result := FBits;
3646end;
3647
3648function TBitSet.ToString: String;
3649begin
3650  Result := ToString(nil);
3651end;
3652
3653function TBitSet.ToString(const TokenNames: TStringArray): String;
3654var
3655  Buf: TStringBuilder;
3656  I: Integer;
3657  HavePrintedAnElement: Boolean;
3658begin
3659  HavePrintedAnElement := False;
3660  Buf := TStringBuilder.Create;
3661  try
3662    Buf.Append('{');
3663    for I := 0 to (Length(FBits) shl LOG_BITS) - 1 do
3664    begin
3665      if Member(I) then
3666      begin
3667        if (I > 0) and HavePrintedAnElement then
3668          Buf.Append(',');
3669        if Assigned(TokenNames) then
3670          Buf.Append(TokenNames[I])
3671        else
3672          Buf.Append(I);
3673        HavePrintedAnElement := True;
3674      end;
3675    end;
3676    Buf.Append('}');
3677    Result := Buf.ToString;
3678  finally
3679    Buf.Free;
3680  end;
3681end;
3682
3683class function TBitSet.WordNumber(const Bit: Integer): Integer;
3684begin
3685  Result := Bit shr LOG_BITS; // Bit / BITS
3686end;
3687
3688{ TRecognizerSharedState }
3689
3690constructor TRecognizerSharedState.Create;
3691var
3692  I: Integer;
3693begin
3694  inherited;
3695  SetLength(FFollowing,TBaseRecognizer.INITIAL_FOLLOW_STACK_SIZE);
3696  for I := 0 to TBaseRecognizer.INITIAL_FOLLOW_STACK_SIZE - 1 do
3697    FFollowing[I] := TBitSet.Create;
3698  FFollowingStackPointer := -1;
3699  FLastErrorIndex := -1;
3700  FTokenStartCharIndex := -1;
3701end;
3702
3703function TRecognizerSharedState.GetBacktracking: Integer;
3704begin
3705  Result := FBacktracking;
3706end;
3707
3708function TRecognizerSharedState.GetChannel: Integer;
3709begin
3710  Result := FChannel;
3711end;
3712
3713function TRecognizerSharedState.GetErrorRecovery: Boolean;
3714begin
3715  Result := FErrorRecovery;
3716end;
3717
3718function TRecognizerSharedState.GetFailed: Boolean;
3719begin
3720  Result := FFailed;
3721end;
3722
3723function TRecognizerSharedState.GetFollowing: TBitSetArray;
3724begin
3725  Result := FFollowing;
3726end;
3727
3728function TRecognizerSharedState.GetFollowingStackPointer: Integer;
3729begin
3730  Result := FFollowingStackPointer;
3731end;
3732
3733function TRecognizerSharedState.GetLastErrorIndex: Integer;
3734begin
3735  Result := FLastErrorIndex;
3736end;
3737
3738function TRecognizerSharedState.GetRuleMemo: TDictionaryArray<Integer, Integer>;
3739begin
3740  Result := FRuleMemo;
3741end;
3742
3743function TRecognizerSharedState.GetRuleMemoCount: Integer;
3744begin
3745  Result := Length(FRuleMemo);
3746end;
3747
3748function TRecognizerSharedState.GetSyntaxErrors: Integer;
3749begin
3750  Result := FSyntaxErrors;
3751end;
3752
3753function TRecognizerSharedState.GetText: String;
3754begin
3755  Result := FText;
3756end;
3757
3758function TRecognizerSharedState.GetToken: IToken;
3759begin
3760  Result := FToken;
3761end;
3762
3763function TRecognizerSharedState.GetTokenStartCharIndex: Integer;
3764begin
3765  Result := FTokenStartCharIndex;
3766end;
3767
3768function TRecognizerSharedState.GetTokenStartCharPositionInLine: Integer;
3769begin
3770  Result := FTokenStartCharPositionInLine;
3771end;
3772
3773function TRecognizerSharedState.GetTokenStartLine: Integer;
3774begin
3775  Result := FTokenStartLine;
3776end;
3777
3778function TRecognizerSharedState.GetTokenType: Integer;
3779begin
3780  Result := FTokenType;
3781end;
3782
3783procedure TRecognizerSharedState.SetBacktracking(const Value: Integer);
3784begin
3785  FBacktracking := Value;
3786end;
3787
3788procedure TRecognizerSharedState.SetChannel(const Value: Integer);
3789begin
3790  FChannel := Value;
3791end;
3792
3793procedure TRecognizerSharedState.SetErrorRecovery(const Value: Boolean);
3794begin
3795  FErrorRecovery := Value;
3796end;
3797
3798procedure TRecognizerSharedState.SetFailed(const Value: Boolean);
3799begin
3800  FFailed := Value;
3801end;
3802
3803procedure TRecognizerSharedState.SetFollowing(const Value: TBitSetArray);
3804begin
3805  FFollowing := Value;
3806end;
3807
3808procedure TRecognizerSharedState.SetFollowingStackPointer(const Value: Integer);
3809begin
3810  FFollowingStackPointer := Value;
3811end;
3812
3813procedure TRecognizerSharedState.SetLastErrorIndex(const Value: Integer);
3814begin
3815  FLastErrorIndex := Value;
3816end;
3817
3818procedure TRecognizerSharedState.SetRuleMemoCount(const Value: Integer);
3819begin
3820  SetLength(FRuleMemo, Value);
3821end;
3822
3823procedure TRecognizerSharedState.SetSyntaxErrors(const Value: Integer);
3824begin
3825  FSyntaxErrors := Value;
3826end;
3827
3828procedure TRecognizerSharedState.SetText(const Value: String);
3829begin
3830  FText := Value;
3831end;
3832
3833procedure TRecognizerSharedState.SetToken(const Value: IToken);
3834begin
3835  FToken := Value;
3836end;
3837
3838procedure TRecognizerSharedState.SetTokenStartCharIndex(const Value: Integer);
3839begin
3840  FTokenStartCharIndex := Value;
3841end;
3842
3843procedure TRecognizerSharedState.SetTokenStartCharPositionInLine(
3844  const Value: Integer);
3845begin
3846  FTokenStartCharPositionInLine := Value;
3847end;
3848
3849procedure TRecognizerSharedState.SetTokenStartLine(const Value: Integer);
3850begin
3851  FTokenStartLine := Value;
3852end;
3853
3854procedure TRecognizerSharedState.SetTokenType(const Value: Integer);
3855begin
3856  FTokenType := Value;
3857end;
3858
3859{ TCommonToken }
3860
3861constructor TCommonToken.Create;
3862begin
3863  inherited;
3864  FChannel := TToken.DEFAULT_CHANNEL;
3865  FCharPositionInLine := -1;
3866  FIndex := -1;
3867end;
3868
3869constructor TCommonToken.Create(const ATokenType: Integer);
3870begin
3871  Create;
3872  FTokenType := ATokenType;
3873end;
3874
3875constructor TCommonToken.Create(const AInput: ICharStream; const ATokenType,
3876  AChannel, AStart, AStop: Integer);
3877begin
3878  Create;
3879  FInput := AInput;
3880  FTokenType := ATokenType;
3881  FChannel := AChannel;
3882  FStart := AStart;
3883  FStop := AStop;
3884end;
3885
3886constructor TCommonToken.Create(const ATokenType: Integer; const AText: String);
3887begin
3888  Create;
3889  FTokenType := ATokenType;
3890  FChannel := TToken.DEFAULT_CHANNEL;
3891  FText := AText;
3892end;
3893
3894function TCommonToken.GetChannel: Integer;
3895begin
3896  Result := FChannel;
3897end;
3898
3899function TCommonToken.GetCharPositionInLine: Integer;
3900begin
3901  Result := FCharPositionInLine;
3902end;
3903
3904function TCommonToken.GetInputStream: ICharStream;
3905begin
3906  Result := FInput;
3907end;
3908
3909function TCommonToken.GetLine: Integer;
3910begin
3911  Result := FLine;
3912end;
3913
3914function TCommonToken.GetStartIndex: Integer;
3915begin
3916  Result := FStart;
3917end;
3918
3919function TCommonToken.GetStopIndex: Integer;
3920begin
3921  Result := FStop;
3922end;
3923
3924function TCommonToken.GetText: String;
3925begin
3926  if (FText <> '') then
3927    Result := FText
3928  else
3929    if (FInput = nil) then
3930      Result := ''
3931    else
3932      Result := FInput.Substring(FStart, FStop);
3933end;
3934
3935function TCommonToken.GetTokenIndex: Integer;
3936begin
3937  Result := FIndex;
3938end;
3939
3940function TCommonToken.GetTokenType: Integer;
3941begin
3942  Result := FTokenType;
3943end;
3944
3945procedure TCommonToken.SetChannel(const Value: Integer);
3946begin
3947  FChannel := Value;
3948end;
3949
3950procedure TCommonToken.SetCharPositionInLine(const Value: Integer);
3951begin
3952  FCharPositionInLine := Value;
3953end;
3954
3955procedure TCommonToken.SetInputStream(const Value: ICharStream);
3956begin
3957  FInput := Value;
3958end;
3959
3960procedure TCommonToken.SetLine(const Value: Integer);
3961begin
3962  FLine := Value;
3963end;
3964
3965procedure TCommonToken.SetStartIndex(const Value: Integer);
3966begin
3967  FStart := Value;
3968end;
3969
3970procedure TCommonToken.SetStopIndex(const Value: Integer);
3971begin
3972  FStop := Value;
3973end;
3974
3975procedure TCommonToken.SetText(const Value: String);
3976begin
3977  (* Override the text for this token.  The property getter
3978   * will return this text rather than pulling from the buffer.
3979   * Note that this does not mean that start/stop indexes are
3980   * not valid.  It means that the input was converted to a new
3981   * string in the token object.
3982   *)
3983  FText := Value;
3984end;
3985
3986procedure TCommonToken.SetTokenIndex(const Value: Integer);
3987begin
3988  FIndex := Value;
3989end;
3990
3991procedure TCommonToken.SetTokenType(const Value: Integer);
3992begin
3993  FTokenType := Value;
3994end;
3995
3996function TCommonToken.ToString: String;
3997var
3998  ChannelStr, Txt: String;
3999begin
4000  if (FChannel > 0) then
4001    ChannelStr := ',channel=' + IntToStr(FChannel)
4002  else
4003    ChannelStr := '';
4004
4005  Txt := GetText;
4006  if (Txt <> '') then
4007  begin
4008    Txt := ReplaceStr(Txt,#10,'\n');
4009    Txt := ReplaceStr(Txt,#13,'\r');
4010    Txt := ReplaceStr(Txt,#9,'\t');
4011  end else
4012    Txt := '<no text>';
4013
4014  Result := Format('[@%d,%d:%d=''%s'',<%d>%s,%d:%d]',
4015    [FIndex,FStart,FStop,Txt,FTokenType,ChannelStr,FLine,FCharPositionInLine]);
4016end;
4017
4018constructor TCommonToken.Create(const AOldToken: IToken);
4019var
4020  OldCommonToken: ICommonToken;
4021begin
4022  Create;
4023  FText := AOldToken.Text;
4024  FTokenType := AOldToken.TokenType;
4025  FLine := AOldToken.Line;
4026  FIndex := AOldToken.TokenIndex;
4027  FCharPositionInLine := AOldToken.CharPositionInLine;
4028  FChannel := AOldToken.Channel;
4029  if Supports(AOldToken, ICommonToken, OldCommonToken) then
4030  begin
4031    FStart := OldCommonToken.StartIndex;
4032    FStop := OldCommonToken.StopIndex;
4033  end;
4034end;
4035
4036{ TClassicToken }
4037
4038constructor TClassicToken.Create(const AOldToken: IToken);
4039begin
4040  inherited Create;
4041  FText := AOldToken.Text;
4042  FTokenType := AOldToken.TokenType;
4043  FLine := AOldToken.Line;
4044  FCharPositionInLine := AOldToken.CharPositionInLine;
4045  FChannel := AOldToken.Channel;
4046end;
4047
4048constructor TClassicToken.Create(const ATokenType: Integer);
4049begin
4050  inherited Create;
4051  FTokenType := ATokenType;
4052end;
4053
4054constructor TClassicToken.Create(const ATokenType: Integer; const AText: String;
4055  const AChannel: Integer);
4056begin
4057  inherited Create;
4058  FTokenType := ATokenType;
4059  FText := AText;
4060  FChannel := AChannel;
4061end;
4062
4063constructor TClassicToken.Create(const ATokenType: Integer;
4064  const AText: String);
4065begin
4066  inherited Create;
4067  FTokenType := ATokenType;
4068  FText := AText;
4069end;
4070
4071function TClassicToken.GetChannel: Integer;
4072begin
4073  Result := FChannel;
4074end;
4075
4076function TClassicToken.GetCharPositionInLine: Integer;
4077begin
4078  Result := FCharPositionInLine;
4079end;
4080
4081function TClassicToken.GetInputStream: ICharStream;
4082begin
4083  // No default implementation
4084  Result := nil;
4085end;
4086
4087function TClassicToken.GetLine: Integer;
4088begin
4089  Result := FLine;
4090end;
4091
4092function TClassicToken.GetText: String;
4093begin
4094  Result := FText;
4095end;
4096
4097function TClassicToken.GetTokenIndex: Integer;
4098begin
4099  Result := FIndex;
4100end;
4101
4102function TClassicToken.GetTokenType: Integer;
4103begin
4104  Result := FTokenType;
4105end;
4106
4107procedure TClassicToken.SetChannel(const Value: Integer);
4108begin
4109  FChannel := Value;
4110end;
4111
4112procedure TClassicToken.SetCharPositionInLine(const Value: Integer);
4113begin
4114  FCharPositionInLine := Value;
4115end;
4116
4117procedure TClassicToken.SetInputStream(const Value: ICharStream);
4118begin
4119  // No default implementation
4120end;
4121
4122procedure TClassicToken.SetLine(const Value: Integer);
4123begin
4124  FLine := Value;
4125end;
4126
4127procedure TClassicToken.SetText(const Value: String);
4128begin
4129  FText := Value;
4130end;
4131
4132procedure TClassicToken.SetTokenIndex(const Value: Integer);
4133begin
4134  FIndex := Value;
4135end;
4136
4137procedure TClassicToken.SetTokenType(const Value: Integer);
4138begin
4139  FTokenType := Value;
4140end;
4141
4142function TClassicToken.ToString: String;
4143var
4144  ChannelStr, Txt: String;
4145begin
4146  if (FChannel > 0) then
4147    ChannelStr := ',channel=' + IntToStr(FChannel)
4148  else
4149    ChannelStr := '';
4150  Txt := FText;
4151  if (Txt <> '') then
4152  begin
4153    Txt := ReplaceStr(Txt,#10,'\n');
4154    Txt := ReplaceStr(Txt,#13,'\r');
4155    Txt := ReplaceStr(Txt,#9,'\t');
4156  end else
4157    Txt := '<no text>';
4158
4159  Result := Format('[@%d,''%s'',<%d>%s,%d:%d]',
4160    [FIndex,Txt,FTokenType,ChannelStr,FLine,FCharPositionInLine]);
4161end;
4162
4163{ TToken }
4164
4165class procedure TToken.Initialize;
4166begin
4167  EOF_TOKEN := TCommonToken.Create(EOF);
4168  INVALID_TOKEN := TCommonToken.Create(INVALID_TOKEN_TYPE);
4169  SKIP_TOKEN := TCommonToken.Create(INVALID_TOKEN_TYPE);
4170end;
4171
4172{ TBaseRecognizer }
4173
4174constructor TBaseRecognizer.Create;
4175begin
4176  inherited;
4177  FState := TRecognizerSharedState.Create;
4178end;
4179
4180function TBaseRecognizer.AlreadyParsedRule(const Input: IIntStream;
4181  const RuleIndex: Integer): Boolean;
4182var
4183  StopIndex: Integer;
4184begin
4185  StopIndex := GetRuleMemoization(RuleIndex, Input.Index);
4186  Result := (StopIndex <> MEMO_RULE_UNKNOWN);
4187  if Result then
4188  begin
4189    if (StopIndex = MEMO_RULE_FAILED) then
4190      FState.Failed := True
4191    else
4192      Input.Seek(StopIndex + 1);  // jump to one past stop token
4193  end;
4194end;
4195
4196procedure TBaseRecognizer.BeginBacktrack(const Level: Integer);
4197begin
4198  // No defeault implementation
4199end;
4200
4201procedure TBaseRecognizer.BeginResync;
4202begin
4203  // No defeault implementation
4204end;
4205
4206procedure TBaseRecognizer.ConsumeUntil(const Input: IIntStream;
4207  const TokenType: Integer);
4208var
4209  TType: Integer;
4210begin
4211  TType := Input.LA(1);
4212  while (TType <> TToken.EOF) and (TType <> TokenType) do
4213  begin
4214    Input.Consume;
4215    TType := Input.LA(1);
4216  end;
4217end;
4218
4219function TBaseRecognizer.CombineFollows(const Exact: Boolean): IBitSet;
4220var
4221  I, Top: Integer;
4222  LocalFollowSet: IBitSet;
4223begin
4224  Top := FState.FollowingStackPointer;
4225  Result := TBitSet.Create;
4226  for I := Top downto 0 do
4227  begin
4228    LocalFollowSet := FState.Following[I];
4229    Result.OrInPlace(LocalFollowSet);
4230    if (Exact) then
4231    begin
4232      // can we see end of rule?
4233      if LocalFollowSet.Member(TToken.EOR_TOKEN_TYPE) then
4234      begin
4235        // Only leave EOR in set if at top (start rule); this lets
4236        // us know if have to include follow(start rule); i.e., EOF
4237        if (I > 0) then
4238          Result.Remove(TToken.EOR_TOKEN_TYPE);
4239      end
4240      else
4241        // can't see end of rule, quit
4242        Break;
4243    end;
4244  end;
4245end;
4246
4247function TBaseRecognizer.ComputeContextSensitiveRuleFOLLOW: IBitSet;
4248begin
4249  Result := CombineFollows(True);
4250end;
4251
4252function TBaseRecognizer.ComputeErrorRecoverySet: IBitSet;
4253begin
4254  Result := CombineFollows(False);
4255end;
4256
4257procedure TBaseRecognizer.ConsumeUntil(const Input: IIntStream;
4258  const BitSet: IBitSet);
4259var
4260  TType: Integer;
4261begin
4262  TType := Input.LA(1);
4263  while (TType <> TToken.EOF) and (not BitSet.Member(TType)) do
4264  begin
4265    Input.Consume;
4266    TType := Input.LA(1);
4267  end;
4268end;
4269
4270constructor TBaseRecognizer.Create(const AState: IRecognizerSharedState);
4271begin
4272  if (AState = nil) then
4273    Create
4274  else
4275  begin
4276    inherited Create;
4277    FState := AState;
4278  end;
4279end;
4280
4281procedure TBaseRecognizer.DisplayRecognitionError(
4282  const TokenNames: TStringArray; const E: ERecognitionException);
4283var
4284  Hdr, Msg: String;
4285begin
4286  Hdr := GetErrorHeader(E);
4287  Msg := GetErrorMessage(E, TokenNames);
4288  EmitErrorMessage(Hdr + ' ' + Msg);
4289end;
4290
4291procedure TBaseRecognizer.EmitErrorMessage(const Msg: String);
4292begin
4293  WriteLn(Msg);
4294end;
4295
4296procedure TBaseRecognizer.EndBacktrack(const Level: Integer;
4297  const Successful: Boolean);
4298begin
4299  // No defeault implementation
4300end;
4301
4302procedure TBaseRecognizer.EndResync;
4303begin
4304  // No defeault implementation
4305end;
4306
4307function TBaseRecognizer.GetBacktrackingLevel: Integer;
4308begin
4309  Result := FState.Backtracking;
4310end;
4311
4312function TBaseRecognizer.GetCurrentInputSymbol(
4313  const Input: IIntStream): IANTLRInterface;
4314begin
4315  // No defeault implementation
4316  Result := nil;
4317end;
4318
4319function TBaseRecognizer.GetErrorHeader(const E: ERecognitionException): String;
4320begin
4321  Result := 'line ' + IntToStr(E.Line) + ':' + IntToStr(E.CharPositionInLine);
4322end;
4323
4324function TBaseRecognizer.GetErrorMessage(const E: ERecognitionException;
4325  const TokenNames: TStringArray): String;
4326var
4327  UTE: EUnwantedTokenException absolute E;
4328  MTE: EMissingTokenException absolute E;
4329  MMTE: EMismatchedTokenException absolute E;
4330  MTNE: EMismatchedTreeNodeException absolute E;
4331  NVAE: ENoViableAltException absolute E;
4332  EEE: EEarlyExitException absolute E;
4333  MSE: EMismatchedSetException absolute E;
4334  MNSE: EMismatchedNotSetException absolute E;
4335  FPE: EFailedPredicateException absolute E;
4336  TokenName: String;
4337begin
4338  Result := E.Message;
4339  if (E is EUnwantedTokenException) then
4340  begin
4341    if (UTE.Expecting = TToken.EOF) then
4342      TokenName := 'EOF'
4343    else
4344      TokenName := TokenNames[UTE.Expecting];
4345    Result := 'extraneous input ' + GetTokenErrorDisplay(UTE.UnexpectedToken)
4346      + ' expecting ' + TokenName;
4347  end
4348  else
4349    if (E is EMissingTokenException) then
4350    begin
4351      if (MTE.Expecting = TToken.EOF) then
4352        TokenName := 'EOF'
4353      else
4354        TokenName := TokenNames[MTE.Expecting];
4355      Result := 'missing ' + TokenName + ' at ' + GetTokenErrorDisplay(E.Token);
4356    end
4357    else
4358      if (E is EMismatchedTokenException) then
4359      begin
4360        if (MMTE.Expecting = TToken.EOF) then
4361          TokenName := 'EOF'
4362        else
4363          TokenName := TokenNames[MMTE.Expecting];
4364        Result := 'mismatched input ' + GetTokenErrorDisplay(E.Token)
4365          + ' expecting ' + TokenName;
4366      end
4367      else
4368        if (E is EMismatchedTreeNodeException) then
4369        begin
4370          if (MTNE.Expecting = TToken.EOF) then
4371            Result := 'EOF'
4372          else
4373            Result := TokenNames[MTNE.Expecting];
4374          // The ternary operator is only necessary because of a bug in the .NET framework
4375          Result := 'mismatched tree node: ';
4376          if (MTNE.Node <> nil) and (MTNE.Node.ToString <> '') then
4377            Result := Result + MTNE.Node.ToString;
4378          Result := Result + ' expecting ' + TokenName;
4379        end
4380        else
4381          if (E is ENoViableAltException) then
4382          begin
4383            // for development, can add "decision=<<"+nvae.grammarDecisionDescription+">>"
4384            // and "(decision="+nvae.decisionNumber+") and
4385            // "state "+nvae.stateNumber
4386            Result := 'no viable alternative at input ' + GetTokenErrorDisplay(E.Token);
4387          end
4388          else
4389            if (E is EEarlyExitException) then
4390            begin
4391              // for development, can add "(decision="+eee.decisionNumber+")"
4392              Result := 'required (...)+ loop did not  match anyting at input '
4393                + GetTokenErrorDisplay(E.Token);
4394            end else
4395              if (E is EMismatchedSetException) then
4396              begin
4397                Result := 'mismatched input ' + GetTokenErrorDisplay(E.Token)
4398                  + ' expecting set ' + MSE.Expecting.ToString;
4399              end
4400              else
4401                if (E is EMismatchedNotSetException) then
4402                begin
4403                  Result := 'mismatched input ' + GetTokenErrorDisplay(E.Token)
4404                    + ' expecting set ' + MSE.Expecting.ToString;
4405                end
4406                else
4407                  if (E is EFailedPredicateException) then
4408                  begin
4409                    Result := 'rule ' + FPE.RuleName
4410                      + ' failed predicate: {' + FPE.PredicateText + '}?';
4411                  end;
4412end;
4413
4414function TBaseRecognizer.GetGrammarFileName: String;
4415begin
4416  // No defeault implementation
4417  Result := '';
4418end;
4419
4420function TBaseRecognizer.GetMissingSymbol(const Input: IIntStream;
4421  const E: ERecognitionException; const ExpectedTokenType: Integer;
4422  const Follow: IBitSet): IANTLRInterface;
4423begin
4424  // No defeault implementation
4425  Result := nil;
4426end;
4427
4428function TBaseRecognizer.GetNumberOfSyntaxErrors: Integer;
4429begin
4430  Result := FState.SyntaxErrors;
4431end;
4432
4433function TBaseRecognizer.GetRuleMemoization(const RuleIndex,
4434  RuleStartIndex: Integer): Integer;
4435var
4436  Dict: IDictionary<Integer, Integer>;
4437begin
4438  Dict := FState.RuleMemo[RuleIndex];
4439  if (Dict = nil) then
4440  begin
4441    Dict := TDictionary<Integer, Integer>.Create;
4442    FState.RuleMemo[RuleIndex] := Dict;
4443  end;
4444  if (not Dict.TryGetValue(RuleStartIndex, Result)) then
4445    Result := MEMO_RULE_UNKNOWN;
4446end;
4447
4448function TBaseRecognizer.GetRuleMemoizationChaceSize: Integer;
4449var
4450  RuleMap: IDictionary<Integer, Integer>;
4451begin
4452  Result := 0;
4453  if Assigned(FState.RuleMemo) then
4454  begin
4455    for RuleMap in FState.RuleMemo do
4456      if Assigned(RuleMap) then
4457        Inc(Result,RuleMap.Count);  // how many input indexes are recorded?
4458  end;
4459end;
4460
4461function TBaseRecognizer.GetState: IRecognizerSharedState;
4462begin
4463  Result := FState;
4464end;
4465
4466function TBaseRecognizer.GetTokenErrorDisplay(const T: IToken): String;
4467begin
4468  Result := T.Text;
4469  if (Result = '') then
4470  begin
4471    if (T.TokenType = TToken.EOF) then
4472      Result := '<EOF>'
4473    else
4474      Result := '<' + IntToStr(T.TokenType) + '>';
4475  end;
4476  Result := ReplaceStr(Result,#10,'\n');
4477  Result := ReplaceStr(Result,#13,'\r');
4478  Result := ReplaceStr(Result,#9,'\t');
4479  Result := '''' + Result + '''';
4480end;
4481
4482function TBaseRecognizer.GetTokenNames: TStringArray;
4483begin
4484  // no default implementation
4485  Result := nil;
4486end;
4487
4488function TBaseRecognizer.Match(const Input: IIntStream;
4489  const TokenType: Integer; const Follow: IBitSet): IANTLRInterface;
4490begin
4491  Result := GetCurrentInputSymbol(Input);
4492  if (Input.LA(1) = TokenType) then
4493  begin
4494    Input.Consume;
4495    FState.ErrorRecovery := False;
4496    FState.Failed := False;
4497  end else
4498  begin
4499    if (FState.Backtracking > 0) then
4500      FState.Failed := True
4501    else
4502    begin
4503      Mismatch(Input, TokenType, Follow);
4504      Result := RecoverFromMismatchedToken(Input, TokenType, Follow);
4505    end;
4506  end;
4507end;
4508
4509procedure TBaseRecognizer.MatchAny(const Input: IIntStream);
4510begin
4511  FState.ErrorRecovery := False;
4512  FState.Failed := False;
4513  Input.Consume;
4514end;
4515
4516procedure TBaseRecognizer.Memoize(const Input: IIntStream; const RuleIndex,
4517  RuleStartIndex: Integer);
4518var
4519  StopTokenIndex: Integer;
4520  Dict: IDictionary<Integer, Integer>;
4521begin
4522  Dict := FState.RuleMemo[RuleIndex];
4523  if Assigned(Dict) then
4524  begin
4525    if FState.Failed then
4526      StopTokenIndex := MEMO_RULE_FAILED
4527    else
4528      StopTokenIndex := Input.Index - 1;
4529    Dict.AddOrSetValue(RuleStartIndex, StopTokenIndex);
4530  end;
4531end;
4532
4533procedure TBaseRecognizer.Mismatch(const Input: IIntStream;
4534  const TokenType: Integer; const Follow: IBitSet);
4535begin
4536  if MismatchIsUnwantedToken(Input, TokenType) then
4537    raise EUnwantedTokenException.Create(TokenType, Input)
4538  else
4539    if MismatchIsMissingToken(Input, Follow) then
4540      raise EMissingTokenException.Create(TokenType, Input, nil)
4541    else
4542      raise EMismatchedTokenException.Create(TokenType, Input);
4543end;
4544
4545function TBaseRecognizer.MismatchIsMissingToken(const Input: IIntStream;
4546  const Follow: IBitSet): Boolean;
4547var
4548  ViableTokensFollowingThisRule, Follow2: IBitSet;
4549begin
4550  if (Follow = nil) then
4551    // we have no information about the follow; we can only consume
4552    // a single token and hope for the best
4553    Result := False
4554  else
4555  begin
4556    Follow2 := Follow;
4557    // compute what can follow this grammar element reference
4558    if (Follow.Member(TToken.EOR_TOKEN_TYPE)) then
4559    begin
4560      ViableTokensFollowingThisRule := ComputeContextSensitiveRuleFOLLOW();
4561      Follow2 := Follow.BitSetOr(ViableTokensFollowingThisRule);
4562      if (FState.FollowingStackPointer >= 0) then
4563        // remove EOR if we're not the start symbol
4564        Follow2.Remove(TToken.EOR_TOKEN_TYPE);
4565    end;
4566
4567    // if current token is consistent with what could come after set
4568    // then we know we're missing a token; error recovery is free to
4569    // "insert" the missing token
4570
4571    // BitSet cannot handle negative numbers like -1 (EOF) so I leave EOR
4572    // in follow set to indicate that the fall of the start symbol is
4573    // in the set (EOF can follow).
4574    if (Follow2.Member(Input.LA(1)) or Follow2.Member(TToken.EOR_TOKEN_TYPE)) then
4575      Result := True
4576    else
4577      Result := False;
4578  end;
4579end;
4580
4581function TBaseRecognizer.MismatchIsUnwantedToken(const Input: IIntStream;
4582  const TokenType: Integer): Boolean;
4583begin
4584  Result := (Input.LA(2) = TokenType);
4585end;
4586
4587procedure TBaseRecognizer.PushFollow(const FSet: IBitSet);
4588var
4589  F: TBitSetArray;
4590  I: Integer;
4591begin
4592  if ((FState.FollowingStackPointer + 1) >= Length(FState.Following)) then
4593  begin
4594    SetLength(F, Length(FState.Following) * 2);
4595    FillChar(F[0], Length(F) * SizeOf(IBitSet), 0);
4596    for I := 0 to Length(FState.Following) - 1 do
4597      F[I] := FState.Following[I];
4598    FState.Following := F;
4599  end;
4600  FState.FollowingStackPointer := FState.FollowingStackPointer + 1;
4601  FState.Following[FState.FollowingStackPointer] := FSet;
4602end;
4603
4604procedure TBaseRecognizer.Recover(const Input: IIntStream;
4605  const RE: ERecognitionException);
4606var
4607  FollowSet: IBitSet;
4608begin
4609  if (FState.LastErrorIndex = Input.Index) then
4610    // uh oh, another error at same token index; must be a case
4611    // where LT(1) is in the recovery token set so nothing is
4612    // consumed; consume a single token so at least to prevent
4613    // an infinite loop; this is a failsafe.
4614    Input.Consume;
4615  FState.LastErrorIndex := Input.Index;
4616  FollowSet := ComputeErrorRecoverySet;
4617  BeginResync;
4618  ConsumeUntil(Input,FollowSet);
4619  EndResync;
4620end;
4621
4622function TBaseRecognizer.RecoverFromMismatchedSet(const Input: IIntStream;
4623  const E: ERecognitionException; const Follow: IBitSet): IANTLRInterface;
4624begin
4625  if MismatchIsMissingToken(Input, Follow) then
4626  begin
4627    ReportError(E);
4628    // we don't know how to conjure up a token for sets yet
4629    Result := GetMissingSymbol(Input, E, TToken.INVALID_TOKEN_TYPE, Follow);
4630  end
4631  else
4632  begin
4633    // TODO do single token deletion like above for Token mismatch
4634    Result := nil;
4635    raise E;
4636  end;
4637end;
4638
4639function TBaseRecognizer.RecoverFromMismatchedToken(const Input: IIntStream;
4640  const TokenType: Integer; const Follow: IBitSet): IANTLRInterface;
4641var
4642  E: ERecognitionException;
4643begin
4644  // if next token is what we are looking for then "delete" this token
4645  if MismatchIsUnwantedToken(Input, TokenType) then
4646  begin
4647    E := EUnwantedTokenException.Create(TokenType, Input);
4648    BeginResync;
4649    Input.Consume; // simply delete extra token
4650    EndResync;
4651    ReportError(E);  // report after consuming so AW sees the token in the exception
4652    // we want to return the token we're actually matching
4653    Result := GetCurrentInputSymbol(Input);
4654    Input.Consume;  // move past ttype token as if all were ok
4655  end
4656  else
4657  begin
4658    // can't recover with single token deletion, try insertion
4659    if MismatchIsMissingToken(Input, Follow) then
4660    begin
4661      E := nil;
4662      Result := GetMissingSymbol(Input, E, TokenType, Follow);
4663      E := EMissingTokenException.Create(TokenType, Input, Result);
4664      ReportError(E);  // report after inserting so AW sees the token in the exception
4665    end
4666    else
4667    begin
4668      // even that didn't work; must throw the exception
4669      raise EMismatchedTokenException.Create(TokenType, Input);
4670    end;
4671  end;
4672end;
4673
4674procedure TBaseRecognizer.ReportError(const E: ERecognitionException);
4675begin
4676  // if we've already reported an error and have not matched a token
4677  // yet successfully, don't report any errors.
4678  if (not FState.ErrorRecovery) then
4679  begin
4680    FState.SyntaxErrors := FState.SyntaxErrors + 1; // don't count spurious
4681    FState.ErrorRecovery := True;
4682    DisplayRecognitionError(GetTokenNames, E);
4683  end;
4684end;
4685
4686procedure TBaseRecognizer.Reset;
4687var
4688  I: Integer;
4689begin
4690  // wack everything related to error recovery
4691  if (FState = nil) then
4692    Exit;  // no shared state work to do
4693
4694  FState.FollowingStackPointer := -1;
4695  FState.ErrorRecovery := False;
4696  FState.LastErrorIndex := -1;
4697  FState.Failed := False;
4698  FState.SyntaxErrors := 0;
4699
4700  // wack everything related to backtracking and memoization
4701  FState.Backtracking := 0;
4702  if Assigned(FState.RuleMemo) then
4703    for I := 0 to Length(FState.RuleMemo) - 1 do
4704    begin
4705      // wipe cache
4706      FState.RuleMemo[I] := nil;
4707    end;
4708end;
4709
4710function TBaseRecognizer.ToStrings(const Tokens: IList<IToken>): IList<String>;
4711var
4712  Token: IToken;
4713begin
4714  if (Tokens = nil) then
4715    Result := nil
4716  else
4717  begin
4718    Result := TList<String>.Create;
4719    for Token in Tokens do
4720      Result.Add(Token.Text);
4721  end;
4722end;
4723
4724procedure TBaseRecognizer.TraceIn(const RuleName: String;
4725  const RuleIndex: Integer; const InputSymbol: String);
4726begin
4727  Write('enter ' + RuleName + ' ' + InputSymbol);
4728  if (FState.Failed) then
4729    WriteLn(' failed=True');
4730  if (FState.Backtracking > 0) then
4731    Write(' backtracking=' + IntToStr(FState.Backtracking));
4732  WriteLn;
4733end;
4734
4735procedure TBaseRecognizer.TraceOut(const RuleName: String;
4736  const RuleIndex: Integer; const InputSymbol: String);
4737begin
4738  Write('exit ' + RuleName + ' ' + InputSymbol);
4739  if (FState.Failed) then
4740    WriteLn(' failed=True');
4741  if (FState.Backtracking > 0) then
4742    Write(' backtracking=' + IntToStr(FState.Backtracking));
4743  WriteLn;
4744end;
4745
4746{ TCommonTokenStream }
4747
4748procedure TCommonTokenStream.Consume;
4749begin
4750  if (FP < FTokens.Count) then
4751  begin
4752    Inc(FP);
4753    FP := SkipOffTokenChannels(FP); // leave p on valid token
4754  end;
4755end;
4756
4757constructor TCommonTokenStream.Create;
4758begin
4759  inherited;
4760  FP := -1;
4761  FChannel := TToken.DEFAULT_CHANNEL;
4762  FTokens := TList<IToken>.Create;
4763  FTokens.Capacity := 500;
4764end;
4765
4766constructor TCommonTokenStream.Create(const ATokenSource: ITokenSource);
4767begin
4768  Create;
4769  FTokenSource := ATokenSource;
4770end;
4771
4772procedure TCommonTokenStream.DiscardOffChannelTokens(const Discard: Boolean);
4773begin
4774  FDiscardOffChannelTokens := Discard;
4775end;
4776
4777procedure TCommonTokenStream.DiscardTokenType(const TType: Integer);
4778begin
4779  if (FDiscardSet = nil) then
4780    FDiscardSet := THashList<Integer, Integer>.Create;
4781  FDiscardSet.Add(TType, TType);
4782end;
4783
4784procedure TCommonTokenStream.FillBuffer;
4785var
4786  Index: Integer;
4787  T: IToken;
4788  Discard: Boolean;
4789begin
4790  Index := 0;
4791  T := FTokenSource.NextToken;
4792  while Assigned(T) and (T.TokenType <> Integer(cscEOF)) do
4793  begin
4794    Discard := False;
4795    // is there a channel override for token type?
4796    if Assigned(FChannelOverrideMap) then
4797      if FChannelOverrideMap.ContainsKey(T.TokenType) then
4798        T.Channel := FChannelOverrideMap[T.TokenType];
4799
4800    if Assigned(FDiscardSet) and FDiscardSet.ContainsKey(T.TokenType) then
4801      Discard := True
4802    else
4803      if FDiscardOffChannelTokens and (T.Channel <> FChannel) then
4804        Discard := True;
4805
4806    if (not Discard) then
4807    begin
4808      T.TokenIndex := Index;
4809      FTokens.Add(T);
4810      Inc(Index);
4811    end;
4812
4813    T := FTokenSource.NextToken;
4814  end;
4815  // leave p pointing at first token on channel
4816  FP := 0;
4817  FP := SkipOffTokenChannels(FP);
4818end;
4819
4820function TCommonTokenStream.Get(const I: Integer): IToken;
4821begin
4822  Result := FTokens[I];
4823end;
4824
4825function TCommonTokenStream.GetSourceName: String;
4826begin
4827  Result := FTokenSource.SourceName;
4828end;
4829
4830function TCommonTokenStream.GetTokens(const Start, Stop: Integer;
4831  const Types: IList<Integer>): IList<IToken>;
4832begin
4833  Result := GetTokens(Start, Stop, TBitSet.Create(Types));
4834end;
4835
4836function TCommonTokenStream.GetTokens(const Start, Stop,
4837  TokenType: Integer): IList<IToken>;
4838begin
4839  Result := GetTokens(Start, Stop, TBitSet.BitSetOf(TokenType));
4840end;
4841
4842function TCommonTokenStream.GetTokens(const Start, Stop: Integer;
4843  const Types: IBitSet): IList<IToken>;
4844var
4845  I, StartIndex, StopIndex: Integer;
4846  T: IToken;
4847begin
4848  if (FP = -1) then
4849    FillBuffer;
4850  StopIndex := Min(Stop,FTokens.Count - 1);
4851  StartIndex := Max(Start,0);
4852  if (StartIndex > StopIndex) then
4853    Result := nil
4854  else
4855  begin
4856    Result := TList<IToken>.Create;
4857    for I := StartIndex to StopIndex do
4858    begin
4859      T := FTokens[I];
4860      if (Types = nil) or Types.Member(T.TokenType) then
4861        Result.Add(T);
4862    end;
4863    if (Result.Count = 0) then
4864      Result := nil;
4865  end;
4866end;
4867
4868function TCommonTokenStream.GetTokens: IList<IToken>;
4869begin
4870  if (FP = -1) then
4871    FillBuffer;
4872  Result := FTokens;
4873end;
4874
4875function TCommonTokenStream.GetTokens(const Start,
4876  Stop: Integer): IList<IToken>;
4877begin
4878  Result := GetTokens(Start, Stop, IBitSet(nil));
4879end;
4880
4881function TCommonTokenStream.GetTokenSource: ITokenSource;
4882begin
4883  Result := FTokenSource;
4884end;
4885
4886function TCommonTokenStream.Index: Integer;
4887begin
4888  Result := FP;
4889end;
4890
4891function TCommonTokenStream.LA(I: Integer): Integer;
4892begin
4893  Result := LT(I).TokenType;
4894end;
4895
4896function TCommonTokenStream.LAChar(I: Integer): Char;
4897begin
4898  Result := Char(LA(I));
4899end;
4900
4901function TCommonTokenStream.LB(const K: Integer): IToken;
4902var
4903  I, N: Integer;
4904begin
4905  if (FP = -1) then
4906    FillBuffer;
4907  if (K = 0) then
4908    Result := nil
4909  else
4910    if ((FP - K) < 0) then
4911      Result := nil
4912    else
4913    begin
4914      I := FP;
4915      N := 1;
4916      // find k good tokens looking backwards
4917      while (N <= K) do
4918      begin
4919        // skip off-channel tokens
4920        I := SkipOffTokenChannelsReverse(I - 1); // leave p on valid token
4921        Inc(N);
4922      end;
4923      if (I < 0) then
4924        Result := nil
4925      else
4926        Result := FTokens[I];
4927    end;
4928end;
4929
4930function TCommonTokenStream.LT(const K: Integer): IToken;
4931var
4932  I, N: Integer;
4933begin
4934  if (FP = -1) then
4935    FillBuffer;
4936  if (K = 0) then
4937    Result := nil
4938  else
4939    if (K < 0) then
4940      Result := LB(-K)
4941    else
4942      if ((FP + K - 1) >= FTokens.Count) then
4943        Result := TToken.EOF_TOKEN
4944      else
4945      begin
4946        I := FP;
4947        N := 1;
4948        // find k good tokens
4949        while (N < K) do
4950        begin
4951          // skip off-channel tokens
4952          I := SkipOffTokenChannels(I + 1); // leave p on valid token
4953          Inc(N);
4954        end;
4955        if (I >= FTokens.Count) then
4956          Result := TToken.EOF_TOKEN
4957        else
4958          Result := FTokens[I];
4959      end;
4960end;
4961
4962function TCommonTokenStream.Mark: Integer;
4963begin
4964  if (FP = -1) then
4965    FillBuffer;
4966  FLastMarker := Index;
4967  Result := FLastMarker;
4968end;
4969
4970procedure TCommonTokenStream.Release(const Marker: Integer);
4971begin
4972  // no resources to release
4973end;
4974
4975procedure TCommonTokenStream.Reset;
4976begin
4977  FP := 0;
4978  FLastMarker := 0;
4979end;
4980
4981procedure TCommonTokenStream.Rewind(const Marker: Integer);
4982begin
4983  Seek(Marker);
4984end;
4985
4986procedure TCommonTokenStream.Rewind;
4987begin
4988  Seek(FLastMarker);
4989end;
4990
4991procedure TCommonTokenStream.Seek(const Index: Integer);
4992begin
4993  FP := Index;
4994end;
4995
4996procedure TCommonTokenStream.SetTokenSource(const Value: ITokenSource);
4997begin
4998  FTokenSource := Value;
4999  FTokens.Clear;
5000  FP := -1;
5001  FChannel := TToken.DEFAULT_CHANNEL;
5002end;
5003
5004procedure TCommonTokenStream.SetTokenTypeChannel(const TType, Channel: Integer);
5005begin
5006  if (FChannelOverrideMap = nil) then
5007    FChannelOverrideMap := TDictionary<Integer, Integer>.Create;
5008  FChannelOverrideMap[TType] := Channel;
5009end;
5010
5011function TCommonTokenStream.Size: Integer;
5012begin
5013  Result := FTokens.Count;
5014end;
5015
5016function TCommonTokenStream.SkipOffTokenChannels(const I: Integer): Integer;
5017var
5018  N: Integer;
5019begin
5020  Result := I;
5021  N := FTokens.Count;
5022  while (Result < N) and (FTokens[Result].Channel <> FChannel) do
5023    Inc(Result);
5024end;
5025
5026function TCommonTokenStream.SkipOffTokenChannelsReverse(
5027  const I: Integer): Integer;
5028begin
5029  Result := I;
5030  while (Result >= 0) and (FTokens[Result].Channel <> FChannel) do
5031    Dec(Result);
5032end;
5033
5034function TCommonTokenStream.ToString: String;
5035begin
5036  if (FP = -1) then
5037    FillBuffer;
5038  Result := ToString(0, FTokens.Count - 1);
5039end;
5040
5041function TCommonTokenStream.ToString(const Start, Stop: Integer): String;
5042var
5043  I, Finish: Integer;
5044  Buf: TStringBuilder;
5045  T: IToken;
5046begin
5047  if (Start < 0) or (Stop < 0) then
5048    Result := ''
5049  else
5050  begin
5051    if (FP = -1) then
5052      FillBuffer;
5053    if (Stop >= FTokens.Count) then
5054      Finish := FTokens.Count - 1
5055    else
5056      Finish := Stop;
5057    Buf := TStringBuilder.Create;
5058    try
5059      for I := Start to Finish do
5060      begin
5061        T := FTokens[I];
5062        Buf.Append(T.Text);
5063      end;
5064      Result := Buf.ToString;
5065    finally
5066      Buf.Free;
5067    end;
5068  end;
5069end;
5070
5071function TCommonTokenStream.ToString(const Start, Stop: IToken): String;
5072begin
5073  if Assigned(Start) and Assigned(Stop) then
5074    Result := ToString(Start.TokenIndex, Stop.TokenIndex)
5075  else
5076    Result := '';
5077end;
5078
5079constructor TCommonTokenStream.Create(const ATokenSource: ITokenSource;
5080  const AChannel: Integer);
5081begin
5082  Create(ATokenSource);
5083  FChannel := AChannel;
5084end;
5085
5086constructor TCommonTokenStream.Create(const ALexer: ILexer);
5087begin
5088  Create(ALexer as ITokenSource);
5089end;
5090
5091constructor TCommonTokenStream.Create(const ALexer: ILexer;
5092  const AChannel: Integer);
5093begin
5094  Create(ALexer as ITokenSource, AChannel);
5095end;
5096
5097{ TDFA }
5098
5099function TDFA.Description: String;
5100begin
5101  Result := 'n/a';
5102end;
5103
5104procedure TDFA.Error(const NVAE: ENoViableAltException);
5105begin
5106  // No default implementation
5107end;
5108
5109function TDFA.GetRecognizer: IBaseRecognizer;
5110begin
5111  Result := IBaseRecognizer(FRecognizer);
5112end;
5113
5114function TDFA.GetSpecialStateTransitionHandler: TSpecialStateTransitionHandler;
5115begin
5116  Result := FSpecialStateTransitionHandler;
5117end;
5118
5119procedure TDFA.NoViableAlt(const S: Integer; const Input: IIntStream);
5120var
5121  NVAE: ENoViableAltException;
5122begin
5123  if (Recognizer.State.Backtracking > 0) then
5124    Recognizer.State.Failed := True
5125  else
5126  begin
5127    NVAE := ENoViableAltException.Create(Description, FDecisionNumber, S, Input);
5128    Error(NVAE);
5129    raise NVAE;
5130  end;
5131end;
5132
5133function TDFA.Predict(const Input: IIntStream): Integer;
5134var
5135  Mark, S, SNext, SpecialState: Integer;
5136  C: Char;
5137begin
5138  Result := 0;
5139  Mark := Input.Mark; // remember where decision started in input
5140  S := 0; // we always start at s0
5141  try
5142    while True do
5143    begin
5144      SpecialState := FSpecial[S];
5145      if (SpecialState >= 0) then
5146      begin
5147        S := FSpecialStateTransitionHandler(Self, SpecialState, Input);
5148        if (S = -1) then
5149        begin
5150          NoViableAlt(S, Input);
5151          Exit;
5152        end;
5153        Input.Consume;
5154        Continue;
5155      end;
5156
5157      if (FAccept[S] >= 1) then
5158      begin
5159        Result := FAccept[S];
5160        Exit;
5161      end;
5162
5163      // look for a normal char transition
5164      C := Char(Input.LA(1)); // -1 == \uFFFF, all tokens fit in 65000 space
5165      if (C >= FMin[S]) and (C <= FMax[S]) then
5166      begin
5167        SNext := FTransition[S,Integer(C) - Integer(FMin[S])];  // move to next state
5168        if (SNext < 0) then
5169        begin
5170          // was in range but not a normal transition
5171          // must check EOT, which is like the else clause.
5172          // eot[s]>=0 indicates that an EOT edge goes to another
5173          // state.
5174          if (FEOT[S] >= 0) then  // EOT Transition to accept state?
5175          begin
5176            S := FEOT[S];
5177            Input.Consume;
5178            // TODO: I had this as return accept[eot[s]]
5179            // which assumed here that the EOT edge always
5180            // went to an accept...faster to do this, but
5181            // what about predicated edges coming from EOT
5182            // target?
5183            Continue;
5184          end;
5185
5186          NoViableAlt(S, Input);
5187          Exit;
5188        end;
5189        S := SNext;
5190        Input.Consume;
5191        Continue;
5192      end;
5193
5194      if (FEOT[S] >= 0) then
5195      begin
5196        // EOT Transition?
5197        S := FEOT[S];
5198        Input.Consume;
5199        Continue;
5200      end;
5201
5202      if (C = Char(TToken.EOF)) and (FEOF[S] >= 0) then
5203      begin
5204        // EOF Transition to accept state?
5205        Result := FAccept[FEOF[S]];
5206        Exit;
5207      end;
5208
5209      // not in range and not EOF/EOT, must be invalid symbol
5210      NoViableAlt(S, Input);
5211      Exit;
5212    end;
5213  finally
5214    Input.Rewind(Mark);
5215  end;
5216end;
5217
5218procedure TDFA.SetRecognizer(const Value: IBaseRecognizer);
5219begin
5220  FRecognizer := Pointer(Value);
5221end;
5222
5223procedure TDFA.SetSpecialStateTransitionHandler(
5224  const Value: TSpecialStateTransitionHandler);
5225begin
5226  FSpecialStateTransitionHandler := Value;
5227end;
5228
5229function TDFA.SpecialStateTransition(const S: Integer;
5230  const Input: IIntStream): Integer;
5231begin
5232  // No default implementation
5233  Result := -1;
5234end;
5235
5236function TDFA.SpecialTransition(const State, Symbol: Integer): Integer;
5237begin
5238  Result := 0;
5239end;
5240
5241class function TDFA.UnpackEncodedString(
5242  const EncodedString: String): TSmallintArray;
5243var
5244  I, J, DI, Size: Integer;
5245  N, V: Char;
5246begin
5247  Size := 0;
5248  I := 1;
5249  while (I <= Length(EncodedString)) do
5250  begin
5251    Inc(Size,Integer(EncodedString[I]));
5252    Inc(I,2);
5253  end;
5254
5255  SetLength(Result,Size);
5256  DI := 0;
5257  I := 1;
5258  while (I <= Length(EncodedString)) do
5259  begin
5260    N := EncodedString[I];
5261    V := EncodedString[I + 1];
5262    // add v n times to data
5263    for J := 1 to Integer(N) do
5264    begin
5265      Result[DI] := Smallint(V);
5266      Inc(DI);
5267    end;
5268    Inc(I,2);
5269  end;
5270end;
5271
5272class function TDFA.UnpackEncodedStringArray(
5273  const EncodedStrings: array of String): TSmallintMatrix;
5274var
5275  I: Integer;
5276begin
5277  SetLength(Result,Length(EncodedStrings));
5278  for I := 0 to Length(EncodedStrings) - 1 do
5279    Result[I] := UnpackEncodedString(EncodedStrings[I]);
5280end;
5281
5282class function TDFA.UnpackEncodedStringArray(
5283  const EncodedStrings: TStringArray): TSmallintMatrix;
5284var
5285  I: Integer;
5286begin
5287  SetLength(Result,Length(EncodedStrings));
5288  for I := 0 to Length(EncodedStrings) - 1 do
5289    Result[I] := UnpackEncodedString(EncodedStrings[I]);
5290end;
5291
5292class function TDFA.UnpackEncodedStringToUnsignedChars(
5293  const EncodedString: String): TCharArray;
5294var
5295  I, J, DI, Size: Integer;
5296  N, V: Char;
5297begin
5298  Size := 0;
5299  I := 1;
5300  while (I <= Length(EncodedString)) do
5301  begin
5302    Inc(Size,Integer(EncodedString[I]));
5303    Inc(I,2);
5304  end;
5305
5306  SetLength(Result,Size);
5307  DI := 0;
5308  I := 1;
5309  while (I <= Length(EncodedString)) do
5310  begin
5311    N := EncodedString[I];
5312    V := EncodedString[I + 1];
5313    // add v n times to data
5314    for J := 1 to Integer(N) do
5315    begin
5316      Result[DI] := V;
5317      Inc(DI);
5318    end;
5319    Inc(I,2);
5320  end;
5321end;
5322
5323{ TLexer }
5324
5325constructor TLexer.Create;
5326begin
5327  inherited;
5328end;
5329
5330constructor TLexer.Create(const AInput: ICharStream);
5331begin
5332  inherited Create;
5333  FInput := AInput;
5334end;
5335
5336constructor TLexer.Create(const AInput: ICharStream;
5337  const AState: IRecognizerSharedState);
5338begin
5339  inherited Create(AState);
5340  FInput := AInput;
5341end;
5342
5343function TLexer.Emit: IToken;
5344begin
5345  Result := TCommonToken.Create(FInput, FState.TokenType, FState.Channel,
5346    FState.TokenStartCharIndex, GetCharIndex - 1);
5347  Result.Line := FState.TokenStartLine;
5348  Result.Text := FState.Text;
5349  Result.CharPositionInLine := FState.TokenStartCharPositionInLine;
5350  Emit(Result);
5351end;
5352
5353procedure TLexer.Emit(const Token: IToken);
5354begin
5355  FState.Token := Token;
5356end;
5357
5358function TLexer.GetCharErrorDisplay(const C: Integer): String;
5359begin
5360  case C of
5361    // TToken.EOF
5362    TOKEN_dot_EOF:
5363      Result := '<EOF>';
5364    10:
5365      Result := '\n';
5366    9:
5367      Result := '\t';
5368    13:
5369      Result := '\r';
5370    else
5371      Result := Char(C);
5372  end;
5373  Result := '''' + Result + '''';
5374end;
5375
5376function TLexer.GetCharIndex: Integer;