1/*
2 * [The "BSD licence"]
3 * Copyright (c) 2005-2008 Terence Parr
4 * All rights reserved.
5 *
6 * Conversion to C#:
7 * Copyright (c) 2008-2009 Sam Harwell, Pixel Mine, Inc.
8 * All rights reserved.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. The name of the author may not be used to endorse or promote products
19 *    derived from this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33namespace Antlr.Runtime
34{
35    using ConditionalAttribute = System.Diagnostics.ConditionalAttribute;
36
37    /** <summary>
     *  A lexer is a recognizer that draws input symbols from a character stream.
     *  Lexer grammars result in a subclass of this object. A Lexer object
40     *  uses simplified match() and error recovery mechanisms in the interest
41     *  of speed.
42     *  </summary>
43     */
44    public abstract class Lexer : BaseRecognizer, ITokenSource
45    {
        /** <summary>
         *  The character stream the lexer draws its characters from.  It stays
         *  null until a constructor or <see cref="CharStream"/> assigns one.
         *  </summary>
         */
        protected ICharStream input;
48
        /** <summary>
         *  Creates a lexer without an input stream.  <see cref="input"/> is left
         *  unassigned, so a stream has to be supplied (for example through
         *  <see cref="CharStream"/>) before <see cref="NextToken"/> is called.
         *  </summary>
         */
        public Lexer()
        {
        }
52
        /** <summary>Creates a lexer that reads its characters from <paramref name="input"/>.</summary>
         *  <param name="input">The character stream to tokenize.</param>
         */
        public Lexer( ICharStream input )
        {
            this.input = input;
        }
57
        /** <summary>
         *  Creates a lexer that reads its characters from <paramref name="input"/>
         *  and hands <paramref name="state"/> to the base recognizer constructor.
         *  </summary>
         *  <param name="input">The character stream to tokenize.</param>
         *  <param name="state">The recognizer state object forwarded to the base class.</param>
         */
        public Lexer( ICharStream input, RecognizerSharedState state )
            : base(state)
        {
            this.input = input;
        }
63
64        #region Properties
65        public string Text
66        {
67            /** <summary>Return the text matched so far for the current token or any text override.</summary> */
68            get
69            {
70                if ( state.text != null )
71                {
72                    return state.text;
73                }
74                return input.Substring( state.tokenStartCharIndex, CharIndex - state.tokenStartCharIndex );
75            }
76            /** <summary>Set the complete text of this token; it wipes any previous changes to the text.</summary> */
77            set
78            {
79                state.text = value;
80            }
81        }
82        public int Line
83        {
84            get
85            {
86                return input.Line;
87            }
88            set
89            {
90                input.Line = value;
91            }
92        }
93        public int CharPositionInLine
94        {
95            get
96            {
97                return input.CharPositionInLine;
98            }
99            set
100            {
101                input.CharPositionInLine = value;
102            }
103        }
104        #endregion
105
106        public override void Reset()
107        {
108            base.Reset(); // reset all recognizer state variables
109            // wack Lexer state variables
110            if ( input != null )
111            {
112                input.Seek( 0 ); // rewind the input
113            }
114            if ( state == null )
115            {
116                return; // no shared state work to do
117            }
118            state.token = null;
119            state.type = TokenTypes.Invalid;
120            state.channel = TokenChannels.Default;
121            state.tokenStartCharIndex = -1;
122            state.tokenStartCharPositionInLine = -1;
123            state.tokenStartLine = -1;
124            state.text = null;
125        }
126
127        /** <summary>Return a token from this source; i.e., match a token on the char stream.</summary> */
128        public virtual IToken NextToken()
129        {
130            for ( ; ; )
131            {
132                state.token = null;
133                state.channel = TokenChannels.Default;
134                state.tokenStartCharIndex = input.Index;
135                state.tokenStartCharPositionInLine = input.CharPositionInLine;
136                state.tokenStartLine = input.Line;
137                state.text = null;
138                if ( input.LA( 1 ) == CharStreamConstants.EndOfFile )
139                {
140                    IToken eof = new CommonToken((ICharStream)input, CharStreamConstants.EndOfFile, TokenChannels.Default, input.Index, input.Index);
141                    eof.Line = Line;
142                    eof.CharPositionInLine = CharPositionInLine;
143                    return eof;
144                }
145                try
146                {
147                    ParseNextToken();
148                    if ( state.token == null )
149                    {
150                        Emit();
151                    }
152                    else if ( state.token == Tokens.Skip )
153                    {
154                        continue;
155                    }
156                    return state.token;
157                }
158                catch (MismatchedRangeException mre)
159                {
160                    ReportError(mre);
161                    // MatchRange() routine has already called recover()
162                }
163                catch (MismatchedTokenException mte)
164                {
165                    ReportError(mte);
166                    // Match() routine has already called recover()
167                }
168                catch ( RecognitionException re )
169                {
170                    ReportError( re );
171                    Recover( re ); // throw out current char and try again
172                }
173            }
174        }
175
        /** <summary>
         *  Instructs the lexer to skip creating a token for the current lexer
         *  rule and to look for another token.  <see cref="NextToken"/> keeps
         *  looking when a rule finishes with <c>state.token</c> set to
         *  <c>Tokens.Skip</c>.  Recall that if <c>state.token</c> is still null
         *  at the end of a token rule, <see cref="NextToken"/> creates a token
         *  through <see cref="Emit()"/> and returns it.
         *  </summary>
         */
        public virtual void Skip()
        {
            state.token = Tokens.Skip;
        }
188
        /** <summary>
         *  The lexer entry point, implemented by concrete lexers and invoked
         *  through <see cref="ParseNextToken"/>.  After it returns,
         *  <see cref="NextToken"/> inspects <c>state.token</c> to decide whether
         *  to emit a default token, skip, or return the token that was set.
         *  </summary>
         */
        public abstract void mTokens();
191
        /** <summary>
         *  Gets the character stream the lexer reads from.  Setting it resets
         *  the lexer and then attaches the new stream.
         *  </summary>
         */
        public virtual ICharStream CharStream
        {
            get
            {
                return input;
            }
            set
            {
                // Detach the old stream before resetting: this class's Reset() only
                // seeks when input is non-null, so neither the old stream nor the
                // new one (assigned afterwards) is rewound here.
                input = null;
                Reset();
                input = value;
            }
        }
206
207        public override string SourceName
208        {
209            get
210            {
211                return input.SourceName;
212            }
213        }
214
        /** <summary>
         *  Stores <paramref name="token"/> in <c>state.token</c>, the single
         *  slot <see cref="NextToken"/> returns from.
         *  </summary>
         *
         *  <remarks>
         *  Currently does not support multiple emits per NextToken invocation
         *  for efficiency reasons: a later call overwrites the earlier token.
         *  To support that, subclass and override this method and NextToken
         *  (push tokens into a list and pull from that list rather than from a
         *  single variable as this implementation does).
         *  </remarks>
         *  <param name="token">The token to hand back from the current NextToken call.</param>
         */
        public virtual void Emit( IToken token )
        {
            state.token = token;
        }
226
227        /** <summary>
228         *  The standard method called to automatically emit a token at the
229         *  outermost lexical rule.  The token object should point into the
230         *  char buffer start..stop.  If there is a text override in 'text',
231         *  use that to set the token's text.  Override this method to emit
232         *  custom Token objects.
233         *  </summary>
234         *
235         *  <remarks>
236         *  If you are building trees, then you should also override
237         *  Parser or TreeParser.getMissingSymbol().
238         *  </remarks>
239         */
240        public virtual IToken Emit()
241        {
242            IToken t = new CommonToken( input, state.type, state.channel, state.tokenStartCharIndex, CharIndex - 1 );
243            t.Line = state.tokenStartLine;
244            t.Text = state.text;
245            t.CharPositionInLine = state.tokenStartCharPositionInLine;
246            Emit( t );
247            return t;
248        }
249
250        public virtual void Match( string s )
251        {
252            int i = 0;
253            while ( i < s.Length )
254            {
255                if ( input.LA( 1 ) != s[i] )
256                {
257                    if ( state.backtracking > 0 )
258                    {
259                        state.failed = true;
260                        return;
261                    }
262                    MismatchedTokenException mte = new MismatchedTokenException(s[i], input, TokenNames);
263                    Recover( mte );
264                    throw mte;
265                }
266                i++;
267                input.Consume();
268                state.failed = false;
269            }
270        }
271
        /** <summary>Matches any single character by consuming the current one without inspecting it.</summary> */
        public virtual void MatchAny()
        {
            input.Consume();
        }
276
277        public virtual void Match( int c )
278        {
279            if ( input.LA( 1 ) != c )
280            {
281                if ( state.backtracking > 0 )
282                {
283                    state.failed = true;
284                    return;
285                }
286                MismatchedTokenException mte = new MismatchedTokenException(c, input, TokenNames);
287                Recover( mte );  // don't really recover; just consume in lexer
288                throw mte;
289            }
290            input.Consume();
291            state.failed = false;
292        }
293
294        public virtual void MatchRange( int a, int b )
295        {
296            if ( input.LA( 1 ) < a || input.LA( 1 ) > b )
297            {
298                if ( state.backtracking > 0 )
299                {
300                    state.failed = true;
301                    return;
302                }
303                MismatchedRangeException mre = new MismatchedRangeException(a, b, input);
304                Recover( mre );
305                throw mre;
306            }
307            input.Consume();
308            state.failed = false;
309        }
310
311        /** <summary>What is the index of the current character of lookahead?</summary> */
312        public virtual int CharIndex
313        {
314            get
315            {
316                return input.Index;
317            }
318        }
319
        /** <summary>
         *  Reports a recognition error by passing it, together with this
         *  recognizer's token names, to <c>DisplayRecognitionError</c>.
         *  </summary>
         *
         *  <remarks>
         *  TODO: error recovery has not been thought through for the lexer yet.
         *  In particular, follow-on ("spurious") errors raised before another
         *  token has been matched successfully are not suppressed; every error
         *  is displayed.
         *  </remarks>
         *  <param name="e">The recognition error to report.</param>
         */
        public override void ReportError( RecognitionException e )
        {
            DisplayRecognitionError( this.TokenNames, e );
        }
335
336        public override string GetErrorMessage( RecognitionException e, string[] tokenNames )
337        {
338            string msg = null;
339            if ( e is MismatchedTokenException )
340            {
341                MismatchedTokenException mte = (MismatchedTokenException)e;
342                msg = "mismatched character " + GetCharErrorDisplay( e.Character ) + " expecting " + GetCharErrorDisplay( mte.Expecting );
343            }
344            else if ( e is NoViableAltException )
345            {
346                NoViableAltException nvae = (NoViableAltException)e;
347                // for development, can add "decision=<<"+nvae.grammarDecisionDescription+">>"
348                // and "(decision="+nvae.decisionNumber+") and
349                // "state "+nvae.stateNumber
350                msg = "no viable alternative at character " + GetCharErrorDisplay( e.Character );
351            }
352            else if ( e is EarlyExitException )
353            {
354                EarlyExitException eee = (EarlyExitException)e;
355                // for development, can add "(decision="+eee.decisionNumber+")"
356                msg = "required (...)+ loop did not match anything at character " + GetCharErrorDisplay( e.Character );
357            }
358            else if ( e is MismatchedNotSetException )
359            {
360                MismatchedNotSetException mse = (MismatchedNotSetException)e;
361                msg = "mismatched character " + GetCharErrorDisplay( e.Character ) + " expecting set " + mse.Expecting;
362            }
363            else if ( e is MismatchedSetException )
364            {
365                MismatchedSetException mse = (MismatchedSetException)e;
366                msg = "mismatched character " + GetCharErrorDisplay( e.Character ) + " expecting set " + mse.Expecting;
367            }
368            else if ( e is MismatchedRangeException )
369            {
370                MismatchedRangeException mre = (MismatchedRangeException)e;
371                msg = "mismatched character " + GetCharErrorDisplay( e.Character ) + " expecting set " +
372                      GetCharErrorDisplay( mre.A ) + ".." + GetCharErrorDisplay( mre.B );
373            }
374            else
375            {
376                msg = base.GetErrorMessage( e, tokenNames );
377            }
378            return msg;
379        }
380
381        public virtual string GetCharErrorDisplay( int c )
382        {
383            string s = ( (char)c ).ToString();
384            switch ( c )
385            {
386            case TokenTypes.EndOfFile:
387                s = "<EOF>";
388                break;
389            case '\n':
390                s = "\\n";
391                break;
392            case '\t':
393                s = "\\t";
394                break;
395            case '\r':
396                s = "\\r";
397                break;
398            }
399            return "'" + s + "'";
400        }
401
        /** <summary>
         *  Lexers can normally match any char in its vocabulary after matching
         *  a token, so do the easy thing and just kill a character and hope
         *  it all works out.  You can instead use the rule invocation stack
         *  to do sophisticated error recovery if you are in a fragment rule.
         *  </summary>
         *  <param name="re">The error being recovered from; this implementation does not inspect it.</param>
         */
        public virtual void Recover( RecognitionException re )
        {
            // Recovery here is simply dropping the current character.
            input.Consume();
        }
415
416        [Conditional("ANTLR_TRACE")]
417        public virtual void TraceIn( string ruleName, int ruleIndex )
418        {
419            string inputSymbol = ( (char)input.LT( 1 ) ) + " line=" + Line + ":" + CharPositionInLine;
420            base.TraceIn( ruleName, ruleIndex, inputSymbol );
421        }
422
423        [Conditional("ANTLR_TRACE")]
424        public virtual void TraceOut( string ruleName, int ruleIndex )
425        {
426            string inputSymbol = ( (char)input.LT( 1 ) ) + " line=" + Line + ":" + CharPositionInLine;
427            base.TraceOut( ruleName, ruleIndex, inputSymbol );
428        }
429
        /** <summary>
         *  Runs one token-matching attempt by calling <see cref="mTokens"/>.
         *  <see cref="NextToken"/> calls this inside its error-handling loop;
         *  being virtual, it gives subclasses a hook around the generated entry point.
         *  </summary>
         */
        protected virtual void ParseNextToken()
        {
            mTokens();
        }
434    }
435}
436