1/*
2 * [The "BSD license"]
3 * Copyright (c) 2011 Terence Parr
4 * All rights reserved.
5 *
6 * Conversion to C#:
7 * Copyright (c) 2011 Sam Harwell, Pixel Mine, Inc.
8 * All rights reserved.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. The name of the author may not be used to endorse or promote products
19 *    derived from this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
namespace Antlr.Runtime
{
    /** <summary>
     *  Abstract lexer that reads characters from a <see cref="SlimStringStream"/>
     *  and hands out <see cref="SlimToken"/> values through <see cref="NextToken"/>.
     *  Concrete lexers supply the token rules by implementing <see cref="mTokens"/>.
     *  </summary>
     */
    public abstract class SlimLexer
        : BaseRecognizer
        , ITokenSource<SlimToken>
    {
        /** <summary>Where is the lexer drawing characters from?</summary> */
        protected SlimStringStream input;

        /** <summary>Token built by the most recent call to <see cref="Emit"/>.</summary> */
        private SlimToken _token;

        /** <summary>True once <see cref="Emit"/> has run for the token currently being matched.</summary> */
        private bool _emitted;

        /** <summary>True once <see cref="Skip"/> has been requested for the token currently being matched.</summary> */
        private bool _skip;

        /** <summary>Creates a lexer with no input; assign <see cref="CharStream"/> before lexing.</summary> */
        public SlimLexer()
        {
        }

        /** <summary>Creates a lexer over <paramref name="input"/>, which must be a <see cref="SlimStringStream"/>.</summary> */
        public SlimLexer( ICharStream input )
        {
            this.input = (SlimStringStream)input;
        }

        /** <summary>
         *  Creates a lexer over <paramref name="input"/> (which must be a
         *  <see cref="SlimStringStream"/>) using the supplied recognizer state.
         *  </summary>
         */
        public SlimLexer( ICharStream input, RecognizerSharedState state )
            : base( state )
        {
            this.input = (SlimStringStream)input;
        }

        #region Properties
        /** <summary>
         *  Gets the text matched so far for the current token, or the text
         *  override when one has been set.  Setting this property replaces the
         *  complete text of the token; it wipes any previous changes to the text.
         *  </summary>
         */
        public string Text
        {
            get
            {
                if ( state.text != null )
                {
                    return state.text;
                }

                return input.Substring( state.tokenStartCharIndex, CharIndex - state.tokenStartCharIndex );
            }
            set
            {
                state.text = value;
            }
        }

        /** <summary>Gets or sets the line value of the underlying input stream.</summary> */
        public int Line
        {
            get
            {
                return input.Line;
            }
            set
            {
                input.Line = value;
            }
        }

        /** <summary>Gets or sets the character-position-in-line value of the underlying input stream.</summary> */
        public int CharPositionInLine
        {
            get
            {
                return input.CharPositionInLine;
            }
            set
            {
                input.CharPositionInLine = value;
            }
        }
        #endregion

        /** <summary>
         *  Resets the recognizer state, rewinds the input (when there is one)
         *  and clears all per-token lexer state.
         *  </summary>
         */
        public override void Reset()
        {
            base.Reset(); // reset all recognizer state variables

            // whack the lexer state variables
            if ( input != null )
            {
                input.Seek( 0 ); // rewind the input
            }

            if ( state == null )
            {
                return; // no shared state work to do
            }

            _token = default( SlimToken );
            _emitted = false;
            _skip = false;
            state.type = TokenTypes.Invalid;
            state.channel = TokenChannels.Default;
            state.tokenStartCharIndex = -1;
#if TRACK_POSITION
            state.tokenStartCharPositionInLine = -1;
            state.tokenStartLine = -1;
#endif
            state.text = null;
        }

        /** <summary>
         *  Return a token from this source; i.e., match a token on the char stream.
         *  </summary>
         *
         *  <remarks>
         *  Loops until a token is produced: rules that call <see cref="Skip"/>
         *  and rules that fail with a recognition error both cause another
         *  attempt.  An end-of-file token is returned when the next character
         *  is end of file.
         *  </remarks>
         */
        public virtual SlimToken NextToken()
        {
            for ( ; ; )
            {
                // start every attempt with clean per-token state
                _token = default( SlimToken );
                _emitted = false;
                _skip = false;
                state.channel = TokenChannels.Default;
                state.tokenStartCharIndex = input.Index;
#if TRACK_POSITION
                state.tokenStartCharPositionInLine = input.CharPositionInLine;
                state.tokenStartLine = input.Line;
#endif
                state.text = null;

                if ( input.LA( 1 ) == CharStreamConstants.EndOfFile )
                {
                    return new SlimToken(TokenTypes.EndOfFile);
                }

                try
                {
                    mTokens();

                    if ( _skip )
                    {
                        continue;
                    }

                    // only build a token automatically when the rule did not emit one itself
                    if ( !_emitted )
                    {
                        Emit();
                    }

                    return _token;
                }
                catch ( NoViableAltException nva )
                {
                    ReportError( nva );
                    Recover( nva ); // throw out current char and try again
                }
                catch ( RecognitionException re )
                {
                    ReportError( re );
                    // match() routine has already called recover()
                }
            }
        }

        // Non-generic token source entry point; forwards to the typed NextToken().
        IToken ITokenSource.NextToken()
        {
            return NextToken();
        }

        /** <summary>
         *  Instruct the lexer to skip creating a token for current lexer rule
         *  and look for another token.  NextToken() keeps looking when a lexer
         *  rule finishes with the skip flag set.  Recall that if no token was
         *  emitted by the end of a token rule, NextToken() creates one for you
         *  and emits it.
         *  </summary>
         */
        public virtual void Skip()
        {
            _skip = true;
        }

        /** <summary>This is the lexer entry point that matches one token; called by NextToken().</summary> */
        public abstract void mTokens();

        /** <summary>
         *  Gets the current char stream.  Setting it resets the lexer and then
         *  installs the new stream, which must be a <see cref="SlimStringStream"/>.
         *  </summary>
         */
        public ICharStream CharStream
        {
            get
            {
                return input;
            }
            set
            {
                // clear the stream first so Reset() does not rewind the old input
                input = null;
                Reset();
                input = (SlimStringStream)value;
            }
        }

        /** <summary>Gets the source name reported by the underlying input stream.</summary> */
        public override string SourceName
        {
            get
            {
                return input.SourceName;
            }
        }

        /** <summary>
         *  The standard method called to automatically emit a token at the
         *  outermost lexical rule.  The token takes its type and channel from
         *  the current recognizer state.
         *  </summary>
         *
         *  <remarks>
         *  Does not support multiple emits per NextToken() invocation; a later
         *  call replaces the token created by an earlier one.  Marks the
         *  current token as emitted so NextToken() does not build it again.
         *  </remarks>
         */
        public void Emit()
        {
            _token = new SlimToken()
            {
                //InputStream = input,
                Type = state.type,
                Channel = state.channel,
                //CharPositionInLine = state.tokenStartCharPositionInLine,
                //Line = state.tokenStartLine,
                //Text = state.text
            };

            // record the emit; without this NextToken() would always re-emit
            _emitted = true;
        }

        /** <summary>
         *  Matches every character of <paramref name="s"/> in order, consuming
         *  input as it goes.  On a mismatch while backtracking, sets
         *  state.failed and returns; otherwise recovers and throws a
         *  MismatchedTokenException.
         *  </summary>
         */
        public void Match( string s )
        {
            for ( int i = 0; i < s.Length; i++ )
            {
                if ( input.LA( 1 ) != s[i] )
                {
                    if ( state.backtracking > 0 )
                    {
                        state.failed = true;
                        return;
                    }

                    MismatchedTokenException mte = new MismatchedTokenException(s[i], input, TokenNames);
                    Recover( mte );
                    throw mte;
                }

                input.Consume();
                state.failed = false;
            }
        }

        /** <summary>Consumes one character without checking it.</summary> */
        public void MatchAny()
        {
            input.Consume();
        }

        /** <summary>
         *  Matches the single character <paramref name="c"/>.  On a mismatch
         *  while backtracking, sets state.failed and returns; otherwise
         *  recovers and throws a MismatchedTokenException.
         *  </summary>
         */
        public void Match( int c )
        {
            if ( input.LA( 1 ) != c )
            {
                if ( state.backtracking > 0 )
                {
                    state.failed = true;
                    return;
                }

                MismatchedTokenException mte = new MismatchedTokenException(c, input, TokenNames);
                Recover( mte );  // don't really recover; just consume in lexer
                throw mte;
            }

            input.Consume();
            state.failed = false;
        }

        /** <summary>
         *  Matches one character in the inclusive range
         *  <paramref name="a"/>..<paramref name="b"/>.  On a mismatch while
         *  backtracking, sets state.failed and returns; otherwise recovers and
         *  throws a MismatchedRangeException.
         *  </summary>
         */
        public void MatchRange( int a, int b )
        {
            int next = input.LA( 1 );
            if ( next < a || next > b )
            {
                if ( state.backtracking > 0 )
                {
                    state.failed = true;
                    return;
                }

                MismatchedRangeException mre =
                    new MismatchedRangeException( a, b, input );
                Recover( mre );
                throw mre;
            }

            input.Consume();
            state.failed = false;
        }

        /** <summary>What is the index of the current character of lookahead?</summary> */
        public int CharIndex
        {
            get
            {
                return input.Index;
            }
        }

        /** <summary>Reports <paramref name="e"/> through DisplayRecognitionError.</summary> */
        public override void ReportError( RecognitionException e )
        {
            /* TODO: not thought about recovery in lexer yet.
             *
            // if we've already reported an error and have not matched a token
            // yet successfully, don't report any errors.
            if ( errorRecovery ) {
                //System.err.print("[SPURIOUS] ");
                return;
            }
            errorRecovery = true;
             */

            DisplayRecognitionError( this.TokenNames, e );
        }

        /** <summary>
         *  Builds the message for a lexer error, describing the offending
         *  character for the exception types handled here and deferring to the
         *  base implementation for everything else.
         *  </summary>
         */
        public override string GetErrorMessage( RecognitionException e, string[] tokenNames )
        {
            MismatchedTokenException mte = e as MismatchedTokenException;
            if ( mte != null )
            {
                return "mismatched character " + GetCharErrorDisplay( e.Character ) + " expecting " + GetCharErrorDisplay( mte.Expecting );
            }

            if ( e is NoViableAltException )
            {
                // for development, can add the decision description, decision
                // number and state number of the exception to this message
                return "no viable alternative at character " + GetCharErrorDisplay( e.Character );
            }

            if ( e is EarlyExitException )
            {
                // for development, can add the decision number of the exception
                return "required (...)+ loop did not match anything at character " + GetCharErrorDisplay( e.Character );
            }

            // checked before MismatchedSetException to keep the original test order
            MismatchedNotSetException notSetError = e as MismatchedNotSetException;
            if ( notSetError != null )
            {
                return "mismatched character " + GetCharErrorDisplay( e.Character ) + " expecting set " + notSetError.Expecting;
            }

            MismatchedSetException setError = e as MismatchedSetException;
            if ( setError != null )
            {
                return "mismatched character " + GetCharErrorDisplay( e.Character ) + " expecting set " + setError.Expecting;
            }

            MismatchedRangeException rangeError = e as MismatchedRangeException;
            if ( rangeError != null )
            {
                return "mismatched character " + GetCharErrorDisplay( e.Character ) + " expecting set " +
                       GetCharErrorDisplay( rangeError.A ) + ".." + GetCharErrorDisplay( rangeError.B );
            }

            return base.GetErrorMessage( e, tokenNames );
        }

        /** <summary>
         *  Returns a quoted, printable form of character <paramref name="c"/>
         *  for error messages; end of file, newline, tab and carriage return
         *  get symbolic spellings.
         *  </summary>
         */
        public virtual string GetCharErrorDisplay( int c )
        {
            string s;
            switch ( c )
            {
            case TokenTypes.EndOfFile:
                s = "<EOF>";
                break;
            case '\n':
                s = "\\n";
                break;
            case '\t':
                s = "\\t";
                break;
            case '\r':
                s = "\\r";
                break;
            default:
                // convert only here, and unchecked, so values outside the char
                // range cannot raise OverflowException in a checked build
                s = unchecked( (char)c ).ToString();
                break;
            }

            return "'" + s + "'";
        }

        /** <summary>
         *  Lexers can normally match any char in its vocabulary after matching
         *  a token, so do the easy thing and just kill a character and hope
         *  it all works out.  You can instead use the rule invocation stack
         *  to do sophisticated error recovery if you are in a fragment rule.
         *  </summary>
         */
        public virtual void Recover( RecognitionException re )
        {
            input.Consume();
        }

        /** <summary>Traces entry into a rule, reporting the next character and the current position.</summary> */
        public virtual void TraceIn( string ruleName, int ruleIndex )
        {
            base.TraceIn( ruleName, ruleIndex, DescribeLookahead() );
        }

        /** <summary>Traces exit from a rule, reporting the next character and the current position.</summary> */
        public virtual void TraceOut( string ruleName, int ruleIndex )
        {
            base.TraceOut( ruleName, ruleIndex, DescribeLookahead() );
        }

        /** <summary>Formats the next input character plus the current line and column for trace output.</summary> */
        private string DescribeLookahead()
        {
            // unchecked: the lookahead may be the end-of-file marker, which is outside the char range
            return unchecked( (char)input.LT( 1 ) ) + " line=" + Line + ":" + CharPositionInLine;
        }
    }
}
446