1/** \file
2 *
3 * Base implementation of an antlr 3 lexer.
4 *
 * An ANTLR3 lexer implements a base recognizer, a token source and
 * a lexer interface. It constructs a base recognizer with default
 * functions, then overrides those that need lexer-specific behavior
 * in place of the usual default implementation in the base recognizer.
9 */
10
11// [The "BSD licence"]
12// Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC
13// http://www.temporal-wave.com
14// http://www.linkedin.com/in/jimidle
15//
16// All rights reserved.
17//
18// Redistribution and use in source and binary forms, with or without
19// modification, are permitted provided that the following conditions
20// are met:
21// 1. Redistributions of source code must retain the above copyright
22//    notice, this list of conditions and the following disclaimer.
23// 2. Redistributions in binary form must reproduce the above copyright
24//    notice, this list of conditions and the following disclaimer in the
25//    documentation and/or other materials provided with the distribution.
26// 3. The name of the author may not be used to endorse or promote products
27//    derived from this software without specific prior written permission.
28//
29// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
30// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
31// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
32// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
33// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
34// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
35// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
36// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
37// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
38// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39
40#include    <antlr3lexer.h>
41
42static void					mTokens						(pANTLR3_LEXER lexer);
43static void					setCharStream				(pANTLR3_LEXER lexer,  pANTLR3_INPUT_STREAM input);
44static void					pushCharStream				(pANTLR3_LEXER lexer,  pANTLR3_INPUT_STREAM input);
45static void					popCharStream				(pANTLR3_LEXER lexer);
46
47static void					emitNew						(pANTLR3_LEXER lexer,  pANTLR3_COMMON_TOKEN token);
48static pANTLR3_COMMON_TOKEN emit						(pANTLR3_LEXER lexer);
49static ANTLR3_BOOLEAN	    matchs						(pANTLR3_LEXER lexer, ANTLR3_UCHAR * string);
50static ANTLR3_BOOLEAN	    matchc						(pANTLR3_LEXER lexer, ANTLR3_UCHAR c);
51static ANTLR3_BOOLEAN	    matchRange					(pANTLR3_LEXER lexer, ANTLR3_UCHAR low, ANTLR3_UCHAR high);
52static void					matchAny					(pANTLR3_LEXER lexer);
53static void					recover						(pANTLR3_LEXER lexer);
54static ANTLR3_UINT32	    getLine						(pANTLR3_LEXER lexer);
55static ANTLR3_MARKER	    getCharIndex				(pANTLR3_LEXER lexer);
56static ANTLR3_UINT32	    getCharPositionInLine		(pANTLR3_LEXER lexer);
57static pANTLR3_STRING	    getText						(pANTLR3_LEXER lexer);
58static pANTLR3_COMMON_TOKEN nextToken					(pANTLR3_TOKEN_SOURCE toksource);
59
60static void					displayRecognitionError	    (pANTLR3_BASE_RECOGNIZER rec, pANTLR3_UINT8 * tokenNames);
61static void					reportError					(pANTLR3_BASE_RECOGNIZER rec);
62static void *				getCurrentInputSymbol		(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream);
63static void *				getMissingSymbol			(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM	istream, pANTLR3_EXCEPTION	e,
64															ANTLR3_UINT32 expectedTokenType, pANTLR3_BITSET_LIST follow);
65
66static void					reset						(pANTLR3_BASE_RECOGNIZER rec);
67
68static void					freeLexer					(pANTLR3_LEXER lexer);
69
70
71ANTLR3_API pANTLR3_LEXER
72antlr3LexerNew(ANTLR3_UINT32 sizeHint, pANTLR3_RECOGNIZER_SHARED_STATE state)
73{
74    pANTLR3_LEXER   lexer;
75    pANTLR3_COMMON_TOKEN	specialT;
76
77	/* Allocate memory
78	*/
79	lexer   = (pANTLR3_LEXER) ANTLR3_MALLOC(sizeof(ANTLR3_LEXER));
80
81	if	(lexer == NULL)
82	{
83		return	NULL;
84	}
85
86	/* Now we need to create the base recognizer
87	*/
88	lexer->rec	    =  antlr3BaseRecognizerNew(ANTLR3_TYPE_LEXER, sizeHint, state);
89
90	if	(lexer->rec == NULL)
91	{
92		lexer->free(lexer);
93		return	NULL;
94	}
95	lexer->rec->super  =  lexer;
96
97	lexer->rec->displayRecognitionError	    = displayRecognitionError;
98	lexer->rec->reportError					= reportError;
99	lexer->rec->reset						= reset;
100	lexer->rec->getCurrentInputSymbol		= getCurrentInputSymbol;
101	lexer->rec->getMissingSymbol			= getMissingSymbol;
102
103	/* Now install the token source interface
104	*/
105	if	(lexer->rec->state->tokSource == NULL)
106	{
107		lexer->rec->state->tokSource	= (pANTLR3_TOKEN_SOURCE)ANTLR3_CALLOC(1, sizeof(ANTLR3_TOKEN_SOURCE));
108
109		if	(lexer->rec->state->tokSource == NULL)
110		{
111			lexer->rec->free(lexer->rec);
112			lexer->free(lexer);
113
114			return	NULL;
115		}
116		lexer->rec->state->tokSource->super    =  lexer;
117
118		/* Install the default nextToken() method, which may be overridden
119		 * by generated code, or by anything else in fact.
120		 */
121		lexer->rec->state->tokSource->nextToken	    =  nextToken;
122		lexer->rec->state->tokSource->strFactory    = NULL;
123
124		lexer->rec->state->tokFactory				= NULL;
125	}
126
127    /* Install the lexer API
128     */
129    lexer->setCharStream			=  setCharStream;
130    lexer->mTokens					= (void (*)(void *))(mTokens);
131    lexer->setCharStream			=  setCharStream;
132    lexer->pushCharStream			=  pushCharStream;
133    lexer->popCharStream			=  popCharStream;
134    lexer->emit						=  emit;
135    lexer->emitNew					=  emitNew;
136    lexer->matchs					=  matchs;
137    lexer->matchc					=  matchc;
138    lexer->matchRange				=  matchRange;
139    lexer->matchAny					=  matchAny;
140    lexer->recover					=  recover;
141    lexer->getLine					=  getLine;
142    lexer->getCharIndex				=  getCharIndex;
143    lexer->getCharPositionInLine    =  getCharPositionInLine;
144    lexer->getText					=  getText;
145    lexer->free						=  freeLexer;
146
147    /* Initialise the eof token
148     */
149    specialT					= &(lexer->rec->state->tokSource->eofToken);
150    antlr3SetTokenAPI	  (specialT);
151    specialT->setType	  (specialT, ANTLR3_TOKEN_EOF);
152    specialT->factoryMade		= ANTLR3_TRUE;					// Prevent things trying to free() it
153    specialT->strFactory        = NULL;
154	specialT->textState			= ANTLR3_TEXT_NONE;
155	specialT->custom			= NULL;
156	specialT->user1				= 0;
157	specialT->user2				= 0;
158	specialT->user3				= 0;
159
160	// Initialize the skip token.
161	//
162    specialT					= &(lexer->rec->state->tokSource->skipToken);
163    antlr3SetTokenAPI	  (specialT);
164    specialT->setType	  (specialT, ANTLR3_TOKEN_INVALID);
165    specialT->factoryMade		= ANTLR3_TRUE;					// Prevent things trying to free() it
166    specialT->strFactory        = NULL;
167	specialT->custom			= NULL;
168	specialT->user1				= 0;
169	specialT->user2				= 0;
170	specialT->user3				= 0;
171    return  lexer;
172}
173
174static void
175reset	(pANTLR3_BASE_RECOGNIZER rec)
176{
177    pANTLR3_LEXER   lexer;
178
179    lexer   = rec->super;
180
181    lexer->rec->state->token			    = NULL;
182    lexer->rec->state->type			    = ANTLR3_TOKEN_INVALID;
183    lexer->rec->state->channel			    = ANTLR3_TOKEN_DEFAULT_CHANNEL;
184    lexer->rec->state->tokenStartCharIndex	    = -1;
185    lexer->rec->state->tokenStartCharPositionInLine = -1;
186    lexer->rec->state->tokenStartLine		    = -1;
187
188    lexer->rec->state->text	                    = NULL;
189
190    // OK - that's all hunky dory, but we may well have had
191    // a token factory that needs a reset. Do that here
192    //
193    if  (lexer->rec->state->tokFactory != NULL)
194    {
195        lexer->rec->state->tokFactory->reset(lexer->rec->state->tokFactory);
196    }
197}
198
199///
200/// \brief
201/// Returns the next available token from the current input stream.
202///
203/// \param toksource
204/// Points to the implementation of a token source. The lexer is
205/// addressed by the super structure pointer.
206///
207/// \returns
208/// The next token in the current input stream or the EOF token
209/// if there are no more tokens.
210///
211/// \remarks
212/// Write remarks for nextToken here.
213///
214/// \see nextToken
215///
216ANTLR3_INLINE static pANTLR3_COMMON_TOKEN
217nextTokenStr	    (pANTLR3_TOKEN_SOURCE toksource)
218{
219    pANTLR3_LEXER                   lexer;
220    pANTLR3_RECOGNIZER_SHARED_STATE state;
221    pANTLR3_INPUT_STREAM            input;
222    pANTLR3_INT_STREAM              istream;
223
224    lexer   = (pANTLR3_LEXER)(toksource->super);
225    state   = lexer->rec->state;
226    input   = lexer->input;
227    istream = input->istream;
228
229    /// Loop until we get a non skipped token or EOF
230    ///
231    for	(;;)
232    {
233        // Get rid of any previous token (token factory takes care of
234        // any de-allocation when this token is finally used up.
235        //
236        state->token		    = NULL;
237        state->error		    = ANTLR3_FALSE;	    // Start out without an exception
238        state->failed		    = ANTLR3_FALSE;
239
240        // Now call the matching rules and see if we can generate a new token
241        //
242        for	(;;)
243        {
244            // Record the start of the token in our input stream.
245            //
246            state->channel			    = ANTLR3_TOKEN_DEFAULT_CHANNEL;
247            state->tokenStartCharIndex	            = (ANTLR3_MARKER)(((pANTLR3_UINT8)input->nextChar));
248            state->tokenStartCharPositionInLine     = input->charPositionInLine;
249            state->tokenStartLine		    = input->line;
250            state->text			            = NULL;
251            state->custom                           = NULL;
252            state->user1                            = 0;
253            state->user2                            = 0;
254            state->user3                            = 0;
255
256            if  (istream->_LA(istream, 1) == ANTLR3_CHARSTREAM_EOF)
257            {
258                // Reached the end of the current stream, nothing more to do if this is
259                // the last in the stack.
260                //
261                pANTLR3_COMMON_TOKEN    teof = &(toksource->eofToken);
262
263                teof->setStartIndex (teof, lexer->getCharIndex(lexer));
264                teof->setStopIndex  (teof, lexer->getCharIndex(lexer));
265                teof->setLine	    (teof, lexer->getLine(lexer));
266                teof->factoryMade = ANTLR3_TRUE;	// This isn't really manufactured but it stops things from trying to free it
267                return  teof;
268            }
269
270            state->token		= NULL;
271            state->error		= ANTLR3_FALSE;	    // Start out without an exception
272            state->failed		= ANTLR3_FALSE;
273
274            // Call the generated lexer, see if it can get a new token together.
275            //
276            lexer->mTokens(lexer->ctx);
277
278            if  (state->error  == ANTLR3_TRUE)
279            {
280                // Recognition exception, report it and try to recover.
281                //
282                state->failed	    = ANTLR3_TRUE;
283                lexer->rec->reportError(lexer->rec);
284                lexer->recover(lexer);
285            }
286            else
287            {
288                if (state->token == NULL)
289                {
290                    // Emit the real token, which adds it in to the token stream basically
291                    //
292                    emit(lexer);
293                }
294                else if	(state->token ==  &(toksource->skipToken))
295                {
296                    // A real token could have been generated, but "Computer say's naaaaah" and it
297                    // it is just something we need to skip altogether.
298                    //
299                    continue;
300                }
301
302                // Good token, not skipped, not EOF token
303                //
304                return  state->token;
305            }
306        }
307    }
308}
309
310/**
311 * \brief
312 * Default implementation of the nextToken() call for a lexer.
313 *
314 * \param toksource
315 * Points to the implementation of a token source. The lexer is
316 * addressed by the super structure pointer.
317 *
318 * \returns
319 * The next token in the current input stream or the EOF token
320 * if there are no more tokens in any input stream in the stack.
321 *
322 * Write detailed description for nextToken here.
323 *
324 * \remarks
325 * Write remarks for nextToken here.
326 *
327 * \see nextTokenStr
328 */
329static pANTLR3_COMMON_TOKEN
330nextToken	    (pANTLR3_TOKEN_SOURCE toksource)
331{
332	pANTLR3_COMMON_TOKEN tok;
333
334	// Find the next token in the current stream
335	//
336	tok = nextTokenStr(toksource);
337
338	// If we got to the EOF token then switch to the previous
339	// input stream if there were any and just return the
340	// EOF if there are none. We must check the next token
341	// in any outstanding input stream we pop into the active
342	// role to see if it was sitting at EOF after PUSHing the
343	// stream we just consumed, otherwise we will return EOF
344	// on the reinstalled input stream, when in actual fact
345	// there might be more input streams to POP before the
346	// real EOF of the whole logical inptu stream. Hence we
347	// use a while loop here until we find somethign in the stream
348	// that isn't EOF or we reach the actual end of the last input
349	// stream on the stack.
350	//
351	while	(tok->type == ANTLR3_TOKEN_EOF)
352	{
353		pANTLR3_LEXER   lexer;
354
355		lexer   = (pANTLR3_LEXER)(toksource->super);
356
357		if  (lexer->rec->state->streams != NULL && lexer->rec->state->streams->size(lexer->rec->state->streams) > 0)
358		{
359			// We have another input stream in the stack so we
360			// need to revert to it, then resume the loop to check
361			// it wasn't sitting at EOF itself.
362			//
363			lexer->popCharStream(lexer);
364			tok = nextTokenStr(toksource);
365		}
366		else
367		{
368			// There were no more streams on the input stack
369			// so this EOF is the 'real' logical EOF for
370			// the input stream. So we just exit the loop and
371			// return the EOF we have found.
372			//
373			break;
374		}
375
376	}
377
378	// return whatever token we have, which may be EOF
379	//
380	return  tok;
381}
382
383ANTLR3_API pANTLR3_LEXER
384antlr3LexerNewStream(ANTLR3_UINT32 sizeHint, pANTLR3_INPUT_STREAM input, pANTLR3_RECOGNIZER_SHARED_STATE state)
385{
386    pANTLR3_LEXER   lexer;
387
388    // Create a basic lexer first
389    //
390    lexer   = antlr3LexerNew(sizeHint, state);
391
392    if	(lexer != NULL)
393    {
394		// Install the input stream and reset the lexer
395		//
396		setCharStream(lexer, input);
397    }
398
399    return  lexer;
400}
401
402static void mTokens	    (pANTLR3_LEXER lexer)
403{
404    if	(lexer)	    // Fool compiler, avoid pragmas
405    {
406		ANTLR3_FPRINTF(stderr, "lexer->mTokens(): Error: No lexer rules were added to the lexer yet!\n");
407    }
408}
409
410static void
411reportError		    (pANTLR3_BASE_RECOGNIZER rec)
412{
413    // Indicate this recognizer had an error while processing.
414	//
415	rec->state->errorCount++;
416
417    rec->displayRecognitionError(rec, rec->state->tokenNames);
418}
419
420#ifdef	ANTLR3_WINDOWS
421#pragma warning( disable : 4100 )
422#endif
423
424/** Default lexer error handler (works for 8 bit streams only!!!)
425 */
426static void
427displayRecognitionError	    (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 * tokenNames)
428{
429    pANTLR3_LEXER			lexer;
430	pANTLR3_EXCEPTION	    ex;
431	pANTLR3_STRING			ftext;
432
433    lexer   = (pANTLR3_LEXER)(recognizer->super);
434	ex		= lexer->rec->state->exception;
435
436	// See if there is a 'filename' we can use
437    //
438    if	(ex->name == NULL)
439    {
440		ANTLR3_FPRINTF(stderr, "-unknown source-(");
441    }
442    else
443    {
444		ftext = ex->streamName->to8(ex->streamName);
445		ANTLR3_FPRINTF(stderr, "%s(", ftext->chars);
446    }
447
448    ANTLR3_FPRINTF(stderr, "%d) ", recognizer->state->exception->line);
449    ANTLR3_FPRINTF(stderr, ": lexer error %d :\n\t%s at offset %d, ",
450						ex->type,
451						(pANTLR3_UINT8)	   (ex->message),
452					    ex->charPositionInLine+1
453		    );
454	{
455		ANTLR3_INT32	width;
456
457		width	= ANTLR3_UINT32_CAST(( (pANTLR3_UINT8)(lexer->input->data) + (lexer->input->size(lexer->input) )) - (pANTLR3_UINT8)(ex->index));
458
459		if	(width >= 1)
460		{
461			if	(isprint(ex->c))
462			{
463				ANTLR3_FPRINTF(stderr, "near '%c' :\n", ex->c);
464			}
465			else
466			{
467				ANTLR3_FPRINTF(stderr, "near char(%#02X) :\n", (ANTLR3_UINT8)(ex->c));
468			}
469			ANTLR3_FPRINTF(stderr, "\t%.*s\n", width > 20 ? 20 : width ,((pANTLR3_UINT8)ex->index));
470		}
471		else
472		{
473			ANTLR3_FPRINTF(stderr, "(end of input).\n\t This indicates a poorly specified lexer RULE\n\t or unterminated input element such as: \"STRING[\"]\n");
474			ANTLR3_FPRINTF(stderr, "\t The lexer was matching from line %d, offset %d, which\n\t ",
475								(ANTLR3_UINT32)(lexer->rec->state->tokenStartLine),
476								(ANTLR3_UINT32)(lexer->rec->state->tokenStartCharPositionInLine)
477								);
478			width = ANTLR3_UINT32_CAST(((pANTLR3_UINT8)(lexer->input->data)+(lexer->input->size(lexer->input))) - (pANTLR3_UINT8)(lexer->rec->state->tokenStartCharIndex));
479
480			if	(width >= 1)
481			{
482				ANTLR3_FPRINTF(stderr, "looks like this:\n\t\t%.*s\n", width > 20 ? 20 : width ,(pANTLR3_UINT8)(lexer->rec->state->tokenStartCharIndex));
483			}
484			else
485			{
486				ANTLR3_FPRINTF(stderr, "is also the end of the line, so you must check your lexer rules\n");
487			}
488		}
489	}
490}
491
492static void setCharStream   (pANTLR3_LEXER lexer,  pANTLR3_INPUT_STREAM input)
493{
494    /* Install the input interface
495     */
496    lexer->input	= input;
497
498    /* We may need a token factory for the lexer; we don't destroy any existing factory
499     * until the lexer is destroyed, as people may still be using the tokens it produced.
500     * TODO: Later I will provide a dup() method for a token so that it can extract itself
501     * out of the factory.
502     */
503    if	(lexer->rec->state->tokFactory == NULL)
504    {
505	lexer->rec->state->tokFactory	= antlr3TokenFactoryNew(input);
506    }
507    else
508    {
509	/* When the input stream is being changed on the fly, rather than
510	 * at the start of a new lexer, then we must tell the tokenFactory
511	 * which input stream to adorn the tokens with so that when they
512	 * are asked to provide their original input strings they can
513	 * do so from the correct text stream.
514	 */
515	lexer->rec->state->tokFactory->setInputStream(lexer->rec->state->tokFactory, input);
516    }
517
518    /* Propagate the string factory so that we preserve the encoding form from
519     * the input stream.
520     */
521    if	(lexer->rec->state->tokSource->strFactory == NULL)
522    {
523        lexer->rec->state->tokSource->strFactory	= input->strFactory;
524
525        // Set the newly acquired string factory up for our pre-made tokens
526        // for EOF.
527        //
528        if (lexer->rec->state->tokSource->eofToken.strFactory == NULL)
529        {
530            lexer->rec->state->tokSource->eofToken.strFactory = input->strFactory;
531        }
532    }
533
534    /* This is a lexer, install the appropriate exception creator
535     */
536    lexer->rec->exConstruct = antlr3RecognitionExceptionNew;
537
538    /* Set the current token to nothing
539     */
540    lexer->rec->state->token		= NULL;
541    lexer->rec->state->text			= NULL;
542    lexer->rec->state->tokenStartCharIndex	= -1;
543
544    /* Copy the name of the char stream to the token source
545     */
546    lexer->rec->state->tokSource->fileName = input->fileName;
547}
548
549/*!
550 * \brief
551 * Change to a new input stream, remembering the old one.
552 *
553 * \param lexer
554 * Pointer to the lexer instance to switch input streams for.
555 *
556 * \param input
557 * New input stream to install as the current one.
558 *
559 * Switches the current character input stream to
560 * a new one, saving the old one, which we will revert to at the end of this
561 * new one.
562 */
563static void
564pushCharStream  (pANTLR3_LEXER lexer,  pANTLR3_INPUT_STREAM input)
565{
566	// Do we need a new input stream stack?
567	//
568	if	(lexer->rec->state->streams == NULL)
569	{
570		// This is the first call to stack a new
571		// stream and so we must create the stack first.
572		//
573		lexer->rec->state->streams = antlr3StackNew(0);
574
575		if  (lexer->rec->state->streams == NULL)
576		{
577			// Could not do this, we just fail to push it.
578			// TODO: Consider if this is what we want to do, but then
579			//       any programmer can override this method to do something else.
580			return;
581		}
582	}
583
584	// We have a stack, so we can save the current input stream
585	// into it.
586	//
587	lexer->input->istream->mark(lexer->input->istream);
588	lexer->rec->state->streams->push(lexer->rec->state->streams, lexer->input, NULL);
589
590	// And now we can install this new one
591	//
592	lexer->setCharStream(lexer, input);
593}
594
595/*!
596 * \brief
597 * Stops using the current input stream and reverts to any prior
598 * input stream on the stack.
599 *
600 * \param lexer
601 * Description of parameter lexer.
602 *
603 * Pointer to a function that abandons the current input stream, whether it
604 * is empty or not and reverts to the previous stacked input stream.
605 *
606 * \remark
607 * The function fails silently if there are no prior input streams.
608 */
609static void
610popCharStream   (pANTLR3_LEXER lexer)
611{
612    pANTLR3_INPUT_STREAM input;
613
614    // If we do not have a stream stack or we are already at the
615    // stack bottom, then do nothing.
616    //
617    if	(lexer->rec->state->streams != NULL && lexer->rec->state->streams->size(lexer->rec->state->streams) > 0)
618    {
619	// We just leave the current stream to its fate, we do not close
620	// it or anything as we do not know what the programmer intended
621	// for it. This method can always be overridden of course.
622	// So just find out what was currently saved on the stack and use
623	// that now, then pop it from the stack.
624	//
625	input	= (pANTLR3_INPUT_STREAM)(lexer->rec->state->streams->top);
626	lexer->rec->state->streams->pop(lexer->rec->state->streams);
627
628	// Now install the stream as the current one.
629	//
630	lexer->setCharStream(lexer, input);
631	lexer->input->istream->rewindLast(lexer->input->istream);
632    }
633    return;
634}
635
636static void emitNew	    (pANTLR3_LEXER lexer,  pANTLR3_COMMON_TOKEN token)
637{
638    lexer->rec->state->token    = token;	/* Voila!   */
639}
640
641static pANTLR3_COMMON_TOKEN
642emit	    (pANTLR3_LEXER lexer)
643{
644    pANTLR3_COMMON_TOKEN	token;
645
646    /* We could check pointers to token factories and so on, but
647    * we are in code that we want to run as fast as possible
648    * so we are not checking any errors. So make sure you have installed an input stream before
649    * trying to emit a new token.
650    */
651    token   = lexer->rec->state->tokFactory->newToken(lexer->rec->state->tokFactory);
652
653    /* Install the supplied information, and some other bits we already know
654    * get added automatically, such as the input stream it is associated with
655    * (though it can all be overridden of course)
656    */
657    token->type		    = lexer->rec->state->type;
658    token->channel	    = lexer->rec->state->channel;
659    token->start	    = lexer->rec->state->tokenStartCharIndex;
660    token->stop		    = lexer->getCharIndex(lexer) - 1;
661    token->line		    = lexer->rec->state->tokenStartLine;
662    token->charPosition	= lexer->rec->state->tokenStartCharPositionInLine;
663
664    if	(lexer->rec->state->text != NULL)
665    {
666        token->textState	    = ANTLR3_TEXT_STRING;
667        token->tokText.text	    = lexer->rec->state->text;
668    }
669    else
670    {
671        token->textState	= ANTLR3_TEXT_NONE;
672    }
673    token->lineStart	= lexer->input->currentLine;
674    token->user1	= lexer->rec->state->user1;
675    token->user2	= lexer->rec->state->user2;
676    token->user3	= lexer->rec->state->user3;
677    token->custom	= lexer->rec->state->custom;
678
679    lexer->rec->state->token	    = token;
680
681    return  token;
682}
683
684/**
685 * Free the resources allocated by a lexer
686 */
687static void
688freeLexer    (pANTLR3_LEXER lexer)
689{
690	// This may have ben a delegate or delegator lexer, in which case the
691	// state may already have been freed (and set to NULL therefore)
692	// so we ignore the state if we don't have it.
693	//
694	if	(lexer->rec->state != NULL)
695	{
696		if	(lexer->rec->state->streams != NULL)
697		{
698			lexer->rec->state->streams->free(lexer->rec->state->streams);
699		}
700		if	(lexer->rec->state->tokFactory != NULL)
701		{
702			lexer->rec->state->tokFactory->close(lexer->rec->state->tokFactory);
703			lexer->rec->state->tokFactory = NULL;
704		}
705		if	(lexer->rec->state->tokSource != NULL)
706		{
707			ANTLR3_FREE(lexer->rec->state->tokSource);
708			lexer->rec->state->tokSource = NULL;
709		}
710	}
711	if	(lexer->rec != NULL)
712	{
713		lexer->rec->free(lexer->rec);
714		lexer->rec = NULL;
715	}
716	ANTLR3_FREE(lexer);
717}
718
719/** Implementation of matchs for the lexer, overrides any
720 *  base implementation in the base recognizer.
721 *
722 *  \remark
723 *  Note that the generated code lays down arrays of ints for constant
724 *  strings so that they are int UTF32 form!
725 */
726static ANTLR3_BOOLEAN
727matchs(pANTLR3_LEXER lexer, ANTLR3_UCHAR * string)
728{
729	while   (*string != ANTLR3_STRING_TERMINATOR)
730	{
731		if  (lexer->input->istream->_LA(lexer->input->istream, 1) != (*string))
732		{
733			if	(lexer->rec->state->backtracking > 0)
734			{
735				lexer->rec->state->failed = ANTLR3_TRUE;
736				return ANTLR3_FALSE;
737			}
738
739			lexer->rec->exConstruct(lexer->rec);
740			lexer->rec->state->failed	 = ANTLR3_TRUE;
741
742			/* TODO: Implement exception creation more fully perhaps
743			 */
744			lexer->recover(lexer);
745			return  ANTLR3_FALSE;
746		}
747
748		/* Matched correctly, do consume it
749		 */
750		lexer->input->istream->consume(lexer->input->istream);
751		string++;
752
753		/* Reset any failed indicator
754		 */
755		lexer->rec->state->failed = ANTLR3_FALSE;
756	}
757
758
759	return  ANTLR3_TRUE;
760}
761
762/** Implementation of matchc for the lexer, overrides any
763 *  base implementation in the base recognizer.
764 *
765 *  \remark
766 *  Note that the generated code lays down arrays of ints for constant
767 *  strings so that they are int UTF32 form!
768 */
769static ANTLR3_BOOLEAN
770matchc(pANTLR3_LEXER lexer, ANTLR3_UCHAR c)
771{
772	if	(lexer->input->istream->_LA(lexer->input->istream, 1) == c)
773	{
774		/* Matched correctly, do consume it
775		 */
776		lexer->input->istream->consume(lexer->input->istream);
777
778		/* Reset any failed indicator
779		 */
780		lexer->rec->state->failed = ANTLR3_FALSE;
781
782		return	ANTLR3_TRUE;
783	}
784
785	/* Failed to match, exception and recovery time.
786	 */
787	if	(lexer->rec->state->backtracking > 0)
788	{
789		lexer->rec->state->failed  = ANTLR3_TRUE;
790		return	ANTLR3_FALSE;
791	}
792
793	lexer->rec->exConstruct(lexer->rec);
794
795	/* TODO: Implement exception creation more fully perhaps
796	 */
797	lexer->recover(lexer);
798
799	return  ANTLR3_FALSE;
800}
801
802/** Implementation of match range for the lexer, overrides any
803 *  base implementation in the base recognizer.
804 *
805 *  \remark
806 *  Note that the generated code lays down arrays of ints for constant
807 *  strings so that they are int UTF32 form!
808 */
809static ANTLR3_BOOLEAN
810matchRange(pANTLR3_LEXER lexer, ANTLR3_UCHAR low, ANTLR3_UCHAR high)
811{
812    ANTLR3_UCHAR    c;
813
814    /* What is in the stream at the moment?
815     */
816    c	= lexer->input->istream->_LA(lexer->input->istream, 1);
817    if	( c >= low && c <= high)
818    {
819	/* Matched correctly, consume it
820	 */
821	lexer->input->istream->consume(lexer->input->istream);
822
823	/* Reset any failed indicator
824	 */
825	lexer->rec->state->failed = ANTLR3_FALSE;
826
827	return	ANTLR3_TRUE;
828    }
829
830    /* Failed to match, execption and recovery time.
831     */
832
833    if	(lexer->rec->state->backtracking > 0)
834    {
835	lexer->rec->state->failed  = ANTLR3_TRUE;
836	return	ANTLR3_FALSE;
837    }
838
839    lexer->rec->exConstruct(lexer->rec);
840
841    /* TODO: Implement exception creation more fully
842     */
843    lexer->recover(lexer);
844
845    return  ANTLR3_FALSE;
846}
847
848static void
849matchAny	    (pANTLR3_LEXER lexer)
850{
851    lexer->input->istream->consume(lexer->input->istream);
852}
853
854static void
855recover	    (pANTLR3_LEXER lexer)
856{
857    lexer->input->istream->consume(lexer->input->istream);
858}
859
860static ANTLR3_UINT32
861getLine	    (pANTLR3_LEXER lexer)
862{
863    return  lexer->input->getLine(lexer->input);
864}
865
866static ANTLR3_UINT32
867getCharPositionInLine	(pANTLR3_LEXER lexer)
868{
869    return  lexer->input->charPositionInLine;
870}
871
872static ANTLR3_MARKER	getCharIndex	    (pANTLR3_LEXER lexer)
873{
874    return lexer->input->istream->index(lexer->input->istream);
875}
876
877static pANTLR3_STRING
878getText	    (pANTLR3_LEXER lexer)
879{
880	if (lexer->rec->state->text)
881	{
882		return	lexer->rec->state->text;
883
884	}
885	return  lexer->input->substr(
886									lexer->input,
887									lexer->rec->state->tokenStartCharIndex,
888									lexer->getCharIndex(lexer) - lexer->input->charByteSize
889							);
890
891}
892
893static void *
894getCurrentInputSymbol		(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream)
895{
896	return NULL;
897}
898
899static void *
900getMissingSymbol			(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM	istream, pANTLR3_EXCEPTION	e,
901									ANTLR3_UINT32 expectedTokenType, pANTLR3_BITSET_LIST follow)
902{
903	return NULL;
904}
905