/** \file
 *
 * Base implementation of an antlr 3 lexer.
 *
 * An ANTLR3 lexer implements a base recognizer, a token source and
 * a lexer interface. It constructs a base recognizer with default
 * functions, then overrides any of these that are parser specific (usual
 * default implementation of base recognizer).
 */

// [The "BSD licence"]
// Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC
// http://www.temporal-wave.com
// http://www.linkedin.com/in/jimidle
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
//    derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 40324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver#include <antlr3lexer.h> 41324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 42324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void mTokens (pANTLR3_LEXER lexer); 43324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void setCharStream (pANTLR3_LEXER lexer, pANTLR3_INPUT_STREAM input); 44324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void pushCharStream (pANTLR3_LEXER lexer, pANTLR3_INPUT_STREAM input); 45324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void popCharStream (pANTLR3_LEXER lexer); 46324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 47324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void emitNew (pANTLR3_LEXER lexer, pANTLR3_COMMON_TOKEN token); 48324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic pANTLR3_COMMON_TOKEN emit (pANTLR3_LEXER lexer); 49324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic ANTLR3_BOOLEAN matchs (pANTLR3_LEXER lexer, ANTLR3_UCHAR * string); 50324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic ANTLR3_BOOLEAN matchc (pANTLR3_LEXER lexer, ANTLR3_UCHAR c); 51324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic ANTLR3_BOOLEAN matchRange (pANTLR3_LEXER lexer, ANTLR3_UCHAR low, ANTLR3_UCHAR high); 52324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void matchAny (pANTLR3_LEXER lexer); 53324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void recover (pANTLR3_LEXER lexer); 54324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic ANTLR3_UINT32 getLine (pANTLR3_LEXER lexer); 55324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic ANTLR3_MARKER getCharIndex (pANTLR3_LEXER lexer); 56324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic ANTLR3_UINT32 getCharPositionInLine (pANTLR3_LEXER lexer); 57324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic pANTLR3_STRING getText (pANTLR3_LEXER lexer); 58324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic 
pANTLR3_COMMON_TOKEN nextToken (pANTLR3_TOKEN_SOURCE toksource); 59324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 60324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void displayRecognitionError (pANTLR3_BASE_RECOGNIZER rec, pANTLR3_UINT8 * tokenNames); 61324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void reportError (pANTLR3_BASE_RECOGNIZER rec); 62324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void * getCurrentInputSymbol (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream); 63324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void * getMissingSymbol (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream, pANTLR3_EXCEPTION e, 64324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ANTLR3_UINT32 expectedTokenType, pANTLR3_BITSET_LIST follow); 65324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 66324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void reset (pANTLR3_BASE_RECOGNIZER rec); 67324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 68324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void freeLexer (pANTLR3_LEXER lexer); 69324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 70324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 71324c4644fee44b9898524c09511bd33c3f12e2dfBen GruverANTLR3_API pANTLR3_LEXER 72324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr3LexerNew(ANTLR3_UINT32 sizeHint, pANTLR3_RECOGNIZER_SHARED_STATE state) 73324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{ 74324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver pANTLR3_LEXER lexer; 75324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver pANTLR3_COMMON_TOKEN specialT; 76324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 77324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /* Allocate memory 78324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 79324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer = (pANTLR3_LEXER) ANTLR3_MALLOC(sizeof(ANTLR3_LEXER)); 80324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 
81324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (lexer == NULL) 82324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 83324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return NULL; 84324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 85324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 86324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /* Now we need to create the base recognizer 87324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 88324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->rec = antlr3BaseRecognizerNew(ANTLR3_TYPE_LEXER, sizeHint, state); 89324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 90324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (lexer->rec == NULL) 91324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 92324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->free(lexer); 93324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return NULL; 94324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 95324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->rec->super = lexer; 96324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 97324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->rec->displayRecognitionError = displayRecognitionError; 98324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->rec->reportError = reportError; 99324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->rec->reset = reset; 100324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->rec->getCurrentInputSymbol = getCurrentInputSymbol; 101324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->rec->getMissingSymbol = getMissingSymbol; 102324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 103324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /* Now install the token source interface 104324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 105324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (lexer->rec->state->tokSource == NULL) 106324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 
107324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->rec->state->tokSource = (pANTLR3_TOKEN_SOURCE)ANTLR3_CALLOC(1, sizeof(ANTLR3_TOKEN_SOURCE)); 108324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 109324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (lexer->rec->state->tokSource == NULL) 110324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 111324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->rec->free(lexer->rec); 112324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->free(lexer); 113324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 114324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return NULL; 115324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 116324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->rec->state->tokSource->super = lexer; 117324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 118324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /* Install the default nextToken() method, which may be overridden 119324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * by generated code, or by anything else in fact. 
120324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 121324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->rec->state->tokSource->nextToken = nextToken; 122324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->rec->state->tokSource->strFactory = NULL; 123324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 124324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->rec->state->tokFactory = NULL; 125324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 126324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 127324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /* Install the lexer API 128324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 129324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->setCharStream = setCharStream; 130324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->mTokens = (void (*)(void *))(mTokens); 131324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->setCharStream = setCharStream; 132324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->pushCharStream = pushCharStream; 133324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->popCharStream = popCharStream; 134324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->emit = emit; 135324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->emitNew = emitNew; 136324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->matchs = matchs; 137324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->matchc = matchc; 138324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->matchRange = matchRange; 139324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->matchAny = matchAny; 140324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->recover = recover; 141324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->getLine = getLine; 142324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->getCharIndex = getCharIndex; 143324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->getCharPositionInLine = getCharPositionInLine; 
144324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->getText = getText; 145324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->free = freeLexer; 146324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 147324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /* Initialise the eof token 148324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 149324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver specialT = &(lexer->rec->state->tokSource->eofToken); 150324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver antlr3SetTokenAPI (specialT); 151324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver specialT->setType (specialT, ANTLR3_TOKEN_EOF); 152324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver specialT->factoryMade = ANTLR3_TRUE; // Prevent things trying to free() it 153324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver specialT->strFactory = NULL; 154324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver specialT->textState = ANTLR3_TEXT_NONE; 155324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver specialT->custom = NULL; 156324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver specialT->user1 = 0; 157324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver specialT->user2 = 0; 158324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver specialT->user3 = 0; 159324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 160324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Initialize the skip token. 
161324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 162324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver specialT = &(lexer->rec->state->tokSource->skipToken); 163324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver antlr3SetTokenAPI (specialT); 164324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver specialT->setType (specialT, ANTLR3_TOKEN_INVALID); 165324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver specialT->factoryMade = ANTLR3_TRUE; // Prevent things trying to free() it 166324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver specialT->strFactory = NULL; 167324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver specialT->custom = NULL; 168324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver specialT->user1 = 0; 169324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver specialT->user2 = 0; 170324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver specialT->user3 = 0; 171324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return lexer; 172324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver} 173324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 174324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void 175324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverreset (pANTLR3_BASE_RECOGNIZER rec) 176324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{ 177324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver pANTLR3_LEXER lexer; 178324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 179324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer = rec->super; 180324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 181324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->rec->state->token = NULL; 182324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->rec->state->type = ANTLR3_TOKEN_INVALID; 183324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->rec->state->channel = ANTLR3_TOKEN_DEFAULT_CHANNEL; 184324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->rec->state->tokenStartCharIndex = -1; 185324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 
lexer->rec->state->tokenStartCharPositionInLine = -1; 186324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->rec->state->tokenStartLine = -1; 187324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 188324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->rec->state->text = NULL; 189324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 190324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // OK - that's all hunky dory, but we may well have had 191324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // a token factory that needs a reset. Do that here 192324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 193324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (lexer->rec->state->tokFactory != NULL) 194324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 195324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->rec->state->tokFactory->reset(lexer->rec->state->tokFactory); 196324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 197324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver} 198324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 199324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// 200324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \brief 201324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// Returns the next available token from the current input stream. 202324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// 203324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \param toksource 204324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// Points to the implementation of a token source. The lexer is 205324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// addressed by the super structure pointer. 206324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// 207324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \returns 208324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// The next token in the current input stream or the EOF token 209324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// if there are no more tokens. 
210324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// 211324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \remarks 212324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// Write remarks for nextToken here. 213324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// 214324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \see nextToken 215324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// 216324c4644fee44b9898524c09511bd33c3f12e2dfBen GruverANTLR3_INLINE static pANTLR3_COMMON_TOKEN 217324c4644fee44b9898524c09511bd33c3f12e2dfBen GruvernextTokenStr (pANTLR3_TOKEN_SOURCE toksource) 218324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{ 219324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver pANTLR3_LEXER lexer; 220324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver pANTLR3_RECOGNIZER_SHARED_STATE state; 221324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver pANTLR3_INPUT_STREAM input; 222324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver pANTLR3_INT_STREAM istream; 223324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 224324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer = (pANTLR3_LEXER)(toksource->super); 225324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver state = lexer->rec->state; 226324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input = lexer->input; 227324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver istream = input->istream; 228324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 229324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /// Loop until we get a non skipped token or EOF 230324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /// 231324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver for (;;) 232324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 233324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Get rid of any previous token (token factory takes care of 234324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // any de-allocation when this token is finally used up. 
235324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 236324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver state->token = NULL; 237324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver state->error = ANTLR3_FALSE; // Start out without an exception 238324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver state->failed = ANTLR3_FALSE; 239324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 240324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Now call the matching rules and see if we can generate a new token 241324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 242324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver for (;;) 243324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 244324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Record the start of the token in our input stream. 245324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 246324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver state->channel = ANTLR3_TOKEN_DEFAULT_CHANNEL; 247324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver state->tokenStartCharIndex = (ANTLR3_MARKER)(((pANTLR3_UINT8)input->nextChar)); 248324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver state->tokenStartCharPositionInLine = input->charPositionInLine; 249324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver state->tokenStartLine = input->line; 250324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver state->text = NULL; 251324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver state->custom = NULL; 252324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver state->user1 = 0; 253324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver state->user2 = 0; 254324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver state->user3 = 0; 255324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 256324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (istream->_LA(istream, 1) == ANTLR3_CHARSTREAM_EOF) 257324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 258324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Reached the end of the current stream, nothing more to do if 
this is 259324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // the last in the stack. 260324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 261324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver pANTLR3_COMMON_TOKEN teof = &(toksource->eofToken); 262324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 263324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver teof->setStartIndex (teof, lexer->getCharIndex(lexer)); 264324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver teof->setStopIndex (teof, lexer->getCharIndex(lexer)); 265324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver teof->setLine (teof, lexer->getLine(lexer)); 266324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver teof->factoryMade = ANTLR3_TRUE; // This isn't really manufactured but it stops things from trying to free it 267324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return teof; 268324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 269324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 270324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver state->token = NULL; 271324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver state->error = ANTLR3_FALSE; // Start out without an exception 272324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver state->failed = ANTLR3_FALSE; 273324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 274324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Call the generated lexer, see if it can get a new token together. 275324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 276324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->mTokens(lexer->ctx); 277324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 278324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (state->error == ANTLR3_TRUE) 279324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 280324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Recognition exception, report it and try to recover. 
281324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 282324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver state->failed = ANTLR3_TRUE; 283324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->rec->reportError(lexer->rec); 284324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->recover(lexer); 285324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 286324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver else 287324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 288324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (state->token == NULL) 289324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 290324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Emit the real token, which adds it in to the token stream basically 291324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 292324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver emit(lexer); 293324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 294324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver else if (state->token == &(toksource->skipToken)) 295324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 296324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // A real token could have been generated, but "Computer say's naaaaah" and it 297324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // it is just something we need to skip altogether. 
298324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 299324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver continue; 300324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 301324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 302324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Good token, not skipped, not EOF token 303324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 304324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return state->token; 305324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 306324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 307324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 308324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver} 309324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 310324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/** 311324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \brief 312324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * Default implementation of the nextToken() call for a lexer. 313324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 314324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \param toksource 315324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * Points to the implementation of a token source. The lexer is 316324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * addressed by the super structure pointer. 317324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 318324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \returns 319324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * The next token in the current input stream or the EOF token 320324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * if there are no more tokens in any input stream in the stack. 321324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 322324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * Write detailed description for nextToken here. 
323324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 324324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \remarks 325324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * Write remarks for nextToken here. 326324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 327324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \see nextTokenStr 328324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 329324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic pANTLR3_COMMON_TOKEN 330324c4644fee44b9898524c09511bd33c3f12e2dfBen GruvernextToken (pANTLR3_TOKEN_SOURCE toksource) 331324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{ 332324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver pANTLR3_COMMON_TOKEN tok; 333324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 334324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Find the next token in the current stream 335324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 336324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver tok = nextTokenStr(toksource); 337324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 338324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // If we got to the EOF token then switch to the previous 339324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // input stream if there were any and just return the 340324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // EOF if there are none. 
We must check the next token 341324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // in any outstanding input stream we pop into the active 342324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // role to see if it was sitting at EOF after PUSHing the 343324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // stream we just consumed, otherwise we will return EOF 344324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // on the reinstalled input stream, when in actual fact 345324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // there might be more input streams to POP before the 346324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // real EOF of the whole logical inptu stream. Hence we 347324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // use a while loop here until we find somethign in the stream 348324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // that isn't EOF or we reach the actual end of the last input 349324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // stream on the stack. 
350324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 351324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver while (tok->type == ANTLR3_TOKEN_EOF) 352324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 353324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver pANTLR3_LEXER lexer; 354324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 355324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer = (pANTLR3_LEXER)(toksource->super); 356324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 357324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (lexer->rec->state->streams != NULL && lexer->rec->state->streams->size(lexer->rec->state->streams) > 0) 358324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 359324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // We have another input stream in the stack so we 360324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // need to revert to it, then resume the loop to check 361324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // it wasn't sitting at EOF itself. 362324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 363324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->popCharStream(lexer); 364324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver tok = nextTokenStr(toksource); 365324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 366324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver else 367324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 368324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // There were no more streams on the input stack 369324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // so this EOF is the 'real' logical EOF for 370324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // the input stream. So we just exit the loop and 371324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // return the EOF we have found. 
372324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 373324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver break; 374324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 375324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 376324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 377324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 378324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // return whatever token we have, which may be EOF 379324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 380324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return tok; 381324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver} 382324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 383324c4644fee44b9898524c09511bd33c3f12e2dfBen GruverANTLR3_API pANTLR3_LEXER 384324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr3LexerNewStream(ANTLR3_UINT32 sizeHint, pANTLR3_INPUT_STREAM input, pANTLR3_RECOGNIZER_SHARED_STATE state) 385324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{ 386324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver pANTLR3_LEXER lexer; 387324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 388324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Create a basic lexer first 389324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 390324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer = antlr3LexerNew(sizeHint, state); 391324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 392324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (lexer != NULL) 393324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 394324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Install the input stream and reset the lexer 395324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 396324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver setCharStream(lexer, input); 397324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 398324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 399324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return lexer; 400324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver} 
401324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 402324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void mTokens (pANTLR3_LEXER lexer) 403324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{ 404324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (lexer) // Fool compiler, avoid pragmas 405324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 406324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ANTLR3_FPRINTF(stderr, "lexer->mTokens(): Error: No lexer rules were added to the lexer yet!\n"); 407324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 408324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver} 409324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 410324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void 411324c4644fee44b9898524c09511bd33c3f12e2dfBen GruverreportError (pANTLR3_BASE_RECOGNIZER rec) 412324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{ 413324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Indicate this recognizer had an error while processing. 414324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 415324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver rec->state->errorCount++; 416324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 417324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver rec->displayRecognitionError(rec, rec->state->tokenNames); 418324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver} 419324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 420324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver#ifdef ANTLR3_WINDOWS 421324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver#pragma warning( disable : 4100 ) 422324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver#endif 423324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 424324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/** Default lexer error handler (works for 8 bit streams only!!!) 
425324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 426324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void 427324c4644fee44b9898524c09511bd33c3f12e2dfBen GruverdisplayRecognitionError (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 * tokenNames) 428324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{ 429324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver pANTLR3_LEXER lexer; 430324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver pANTLR3_EXCEPTION ex; 431324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver pANTLR3_STRING ftext; 432324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 433324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer = (pANTLR3_LEXER)(recognizer->super); 434324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ex = lexer->rec->state->exception; 435324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 436324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // See if there is a 'filename' we can use 437324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 438324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (ex->name == NULL) 439324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 440324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ANTLR3_FPRINTF(stderr, "-unknown source-("); 441324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 442324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver else 443324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 444324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ftext = ex->streamName->to8(ex->streamName); 445324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ANTLR3_FPRINTF(stderr, "%s(", ftext->chars); 446324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 447324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 448324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ANTLR3_FPRINTF(stderr, "%d) ", recognizer->state->exception->line); 449324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ANTLR3_FPRINTF(stderr, ": lexer error %d :\n\t%s at offset %d, ", 450324c4644fee44b9898524c09511bd33c3f12e2dfBen 
Gruver ex->type, 451324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver (pANTLR3_UINT8) (ex->message), 452324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ex->charPositionInLine+1 453324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ); 454324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 455324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ANTLR3_INT32 width; 456324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 457324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver width = ANTLR3_UINT32_CAST(( (pANTLR3_UINT8)(lexer->input->data) + (lexer->input->size(lexer->input) )) - (pANTLR3_UINT8)(ex->index)); 458324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 459324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (width >= 1) 460324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 461324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (isprint(ex->c)) 462324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 463324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ANTLR3_FPRINTF(stderr, "near '%c' :\n", ex->c); 464324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 465324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver else 466324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 467324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ANTLR3_FPRINTF(stderr, "near char(%#02X) :\n", (ANTLR3_UINT8)(ex->c)); 468324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 469324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ANTLR3_FPRINTF(stderr, "\t%.*s\n", width > 20 ? 
20 : width ,((pANTLR3_UINT8)ex->index)); 470324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 471324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver else 472324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 473324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ANTLR3_FPRINTF(stderr, "(end of input).\n\t This indicates a poorly specified lexer RULE\n\t or unterminated input element such as: \"STRING[\"]\n"); 474324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ANTLR3_FPRINTF(stderr, "\t The lexer was matching from line %d, offset %d, which\n\t ", 475324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver (ANTLR3_UINT32)(lexer->rec->state->tokenStartLine), 476324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver (ANTLR3_UINT32)(lexer->rec->state->tokenStartCharPositionInLine) 477324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ); 478324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver width = ANTLR3_UINT32_CAST(((pANTLR3_UINT8)(lexer->input->data)+(lexer->input->size(lexer->input))) - (pANTLR3_UINT8)(lexer->rec->state->tokenStartCharIndex)); 479324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 480324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (width >= 1) 481324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 482324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ANTLR3_FPRINTF(stderr, "looks like this:\n\t\t%.*s\n", width > 20 ? 
20 : width ,(pANTLR3_UINT8)(lexer->rec->state->tokenStartCharIndex)); 483324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 484324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver else 485324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 486324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ANTLR3_FPRINTF(stderr, "is also the end of the line, so you must check your lexer rules\n"); 487324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 488324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 489324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 490324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver} 491324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 492324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void setCharStream (pANTLR3_LEXER lexer, pANTLR3_INPUT_STREAM input) 493324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{ 494324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /* Install the input interface 495324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 496324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->input = input; 497324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 498324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /* We may need a token factory for the lexer; we don't destroy any existing factory 499324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * until the lexer is destroyed, as people may still be using the tokens it produced. 500324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * TODO: Later I will provide a dup() method for a token so that it can extract itself 501324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * out of the factory. 
502324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 503324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (lexer->rec->state->tokFactory == NULL) 504324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 505324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->rec->state->tokFactory = antlr3TokenFactoryNew(input); 506324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 507324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver else 508324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 509324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /* When the input stream is being changed on the fly, rather than 510324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * at the start of a new lexer, then we must tell the tokenFactory 511324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * which input stream to adorn the tokens with so that when they 512324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * are asked to provide their original input strings they can 513324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * do so from the correct text stream. 514324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 515324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->rec->state->tokFactory->setInputStream(lexer->rec->state->tokFactory, input); 516324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 517324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 518324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /* Propagate the string factory so that we preserve the encoding form from 519324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * the input stream. 
520324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 521324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (lexer->rec->state->tokSource->strFactory == NULL) 522324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 523324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->rec->state->tokSource->strFactory = input->strFactory; 524324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 525324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Set the newly acquired string factory up for our pre-made tokens 526324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // for EOF. 527324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 528324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (lexer->rec->state->tokSource->eofToken.strFactory == NULL) 529324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 530324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->rec->state->tokSource->eofToken.strFactory = input->strFactory; 531324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 532324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 533324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 534324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /* This is a lexer, install the appropriate exception creator 535324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 536324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->rec->exConstruct = antlr3RecognitionExceptionNew; 537324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 538324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /* Set the current token to nothing 539324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 540324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->rec->state->token = NULL; 541324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->rec->state->text = NULL; 542324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->rec->state->tokenStartCharIndex = -1; 543324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 544324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /* Copy the name of the char 
stream to the token source 545324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 546324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->rec->state->tokSource->fileName = input->fileName; 547324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver} 548324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 549324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/*! 550324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \brief 551324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * Change to a new input stream, remembering the old one. 552324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 553324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \param lexer 554324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * Pointer to the lexer instance to switch input streams for. 555324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 556324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \param input 557324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * New input stream to install as the current one. 558324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 559324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * Switches the current character input stream to 560324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * a new one, saving the old one, which we will revert to at the end of this 561324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * new one. 562324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 563324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void 564324c4644fee44b9898524c09511bd33c3f12e2dfBen GruverpushCharStream (pANTLR3_LEXER lexer, pANTLR3_INPUT_STREAM input) 565324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{ 566324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Do we need a new input stream stack? 
567324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 568324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (lexer->rec->state->streams == NULL) 569324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 570324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // This is the first call to stack a new 571324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // stream and so we must create the stack first. 572324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 573324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->rec->state->streams = antlr3StackNew(0); 574324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 575324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (lexer->rec->state->streams == NULL) 576324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 577324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Could not do this, we just fail to push it. 578324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // TODO: Consider if this is what we want to do, but then 579324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // any programmer can override this method to do something else. 580324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return; 581324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 582324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 583324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 584324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // We have a stack, so we can save the current input stream 585324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // into it. 
586324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 587324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->input->istream->mark(lexer->input->istream); 588324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->rec->state->streams->push(lexer->rec->state->streams, lexer->input, NULL); 589324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 590324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // And now we can install this new one 591324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 592324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->setCharStream(lexer, input); 593324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver} 594324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 595324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/*! 596324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \brief 597324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * Stops using the current input stream and reverts to any prior 598324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * input stream on the stack. 599324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 600324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \param lexer 601324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * Description of parameter lexer. 602324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 603324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * Pointer to a function that abandons the current input stream, whether it 604324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * is empty or not and reverts to the previous stacked input stream. 605324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 606324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \remark 607324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * The function fails silently if there are no prior input streams. 
608324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 609324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void 610324c4644fee44b9898524c09511bd33c3f12e2dfBen GruverpopCharStream (pANTLR3_LEXER lexer) 611324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{ 612324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver pANTLR3_INPUT_STREAM input; 613324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 614324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // If we do not have a stream stack or we are already at the 615324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // stack bottom, then do nothing. 616324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 617324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (lexer->rec->state->streams != NULL && lexer->rec->state->streams->size(lexer->rec->state->streams) > 0) 618324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 619324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // We just leave the current stream to its fate, we do not close 620324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // it or anything as we do not know what the programmer intended 621324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // for it. This method can always be overridden of course. 622324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // So just find out what was currently saved on the stack and use 623324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // that now, then pop it from the stack. 624324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 625324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input = (pANTLR3_INPUT_STREAM)(lexer->rec->state->streams->top); 626324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->rec->state->streams->pop(lexer->rec->state->streams); 627324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 628324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Now install the stream as the current one. 
629324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 630324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->setCharStream(lexer, input); 631324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->input->istream->rewindLast(lexer->input->istream); 632324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 633324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return; 634324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver} 635324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 636324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void emitNew (pANTLR3_LEXER lexer, pANTLR3_COMMON_TOKEN token) 637324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{ 638324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->rec->state->token = token; /* Voila! */ 639324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver} 640324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 641324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic pANTLR3_COMMON_TOKEN 642324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruveremit (pANTLR3_LEXER lexer) 643324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{ 644324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver pANTLR3_COMMON_TOKEN token; 645324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 646324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /* We could check pointers to token factories and so on, but 647324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * we are in code that we want to run as fast as possible 648324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * so we are not checking any errors. So make sure you have installed an input stream before 649324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * trying to emit a new token. 
650324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 651324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver token = lexer->rec->state->tokFactory->newToken(lexer->rec->state->tokFactory); 652324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 653324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /* Install the supplied information, and some other bits we already know 654324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * get added automatically, such as the input stream it is associated with 655324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * (though it can all be overridden of course) 656324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 657324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver token->type = lexer->rec->state->type; 658324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver token->channel = lexer->rec->state->channel; 659324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver token->start = lexer->rec->state->tokenStartCharIndex; 660324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver token->stop = lexer->getCharIndex(lexer) - 1; 661324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver token->line = lexer->rec->state->tokenStartLine; 662324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver token->charPosition = lexer->rec->state->tokenStartCharPositionInLine; 663324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 664324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (lexer->rec->state->text != NULL) 665324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 666324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver token->textState = ANTLR3_TEXT_STRING; 667324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver token->tokText.text = lexer->rec->state->text; 668324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 669324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver else 670324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 671324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver token->textState = ANTLR3_TEXT_NONE; 672324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 
673324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver token->lineStart = lexer->input->currentLine; 674324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver token->user1 = lexer->rec->state->user1; 675324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver token->user2 = lexer->rec->state->user2; 676324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver token->user3 = lexer->rec->state->user3; 677324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver token->custom = lexer->rec->state->custom; 678324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 679324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->rec->state->token = token; 680324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 681324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return token; 682324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver} 683324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 684324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/** 685324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * Free the resources allocated by a lexer 686324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 687324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void 688324c4644fee44b9898524c09511bd33c3f12e2dfBen GruverfreeLexer (pANTLR3_LEXER lexer) 689324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{ 690324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // This may have ben a delegate or delegator lexer, in which case the 691324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // state may already have been freed (and set to NULL therefore) 692324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // so we ignore the state if we don't have it. 
693324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 694324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (lexer->rec->state != NULL) 695324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 696324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (lexer->rec->state->streams != NULL) 697324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 698324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->rec->state->streams->free(lexer->rec->state->streams); 699324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 700324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (lexer->rec->state->tokFactory != NULL) 701324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 702324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->rec->state->tokFactory->close(lexer->rec->state->tokFactory); 703324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->rec->state->tokFactory = NULL; 704324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 705324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (lexer->rec->state->tokSource != NULL) 706324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 707324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ANTLR3_FREE(lexer->rec->state->tokSource); 708324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->rec->state->tokSource = NULL; 709324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 710324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 711324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (lexer->rec != NULL) 712324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 713324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->rec->free(lexer->rec); 714324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->rec = NULL; 715324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 716324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ANTLR3_FREE(lexer); 717324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver} 718324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 719324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/** Implementation 
of matchs for the lexer, overrides any 720324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * base implementation in the base recognizer. 721324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 722324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \remark 723324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * Note that the generated code lays down arrays of ints for constant 724324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * strings so that they are int UTF32 form! 725324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 726324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic ANTLR3_BOOLEAN 727324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruvermatchs(pANTLR3_LEXER lexer, ANTLR3_UCHAR * string) 728324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{ 729324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver while (*string != ANTLR3_STRING_TERMINATOR) 730324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 731324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (lexer->input->istream->_LA(lexer->input->istream, 1) != (*string)) 732324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 733324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (lexer->rec->state->backtracking > 0) 734324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 735324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->rec->state->failed = ANTLR3_TRUE; 736324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return ANTLR3_FALSE; 737324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 738324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 739324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->rec->exConstruct(lexer->rec); 740324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->rec->state->failed = ANTLR3_TRUE; 741324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 742324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /* TODO: Implement exception creation more fully perhaps 743324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 744324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 
lexer->recover(lexer); 745324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return ANTLR3_FALSE; 746324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 747324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 748324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /* Matched correctly, do consume it 749324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 750324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->input->istream->consume(lexer->input->istream); 751324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver string++; 752324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 753324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /* Reset any failed indicator 754324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 755324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->rec->state->failed = ANTLR3_FALSE; 756324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 757324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 758324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 759324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return ANTLR3_TRUE; 760324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver} 761324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 762324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/** Implementation of matchc for the lexer, overrides any 763324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * base implementation in the base recognizer. 764324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 765324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \remark 766324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * Note that the generated code lays down arrays of ints for constant 767324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * strings so that they are int UTF32 form! 
768324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 769324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic ANTLR3_BOOLEAN 770324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruvermatchc(pANTLR3_LEXER lexer, ANTLR3_UCHAR c) 771324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{ 772324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (lexer->input->istream->_LA(lexer->input->istream, 1) == c) 773324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 774324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /* Matched correctly, do consume it 775324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 776324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->input->istream->consume(lexer->input->istream); 777324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 778324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /* Reset any failed indicator 779324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 780324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->rec->state->failed = ANTLR3_FALSE; 781324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 782324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return ANTLR3_TRUE; 783324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 784324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 785324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /* Failed to match, exception and recovery time. 
786324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 787324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (lexer->rec->state->backtracking > 0) 788324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 789324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->rec->state->failed = ANTLR3_TRUE; 790324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return ANTLR3_FALSE; 791324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 792324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 793324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->rec->exConstruct(lexer->rec); 794324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 795324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /* TODO: Implement exception creation more fully perhaps 796324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 797324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->recover(lexer); 798324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 799324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return ANTLR3_FALSE; 800324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver} 801324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 802324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/** Implementation of match range for the lexer, overrides any 803324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * base implementation in the base recognizer. 804324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 805324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \remark 806324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * Note that the generated code lays down arrays of ints for constant 807324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * strings so that they are int UTF32 form! 
808324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 809324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic ANTLR3_BOOLEAN 810324c4644fee44b9898524c09511bd33c3f12e2dfBen GruvermatchRange(pANTLR3_LEXER lexer, ANTLR3_UCHAR low, ANTLR3_UCHAR high) 811324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{ 812324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ANTLR3_UCHAR c; 813324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 814324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /* What is in the stream at the moment? 815324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 816324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver c = lexer->input->istream->_LA(lexer->input->istream, 1); 817324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ( c >= low && c <= high) 818324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 819324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /* Matched correctly, consume it 820324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 821324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->input->istream->consume(lexer->input->istream); 822324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 823324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /* Reset any failed indicator 824324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 825324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->rec->state->failed = ANTLR3_FALSE; 826324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 827324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return ANTLR3_TRUE; 828324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 829324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 830324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /* Failed to match, execption and recovery time. 
831324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 832324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 833324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (lexer->rec->state->backtracking > 0) 834324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 835324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->rec->state->failed = ANTLR3_TRUE; 836324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return ANTLR3_FALSE; 837324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 838324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 839324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->rec->exConstruct(lexer->rec); 840324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 841324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /* TODO: Implement exception creation more fully 842324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 843324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->recover(lexer); 844324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 845324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return ANTLR3_FALSE; 846324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver} 847324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 848324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void 849324c4644fee44b9898524c09511bd33c3f12e2dfBen GruvermatchAny (pANTLR3_LEXER lexer) 850324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{ 851324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->input->istream->consume(lexer->input->istream); 852324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver} 853324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 854324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void 855324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverrecover (pANTLR3_LEXER lexer) 856324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{ 857324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->input->istream->consume(lexer->input->istream); 858324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver} 859324c4644fee44b9898524c09511bd33c3f12e2dfBen 
Gruver 860324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic ANTLR3_UINT32 861324c4644fee44b9898524c09511bd33c3f12e2dfBen GruvergetLine (pANTLR3_LEXER lexer) 862324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{ 863324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return lexer->input->getLine(lexer->input); 864324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver} 865324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 866324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic ANTLR3_UINT32 867324c4644fee44b9898524c09511bd33c3f12e2dfBen GruvergetCharPositionInLine (pANTLR3_LEXER lexer) 868324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{ 869324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return lexer->input->charPositionInLine; 870324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver} 871324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 872324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic ANTLR3_MARKER getCharIndex (pANTLR3_LEXER lexer) 873324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{ 874324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return lexer->input->istream->index(lexer->input->istream); 875324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver} 876324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 877324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic pANTLR3_STRING 878324c4644fee44b9898524c09511bd33c3f12e2dfBen GruvergetText (pANTLR3_LEXER lexer) 879324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{ 880324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (lexer->rec->state->text) 881324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 882324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return lexer->rec->state->text; 883324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 884324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 885324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return lexer->input->substr( 886324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->input, 887324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 
lexer->rec->state->tokenStartCharIndex, 888324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver lexer->getCharIndex(lexer) - lexer->input->charByteSize 889324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ); 890324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 891324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver} 892324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 893324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void * 894324c4644fee44b9898524c09511bd33c3f12e2dfBen GruvergetCurrentInputSymbol (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream) 895324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{ 896324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return NULL; 897324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver} 898324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 899324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void * 900324c4644fee44b9898524c09511bd33c3f12e2dfBen GruvergetMissingSymbol (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_INT_STREAM istream, pANTLR3_EXCEPTION e, 901324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ANTLR3_UINT32 expectedTokenType, pANTLR3_BITSET_LIST follow) 902324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{ 903324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return NULL; 904324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver} 905