1/** \file 2 * Base interface for any ANTLR3 lexer. 3 * 4 * An ANLTR3 lexer builds from two sets of components: 5 * 6 * - The runtime components that provide common functionality such as 7 * traversing character streams, building tokens for output and so on. 8 * - The generated rules and struutre of the actual lexer, which call upon the 9 * runtime components. 10 * 11 * A lexer class contains a character input stream, a base recognizer interface 12 * (which it will normally implement) and a token source interface (which it also 13 * implements. The Tokensource interface is called by a token consumer (such as 14 * a parser, but in theory it can be anything that wants a set of abstract 15 * tokens in place of a raw character stream. 16 * 17 * So then, we set up a lexer in a sequence akin to: 18 * 19 * - Create a character stream (something which implements ANTLR3_INPUT_STREAM) 20 * and initialize it. 21 * - Create a lexer interface and tell it where it its input stream is. 22 * This will cause the creation of a base recognizer class, which it will 23 * override with its own implementations of some methods. The lexer creator 24 * can also then in turn override anything it likes. 25 * - The lexer token source interface is then passed to some interface that 26 * knows how to use it, byte calling for a next token. 27 * - When a next token is called, let ze lexing begin. 28 * 29 */ 30#ifndef _ANTLR3_LEXER 31#define _ANTLR3_LEXER 32 33// [The "BSD licence"] 34// Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC 35// http://www.temporal-wave.com 36// http://www.linkedin.com/in/jimidle 37// 38// All rights reserved. 39// 40// Redistribution and use in source and binary forms, with or without 41// modification, are permitted provided that the following conditions 42// are met: 43// 1. Redistributions of source code must retain the above copyright 44// notice, this list of conditions and the following disclaimer. 45// 2. Redistributions in binary form must reproduce the above copyright 46// notice, this list of conditions and the following disclaimer in the 47// documentation and/or other materials provided with the distribution. 48// 3. The name of the author may not be used to endorse or promote products 49// derived from this software without specific prior written permission. 50// 51// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 52// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 53// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 54// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 55// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 56// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 57// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 58// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 59// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 60// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 61 62/* Definitions 63 */ 64#define ANTLR3_STRING_TERMINATOR 0xFFFFFFFF 65 66#include <antlr3defs.h> 67#include <antlr3input.h> 68#include <antlr3commontoken.h> 69#include <antlr3tokenstream.h> 70#include <antlr3baserecognizer.h> 71 72#ifdef __cplusplus 73extern "C" { 74#endif 75 76typedef struct ANTLR3_LEXER_struct 77{ 78 /** If there is a super structure that is implementing the 79 * lexer, then a pointer to it can be stored here in case 80 * implementing functions are overridden by this super structure. 81 */ 82 void * super; 83 84 /** A generated lexer has an mTokens() function, which needs 85 * the context pointer of the generated lexer, not the base lexer interface 86 * this is stored here and initialized by the generated code (or manually 87 * if this is a manually built lexer. 88 */ 89 void * ctx; 90 91 /** A pointer to the character stream whence this lexer is receiving 92 * characters. 93 * TODO: I may come back to this and implement charstream outside 94 * the input stream as per the java implementation. 95 */ 96 pANTLR3_INPUT_STREAM input; 97 98 /** Pointer to the implementation of a base recognizer, which the lexer 99 * creates and then overrides with its own lexer oriented functions (the 100 * default implementation is parser oriented). This also contains a 101 * token source interface, which the lexer instance will provide to anything 102 * that needs it, which is anything else that implements a base recognizer, 103 * such as a parser. 104 */ 105 pANTLR3_BASE_RECOGNIZER rec; 106 107 /** Pointer to a function that sets the charstream source for the lexer and 108 * causes it to be reset. 109 */ 110 void (*setCharStream) (struct ANTLR3_LEXER_struct * lexer, pANTLR3_INPUT_STREAM input); 111 112 /** Pointer to a function that switches the current character input stream to 113 * a new one, saving the old one, which we will revert to at the end of this 114 * new one. 115 */ 116 void (*pushCharStream) (struct ANTLR3_LEXER_struct * lexer, pANTLR3_INPUT_STREAM input); 117 118 /** Pointer to a function that abandons the current input stream, whether it 119 * is empty or not and reverts to the previous stacked input stream. 120 */ 121 void (*popCharStream) (struct ANTLR3_LEXER_struct * lexer); 122 123 /** Pointer to a function that emits the supplied token as the next token in 124 * the stream. 125 */ 126 void (*emitNew) (struct ANTLR3_LEXER_struct * lexer, pANTLR3_COMMON_TOKEN token); 127 128 /** Pointer to a function that constructs a new token from the lexer stored information 129 */ 130 pANTLR3_COMMON_TOKEN (*emit) (struct ANTLR3_LEXER_struct * lexer); 131 132 /** Pointer to the user provided (either manually or through code generation 133 * function that causes the lexer rules to run the lexing rules and produce 134 * the next token if there iss one. This is called from nextToken() in the 135 * pANTLR3_TOKEN_SOURCE. Note that the input parameter for this funciton is 136 * the generated lexer context (stored in ctx in this interface) it is a generated 137 * function and expects the context to be the generated lexer. 138 */ 139 void (*mTokens) (void * ctx); 140 141 /** Pointer to a function that attempts to match and consume the specified string from the input 142 * stream. Note that strings muse be passed as terminated arrays of ANTLR3_UCHAR. Strings are terminated 143 * with 0xFFFFFFFF, which is an invalid UTF32 character 144 */ 145 ANTLR3_BOOLEAN (*matchs) (struct ANTLR3_LEXER_struct * lexer, ANTLR3_UCHAR * string); 146 147 /** Pointer to a function that matches and consumes the specified character from the input stream. 148 * The input stream is required to provide characters via LA() as UTF32 characters. The default lexer 149 * implementation is source encoding agnostic and so input streams do not generally need to 150 * override the default implmentation. 151 */ 152 ANTLR3_BOOLEAN (*matchc) (struct ANTLR3_LEXER_struct * lexer, ANTLR3_UCHAR c); 153 154 /** Pointer to a function that matches any character in the supplied range (I suppose it could be a token range too 155 * but this would only be useful if the tokens were in tsome guaranteed order which is 156 * only going to happen with a hand crafted token set). 157 */ 158 ANTLR3_BOOLEAN (*matchRange) (struct ANTLR3_LEXER_struct * lexer, ANTLR3_UCHAR low, ANTLR3_UCHAR high); 159 160 /** Pointer to a function that matches the next token/char in the input stream 161 * regardless of what it actaully is. 162 */ 163 void (*matchAny) (struct ANTLR3_LEXER_struct * lexer); 164 165 /** Pointer to a function that recovers from an error found in the input stream. 166 * Generally, this will be a #ANTLR3_EXCEPTION_NOVIABLE_ALT but it could also 167 * be from a mismatched token that the (*match)() could not recover from. 168 */ 169 void (*recover) (struct ANTLR3_LEXER_struct * lexer); 170 171 /** Pointer to function to return the current line number in the input stream 172 */ 173 ANTLR3_UINT32 (*getLine) (struct ANTLR3_LEXER_struct * lexer); 174 ANTLR3_MARKER (*getCharIndex) (struct ANTLR3_LEXER_struct * lexer); 175 ANTLR3_UINT32 (*getCharPositionInLine)(struct ANTLR3_LEXER_struct * lexer); 176 177 /** Pointer to function to return the text so far for the current token being generated 178 */ 179 pANTLR3_STRING (*getText) (struct ANTLR3_LEXER_struct * lexer); 180 181 182 /** Pointer to a function that knows how to free the resources of a lexer 183 */ 184 void (*free) (struct ANTLR3_LEXER_struct * lexer); 185 186} 187 ANTLR3_LEXER; 188 189#ifdef __cplusplus 190} 191#endif 192 193#endif 194