antlr3input.h revision 324c4644fee44b9898524c09511bd33c3f12e2df
/** \file
 * Defines the basic structures used to manipulate character
 * streams from any input source. Any character size and encoding
 * can in theory be used, so long as a set of functions is provided that
 * can return a 32 bit Integer representation of their characters and efficiently mark and revert
 * to specific offsets into their input streams.
 */
#ifndef _ANTLR3_INPUT_H
#define _ANTLR3_INPUT_H

// [The "BSD licence"]
// Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC
// http://www.temporal-wave.com
// http://www.linkedin.com/in/jimidle
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
//    derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include <antlr3defs.h>
#include <antlr3string.h>
#include <antlr3commontoken.h>
#include <antlr3intstream.h>
#include <antlr3convertutf.h>

#ifdef __cplusplus
extern "C" {
#endif



/// Master context structure for an ANTLR3 C runtime based input stream.
53324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \ingroup apistructures 54324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// 55324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruvertypedef struct ANTLR3_INPUT_STREAM_struct 56324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{ 57324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** Interfaces that provide streams must all provide 58324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * a generic ANTLR3_INT_STREAM interface and an ANTLR3_INPUT_STREAM 59324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * is no different. 60324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 61324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver pANTLR3_INT_STREAM istream; 62324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 63324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** Whatever super structure is providing the INPUT stream needs a pointer to itself 64324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * so that this can be passed back to it whenever the api functions 65324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * are called back from this interface. 66324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 67324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver void * super; 68324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 69324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** Pointer the start of the input string, characters may be 70324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * taken as offsets from here and in original input format encoding. 71324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 72324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver void * data; 73324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 74324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** Indicates if the data pointer was allocated by us, and so should be freed 75324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * when the stream dies. 
76324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 77324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver int isAllocated; 78324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 79324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** String factory for this input stream 80324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 81324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver pANTLR3_STRING_FACTORY strFactory; 82324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 83324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 84324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** Pointer to the next character to be consumed from the input data 85324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * This is cast to point at the encoding of the original file that 86324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * was read by the functions installed as pointer in this input stream 87324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * context instance at file/string/whatever load time. 88324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 89324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver void * nextChar; 90324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 91324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** Number of characters that can be consumed at this point in time. 92324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * Mostly this is just what is left in the pre-read buffer, but if the 93324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * input source is a stream such as a socket or something then we may 94324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * call special read code to wait for more input. 95324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 96324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ANTLR3_UINT32 sizeBuf; 97324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 98324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** The line number we are traversing in the input file. 
This gets incremented 99324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * by a newline() call in the lexer grammar actions. 100324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 101324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ANTLR3_UINT32 line; 102324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 103324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** Pointer into the input buffer where the current line 104324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * started. 105324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 106324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver void * currentLine; 107324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 108324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** The offset within the current line of the current character 109324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 110324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ANTLR3_INT32 charPositionInLine; 111324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 112324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** Tracks how deep mark() calls are nested 113324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 114324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ANTLR3_UINT32 markDepth; 115324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 116324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** List of mark() points in the input stream 117324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 118324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver pANTLR3_VECTOR markers; 119324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 120324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** File name string, set to pointer to memory if 121324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * you set it manually as it will be free()d 122324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 123324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver pANTLR3_STRING fileName; 124324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 
125324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** File number, needs to be set manually to some file index of your devising. 126324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 127324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ANTLR3_UINT32 fileNo; 128324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 129324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /* API */ 130324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 131324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 132324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** Pointer to function that closes the input stream 133324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 134324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver void (*close) (struct ANTLR3_INPUT_STREAM_struct * input); 135324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver void (*free) (struct ANTLR3_INPUT_STREAM_struct * input); 136324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 137324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** Pointer to function that resets the input stream 138324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 139324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver void (*reset) (struct ANTLR3_INPUT_STREAM_struct * input); 140324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 141324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** Pointer to a function that reuses and resets an input stream by 142324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * supplying a new 'source' 143324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 144324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver void (*reuse) (struct ANTLR3_INPUT_STREAM_struct * input, pANTLR3_UINT8 inString, ANTLR3_UINT32 size, pANTLR3_UINT8 name); 145324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 146324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** 147324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * Pointer to function that installs a version of LA that always 148324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * returns 
upper case. Only valid for character streams and creates a case 149324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * insensitive lexer if the lexer tokens are described in upper case. The 150324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * tokens will preserve case in the token text. 151324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 152324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver void (*setUcaseLA) (pANTLR3_INPUT_STREAM input, ANTLR3_BOOLEAN flag); 153324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 154324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** Pointer to function to return input stream element at 1 based 155324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * offset from nextChar. Same as _LA for char stream, but token 156324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * streams etc. have one of these that does other stuff of course. 157324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 158324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver void * (*_LT) (struct ANTLR3_INPUT_STREAM_struct * input, ANTLR3_INT32 lt); 159324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 160324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** Pointer to function to return the total size of the input buffer. For streams 161324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * this may be just the total we have available so far. This means of course that 162324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * the input stream must be careful to accumulate enough input so that any backtracking 163324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * can be satisfied. 164324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 165324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ANTLR3_UINT32 (*size) (struct ANTLR3_INPUT_STREAM_struct * input); 166324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 167324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** Pointer to function to return a substring of the input stream. 
String is returned in allocated 168324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * memory and is in same encoding as the input stream itself, NOT internal ANTLR3_UCHAR form. 169324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 170324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver pANTLR3_STRING (*substr) (struct ANTLR3_INPUT_STREAM_struct * input, ANTLR3_MARKER start, ANTLR3_MARKER stop); 171324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 172324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** Pointer to function to return the current line number in the input stream 173324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 174324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ANTLR3_UINT32 (*getLine) (struct ANTLR3_INPUT_STREAM_struct * input); 175324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 176324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** Pointer to function to return the current line buffer in the input stream 177324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * The pointer returned is directly into the input stream so you must copy 178324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * it if you wish to manipulate it without damaging the input stream. Encoding 179324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * is obviously in the same form as the input stream. 180324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \remark 181324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * - Note taht this function wil lbe inaccurate if setLine is called as there 182324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * is no way at the moment to position the input stream at a particular line 183324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * number offset. 
184324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 185324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver void * (*getLineBuf) (struct ANTLR3_INPUT_STREAM_struct * input); 186324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 187324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** Pointer to function to return the current offset in the current input stream line 188324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 189324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ANTLR3_UINT32 (*getCharPositionInLine) (struct ANTLR3_INPUT_STREAM_struct * input); 190324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 191324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** Pointer to function to set the current line number in the input stream 192324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 193324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver void (*setLine) (struct ANTLR3_INPUT_STREAM_struct * input, ANTLR3_UINT32 line); 194324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 195324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** Pointer to function to set the current position in the current line. 196324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 197324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver void (*setCharPositionInLine) (struct ANTLR3_INPUT_STREAM_struct * input, ANTLR3_UINT32 position); 198324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 199324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** Pointer to function to override the default newline character that the input stream 200324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * looks for to trigger the line/offset and line buffer recording information. 201324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \remark 202324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * - By default the chracter '\n' will be installed as the newline trigger character. 
When this 203324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * character is seen by the consume() function then the current line number is incremented and the 204324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * current line offset is reset to 0. The Pointer for the line of input we are consuming 205324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * is updated to point to the next character after this one in the input stream (which means it 206324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * may become invalid if the last newline character in the file is seen (so watch out). 207324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * - If for some reason you do not want the counters and pointers to be restee, you can set the 208324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * chracter to some impossible character such as '\0' or whatever. 209324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * - This is a single character only, so choose the last character in a sequence of two or more. 210324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * - This is only a simple aid to error reporting - if you have a complicated binary input structure 211324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * it may not be adequate, but you can always override every function in the input stream with your 212324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * own of course, and can even write your own complete input stream set if you like. 213324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * - It is your responsiblity to set a valid character for the input stream type. 
There is no point 214324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * setting this to 0xFFFFFFFF if the input stream is 8 bit ASCII, as this will just be truncated and never 215324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * trigger as the comparison will be (INT32)0xFF == (INT32)0xFFFFFFFF 216324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 217324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver void (*SetNewLineChar) (struct ANTLR3_INPUT_STREAM_struct * input, ANTLR3_UINT32 newlineChar); 218324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 219324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /// Character that automatically causes an internal line count 220324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /// increment. 221324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /// 222324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ANTLR3_UCHAR newlineChar; 223324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 224324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /// Indicates the size, in 8 bit units, of a single character. Note that 225324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /// the C runtime does not deal with surrogates as this would be 226324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /// slow and complicated. If this is a UTF-8 stream then this field 227324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /// will be set to 0. Generally you are best working internally with 32 bit characters 228324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /// as this is the most efficient. 
229324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /// 230324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ANTLR3_UINT8 charByteSize; 231324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 232324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /// Indicates the encoding scheme used in this input stream 233324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /// 234324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ANTLR3_UINT32 encoding; 235324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver} 236324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 237324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ANTLR3_INPUT_STREAM; 238324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 239324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 240324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/** \brief Structure for track lex input states as part of mark() 241324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * and rewind() of lexer. 242324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 243324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruvertypedef struct ANTLR3_LEX_STATE_struct 244324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{ 245324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** Pointer to the next character to be consumed from the input data 246324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * This is cast to point at the encoding of the original file that 247324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * was read by the functions installed as pointer in this input stream 248324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * context instance at file/string/whatever load time. 249324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 250324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver void * nextChar; 251324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 252324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** The line number we are traversing in the input file. 
This gets incremented 253324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * by a newline() call in the lexer grammer actions. 254324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 255324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ANTLR3_UINT32 line; 256324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 257324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** Pointer into the input buffer where the current line 258324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * started. 259324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 260324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver void * currentLine; 261324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 262324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /** The offset within the current line of the current character 263324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 264324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ANTLR3_INT32 charPositionInLine; 265324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 266324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver} 267324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ANTLR3_LEX_STATE; 268324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 269324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/* Prototypes 270324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 271324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruvervoid antlr38BitSetupStream (pANTLR3_INPUT_STREAM input); 272324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruvervoid antlr3UTF16SetupStream (pANTLR3_INPUT_STREAM input, ANTLR3_BOOLEAN machineBigEndian, ANTLR3_BOOLEAN inputBigEndian); 273324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruvervoid antlr3UTF32SetupStream (pANTLR3_INPUT_STREAM input, ANTLR3_BOOLEAN machineBigEndian, ANTLR3_BOOLEAN inputBigEndian); 274324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruvervoid antlr3UTF8SetupStream (pANTLR3_INPUT_STREAM input); 275324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruvervoid antlr3EBCDICSetupStream (pANTLR3_INPUT_STREAM input); 
276324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruvervoid antlr3GenericSetupStream (pANTLR3_INPUT_STREAM input); 277324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver#ifdef __cplusplus 278324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver} 279324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver#endif 280324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 281324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver#endif /* _ANTLR3_INPUT_H */ 282