antlr3input.h revision 324c4644fee44b9898524c09511bd33c3f12e2df
1324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/** \file
2324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * Defines the basic structures used to manipulate character
3324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * streams from any input source. Any character size and encoding
4324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * can in theory be used, so long as a set of functions is provided that
5324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * can return a 32 bit integer representation of their characters and efficiently mark and revert
6324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * to specific offsets into their input streams.
7324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */
8324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver#ifndef	_ANTLR3_INPUT_H
9324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver#define	_ANTLR3_INPUT_H
10324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
11324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// [The "BSD licence"]
12324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC
13324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// http://www.temporal-wave.com
14324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// http://www.linkedin.com/in/jimidle
15324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver//
16324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// All rights reserved.
17324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver//
18324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// Redistribution and use in source and binary forms, with or without
19324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// modification, are permitted provided that the following conditions
20324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// are met:
21324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// 1. Redistributions of source code must retain the above copyright
22324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver//    notice, this list of conditions and the following disclaimer.
23324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// 2. Redistributions in binary form must reproduce the above copyright
24324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver//    notice, this list of conditions and the following disclaimer in the
25324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver//    documentation and/or other materials provided with the distribution.
26324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// 3. The name of the author may not be used to endorse or promote products
27324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver//    derived from this software without specific prior written permission.
28324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver//
29324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
30324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
31324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
32324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
33324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
34324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
35324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
36324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
37324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
38324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
40324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver#include    <antlr3defs.h>
41324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver#include    <antlr3string.h>
42324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver#include    <antlr3commontoken.h>
43324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver#include    <antlr3intstream.h>
44324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver#include    <antlr3convertutf.h>
45324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
46324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver#ifdef __cplusplus
47324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverextern "C" {
48324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver#endif
49324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
50324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
51324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
52324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// Master context structure for an ANTLR3 C runtime based input stream.
/// Groups the stream's buffer pointers, line/column tracking state, mark()
/// bookkeeping and the function-pointer API used to operate on the stream.
53324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \ingroup apistructures
54324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver///
55324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruvertypedef	struct	ANTLR3_INPUT_STREAM_struct
56324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{
57324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    /** Interfaces that provide streams must all provide
58324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  a generic ANTLR3_INT_STREAM interface and an ANTLR3_INPUT_STREAM
59324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  is no different.
60324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     */
61324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    pANTLR3_INT_STREAM	istream;
62324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
63324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    /** Whatever super structure is providing the INPUT stream needs a pointer to itself
64324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  so that this can be passed back to it whenever the api functions
65324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  are called back from this interface.
66324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     */
67324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    void	      * super;
68324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
69324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    /** Pointer to the start of the input string; characters may be
70324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  taken as offsets from here and are in the original input format encoding.
71324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     */
72324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    void	      *	data;
73324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
74324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    /** Indicates if the data pointer was allocated by us, and so should be freed
75324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  when the stream dies.
76324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     */
77324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    int			isAllocated;
78324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
79324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    /** String factory for this input stream
80324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     */
81324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    pANTLR3_STRING_FACTORY  strFactory;
82324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
83324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
84324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    /** Pointer to the next character to be consumed from the input data.
85324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  This is cast to point at the encoding of the original file that
86324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  was read by the functions installed as pointers in this input stream
87324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  context instance at file/string/whatever load time.
88324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     */
89324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    void	      * nextChar;
90324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
91324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    /** Number of characters that can be consumed at this point in time.
92324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  Mostly this is just what is left in the pre-read buffer, but if the
93324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  input source is a stream such as a socket or something then we may
94324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  call special read code to wait for more input.
95324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     */
96324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    ANTLR3_UINT32	sizeBuf;
97324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
98324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    /** The line number we are traversing in the input file. This gets incremented
99324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  by a newline() call in the lexer grammar actions.
100324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     */
101324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    ANTLR3_UINT32	line;
102324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
103324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    /** Pointer into the input buffer where the current line
104324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  started.
105324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     */
106324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    void	      * currentLine;
107324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
108324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    /** The offset within the current line of the current character.
     *  NOTE(review): this field is declared ANTLR3_INT32, whereas
     *  getCharPositionInLine() returns ANTLR3_UINT32 and
     *  setCharPositionInLine() takes ANTLR3_UINT32 -- confirm that negative
     *  values are never expected to cross that interface.
109324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     */
110324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    ANTLR3_INT32	charPositionInLine;
111324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
112324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    /** Tracks how deep mark() calls are nested
113324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     */
114324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    ANTLR3_UINT32	markDepth;
115324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
116324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    /** List of mark() points in the input stream
117324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     */
118324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    pANTLR3_VECTOR	markers;
119324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
120324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    /** File name string. If you set it manually, point it at allocated
121324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     * memory, as it will be free()d.
122324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     */
123324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    pANTLR3_STRING	fileName;
124324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
125324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    /** File number, needs to be set manually to some file index of your devising.
126324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     */
127324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    ANTLR3_UINT32	fileNo;
128324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
129324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    /* API */
130324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
131324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
132324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver   /** Pointer to function that closes the input stream
133324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     */
134324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    void		(*close)	(struct	ANTLR3_INPUT_STREAM_struct * input);
    /** Pointer to function with the same signature as close().
     *  NOTE(review): presumably releases the stream and whatever it owns
     *  (see isAllocated) -- confirm against the implementation.
     */
135324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    void		(*free)		(struct	ANTLR3_INPUT_STREAM_struct * input);
136324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
137324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    /** Pointer to function that resets the input stream
138324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     */
139324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    void		(*reset)	(struct	ANTLR3_INPUT_STREAM_struct * input);
140324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
141324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    /** Pointer to a function that reuses and resets an input stream by
142324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  supplying a new 'source'.
     *  NOTE(review): inString/size/name presumably give the new buffer, its
     *  length and the stream name -- confirm units of size against the
     *  implementation.
143324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     */
144324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    void                (*reuse)        (struct	ANTLR3_INPUT_STREAM_struct * input, pANTLR3_UINT8 inString, ANTLR3_UINT32 size, pANTLR3_UINT8 name);
145324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
146324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    /**
147324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     * Pointer to function that installs a version of LA that always
148324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     * returns upper case. Only valid for character streams and creates a case
149324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     * insensitive lexer if the lexer tokens are described in upper case. The
150324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     * tokens will preserve case in the token text.
151324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     */
152324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    void		(*setUcaseLA)		(pANTLR3_INPUT_STREAM input, ANTLR3_BOOLEAN flag);
153324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
154324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    /** Pointer to function to return input stream element at 1 based
155324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  offset from nextChar. Same as _LA for char stream, but token
156324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  streams etc. have one of these that does other stuff of course.
157324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     */
158324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    void *		(*_LT)		(struct	ANTLR3_INPUT_STREAM_struct * input, ANTLR3_INT32 lt);
159324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
160324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    /** Pointer to function to return the total size of the input buffer. For streams
161324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  this may be just the total we have available so far. This means of course that
162324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  the input stream must be careful to accumulate enough input so that any backtracking
163324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  can be satisfied.
164324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     */
165324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    ANTLR3_UINT32	(*size)		(struct ANTLR3_INPUT_STREAM_struct * input);
166324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
167324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    /** Pointer to function to return a substring of the input stream. String is returned in allocated
168324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  memory and is in same encoding as the input stream itself, NOT internal ANTLR3_UCHAR form.
169324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     */
170324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    pANTLR3_STRING	(*substr)	(struct ANTLR3_INPUT_STREAM_struct * input, ANTLR3_MARKER start, ANTLR3_MARKER stop);
171324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
172324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    /** Pointer to function to return the current line number in the input stream
173324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     */
174324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    ANTLR3_UINT32	(*getLine)	(struct ANTLR3_INPUT_STREAM_struct * input);
175324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
176324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    /** Pointer to function to return the current line buffer in the input stream.
177324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  The pointer returned is directly into the input stream so you must copy
178324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  it if you wish to manipulate it without damaging the input stream. Encoding
179324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  is obviously in the same form as the input stream.
180324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  \remark
181324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *    - Note that this function will be inaccurate if setLine is called as there
182324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *      is no way at the moment to position the input stream at a particular line
183324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *	    number offset.
184324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     */
185324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    void	  *	(*getLineBuf)	(struct ANTLR3_INPUT_STREAM_struct * input);
186324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
187324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    /** Pointer to function to return the current offset in the current input stream line
188324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     */
189324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    ANTLR3_UINT32	(*getCharPositionInLine)  (struct ANTLR3_INPUT_STREAM_struct * input);
190324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
191324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    /** Pointer to function to set the current line number in the input stream
192324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     */
193324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    void		(*setLine)		  (struct ANTLR3_INPUT_STREAM_struct * input, ANTLR3_UINT32 line);
194324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
195324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    /** Pointer to function to set the current position in the current line.
196324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     */
197324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    void		(*setCharPositionInLine)  (struct ANTLR3_INPUT_STREAM_struct * input, ANTLR3_UINT32 position);
198324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
199324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    /** Pointer to function to override the default newline character that the input stream
200324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  looks for to trigger the line/offset and line buffer recording information.
201324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  \remark
202324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *   - By default the character '\n' will be installed as the newline trigger character. When this
203324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *     character is seen by the consume() function then the current line number is incremented and the
204324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *     current line offset is reset to 0. The pointer for the line of input we are consuming
205324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *     is updated to point to the next character after this one in the input stream (which means it
206324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *     may become invalid if the last newline character in the file is seen, so watch out).
207324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *   - If for some reason you do not want the counters and pointers to be reset, you can set the
208324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *     character to some impossible character such as '\0' or whatever.
209324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *   - This is a single character only, so choose the last character in a sequence of two or more.
210324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *   - This is only a simple aid to error reporting - if you have a complicated binary input structure
211324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *     it may not be adequate, but you can always override every function in the input stream with your
212324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *     own of course, and can even write your own complete input stream set if you like.
213324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *   - It is your responsibility to set a valid character for the input stream type. There is no point
214324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *     setting this to 0xFFFFFFFF if the input stream is 8 bit ASCII, as this will just be truncated and never
215324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *	   trigger as the comparison will be (INT32)0xFF == (INT32)0xFFFFFFFF
     *   - NOTE(review): the parameter is declared ANTLR3_UINT32 while the newlineChar field it
     *     corresponds to by name is declared ANTLR3_UCHAR; this member is also the only one in the
     *     API whose name starts with an upper case letter.
216324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     */
217324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    void		(*SetNewLineChar)	    (struct ANTLR3_INPUT_STREAM_struct * input, ANTLR3_UINT32 newlineChar);
218324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
219324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    /// Character that automatically causes an internal line count
220324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    ///  increment.
221324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    ///
222324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    ANTLR3_UCHAR	newlineChar;
223324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
224324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    /// Indicates the size, in 8 bit units, of a single character. Note that
225324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    /// the C runtime does not deal with surrogates as this would be
226324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    /// slow and complicated. If this is a UTF-8 stream then this field
227324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    /// will be set to 0. Generally you are best working internally with 32 bit characters
228324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    /// as this is the most efficient.
229324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    ///
230324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    ANTLR3_UINT8	charByteSize;
231324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
232324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    /// Indicates the encoding scheme used in this input stream
233324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    ///
234324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    ANTLR3_UINT32       encoding;
235324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver}
236324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
237324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    ANTLR3_INPUT_STREAM;
238324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
239324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
240324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/** \brief Structure for tracking lexer input states as part of mark()
241324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *  and rewind() of the lexer.
 *
 *  The four members have the same names and types as the nextChar, line,
 *  currentLine and charPositionInLine members of ANTLR3_INPUT_STREAM.
 *  NOTE(review): presumably one of these is saved per mark() and copied back
 *  on rewind() -- confirm against the implementation.
242324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */
243324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruvertypedef	struct	ANTLR3_LEX_STATE_struct
244324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{
245324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        /** Pointer to the next character to be consumed from the input data.
246324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  This is cast to point at the encoding of the original file that
247324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  was read by the functions installed as pointers in this input stream
248324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  context instance at file/string/whatever load time.
249324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     */
250324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    void	      * nextChar;
251324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
252324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    /** The line number we are traversing in the input file. This gets incremented
253324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  by a newline() call in the lexer grammar actions.
254324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     */
255324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    ANTLR3_UINT32	line;
256324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
257324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    /** Pointer into the input buffer where the current line
258324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     *  started.
259324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     */
260324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    void	      * currentLine;
261324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
262324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    /** The offset within the current line of the current character
263324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     */
264324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    ANTLR3_INT32	charPositionInLine;
265324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
266324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver}
267324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    ANTLR3_LEX_STATE;
268324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
269324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/* Prototypes
 *
 * One stream setup entry point per encoding (8 bit, UTF-16, UTF-32, UTF-8,
 * EBCDIC) plus a generic one; each takes the input stream and returns nothing.
 * The UTF-16 and UTF-32 variants additionally take two flags named for the
 * byte order of the machine and of the input.
 * NOTE(review): presumably each installs the encoding-specific function
 * pointers on `input` -- confirm against the implementations, which are not
 * part of this header.
270324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */
271324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruvervoid	    antlr38BitSetupStream	(pANTLR3_INPUT_STREAM input);
272324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruvervoid	    antlr3UTF16SetupStream	(pANTLR3_INPUT_STREAM input, ANTLR3_BOOLEAN machineBigEndian, ANTLR3_BOOLEAN inputBigEndian);
273324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruvervoid	    antlr3UTF32SetupStream	(pANTLR3_INPUT_STREAM input, ANTLR3_BOOLEAN machineBigEndian, ANTLR3_BOOLEAN inputBigEndian);
274324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruvervoid	    antlr3UTF8SetupStream	(pANTLR3_INPUT_STREAM input);
275324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruvervoid	    antlr3EBCDICSetupStream	(pANTLR3_INPUT_STREAM input);
276324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruvervoid        antlr3GenericSetupStream    (pANTLR3_INPUT_STREAM input);
277324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver#ifdef __cplusplus
278324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver}
279324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver#endif
280324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
281324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver#endif	/* _ANTLR3_INPUT_H  */
282