1324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \file
2324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// Base functions to initialize and manipulate any input stream
3324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver///
4324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
5324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// [The "BSD licence"]
6324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC
7324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// http://www.temporal-wave.com
8324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// http://www.linkedin.com/in/jimidle
9324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver//
10324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// All rights reserved.
11324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver//
12324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// Redistribution and use in source and binary forms, with or without
13324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// modification, are permitted provided that the following conditions
14324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// are met:
15324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// 1. Redistributions of source code must retain the above copyright
16324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver//    notice, this list of conditions and the following disclaimer.
17324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// 2. Redistributions in binary form must reproduce the above copyright
18324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver//    notice, this list of conditions and the following disclaimer in the
19324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver//    documentation and/or other materials provided with the distribution.
20324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// 3. The name of the author may not be used to endorse or promote products
21324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver//    derived from this software without specific prior written permission.
22324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver//
23324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
24324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
25324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
26324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
27324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
28324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
32324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
34324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver#include    <antlr3input.h>
35324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
36324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// -----------------------------------
37324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// Generic 8 bit input such as latin-1
38324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver//
39324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
40324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// 8Bit INT Stream API
41324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver//
42324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic	    void	    antlr38BitConsume		(pANTLR3_INT_STREAM is);
43324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic	    ANTLR3_UCHAR    antlr38BitLA		(pANTLR3_INT_STREAM is, ANTLR3_INT32 la);
44324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic	    ANTLR3_UCHAR    antlr38BitLA_ucase		(pANTLR3_INT_STREAM is, ANTLR3_INT32 la);
45324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic	    ANTLR3_MARKER   antlr38BitIndex		(pANTLR3_INT_STREAM is);
46324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic	    ANTLR3_MARKER   antlr38BitMark		(pANTLR3_INT_STREAM is);
47324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic	    void	    antlr38BitRewind		(pANTLR3_INT_STREAM is, ANTLR3_MARKER mark);
48324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic	    void	    antlr38BitRewindLast	(pANTLR3_INT_STREAM is);
49324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic	    void	    antlr38BitRelease		(pANTLR3_INT_STREAM is, ANTLR3_MARKER mark);
50324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic	    void	    antlr38BitSeek		(pANTLR3_INT_STREAM is, ANTLR3_MARKER seekPoint);
51324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic	    pANTLR3_STRING  antlr38BitGetSourceName	(pANTLR3_INT_STREAM is);
52324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
53324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// 8Bit Charstream API functions
54324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver//
55324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic	    void	    antlr3InputClose		(pANTLR3_INPUT_STREAM input);
56324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic	    void	    antlr3InputReset		(pANTLR3_INPUT_STREAM input);
57324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic      void            antlr38BitReuse            (pANTLR3_INPUT_STREAM input, pANTLR3_UINT8 inString, ANTLR3_UINT32 size, pANTLR3_UINT8 name);
58324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic	    void *	    antlr38BitLT		(pANTLR3_INPUT_STREAM input, ANTLR3_INT32 lt);
59324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic	    ANTLR3_UINT32   antlr38BitSize		(pANTLR3_INPUT_STREAM input);
60324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic	    pANTLR3_STRING  antlr38BitSubstr		(pANTLR3_INPUT_STREAM input, ANTLR3_MARKER start, ANTLR3_MARKER stop);
61324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic	    ANTLR3_UINT32   antlr38BitGetLine		(pANTLR3_INPUT_STREAM input);
62324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic	    void	  * antlr38BitGetLineBuf	(pANTLR3_INPUT_STREAM input);
63324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic	    ANTLR3_UINT32   antlr38BitGetCharPosition	(pANTLR3_INPUT_STREAM input);
64324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic	    void	    antlr38BitSetLine		(pANTLR3_INPUT_STREAM input, ANTLR3_UINT32 line);
65324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic	    void	    antlr38BitSetCharPosition	(pANTLR3_INPUT_STREAM input, ANTLR3_UINT32 position);
66324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic	    void	    antlr38BitSetNewLineChar	(pANTLR3_INPUT_STREAM input, ANTLR3_UINT32 newlineChar);
67324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic	    void	    antlr38BitSetUcaseLA	(pANTLR3_INPUT_STREAM input, ANTLR3_BOOLEAN flag);
68324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
69324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// -----------------------------------
70324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// UTF16 (also covers UCS2)
71324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver//
72324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// INT Stream API
73324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver//
74324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic	    void	    antlr3UTF16Consume	        (pANTLR3_INT_STREAM is);
75324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic	    ANTLR3_UCHAR    antlr3UTF16LA		(pANTLR3_INT_STREAM is, ANTLR3_INT32 la);
76324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic	    void	    antlr3UTF16ConsumeLE        (pANTLR3_INT_STREAM is);
77324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic	    ANTLR3_UCHAR    antlr3UTF16LALE		(pANTLR3_INT_STREAM is, ANTLR3_INT32 la);
78324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic	    void	    antlr3UTF16ConsumeBE        (pANTLR3_INT_STREAM is);
79324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic	    ANTLR3_UCHAR    antlr3UTF16LABE		(pANTLR3_INT_STREAM is, ANTLR3_INT32 la);
80324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic	    ANTLR3_MARKER   antlr3UTF16Index		(pANTLR3_INT_STREAM is);
81324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic	    void	    antlr3UTF16Seek		(pANTLR3_INT_STREAM is, ANTLR3_MARKER seekPoint);
82324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
83324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// UTF16 Charstream API functions
84324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver//
85324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic	    pANTLR3_STRING	antlr3UTF16Substr	(pANTLR3_INPUT_STREAM input, ANTLR3_MARKER start, ANTLR3_MARKER stop);
86324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
87324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// -----------------------------------
88324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// UTF32 (also covers UCS2)
89324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver//
90324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// INT Stream API
91324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver//
92324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic	    void	    antlr3UTF32Consume	        (pANTLR3_INT_STREAM is);
93324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic	    ANTLR3_UCHAR    antlr3UTF32LA		(pANTLR3_INT_STREAM is, ANTLR3_INT32 la);
94324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic	    ANTLR3_UCHAR    antlr3UTF32LALE		(pANTLR3_INT_STREAM is, ANTLR3_INT32 la);
95324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic	    ANTLR3_UCHAR    antlr3UTF32LABE		(pANTLR3_INT_STREAM is, ANTLR3_INT32 la);
96324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic	    ANTLR3_MARKER   antlr3UTF32Index		(pANTLR3_INT_STREAM is);
97324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic	    void	    antlr3UTF32Seek		(pANTLR3_INT_STREAM is, ANTLR3_MARKER seekPoint);
98324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
99324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// UTF16 Charstream API functions
100324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver//
101324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic	    pANTLR3_STRING  antlr3UTF32Substr	        (pANTLR3_INPUT_STREAM input, ANTLR3_MARKER start, ANTLR3_MARKER stop);
102324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
103324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// ------------------------------------
104324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// UTF-8
105324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver//
106324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic	    void	    antlr3UTF8Consume	        (pANTLR3_INT_STREAM is);
107324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic	    ANTLR3_UCHAR    antlr3UTF8LA		(pANTLR3_INT_STREAM is, ANTLR3_INT32 la);
108324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
109324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// ------------------------------------
110324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// EBCDIC
111324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver//
112324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic	    ANTLR3_UCHAR    antlr3EBCDICLA		(pANTLR3_INT_STREAM is, ANTLR3_INT32 la);
113324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
114324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \brief Common function to setup function interface for an 8 bit input stream.
115324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver///
116324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \param input Input stream context pointer
117324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver///
118324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \remark
119324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver///   - Many of the 8 bit oriented file stream handling functions will be usable
120324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver///     by any or at least some, other input streams. Therefore it is perfectly acceptable
121324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver///     to call this function to install the 8Bit handler then override just those functions
122324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver///     that would not work for the particular input encoding, such as consume for instance.
123324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver///
124324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruvervoid
125324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr38BitSetupStream	(pANTLR3_INPUT_STREAM input)
126324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{
127324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    // Build a string factory for this stream
128324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    //
129324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input->strFactory	= antlr3StringFactoryNew(input->encoding);
130324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
131324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    // Default stream API set up is for 8Bit, so we are done
132324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    //
133324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver}
134324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
135324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruvervoid
136324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr3GenericSetupStream  (pANTLR3_INPUT_STREAM input)
137324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{
138324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    /* Install function pointers for an 8 bit input
139324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     */
140324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
141324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    /* Allocate stream interface
142324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     */
143324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input->istream		= antlr3IntStreamNew();
144324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input->istream->type        = ANTLR3_CHARSTREAM;
145324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input->istream->super       = input;
146324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
147324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    /* Intstream API
148324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     */
149324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input->istream->consume	    = antlr38BitConsume;	    // Consume the next 8 bit character in the buffer
150324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input->istream->_LA		    = antlr38BitLA;	            // Return the UTF32 character at offset n (1 based)
151324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input->istream->index	    = antlr38BitIndex;	            // Current index (offset from first character
152324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input->istream->mark	    = antlr38BitMark;		    // Record the current lex state for later restore
153324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input->istream->rewind	    = antlr38BitRewind;	            // How to rewind the input
154324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input->istream->rewindLast	    = antlr38BitRewindLast;	    // How to rewind the input
155324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input->istream->seek	    = antlr38BitSeek;		    // How to seek to a specific point in the stream
156324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input->istream->release	    = antlr38BitRelease;	    // Reset marks after mark n
157324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input->istream->getSourceName   = antlr38BitGetSourceName;      // Return a string that names the input source
158324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
159324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    /* Charstream API
160324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     */
161324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input->close		    =  antlr3InputClose;	    // Close down the stream completely
162324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input->free			    =  antlr3InputClose;	    // Synonym for free
163324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input->reset		    =  antlr3InputReset;	    // Reset input to start
164324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input->reuse                    =  antlr38BitReuse;             // Install a new input string and reset
165324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input->_LT			    =  antlr38BitLT;		    // Same as _LA for 8 bit file
166324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input->size			    =  antlr38BitSize;		    // Return the size of the input buffer
167324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input->substr		    =  antlr38BitSubstr;	    // Return a string from the input stream
168324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input->getLine		    =  antlr38BitGetLine;	    // Return the current line number in the input stream
169324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input->getLineBuf		    =  antlr38BitGetLineBuf;	    // Return a pointer to the start of the current line being consumed
170324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input->getCharPositionInLine    =  antlr38BitGetCharPosition;   // Return the offset into the current line of input
171324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input->setLine		    =  antlr38BitSetLine;	    // Set the input stream line number (does not set buffer pointers)
172324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input->setCharPositionInLine    =  antlr38BitSetCharPosition;   // Set the offset in to the current line (does not set any pointers)
173324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input->SetNewLineChar	    =  antlr38BitSetNewLineChar;    // Set the value of the newline trigger character
174324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input->setUcaseLA		    =  antlr38BitSetUcaseLA;        // Changes the LA function to return upper case always
175324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
176324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input->charByteSize		    = 1;		// Size in bytes of characters in this stream.
177324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
178324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    /* Initialize entries for tables etc
179324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     */
180324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input->markers  = NULL;
181324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
182324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    /* Set up the input stream brand new
183324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     */
184324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input->reset(input);
185324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
186324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    /* Install default line separator character (it can be replaced
187324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     * by the grammar programmer later)
188324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     */
189324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input->SetNewLineChar(input, (ANTLR3_UCHAR)'\n');
190324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver}
191324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
192324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic pANTLR3_STRING
193324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr38BitGetSourceName(pANTLR3_INT_STREAM is)
194324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{
195324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	return	is->streamName;
196324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver}
197324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
198324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/** \brief Close down an input stream and free any memory allocated by it.
199324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *
200324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \param input Input stream context pointer
201324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */
202324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void
203324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr3InputClose(pANTLR3_INPUT_STREAM input)
204324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{
205324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    // Close any markers in the input stream
206324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    //
207324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    if	(input->markers != NULL)
208324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    {
209324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		input->markers->free(input->markers);
210324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		input->markers = NULL;
211324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    }
212324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
213324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    // Close the string factory
214324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    //
215324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    if	(input->strFactory != NULL)
216324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    {
217324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		input->strFactory->close(input->strFactory);
218324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    }
219324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
220324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    // Free the input stream buffer if we allocated it
221324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    //
222324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    if	(input->isAllocated && input->data != NULL)
223324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    {
224324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		ANTLR3_FREE(input->data);
225324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		input->data = NULL;
226324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    }
227324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
228324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input->istream->free(input->istream);
229324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
230324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    // Finally, free the space for the structure itself
231324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    //
232324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    ANTLR3_FREE(input);
233324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
234324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    // Done
235324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    //
236324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver}
237324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
238324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void
239324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr38BitSetUcaseLA		(pANTLR3_INPUT_STREAM input, ANTLR3_BOOLEAN flag)
240324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{
241324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	if	(flag)
242324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	{
243324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		// Return the upper case version of the characters
244324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		//
245324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		input->istream->_LA		    =  antlr38BitLA_ucase;
246324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	}
247324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	else
248324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	{
249324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		// Return the raw characters as they are in the buffer
250324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		//
251324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		input->istream->_LA		    =  antlr38BitLA;
252324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	}
253324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver}
254324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
255324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
256324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/** \brief Reset a re-startable input stream to the start
257324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *
258324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \param input Input stream context pointer
259324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */
260324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void
261324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr3InputReset(pANTLR3_INPUT_STREAM input)
262324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{
263324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
264324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input->nextChar		= input->data;	/* Input at first character */
265324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input->line			= 1;		/* starts at line 1	    */
266324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input->charPositionInLine	= -1;
267324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input->currentLine		= input->data;
268324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input->markDepth		= 0;		/* Reset markers	    */
269324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
270324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    /* Clear out up the markers table if it is there
271324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     */
272324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    if	(input->markers != NULL)
273324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    {
274324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        input->markers->clear(input->markers);
275324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    }
276324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    else
277324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    {
278324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        /* Install a new markers table
279324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver         */
280324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        input->markers  = antlr3VectorNew(0);
281324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    }
282324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver}
283324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
284324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/** Install a new source code in to a working input stream so that the
285324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *  input stream can be reused.
286324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */
287324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void
288324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr38BitReuse(pANTLR3_INPUT_STREAM input, pANTLR3_UINT8 inString, ANTLR3_UINT32 size, pANTLR3_UINT8 name)
289324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{
290324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input->isAllocated	= ANTLR3_FALSE;
291324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input->data		= inString;
292324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input->sizeBuf	= size;
293324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
294324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    // Now we can set up the file name. As we are reusing the stream, there may already
295324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    // be a string that we can reuse for holding the filename.
296324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    //
297324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	if	(input->istream->streamName == NULL)
298324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	{
299324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		input->istream->streamName	= input->strFactory->newStr(input->strFactory, name == NULL ? (pANTLR3_UINT8)"-memory-" : name);
300324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		input->fileName		= input->istream->streamName;
301324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	}
302324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	else
303324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	{
304324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		input->istream->streamName->set(input->istream->streamName,  (name == NULL ? (const char *)"-memory-" : (const char *)name));
305324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	}
306324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
307324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input->reset(input);
308324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver}
309324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
310324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/** \brief Consume the next character in an 8 bit input stream
311324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *
312324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \param input Input stream context pointer
313324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */
314324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void
315324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr38BitConsume(pANTLR3_INT_STREAM is)
316324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{
317324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    pANTLR3_INPUT_STREAM input;
318324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
319324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input   = ((pANTLR3_INPUT_STREAM) (is->super));
320324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
321324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    if	((pANTLR3_UINT8)(input->nextChar) < (((pANTLR3_UINT8)input->data) + input->sizeBuf))
322324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    {
323324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	/* Indicate one more character in this line
324324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	 */
325324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	input->charPositionInLine++;
326324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
327324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	if  ((ANTLR3_UCHAR)(*((pANTLR3_UINT8)input->nextChar)) == input->newlineChar)
328324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	{
329324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	    /* Reset for start of a new line of input
330324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	     */
331324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	    input->line++;
332324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	    input->charPositionInLine	= 0;
333324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	    input->currentLine		= (void *)(((pANTLR3_UINT8)input->nextChar) + 1);
334324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	}
335324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
336324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	/* Increment to next character position
337324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	 */
338324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	input->nextChar = (void *)(((pANTLR3_UINT8)input->nextChar) + 1);
339324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    }
340324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver}
341324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
342324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/** \brief Return the input element assuming an 8 bit ascii input
343324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *
344324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \param[in] input Input stream context pointer
345324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \param[in] la 1 based offset of next input stream element
346324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *
347324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \return Next input character in internal ANTLR3 encoding (UTF32)
348324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */
349324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic ANTLR3_UCHAR
350324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr38BitLA(pANTLR3_INT_STREAM is, ANTLR3_INT32 la)
351324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{
352324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    pANTLR3_INPUT_STREAM input;
353324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
354324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input   = ((pANTLR3_INPUT_STREAM) (is->super));
355324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
356324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    if	(( ((pANTLR3_UINT8)input->nextChar) + la - 1) >= (((pANTLR3_UINT8)input->data) + input->sizeBuf))
357324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    {
358324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		return	ANTLR3_CHARSTREAM_EOF;
359324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    }
360324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    else
361324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    {
362324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		return	(ANTLR3_UCHAR)(*((pANTLR3_UINT8)input->nextChar + la - 1));
363324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    }
364324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver}
365324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
366324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/** \brief Return the input element assuming an 8 bit input and
367324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *         always return the UPPER CASE character.
368324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *		   Note that this is 8 bit and so we assume that the toupper
369324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *		   function will use the correct locale for 8 bits.
370324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *
371324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \param[in] input Input stream context pointer
372324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \param[in] la 1 based offset of next input stream element
373324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *
374324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \return Next input character in internal ANTLR3 encoding (UTF32)
375324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */
376324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic ANTLR3_UCHAR
377324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr38BitLA_ucase	(pANTLR3_INT_STREAM is, ANTLR3_INT32 la)
378324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{
379324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    pANTLR3_INPUT_STREAM input;
380324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
381324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input   = ((pANTLR3_INPUT_STREAM) (is->super));
382324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
383324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    if	(( ((pANTLR3_UINT8)input->nextChar) + la - 1) >= (((pANTLR3_UINT8)input->data) + input->sizeBuf))
384324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    {
385324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		return	ANTLR3_CHARSTREAM_EOF;
386324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    }
387324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    else
388324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    {
389324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		return	(ANTLR3_UCHAR)toupper((*((pANTLR3_UINT8)input->nextChar + la - 1)));
390324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    }
391324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver}
392324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
393324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
394324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/** \brief Return the input element assuming an 8 bit ascii input
395324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *
396324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \param[in] input Input stream context pointer
397324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \param[in] lt 1 based offset of next input stream element
398324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *
399324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \return Next input character in internal ANTLR3 encoding (UTF32)
400324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */
401324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void *
402324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr38BitLT(pANTLR3_INPUT_STREAM input, ANTLR3_INT32 lt)
403324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{
404324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    /* Casting is horrible but it means no warnings and LT should never be called
405324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     * on a character stream anyway I think. If it is then, the void * will need to be
406324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     * cast back in a similar manner. Yuck! But this means that LT for Token streams and
407324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     * tree streams is correct.
408324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     */
409324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    return (ANTLR3_FUNC_PTR(input->istream->_LA(input->istream, lt)));
410324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver}
411324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
412324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/** \brief Calculate the current index in the output stream.
413324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \param[in] input Input stream context pointer
414324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */
415324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic ANTLR3_MARKER
416324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr38BitIndex(pANTLR3_INT_STREAM is)
417324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{
418324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    pANTLR3_INPUT_STREAM input;
419324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
420324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input   = ((pANTLR3_INPUT_STREAM) (is->super));
421324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
422324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    return  (ANTLR3_MARKER)(((pANTLR3_UINT8)input->nextChar));
423324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver}
424324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
425324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/** \brief Return the size of the current input stream, as an 8Bit file
426324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *   which in this case is the total input. Other implementations may provide
427324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *   more sophisticated implementations to deal with non-recoverable streams
428324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *   and so on.
429324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *
430324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \param[in] input Input stream context pointer
431324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */
432324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic	ANTLR3_UINT32
433324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr38BitSize(pANTLR3_INPUT_STREAM input)
434324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{
435324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    return  input->sizeBuf;
436324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver}
437324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
438324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/** \brief Mark the current input point in an 8Bit 8 bit stream
439324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *  such as a file stream, where all the input is available in the
440324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *  buffer.
441324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *
442324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \param[in] is Input stream context pointer
443324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */
444324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic ANTLR3_MARKER
445324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr38BitMark	(pANTLR3_INT_STREAM is)
446324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{
447324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    pANTLR3_LEX_STATE	    state;
448324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    pANTLR3_INPUT_STREAM    input;
449324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
450324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input   = ((pANTLR3_INPUT_STREAM) (is->super));
451324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
452324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    /* New mark point
453324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     */
454324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input->markDepth++;
455324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
456324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    /* See if we are revisiting a mark as we can just reuse the vector
457324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     * entry if we are, otherwise, we need a new one
458324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     */
459324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    if	(input->markDepth > input->markers->count)
460324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    {
461324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	state	= ANTLR3_MALLOC(sizeof(ANTLR3_LEX_STATE));
462324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
463324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	/* Add it to the table
464324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	 */
465324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	input->markers->add(input->markers, state, ANTLR3_FREE_FUNC);	/* No special structure, just free() on delete */
466324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    }
467324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    else
468324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    {
469324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	state	= (pANTLR3_LEX_STATE)input->markers->get(input->markers, input->markDepth - 1);
470324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
471324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	/* Assume no errors for speed, it will just blow up if the table failed
472324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	 * for some reasons, hence lots of unit tests on the tables ;-)
473324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	 */
474324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    }
475324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
476324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    /* We have created or retrieved the state, so update it with the current
477324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     * elements of the lexer state.
478324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     */
479324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    state->charPositionInLine	= input->charPositionInLine;
480324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    state->currentLine		= input->currentLine;
481324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    state->line			= input->line;
482324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    state->nextChar		= input->nextChar;
483324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
484324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    is->lastMarker  = input->markDepth;
485324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
486324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    /* And that's it
487324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     */
488324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    return  input->markDepth;
489324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver}
490324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/** \brief Rewind the lexer input to the state specified by the last produced mark.
491324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *
492324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \param[in] input Input stream context pointer
493324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *
494324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \remark
495324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * Assumes 8 Bit input stream.
496324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */
497324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void
498324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr38BitRewindLast	(pANTLR3_INT_STREAM is)
499324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{
500324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    is->rewind(is, is->lastMarker);
501324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver}
502324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
503324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/** \brief Rewind the lexer input to the state specified by the supplied mark.
504324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *
505324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \param[in] input Input stream context pointer
506324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *
507324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \remark
508324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * Assumes 8 Bit input stream.
509324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */
510324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void
511324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr38BitRewind	(pANTLR3_INT_STREAM is, ANTLR3_MARKER mark)
512324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{
513324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    pANTLR3_LEX_STATE	state;
514324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    pANTLR3_INPUT_STREAM input;
515324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
516324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input   = ((pANTLR3_INPUT_STREAM) is->super);
517324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
518324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    /* Perform any clean up of the marks
519324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     */
520324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input->istream->release(input->istream, mark);
521324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
522324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    /* Find the supplied mark state
523324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     */
524324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    state   = (pANTLR3_LEX_STATE)input->markers->get(input->markers, (ANTLR3_UINT32)(mark - 1));
525324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
526324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    /* Seek input pointer to the requested point (note we supply the void *pointer
527324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     * to whatever is implementing the int stream to seek).
528324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     */
529324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    antlr38BitSeek(is, (ANTLR3_MARKER)(state->nextChar));
530324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
531324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    /* Reset to the reset of the information in the mark
532324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     */
533324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input->charPositionInLine	= state->charPositionInLine;
534324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input->currentLine		= state->currentLine;
535324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input->line			= state->line;
536324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input->nextChar		= state->nextChar;
537324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
538324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    /* And we are done
539324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     */
540324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver}
541324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
542324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/** \brief Rewind the lexer input to the state specified by the supplied mark.
543324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *
544324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \param[in] input Input stream context pointer
545324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *
546324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \remark
547324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * Assumes 8 Bit input stream.
548324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */
549324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void
550324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr38BitRelease	(pANTLR3_INT_STREAM is, ANTLR3_MARKER mark)
551324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{
552324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    pANTLR3_INPUT_STREAM input;
553324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
554324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input   = ((pANTLR3_INPUT_STREAM) (is->super));
555324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
556324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    /* We don't do much here in fact as we never free any higher marks in
557324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     * the hashtable as we just resuse any memory allocated for them.
558324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver     */
559324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input->markDepth	= (ANTLR3_UINT32)(mark - 1);
560324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver}
561324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
562324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/** \brief Rewind the lexer input to the state specified by the supplied mark.
563324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *
564324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \param[in] input Input stream context pointer
565324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *
566324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \remark
567324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * Assumes 8 Bit input stream.
568324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */
569324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void
570324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr38BitSeek	(pANTLR3_INT_STREAM is, ANTLR3_MARKER seekPoint)
571324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{
572324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	ANTLR3_INT32   count;
573324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	pANTLR3_INPUT_STREAM input;
574324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
575324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	input   = ANTLR3_FUNC_PTR(((pANTLR3_INPUT_STREAM) is->super));
576324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
577324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	/* If the requested seek point is less than the current
578324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	* input point, then we assume that we are resetting from a mark
579324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	* and do not need to scan, but can just set to there.
580324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	*/
581324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	if	(seekPoint <= (ANTLR3_MARKER)(input->nextChar))
582324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	{
583324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		input->nextChar	= ((pANTLR3_UINT8) seekPoint);
584324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	}
585324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	else
586324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	{
587324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		count	= (ANTLR3_UINT32)(seekPoint - (ANTLR3_MARKER)(input->nextChar));
588324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
589324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		while (count--)
590324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		{
591324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			is->consume(is);
592324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		}
593324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	}
594324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver}
595324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/** Return a substring of the 8 bit input stream in
596324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *  newly allocated memory.
597324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *
598324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \param input Input stream context pointer
599324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \param start Offset in input stream where the string starts
600324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \param stop  Offset in the input stream where the string ends.
601324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */
602324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic pANTLR3_STRING
603324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr38BitSubstr		(pANTLR3_INPUT_STREAM input, ANTLR3_MARKER start, ANTLR3_MARKER stop)
604324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{
605324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	return  input->strFactory->newPtr(input->strFactory, (pANTLR3_UINT8)start, (ANTLR3_UINT32)(stop - start + 1));
606324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver}
607324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
608324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/** \brief Return the line number as understood by the 8 bit input stream.
609324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *
610324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \param input Input stream context pointer
611324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \return	Line number in input stream that we believe we are working on.
612324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */
613324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic ANTLR3_UINT32
614324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr38BitGetLine		(pANTLR3_INPUT_STREAM input)
615324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{
616324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    return  input->line;
617324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver}
618324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
619324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/** Return a pointer into the input stream that points at the start
620324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *  of the current input line as triggered by the end of line character installed
621324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *  for the stream ('\n' unless told differently).
622324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *
623324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \param[in] input
624324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */
625324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void	  *
626324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr38BitGetLineBuf	(pANTLR3_INPUT_STREAM input)
627324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{
628324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    return  input->currentLine;
629324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver}
630324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
631324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/** Return the current offset in to the current line in the input stream.
632324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *
633324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \param input Input stream context pointer
634324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \return      Current line offset
635324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */
636324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic ANTLR3_UINT32
637324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr38BitGetCharPosition	(pANTLR3_INPUT_STREAM input)
638324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{
639324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    return  input->charPositionInLine;
640324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver}
641324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
642324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/** Set the current line number as understood by the input stream.
643324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *
644324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \param input Input stream context pointer
645324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \param line  Line number to tell the input stream we are on
646324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *
647324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \remark
648324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *  This function does not change any pointers, it just allows the programmer to set the
649324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *  line number according to some external criterion, such as finding a lexed directive
650324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *  like: #nnn "file.c" for instance, such that error reporting and so on in is in sync
651324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *  with some original source format.
652324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */
653324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void
654324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr38BitSetLine		(pANTLR3_INPUT_STREAM input, ANTLR3_UINT32 line)
655324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{
656324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input->line	= line;
657324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver}
658324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
659324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/** Set the current offset in the current line to be a particular setting.
660324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *
661324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \param[in] input    Input stream context pointer
662324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \param[in] position New setting for current offset.
663324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *
664324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \remark
665324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * This does not set the actual pointers in the input stream, it is purely for reporting
666324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * purposes and so on as per antlr38BitSetLine();
667324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */
668324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void
669324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr38BitSetCharPosition	(pANTLR3_INPUT_STREAM input, ANTLR3_UINT32 position)
670324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{
671324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input->charPositionInLine = position;
672324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver}
673324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
674324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/** Set the newline trigger character in the input stream to the supplied parameter.
675324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *
676324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \param[in] input	    Input stream context pointer
677324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \param[in] newlineChar   Character to set to be the newline trigger.
678324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *
679324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \remark
680324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *  - The supplied newLineChar is in UTF32 encoding (which means ASCII and latin1 etc
681324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *    are the same encodings), but the input stream catered to by this function is 8 bit
682324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *    only, so it is up to the programmer to ensure that the character supplied is valid.
683324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */
684324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void
685324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr38BitSetNewLineChar	(pANTLR3_INPUT_STREAM input, ANTLR3_UINT32 newlineChar)
686324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{
687324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input->newlineChar	= newlineChar;
688324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver}
689324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
690324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
691324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \brief Common function to setup function interface for a UTF16 or UCS2 input stream.
692324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver///
693324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \param input Input stream context pointer
694324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver///
695324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \remark
696324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver///  - Strictly speaking, there is no such thing as a UCS2 input stream as the term
697324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver///    tends to confuse the notions of character encoding, unicode and so on. UCS2 is
698324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver///    essentially UTF16 without any surrogates and so the standard UTF16
699324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver///    input stream is able to handle it without any special code.
700324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver///
701324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruvervoid
702324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr3UTF16SetupStream	(pANTLR3_INPUT_STREAM input, ANTLR3_BOOLEAN machineBigEndian, ANTLR3_BOOLEAN inputBigEndian)
703324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{
704324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    // Build a string factory for this stream. This is a UTF16 string factory which is a standard
705324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    // part of the ANTLR3 string. The string factory is then passed through the whole chain
706324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    // of lexer->parser->tree->treeparser and so on.
707324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    //
708324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input->strFactory	= antlr3StringFactoryNew(input->encoding);
709324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
710324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    // Generic API that does not care about endianess.
711324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    //
712324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input->istream->index	    =  antlr3UTF16Index;            // Calculate current index in input stream, UTF16 based
713324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input->substr		    =  antlr3UTF16Substr;	    // Return a string from the input stream
714324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input->istream->seek	    =  antlr3UTF16Seek;		    // How to seek to a specific point in the stream
715324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
716324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    // We must install different UTF16 routines according to whether the input
717324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    // is the same endianess as the machine we are executing upon or not. If it is not
718324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    // then we must install methods that can convert the endianess on the fly as they go
719324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    //
720324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
721324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    switch (machineBigEndian)
722324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    {
723324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        case    ANTLR3_TRUE:
724324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
725324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            // Machine is Big Endian, if the input is also then install the
726324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            // methods that do not access input by bytes and reverse them.
727324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            // Otherwise install endian aware methods.
728324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            //
729324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            if  (inputBigEndian == ANTLR3_TRUE)
730324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            {
731324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                // Input is machine compatible
732324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                //
733324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                input->istream->consume	    =  antlr3UTF16Consume;	    // Consume the next UTF16 character in the buffer
734324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                input->istream->_LA         =  antlr3UTF16LA;		    // Return the UTF32 character at offset n (1 based)
735324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            }
736324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            else
737324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            {
738324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                // Need to use methods that know that the input is little endian
739324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                //
740324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                input->istream->consume	    =  antlr3UTF16ConsumeLE;	    // Consume the next UTF16 character in the buffer
741324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                input->istream->_LA         =  antlr3UTF16LALE;		    // Return the UTF32 character at offset n (1 based)
742324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            }
743324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            break;
744324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
745324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        case    ANTLR3_FALSE:
746324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
747324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            // Machine is Little Endian, if the input is also then install the
748324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            // methods that do not access input by bytes and reverse them.
749324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            // Otherwise install endian aware methods.
750324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            //
751324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            if  (inputBigEndian == ANTLR3_FALSE)
752324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            {
753324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                // Input is machine compatible
754324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                //
755324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                input->istream->consume	    =  antlr3UTF16Consume;	    // Consume the next UTF16 character in the buffer
756324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                input->istream->_LA         =  antlr3UTF16LA;		    // Return the UTF32 character at offset n (1 based)
757324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            }
758324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            else
759324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            {
760324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                // Need to use methods that know that the input is Big Endian
761324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                //
762324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                input->istream->consume	    =  antlr3UTF16ConsumeBE;	    // Consume the next UTF16 character in the buffer
763324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                input->istream->_LA         =  antlr3UTF16LABE;		    // Return the UTF32 character at offset n (1 based)
764324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            }
765324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            break;
766324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    }
767324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
768324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
769324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input->charByteSize		    = 2;			    // Size in bytes of characters in this stream.
770324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
771324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver}
772324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
773324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \brief Consume the next character in a UTF16 input stream
774324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver///
775324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \param input Input stream context pointer
776324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver///
777324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void
778324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr3UTF16Consume(pANTLR3_INT_STREAM is)
779324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{
780324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	pANTLR3_INPUT_STREAM input;
781324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        UTF32   ch;
782324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        UTF32   ch2;
783324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
784324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	input   = ((pANTLR3_INPUT_STREAM) (is->super));
785324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
786324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        // Buffer size is always in bytes
787324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        //
788324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	if	((pANTLR3_UINT8)(input->nextChar) < (((pANTLR3_UINT8)input->data) + input->sizeBuf))
789324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	{
790324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		// Indicate one more character in this line
791324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		//
792324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		input->charPositionInLine++;
793324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
794324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		if  ((ANTLR3_UCHAR)(*((pANTLR3_UINT16)input->nextChar)) == input->newlineChar)
795324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		{
796324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			// Reset for start of a new line of input
797324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			//
798324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			input->line++;
799324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			input->charPositionInLine	= 0;
800324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			input->currentLine		= (void *)(((pANTLR3_UINT16)input->nextChar) + 1);
801324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		}
802324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
803324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		// Increment to next character position, accounting for any surrogates
804324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		//
805324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                // Next char in natural machine byte order
806324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                //
807324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                ch  = *((UTF16*)input->nextChar);
808324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
809324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                // We consumed one 16 bit character
810324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                //
811324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		input->nextChar = (void *)(((pANTLR3_UINT16)input->nextChar) + 1);
812324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
813324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                // If we have a surrogate pair then we need to consume
814324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                // a following valid LO surrogate.
815324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                //
816324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) {
817324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
818324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    // If the 16 bits following the high surrogate are in the source buffer...
819324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    //
820324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    if	((pANTLR3_UINT8)(input->nextChar) < (((pANTLR3_UINT8)input->data) + input->sizeBuf))
821324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    {
822324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        // Next character is in natural machine byte order
823324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        //
824324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        ch2 = *((UTF16*)input->nextChar);
825324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
826324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        // If it's a valid low surrogate, consume it
827324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        //
828324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END)
829324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        {
830324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                            // We consumed one 16 bit character
831324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                            //
832324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		            input->nextChar = (void *)(((pANTLR3_UINT16)input->nextChar) + 1);
833324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        }
834324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
835324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        // it.
836324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        //
837324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    }
838324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
839324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    // it because the buffer ended
840324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    //
841324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                }
842324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                // Note that we did not check for an invalid low surrogate here, or that fact that the
843324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                // lo surrogate was missing. We just picked out one 16 bit character unless the character
844324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                // was a valid hi surrogate, in whcih case we consumed two 16 bit characters.
845324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                //
846324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	}
847324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver}
848324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
849324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \brief Return the input element assuming an 8 bit ascii input
850324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver///
851324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \param[in] input Input stream context pointer
852324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \param[in] la 1 based offset of next input stream element
853324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver///
854324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \return Next input character in internal ANTLR3 encoding (UTF32)
855324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver///
856324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic ANTLR3_UCHAR
857324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr3UTF16LA(pANTLR3_INT_STREAM is, ANTLR3_INT32 la)
858324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{
859324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	pANTLR3_INPUT_STREAM input;
860324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        UTF32   ch;
861324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        UTF32   ch2;
862324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        UTF16   * nextChar;
863324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
864324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        // Find the input interface and where we are currently pointing to
865324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        // in the input stream
866324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        //
867324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	input       = ((pANTLR3_INPUT_STREAM) (is->super));
868324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        nextChar    = input->nextChar;
869324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
870324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        // If a positive offset then advance forward, else retreat
871324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        //
872324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        if  (la >= 0)
873324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        {
874324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            while   (--la > 0 && (pANTLR3_UINT8)nextChar < ((pANTLR3_UINT8)input->data) + input->sizeBuf )
875324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            {
876324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                // Advance our copy of the input pointer
877324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                //
878324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                // Next char in natural machine byte order
879324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                //
880324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                ch  = *nextChar++;
881324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
882324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                // If we have a surrogate pair then we need to consume
883324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                // a following valid LO surrogate.
884324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                //
885324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END)
886324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                {
887324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    // If the 16 bits following the high surrogate are in the source buffer...
888324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    //
889324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    if	((pANTLR3_UINT8)(nextChar) < (((pANTLR3_UINT8)input->data) + input->sizeBuf))
890324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    {
891324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        // Next character is in natural machine byte order
892324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        //
893324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        ch2 = *nextChar;
894324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
895324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        // If it's a valid low surrogate, consume it
896324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        //
897324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END)
898324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        {
899324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                            // We consumed one 16 bit character
900324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                            //
901324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		            nextChar++;
902324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        }
903324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
904324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        // it.
905324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        //
906324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    }
907324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
908324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    // it because the buffer ended
909324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    //
910324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                }
911324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                // Note that we did not check for an invalid low surrogate here, or that fact that the
912324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                // lo surrogate was missing. We just picked out one 16 bit character unless the character
913324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                // was a valid hi surrogate, in whcih case we consumed two 16 bit characters.
914324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                //
915324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            }
916324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        }
917324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        else
918324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        {
919324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            // We need to go backwards from our input point
920324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            //
921324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            while   (la++ < 0 && (pANTLR3_UINT8)nextChar > (pANTLR3_UINT8)input->data )
922324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            {
923324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                // Get the previous 16 bit character
924324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                //
925324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                ch = *--nextChar;
926324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
927324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                // If we found a low surrogate then go back one more character if
928324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                // the hi surrogate is there
929324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                //
930324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END)
931324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                {
932324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    ch2 = *(nextChar-1);
933324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    if (ch2 >= UNI_SUR_HIGH_START && ch2 <= UNI_SUR_HIGH_END)
934324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    {
935324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        // Yes, there is a high surrogate to match it so decrement one more and point to that
936324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        //
937324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        nextChar--;
938324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    }
939324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                }
940324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            }
941324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        }
942324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
943324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        // Our local copy of nextChar is now pointing to either the correct character or end of file
944324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        //
945324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        // Input buffer size is always in bytes
946324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        //
947324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	if	( (pANTLR3_UINT8)nextChar >= (((pANTLR3_UINT8)input->data) + input->sizeBuf))
948324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	{
949324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		return	ANTLR3_CHARSTREAM_EOF;
950324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	}
951324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	else
952324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	{
953324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            // Pick up the next 16 character (native machine byte order)
954324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            //
955324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            ch = *nextChar++;
956324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
957324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            // If we have a surrogate pair then we need to consume
958324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            // a following valid LO surrogate.
959324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            //
960324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END)
961324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            {
962324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                // If the 16 bits following the high surrogate are in the source buffer...
963324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                //
964324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                if	((pANTLR3_UINT8)(nextChar) < (((pANTLR3_UINT8)input->data) + input->sizeBuf))
965324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                {
966324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    // Next character is in natural machine byte order
967324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    //
968324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    ch2 = *nextChar;
969324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
970324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    // If it's a valid low surrogate, consume it
971324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    //
972324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END)
973324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    {
974324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        // Construct the UTF32 code point
975324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        //
976324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        ch = ((ch - UNI_SUR_HIGH_START) << halfShift)
977324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			    + (ch2 - UNI_SUR_LOW_START) + halfBase;
978324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    }
979324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
980324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    // it.
981324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    //
982324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                }
983324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
984324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                // it because the buffer ended
985324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                //
986324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            }
987324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        }
988324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        return ch;
989324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver}
990324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
991324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
992324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \brief Calculate the current index in the output stream.
993324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \param[in] input Input stream context pointer
994324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver///
995324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic ANTLR3_MARKER
996324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr3UTF16Index(pANTLR3_INT_STREAM is)
997324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{
998324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    pANTLR3_INPUT_STREAM input;
999324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1000324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input   = ((pANTLR3_INPUT_STREAM) (is->super));
1001324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1002324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    return  (ANTLR3_MARKER)(input->nextChar);
1003324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver}
1004324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1005324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \brief Rewind the lexer input to the state specified by the supplied mark.
1006324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver///
1007324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \param[in] input Input stream context pointer
1008324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver///
1009324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \remark
1010324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// Assumes UTF16 input stream.
1011324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver///
1012324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void
1013324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr3UTF16Seek	(pANTLR3_INT_STREAM is, ANTLR3_MARKER seekPoint)
1014324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{
1015324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	pANTLR3_INPUT_STREAM input;
1016324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1017324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	input   = ((pANTLR3_INPUT_STREAM) is->super);
1018324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1019324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	// If the requested seek point is less than the current
1020324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	// input point, then we assume that we are resetting from a mark
1021324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	// and do not need to scan, but can just set to there as rewind will
1022324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        // reset line numbers and so on.
1023324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	//
1024324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	if	(seekPoint <= (ANTLR3_MARKER)(input->nextChar))
1025324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	{
1026324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		input->nextChar	= (void *)seekPoint;
1027324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	}
1028324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	else
1029324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	{
1030324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            // Call consume until we reach the asked for seek point or EOF
1031324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            //
1032324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            while (is->_LA(is, 1) != ANTLR3_CHARSTREAM_EOF && seekPoint < (ANTLR3_MARKER)input->nextChar)
1033324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	    {
1034324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		is->consume(is);
1035324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	    }
1036324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	}
1037324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver}
1038324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \brief Return a substring of the UTF16 input stream in
1039324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver///  newly allocated memory.
1040324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver///
1041324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \param input Input stream context pointer
1042324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \param start Offset in input stream where the string starts
1043324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \param stop  Offset in the input stream where the string ends.
1044324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver///
1045324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic pANTLR3_STRING
1046324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr3UTF16Substr		(pANTLR3_INPUT_STREAM input, ANTLR3_MARKER start, ANTLR3_MARKER stop)
1047324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{
1048324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    return  input->strFactory->newPtr(input->strFactory, (pANTLR3_UINT8)start, ((ANTLR3_UINT32_CAST(stop - start))/2) + 1);
1049324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver}
1050324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1051324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \brief Consume the next character in a UTF16 input stream when the input is Little Endian and the machine is not
1052324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// Note that the UTF16 routines do not do any substantial verification of the input stream as for performance
1053324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// sake, we assume it is validly encoded. So if a low surrogate is found at the curent input position then we
1054324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// just consume it. Surrogate pairs should be seen as Hi, Lo. So if we have a Lo first, then the input stream
1055324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// is fubar but we just ignore that.
1056324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver///
1057324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \param input Input stream context pointer
1058324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver///
1059324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void
1060324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr3UTF16ConsumeLE(pANTLR3_INT_STREAM is)
1061324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{
1062324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	pANTLR3_INPUT_STREAM input;
1063324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        UTF32   ch;
1064324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        UTF32   ch2;
1065324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1066324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	input   = ((pANTLR3_INPUT_STREAM) (is->super));
1067324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1068324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        // Buffer size is always in bytes
1069324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        //
1070324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	if	((pANTLR3_UINT8)(input->nextChar) < (((pANTLR3_UINT8)input->data) + input->sizeBuf))
1071324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	{
1072324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		// Indicate one more character in this line
1073324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		//
1074324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		input->charPositionInLine++;
1075324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1076324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		if  ((ANTLR3_UCHAR)(*((pANTLR3_UINT16)input->nextChar)) == input->newlineChar)
1077324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		{
1078324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			// Reset for start of a new line of input
1079324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			//
1080324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			input->line++;
1081324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			input->charPositionInLine	= 0;
1082324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			input->currentLine		= (void *)(((pANTLR3_UINT16)input->nextChar) + 1);
1083324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		}
1084324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1085324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		// Increment to next character position, accounting for any surrogates
1086324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		//
1087324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                // Next char in litle endian form
1088324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                //
1089324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                ch  = *((pANTLR3_UINT8)input->nextChar) + (*((pANTLR3_UINT8)input->nextChar + 1) <<8);
1090324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1091324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                // We consumed one 16 bit character
1092324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                //
1093324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		input->nextChar = (void *)(((pANTLR3_UINT16)input->nextChar) + 1);
1094324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1095324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                // If we have a surrogate pair then we need to consume
1096324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                // a following valid LO surrogate.
1097324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                //
1098324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) {
1099324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1100324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    // If the 16 bits following the high surrogate are in the source buffer...
1101324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    //
1102324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    if	((pANTLR3_UINT8)(input->nextChar) < (((pANTLR3_UINT8)input->data) + input->sizeBuf))
1103324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    {
1104324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        ch2 = *((pANTLR3_UINT8)input->nextChar) + (*((pANTLR3_UINT8)input->nextChar + 1) <<8);
1105324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1106324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        // If it's a valid low surrogate, consume it
1107324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        //
1108324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END)
1109324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        {
1110324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                            // We consumed one 16 bit character
1111324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                            //
1112324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		            input->nextChar = (void *)(((pANTLR3_UINT16)input->nextChar) + 1);
1113324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        }
1114324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
1115324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        // it.
1116324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        //
1117324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    }
1118324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
1119324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    // it because the buffer ended
1120324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    //
1121324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                }
1122324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                // Note that we did not check for an invalid low surrogate here, or that fact that the
1123324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                // lo surrogate was missing. We just picked out one 16 bit character unless the character
1124324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                // was a valid hi surrogate, in whcih case we consumed two 16 bit characters.
1125324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                //
1126324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	}
1127324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver}
1128324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1129324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \brief Return the input element assuming a UTF16 input when the input is Little Endian and the machine is not
1130324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver///
1131324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \param[in] input Input stream context pointer
1132324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \param[in] la 1 based offset of next input stream element
1133324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver///
1134324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \return Next input character in internal ANTLR3 encoding (UTF32)
1135324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver///
1136324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic ANTLR3_UCHAR
1137324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr3UTF16LALE(pANTLR3_INT_STREAM is, ANTLR3_INT32 la)
1138324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{
1139324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	pANTLR3_INPUT_STREAM input;
1140324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        UTF32           ch;
1141324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        UTF32           ch2;
1142324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        pANTLR3_UCHAR   nextChar;
1143324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1144324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        // Find the input interface and where we are currently pointing to
1145324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        // in the input stream
1146324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        //
1147324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	input       = ((pANTLR3_INPUT_STREAM) (is->super));
1148324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        nextChar    = input->nextChar;
1149324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1150324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        // If a positive offset then advance forward, else retreat
1151324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        //
1152324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        if  (la >= 0)
1153324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        {
1154324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            while   (--la > 0 && (pANTLR3_UINT8)nextChar < ((pANTLR3_UINT8)input->data) + input->sizeBuf )
1155324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            {
1156324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                // Advance our copy of the input pointer
1157324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                //
1158324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                // Next char in Little Endian byte order
1159324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                //
1160324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                ch  = (*nextChar) + (*(nextChar+1) << 8);
1161324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                nextChar += 2;
1162324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1163324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                // If we have a surrogate pair then we need to consume
1164324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                // a following valid LO surrogate.
1165324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                //
1166324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END)
1167324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                {
1168324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    // If the 16 bits following the high surrogate are in the source buffer...
1169324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    //
1170324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    if	((pANTLR3_UINT8)(nextChar) < (((pANTLR3_UINT8)input->data) + input->sizeBuf))
1171324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    {
1172324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        // Next character is in little endian byte order
1173324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        //
1174324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        ch2 = (*nextChar) + (*(nextChar+1) << 8);
1175324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1176324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        // If it's a valid low surrogate, consume it
1177324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        //
1178324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END)
1179324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        {
1180324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                            // We consumed one 16 bit character
1181324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                            //
1182324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		            nextChar += 2;
1183324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        }
1184324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
1185324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        // it.
1186324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        //
1187324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    }
1188324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
1189324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    // it because the buffer ended
1190324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    //
1191324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                }
1192324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                // Note that we did not check for an invalid low surrogate here, or that fact that the
1193324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                // lo surrogate was missing. We just picked out one 16 bit character unless the character
1194324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                // was a valid hi surrogate, in whcih case we consumed two 16 bit characters.
1195324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                //
1196324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            }
1197324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        }
1198324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        else
1199324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        {
1200324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            // We need to go backwards from our input point
1201324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            //
1202324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            while   (la++ < 0 && (pANTLR3_UINT8)nextChar > (pANTLR3_UINT8)input->data )
1203324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            {
1204324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                // Get the previous 16 bit character
1205324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                //
1206324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                ch = (*nextChar - 2) + ((*nextChar -1) << 8);
1207324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                nextChar -= 2;
1208324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1209324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                // If we found a low surrogate then go back one more character if
1210324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                // the hi surrogate is there
1211324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                //
1212324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END)
1213324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                {
1214324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    ch2 = (*nextChar - 2) + ((*nextChar -1) << 8);
1215324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    if (ch2 >= UNI_SUR_HIGH_START && ch2 <= UNI_SUR_HIGH_END)
1216324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    {
1217324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        // Yes, there is a high surrogate to match it so decrement one more and point to that
1218324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        //
1219324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        nextChar -=2;
1220324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    }
1221324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                }
1222324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            }
1223324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        }
1224324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1225324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        // Our local copy of nextChar is now pointing to either the correct character or end of file
1226324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        //
1227324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        // Input buffer size is always in bytes
1228324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        //
1229324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	if	( (pANTLR3_UINT8)nextChar >= (((pANTLR3_UINT8)input->data) + input->sizeBuf))
1230324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	{
1231324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		return	ANTLR3_CHARSTREAM_EOF;
1232324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	}
1233324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	else
1234324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	{
1235324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            // Pick up the next 16 character (little endian byte order)
1236324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            //
1237324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            ch = (*nextChar) + (*(nextChar+1) << 8);
1238324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            nextChar += 2;
1239324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1240324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            // If we have a surrogate pair then we need to consume
1241324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            // a following valid LO surrogate.
1242324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            //
1243324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END)
1244324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            {
1245324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                // If the 16 bits following the high surrogate are in the source buffer...
1246324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                //
1247324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                if	((pANTLR3_UINT8)(nextChar) < (((pANTLR3_UINT8)input->data) + input->sizeBuf))
1248324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                {
1249324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    // Next character is in little endian byte order
1250324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    //
1251324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    ch2 = (*nextChar) + (*(nextChar+1) << 8);
1252324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1253324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    // If it's a valid low surrogate, consume it
1254324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    //
1255324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END)
1256324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    {
1257324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        // Construct the UTF32 code point
1258324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        //
1259324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        ch = ((ch - UNI_SUR_HIGH_START) << halfShift)
1260324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			    + (ch2 - UNI_SUR_LOW_START) + halfBase;
1261324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    }
1262324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
1263324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    // it.
1264324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    //
1265324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                }
1266324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
1267324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                // it because the buffer ended
1268324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                //
1269324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            }
1270324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        }
1271324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        return ch;
1272324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver}
1273324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1274324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \brief Consume the next character in a UTF16 input stream when the input is Big Endian and the machine is not
1275324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver///
1276324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \param input Input stream context pointer
1277324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver///
1278324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void
1279324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr3UTF16ConsumeBE(pANTLR3_INT_STREAM is)
1280324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{
1281324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	pANTLR3_INPUT_STREAM input;
1282324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        UTF32   ch;
1283324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        UTF32   ch2;
1284324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1285324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	input   = ((pANTLR3_INPUT_STREAM) (is->super));
1286324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1287324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        // Buffer size is always in bytes
1288324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        //
1289324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	if	((pANTLR3_UINT8)(input->nextChar) < (((pANTLR3_UINT8)input->data) + input->sizeBuf))
1290324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	{
1291324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		// Indicate one more character in this line
1292324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		//
1293324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		input->charPositionInLine++;
1294324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1295324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		if  ((ANTLR3_UCHAR)(*((pANTLR3_UINT16)input->nextChar)) == input->newlineChar)
1296324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		{
1297324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			// Reset for start of a new line of input
1298324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			//
1299324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			input->line++;
1300324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			input->charPositionInLine	= 0;
1301324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			input->currentLine		= (void *)(((pANTLR3_UINT16)input->nextChar) + 1);
1302324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		}
1303324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1304324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		// Increment to next character position, accounting for any surrogates
1305324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		//
1306324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                // Next char in big endian form
1307324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                //
1308324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                ch  = *((pANTLR3_UINT8)input->nextChar + 1) + (*((pANTLR3_UINT8)input->nextChar ) <<8);
1309324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1310324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                // We consumed one 16 bit character
1311324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                //
1312324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		input->nextChar = (void *)(((pANTLR3_UINT16)input->nextChar) + 1);
1313324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1314324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                // If we have a surrogate pair then we need to consume
1315324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                // a following valid LO surrogate.
1316324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                //
1317324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) {
1318324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1319324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    // If the 16 bits following the high surrogate are in the source buffer...
1320324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    //
1321324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    if	((pANTLR3_UINT8)(input->nextChar) < (((pANTLR3_UINT8)input->data) + input->sizeBuf))
1322324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    {
1323324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        // Big endian
1324324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        //
1325324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        ch2 = *((pANTLR3_UINT8)input->nextChar + 1) + (*((pANTLR3_UINT8)input->nextChar ) <<8);
1326324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1327324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        // If it's a valid low surrogate, consume it
1328324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        //
1329324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END)
1330324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        {
1331324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                            // We consumed one 16 bit character
1332324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                            //
1333324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		            input->nextChar = (void *)(((pANTLR3_UINT16)input->nextChar) + 1);
1334324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        }
1335324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
1336324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        // it.
1337324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        //
1338324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    }
1339324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
1340324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    // it because the buffer ended
1341324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    //
1342324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                }
1343324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                // Note that we did not check for an invalid low surrogate here, or that fact that the
1344324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                // lo surrogate was missing. We just picked out one 16 bit character unless the character
1345324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                // was a valid hi surrogate, in whcih case we consumed two 16 bit characters.
1346324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                //
1347324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	}
1348324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver}
1349324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
/// \brief Return the input element assuming a UTF16 input when the input is Big Endian and the machine is not
1351324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver///
1352324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \param[in] input Input stream context pointer
1353324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \param[in] la 1 based offset of next input stream element
1354324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver///
1355324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \return Next input character in internal ANTLR3 encoding (UTF32)
1356324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver///
1357324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic ANTLR3_UCHAR
1358324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr3UTF16LABE(pANTLR3_INT_STREAM is, ANTLR3_INT32 la)
1359324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{
1360324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	pANTLR3_INPUT_STREAM input;
1361324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        UTF32           ch;
1362324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        UTF32           ch2;
1363324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        pANTLR3_UCHAR   nextChar;
1364324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1365324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        // Find the input interface and where we are currently pointing to
1366324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        // in the input stream
1367324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        //
1368324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	input       = ((pANTLR3_INPUT_STREAM) (is->super));
1369324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        nextChar    = input->nextChar;
1370324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1371324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        // If a positive offset then advance forward, else retreat
1372324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        //
1373324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        if  (la >= 0)
1374324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        {
1375324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            while   (--la > 0 && (pANTLR3_UINT8)nextChar < ((pANTLR3_UINT8)input->data) + input->sizeBuf )
1376324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            {
1377324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                // Advance our copy of the input pointer
1378324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                //
1379324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                // Next char in Big Endian byte order
1380324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                //
1381324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                ch  = ((*nextChar) << 8) + *(nextChar+1);
1382324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                nextChar += 2;
1383324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1384324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                // If we have a surrogate pair then we need to consume
1385324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                // a following valid LO surrogate.
1386324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                //
1387324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END)
1388324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                {
1389324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    // If the 16 bits following the high surrogate are in the source buffer...
1390324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    //
1391324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    if	((pANTLR3_UINT8)(nextChar) < (((pANTLR3_UINT8)input->data) + input->sizeBuf))
1392324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    {
1393324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        // Next character is in big endian byte order
1394324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        //
1395324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        ch2 = ((*nextChar) << 8) + *(nextChar+1);
1396324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1397324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        // If it's a valid low surrogate, consume it
1398324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        //
1399324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END)
1400324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        {
1401324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                            // We consumed one 16 bit character
1402324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                            //
1403324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		            nextChar += 2;
1404324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        }
1405324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
1406324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        // it.
1407324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        //
1408324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    }
1409324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
1410324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    // it because the buffer ended
1411324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    //
1412324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                }
1413324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                // Note that we did not check for an invalid low surrogate here, or that fact that the
1414324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                // lo surrogate was missing. We just picked out one 16 bit character unless the character
1415324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                // was a valid hi surrogate, in whcih case we consumed two 16 bit characters.
1416324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                //
1417324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            }
1418324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        }
1419324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        else
1420324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        {
1421324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            // We need to go backwards from our input point
1422324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            //
1423324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            while   (la++ < 0 && (pANTLR3_UINT8)nextChar > (pANTLR3_UINT8)input->data )
1424324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            {
1425324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                // Get the previous 16 bit character
1426324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                //
1427324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                ch = ((*nextChar - 2) << 8) + (*nextChar -1);
1428324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                nextChar -= 2;
1429324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1430324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                // If we found a low surrogate then go back one more character if
1431324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                // the hi surrogate is there
1432324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                //
1433324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END)
1434324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                {
1435324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    ch2 = ((*nextChar - 2) << 8) + (*nextChar -1);
1436324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    if (ch2 >= UNI_SUR_HIGH_START && ch2 <= UNI_SUR_HIGH_END)
1437324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    {
1438324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        // Yes, there is a high surrogate to match it so decrement one more and point to that
1439324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        //
1440324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        nextChar -=2;
1441324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    }
1442324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                }
1443324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            }
1444324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        }
1445324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1446324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        // Our local copy of nextChar is now pointing to either the correct character or end of file
1447324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        //
1448324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        // Input buffer size is always in bytes
1449324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        //
1450324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	if	( (pANTLR3_UINT8)nextChar >= (((pANTLR3_UINT8)input->data) + input->sizeBuf))
1451324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	{
1452324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		return	ANTLR3_CHARSTREAM_EOF;
1453324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	}
1454324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	else
1455324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	{
1456324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            // Pick up the next 16 character (big endian byte order)
1457324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            //
1458324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            ch = ((*nextChar) << 8) + *(nextChar+1);
1459324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            nextChar += 2;
1460324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1461324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            // If we have a surrogate pair then we need to consume
1462324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            // a following valid LO surrogate.
1463324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            //
1464324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END)
1465324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            {
1466324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                // If the 16 bits following the high surrogate are in the source buffer...
1467324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                //
1468324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                if	((pANTLR3_UINT8)(nextChar) < (((pANTLR3_UINT8)input->data) + input->sizeBuf))
1469324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                {
1470324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    // Next character is in big endian byte order
1471324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    //
1472324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    ch2 = ((*nextChar) << 8) + *(nextChar+1);
1473324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1474324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    // If it's a valid low surrogate, consume it
1475324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    //
1476324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END)
1477324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    {
1478324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        // Construct the UTF32 code point
1479324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        //
1480324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                        ch = ((ch - UNI_SUR_HIGH_START) << halfShift)
1481324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver			    + (ch2 - UNI_SUR_LOW_START) + halfBase;
1482324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    }
1483324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
1484324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    // it.
1485324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    //
1486324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                }
1487324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                // Note that we ignore a valid hi surrogate that has no lo surrogate to go with
1488324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                // it because the buffer ended
1489324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                //
1490324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            }
1491324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        }
1492324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        return ch;
1493324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver}
1494324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1495324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \brief Common function to setup function interface for a UTF3 input stream.
1496324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver///
1497324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \param input Input stream context pointer
1498324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver///
1499324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruvervoid
1500324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr3UTF32SetupStream	(pANTLR3_INPUT_STREAM input, ANTLR3_BOOLEAN machineBigEndian, ANTLR3_BOOLEAN inputBigEndian)
1501324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{
1502324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    // Build a string factory for this stream. This is a UTF32 string factory which is a standard
1503324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    // part of the ANTLR3 string. The string factory is then passed through the whole chain of lexer->parser->tree->treeparser
1504324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    // and so on.
1505324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    //
1506324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input->strFactory	= antlr3StringFactoryNew(input->encoding);
1507324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1508324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    // Generic API that does not care about endianess.
1509324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    //
1510324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input->istream->index	    =  antlr3UTF32Index;            // Calculate current index in input stream, UTF16 based
1511324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input->substr		    =  antlr3UTF32Substr;	    // Return a string from the input stream
1512324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input->istream->seek	    =  antlr3UTF32Seek;		    // How to seek to a specific point in the stream
1513324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input->istream->consume	    =  antlr3UTF32Consume;	    // Consume the next UTF32 character in the buffer
1514324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1515324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    // We must install different UTF32 LA routines according to whether the input
1516324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    // is the same endianess as the machine we are executing upon or not. If it is not
1517324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    // then we must install methods that can convert the endianess on the fly as they go
1518324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    //
1519324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    switch (machineBigEndian)
1520324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    {
1521324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        case    ANTLR3_TRUE:
1522324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1523324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            // Machine is Big Endian, if the input is also then install the
1524324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            // methods that do not access input by bytes and reverse them.
1525324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            // Otherwise install endian aware methods.
1526324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            //
1527324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            if  (inputBigEndian == ANTLR3_TRUE)
1528324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            {
1529324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                // Input is machine compatible
1530324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                //
1531324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                input->istream->_LA         =  antlr3UTF32LA;		    // Return the UTF32 character at offset n (1 based)
1532324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            }
1533324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            else
1534324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            {
1535324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                // Need to use methods that know that the input is little endian
1536324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                //
1537324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                input->istream->_LA         =  antlr3UTF32LALE;		    // Return the UTF32 character at offset n (1 based)
1538324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            }
1539324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            break;
1540324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1541324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        case    ANTLR3_FALSE:
1542324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1543324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            // Machine is Little Endian, if the input is also then install the
1544324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            // methods that do not access input by bytes and reverse them.
1545324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            // Otherwise install endian aware methods.
1546324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            //
1547324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            if  (inputBigEndian == ANTLR3_FALSE)
1548324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            {
1549324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                // Input is machine compatible
1550324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                //
1551324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                input->istream->_LA         =  antlr3UTF32LA;		    // Return the UTF32 character at offset n (1 based)
1552324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            }
1553324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            else
1554324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            {
1555324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                // Need to use methods that know that the input is Big Endian
1556324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                //
1557324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                input->istream->_LA         =  antlr3UTF32LABE;		    // Return the UTF32 character at offset n (1 based)
1558324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            }
1559324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            break;
1560324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    }
1561324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1562324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input->charByteSize		    = 4;			    // Size in bytes of characters in this stream.
1563324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver}
1564324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1565324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/** \brief Consume the next character in a UTF32 input stream
1566324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *
1567324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \param input Input stream context pointer
1568324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */
1569324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void
1570324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr3UTF32Consume(pANTLR3_INT_STREAM is)
1571324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{
1572324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    pANTLR3_INPUT_STREAM input;
1573324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1574324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input   = ((pANTLR3_INPUT_STREAM) (is->super));
1575324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1576324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    // SizeBuf is always in bytes
1577324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    //
1578324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    if	((pANTLR3_UINT8)(input->nextChar) < (((pANTLR3_UINT8)input->data) + input->sizeBuf))
1579324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    {
1580324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	/* Indicate one more character in this line
1581324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	 */
1582324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	input->charPositionInLine++;
1583324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1584324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	if  ((ANTLR3_UCHAR)(*((pANTLR3_UINT32)input->nextChar)) == input->newlineChar)
1585324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	{
1586324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	    /* Reset for start of a new line of input
1587324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	     */
1588324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	    input->line++;
1589324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	    input->charPositionInLine	= 0;
1590324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	    input->currentLine		= (void *)(((pANTLR3_UINT32)input->nextChar) + 1);
1591324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	}
1592324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1593324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	/* Increment to next character position
1594324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	 */
1595324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	input->nextChar = (void *)(((pANTLR3_UINT32)input->nextChar) + 1);
1596324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    }
1597324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver}
1598324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1599324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \brief Calculate the current index in the output stream.
1600324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \param[in] input Input stream context pointer
1601324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver///
1602324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic ANTLR3_MARKER
1603324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr3UTF32Index(pANTLR3_INT_STREAM is)
1604324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{
1605324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    pANTLR3_INPUT_STREAM input;
1606324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1607324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input   = ((pANTLR3_INPUT_STREAM) (is->super));
1608324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1609324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    return  (ANTLR3_MARKER)(input->nextChar);
1610324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver}
1611324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1612324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \brief Return a substring of the UTF16 input stream in
1613324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver///  newly allocated memory.
1614324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver///
1615324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \param input Input stream context pointer
1616324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \param start Offset in input stream where the string starts
1617324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \param stop  Offset in the input stream where the string ends.
1618324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver///
1619324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic pANTLR3_STRING
1620324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr3UTF32Substr		(pANTLR3_INPUT_STREAM input, ANTLR3_MARKER start, ANTLR3_MARKER stop)
1621324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{
1622324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    return  input->strFactory->newPtr(input->strFactory, (pANTLR3_UINT8)start, ((ANTLR3_UINT32_CAST(stop - start))/4) + 1);
1623324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver}
1624324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1625324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \brief Rewind the lexer input to the state specified by the supplied mark.
1626324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver///
1627324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \param[in] input Input stream context pointer
1628324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver///
1629324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \remark
1630324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// Assumes UTF32 input stream.
1631324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver///
1632324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void
1633324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr3UTF32Seek	(pANTLR3_INT_STREAM is, ANTLR3_MARKER seekPoint)
1634324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{
1635324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	pANTLR3_INPUT_STREAM input;
1636324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1637324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	input   = ((pANTLR3_INPUT_STREAM) is->super);
1638324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1639324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	// If the requested seek point is less than the current
1640324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	// input point, then we assume that we are resetting from a mark
1641324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	// and do not need to scan, but can just set to there as rewind will
1642324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        // reset line numbers and so on.
1643324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	//
1644324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	if	(seekPoint <= (ANTLR3_MARKER)(input->nextChar))
1645324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	{
1646324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		input->nextChar	= (void *)seekPoint;
1647324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	}
1648324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	else
1649324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	{
1650324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            // Call consume until we reach the asked for seek point or EOF
1651324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            //
1652324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            while (is->_LA(is, 1) != ANTLR3_CHARSTREAM_EOF && seekPoint < (ANTLR3_MARKER)input->nextChar)
1653324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	    {
1654324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		is->consume(is);
1655324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	    }
1656324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	}
1657324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver}
1658324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1659324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/** \brief Return the input element assuming a UTF32 input in natural machine byte order
1660324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *
1661324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \param[in] input Input stream context pointer
1662324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \param[in] la 1 based offset of next input stream element
1663324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *
1664324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \return Next input character in internal ANTLR3 encoding (UTF32)
1665324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */
1666324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic ANTLR3_UCHAR
1667324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr3UTF32LA(pANTLR3_INT_STREAM is, ANTLR3_INT32 la)
1668324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{
1669324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    pANTLR3_INPUT_STREAM input;
1670324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1671324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input   = ((pANTLR3_INPUT_STREAM) (is->super));
1672324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1673324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    if	(( ((pANTLR3_UINT8)input->nextChar) + la - 1) >= (((pANTLR3_UINT8)input->data) + input->sizeBuf))
1674324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    {
1675324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		return	ANTLR3_CHARSTREAM_EOF;
1676324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    }
1677324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    else
1678324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    {
1679324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		return	(ANTLR3_UCHAR)(*((pANTLR3_UINT32)input->nextChar + la - 1));
1680324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    }
1681324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver}
1682324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1683324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/** \brief Return the input element assuming a UTF32 input in little endian byte order
1684324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *
1685324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \param[in] input Input stream context pointer
1686324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \param[in] la 1 based offset of next input stream element
1687324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *
1688324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \return Next input character in internal ANTLR3 encoding (UTF32)
1689324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */
1690324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic ANTLR3_UCHAR
1691324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr3UTF32LALE(pANTLR3_INT_STREAM is, ANTLR3_INT32 la)
1692324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{
1693324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    pANTLR3_INPUT_STREAM input;
1694324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1695324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input   = ((pANTLR3_INPUT_STREAM) (is->super));
1696324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1697324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    if	(( ((pANTLR3_UINT8)input->nextChar) + la - 1) >= (((pANTLR3_UINT8)input->data) + input->sizeBuf))
1698324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    {
1699324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		return	ANTLR3_CHARSTREAM_EOF;
1700324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    }
1701324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    else
1702324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    {
1703324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        ANTLR3_UCHAR   c;
1704324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1705324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        c = (ANTLR3_UCHAR)(*((pANTLR3_UINT32)input->nextChar + la - 1));
1706324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1707324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        // Swap Endianess to Big Endian
1708324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        //
1709324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        return (c>>24) | ((c<<8) & 0x00FF0000) | ((c>>8) & 0x0000FF00) | (c<<24);
1710324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    }
1711324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver}
1712324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1713324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/** \brief Return the input element assuming a UTF32 input in big endian byte order
1714324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *
1715324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \param[in] input Input stream context pointer
1716324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \param[in] la 1 based offset of next input stream element
1717324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *
1718324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \return Next input character in internal ANTLR3 encoding (UTF32)
1719324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \remark This is the same code as LE version but seprated in case there are better optimisations fo rendinan swap
1720324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */
1721324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic ANTLR3_UCHAR
1722324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr3UTF32LABE(pANTLR3_INT_STREAM is, ANTLR3_INT32 la)
1723324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{
1724324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    pANTLR3_INPUT_STREAM input;
1725324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1726324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input   = ((pANTLR3_INPUT_STREAM) (is->super));
1727324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1728324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    if	(( ((pANTLR3_UINT8)input->nextChar) + la - 1) >= (((pANTLR3_UINT8)input->data) + input->sizeBuf))
1729324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    {
1730324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver		return	ANTLR3_CHARSTREAM_EOF;
1731324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    }
1732324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    else
1733324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    {
1734324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        ANTLR3_UCHAR   c;
1735324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1736324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        c = (ANTLR3_UCHAR)(*((pANTLR3_UINT32)input->nextChar + la - 1));
1737324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1738324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        // Swap Endianess to Little Endian
1739324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        //
1740324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        return (c>>24) | ((c<<8) & 0x00FF0000) | ((c>>8) & 0x0000FF00) | (c<<24);
1741324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    }
1742324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver}
1743324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1744324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1745324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \brief Common function to setup function interface for a UTF8 input stream.
1746324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver///
1747324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \param input Input stream context pointer
1748324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver///
1749324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruvervoid
1750324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr3UTF8SetupStream	(pANTLR3_INPUT_STREAM input)
1751324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{
1752324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    // Build a string factory for this stream. This is a UTF16 string factory which is a standard
1753324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    // part of the ANTLR3 string. The string factory is then passed through the whole chain of lexer->parser->tree->treeparser
1754324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    // and so on.
1755324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    //
1756324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input->strFactory	= antlr3StringFactoryNew(input->encoding);
1757324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1758324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    // Generic API that does not care about endianess.
1759324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    //
1760324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input->istream->consume	= antlr3UTF8Consume;	// Consume the next UTF32 character in the buffer
1761324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input->istream->_LA         = antlr3UTF8LA;         // Return the UTF32 character at offset n (1 based)
1762324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input->charByteSize		= 0;	                // Size in bytes of characters in this stream.
1763324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver}
1764324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1765324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// ------------------------------------------------------
1766324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// Following is from Unicode.org (see antlr3convertutf.c)
1767324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver//
1768324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
/// Index into the table below with the first byte of a UTF-8 sequence to
/// get the number of trailing bytes that are supposed to follow it.
/// Each row below covers 32 consecutive byte values.
/// Note that *legal* UTF-8 sequences can't have 4 or 5 trailing bytes. The
/// table is left as-is for anyone who may want to do such conversion, which
/// was allowed in earlier algorithms.
///
static const ANTLR3_UINT32 trailingBytesForUTF8[256] = {
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x00 - 0x1F
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x20 - 0x3F
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x40 - 0x5F
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x60 - 0x7F
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x80 - 0x9F
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xA0 - 0xBF
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0xC0 - 0xDF
    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5  // 0xE0 - 0xEF: 2, 0xF0 - 0xF7: 3, 0xF8 - 0xFB: 4, 0xFC - 0xFF: 5
};
1785324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1786324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// Magic values subtracted from a buffer value during UTF8 conversion.
1787324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// This table contains as many values as there might be trailing bytes
1788324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// in a UTF-8 sequence.
1789324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver///
1790324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic const UTF32 offsetsFromUTF8[6] =
1791324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    {   0x00000000UL, 0x00003080UL, 0x000E2080UL,
1792324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	0x03C82080UL, 0xFA082080UL, 0x82082080UL
1793324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    };
1794324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1795324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// End of Unicode.org tables
1796324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// -------------------------
1797324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1798324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1799324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/** \brief Consume the next character in a UTF8 input stream
1800324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *
1801324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \param input Input stream context pointer
1802324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */
1803324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void
1804324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr3UTF8Consume(pANTLR3_INT_STREAM is)
1805324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{
1806324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    pANTLR3_INPUT_STREAM    input;
1807324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    ANTLR3_UINT32           extraBytesToRead;
1808324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    ANTLR3_UCHAR            ch;
1809324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    pANTLR3_UINT8           nextChar;
1810324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1811324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input   = ((pANTLR3_INPUT_STREAM) (is->super));
1812324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1813324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    nextChar = input->nextChar;
1814324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1815324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    if	(nextChar < (((pANTLR3_UINT8)input->data) + input->sizeBuf))
1816324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    {
1817324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	// Indicate one more character in this line
1818324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	//
1819324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	input->charPositionInLine++;
1820324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1821324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        // Are there more bytes needed to make up the whole thing?
1822324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        //
1823324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        extraBytesToRead = trailingBytesForUTF8[*nextChar];
1824324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1825324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        if	(nextChar + extraBytesToRead >= (((pANTLR3_UINT8)input->data) + input->sizeBuf))
1826324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        {
1827324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            input->nextChar = (((pANTLR3_UINT8)input->data) + input->sizeBuf);
1828324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            return;
1829324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        }
1830324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1831324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        // Cases deliberately fall through (see note A in antlrconvertutf.c)
1832324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        // Legal UTF8 is only 4 bytes but 6 bytes could be used in old UTF8 so
1833324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        // we allow it.
1834324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        //
1835324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        ch  = 0;
1836324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver       	switch (extraBytesToRead) {
1837324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	    case 5: ch += *nextChar++; ch <<= 6;
1838324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	    case 4: ch += *nextChar++; ch <<= 6;
1839324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	    case 3: ch += *nextChar++; ch <<= 6;
1840324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	    case 2: ch += *nextChar++; ch <<= 6;
1841324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	    case 1: ch += *nextChar++; ch <<= 6;
1842324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	    case 0: ch += *nextChar++;
1843324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	}
1844324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1845324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        // Magically correct the input value
1846324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        //
1847324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	ch -= offsetsFromUTF8[extraBytesToRead];
1848324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	if  (ch == input->newlineChar)
1849324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	{
1850324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	    /* Reset for start of a new line of input
1851324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	     */
1852324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	    input->line++;
1853324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	    input->charPositionInLine	= 0;
1854324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	    input->currentLine		= (void *)nextChar;
1855324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver	}
1856324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1857324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        // Update input pointer
1858324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        //
1859324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        input->nextChar = nextChar;
1860324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    }
1861324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver}
1862324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/** \brief Return the input element assuming a UTF8 input
1863324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *
1864324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \param[in] input Input stream context pointer
1865324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \param[in] la 1 based offset of next input stream element
1866324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver *
1867324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \return Next input character in internal ANTLR3 encoding (UTF32)
1868324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */
1869324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic ANTLR3_UCHAR
1870324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr3UTF8LA(pANTLR3_INT_STREAM is, ANTLR3_INT32 la)
1871324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{
1872324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    pANTLR3_INPUT_STREAM    input;
1873324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    ANTLR3_UINT32           extraBytesToRead;
1874324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    ANTLR3_UCHAR            ch;
1875324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    pANTLR3_UINT8           nextChar;
1876324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1877324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input   = ((pANTLR3_INPUT_STREAM) (is->super));
1878324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1879324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    nextChar = input->nextChar;
1880324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1881324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    // Do we need to traverse forwards or backwards?
1882324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    // - LA(0) is treated as LA(1) and we assume that the nextChar is
1883324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    //   already positioned.
1884324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    // - LA(n+) ; n>1 means we must traverse forward n-1 characters catering for UTF8 encoding
1885324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    // - LA(-n) means we must traverse backwards n chracters
1886324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    //
1887324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    if (la > 1) {
1888324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1889324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        // Make sure that we have at least one character left before trying to
1890324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        // loop through the buffer.
1891324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        //
1892324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        if	(nextChar < (((pANTLR3_UINT8)input->data) + input->sizeBuf))
1893324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        {
1894324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            // Now traverse n-1 characters forward
1895324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            //
1896324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            while (--la > 0)
1897324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            {
1898324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                // Does the next character require trailing bytes?
1899324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                // If so advance the pointer by that many bytes as well as advancing
1900324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                // one position for what will be at least a single byte character.
1901324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                //
1902324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                nextChar += trailingBytesForUTF8[*nextChar] + 1;
1903324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1904324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                // Does that calculation take us past the byte length of the buffer?
1905324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                //
1906324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                if	(nextChar >= (((pANTLR3_UINT8)input->data) + input->sizeBuf))
1907324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                {
1908324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                    return ANTLR3_CHARSTREAM_EOF;
1909324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                }
1910324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            }
1911324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        }
1912324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        else
1913324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        {
1914324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            return ANTLR3_CHARSTREAM_EOF;
1915324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        }
1916324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    }
1917324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    else
1918324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    {
1919324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        // LA is negative so we decrease the pointer by n character positions
1920324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        //
1921324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        while   (nextChar > (pANTLR3_UINT8)input->data && la++ < 0)
1922324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        {
1923324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            // Traversing backwards in UTF8 means decermenting by one
1924324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            // then continuing to decrement while ever a character pattern
1925324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            // is flagged as being a trailing byte of an encoded code point.
1926324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            // Trailing UTF8 bytes always start with 10 in binary. We assumne that
1927324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            // the UTF8 is well formed and do not check boundary conditions
1928324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            //
1929324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            nextChar--;
1930324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            while ((*nextChar & 0xC0) == 0x80)
1931324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            {
1932324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver                nextChar--;
1933324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            }
1934324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        }
1935324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    }
1936324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1937324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    // nextChar is now pointing at the UTF8 encoded character that we need to
1938324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    // decode and return.
1939324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    //
1940324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    // Are there more bytes needed to make up the whole thing?
1941324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    //
1942324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    extraBytesToRead = trailingBytesForUTF8[*nextChar];
1943324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    if	(nextChar + extraBytesToRead >= (((pANTLR3_UINT8)input->data) + input->sizeBuf))
1944324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    {
1945324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        return ANTLR3_CHARSTREAM_EOF;
1946324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    }
1947324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1948324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    // Cases deliberately fall through (see note A in antlrconvertutf.c)
1949324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    //
1950324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    ch  = 0;
1951324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    switch (extraBytesToRead) {
1952324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            case 5: ch += *nextChar++; ch <<= 6;
1953324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            case 4: ch += *nextChar++; ch <<= 6;
1954324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            case 3: ch += *nextChar++; ch <<= 6;
1955324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            case 2: ch += *nextChar++; ch <<= 6;
1956324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            case 1: ch += *nextChar++; ch <<= 6;
1957324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver            case 0: ch += *nextChar++;
1958324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    }
1959324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1960324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    // Magically correct the input value
1961324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    //
1962324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    ch -= offsetsFromUTF8[extraBytesToRead];
1963324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1964324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    return ch;
1965324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver}
1966324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
1967324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// EBCDIC to ASCII conversion table
1968324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver//
1969324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// This for EBCDIC EDF04 translated to ISO-8859.1 which is the usually accepted POSIX
1970324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// translation and the character tables are published all over the interweb.
1971324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver//
1972324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverconst ANTLR3_UCHAR e2a[256] =
1973324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{
1974324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    0x00, 0x01, 0x02, 0x03, 0x85, 0x09, 0x86, 0x7f,
1975324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    0x87, 0x8d, 0x8e, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
1976324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    0x10, 0x11, 0x12, 0x13, 0x8f, 0x0a, 0x08, 0x97,
1977324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    0x18, 0x19, 0x9c, 0x9d, 0x1c, 0x1d, 0x1e, 0x1f,
1978324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    0x80, 0x81, 0x82, 0x83, 0x84, 0x92, 0x17, 0x1b,
1979324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x05, 0x06, 0x07,
1980324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04,
1981324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    0x98, 0x99, 0x9a, 0x9b, 0x14, 0x15, 0x9e, 0x1a,
1982324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    0x20, 0xa0, 0xe2, 0xe4, 0xe0, 0xe1, 0xe3, 0xe5,
1983324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    0xe7, 0xf1, 0x60, 0x2e, 0x3c, 0x28, 0x2b, 0x7c,
1984324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    0x26, 0xe9, 0xea, 0xeb, 0xe8, 0xed, 0xee, 0xef,
1985324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    0xec, 0xdf, 0x21, 0x24, 0x2a, 0x29, 0x3b, 0x9f,
1986324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    0x2d, 0x2f, 0xc2, 0xc4, 0xc0, 0xc1, 0xc3, 0xc5,
1987324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    0xc7, 0xd1, 0x5e, 0x2c, 0x25, 0x5f, 0x3e, 0x3f,
1988324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    0xf8, 0xc9, 0xca, 0xcb, 0xc8, 0xcd, 0xce, 0xcf,
1989324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    0xcc, 0xa8, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22,
1990324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    0xd8, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
1991324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    0x68, 0x69, 0xab, 0xbb, 0xf0, 0xfd, 0xfe, 0xb1,
1992324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    0xb0, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70,
1993324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    0x71, 0x72, 0xaa, 0xba, 0xe6, 0xb8, 0xc6, 0xa4,
1994324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    0xb5, 0xaf, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
1995324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    0x79, 0x7a, 0xa1, 0xbf, 0xd0, 0xdd, 0xde, 0xae,
1996324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    0xa2, 0xa3, 0xa5, 0xb7, 0xa9, 0xa7, 0xb6, 0xbc,
1997324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    0xbd, 0xbe, 0xac, 0x5b, 0x5c, 0x5d, 0xb4, 0xd7,
1998324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    0xf9, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
1999324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    0x48, 0x49, 0xad, 0xf4, 0xf6, 0xf2, 0xf3, 0xf5,
2000324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    0xa6, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50,
2001324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    0x51, 0x52, 0xb9, 0xfb, 0xfc, 0xdb, 0xfa, 0xff,
2002324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    0xd9, 0xf7, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
2003324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    0x59, 0x5a, 0xb2, 0xd4, 0xd6, 0xd2, 0xd3, 0xd5,
2004324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
2005324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    0x38, 0x39, 0xb3, 0x7b, 0xdc, 0x7d, 0xda, 0x7e
2006324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver};
2007324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
2008324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \brief Common function to setup function interface for a EBCDIC input stream.
2009324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver///
2010324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \param input Input stream context pointer
2011324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver///
2012324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruvervoid
2013324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr3EBCDICSetupStream	(pANTLR3_INPUT_STREAM input)
2014324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{
2015324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    // EBCDIC streams can use the standard 8 bit string factory
2016324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    //
2017324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input->strFactory	= antlr3StringFactoryNew(input->encoding);
2018324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
2019324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    // Generic API that does not care about endianess.
2020324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    //
2021324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input->istream->_LA         = antlr3EBCDICLA;       // Return the UTF32 character at offset n (1 based)
2022324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input->charByteSize		= 1;	                // Size in bytes of characters in this stream.
2023324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver}
2024324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
2025324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \brief Return the input element assuming an 8 bit EBCDIC input
2026324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver///
2027324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \param[in] input Input stream context pointer
2028324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \param[in] la 1 based offset of next input stream element
2029324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver///
2030324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \return Next input character in internal ANTLR3 encoding (UTF32) after translation
2031324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver///         from EBCDIC to ASCII
2032324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver///
2033324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic ANTLR3_UCHAR
2034324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr3EBCDICLA(pANTLR3_INT_STREAM is, ANTLR3_INT32 la)
2035324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{
2036324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    pANTLR3_INPUT_STREAM input;
2037324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
2038324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    input   = ((pANTLR3_INPUT_STREAM) (is->super));
2039324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver
2040324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    if	(( ((pANTLR3_UINT8)input->nextChar) + la - 1) >= (((pANTLR3_UINT8)input->data) + input->sizeBuf))
2041324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    {
2042324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        return	ANTLR3_CHARSTREAM_EOF;
2043324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    }
2044324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    else
2045324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    {
2046324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        // Translate the required character via the constant conversion table
2047324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        //
2048324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver        return	e2a[(*((pANTLR3_UINT8)input->nextChar + la - 1))];
2049324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver    }
2050324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver}