/// \file
/// Base functions to initialize and manipulate any input stream
///

// [The "BSD licence"]
// Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC
// http://www.temporal-wave.com
// http://www.linkedin.com/in/jimidle
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
//    derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 34324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver#include <antlr3input.h> 35324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 36324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// ----------------------------------- 37324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// Generic 8 bit input such as latin-1 38324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// 39324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 40324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// 8Bit INT Stream API 41324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// 42324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void antlr38BitConsume (pANTLR3_INT_STREAM is); 43324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic ANTLR3_UCHAR antlr38BitLA (pANTLR3_INT_STREAM is, ANTLR3_INT32 la); 44324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic ANTLR3_UCHAR antlr38BitLA_ucase (pANTLR3_INT_STREAM is, ANTLR3_INT32 la); 45324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic ANTLR3_MARKER antlr38BitIndex (pANTLR3_INT_STREAM is); 46324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic ANTLR3_MARKER antlr38BitMark (pANTLR3_INT_STREAM is); 47324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void antlr38BitRewind (pANTLR3_INT_STREAM is, ANTLR3_MARKER mark); 48324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void antlr38BitRewindLast (pANTLR3_INT_STREAM is); 49324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void antlr38BitRelease (pANTLR3_INT_STREAM is, ANTLR3_MARKER mark); 50324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void antlr38BitSeek (pANTLR3_INT_STREAM is, ANTLR3_MARKER seekPoint); 51324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic pANTLR3_STRING antlr38BitGetSourceName (pANTLR3_INT_STREAM is); 52324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 53324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// 8Bit Charstream API functions 54324c4644fee44b9898524c09511bd33c3f12e2dfBen 
Gruver// 55324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void antlr3InputClose (pANTLR3_INPUT_STREAM input); 56324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void antlr3InputReset (pANTLR3_INPUT_STREAM input); 57324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void antlr38BitReuse (pANTLR3_INPUT_STREAM input, pANTLR3_UINT8 inString, ANTLR3_UINT32 size, pANTLR3_UINT8 name); 58324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void * antlr38BitLT (pANTLR3_INPUT_STREAM input, ANTLR3_INT32 lt); 59324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic ANTLR3_UINT32 antlr38BitSize (pANTLR3_INPUT_STREAM input); 60324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic pANTLR3_STRING antlr38BitSubstr (pANTLR3_INPUT_STREAM input, ANTLR3_MARKER start, ANTLR3_MARKER stop); 61324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic ANTLR3_UINT32 antlr38BitGetLine (pANTLR3_INPUT_STREAM input); 62324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void * antlr38BitGetLineBuf (pANTLR3_INPUT_STREAM input); 63324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic ANTLR3_UINT32 antlr38BitGetCharPosition (pANTLR3_INPUT_STREAM input); 64324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void antlr38BitSetLine (pANTLR3_INPUT_STREAM input, ANTLR3_UINT32 line); 65324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void antlr38BitSetCharPosition (pANTLR3_INPUT_STREAM input, ANTLR3_UINT32 position); 66324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void antlr38BitSetNewLineChar (pANTLR3_INPUT_STREAM input, ANTLR3_UINT32 newlineChar); 67324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void antlr38BitSetUcaseLA (pANTLR3_INPUT_STREAM input, ANTLR3_BOOLEAN flag); 68324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 69324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// ----------------------------------- 70324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// UTF16 (also covers UCS2) 
71324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// 72324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// INT Stream API 73324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// 74324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void antlr3UTF16Consume (pANTLR3_INT_STREAM is); 75324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic ANTLR3_UCHAR antlr3UTF16LA (pANTLR3_INT_STREAM is, ANTLR3_INT32 la); 76324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void antlr3UTF16ConsumeLE (pANTLR3_INT_STREAM is); 77324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic ANTLR3_UCHAR antlr3UTF16LALE (pANTLR3_INT_STREAM is, ANTLR3_INT32 la); 78324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void antlr3UTF16ConsumeBE (pANTLR3_INT_STREAM is); 79324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic ANTLR3_UCHAR antlr3UTF16LABE (pANTLR3_INT_STREAM is, ANTLR3_INT32 la); 80324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic ANTLR3_MARKER antlr3UTF16Index (pANTLR3_INT_STREAM is); 81324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void antlr3UTF16Seek (pANTLR3_INT_STREAM is, ANTLR3_MARKER seekPoint); 82324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 83324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// UTF16 Charstream API functions 84324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// 85324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic pANTLR3_STRING antlr3UTF16Substr (pANTLR3_INPUT_STREAM input, ANTLR3_MARKER start, ANTLR3_MARKER stop); 86324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 87324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// ----------------------------------- 88324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// UTF32 (also covers UCS2) 89324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// 90324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// INT Stream API 91324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// 92324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void antlr3UTF32Consume 
(pANTLR3_INT_STREAM is); 93324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic ANTLR3_UCHAR antlr3UTF32LA (pANTLR3_INT_STREAM is, ANTLR3_INT32 la); 94324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic ANTLR3_UCHAR antlr3UTF32LALE (pANTLR3_INT_STREAM is, ANTLR3_INT32 la); 95324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic ANTLR3_UCHAR antlr3UTF32LABE (pANTLR3_INT_STREAM is, ANTLR3_INT32 la); 96324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic ANTLR3_MARKER antlr3UTF32Index (pANTLR3_INT_STREAM is); 97324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void antlr3UTF32Seek (pANTLR3_INT_STREAM is, ANTLR3_MARKER seekPoint); 98324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 99324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// UTF16 Charstream API functions 100324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// 101324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic pANTLR3_STRING antlr3UTF32Substr (pANTLR3_INPUT_STREAM input, ANTLR3_MARKER start, ANTLR3_MARKER stop); 102324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 103324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// ------------------------------------ 104324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// UTF-8 105324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// 106324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void antlr3UTF8Consume (pANTLR3_INT_STREAM is); 107324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic ANTLR3_UCHAR antlr3UTF8LA (pANTLR3_INT_STREAM is, ANTLR3_INT32 la); 108324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 109324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// ------------------------------------ 110324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// EBCDIC 111324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// 112324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic ANTLR3_UCHAR antlr3EBCDICLA (pANTLR3_INT_STREAM is, ANTLR3_INT32 la); 113324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 
114324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \brief Common function to setup function interface for an 8 bit input stream. 115324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// 116324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \param input Input stream context pointer 117324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// 118324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \remark 119324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// - Many of the 8 bit oriented file stream handling functions will be usable 120324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// by any or at least some, other input streams. Therefore it is perfectly acceptable 121324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// to call this function to install the 8Bit handler then override just those functions 122324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// that would not work for the particular input encoding, such as consume for instance. 123324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// 124324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruvervoid 125324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr38BitSetupStream (pANTLR3_INPUT_STREAM input) 126324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{ 127324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Build a string factory for this stream 128324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 129324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->strFactory = antlr3StringFactoryNew(input->encoding); 130324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 131324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Default stream API set up is for 8Bit, so we are done 132324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 133324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver} 134324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 135324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruvervoid 136324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr3GenericSetupStream (pANTLR3_INPUT_STREAM 
input) 137324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{ 138324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /* Install function pointers for an 8 bit input 139324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 140324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 141324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /* Allocate stream interface 142324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 143324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->istream = antlr3IntStreamNew(); 144324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->istream->type = ANTLR3_CHARSTREAM; 145324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->istream->super = input; 146324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 147324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /* Intstream API 148324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 149324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->istream->consume = antlr38BitConsume; // Consume the next 8 bit character in the buffer 150324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->istream->_LA = antlr38BitLA; // Return the UTF32 character at offset n (1 based) 151324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->istream->index = antlr38BitIndex; // Current index (offset from first character 152324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->istream->mark = antlr38BitMark; // Record the current lex state for later restore 153324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->istream->rewind = antlr38BitRewind; // How to rewind the input 154324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->istream->rewindLast = antlr38BitRewindLast; // How to rewind the input 155324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->istream->seek = antlr38BitSeek; // How to seek to a specific point in the stream 156324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->istream->release = antlr38BitRelease; // Reset marks after mark n 
157324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->istream->getSourceName = antlr38BitGetSourceName; // Return a string that names the input source 158324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 159324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /* Charstream API 160324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 161324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->close = antlr3InputClose; // Close down the stream completely 162324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->free = antlr3InputClose; // Synonym for free 163324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->reset = antlr3InputReset; // Reset input to start 164324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->reuse = antlr38BitReuse; // Install a new input string and reset 165324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->_LT = antlr38BitLT; // Same as _LA for 8 bit file 166324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->size = antlr38BitSize; // Return the size of the input buffer 167324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->substr = antlr38BitSubstr; // Return a string from the input stream 168324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->getLine = antlr38BitGetLine; // Return the current line number in the input stream 169324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->getLineBuf = antlr38BitGetLineBuf; // Return a pointer to the start of the current line being consumed 170324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->getCharPositionInLine = antlr38BitGetCharPosition; // Return the offset into the current line of input 171324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->setLine = antlr38BitSetLine; // Set the input stream line number (does not set buffer pointers) 172324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->setCharPositionInLine = antlr38BitSetCharPosition; // Set the offset in to the current line (does not set any pointers) 
173324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->SetNewLineChar = antlr38BitSetNewLineChar; // Set the value of the newline trigger character 174324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->setUcaseLA = antlr38BitSetUcaseLA; // Changes the LA function to return upper case always 175324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 176324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->charByteSize = 1; // Size in bytes of characters in this stream. 177324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 178324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /* Initialize entries for tables etc 179324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 180324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->markers = NULL; 181324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 182324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /* Set up the input stream brand new 183324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 184324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->reset(input); 185324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 186324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /* Install default line separator character (it can be replaced 187324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * by the grammar programmer later) 188324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 189324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->SetNewLineChar(input, (ANTLR3_UCHAR)'\n'); 190324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver} 191324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 192324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic pANTLR3_STRING 193324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr38BitGetSourceName(pANTLR3_INT_STREAM is) 194324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{ 195324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return is->streamName; 196324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver} 197324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 
198324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/** \brief Close down an input stream and free any memory allocated by it. 199324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 200324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \param input Input stream context pointer 201324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 202324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void 203324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr3InputClose(pANTLR3_INPUT_STREAM input) 204324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{ 205324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Close any markers in the input stream 206324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 207324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (input->markers != NULL) 208324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 209324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->markers->free(input->markers); 210324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->markers = NULL; 211324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 212324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 213324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Close the string factory 214324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 215324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (input->strFactory != NULL) 216324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 217324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->strFactory->close(input->strFactory); 218324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 219324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 220324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Free the input stream buffer if we allocated it 221324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 222324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (input->isAllocated && input->data != NULL) 223324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 224324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 
ANTLR3_FREE(input->data); 225324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->data = NULL; 226324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 227324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 228324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->istream->free(input->istream); 229324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 230324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Finally, free the space for the structure itself 231324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 232324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ANTLR3_FREE(input); 233324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 234324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Done 235324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 236324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver} 237324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 238324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void 239324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr38BitSetUcaseLA (pANTLR3_INPUT_STREAM input, ANTLR3_BOOLEAN flag) 240324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{ 241324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (flag) 242324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 243324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Return the upper case version of the characters 244324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 245324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->istream->_LA = antlr38BitLA_ucase; 246324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 247324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver else 248324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 249324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Return the raw characters as they are in the buffer 250324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 251324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->istream->_LA = antlr38BitLA; 252324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 
253324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver} 254324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 255324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 256324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/** \brief Reset a re-startable input stream to the start 257324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 258324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \param input Input stream context pointer 259324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 260324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void 261324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr3InputReset(pANTLR3_INPUT_STREAM input) 262324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{ 263324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 264324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->nextChar = input->data; /* Input at first character */ 265324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->line = 1; /* starts at line 1 */ 266324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->charPositionInLine = -1; 267324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->currentLine = input->data; 268324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->markDepth = 0; /* Reset markers */ 269324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 270324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /* Clear out up the markers table if it is there 271324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 272324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (input->markers != NULL) 273324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 274324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->markers->clear(input->markers); 275324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 276324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver else 277324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 278324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /* Install a new markers table 279324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 
280324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->markers = antlr3VectorNew(0); 281324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 282324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver} 283324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 284324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/** Install a new source code in to a working input stream so that the 285324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * input stream can be reused. 286324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 287324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void 288324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr38BitReuse(pANTLR3_INPUT_STREAM input, pANTLR3_UINT8 inString, ANTLR3_UINT32 size, pANTLR3_UINT8 name) 289324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{ 290324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->isAllocated = ANTLR3_FALSE; 291324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->data = inString; 292324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->sizeBuf = size; 293324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 294324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Now we can set up the file name. As we are reusing the stream, there may already 295324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // be a string that we can reuse for holding the filename. 296324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 297324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (input->istream->streamName == NULL) 298324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 299324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->istream->streamName = input->strFactory->newStr(input->strFactory, name == NULL ? 
(pANTLR3_UINT8)"-memory-" : name); 300324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->fileName = input->istream->streamName; 301324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 302324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver else 303324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 304324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->istream->streamName->set(input->istream->streamName, (name == NULL ? (const char *)"-memory-" : (const char *)name)); 305324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 306324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 307324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->reset(input); 308324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver} 309324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 310324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/** \brief Consume the next character in an 8 bit input stream 311324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 312324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \param input Input stream context pointer 313324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 314324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void 315324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr38BitConsume(pANTLR3_INT_STREAM is) 316324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{ 317324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver pANTLR3_INPUT_STREAM input; 318324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 319324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input = ((pANTLR3_INPUT_STREAM) (is->super)); 320324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 321324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ((pANTLR3_UINT8)(input->nextChar) < (((pANTLR3_UINT8)input->data) + input->sizeBuf)) 322324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 323324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /* Indicate one more character in this line 324324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 
325324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->charPositionInLine++; 326324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 327324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ((ANTLR3_UCHAR)(*((pANTLR3_UINT8)input->nextChar)) == input->newlineChar) 328324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 329324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /* Reset for start of a new line of input 330324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 331324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->line++; 332324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->charPositionInLine = 0; 333324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->currentLine = (void *)(((pANTLR3_UINT8)input->nextChar) + 1); 334324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 335324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 336324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /* Increment to next character position 337324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 338324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->nextChar = (void *)(((pANTLR3_UINT8)input->nextChar) + 1); 339324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 340324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver} 341324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 342324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/** \brief Return the input element assuming an 8 bit ascii input 343324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 344324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \param[in] input Input stream context pointer 345324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \param[in] la 1 based offset of next input stream element 346324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 347324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \return Next input character in internal ANTLR3 encoding (UTF32) 348324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 349324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic ANTLR3_UCHAR 
350324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr38BitLA(pANTLR3_INT_STREAM is, ANTLR3_INT32 la) 351324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{ 352324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver pANTLR3_INPUT_STREAM input; 353324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 354324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input = ((pANTLR3_INPUT_STREAM) (is->super)); 355324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 356324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (( ((pANTLR3_UINT8)input->nextChar) + la - 1) >= (((pANTLR3_UINT8)input->data) + input->sizeBuf)) 357324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 358324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return ANTLR3_CHARSTREAM_EOF; 359324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 360324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver else 361324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 362324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return (ANTLR3_UCHAR)(*((pANTLR3_UINT8)input->nextChar + la - 1)); 363324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 364324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver} 365324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 366324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/** \brief Return the input element assuming an 8 bit input and 367324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * always return the UPPER CASE character. 368324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * Note that this is 8 bit and so we assume that the toupper 369324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * function will use the correct locale for 8 bits. 
370324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 371324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \param[in] input Input stream context pointer 372324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \param[in] la 1 based offset of next input stream element 373324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 374324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \return Next input character in internal ANTLR3 encoding (UTF32) 375324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 376324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic ANTLR3_UCHAR 377324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr38BitLA_ucase (pANTLR3_INT_STREAM is, ANTLR3_INT32 la) 378324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{ 379324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver pANTLR3_INPUT_STREAM input; 380324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 381324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input = ((pANTLR3_INPUT_STREAM) (is->super)); 382324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 383324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (( ((pANTLR3_UINT8)input->nextChar) + la - 1) >= (((pANTLR3_UINT8)input->data) + input->sizeBuf)) 384324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 385324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return ANTLR3_CHARSTREAM_EOF; 386324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 387324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver else 388324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 389324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return (ANTLR3_UCHAR)toupper((*((pANTLR3_UINT8)input->nextChar + la - 1))); 390324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 391324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver} 392324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 393324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 394324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/** \brief Return the input element assuming an 8 bit ascii input 
395324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 396324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \param[in] input Input stream context pointer 397324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \param[in] lt 1 based offset of next input stream element 398324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 399324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \return Next input character in internal ANTLR3 encoding (UTF32) 400324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 401324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void * 402324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr38BitLT(pANTLR3_INPUT_STREAM input, ANTLR3_INT32 lt) 403324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{ 404324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /* Casting is horrible but it means no warnings and LT should never be called 405324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * on a character stream anyway I think. If it is then, the void * will need to be 406324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * cast back in a similar manner. Yuck! But this means that LT for Token streams and 407324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * tree streams is correct. 408324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 409324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return (ANTLR3_FUNC_PTR(input->istream->_LA(input->istream, lt))); 410324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver} 411324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 412324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/** \brief Calculate the current index in the output stream. 
413324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \param[in] input Input stream context pointer 414324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 415324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic ANTLR3_MARKER 416324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr38BitIndex(pANTLR3_INT_STREAM is) 417324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{ 418324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver pANTLR3_INPUT_STREAM input; 419324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 420324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input = ((pANTLR3_INPUT_STREAM) (is->super)); 421324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 422324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return (ANTLR3_MARKER)(((pANTLR3_UINT8)input->nextChar)); 423324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver} 424324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 425324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/** \brief Return the size of the current input stream, as an 8Bit file 426324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * which in this case is the total input. Other implementations may provide 427324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * more sophisticated implementations to deal with non-recoverable streams 428324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * and so on. 
429324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 430324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \param[in] input Input stream context pointer 431324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 432324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic ANTLR3_UINT32 433324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr38BitSize(pANTLR3_INPUT_STREAM input) 434324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{ 435324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return input->sizeBuf; 436324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver} 437324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 438324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/** \brief Mark the current input point in an 8Bit 8 bit stream 439324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * such as a file stream, where all the input is available in the 440324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * buffer. 441324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 442324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \param[in] is Input stream context pointer 443324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 444324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic ANTLR3_MARKER 445324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr38BitMark (pANTLR3_INT_STREAM is) 446324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{ 447324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver pANTLR3_LEX_STATE state; 448324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver pANTLR3_INPUT_STREAM input; 449324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 450324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input = ((pANTLR3_INPUT_STREAM) (is->super)); 451324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 452324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /* New mark point 453324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 454324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->markDepth++; 455324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 
456324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /* See if we are revisiting a mark as we can just reuse the vector 457324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * entry if we are, otherwise, we need a new one 458324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 459324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (input->markDepth > input->markers->count) 460324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 461324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver state = ANTLR3_MALLOC(sizeof(ANTLR3_LEX_STATE)); 462324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 463324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /* Add it to the table 464324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 465324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->markers->add(input->markers, state, ANTLR3_FREE_FUNC); /* No special structure, just free() on delete */ 466324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 467324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver else 468324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 469324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver state = (pANTLR3_LEX_STATE)input->markers->get(input->markers, input->markDepth - 1); 470324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 471324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /* Assume no errors for speed, it will just blow up if the table failed 472324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * for some reasons, hence lots of unit tests on the tables ;-) 473324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 474324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 475324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 476324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /* We have created or retrieved the state, so update it with the current 477324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * elements of the lexer state. 
478324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 479324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver state->charPositionInLine = input->charPositionInLine; 480324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver state->currentLine = input->currentLine; 481324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver state->line = input->line; 482324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver state->nextChar = input->nextChar; 483324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 484324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver is->lastMarker = input->markDepth; 485324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 486324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /* And that's it 487324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 488324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return input->markDepth; 489324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver} 490324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/** \brief Rewind the lexer input to the state specified by the last produced mark. 491324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 492324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \param[in] input Input stream context pointer 493324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 494324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \remark 495324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * Assumes 8 Bit input stream. 496324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 497324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void 498324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr38BitRewindLast (pANTLR3_INT_STREAM is) 499324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{ 500324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver is->rewind(is, is->lastMarker); 501324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver} 502324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 503324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/** \brief Rewind the lexer input to the state specified by the supplied mark. 
504324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 505324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \param[in] input Input stream context pointer 506324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 507324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \remark 508324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * Assumes 8 Bit input stream. 509324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 510324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void 511324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr38BitRewind (pANTLR3_INT_STREAM is, ANTLR3_MARKER mark) 512324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{ 513324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver pANTLR3_LEX_STATE state; 514324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver pANTLR3_INPUT_STREAM input; 515324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 516324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input = ((pANTLR3_INPUT_STREAM) is->super); 517324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 518324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /* Perform any clean up of the marks 519324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 520324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->istream->release(input->istream, mark); 521324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 522324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /* Find the supplied mark state 523324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 524324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver state = (pANTLR3_LEX_STATE)input->markers->get(input->markers, (ANTLR3_UINT32)(mark - 1)); 525324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 526324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /* Seek input pointer to the requested point (note we supply the void *pointer 527324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * to whatever is implementing the int stream to seek). 
528324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 529324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver antlr38BitSeek(is, (ANTLR3_MARKER)(state->nextChar)); 530324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 531324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /* Reset to the reset of the information in the mark 532324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 533324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->charPositionInLine = state->charPositionInLine; 534324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->currentLine = state->currentLine; 535324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->line = state->line; 536324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->nextChar = state->nextChar; 537324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 538324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /* And we are done 539324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 540324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver} 541324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 542324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/** \brief Rewind the lexer input to the state specified by the supplied mark. 543324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 544324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \param[in] input Input stream context pointer 545324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 546324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \remark 547324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * Assumes 8 Bit input stream. 
548324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 549324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void 550324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr38BitRelease (pANTLR3_INT_STREAM is, ANTLR3_MARKER mark) 551324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{ 552324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver pANTLR3_INPUT_STREAM input; 553324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 554324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input = ((pANTLR3_INPUT_STREAM) (is->super)); 555324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 556324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /* We don't do much here in fact as we never free any higher marks in 557324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * the hashtable as we just resuse any memory allocated for them. 558324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 559324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->markDepth = (ANTLR3_UINT32)(mark - 1); 560324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver} 561324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 562324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/** \brief Rewind the lexer input to the state specified by the supplied mark. 563324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 564324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \param[in] input Input stream context pointer 565324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 566324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \remark 567324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * Assumes 8 Bit input stream. 
568324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 569324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void 570324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr38BitSeek (pANTLR3_INT_STREAM is, ANTLR3_MARKER seekPoint) 571324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{ 572324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ANTLR3_INT32 count; 573324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver pANTLR3_INPUT_STREAM input; 574324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 575324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input = ANTLR3_FUNC_PTR(((pANTLR3_INPUT_STREAM) is->super)); 576324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 577324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /* If the requested seek point is less than the current 578324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * input point, then we assume that we are resetting from a mark 579324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * and do not need to scan, but can just set to there. 
580324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 581324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (seekPoint <= (ANTLR3_MARKER)(input->nextChar)) 582324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 583324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->nextChar = ((pANTLR3_UINT8) seekPoint); 584324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 585324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver else 586324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 587324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver count = (ANTLR3_UINT32)(seekPoint - (ANTLR3_MARKER)(input->nextChar)); 588324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 589324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver while (count--) 590324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 591324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver is->consume(is); 592324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 593324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 594324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver} 595324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/** Return a substring of the 8 bit input stream in 596324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * newly allocated memory. 597324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 598324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \param input Input stream context pointer 599324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \param start Offset in input stream where the string starts 600324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \param stop Offset in the input stream where the string ends. 
601324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 602324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic pANTLR3_STRING 603324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr38BitSubstr (pANTLR3_INPUT_STREAM input, ANTLR3_MARKER start, ANTLR3_MARKER stop) 604324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{ 605324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return input->strFactory->newPtr(input->strFactory, (pANTLR3_UINT8)start, (ANTLR3_UINT32)(stop - start + 1)); 606324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver} 607324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 608324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/** \brief Return the line number as understood by the 8 bit input stream. 609324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 610324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \param input Input stream context pointer 611324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \return Line number in input stream that we believe we are working on. 612324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 613324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic ANTLR3_UINT32 614324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr38BitGetLine (pANTLR3_INPUT_STREAM input) 615324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{ 616324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return input->line; 617324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver} 618324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 619324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/** Return a pointer into the input stream that points at the start 620324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * of the current input line as triggered by the end of line character installed 621324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * for the stream ('\n' unless told differently). 
622324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 623324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \param[in] input 624324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 625324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void * 626324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr38BitGetLineBuf (pANTLR3_INPUT_STREAM input) 627324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{ 628324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return input->currentLine; 629324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver} 630324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 631324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/** Return the current offset in to the current line in the input stream. 632324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 633324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \param input Input stream context pointer 634324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \return Current line offset 635324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 636324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic ANTLR3_UINT32 637324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr38BitGetCharPosition (pANTLR3_INPUT_STREAM input) 638324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{ 639324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return input->charPositionInLine; 640324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver} 641324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 642324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/** Set the current line number as understood by the input stream. 
643324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 644324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \param input Input stream context pointer 645324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \param line Line number to tell the input stream we are on 646324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 647324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \remark 648324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * This function does not change any pointers, it just allows the programmer to set the 649324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * line number according to some external criterion, such as finding a lexed directive 650324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * like: #nnn "file.c" for instance, such that error reporting and so on in is in sync 651324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * with some original source format. 652324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 653324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void 654324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr38BitSetLine (pANTLR3_INPUT_STREAM input, ANTLR3_UINT32 line) 655324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{ 656324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->line = line; 657324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver} 658324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 659324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/** Set the current offset in the current line to be a particular setting. 660324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 661324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \param[in] input Input stream context pointer 662324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \param[in] position New setting for current offset. 
663324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 664324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \remark 665324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * This does not set the actual pointers in the input stream, it is purely for reporting 666324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * purposes and so on as per antlr38BitSetLine(); 667324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 668324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void 669324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr38BitSetCharPosition (pANTLR3_INPUT_STREAM input, ANTLR3_UINT32 position) 670324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{ 671324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->charPositionInLine = position; 672324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver} 673324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 674324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/** Set the newline trigger character in the input stream to the supplied parameter. 675324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 676324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \param[in] input Input stream context pointer 677324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \param[in] newlineChar Character to set to be the newline trigger. 678324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 679324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \remark 680324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * - The supplied newLineChar is in UTF32 encoding (which means ASCII and latin1 etc 681324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * are the same encodings), but the input stream catered to by this function is 8 bit 682324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * only, so it is up to the programmer to ensure that the character supplied is valid. 
683324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 684324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void 685324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr38BitSetNewLineChar (pANTLR3_INPUT_STREAM input, ANTLR3_UINT32 newlineChar) 686324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{ 687324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->newlineChar = newlineChar; 688324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver} 689324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 690324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 691324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \brief Common function to setup function interface for a UTF16 or UCS2 input stream. 692324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// 693324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \param input Input stream context pointer 694324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// 695324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \remark 696324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// - Strictly speaking, there is no such thing as a UCS2 input stream as the term 697324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// tends to confuse the notions of character encoding, unicode and so on. UCS2 is 698324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// essentially UTF16 without any surrogates and so the standard UTF16 699324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// input stream is able to handle it without any special code. 700324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// 701324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruvervoid 702324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr3UTF16SetupStream (pANTLR3_INPUT_STREAM input, ANTLR3_BOOLEAN machineBigEndian, ANTLR3_BOOLEAN inputBigEndian) 703324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{ 704324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Build a string factory for this stream. 
This is a UTF16 string factory which is a standard 705324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // part of the ANTLR3 string. The string factory is then passed through the whole chain 706324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // of lexer->parser->tree->treeparser and so on. 707324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 708324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->strFactory = antlr3StringFactoryNew(input->encoding); 709324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 710324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Generic API that does not care about endianess. 711324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 712324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->istream->index = antlr3UTF16Index; // Calculate current index in input stream, UTF16 based 713324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->substr = antlr3UTF16Substr; // Return a string from the input stream 714324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->istream->seek = antlr3UTF16Seek; // How to seek to a specific point in the stream 715324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 716324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // We must install different UTF16 routines according to whether the input 717324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // is the same endianess as the machine we are executing upon or not. 
If it is not 718324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // then we must install methods that can convert the endianess on the fly as they go 719324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 720324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 721324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver switch (machineBigEndian) 722324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 723324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver case ANTLR3_TRUE: 724324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 725324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Machine is Big Endian, if the input is also then install the 726324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // methods that do not access input by bytes and reverse them. 727324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Otherwise install endian aware methods. 728324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 729324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (inputBigEndian == ANTLR3_TRUE) 730324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 731324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Input is machine compatible 732324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 733324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->istream->consume = antlr3UTF16Consume; // Consume the next UTF16 character in the buffer 734324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->istream->_LA = antlr3UTF16LA; // Return the UTF32 character at offset n (1 based) 735324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 736324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver else 737324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 738324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Need to use methods that know that the input is little endian 739324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 740324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->istream->consume = antlr3UTF16ConsumeLE; // Consume the next UTF16 character in the buffer 
741324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->istream->_LA = antlr3UTF16LALE; // Return the UTF32 character at offset n (1 based) 742324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 743324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver break; 744324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 745324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver case ANTLR3_FALSE: 746324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 747324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Machine is Little Endian, if the input is also then install the 748324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // methods that do not access input by bytes and reverse them. 749324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Otherwise install endian aware methods. 750324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 751324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (inputBigEndian == ANTLR3_FALSE) 752324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 753324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Input is machine compatible 754324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 755324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->istream->consume = antlr3UTF16Consume; // Consume the next UTF16 character in the buffer 756324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->istream->_LA = antlr3UTF16LA; // Return the UTF32 character at offset n (1 based) 757324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 758324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver else 759324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 760324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Need to use methods that know that the input is Big Endian 761324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 762324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->istream->consume = antlr3UTF16ConsumeBE; // Consume the next UTF16 character in the buffer 763324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->istream->_LA = 
antlr3UTF16LABE; // Return the UTF32 character at offset n (1 based) 764324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 765324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver break; 766324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 767324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 768324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 769324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->charByteSize = 2; // Size in bytes of characters in this stream. 770324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 771324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver} 772324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 773324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \brief Consume the next character in a UTF16 input stream 774324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// 775324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \param input Input stream context pointer 776324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// 777324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void 778324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr3UTF16Consume(pANTLR3_INT_STREAM is) 779324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{ 780324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver pANTLR3_INPUT_STREAM input; 781324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver UTF32 ch; 782324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver UTF32 ch2; 783324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 784324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input = ((pANTLR3_INPUT_STREAM) (is->super)); 785324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 786324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Buffer size is always in bytes 787324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 788324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ((pANTLR3_UINT8)(input->nextChar) < (((pANTLR3_UINT8)input->data) + input->sizeBuf)) 789324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 790324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 
// Indicate one more character in this line 791324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 792324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->charPositionInLine++; 793324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 794324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ((ANTLR3_UCHAR)(*((pANTLR3_UINT16)input->nextChar)) == input->newlineChar) 795324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 796324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Reset for start of a new line of input 797324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 798324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->line++; 799324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->charPositionInLine = 0; 800324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->currentLine = (void *)(((pANTLR3_UINT16)input->nextChar) + 1); 801324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 802324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 803324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Increment to next character position, accounting for any surrogates 804324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 805324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Next char in natural machine byte order 806324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 807324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ch = *((UTF16*)input->nextChar); 808324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 809324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // We consumed one 16 bit character 810324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 811324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->nextChar = (void *)(((pANTLR3_UINT16)input->nextChar) + 1); 812324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 813324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // If we have a surrogate pair then we need to consume 814324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // a following valid LO surrogate. 
815324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 816324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) { 817324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 818324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // If the 16 bits following the high surrogate are in the source buffer... 819324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 820324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ((pANTLR3_UINT8)(input->nextChar) < (((pANTLR3_UINT8)input->data) + input->sizeBuf)) 821324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 822324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Next character is in natural machine byte order 823324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 824324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ch2 = *((UTF16*)input->nextChar); 825324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 826324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // If it's a valid low surrogate, consume it 827324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 828324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) 829324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 830324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // We consumed one 16 bit character 831324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 832324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->nextChar = (void *)(((pANTLR3_UINT16)input->nextChar) + 1); 833324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 834324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Note that we ignore a valid hi surrogate that has no lo surrogate to go with 835324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // it. 
836324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 837324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 838324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Note that we ignore a valid hi surrogate that has no lo surrogate to go with 839324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // it because the buffer ended 840324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 841324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 842324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Note that we did not check for an invalid low surrogate here, or that fact that the 843324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // lo surrogate was missing. We just picked out one 16 bit character unless the character 844324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // was a valid hi surrogate, in whcih case we consumed two 16 bit characters. 845324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 846324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 847324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver} 848324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 849324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \brief Return the input element assuming an 8 bit ascii input 850324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// 851324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \param[in] input Input stream context pointer 852324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \param[in] la 1 based offset of next input stream element 853324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// 854324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \return Next input character in internal ANTLR3 encoding (UTF32) 855324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// 856324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic ANTLR3_UCHAR 857324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr3UTF16LA(pANTLR3_INT_STREAM is, ANTLR3_INT32 la) 858324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{ 859324c4644fee44b9898524c09511bd33c3f12e2dfBen 
Gruver pANTLR3_INPUT_STREAM input; 860324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver UTF32 ch; 861324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver UTF32 ch2; 862324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver UTF16 * nextChar; 863324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 864324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Find the input interface and where we are currently pointing to 865324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // in the input stream 866324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 867324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input = ((pANTLR3_INPUT_STREAM) (is->super)); 868324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver nextChar = input->nextChar; 869324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 870324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // If a positive offset then advance forward, else retreat 871324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 872324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (la >= 0) 873324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 874324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver while (--la > 0 && (pANTLR3_UINT8)nextChar < ((pANTLR3_UINT8)input->data) + input->sizeBuf ) 875324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 876324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Advance our copy of the input pointer 877324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 878324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Next char in natural machine byte order 879324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 880324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ch = *nextChar++; 881324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 882324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // If we have a surrogate pair then we need to consume 883324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // a following valid LO surrogate. 
884324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 885324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) 886324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 887324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // If the 16 bits following the high surrogate are in the source buffer... 888324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 889324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ((pANTLR3_UINT8)(nextChar) < (((pANTLR3_UINT8)input->data) + input->sizeBuf)) 890324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 891324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Next character is in natural machine byte order 892324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 893324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ch2 = *nextChar; 894324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 895324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // If it's a valid low surrogate, consume it 896324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 897324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) 898324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 899324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // We consumed one 16 bit character 900324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 901324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver nextChar++; 902324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 903324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Note that we ignore a valid hi surrogate that has no lo surrogate to go with 904324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // it. 
905324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 906324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 907324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Note that we ignore a valid hi surrogate that has no lo surrogate to go with 908324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // it because the buffer ended 909324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 910324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 911324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Note that we did not check for an invalid low surrogate here, or that fact that the 912324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // lo surrogate was missing. We just picked out one 16 bit character unless the character 913324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // was a valid hi surrogate, in whcih case we consumed two 16 bit characters. 914324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 915324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 916324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 917324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver else 918324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 919324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // We need to go backwards from our input point 920324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 921324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver while (la++ < 0 && (pANTLR3_UINT8)nextChar > (pANTLR3_UINT8)input->data ) 922324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 923324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Get the previous 16 bit character 924324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 925324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ch = *--nextChar; 926324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 927324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // If we found a low surrogate then go back one more character if 928324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // the hi surrogate is there 
929324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 930324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) 931324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 932324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ch2 = *(nextChar-1); 933324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (ch2 >= UNI_SUR_HIGH_START && ch2 <= UNI_SUR_HIGH_END) 934324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 935324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Yes, there is a high surrogate to match it so decrement one more and point to that 936324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 937324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver nextChar--; 938324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 939324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 940324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 941324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 942324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 943324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Our local copy of nextChar is now pointing to either the correct character or end of file 944324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 945324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Input buffer size is always in bytes 946324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 947324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ( (pANTLR3_UINT8)nextChar >= (((pANTLR3_UINT8)input->data) + input->sizeBuf)) 948324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 949324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return ANTLR3_CHARSTREAM_EOF; 950324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 951324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver else 952324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 953324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Pick up the next 16 character (native machine byte order) 954324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 
955324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ch = *nextChar++; 956324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 957324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // If we have a surrogate pair then we need to consume 958324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // a following valid LO surrogate. 959324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 960324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) 961324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 962324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // If the 16 bits following the high surrogate are in the source buffer... 963324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 964324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ((pANTLR3_UINT8)(nextChar) < (((pANTLR3_UINT8)input->data) + input->sizeBuf)) 965324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 966324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Next character is in natural machine byte order 967324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 968324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ch2 = *nextChar; 969324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 970324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // If it's a valid low surrogate, consume it 971324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 972324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) 973324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 974324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Construct the UTF32 code point 975324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 976324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ch = ((ch - UNI_SUR_HIGH_START) << halfShift) 977324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver + (ch2 - UNI_SUR_LOW_START) + halfBase; 978324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 979324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Note that we 
ignore a valid hi surrogate that has no lo surrogate to go with 980324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // it. 981324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 982324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 983324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Note that we ignore a valid hi surrogate that has no lo surrogate to go with 984324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // it because the buffer ended 985324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 986324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 987324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 988324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return ch; 989324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver} 990324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 991324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 992324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \brief Calculate the current index in the output stream. 993324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \param[in] input Input stream context pointer 994324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// 995324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic ANTLR3_MARKER 996324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr3UTF16Index(pANTLR3_INT_STREAM is) 997324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{ 998324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver pANTLR3_INPUT_STREAM input; 999324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1000324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input = ((pANTLR3_INPUT_STREAM) (is->super)); 1001324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1002324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return (ANTLR3_MARKER)(input->nextChar); 1003324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver} 1004324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1005324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \brief Rewind the lexer input to the state specified by the supplied mark. 
1006324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// 1007324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \param[in] input Input stream context pointer 1008324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// 1009324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \remark 1010324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// Assumes UTF16 input stream. 1011324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// 1012324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void 1013324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr3UTF16Seek (pANTLR3_INT_STREAM is, ANTLR3_MARKER seekPoint) 1014324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{ 1015324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver pANTLR3_INPUT_STREAM input; 1016324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1017324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input = ((pANTLR3_INPUT_STREAM) is->super); 1018324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1019324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // If the requested seek point is less than the current 1020324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // input point, then we assume that we are resetting from a mark 1021324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // and do not need to scan, but can just set to there as rewind will 1022324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // reset line numbers and so on. 
1023324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1024324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (seekPoint <= (ANTLR3_MARKER)(input->nextChar)) 1025324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 1026324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->nextChar = (void *)seekPoint; 1027324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 1028324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver else 1029324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 1030324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Call consume until we reach the asked for seek point or EOF 1031324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1032324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver while (is->_LA(is, 1) != ANTLR3_CHARSTREAM_EOF && seekPoint < (ANTLR3_MARKER)input->nextChar) 1033324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 1034324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver is->consume(is); 1035324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 1036324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 1037324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver} 1038324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \brief Return a substring of the UTF16 input stream in 1039324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// newly allocated memory. 1040324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// 1041324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \param input Input stream context pointer 1042324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \param start Offset in input stream where the string starts 1043324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \param stop Offset in the input stream where the string ends. 
1044324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// 1045324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic pANTLR3_STRING 1046324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr3UTF16Substr (pANTLR3_INPUT_STREAM input, ANTLR3_MARKER start, ANTLR3_MARKER stop) 1047324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{ 1048324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return input->strFactory->newPtr(input->strFactory, (pANTLR3_UINT8)start, ((ANTLR3_UINT32_CAST(stop - start))/2) + 1); 1049324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver} 1050324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1051324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \brief Consume the next character in a UTF16 input stream when the input is Little Endian and the machine is not 1052324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// Note that the UTF16 routines do not do any substantial verification of the input stream as for performance 1053324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// sake, we assume it is validly encoded. So if a low surrogate is found at the curent input position then we 1054324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// just consume it. Surrogate pairs should be seen as Hi, Lo. So if we have a Lo first, then the input stream 1055324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// is fubar but we just ignore that. 
1056324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// 1057324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \param input Input stream context pointer 1058324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// 1059324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void 1060324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr3UTF16ConsumeLE(pANTLR3_INT_STREAM is) 1061324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{ 1062324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver pANTLR3_INPUT_STREAM input; 1063324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver UTF32 ch; 1064324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver UTF32 ch2; 1065324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1066324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input = ((pANTLR3_INPUT_STREAM) (is->super)); 1067324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1068324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Buffer size is always in bytes 1069324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1070324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ((pANTLR3_UINT8)(input->nextChar) < (((pANTLR3_UINT8)input->data) + input->sizeBuf)) 1071324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 1072324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Indicate one more character in this line 1073324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1074324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->charPositionInLine++; 1075324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1076324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ((ANTLR3_UCHAR)(*((pANTLR3_UINT16)input->nextChar)) == input->newlineChar) 1077324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 1078324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Reset for start of a new line of input 1079324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1080324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->line++; 1081324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->charPositionInLine 
= 0; 1082324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->currentLine = (void *)(((pANTLR3_UINT16)input->nextChar) + 1); 1083324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 1084324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1085324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Increment to next character position, accounting for any surrogates 1086324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1087324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Next char in litle endian form 1088324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1089324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ch = *((pANTLR3_UINT8)input->nextChar) + (*((pANTLR3_UINT8)input->nextChar + 1) <<8); 1090324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1091324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // We consumed one 16 bit character 1092324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1093324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->nextChar = (void *)(((pANTLR3_UINT16)input->nextChar) + 1); 1094324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1095324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // If we have a surrogate pair then we need to consume 1096324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // a following valid LO surrogate. 1097324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1098324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) { 1099324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1100324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // If the 16 bits following the high surrogate are in the source buffer... 
1101324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1102324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ((pANTLR3_UINT8)(input->nextChar) < (((pANTLR3_UINT8)input->data) + input->sizeBuf)) 1103324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 1104324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ch2 = *((pANTLR3_UINT8)input->nextChar) + (*((pANTLR3_UINT8)input->nextChar + 1) <<8); 1105324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1106324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // If it's a valid low surrogate, consume it 1107324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1108324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) 1109324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 1110324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // We consumed one 16 bit character 1111324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1112324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->nextChar = (void *)(((pANTLR3_UINT16)input->nextChar) + 1); 1113324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 1114324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Note that we ignore a valid hi surrogate that has no lo surrogate to go with 1115324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // it. 1116324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1117324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 1118324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Note that we ignore a valid hi surrogate that has no lo surrogate to go with 1119324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // it because the buffer ended 1120324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1121324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 1122324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Note that we did not check for an invalid low surrogate here, or that fact that the 1123324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // lo surrogate was missing. 
We just picked out one 16 bit character unless the character 1124324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // was a valid hi surrogate, in whcih case we consumed two 16 bit characters. 1125324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1126324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 1127324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver} 1128324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1129324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \brief Return the input element assuming a UTF16 input when the input is Little Endian and the machine is not 1130324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// 1131324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \param[in] input Input stream context pointer 1132324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \param[in] la 1 based offset of next input stream element 1133324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// 1134324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \return Next input character in internal ANTLR3 encoding (UTF32) 1135324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// 1136324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic ANTLR3_UCHAR 1137324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr3UTF16LALE(pANTLR3_INT_STREAM is, ANTLR3_INT32 la) 1138324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{ 1139324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver pANTLR3_INPUT_STREAM input; 1140324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver UTF32 ch; 1141324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver UTF32 ch2; 1142324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver pANTLR3_UCHAR nextChar; 1143324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1144324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Find the input interface and where we are currently pointing to 1145324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // in the input stream 1146324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1147324c4644fee44b9898524c09511bd33c3f12e2dfBen 
Gruver input = ((pANTLR3_INPUT_STREAM) (is->super)); 1148324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver nextChar = input->nextChar; 1149324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1150324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // If a positive offset then advance forward, else retreat 1151324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1152324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (la >= 0) 1153324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 1154324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver while (--la > 0 && (pANTLR3_UINT8)nextChar < ((pANTLR3_UINT8)input->data) + input->sizeBuf ) 1155324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 1156324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Advance our copy of the input pointer 1157324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1158324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Next char in Little Endian byte order 1159324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1160324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ch = (*nextChar) + (*(nextChar+1) << 8); 1161324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver nextChar += 2; 1162324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1163324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // If we have a surrogate pair then we need to consume 1164324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // a following valid LO surrogate. 1165324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1166324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) 1167324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 1168324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // If the 16 bits following the high surrogate are in the source buffer... 
1169324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1170324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ((pANTLR3_UINT8)(nextChar) < (((pANTLR3_UINT8)input->data) + input->sizeBuf)) 1171324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 1172324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Next character is in little endian byte order 1173324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1174324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ch2 = (*nextChar) + (*(nextChar+1) << 8); 1175324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1176324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // If it's a valid low surrogate, consume it 1177324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1178324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) 1179324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 1180324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // We consumed one 16 bit character 1181324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1182324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver nextChar += 2; 1183324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 1184324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Note that we ignore a valid hi surrogate that has no lo surrogate to go with 1185324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // it. 
1186324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1187324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 1188324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Note that we ignore a valid hi surrogate that has no lo surrogate to go with 1189324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // it because the buffer ended 1190324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1191324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 1192324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Note that we did not check for an invalid low surrogate here, or that fact that the 1193324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // lo surrogate was missing. We just picked out one 16 bit character unless the character 1194324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // was a valid hi surrogate, in whcih case we consumed two 16 bit characters. 1195324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1196324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 1197324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 1198324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver else 1199324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 1200324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // We need to go backwards from our input point 1201324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1202324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver while (la++ < 0 && (pANTLR3_UINT8)nextChar > (pANTLR3_UINT8)input->data ) 1203324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 1204324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Get the previous 16 bit character 1205324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1206324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ch = (*nextChar - 2) + ((*nextChar -1) << 8); 1207324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver nextChar -= 2; 1208324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1209324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // If we found a low surrogate then go back one more 
character if 1210324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // the hi surrogate is there 1211324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1212324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) 1213324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 1214324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ch2 = (*nextChar - 2) + ((*nextChar -1) << 8); 1215324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (ch2 >= UNI_SUR_HIGH_START && ch2 <= UNI_SUR_HIGH_END) 1216324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 1217324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Yes, there is a high surrogate to match it so decrement one more and point to that 1218324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1219324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver nextChar -=2; 1220324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 1221324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 1222324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 1223324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 1224324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1225324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Our local copy of nextChar is now pointing to either the correct character or end of file 1226324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1227324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Input buffer size is always in bytes 1228324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1229324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ( (pANTLR3_UINT8)nextChar >= (((pANTLR3_UINT8)input->data) + input->sizeBuf)) 1230324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 1231324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return ANTLR3_CHARSTREAM_EOF; 1232324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 1233324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver else 1234324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 
1235324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Pick up the next 16 character (little endian byte order) 1236324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1237324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ch = (*nextChar) + (*(nextChar+1) << 8); 1238324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver nextChar += 2; 1239324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1240324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // If we have a surrogate pair then we need to consume 1241324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // a following valid LO surrogate. 1242324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1243324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) 1244324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 1245324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // If the 16 bits following the high surrogate are in the source buffer... 1246324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1247324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ((pANTLR3_UINT8)(nextChar) < (((pANTLR3_UINT8)input->data) + input->sizeBuf)) 1248324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 1249324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Next character is in little endian byte order 1250324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1251324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ch2 = (*nextChar) + (*(nextChar+1) << 8); 1252324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1253324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // If it's a valid low surrogate, consume it 1254324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1255324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) 1256324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 1257324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Construct the UTF32 code point 1258324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 
1259324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ch = ((ch - UNI_SUR_HIGH_START) << halfShift) 1260324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver + (ch2 - UNI_SUR_LOW_START) + halfBase; 1261324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 1262324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Note that we ignore a valid hi surrogate that has no lo surrogate to go with 1263324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // it. 1264324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1265324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 1266324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Note that we ignore a valid hi surrogate that has no lo surrogate to go with 1267324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // it because the buffer ended 1268324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1269324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 1270324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 1271324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return ch; 1272324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver} 1273324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1274324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \brief Consume the next character in a UTF16 input stream when the input is Big Endian and the machine is not 1275324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// 1276324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \param input Input stream context pointer 1277324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// 1278324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void 1279324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr3UTF16ConsumeBE(pANTLR3_INT_STREAM is) 1280324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{ 1281324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver pANTLR3_INPUT_STREAM input; 1282324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver UTF32 ch; 1283324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver UTF32 ch2; 
1284324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1285324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input = ((pANTLR3_INPUT_STREAM) (is->super)); 1286324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1287324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Buffer size is always in bytes 1288324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1289324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ((pANTLR3_UINT8)(input->nextChar) < (((pANTLR3_UINT8)input->data) + input->sizeBuf)) 1290324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 1291324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Indicate one more character in this line 1292324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1293324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->charPositionInLine++; 1294324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1295324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ((ANTLR3_UCHAR)(*((pANTLR3_UINT16)input->nextChar)) == input->newlineChar) 1296324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 1297324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Reset for start of a new line of input 1298324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1299324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->line++; 1300324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->charPositionInLine = 0; 1301324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->currentLine = (void *)(((pANTLR3_UINT16)input->nextChar) + 1); 1302324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 1303324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1304324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Increment to next character position, accounting for any surrogates 1305324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1306324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Next char in big endian form 1307324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1308324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ch = 
*((pANTLR3_UINT8)input->nextChar + 1) + (*((pANTLR3_UINT8)input->nextChar ) <<8); 1309324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1310324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // We consumed one 16 bit character 1311324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1312324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->nextChar = (void *)(((pANTLR3_UINT16)input->nextChar) + 1); 1313324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1314324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // If we have a surrogate pair then we need to consume 1315324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // a following valid LO surrogate. 1316324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1317324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) { 1318324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1319324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // If the 16 bits following the high surrogate are in the source buffer... 
1320324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1321324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ((pANTLR3_UINT8)(input->nextChar) < (((pANTLR3_UINT8)input->data) + input->sizeBuf)) 1322324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 1323324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Big endian 1324324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1325324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ch2 = *((pANTLR3_UINT8)input->nextChar + 1) + (*((pANTLR3_UINT8)input->nextChar ) <<8); 1326324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1327324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // If it's a valid low surrogate, consume it 1328324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1329324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) 1330324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 1331324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // We consumed one 16 bit character 1332324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1333324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->nextChar = (void *)(((pANTLR3_UINT16)input->nextChar) + 1); 1334324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 1335324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Note that we ignore a valid hi surrogate that has no lo surrogate to go with 1336324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // it. 
1337324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1338324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 1339324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Note that we ignore a valid hi surrogate that has no lo surrogate to go with 1340324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // it because the buffer ended 1341324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1342324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 1343324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Note that we did not check for an invalid low surrogate here, or that fact that the 1344324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // lo surrogate was missing. We just picked out one 16 bit character unless the character 1345324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // was a valid hi surrogate, in whcih case we consumed two 16 bit characters. 1346324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1347324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 1348324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver} 1349324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1350324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \brief Return the input element assuming a UTF16 input when the input is Little Endian and the machine is not 1351324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// 1352324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \param[in] input Input stream context pointer 1353324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \param[in] la 1 based offset of next input stream element 1354324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// 1355324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \return Next input character in internal ANTLR3 encoding (UTF32) 1356324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// 1357324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic ANTLR3_UCHAR 1358324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr3UTF16LABE(pANTLR3_INT_STREAM is, ANTLR3_INT32 la) 
1359324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{ 1360324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver pANTLR3_INPUT_STREAM input; 1361324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver UTF32 ch; 1362324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver UTF32 ch2; 1363324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver pANTLR3_UCHAR nextChar; 1364324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1365324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Find the input interface and where we are currently pointing to 1366324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // in the input stream 1367324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1368324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input = ((pANTLR3_INPUT_STREAM) (is->super)); 1369324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver nextChar = input->nextChar; 1370324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1371324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // If a positive offset then advance forward, else retreat 1372324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1373324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (la >= 0) 1374324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 1375324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver while (--la > 0 && (pANTLR3_UINT8)nextChar < ((pANTLR3_UINT8)input->data) + input->sizeBuf ) 1376324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 1377324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Advance our copy of the input pointer 1378324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1379324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Next char in Big Endian byte order 1380324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1381324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ch = ((*nextChar) << 8) + *(nextChar+1); 1382324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver nextChar += 2; 1383324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1384324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // If we have a 
surrogate pair then we need to consume 1385324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // a following valid LO surrogate. 1386324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1387324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) 1388324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 1389324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // If the 16 bits following the high surrogate are in the source buffer... 1390324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1391324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ((pANTLR3_UINT8)(nextChar) < (((pANTLR3_UINT8)input->data) + input->sizeBuf)) 1392324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 1393324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Next character is in big endian byte order 1394324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1395324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ch2 = ((*nextChar) << 8) + *(nextChar+1); 1396324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1397324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // If it's a valid low surrogate, consume it 1398324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1399324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) 1400324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 1401324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // We consumed one 16 bit character 1402324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1403324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver nextChar += 2; 1404324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 1405324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Note that we ignore a valid hi surrogate that has no lo surrogate to go with 1406324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // it. 
1407324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1408324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 1409324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Note that we ignore a valid hi surrogate that has no lo surrogate to go with 1410324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // it because the buffer ended 1411324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1412324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 1413324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Note that we did not check for an invalid low surrogate here, or that fact that the 1414324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // lo surrogate was missing. We just picked out one 16 bit character unless the character 1415324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // was a valid hi surrogate, in whcih case we consumed two 16 bit characters. 1416324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1417324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 1418324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 1419324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver else 1420324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 1421324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // We need to go backwards from our input point 1422324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1423324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver while (la++ < 0 && (pANTLR3_UINT8)nextChar > (pANTLR3_UINT8)input->data ) 1424324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 1425324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Get the previous 16 bit character 1426324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1427324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ch = ((*nextChar - 2) << 8) + (*nextChar -1); 1428324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver nextChar -= 2; 1429324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1430324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // If we found a low surrogate then go back one more 
character if 1431324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // the hi surrogate is there 1432324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1433324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) 1434324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 1435324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ch2 = ((*nextChar - 2) << 8) + (*nextChar -1); 1436324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (ch2 >= UNI_SUR_HIGH_START && ch2 <= UNI_SUR_HIGH_END) 1437324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 1438324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Yes, there is a high surrogate to match it so decrement one more and point to that 1439324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1440324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver nextChar -=2; 1441324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 1442324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 1443324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 1444324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 1445324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1446324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Our local copy of nextChar is now pointing to either the correct character or end of file 1447324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1448324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Input buffer size is always in bytes 1449324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1450324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ( (pANTLR3_UINT8)nextChar >= (((pANTLR3_UINT8)input->data) + input->sizeBuf)) 1451324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 1452324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return ANTLR3_CHARSTREAM_EOF; 1453324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 1454324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver else 1455324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 
1456324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Pick up the next 16 character (big endian byte order) 1457324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1458324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ch = ((*nextChar) << 8) + *(nextChar+1); 1459324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver nextChar += 2; 1460324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1461324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // If we have a surrogate pair then we need to consume 1462324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // a following valid LO surrogate. 1463324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1464324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) 1465324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 1466324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // If the 16 bits following the high surrogate are in the source buffer... 1467324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1468324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ((pANTLR3_UINT8)(nextChar) < (((pANTLR3_UINT8)input->data) + input->sizeBuf)) 1469324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 1470324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Next character is in big endian byte order 1471324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1472324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ch2 = ((*nextChar) << 8) + *(nextChar+1); 1473324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1474324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // If it's a valid low surrogate, consume it 1475324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1476324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) 1477324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 1478324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Construct the UTF32 code point 1479324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 
1480324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ch = ((ch - UNI_SUR_HIGH_START) << halfShift) 1481324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver + (ch2 - UNI_SUR_LOW_START) + halfBase; 1482324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 1483324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Note that we ignore a valid hi surrogate that has no lo surrogate to go with 1484324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // it. 1485324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1486324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 1487324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Note that we ignore a valid hi surrogate that has no lo surrogate to go with 1488324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // it because the buffer ended 1489324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1490324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 1491324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 1492324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return ch; 1493324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver} 1494324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1495324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \brief Common function to setup function interface for a UTF3 input stream. 1496324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// 1497324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \param input Input stream context pointer 1498324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// 1499324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruvervoid 1500324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr3UTF32SetupStream (pANTLR3_INPUT_STREAM input, ANTLR3_BOOLEAN machineBigEndian, ANTLR3_BOOLEAN inputBigEndian) 1501324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{ 1502324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Build a string factory for this stream. 
This is a UTF32 string factory which is a standard 1503324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // part of the ANTLR3 string. The string factory is then passed through the whole chain of lexer->parser->tree->treeparser 1504324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // and so on. 1505324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1506324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->strFactory = antlr3StringFactoryNew(input->encoding); 1507324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1508324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Generic API that does not care about endianess. 1509324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1510324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->istream->index = antlr3UTF32Index; // Calculate current index in input stream, UTF16 based 1511324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->substr = antlr3UTF32Substr; // Return a string from the input stream 1512324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->istream->seek = antlr3UTF32Seek; // How to seek to a specific point in the stream 1513324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->istream->consume = antlr3UTF32Consume; // Consume the next UTF32 character in the buffer 1514324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1515324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // We must install different UTF32 LA routines according to whether the input 1516324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // is the same endianess as the machine we are executing upon or not. 
If it is not 1517324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // then we must install methods that can convert the endianess on the fly as they go 1518324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1519324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver switch (machineBigEndian) 1520324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 1521324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver case ANTLR3_TRUE: 1522324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1523324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Machine is Big Endian, if the input is also then install the 1524324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // methods that do not access input by bytes and reverse them. 1525324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Otherwise install endian aware methods. 1526324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1527324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (inputBigEndian == ANTLR3_TRUE) 1528324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 1529324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Input is machine compatible 1530324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1531324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->istream->_LA = antlr3UTF32LA; // Return the UTF32 character at offset n (1 based) 1532324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 1533324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver else 1534324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 1535324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Need to use methods that know that the input is little endian 1536324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1537324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->istream->_LA = antlr3UTF32LALE; // Return the UTF32 character at offset n (1 based) 1538324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 1539324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver break; 1540324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 
1541324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver case ANTLR3_FALSE: 1542324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1543324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Machine is Little Endian, if the input is also then install the 1544324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // methods that do not access input by bytes and reverse them. 1545324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Otherwise install endian aware methods. 1546324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1547324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (inputBigEndian == ANTLR3_FALSE) 1548324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 1549324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Input is machine compatible 1550324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1551324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->istream->_LA = antlr3UTF32LA; // Return the UTF32 character at offset n (1 based) 1552324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 1553324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver else 1554324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 1555324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Need to use methods that know that the input is Big Endian 1556324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1557324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->istream->_LA = antlr3UTF32LABE; // Return the UTF32 character at offset n (1 based) 1558324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 1559324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver break; 1560324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 1561324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1562324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->charByteSize = 4; // Size in bytes of characters in this stream. 
1563324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver} 1564324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1565324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/** \brief Consume the next character in a UTF32 input stream 1566324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 1567324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \param input Input stream context pointer 1568324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 1569324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void 1570324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr3UTF32Consume(pANTLR3_INT_STREAM is) 1571324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{ 1572324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver pANTLR3_INPUT_STREAM input; 1573324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1574324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input = ((pANTLR3_INPUT_STREAM) (is->super)); 1575324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1576324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // SizeBuf is always in bytes 1577324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1578324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ((pANTLR3_UINT8)(input->nextChar) < (((pANTLR3_UINT8)input->data) + input->sizeBuf)) 1579324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 1580324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /* Indicate one more character in this line 1581324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 1582324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->charPositionInLine++; 1583324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1584324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if ((ANTLR3_UCHAR)(*((pANTLR3_UINT32)input->nextChar)) == input->newlineChar) 1585324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 1586324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /* Reset for start of a new line of input 1587324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 1588324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 
input->line++; 1589324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->charPositionInLine = 0; 1590324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->currentLine = (void *)(((pANTLR3_UINT32)input->nextChar) + 1); 1591324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 1592324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1593324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /* Increment to next character position 1594324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 1595324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->nextChar = (void *)(((pANTLR3_UINT32)input->nextChar) + 1); 1596324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 1597324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver} 1598324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1599324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \brief Calculate the current index in the output stream. 1600324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \param[in] input Input stream context pointer 1601324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// 1602324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic ANTLR3_MARKER 1603324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr3UTF32Index(pANTLR3_INT_STREAM is) 1604324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{ 1605324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver pANTLR3_INPUT_STREAM input; 1606324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1607324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input = ((pANTLR3_INPUT_STREAM) (is->super)); 1608324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1609324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return (ANTLR3_MARKER)(input->nextChar); 1610324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver} 1611324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1612324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \brief Return a substring of the UTF16 input stream in 1613324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// newly allocated memory. 
1614324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// 1615324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \param input Input stream context pointer 1616324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \param start Offset in input stream where the string starts 1617324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \param stop Offset in the input stream where the string ends. 1618324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// 1619324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic pANTLR3_STRING 1620324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr3UTF32Substr (pANTLR3_INPUT_STREAM input, ANTLR3_MARKER start, ANTLR3_MARKER stop) 1621324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{ 1622324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return input->strFactory->newPtr(input->strFactory, (pANTLR3_UINT8)start, ((ANTLR3_UINT32_CAST(stop - start))/4) + 1); 1623324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver} 1624324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1625324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \brief Rewind the lexer input to the state specified by the supplied mark. 1626324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// 1627324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \param[in] input Input stream context pointer 1628324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// 1629324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \remark 1630324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// Assumes UTF32 input stream. 
1631324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// 1632324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void 1633324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr3UTF32Seek (pANTLR3_INT_STREAM is, ANTLR3_MARKER seekPoint) 1634324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{ 1635324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver pANTLR3_INPUT_STREAM input; 1636324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1637324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input = ((pANTLR3_INPUT_STREAM) is->super); 1638324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1639324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // If the requested seek point is less than the current 1640324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // input point, then we assume that we are resetting from a mark 1641324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // and do not need to scan, but can just set to there as rewind will 1642324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // reset line numbers and so on. 
1643324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1644324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (seekPoint <= (ANTLR3_MARKER)(input->nextChar)) 1645324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 1646324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->nextChar = (void *)seekPoint; 1647324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 1648324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver else 1649324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 1650324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Call consume until we reach the asked for seek point or EOF 1651324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1652324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver while (is->_LA(is, 1) != ANTLR3_CHARSTREAM_EOF && seekPoint < (ANTLR3_MARKER)input->nextChar) 1653324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 1654324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver is->consume(is); 1655324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 1656324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 1657324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver} 1658324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1659324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/** \brief Return the input element assuming a UTF32 input in natural machine byte order 1660324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 1661324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \param[in] input Input stream context pointer 1662324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \param[in] la 1 based offset of next input stream element 1663324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 1664324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \return Next input character in internal ANTLR3 encoding (UTF32) 1665324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 1666324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic ANTLR3_UCHAR 1667324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr3UTF32LA(pANTLR3_INT_STREAM is, ANTLR3_INT32 
la) 1668324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{ 1669324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver pANTLR3_INPUT_STREAM input; 1670324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1671324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input = ((pANTLR3_INPUT_STREAM) (is->super)); 1672324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1673324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (( ((pANTLR3_UINT8)input->nextChar) + la - 1) >= (((pANTLR3_UINT8)input->data) + input->sizeBuf)) 1674324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 1675324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return ANTLR3_CHARSTREAM_EOF; 1676324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 1677324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver else 1678324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 1679324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return (ANTLR3_UCHAR)(*((pANTLR3_UINT32)input->nextChar + la - 1)); 1680324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 1681324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver} 1682324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1683324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/** \brief Return the input element assuming a UTF32 input in little endian byte order 1684324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 1685324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \param[in] input Input stream context pointer 1686324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \param[in] la 1 based offset of next input stream element 1687324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 1688324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \return Next input character in internal ANTLR3 encoding (UTF32) 1689324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 1690324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic ANTLR3_UCHAR 1691324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr3UTF32LALE(pANTLR3_INT_STREAM is, ANTLR3_INT32 la) 
1692324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{ 1693324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver pANTLR3_INPUT_STREAM input; 1694324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1695324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input = ((pANTLR3_INPUT_STREAM) (is->super)); 1696324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1697324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (( ((pANTLR3_UINT8)input->nextChar) + la - 1) >= (((pANTLR3_UINT8)input->data) + input->sizeBuf)) 1698324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 1699324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return ANTLR3_CHARSTREAM_EOF; 1700324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 1701324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver else 1702324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 1703324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ANTLR3_UCHAR c; 1704324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1705324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver c = (ANTLR3_UCHAR)(*((pANTLR3_UINT32)input->nextChar + la - 1)); 1706324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1707324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Swap Endianess to Big Endian 1708324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1709324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return (c>>24) | ((c<<8) & 0x00FF0000) | ((c>>8) & 0x0000FF00) | (c<<24); 1710324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 1711324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver} 1712324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1713324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/** \brief Return the input element assuming a UTF32 input in big endian byte order 1714324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 1715324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \param[in] input Input stream context pointer 1716324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \param[in] la 1 based offset of next input stream element 
1717324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 1718324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \return Next input character in internal ANTLR3 encoding (UTF32) 1719324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \remark This is the same code as LE version but seprated in case there are better optimisations fo rendinan swap 1720324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 1721324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic ANTLR3_UCHAR 1722324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr3UTF32LABE(pANTLR3_INT_STREAM is, ANTLR3_INT32 la) 1723324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{ 1724324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver pANTLR3_INPUT_STREAM input; 1725324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1726324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input = ((pANTLR3_INPUT_STREAM) (is->super)); 1727324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1728324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (( ((pANTLR3_UINT8)input->nextChar) + la - 1) >= (((pANTLR3_UINT8)input->data) + input->sizeBuf)) 1729324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 1730324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return ANTLR3_CHARSTREAM_EOF; 1731324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 1732324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver else 1733324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 1734324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ANTLR3_UCHAR c; 1735324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1736324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver c = (ANTLR3_UCHAR)(*((pANTLR3_UINT32)input->nextChar + la - 1)); 1737324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1738324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Swap Endianess to Little Endian 1739324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1740324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return (c>>24) | ((c<<8) & 0x00FF0000) | ((c>>8) & 0x0000FF00) | (c<<24); 
1741324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 1742324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver} 1743324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1744324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1745324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \brief Common function to setup function interface for a UTF8 input stream. 1746324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// 1747324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \param input Input stream context pointer 1748324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// 1749324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruvervoid 1750324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr3UTF8SetupStream (pANTLR3_INPUT_STREAM input) 1751324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{ 1752324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Build a string factory for this stream. This is a UTF16 string factory which is a standard 1753324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // part of the ANTLR3 string. The string factory is then passed through the whole chain of lexer->parser->tree->treeparser 1754324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // and so on. 1755324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1756324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->strFactory = antlr3StringFactoryNew(input->encoding); 1757324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1758324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Generic API that does not care about endianess. 
1759324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1760324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->istream->consume = antlr3UTF8Consume; // Consume the next UTF32 character in the buffer 1761324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->istream->_LA = antlr3UTF8LA; // Return the UTF32 character at offset n (1 based) 1762324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->charByteSize = 0; // Size in bytes of characters in this stream. 1763324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver} 1764324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1765324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// ------------------------------------------------------ 1766324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// Following is from Unicode.org (see antlr3convertutf.c) 1767324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// 1768324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1769324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// Index into the table below with the first byte of a UTF-8 sequence to 1770324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// get the number of trailing bytes that are supposed to follow it. 1771324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// Note that *legal* UTF-8 values can't have 4 or 5-bytes. The table is 1772324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// left as-is for anyone who may want to do such conversion, which was 1773324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// allowed in earlier algorithms. 
1774324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// 1775324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic const ANTLR3_UINT32 trailingBytesForUTF8[256] = { 1776324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 1777324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 1778324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 1779324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 1780324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 1781324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 1782324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1783324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 1784324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver}; 1785324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1786324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// Magic values subtracted from a buffer value during UTF8 conversion. 1787324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// This table contains as many values as there might be trailing bytes 1788324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// in a UTF-8 sequence. 
1789324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// 1790324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic const UTF32 offsetsFromUTF8[6] = 1791324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 0x00000000UL, 0x00003080UL, 0x000E2080UL, 1792324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 0x03C82080UL, 0xFA082080UL, 0x82082080UL 1793324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver }; 1794324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1795324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// End of Unicode.org tables 1796324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// ------------------------- 1797324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1798324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1799324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/** \brief Consume the next character in a UTF8 input stream 1800324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 1801324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \param input Input stream context pointer 1802324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 1803324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic void 1804324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr3UTF8Consume(pANTLR3_INT_STREAM is) 1805324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{ 1806324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver pANTLR3_INPUT_STREAM input; 1807324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ANTLR3_UINT32 extraBytesToRead; 1808324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ANTLR3_UCHAR ch; 1809324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver pANTLR3_UINT8 nextChar; 1810324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1811324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input = ((pANTLR3_INPUT_STREAM) (is->super)); 1812324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1813324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver nextChar = input->nextChar; 1814324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1815324c4644fee44b9898524c09511bd33c3f12e2dfBen 
Gruver if (nextChar < (((pANTLR3_UINT8)input->data) + input->sizeBuf)) 1816324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 1817324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Indicate one more character in this line 1818324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1819324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->charPositionInLine++; 1820324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1821324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Are there more bytes needed to make up the whole thing? 1822324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1823324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver extraBytesToRead = trailingBytesForUTF8[*nextChar]; 1824324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1825324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (nextChar + extraBytesToRead >= (((pANTLR3_UINT8)input->data) + input->sizeBuf)) 1826324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 1827324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->nextChar = (((pANTLR3_UINT8)input->data) + input->sizeBuf); 1828324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return; 1829324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 1830324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1831324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Cases deliberately fall through (see note A in antlrconvertutf.c) 1832324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Legal UTF8 is only 4 bytes but 6 bytes could be used in old UTF8 so 1833324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // we allow it. 
1834324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1835324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ch = 0; 1836324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver switch (extraBytesToRead) { 1837324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver case 5: ch += *nextChar++; ch <<= 6; 1838324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver case 4: ch += *nextChar++; ch <<= 6; 1839324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver case 3: ch += *nextChar++; ch <<= 6; 1840324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver case 2: ch += *nextChar++; ch <<= 6; 1841324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver case 1: ch += *nextChar++; ch <<= 6; 1842324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver case 0: ch += *nextChar++; 1843324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 1844324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1845324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Magically correct the input value 1846324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1847324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ch -= offsetsFromUTF8[extraBytesToRead]; 1848324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (ch == input->newlineChar) 1849324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 1850324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver /* Reset for start of a new line of input 1851324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 1852324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->line++; 1853324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->charPositionInLine = 0; 1854324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->currentLine = (void *)nextChar; 1855324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 1856324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1857324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Update input pointer 1858324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1859324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->nextChar = nextChar; 
1860324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 1861324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver} 1862324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/** \brief Return the input element assuming a UTF8 input 1863324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 1864324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \param[in] input Input stream context pointer 1865324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \param[in] la 1 based offset of next input stream element 1866324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * 1867324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver * \return Next input character in internal ANTLR3 encoding (UTF32) 1868324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver */ 1869324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic ANTLR3_UCHAR 1870324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr3UTF8LA(pANTLR3_INT_STREAM is, ANTLR3_INT32 la) 1871324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{ 1872324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver pANTLR3_INPUT_STREAM input; 1873324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ANTLR3_UINT32 extraBytesToRead; 1874324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ANTLR3_UCHAR ch; 1875324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver pANTLR3_UINT8 nextChar; 1876324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1877324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input = ((pANTLR3_INPUT_STREAM) (is->super)); 1878324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1879324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver nextChar = input->nextChar; 1880324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1881324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Do we need to traverse forwards or backwards? 1882324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // - LA(0) is treated as LA(1) and we assume that the nextChar is 1883324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // already positioned. 
1884324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // - LA(n+) ; n>1 means we must traverse forward n-1 characters catering for UTF8 encoding 1885324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // - LA(-n) means we must traverse backwards n chracters 1886324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1887324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (la > 1) { 1888324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1889324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Make sure that we have at least one character left before trying to 1890324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // loop through the buffer. 1891324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1892324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (nextChar < (((pANTLR3_UINT8)input->data) + input->sizeBuf)) 1893324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 1894324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Now traverse n-1 characters forward 1895324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1896324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver while (--la > 0) 1897324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 1898324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Does the next character require trailing bytes? 1899324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // If so advance the pointer by that many bytes as well as advancing 1900324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // one position for what will be at least a single byte character. 1901324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1902324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver nextChar += trailingBytesForUTF8[*nextChar] + 1; 1903324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1904324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Does that calculation take us past the byte length of the buffer? 
1905324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1906324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (nextChar >= (((pANTLR3_UINT8)input->data) + input->sizeBuf)) 1907324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 1908324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return ANTLR3_CHARSTREAM_EOF; 1909324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 1910324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 1911324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 1912324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver else 1913324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 1914324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return ANTLR3_CHARSTREAM_EOF; 1915324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 1916324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 1917324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver else 1918324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 1919324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // LA is negative so we decrease the pointer by n character positions 1920324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1921324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver while (nextChar > (pANTLR3_UINT8)input->data && la++ < 0) 1922324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 1923324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Traversing backwards in UTF8 means decermenting by one 1924324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // then continuing to decrement while ever a character pattern 1925324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // is flagged as being a trailing byte of an encoded code point. 1926324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Trailing UTF8 bytes always start with 10 in binary. 
We assumne that 1927324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // the UTF8 is well formed and do not check boundary conditions 1928324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1929324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver nextChar--; 1930324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver while ((*nextChar & 0xC0) == 0x80) 1931324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 1932324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver nextChar--; 1933324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 1934324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 1935324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 1936324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1937324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // nextChar is now pointing at the UTF8 encoded character that we need to 1938324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // decode and return. 1939324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1940324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Are there more bytes needed to make up the whole thing? 
1941324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1942324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver extraBytesToRead = trailingBytesForUTF8[*nextChar]; 1943324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (nextChar + extraBytesToRead >= (((pANTLR3_UINT8)input->data) + input->sizeBuf)) 1944324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 1945324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return ANTLR3_CHARSTREAM_EOF; 1946324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 1947324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1948324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Cases deliberately fall through (see note A in antlrconvertutf.c) 1949324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1950324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ch = 0; 1951324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver switch (extraBytesToRead) { 1952324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver case 5: ch += *nextChar++; ch <<= 6; 1953324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver case 4: ch += *nextChar++; ch <<= 6; 1954324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver case 3: ch += *nextChar++; ch <<= 6; 1955324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver case 2: ch += *nextChar++; ch <<= 6; 1956324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver case 1: ch += *nextChar++; ch <<= 6; 1957324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver case 0: ch += *nextChar++; 1958324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 1959324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1960324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Magically correct the input value 1961324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 1962324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver ch -= offsetsFromUTF8[extraBytesToRead]; 1963324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1964324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return ch; 1965324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver} 
1966324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 1967324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// EBCDIC to ASCII conversion table 1968324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// 1969324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// This for EBCDIC EDF04 translated to ISO-8859.1 which is the usually accepted POSIX 1970324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// translation and the character tables are published all over the interweb. 1971324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver// 1972324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverconst ANTLR3_UCHAR e2a[256] = 1973324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{ 1974324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 0x00, 0x01, 0x02, 0x03, 0x85, 0x09, 0x86, 0x7f, 1975324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 0x87, 0x8d, 0x8e, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 1976324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 0x10, 0x11, 0x12, 0x13, 0x8f, 0x0a, 0x08, 0x97, 1977324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 0x18, 0x19, 0x9c, 0x9d, 0x1c, 0x1d, 0x1e, 0x1f, 1978324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 0x80, 0x81, 0x82, 0x83, 0x84, 0x92, 0x17, 0x1b, 1979324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x05, 0x06, 0x07, 1980324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04, 1981324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 0x98, 0x99, 0x9a, 0x9b, 0x14, 0x15, 0x9e, 0x1a, 1982324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 0x20, 0xa0, 0xe2, 0xe4, 0xe0, 0xe1, 0xe3, 0xe5, 1983324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 0xe7, 0xf1, 0x60, 0x2e, 0x3c, 0x28, 0x2b, 0x7c, 1984324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 0x26, 0xe9, 0xea, 0xeb, 0xe8, 0xed, 0xee, 0xef, 1985324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 0xec, 0xdf, 0x21, 0x24, 0x2a, 0x29, 0x3b, 0x9f, 1986324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 0x2d, 0x2f, 0xc2, 0xc4, 0xc0, 0xc1, 
0xc3, 0xc5, 1987324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 0xc7, 0xd1, 0x5e, 0x2c, 0x25, 0x5f, 0x3e, 0x3f, 1988324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 0xf8, 0xc9, 0xca, 0xcb, 0xc8, 0xcd, 0xce, 0xcf, 1989324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 0xcc, 0xa8, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22, 1990324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 0xd8, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 1991324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 0x68, 0x69, 0xab, 0xbb, 0xf0, 0xfd, 0xfe, 0xb1, 1992324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 0xb0, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 1993324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 0x71, 0x72, 0xaa, 0xba, 0xe6, 0xb8, 0xc6, 0xa4, 1994324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 0xb5, 0xaf, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 1995324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 0x79, 0x7a, 0xa1, 0xbf, 0xd0, 0xdd, 0xde, 0xae, 1996324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 0xa2, 0xa3, 0xa5, 0xb7, 0xa9, 0xa7, 0xb6, 0xbc, 1997324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 0xbd, 0xbe, 0xac, 0x5b, 0x5c, 0x5d, 0xb4, 0xd7, 1998324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 0xf9, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 1999324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 0x48, 0x49, 0xad, 0xf4, 0xf6, 0xf2, 0xf3, 0xf5, 2000324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 0xa6, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 2001324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 0x51, 0x52, 0xb9, 0xfb, 0xfc, 0xdb, 0xfa, 0xff, 2002324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 0xd9, 0xf7, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 2003324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 0x59, 0x5a, 0xb2, 0xd4, 0xd6, 0xd2, 0xd3, 0xd5, 2004324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 2005324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 0x38, 0x39, 0xb3, 0x7b, 0xdc, 0x7d, 0xda, 0x7e 
2006324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver}; 2007324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 2008324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \brief Common function to setup function interface for a EBCDIC input stream. 2009324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// 2010324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \param input Input stream context pointer 2011324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// 2012324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruvervoid 2013324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr3EBCDICSetupStream (pANTLR3_INPUT_STREAM input) 2014324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{ 2015324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // EBCDIC streams can use the standard 8 bit string factory 2016324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 2017324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->strFactory = antlr3StringFactoryNew(input->encoding); 2018324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 2019324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Generic API that does not care about endianess. 2020324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 2021324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->istream->_LA = antlr3EBCDICLA; // Return the UTF32 character at offset n (1 based) 2022324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input->charByteSize = 1; // Size in bytes of characters in this stream. 
2023324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver} 2024324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 2025324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \brief Return the input element assuming an 8 bit EBCDIC input 2026324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// 2027324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \param[in] input Input stream context pointer 2028324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \param[in] la 1 based offset of next input stream element 2029324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// 2030324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// \return Next input character in internal ANTLR3 encoding (UTF32) after translation 2031324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// from EBCDIC to ASCII 2032324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver/// 2033324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverstatic ANTLR3_UCHAR 2034324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruverantlr3EBCDICLA(pANTLR3_INT_STREAM is, ANTLR3_INT32 la) 2035324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver{ 2036324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver pANTLR3_INPUT_STREAM input; 2037324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 2038324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver input = ((pANTLR3_INPUT_STREAM) (is->super)); 2039324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver 2040324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver if (( ((pANTLR3_UINT8)input->nextChar) + la - 1) >= (((pANTLR3_UINT8)input->data) + input->sizeBuf)) 2041324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 2042324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return ANTLR3_CHARSTREAM_EOF; 2043324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 2044324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver else 2045324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver { 2046324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // Translate the required character via the constant conversion table 
2047324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver // 2048324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver return e2a[(*((pANTLR3_UINT8)input->nextChar + la - 1))]; 2049324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver } 2050324c4644fee44b9898524c09511bd33c3f12e2dfBen Gruver}