antlr3string.c revision 324c4644fee44b9898524c09511bd33c3f12e2df
1/** \file
2 * Implementation of the ANTLR3 string and string factory classes
3 */
4
5// [The "BSD licence"]
6// Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC
7// http://www.temporal-wave.com
8// http://www.linkedin.com/in/jimidle
9//
10// All rights reserved.
11//
12// Redistribution and use in source and binary forms, with or without
13// modification, are permitted provided that the following conditions
14// are met:
15// 1. Redistributions of source code must retain the above copyright
16//    notice, this list of conditions and the following disclaimer.
17// 2. Redistributions in binary form must reproduce the above copyright
18//    notice, this list of conditions and the following disclaimer in the
19//    documentation and/or other materials provided with the distribution.
20// 3. The name of the author may not be used to endorse or promote products
21//    derived from this software without specific prior written permission.
22//
23// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
24// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
25// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
26// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
27// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
28// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
32// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33
34#include    <antlr3string.h>
35
36/* Factory API
37 */
38static    pANTLR3_STRING    newRaw8	(pANTLR3_STRING_FACTORY factory);
39static    pANTLR3_STRING    newRawUTF16	(pANTLR3_STRING_FACTORY factory);
40static    pANTLR3_STRING    newSize8	(pANTLR3_STRING_FACTORY factory, ANTLR3_UINT32 size);
41static    pANTLR3_STRING    newSizeUTF16	(pANTLR3_STRING_FACTORY factory, ANTLR3_UINT32 size);
42static    pANTLR3_STRING    newPtr8	(pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string, ANTLR3_UINT32 size);
43static    pANTLR3_STRING    newPtrUTF16_8	(pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string, ANTLR3_UINT32 size);
44static    pANTLR3_STRING    newPtrUTF16_UTF16	(pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string, ANTLR3_UINT32 size);
45static    pANTLR3_STRING    newStr8	(pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string);
46static    pANTLR3_STRING    newStrUTF16_8	(pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string);
47static    pANTLR3_STRING    newStrUTF16_UTF16	(pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string);
48static    void		    destroy	(pANTLR3_STRING_FACTORY factory, pANTLR3_STRING string);
49static    pANTLR3_STRING    printable8	(pANTLR3_STRING_FACTORY factory, pANTLR3_STRING string);
50static    pANTLR3_STRING    printableUTF16	(pANTLR3_STRING_FACTORY factory, pANTLR3_STRING string);
51static    void		    closeFactory(pANTLR3_STRING_FACTORY factory);
52
53/* String API
54 */
55static    pANTLR3_UINT8	    set8	(pANTLR3_STRING string, const char * chars);
56static    pANTLR3_UINT8	    setUTF16_8	(pANTLR3_STRING string, const char * chars);
57static    pANTLR3_UINT8	    setUTF16_UTF16	(pANTLR3_STRING string, const char * chars);
58static    pANTLR3_UINT8	    append8	(pANTLR3_STRING string, const char * newbit);
59static    pANTLR3_UINT8	    appendUTF16_8	(pANTLR3_STRING string, const char * newbit);
60static    pANTLR3_UINT8	    appendUTF16_UTF16	(pANTLR3_STRING string, const char * newbit);
61static	  pANTLR3_UINT8	    insert8	(pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit);
62static	  pANTLR3_UINT8	    insertUTF16_8	(pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit);
63static	  pANTLR3_UINT8	    insertUTF16_UTF16	(pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit);
64
65static    pANTLR3_UINT8	    setS	(pANTLR3_STRING string, pANTLR3_STRING chars);
66static    pANTLR3_UINT8	    appendS	(pANTLR3_STRING string, pANTLR3_STRING newbit);
67static	  pANTLR3_UINT8	    insertS	(pANTLR3_STRING string, ANTLR3_UINT32 point, pANTLR3_STRING newbit);
68
69static    pANTLR3_UINT8	    addc8	(pANTLR3_STRING string, ANTLR3_UINT32 c);
70static    pANTLR3_UINT8	    addcUTF16	(pANTLR3_STRING string, ANTLR3_UINT32 c);
71static    pANTLR3_UINT8	    addi8	(pANTLR3_STRING string, ANTLR3_INT32 i);
72static    pANTLR3_UINT8	    addiUTF16	(pANTLR3_STRING string, ANTLR3_INT32 i);
73static	  pANTLR3_UINT8	    inserti8	(pANTLR3_STRING string, ANTLR3_UINT32 point, ANTLR3_INT32 i);
74static	  pANTLR3_UINT8	    insertiUTF16	(pANTLR3_STRING string, ANTLR3_UINT32 point, ANTLR3_INT32 i);
75
76static    ANTLR3_UINT32     compare8	(pANTLR3_STRING string, const char * compStr);
77static    ANTLR3_UINT32     compareUTF16_8	(pANTLR3_STRING string, const char * compStr);
78static    ANTLR3_UINT32     compareUTF16_UTF16(pANTLR3_STRING string, const char * compStr);
79static    ANTLR3_UINT32     compareS	(pANTLR3_STRING string, pANTLR3_STRING compStr);
80static    ANTLR3_UCHAR      charAt8	(pANTLR3_STRING string, ANTLR3_UINT32 offset);
81static    ANTLR3_UCHAR      charAtUTF16	(pANTLR3_STRING string, ANTLR3_UINT32 offset);
82static    pANTLR3_STRING    subString8	(pANTLR3_STRING string, ANTLR3_UINT32 startIndex, ANTLR3_UINT32 endIndex);
83static    pANTLR3_STRING    subStringUTF16	(pANTLR3_STRING string, ANTLR3_UINT32 startIndex, ANTLR3_UINT32 endIndex);
84static	  ANTLR3_INT32	    toInt32_8	(pANTLR3_STRING string);
85static	  ANTLR3_INT32	    toInt32_UTF16  (pANTLR3_STRING string);
86static	  pANTLR3_STRING    to8_8		(pANTLR3_STRING string);
87static	  pANTLR3_STRING    to8_UTF16		(pANTLR3_STRING string);
88static	pANTLR3_STRING		toUTF8_8	(pANTLR3_STRING string);
89static	pANTLR3_STRING		toUTF8_UTF16	(pANTLR3_STRING string);
90
91/* Local helpers
92 */
93static	void			stringInit8	(pANTLR3_STRING string);
94static	void			stringInitUTF16	(pANTLR3_STRING string);
95static	void	ANTLR3_CDECL	stringFree	(pANTLR3_STRING string);
96
97ANTLR3_API pANTLR3_STRING_FACTORY
98antlr3StringFactoryNew(ANTLR3_UINT32 encoding)
99{
100	pANTLR3_STRING_FACTORY  factory;
101
102	/* Allocate memory
103	*/
104	factory	= (pANTLR3_STRING_FACTORY) ANTLR3_CALLOC(1, sizeof(ANTLR3_STRING_FACTORY));
105
106	if	(factory == NULL)
107	{
108		return	NULL;
109	}
110
111	/* Now we make a new list to track the strings.
112	*/
113	factory->strings	= antlr3VectorNew(0);
114	factory->index	= 0;
115
116	if	(factory->strings == NULL)
117	{
118		ANTLR3_FREE(factory);
119		return	NULL;
120	}
121
122    // Install the API
123    //
124    // TODO: These encodings need equivalent functions to
125    // UTF16 and 8Bit if I am going to support those encodings in the STRING stuff.
126	// The STRING stuff was intended as a quick and dirty hack for people that did not
127	// want to worry about memory and performance very much, but nobody ever reads the
128	// notes or comments or uses the email list search. I want to discourage using these
129	// interfaces as it is much more efficient to use the pointers within the tokens
130	// directly, so I am not implementing the string stuff for the newer encodings.
131    // We install the standard 8 and 16 bit functions for the UTF 8 and 16 but they
132	// will not be useful beyond returning the text.
133	//
134    switch(encoding)
135    {
136		case    ANTLR3_ENC_UTF32:
137			break;
138
139		case    ANTLR3_ENC_UTF32BE:
140			break;
141
142		case    ANTLR3_ENC_UTF32LE:
143			break;
144
145		case    ANTLR3_ENC_UTF16BE:
146		case    ANTLR3_ENC_UTF16LE:
147		case    ANTLR3_ENC_UTF16:
148
149			factory->newRaw	    =  newRawUTF16;
150			factory->newSize	=  newSizeUTF16;
151			factory->newPtr	    =  newPtrUTF16_UTF16;
152			factory->newPtr8	=  newPtrUTF16_8;
153			factory->newStr	    =  newStrUTF16_UTF16;
154			factory->newStr8	=  newStrUTF16_8;
155			factory->printable	=  printableUTF16;
156			factory->destroy	=  destroy;
157			factory->close	    =  closeFactory;
158			break;
159
160		case    ANTLR3_ENC_UTF8:
161		case    ANTLR3_ENC_EBCDIC:
162		case    ANTLR3_ENC_8BIT:
163		default:
164
165			factory->newRaw	    =  newRaw8;
166			factory->newSize	=  newSize8;
167			factory->newPtr	    =  newPtr8;
168			factory->newPtr8	=  newPtr8;
169			factory->newStr	    =  newStr8;
170			factory->newStr8	=  newStr8;
171			factory->printable	=  printable8;
172			factory->destroy	=  destroy;
173			factory->close	    =  closeFactory;
174			break;
175    }
176	return  factory;
177}
178
179
180/**
181 *
182 * \param factory
183 * \return
184 */
185static    pANTLR3_STRING
186newRaw8	(pANTLR3_STRING_FACTORY factory)
187{
188    pANTLR3_STRING  string;
189
190    string  = (pANTLR3_STRING) ANTLR3_MALLOC(sizeof(ANTLR3_STRING));
191
192    if	(string == NULL)
193    {
194		return	NULL;
195    }
196
197    /* Structure is allocated, now fill in the API etc.
198     */
199    stringInit8(string);
200    string->factory = factory;
201
202    /* Add the string into the allocated list
203     */
204    factory->strings->set(factory->strings, factory->index, (void *) string, (void (ANTLR3_CDECL *)(void *))(stringFree), ANTLR3_TRUE);
205    string->index   = factory->index++;
206
207    return string;
208}
209/**
210 *
211 * \param factory
212 * \return
213 */
214static    pANTLR3_STRING
215newRawUTF16	(pANTLR3_STRING_FACTORY factory)
216{
217    pANTLR3_STRING  string;
218
219    string  = (pANTLR3_STRING) ANTLR3_MALLOC(sizeof(ANTLR3_STRING));
220
221    if	(string == NULL)
222    {
223		return	NULL;
224    }
225
226    /* Structure is allocated, now fill in the API etc.
227     */
228    stringInitUTF16(string);
229    string->factory = factory;
230
231    /* Add the string into the allocated list
232     */
233    factory->strings->set(factory->strings, factory->index, (void *) string, (void (ANTLR3_CDECL *)(void *))(stringFree), ANTLR3_TRUE);
234    string->index   = factory->index++;
235
236    return string;
237}
238static
239void	ANTLR3_CDECL stringFree  (pANTLR3_STRING string)
240{
241    /* First free the string itself if there was anything in it
242     */
243    if	(string->chars)
244    {
245	ANTLR3_FREE(string->chars);
246    }
247
248    /* Now free the space for this string
249     */
250    ANTLR3_FREE(string);
251
252    return;
253}
254/**
255 *
256 * \param string
257 * \return
258 */
259static	void
260stringInit8  (pANTLR3_STRING string)
261{
262    string->len			= 0;
263    string->size		= 0;
264    string->chars		= NULL;
265    string->encoding	= ANTLR3_ENC_8BIT ;
266
267    /* API for 8 bit strings*/
268
269    string->set		= set8;
270    string->set8	= set8;
271    string->append	= append8;
272    string->append8	= append8;
273    string->insert	= insert8;
274    string->insert8	= insert8;
275    string->addi	= addi8;
276    string->inserti	= inserti8;
277    string->addc	= addc8;
278    string->charAt	= charAt8;
279    string->compare	= compare8;
280    string->compare8	= compare8;
281    string->subString	= subString8;
282    string->toInt32	= toInt32_8;
283    string->to8		= to8_8;
284    string->toUTF8	= toUTF8_8;
285    string->compareS	= compareS;
286    string->setS	= setS;
287    string->appendS	= appendS;
288    string->insertS	= insertS;
289
290}
291/**
292 *
293 * \param string
294 * \return
295 */
296static	void
297stringInitUTF16  (pANTLR3_STRING string)
298{
299    string->len		= 0;
300    string->size	= 0;
301    string->chars	= NULL;
302    string->encoding	= ANTLR3_ENC_8BIT;
303
304    /* API for UTF16 strings */
305
306    string->set		= setUTF16_UTF16;
307    string->set8	= setUTF16_8;
308    string->append	= appendUTF16_UTF16;
309    string->append8	= appendUTF16_8;
310    string->insert	= insertUTF16_UTF16;
311    string->insert8	= insertUTF16_8;
312    string->addi	= addiUTF16;
313    string->inserti	= insertiUTF16;
314    string->addc	= addcUTF16;
315    string->charAt	= charAtUTF16;
316    string->compare	= compareUTF16_UTF16;
317    string->compare8	= compareUTF16_8;
318    string->subString	= subStringUTF16;
319    string->toInt32	= toInt32_UTF16;
320    string->to8		= to8_UTF16;
321    string->toUTF8	= toUTF8_UTF16;
322
323    string->compareS	= compareS;
324    string->setS	= setS;
325    string->appendS	= appendS;
326    string->insertS	= insertS;
327}
328/**
329 *
330 * \param string
331 * \return
332 * TODO: Implement UTF-8
333 */
334static	void
335stringInitUTF8  (pANTLR3_STRING string)
336{
337    string->len	    = 0;
338    string->size    = 0;
339    string->chars   = NULL;
340
341    /* API */
342
343}
344
345// Convert an 8 bit string into a UTF8 representation, which is in fact just the string itself
346// a memcpy as we make no assumptions about the 8 bit encoding.
347//
348static	pANTLR3_STRING
349toUTF8_8	(pANTLR3_STRING string)
350{
351	return string->factory->newPtr(string->factory, (pANTLR3_UINT8)(string->chars), string->len);
352}
353
354// Convert a UTF16 string into a UTF8 representation using the Unicode.org
355// supplied C algorithms, which are now contained within the ANTLR3 C runtime
356// as permitted by the Unicode license (within the source code antlr3convertutf.c/.h
357// UCS2 has the same encoding as UTF16 so we can use UTF16 converter.
358//
359static	pANTLR3_STRING
360toUTF8_UTF16	(pANTLR3_STRING string)
361{
362
363    UTF8	      * outputEnd;
364    UTF16	      * inputEnd;
365    pANTLR3_STRING	utf8String;
366
367    ConversionResult	cResult;
368
369    // Allocate the output buffer, which needs to accommodate potentially
370    // 3X (in bytes) the input size (in chars).
371    //
372    utf8String	= string->factory->newStr8(string->factory, (pANTLR3_UINT8)"");
373
374    if	(utf8String != NULL)
375    {
376        // Free existing allocation
377        //
378        ANTLR3_FREE(utf8String->chars);
379
380        // Reallocate according to maximum expected size
381        //
382        utf8String->size	= string->len *3;
383        utf8String->chars	= (pANTLR3_UINT8)ANTLR3_MALLOC(utf8String->size +1);
384
385        if	(utf8String->chars != NULL)
386        {
387            inputEnd  = (UTF16 *)	(string->chars);
388            outputEnd = (UTF8 *)	(utf8String->chars);
389
390            // Call the Unicode converter
391            //
392            cResult =  ConvertUTF16toUTF8
393                (
394                (const UTF16**)&inputEnd,
395                ((const UTF16 *)(string->chars)) + string->len,
396                &outputEnd,
397                outputEnd + utf8String->size - 1,
398                lenientConversion
399                );
400
401            // We don't really care if things failed or not here, we just converted
402            // everything that was vaguely possible and stopped when it wasn't. It is
403            // up to the grammar programmer to verify that the input is sensible.
404            //
405            utf8String->len = ANTLR3_UINT32_CAST(((pANTLR3_UINT8)outputEnd) - utf8String->chars);
406
407            *(outputEnd+1) = '\0';		// Always null terminate
408        }
409    }
410    return utf8String;
411}
412
413/**
414 * Creates a new string with enough capacity for size 8 bit characters plus a terminator.
415 *
416 * \param[in] factory - Pointer to the string factory that owns strings
417 * \param[in] size - In characters
418 * \return pointer to the new string.
419 */
420static    pANTLR3_STRING
421newSize8	(pANTLR3_STRING_FACTORY factory, ANTLR3_UINT32 size)
422{
423    pANTLR3_STRING  string;
424
425    string  = factory->newRaw(factory);
426
427    if	(string == NULL)
428    {
429        return	string;
430    }
431
432    /* Always add one more byte for a terminator ;-)
433    */
434    string->chars	= (pANTLR3_UINT8) ANTLR3_MALLOC((size_t)(sizeof(ANTLR3_UINT8) * (size+1)));
435    *(string->chars)	= '\0';
436    string->size	= size + 1;
437
438
439    return string;
440}
441/**
442 * Creates a new string with enough capacity for size UTF16 characters plus a terminator.
443 *
444 * \param[in] factory - Pointer to the string factory that owns strings
445 * \param[in] size - In characters (count double for surrogate pairs!!!)
446 * \return pointer to the new string.
447 */
448static    pANTLR3_STRING
449newSizeUTF16	(pANTLR3_STRING_FACTORY factory, ANTLR3_UINT32 size)
450{
451    pANTLR3_STRING  string;
452
453    string  = factory->newRaw(factory);
454
455    if	(string == NULL)
456    {
457        return	string;
458    }
459
460    /* Always add one more byte for a terminator ;-)
461    */
462    string->chars	= (pANTLR3_UINT8) ANTLR3_MALLOC((size_t)(sizeof(ANTLR3_UINT16) * (size+1)));
463    *(string->chars)	= '\0';
464    string->size	= size+1;	/* Size is always in characters, as is len */
465
466    return string;
467}
468
469/** Creates a new 8 bit string initialized with the 8 bit characters at the
470 *  supplied ptr, of pre-determined size.
471 * \param[in] factory - Pointer to the string factory that owns the strings
472 * \param[in] ptr - Pointer to 8 bit encoded characters
473 * \return pointer to the new string
474 */
475static    pANTLR3_STRING
476newPtr8	(pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr, ANTLR3_UINT32 size)
477{
478	pANTLR3_STRING  string;
479
480	string  = factory->newSize(factory, size);
481
482	if	(string == NULL)
483	{
484		return	NULL;
485	}
486
487	if	(size <= 0)
488	{
489		return	string;
490	}
491
492	if	(ptr != NULL)
493	{
494		ANTLR3_MEMMOVE(string->chars, (const void *)ptr, size);
495		*(string->chars + size) = '\0';	    /* Terminate, these strings are usually used for Token streams and printing etc.	*/
496		string->len = size;
497	}
498
499	return  string;
500}
501
502/** Creates a new UTF16 string initialized with the 8 bit characters at the
503 *  supplied 8 bit character ptr, of pre-determined size.
504 * \param[in] factory - Pointer to the string factory that owns the strings
505 * \param[in] ptr - Pointer to 8 bit encoded characters
506 * \return pointer to the new string
507 */
508static    pANTLR3_STRING
509newPtrUTF16_8	(pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr, ANTLR3_UINT32 size)
510{
511	pANTLR3_STRING  string;
512
513	/* newSize accepts size in characters, not bytes
514	*/
515	string  = factory->newSize(factory, size);
516
517	if	(string == NULL)
518	{
519		return	NULL;
520	}
521
522	if	(size <= 0)
523	{
524		return	string;
525	}
526
527	if	(ptr != NULL)
528	{
529		pANTLR3_UINT16	out;
530		ANTLR3_INT32    inSize;
531
532		out = (pANTLR3_UINT16)(string->chars);
533		inSize	= size;
534
535		while	(inSize-- > 0)
536		{
537			*out++ = (ANTLR3_UINT16)(*ptr++);
538		}
539
540		/* Terminate, these strings are usually used for Token streams and printing etc.
541		*/
542		*(((pANTLR3_UINT16)(string->chars)) + size) = '\0';
543
544		string->len = size;
545	}
546
547	return  string;
548}
549
550/** Creates a new UTF16 string initialized with the UTF16 characters at the
551 *  supplied ptr, of pre-determined size.
552 * \param[in] factory - Pointer to the string factory that owns the strings
553 * \param[in] ptr - Pointer to UTF16 encoded characters
554 * \return pointer to the new string
555 */
556static    pANTLR3_STRING
557newPtrUTF16_UTF16	(pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr, ANTLR3_UINT32 size)
558{
559	pANTLR3_STRING  string;
560
561	string  = factory->newSize(factory, size);
562
563	if	(string == NULL)
564	{
565		return	NULL;
566	}
567
568	if	(size <= 0)
569	{
570		return	string;
571	}
572
573	if	(ptr != NULL)
574	{
575		ANTLR3_MEMMOVE(string->chars, (const void *)ptr, (size * sizeof(ANTLR3_UINT16)));
576
577		/* Terminate, these strings are usually used for Token streams and printing etc.
578		*/
579		*(((pANTLR3_UINT16)(string->chars)) + size) = '\0';
580		string->len = size;
581	}
582
583	return  string;
584}
585
586/** Create a new 8 bit string from the supplied, null terminated, 8 bit string pointer.
587 * \param[in] factory - Pointer to the string factory that owns strings.
588 * \param[in] ptr - Pointer to the 8 bit encoded string
589 * \return Pointer to the newly initialized string
590 */
591static    pANTLR3_STRING
592newStr8	(pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr)
593{
594    return factory->newPtr8(factory, ptr, (ANTLR3_UINT32)strlen((const char *)ptr));
595}
596
597/** Create a new UTF16 string from the supplied, null terminated, 8 bit string pointer.
598 * \param[in] factory - Pointer to the string factory that owns strings.
599 * \param[in] ptr - Pointer to the 8 bit encoded string
600 * \return POinter to the newly initialized string
601 */
602static    pANTLR3_STRING
603newStrUTF16_8	(pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr)
604{
605    return factory->newPtr8(factory, ptr, (ANTLR3_UINT32)strlen((const char *)ptr));
606}
607
608/** Create a new UTF16 string from the supplied, null terminated, UTF16 string pointer.
609 * \param[in] factory - Pointer to the string factory that owns strings.
610 * \param[in] ptr - Pointer to the UTF16 encoded string
611 * \return Pointer to the newly initialized string
612 */
613static    pANTLR3_STRING
614newStrUTF16_UTF16	(pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr)
615{
616    pANTLR3_UINT16  in;
617    ANTLR3_UINT32   count;
618
619    /** First, determine the length of the input string
620     */
621    in	    = (pANTLR3_UINT16)ptr;
622    count   = 0;
623
624    while   (*in++ != '\0')
625    {
626		count++;
627    }
628    return factory->newPtr(factory, ptr, count);
629}
630
631static    void
632destroy	(pANTLR3_STRING_FACTORY factory, pANTLR3_STRING string)
633{
634    // Record which string we are deleting
635    //
636    ANTLR3_UINT32 strIndex = string->index;
637
638    // Ensure that the string was not factory made, or we would try
639    // to delete memory that wasn't allocated outside the factory
640    // block.
641    // Remove the specific indexed string from the vector
642    //
643    factory->strings->del(factory->strings, strIndex);
644
645    // One less string in the vector, so decrement the factory index
646    // so that the next string allocated is indexed correctly with
647    // respect to the vector.
648    //
649    factory->index--;
650
651    // Now we have to reindex the strings in the vector that followed
652    // the one we just deleted. We only do this if the one we just deleted
653    // was not the last one.
654    //
655    if  (strIndex< factory->index)
656    {
657        // We must reindex the strings after the one we just deleted.
658        // The one that follows the one we just deleted is also out
659        // of whack, so we start there.
660        //
661        ANTLR3_UINT32 i;
662
663        for (i = strIndex; i < factory->index; i++)
664        {
665            // Renumber the entry
666            //
667            ((pANTLR3_STRING)(factory->strings->elements[i].element))->index = i;
668        }
669    }
670
671    // The string has been destroyed and the elements of the factory are reindexed.
672    //
673
674}
675
676static    pANTLR3_STRING
677printable8(pANTLR3_STRING_FACTORY factory, pANTLR3_STRING instr)
678{
679    pANTLR3_STRING  string;
680
681    /* We don't need to be too efficient here, this is mostly for error messages and so on.
682     */
683    pANTLR3_UINT8   scannedText;
684    ANTLR3_UINT32   i;
685
686    /* Assume we need as much as twice as much space to parse out the control characters
687     */
688    string  = factory->newSize(factory, instr->len *2 + 1);
689
690    /* Scan through and replace unprintable (in terms of this routine)
691     * characters
692     */
693    scannedText = string->chars;
694
695    for	(i = 0; i < instr->len; i++)
696    {
697		if (*(instr->chars + i) == '\n')
698		{
699			*scannedText++ = '\\';
700			*scannedText++ = 'n';
701		}
702		else if (*(instr->chars + i) == '\r')
703		{
704			*scannedText++ = '\\';
705			*scannedText++ = 'r';
706		}
707		else if	(!isprint(*(instr->chars +i)))
708		{
709			*scannedText++ = '?';
710		}
711		else
712		{
713			*scannedText++ = *(instr->chars + i);
714		}
715    }
716    *scannedText  = '\0';
717
718    string->len	= (ANTLR3_UINT32)(scannedText - string->chars);
719
720    return  string;
721}
722
723static    pANTLR3_STRING
724printableUTF16(pANTLR3_STRING_FACTORY factory, pANTLR3_STRING instr)
725{
726    pANTLR3_STRING  string;
727
728    /* We don't need to be too efficient here, this is mostly for error messages and so on.
729     */
730    pANTLR3_UINT16  scannedText;
731    pANTLR3_UINT16  inText;
732    ANTLR3_UINT32   i;
733    ANTLR3_UINT32   outLen;
734
735    /* Assume we need as much as twice as much space to parse out the control characters
736     */
737    string  = factory->newSize(factory, instr->len *2 + 1);
738
739    /* Scan through and replace unprintable (in terms of this routine)
740     * characters
741     */
742    scannedText = (pANTLR3_UINT16)(string->chars);
743    inText	= (pANTLR3_UINT16)(instr->chars);
744    outLen	= 0;
745
746    for	(i = 0; i < instr->len; i++)
747    {
748		if (*(inText + i) == '\n')
749		{
750			*scannedText++   = '\\';
751			*scannedText++   = 'n';
752			outLen	    += 2;
753		}
754		else if (*(inText + i) == '\r')
755		{
756			*scannedText++   = '\\';
757			*scannedText++   = 'r';
758			outLen	    += 2;
759		}
760		else if	(!isprint(*(inText +i)))
761		{
762			*scannedText++ = '?';
763			outLen++;
764		}
765		else
766		{
767			*scannedText++ = *(inText + i);
768			outLen++;
769		}
770    }
771    *scannedText  = '\0';
772
773    string->len	= outLen;
774
775    return  string;
776}
777
778/** Fascist Capitalist Pig function created
779 *  to oppress the workers comrade.
780 */
781static    void
782closeFactory	(pANTLR3_STRING_FACTORY factory)
783{
784    /* Delete the vector we were tracking the strings with, this will
785     * causes all the allocated strings to be deallocated too
786     */
787    factory->strings->free(factory->strings);
788
789    /* Delete the space for the factory itself
790     */
791    ANTLR3_FREE((void *)factory);
792}
793
794static    pANTLR3_UINT8
795append8	(pANTLR3_STRING string, const char * newbit)
796{
797    ANTLR3_UINT32 len;
798
799    len	= (ANTLR3_UINT32)strlen(newbit);
800
801    if	(string->size < (string->len + len + 1))
802    {
803		string->chars	= (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(string->len + len + 1));
804		string->size	= string->len + len + 1;
805    }
806
807    /* Note we copy one more byte than the strlen in order to get the trailing
808     */
809    ANTLR3_MEMMOVE((void *)(string->chars + string->len), newbit, (ANTLR3_UINT32)(len+1));
810    string->len	+= len;
811
812    return string->chars;
813}
814
815static    pANTLR3_UINT8
816appendUTF16_8	(pANTLR3_STRING string, const char * newbit)
817{
818    ANTLR3_UINT32   len;
819    pANTLR3_UINT16  apPoint;
820    ANTLR3_UINT32   count;
821
822    len	= (ANTLR3_UINT32)strlen(newbit);
823
824    if	(string->size < (string->len + len + 1))
825    {
826		string->chars	= (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)((sizeof(ANTLR3_UINT16)*(string->len + len + 1))));
827		string->size	= string->len + len + 1;
828    }
829
830    apPoint = ((pANTLR3_UINT16)string->chars) + string->len;
831    string->len	+= len;
832
833    for	(count = 0; count < len; count++)
834    {
835		*apPoint++   = *(newbit + count);
836    }
837    *apPoint = '\0';
838
839    return string->chars;
840}
841
842static    pANTLR3_UINT8
843appendUTF16_UTF16	(pANTLR3_STRING string, const char * newbit)
844{
845    ANTLR3_UINT32 len;
846    pANTLR3_UINT16  in;
847
848    /** First, determine the length of the input string
849     */
850    in	    = (pANTLR3_UINT16)newbit;
851    len   = 0;
852
853    while   (*in++ != '\0')
854    {
855		len++;
856    }
857
858    if	(string->size < (string->len + len + 1))
859    {
860		string->chars	= (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)( sizeof(ANTLR3_UINT16) *(string->len + len + 1) ));
861		string->size	= string->len + len + 1;
862    }
863
864    /* Note we copy one more byte than the strlen in order to get the trailing delimiter
865     */
866    ANTLR3_MEMMOVE((void *)(((pANTLR3_UINT16)string->chars) + string->len), newbit, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(len+1)));
867    string->len	+= len;
868
869    return string->chars;
870}
871
872static    pANTLR3_UINT8
873set8	(pANTLR3_STRING string, const char * chars)
874{
875    ANTLR3_UINT32	len;
876
877    len = (ANTLR3_UINT32)strlen(chars);
878    if	(string->size < len + 1)
879    {
880		string->chars	= (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(len + 1));
881		string->size	= len + 1;
882    }
883
884    /* Note we copy one more byte than the strlen in order to get the trailing '\0'
885     */
886    ANTLR3_MEMMOVE((void *)(string->chars), chars, (ANTLR3_UINT32)(len+1));
887    string->len	    = len;
888
889    return  string->chars;
890
891}
892
893static    pANTLR3_UINT8
894setUTF16_8	(pANTLR3_STRING string, const char * chars)
895{
896    ANTLR3_UINT32	len;
897    ANTLR3_UINT32	count;
898    pANTLR3_UINT16	apPoint;
899
900    len = (ANTLR3_UINT32)strlen(chars);
901    if	(string->size < len + 1)
902	{
903		string->chars	= (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(len + 1)));
904		string->size	= len + 1;
905    }
906    apPoint = ((pANTLR3_UINT16)string->chars);
907    string->len	= len;
908
909    for	(count = 0; count < string->len; count++)
910    {
911		*apPoint++   = *(chars + count);
912    }
913    *apPoint = '\0';
914
915    return  string->chars;
916}
917
918static    pANTLR3_UINT8
919setUTF16_UTF16    (pANTLR3_STRING string, const char * chars)
920{
921    ANTLR3_UINT32   len;
922    pANTLR3_UINT16  in;
923
924    /** First, determine the length of the input string
925     */
926    in	    = (pANTLR3_UINT16)chars;
927    len   = 0;
928
929    while   (*in++ != '\0')
930    {
931		len++;
932    }
933
934    if	(string->size < len + 1)
935    {
936		string->chars	= (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(len + 1)));
937		string->size	= len + 1;
938    }
939
940    /* Note we copy one more byte than the strlen in order to get the trailing '\0'
941     */
942    ANTLR3_MEMMOVE((void *)(string->chars), chars, (ANTLR3_UINT32)((len+1) * sizeof(ANTLR3_UINT16)));
943    string->len	    = len;
944
945    return  string->chars;
946
947}
948
949static    pANTLR3_UINT8
950addc8	(pANTLR3_STRING string, ANTLR3_UINT32 c)
951{
952    if	(string->size < string->len + 2)
953    {
954		string->chars	= (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(string->len + 2));
955		string->size	= string->len + 2;
956    }
957    *(string->chars + string->len)	= (ANTLR3_UINT8)c;
958    *(string->chars + string->len + 1)	= '\0';
959    string->len++;
960
961    return  string->chars;
962}
963
964static    pANTLR3_UINT8
965addcUTF16	(pANTLR3_STRING string, ANTLR3_UINT32 c)
966{
967    pANTLR3_UINT16  ptr;
968
969    if	(string->size < string->len + 2)
970    {
971		string->chars	= (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16) * (string->len + 2)));
972		string->size	= string->len + 2;
973    }
974    ptr	= (pANTLR3_UINT16)(string->chars);
975
976    *(ptr + string->len)	= (ANTLR3_UINT16)c;
977    *(ptr + string->len + 1)	= '\0';
978    string->len++;
979
980    return  string->chars;
981}
982
983static    pANTLR3_UINT8
984addi8	(pANTLR3_STRING string, ANTLR3_INT32 i)
985{
986    ANTLR3_UINT8	    newbit[32];
987
988    sprintf((char *)newbit, "%d", i);
989
990    return  string->append8(string, (const char *)newbit);
991}
992static    pANTLR3_UINT8
993addiUTF16	(pANTLR3_STRING string, ANTLR3_INT32 i)
994{
995    ANTLR3_UINT8	    newbit[32];
996
997    sprintf((char *)newbit, "%d", i);
998
999    return  string->append8(string, (const char *)newbit);
1000}
1001
1002static	  pANTLR3_UINT8
1003inserti8    (pANTLR3_STRING string, ANTLR3_UINT32 point, ANTLR3_INT32 i)
1004{
1005    ANTLR3_UINT8	    newbit[32];
1006
1007    sprintf((char *)newbit, "%d", i);
1008    return  string->insert8(string, point, (const char *)newbit);
1009}
1010static	  pANTLR3_UINT8
1011insertiUTF16    (pANTLR3_STRING string, ANTLR3_UINT32 point, ANTLR3_INT32 i)
1012{
1013    ANTLR3_UINT8	    newbit[32];
1014
1015    sprintf((char *)newbit, "%d", i);
1016    return  string->insert8(string, point, (const char *)newbit);
1017}
1018
1019static	pANTLR3_UINT8
1020insert8	(pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit)
1021{
1022    ANTLR3_UINT32	len;
1023
1024    if	(point >= string->len)
1025    {
1026		return	string->append(string, newbit);
1027    }
1028
1029    len	= (ANTLR3_UINT32)strlen(newbit);
1030
1031    if	(len == 0)
1032    {
1033		return	string->chars;
1034    }
1035
1036    if	(string->size < (string->len + len + 1))
1037    {
1038		string->chars	= (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(string->len + len + 1));
1039		string->size	= string->len + len + 1;
1040    }
1041
1042    /* Move the characters we are inserting before, including the delimiter
1043     */
1044    ANTLR3_MEMMOVE((void *)(string->chars + point + len), (void *)(string->chars + point), (ANTLR3_UINT32)(string->len - point + 1));
1045
1046    /* Note we copy the exact number of bytes
1047     */
1048    ANTLR3_MEMMOVE((void *)(string->chars + point), newbit, (ANTLR3_UINT32)(len));
1049
1050    string->len += len;
1051
1052    return  string->chars;
1053}
1054
1055static	pANTLR3_UINT8
1056insertUTF16_8	(pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit)
1057{
1058    ANTLR3_UINT32	len;
1059    ANTLR3_UINT32	count;
1060    pANTLR3_UINT16	inPoint;
1061
1062    if	(point >= string->len)
1063    {
1064		return	string->append8(string, newbit);
1065    }
1066
1067    len	= (ANTLR3_UINT32)strlen(newbit);
1068
1069    if	(len == 0)
1070    {
1071		return	string->chars;
1072    }
1073
1074    if	(string->size < (string->len + len + 1))
1075    {
1076	string->chars	= (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(string->len + len + 1)));
1077	string->size	= string->len + len + 1;
1078    }
1079
1080    /* Move the characters we are inserting before, including the delimiter
1081     */
1082    ANTLR3_MEMMOVE((void *)(((pANTLR3_UINT16)string->chars) + point + len), (void *)(((pANTLR3_UINT16)string->chars) + point), (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(string->len - point + 1)));
1083
1084    string->len += len;
1085
1086    inPoint = ((pANTLR3_UINT16)(string->chars))+point;
1087    for	(count = 0; count<len; count++)
1088    {
1089		*(inPoint + count) = (ANTLR3_UINT16)(*(newbit+count));
1090    }
1091
1092    return  string->chars;
1093}
1094
1095static	pANTLR3_UINT8
1096insertUTF16_UTF16	(pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit)
1097{
1098    ANTLR3_UINT32	len;
1099    pANTLR3_UINT16	in;
1100
1101    if	(point >= string->len)
1102    {
1103		return	string->append(string, newbit);
1104    }
1105
1106    /** First, determine the length of the input string
1107     */
1108    in	    = (pANTLR3_UINT16)newbit;
1109    len	    = 0;
1110
1111    while   (*in++ != '\0')
1112    {
1113		len++;
1114    }
1115
1116    if	(len == 0)
1117    {
1118		return	string->chars;
1119    }
1120
1121    if	(string->size < (string->len + len + 1))
1122    {
1123		string->chars	= (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(string->len + len + 1)));
1124		string->size	= string->len + len + 1;
1125    }
1126
1127    /* Move the characters we are inserting before, including the delimiter
1128     */
1129    ANTLR3_MEMMOVE((void *)(((pANTLR3_UINT16)string->chars) + point + len), (void *)(((pANTLR3_UINT16)string->chars) + point), (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(string->len - point + 1)));
1130
1131
1132    /* Note we copy the exact number of characters
1133     */
1134    ANTLR3_MEMMOVE((void *)(((pANTLR3_UINT16)string->chars) + point), newbit, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(len)));
1135
1136    string->len += len;
1137
1138    return  string->chars;
1139}
1140
1141static    pANTLR3_UINT8	    setS	(pANTLR3_STRING string, pANTLR3_STRING chars)
1142{
1143    return  string->set(string, (const char *)(chars->chars));
1144}
1145
1146static    pANTLR3_UINT8	    appendS	(pANTLR3_STRING string, pANTLR3_STRING newbit)
1147{
1148    /* We may be passed an empty string, in which case we just return the current pointer
1149     */
1150    if	(newbit == NULL || newbit->len == 0 || newbit->size == 0 || newbit->chars == NULL)
1151    {
1152		return	string->chars;
1153    }
1154    else
1155    {
1156		return  string->append(string, (const char *)(newbit->chars));
1157    }
1158}
1159
1160static	  pANTLR3_UINT8	    insertS	(pANTLR3_STRING string, ANTLR3_UINT32 point, pANTLR3_STRING newbit)
1161{
1162    return  string->insert(string, point, (const char *)(newbit->chars));
1163}
1164
1165/* Function that compares the text of a string to the supplied
1166 * 8 bit character string and returns a result a la strcmp()
1167 */
1168static ANTLR3_UINT32
1169compare8	(pANTLR3_STRING string, const char * compStr)
1170{
1171    return  strcmp((const char *)(string->chars), compStr);
1172}
1173
1174/* Function that compares the text of a string with the supplied character string
1175 * (which is assumed to be in the same encoding as the string itself) and returns a result
1176 * a la strcmp()
1177 */
1178static ANTLR3_UINT32
1179compareUTF16_8	(pANTLR3_STRING string, const char * compStr)
1180{
1181    pANTLR3_UINT16  ourString;
1182    ANTLR3_UINT32   charDiff;
1183
1184    ourString	= (pANTLR3_UINT16)(string->chars);
1185
1186    while   (((ANTLR3_UCHAR)(*ourString) != '\0') && ((ANTLR3_UCHAR)(*compStr) != '\0'))
1187    {
1188		charDiff = *ourString - *compStr;
1189		if  (charDiff != 0)
1190		{
1191			return charDiff;
1192		}
1193		ourString++;
1194		compStr++;
1195    }
1196
1197    /* At this point, one of the strings was terminated
1198     */
1199    return (ANTLR3_UINT32)((ANTLR3_UCHAR)(*ourString) - (ANTLR3_UCHAR)(*compStr));
1200
1201}
1202
1203/* Function that compares the text of a string with the supplied character string
1204 * (which is assumed to be in the same encoding as the string itself) and returns a result
1205 * a la strcmp()
1206 */
1207static ANTLR3_UINT32
1208compareUTF16_UTF16	(pANTLR3_STRING string, const char * compStr8)
1209{
1210    pANTLR3_UINT16  ourString;
1211    pANTLR3_UINT16  compStr;
1212    ANTLR3_UINT32   charDiff;
1213
1214    ourString	= (pANTLR3_UINT16)(string->chars);
1215    compStr	= (pANTLR3_UINT16)(compStr8);
1216
1217    while   (((ANTLR3_UCHAR)(*ourString) != '\0') && ((ANTLR3_UCHAR)(*((pANTLR3_UINT16)compStr)) != '\0'))
1218    {
1219		charDiff = *ourString - *compStr;
1220		if  (charDiff != 0)
1221		{
1222			return charDiff;
1223		}
1224		ourString++;
1225		compStr++;
1226    }
1227
1228    /* At this point, one of the strings was terminated
1229     */
1230    return (ANTLR3_UINT32)((ANTLR3_UCHAR)(*ourString) - (ANTLR3_UCHAR)(*compStr));
1231}
1232
1233/* Function that compares the text of a string with the supplied string
1234 * (which is assumed to be in the same encoding as the string itself) and returns a result
1235 * a la strcmp()
1236 */
1237static ANTLR3_UINT32
1238compareS    (pANTLR3_STRING string, pANTLR3_STRING compStr)
1239{
1240    return  string->compare(string, (const char *)compStr->chars);
1241}
1242
1243
1244/* Function that returns the character indexed at the supplied
1245 * offset as a 32 bit character.
1246 */
1247static ANTLR3_UCHAR
1248charAt8	    (pANTLR3_STRING string, ANTLR3_UINT32 offset)
1249{
1250    if	(offset > string->len)
1251    {
1252		return (ANTLR3_UCHAR)'\0';
1253    }
1254    else
1255    {
1256		return  (ANTLR3_UCHAR)(*(string->chars + offset));
1257    }
1258}
1259
1260/* Function that returns the character indexed at the supplied
1261 * offset as a 32 bit character.
1262 */
1263static ANTLR3_UCHAR
1264charAtUTF16    (pANTLR3_STRING string, ANTLR3_UINT32 offset)
1265{
1266    if	(offset > string->len)
1267    {
1268		return (ANTLR3_UCHAR)'\0';
1269    }
1270    else
1271    {
1272		return  (ANTLR3_UCHAR)(*((pANTLR3_UINT16)(string->chars) + offset));
1273    }
1274}
1275
1276/* Function that returns a substring of the supplied string a la .subString(s,e)
1277 * in java runtimes.
1278 */
1279static pANTLR3_STRING
1280subString8   (pANTLR3_STRING string, ANTLR3_UINT32 startIndex, ANTLR3_UINT32 endIndex)
1281{
1282    pANTLR3_STRING newStr;
1283
1284    if	(endIndex > string->len)
1285    {
1286		endIndex = string->len + 1;
1287    }
1288    newStr  = string->factory->newPtr(string->factory, string->chars + startIndex, endIndex - startIndex);
1289
1290    return newStr;
1291}
1292
1293/* Returns a substring of the supplied string a la .subString(s,e)
1294 * in java runtimes.
1295 */
1296static pANTLR3_STRING
1297subStringUTF16  (pANTLR3_STRING string, ANTLR3_UINT32 startIndex, ANTLR3_UINT32 endIndex)
1298{
1299    pANTLR3_STRING newStr;
1300
1301    if	(endIndex > string->len)
1302    {
1303		endIndex = string->len + 1;
1304    }
1305    newStr  = string->factory->newPtr(string->factory, (pANTLR3_UINT8)((pANTLR3_UINT16)(string->chars) + startIndex), endIndex - startIndex);
1306
1307    return newStr;
1308}
1309
1310/* Function that can convert the characters in the string to an integer
1311 */
1312static ANTLR3_INT32
1313toInt32_8	    (struct ANTLR3_STRING_struct * string)
1314{
1315    return  atoi((const char *)(string->chars));
1316}
1317
1318/* Function that can convert the characters in the string to an integer
1319 */
1320static ANTLR3_INT32
1321toInt32_UTF16       (struct ANTLR3_STRING_struct * string)
1322{
1323    pANTLR3_UINT16  input;
1324    ANTLR3_INT32   value;
1325    ANTLR3_BOOLEAN  negate;
1326
1327    value   = 0;
1328    input   = (pANTLR3_UINT16)(string->chars);
1329    negate  = ANTLR3_FALSE;
1330
1331    if	(*input == (ANTLR3_UCHAR)'-')
1332    {
1333		negate = ANTLR3_TRUE;
1334		input++;
1335    }
1336    else if (*input == (ANTLR3_UCHAR)'+')
1337    {
1338		input++;
1339    }
1340
1341    while   (*input != '\0' && isdigit(*input))
1342    {
1343		value	 = value * 10;
1344		value	+= ((ANTLR3_UINT32)(*input) - (ANTLR3_UINT32)'0');
1345		input++;
1346    }
1347
1348    return negate ? -value : value;
1349}
1350
1351/* Function that returns a pointer to an 8 bit version of the string,
1352 * which in this case is just the string as this is
1353 * 8 bit encodiing anyway.
1354 */
1355static	  pANTLR3_STRING	    to8_8	(pANTLR3_STRING string)
1356{
1357    return  string;
1358}
1359
1360/* Function that returns an 8 bit version of the string,
1361 * which in this case is returning all the UTF16 characters
1362 * narrowed back into 8 bits, with characters that are too large
1363 * replaced with '_'
1364 */
1365static	  pANTLR3_STRING    to8_UTF16	(pANTLR3_STRING string)
1366{
1367	pANTLR3_STRING  newStr;
1368	ANTLR3_UINT32   i;
1369
1370	/* Create a new 8 bit string
1371	*/
1372	newStr  = newRaw8(string->factory);
1373
1374	if	(newStr == NULL)
1375	{
1376		return	NULL;
1377	}
1378
1379	/* Always add one more byte for a terminator
1380	*/
1381	newStr->chars   = (pANTLR3_UINT8) ANTLR3_MALLOC((size_t)(string->len + 1));
1382	newStr->size    = string->len + 1;
1383	newStr->len	    = string->len;
1384
1385	/* Now copy each UTF16 charActer , making it an 8 bit character of
1386	* some sort.
1387	*/
1388	for	(i=0; i<string->len; i++)
1389	{
1390		ANTLR3_UCHAR	c;
1391
1392		c = *(((pANTLR3_UINT16)(string->chars)) + i);
1393
1394		*(newStr->chars + i) = (ANTLR3_UINT8)(c > 255 ? '_' : c);
1395	}
1396
1397	/* Terminate
1398	*/
1399	*(newStr->chars + newStr->len) = '\0';
1400
1401	return newStr;
1402}
1403