1ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*
2ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*******************************************************************************
3ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*
483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius*   Copyright (C) 1998-2011, International Business Machines
5ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   Corporation and others.  All Rights Reserved.
6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*
7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*******************************************************************************
8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*
9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* File ucbuf.c
10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*
11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Modification History:
12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*
13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   Date        Name        Description
14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   05/10/01    Ram         Creation.
15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*******************************************************************************
16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/
17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h"
19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/putil.h"
2083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius#include "unicode/uchar.h"
21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/ucnv.h"
22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/ucnv_err.h"
2383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius#include "unicode/ustring.h"
2483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius#include "unicode/utf16.h"
25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "filestrm.h"
26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "cstring.h"
27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "cmemory.h"
28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "ustrfmt.h"
29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "ucbuf.h"
30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include <stdio.h>
31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if !UCONFIG_NO_CONVERSION
33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
/* Size in bytes of the on-stack staging array and of one read chunk in buffered mode. */
#define MAX_IN_BUF  1000

/*
 * NOTE(review): not referenced in this part of the file; presumably the UChar
 * capacity used when the buffer is allocated for buffered mode -- confirm there.
 */
#define MAX_U_BUF   1500

/* Capacity (excluding the terminating NUL) of the byte arrays that hold conversion-error context. */
#define CONTEXT_LEN 20
38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustruct UCHARBUF {
40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar* buffer;
41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar* currentPos;
42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar* bufLimit;
43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t bufCapacity;
44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t remaining;
45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t signatureLength;
46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    FileStream* in;
47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UConverter* conv;
48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UBool showWarning; /* makes this API not produce any errors */
49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UBool isBuffered;
50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru};
51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CAPI UBool U_EXPORT2
53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucbuf_autodetect_fs(FileStream* in, const char** cp, UConverter** conv, int32_t* signatureLength, UErrorCode* error){
54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    char start[8];
55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t numRead;
56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar target[1]={ 0 };
58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar* pTarget;
59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const char* pStart;
60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* read a few bytes */
62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    numRead=T_FileStream_read(in, start, sizeof(start));
63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    *cp = ucnv_detectUnicodeSignature(start, numRead, signatureLength, error);
65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* unread the bytes beyond what was consumed for U+FEFF */
67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    T_FileStream_rewind(in);
68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (*signatureLength > 0) {
6983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius        T_FileStream_read(in, start, *signatureLength);
70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(*cp==NULL){
73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        *conv =NULL;
74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return FALSE;
75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* open the converter for the detected Unicode charset */
78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    *conv = ucnv_open(*cp,error);
79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* convert and ignore initial U+FEFF, and the buffer overflow */
81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    pTarget = target;
82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    pStart = start;
83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    ucnv_toUnicode(*conv, &pTarget, target+1, &pStart, start+*signatureLength, NULL, FALSE, error);
84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    *signatureLength = (int32_t)(pStart - start);
85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(*error==U_BUFFER_OVERFLOW_ERROR) {
86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        *error=U_ZERO_ERROR;
87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* verify that we successfully read exactly U+FEFF */
90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(U_SUCCESS(*error) && (pTarget!=(target+1) || target[0]!=0xfeff)) {
91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        *error=U_INTERNAL_PROGRAM_ERROR;
92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return TRUE;
96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UBool ucbuf_isCPKnown(const char* cp){
98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(ucnv_compareNames("UTF-8",cp)==0){
99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return TRUE;
100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(ucnv_compareNames("UTF-16BE",cp)==0){
102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return TRUE;
103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(ucnv_compareNames("UTF-16LE",cp)==0){
105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return TRUE;
106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(ucnv_compareNames("UTF-16",cp)==0){
108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return TRUE;
109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(ucnv_compareNames("UTF-32",cp)==0){
111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return TRUE;
112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(ucnv_compareNames("UTF-32BE",cp)==0){
114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return TRUE;
115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(ucnv_compareNames("UTF-32LE",cp)==0){
117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return TRUE;
118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(ucnv_compareNames("SCSU",cp)==0){
120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return TRUE;
121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(ucnv_compareNames("BOCU-1",cp)==0){
123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return TRUE;
124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(ucnv_compareNames("UTF-7",cp)==0){
126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return TRUE;
127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return FALSE;
129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CAPI FileStream * U_EXPORT2
132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucbuf_autodetect(const char* fileName, const char** cp,UConverter** conv, int32_t* signatureLength,UErrorCode* error){
133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    FileStream* in=NULL;
134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(error==NULL || U_FAILURE(*error)){
135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return NULL;
136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(conv==NULL || cp==NULL || fileName==NULL){
138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        *error = U_ILLEGAL_ARGUMENT_ERROR;
139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return NULL;
140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* open the file */
142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    in= T_FileStream_open(fileName,"rb");
143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(in == NULL){
145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        *error=U_FILE_ACCESS_ERROR;
146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return NULL;
147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(ucbuf_autodetect_fs(in,cp,conv,signatureLength,error)) {
150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return in;
151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ucnv_close(*conv);
153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        *conv=NULL;
154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        T_FileStream_close(in);
155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return NULL;
156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* fill the uchar buffer */
160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UCHARBUF*
161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucbuf_fillucbuf( UCHARBUF* buf,UErrorCode* error){
162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar* pTarget=NULL;
163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar* target=NULL;
164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const char* source=NULL;
165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    char  carr[MAX_IN_BUF] = {'\0'};
166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    char* cbuf =  carr;
167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t inputRead=0;
168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t outputWritten=0;
169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t offset=0;
170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const char* sourceLimit =NULL;
171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t cbufSize=0;
172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    pTarget = buf->buffer;
173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* check if we arrived here without exhausting the buffer*/
174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(buf->currentPos<buf->bufLimit){
175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        offset = (int32_t)(buf->bufLimit-buf->currentPos);
176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        memmove(buf->buffer,buf->currentPos,offset* sizeof(UChar));
177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if DEBUG
180ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    memset(pTarget+offset,0xff,sizeof(UChar)*(MAX_IN_BUF-offset));
181ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif
182ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(buf->isBuffered){
183ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        cbufSize = MAX_IN_BUF;
184ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* read the file */
185ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        inputRead=T_FileStream_read(buf->in,cbuf,cbufSize-offset);
186ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        buf->remaining-=inputRead;
187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
188ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }else{
189ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        cbufSize = T_FileStream_size(buf->in);
190ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        cbuf = (char*)uprv_malloc(cbufSize);
19185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        if (cbuf == NULL) {
19285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        	*error = U_MEMORY_ALLOCATION_ERROR;
19385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        	return NULL;
19485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        }
195ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        inputRead= T_FileStream_read(buf->in,cbuf,cbufSize);
196ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        buf->remaining-=inputRead;
197ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
198ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
199ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* just to be sure...*/
200ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if ( 0 == inputRead )
201ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru       buf->remaining = 0;
202ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
203ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    target=pTarget;
204ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* convert the bytes */
205ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(buf->conv){
206ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* set the callback to stop */
207ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        UConverterToUCallback toUOldAction ;
208ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        void* toUOldContext;
209ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        void* toUNewContext=NULL;
210ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ucnv_setToUCallBack(buf->conv,
211ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru           UCNV_TO_U_CALLBACK_STOP,
212ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru           toUNewContext,
213ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru           &toUOldAction,
214ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru           (const void**)&toUOldContext,
215ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru           error);
216ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* since state is saved in the converter we add offset to source*/
217ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        target = pTarget+offset;
218ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        source = cbuf;
219ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        sourceLimit = source + inputRead;
220ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ucnv_toUnicode(buf->conv,&target,target+(buf->bufCapacity-offset),
221ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        &source,sourceLimit,NULL,
222ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        (UBool)(buf->remaining==0),error);
223ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
224ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(U_FAILURE(*error)){
22585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            char context[CONTEXT_LEN+1];
22685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            char preContext[CONTEXT_LEN+1];
22785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            char postContext[CONTEXT_LEN+1];
228ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            int8_t len = CONTEXT_LEN;
229ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            int32_t start=0;
230ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            int32_t stop =0;
231ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            int32_t pos =0;
232ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* use erro1 to preserve the error code */
233ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            UErrorCode error1 =U_ZERO_ERROR;
234ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
235ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if( buf->showWarning==TRUE){
236ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                fprintf(stderr,"\n###WARNING: Encountered abnormal bytes while"
237ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                               " converting input stream to target encoding: %s\n",
238ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                               u_errorName(*error));
239ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
240ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
241ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
242ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* now get the context chars */
243ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            ucnv_getInvalidChars(buf->conv,context,&len,&error1);
244ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            context[len]= 0 ; /* null terminate the buffer */
245ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
246ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            pos = (int32_t)(source - cbuf - len);
247ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
248ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* for pre-context */
249ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            start = (pos <=CONTEXT_LEN)? 0 : (pos - (CONTEXT_LEN-1));
250ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            stop  = pos-len;
251ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
252ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            memcpy(preContext,cbuf+start,stop-start);
253ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* null terminate the buffer */
254ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            preContext[stop-start] = 0;
255ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
256ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* for post-context */
257ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            start = pos+len;
258ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            stop  = (int32_t)(((pos+CONTEXT_LEN)<= (sourceLimit-cbuf) )? (pos+(CONTEXT_LEN-1)) : (sourceLimit-cbuf));
259ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
260ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            memcpy(postContext,source,stop-start);
261ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* null terminate the buffer */
262ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            postContext[stop-start] = 0;
263ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
264ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(buf->showWarning ==TRUE){
265ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* print out the context */
266ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                fprintf(stderr,"\tPre-context: %s\n",preContext);
267ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                fprintf(stderr,"\tContext: %s\n",context);
268ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                fprintf(stderr,"\tPost-context: %s\n", postContext);
269ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
270ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
271ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* reset the converter */
272ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            ucnv_reset(buf->conv);
273ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
274ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* set the call back to substitute
275ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             * and restart conversion
276ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             */
277ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            ucnv_setToUCallBack(buf->conv,
278ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru               UCNV_TO_U_CALLBACK_SUBSTITUTE,
279ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru               toUNewContext,
280ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru               &toUOldAction,
281ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru               (const void**)&toUOldContext,
282ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru               &error1);
283ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
284ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* reset source and target start positions */
285ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            target = pTarget+offset;
286ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            source = cbuf;
287ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
288ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* re convert */
289ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            ucnv_toUnicode(buf->conv,&target,target+(buf->bufCapacity-offset),
290ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            &source,sourceLimit,NULL,
291ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                            (UBool)(buf->remaining==0),&error1);
292ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
293ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
294ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        outputWritten = (int32_t)(target - pTarget);
295ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
296ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
297ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if DEBUG
298ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        {
299ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            int i;
300ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            target = pTarget;
301ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            for(i=0;i<numRead;i++){
302ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru              /*  printf("%c", (char)(*target++));*/
303ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
304ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
305ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif
306ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
307ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }else{
308ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        u_charsToUChars(cbuf,target+offset,inputRead);
309ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        outputWritten=((buf->remaining>cbufSize)? cbufSize:inputRead+offset);
310ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
311ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    buf->currentPos = pTarget;
312ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    buf->bufLimit=pTarget+outputWritten;
313ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    *buf->bufLimit=0; /*NUL terminate*/
314ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(cbuf!=carr){
315ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        uprv_free(cbuf);
316ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
317ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return buf;
318ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
319ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
320ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
321ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
322ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* get a UChar from the stream*/
323ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2
324ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucbuf_getc(UCHARBUF* buf,UErrorCode* error){
325ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(error==NULL || U_FAILURE(*error)){
326ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return FALSE;
327ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
328ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(buf->currentPos>=buf->bufLimit){
329ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(buf->remaining==0){
330ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            return U_EOF;
331ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
332ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        buf=ucbuf_fillucbuf(buf,error);
333ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(U_FAILURE(*error)){
334ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            return U_EOF;
335ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
336ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
337ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
338ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return *(buf->currentPos++);
339ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
340ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
341ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* get a UChar32 from the stream*/
342ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2
343ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucbuf_getc32(UCHARBUF* buf,UErrorCode* error){
344ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t retVal = (int32_t)U_EOF;
345ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(error==NULL || U_FAILURE(*error)){
346ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return FALSE;
347ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
348ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(buf->currentPos+1>=buf->bufLimit){
349ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(buf->remaining==0){
350ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            return U_EOF;
351ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
352ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        buf=ucbuf_fillucbuf(buf,error);
353ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(U_FAILURE(*error)){
354ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            return U_EOF;
355ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
356ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
35783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    if(U16_IS_LEAD(*(buf->currentPos))){
35883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius        retVal=U16_GET_SUPPLEMENTARY(buf->currentPos[0],buf->currentPos[1]);
359ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        buf->currentPos+=2;
360ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }else{
361ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        retVal = *(buf->currentPos++);
362ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
363ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return retVal;
364ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
365ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
366ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* u_unescapeAt() callback to return a UChar*/
367ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UChar U_CALLCONV
368ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_charAt(int32_t offset, void *context) {
369ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return ((UCHARBUF*) context)->currentPos[offset];
370ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
371ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
372ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* getc and escape it */
373ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2
374ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucbuf_getcx32(UCHARBUF* buf,UErrorCode* error) {
375ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t length;
376ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t offset;
377ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar32 c32,c1,c2;
378ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(error==NULL || U_FAILURE(*error)){
379ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return FALSE;
380ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
381ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* Fill the buffer if it is empty */
382ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (buf->currentPos >=buf->bufLimit-2) {
383ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ucbuf_fillucbuf(buf,error);
384ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
385ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
386ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* Get the next character in the buffer */
387ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (buf->currentPos < buf->bufLimit) {
388ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        c1 = *(buf->currentPos)++;
389ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
390ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        c1 = U_EOF;
391ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
392ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
393ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    c2 = *(buf->currentPos);
394ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
395ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* If it isn't a backslash, return it */
396ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (c1 != 0x005C) {
397ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return c1;
398ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
399ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
400ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* Determine the amount of data in the buffer */
401ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    length = (int32_t)(buf->bufLimit - buf->currentPos);
402ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
403ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* The longest escape sequence is \Uhhhhhhhh; make sure
404ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru       we have at least that many characters */
405ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (length < 10) {
406ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
407ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* fill the buffer */
408ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ucbuf_fillucbuf(buf,error);
409ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        length = (int32_t)(buf->bufLimit - buf->buffer);
410ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
411ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
412ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* Process the escape */
413ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    offset = 0;
414ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    c32 = u_unescapeAt(_charAt, &offset, length, (void*)buf);
415ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
416ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* check if u_unescapeAt unescaped and converted
417ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * to c32 or not
418ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
419ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(c32==0xFFFFFFFF){
420ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(buf->showWarning) {
42185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            char context[CONTEXT_LEN+1];
42285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            int32_t len = CONTEXT_LEN;
42385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            if(length < len) {
424ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                len = length;
425ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
426ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            context[len]= 0 ; /* null terminate the buffer */
427ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            u_UCharsToChars( buf->currentPos, context, len);
428ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            fprintf(stderr,"Bad escape: [%c%s]...\n", (int)c1, context);
429ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
430ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        *error= U_ILLEGAL_ESCAPE_SEQUENCE;
431ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return c1;
432ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }else if(c32!=c2 || (c32==0x0075 && c2==0x0075 && c1==0x005C) /* for \u0075 c2=0x0075 and c32==0x0075*/){
433ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* Update the current buffer position */
434ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        buf->currentPos += offset;
435ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }else{
436ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* unescaping failed so we just return
437ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         * c1 and not consume the buffer
438ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         * this is useful for rules with escapes
439ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         * in resouce bundles
440ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         * eg: \' \\ \"
441ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         */
442ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return c1;
443ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
444ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
445ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return c32;
446ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
447ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
448ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CAPI UCHARBUF* U_EXPORT2
449ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucbuf_open(const char* fileName,const char** cp,UBool showWarning, UBool buffered, UErrorCode* error){
450ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
451ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    FileStream* in = NULL;
452ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t fileSize=0;
453ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const char* knownCp;
454ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(error==NULL || U_FAILURE(*error)){
455ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return NULL;
456ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
457ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(cp==NULL || fileName==NULL){
458ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        *error = U_ILLEGAL_ARGUMENT_ERROR;
459ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return FALSE;
460ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
461ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (!uprv_strcmp(fileName, "-")) {
462ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        in = T_FileStream_stdin();
463ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }else{
464ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        in = T_FileStream_open(fileName, "rb");
465ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
466ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
467ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(in!=NULL){
468ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        UCHARBUF* buf =(UCHARBUF*) uprv_malloc(sizeof(UCHARBUF));
469ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        fileSize = T_FileStream_size(in);
470ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(buf == NULL){
471ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *error = U_MEMORY_ALLOCATION_ERROR;
472ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            T_FileStream_close(in);
473ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            return NULL;
474ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
475ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        buf->in=in;
476ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        buf->conv=NULL;
477ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        buf->showWarning = showWarning;
478ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        buf->isBuffered = buffered;
479ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        buf->signatureLength=0;
480ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(*cp==NULL || **cp=='\0'){
481ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* don't have code page name... try to autodetect */
482ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            ucbuf_autodetect_fs(in,cp,&buf->conv,&buf->signatureLength,error);
483ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }else if(ucbuf_isCPKnown(*cp)){
484ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* discard BOM */
485ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            ucbuf_autodetect_fs(in,&knownCp,&buf->conv,&buf->signatureLength,error);
486ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
487ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(U_SUCCESS(*error) && buf->conv==NULL) {
488ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            buf->conv=ucnv_open(*cp,error);
489ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
490ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(U_FAILURE(*error)){
491ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            ucnv_close(buf->conv);
492ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            uprv_free(buf);
493ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            T_FileStream_close(in);
494ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            return NULL;
495ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
496ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
497ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if((buf->conv==NULL) && (buf->showWarning==TRUE)){
498ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            fprintf(stderr,"###WARNING: No converter defined. Using codepage of system.\n");
499ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
500ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        buf->remaining=fileSize-buf->signatureLength;
501ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(buf->isBuffered){
502ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            buf->bufCapacity=MAX_U_BUF;
503ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }else{
504ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            buf->bufCapacity=buf->remaining+buf->signatureLength+1/*for terminating nul*/;
505ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
506ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        buf->buffer=(UChar*) uprv_malloc(U_SIZEOF_UCHAR * buf->bufCapacity );
507ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (buf->buffer == NULL) {
508ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *error = U_MEMORY_ALLOCATION_ERROR;
50985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            ucbuf_close(buf);
510ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            return NULL;
511ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
512ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        buf->currentPos=buf->buffer;
513ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        buf->bufLimit=buf->buffer;
514ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(U_FAILURE(*error)){
515ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            fprintf(stderr, "Could not open codepage [%s]: %s\n", *cp, u_errorName(*error));
51685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            ucbuf_close(buf);
51785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            return NULL;
51885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        }
51985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        ucbuf_fillucbuf(buf,error);
52085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        if(U_FAILURE(*error)){
52185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            ucbuf_close(buf);
522ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            return NULL;
523ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
524ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return buf;
525ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
526ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    *error =U_FILE_ACCESS_ERROR;
527ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return NULL;
528ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
529ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
530ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
531ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
532ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* TODO: this method will fail if at the
533ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * begining of buffer and the uchar to unget
534ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * is from the previous buffer. Need to implement
535ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * system to take care of that situation.
536ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
537ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CAPI void U_EXPORT2
538ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucbuf_ungetc(int32_t c,UCHARBUF* buf){
539ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* decrement currentPos pointer
540ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * if not at the begining of buffer
541ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
542ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(buf->currentPos!=buf->buffer){
543ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(*(buf->currentPos-1)==c){
544ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            buf->currentPos--;
54585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        } else {
54685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho            /* ungetc failed - did not match. */
547ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
54885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    } else {
54985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho       /* ungetc failed - beginning of buffer. */
550ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
551ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
552ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
553ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* frees the resources of UChar* buffer */
554ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void
555ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucbuf_closebuf(UCHARBUF* buf){
556ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uprv_free(buf->buffer);
557ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    buf->buffer = NULL;
558ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
559ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
560ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* close the buf and release resources*/
561ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CAPI void U_EXPORT2
562ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucbuf_close(UCHARBUF* buf){
563ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(buf!=NULL){
564ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(buf->conv){
565ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            ucnv_close(buf->conv);
566ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
567ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        T_FileStream_close(buf->in);
568ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ucbuf_closebuf(buf);
569ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        uprv_free(buf);
570ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
571ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
572ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
573ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* rewind the buf and file stream */
574ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CAPI void U_EXPORT2
575ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucbuf_rewind(UCHARBUF* buf,UErrorCode* error){
576ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(error==NULL || U_FAILURE(*error)){
577ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return;
578ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
579ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(buf){
580ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        buf->currentPos=buf->buffer;
581ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        buf->bufLimit=buf->buffer;
582ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        T_FileStream_rewind(buf->in);
583ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        buf->remaining=T_FileStream_size(buf->in)-buf->signatureLength;
584ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
585ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ucnv_resetToUnicode(buf->conv);
586ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(buf->signatureLength>0) {
587ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            UChar target[1]={ 0 };
588ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            UChar* pTarget;
589ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            char start[8];
590ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            const char* pStart;
591ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            int32_t numRead;
592ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
593ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* read the signature bytes */
594ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            numRead=T_FileStream_read(buf->in, start, buf->signatureLength);
595ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
596ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* convert and ignore initial U+FEFF, and the buffer overflow */
597ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            pTarget = target;
598ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            pStart = start;
599ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            ucnv_toUnicode(buf->conv, &pTarget, target+1, &pStart, start+numRead, NULL, FALSE, error);
600ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(*error==U_BUFFER_OVERFLOW_ERROR) {
601ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *error=U_ZERO_ERROR;
602ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
603ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
604ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* verify that we successfully read exactly U+FEFF */
605ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(U_SUCCESS(*error) && (numRead!=buf->signatureLength || pTarget!=(target+1) || target[0]!=0xfeff)) {
606ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *error=U_INTERNAL_PROGRAM_ERROR;
607ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
608ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
609ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
610ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
611ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
612ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
613ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2
614ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucbuf_size(UCHARBUF* buf){
615ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(buf){
616ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(buf->isBuffered){
617ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            return (T_FileStream_size(buf->in)-buf->signatureLength)/ucnv_getMinCharSize(buf->conv);
618ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }else{
619ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            return (int32_t)(buf->bufLimit - buf->buffer);
620ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
621ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
622ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return 0;
623ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
624ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
625ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CAPI const UChar* U_EXPORT2
626ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucbuf_getBuffer(UCHARBUF* buf,int32_t* len,UErrorCode* error){
627ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(error==NULL || U_FAILURE(*error)){
628ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return NULL;
629ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
630ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(buf==NULL || len==NULL){
631ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        *error = U_ILLEGAL_ARGUMENT_ERROR;
632ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return NULL;
633ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
634ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    *len = (int32_t)(buf->bufLimit - buf->buffer);
635ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return buf->buffer;
636ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
637ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
638ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CAPI const char* U_EXPORT2
639ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucbuf_resolveFileName(const char* inputDir, const char* fileName, char* target, int32_t* len, UErrorCode* status){
640ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t requiredLen = 0;
641ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t dirlen =  0;
642ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t filelen = 0;
643ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(status==NULL || U_FAILURE(*status)){
644ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return NULL;
645ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
646ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
647ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(inputDir == NULL || fileName == NULL || len==NULL || (target==NULL && *len>0)){
648ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        *status = U_ILLEGAL_ARGUMENT_ERROR;
649ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return NULL;
650ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
651ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
652ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
653ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    dirlen  = (int32_t)uprv_strlen(inputDir);
654ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    filelen = (int32_t)uprv_strlen(fileName);
655ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(inputDir[dirlen-1] != U_FILE_SEP_CHAR) {
656ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        requiredLen = dirlen + filelen + 2;
657ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if((*len < requiredLen) || target==NULL){
658ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *len = requiredLen;
659ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *status = U_BUFFER_OVERFLOW_ERROR;
660ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            return NULL;
661ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
662ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
663ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        target[0] = '\0';
664ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /*
665ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         * append the input dir to openFileName if the first char in
666ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         * filename is not file seperation char and the last char input directory is  not '.'.
667ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         * This is to support :
668ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         * genrb -s. /home/icu/data
669ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         * genrb -s. icu/data
670ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         * The user cannot mix notations like
671ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         * genrb -s. /icu/data --- the absolute path specified. -s redundant
672ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         * user should use
673ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         * genrb -s. icu/data  --- start from CWD and look in icu/data dir
674ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         */
675ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if( (fileName[0] != U_FILE_SEP_CHAR) && (inputDir[dirlen-1] !='.')){
676ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            uprv_strcpy(target, inputDir);
677ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            target[dirlen]     = U_FILE_SEP_CHAR;
678ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
679ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        target[dirlen + 1] = '\0';
680ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
681ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        requiredLen = dirlen + filelen + 1;
682ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if((*len < requiredLen) || target==NULL){
683ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *len = requiredLen;
684ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            *status = U_BUFFER_OVERFLOW_ERROR;
685ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            return NULL;
686ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
687ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
688ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        uprv_strcpy(target, inputDir);
689ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
690ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
691ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uprv_strcat(target, fileName);
692ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return target;
693ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
694ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*
695ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Unicode TR 13 says any of the below chars is
696ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * a new line char in a readline function in addition
697ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * to CR+LF combination which needs to be
698ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * handled seperately
699ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
700ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UBool ucbuf_isCharNewLine(UChar c){
701ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    switch(c){
702ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    case 0x000A: /* LF  */
703ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    case 0x000D: /* CR  */
704ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    case 0x000C: /* FF  */
705ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    case 0x0085: /* NEL */
706ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    case 0x2028: /* LS  */
707ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    case 0x2029: /* PS  */
708ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return TRUE;
709ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    default:
710ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return FALSE;
711ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
712ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
713ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
714ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CAPI const UChar* U_EXPORT2
715ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucbuf_readline(UCHARBUF* buf,int32_t* len,UErrorCode* err){
716ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar* temp = buf->currentPos;
717ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar* savePos =NULL;
718ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar c=0x0000;
719ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(buf->isBuffered){
720ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* The input is buffered we have to do more
721ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        * for returning a pointer U_TRUNCATED_CHAR_FOUND
722ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        */
723ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        for(;;){
724ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            c = *temp++;
725ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(buf->remaining==0){
726ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                return NULL; /* end of file is reached return NULL */
727ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
728ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(temp>=buf->bufLimit && buf->currentPos == buf->buffer){
729ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *err= U_TRUNCATED_CHAR_FOUND;
730ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                return NULL;
731ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }else{
732ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                ucbuf_fillucbuf(buf,err);
733ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if(U_FAILURE(*err)){
734ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    return NULL;
735ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
736ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
737ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /*
738ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             * Accoding to TR 13 readLine functions must interpret
739ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             * CR, CR+LF, LF, NEL, PS, LS or FF as line seperators
740ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             */
741ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* Windows CR LF */
742ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(c ==0x0d && temp+1<=buf->bufLimit && *(temp+1) == 0x0a ){
743ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *len = (int32_t)(temp++ - buf->currentPos);
744ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                savePos = buf->currentPos;
745ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                buf->currentPos = temp;
746ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                return savePos;
747ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
748ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* else */
749ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
750ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if (temp>=buf->bufLimit|| ucbuf_isCharNewLine(c)){  /* Unipad inserts 2028 line separators! */
751ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *len = (int32_t)(temp - buf->currentPos);
752ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                savePos = buf->currentPos;
753ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                buf->currentPos = temp;
754ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                return savePos;
755ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
756ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
757ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }else{
758ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* we know that all input is read into the internal
759ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    * buffer so we can safely return pointers
760ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        */
761ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        for(;;){
762ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            c = *temp++;
763ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
764ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(buf->currentPos==buf->bufLimit){
765ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                return NULL; /* end of file is reached return NULL */
766ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
767ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* Windows CR LF */
768ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(c ==0x0d && temp+1<=buf->bufLimit && *(temp+1) == 0x0a ){
769ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *len = (int32_t)(temp++ - buf->currentPos);
770ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                savePos = buf->currentPos;
771ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                buf->currentPos = temp;
772ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                return savePos;
773ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
774ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* else */
775ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if (temp>=buf->bufLimit|| ucbuf_isCharNewLine(c)) {  /* Unipad inserts 2028 line separators! */
776ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                *len = (int32_t)(temp - buf->currentPos);
777ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                savePos = buf->currentPos;
778ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                buf->currentPos = temp;
779ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                return savePos;
780ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
781ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
782ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
783ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* not reached */
784ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* A compiler warning will appear if all paths don't contain a return statement. */
785ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*    return NULL;*/
786ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
787ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif
788