1ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 2ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru******************************************************************************* 3ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius* Copyright (C) 1998-2011, International Business Machines 5ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Corporation and others. All Rights Reserved. 6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru******************************************************************************* 8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* File ucbuf.c 10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Modification History: 12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Date Name Description 14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 05/10/01 Ram Creation. 15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru******************************************************************************* 16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/ 17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h" 19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/putil.h" 2083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius#include "unicode/uchar.h" 21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/ucnv.h" 22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/ucnv_err.h" 2383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius#include "unicode/ustring.h" 2483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius#include "unicode/utf16.h" 25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "filestrm.h" 26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "cstring.h" 27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "cmemory.h" 28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "ustrfmt.h" 29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "ucbuf.h" 30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include <stdio.h> 31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if !UCONFIG_NO_CONVERSION 33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define MAX_IN_BUF 1000 36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define MAX_U_BUF 1500 3785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho#define CONTEXT_LEN 20 38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustruct UCHARBUF { 40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar* buffer; 41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar* currentPos; 42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar* bufLimit; 43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t bufCapacity; 44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t remaining; 45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t signatureLength; 46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru FileStream* in; 47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverter* conv; 48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool showWarning; /* makes this API not produce any errors */ 49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool isBuffered; 50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CAPI UBool U_EXPORT2 53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucbuf_autodetect_fs(FileStream* in, const char** cp, UConverter** conv, int32_t* signatureLength, UErrorCode* error){ 54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru char start[8]; 55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t numRead; 56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar target[1]={ 0 }; 58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar* pTarget; 59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const char* pStart; 60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* read a few bytes */ 62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru numRead=T_FileStream_read(in, start, sizeof(start)); 63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *cp = ucnv_detectUnicodeSignature(start, numRead, signatureLength, error); 65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* unread the bytes beyond what was consumed for U+FEFF */ 67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru T_FileStream_rewind(in); 68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (*signatureLength > 0) { 6983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius T_FileStream_read(in, start, *signatureLength); 70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(*cp==NULL){ 73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *conv =NULL; 74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return FALSE; 75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* open the converter for the detected Unicode charset */ 78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *conv = ucnv_open(*cp,error); 79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* convert and ignore initial U+FEFF, and the buffer overflow */ 81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pTarget = target; 82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pStart = start; 83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_toUnicode(*conv, &pTarget, target+1, &pStart, start+*signatureLength, NULL, FALSE, error); 84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *signatureLength = (int32_t)(pStart - start); 85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(*error==U_BUFFER_OVERFLOW_ERROR) { 86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *error=U_ZERO_ERROR; 87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* verify that we successfully read exactly U+FEFF */ 90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_SUCCESS(*error) && (pTarget!=(target+1) || target[0]!=0xfeff)) { 91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *error=U_INTERNAL_PROGRAM_ERROR; 92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return TRUE; 96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UBool ucbuf_isCPKnown(const char* cp){ 98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(ucnv_compareNames("UTF-8",cp)==0){ 99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return TRUE; 100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(ucnv_compareNames("UTF-16BE",cp)==0){ 102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return TRUE; 103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(ucnv_compareNames("UTF-16LE",cp)==0){ 105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return TRUE; 106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(ucnv_compareNames("UTF-16",cp)==0){ 108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return TRUE; 109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(ucnv_compareNames("UTF-32",cp)==0){ 111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return TRUE; 112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(ucnv_compareNames("UTF-32BE",cp)==0){ 114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return TRUE; 115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(ucnv_compareNames("UTF-32LE",cp)==0){ 117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return TRUE; 118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(ucnv_compareNames("SCSU",cp)==0){ 120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return TRUE; 121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(ucnv_compareNames("BOCU-1",cp)==0){ 123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return TRUE; 124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(ucnv_compareNames("UTF-7",cp)==0){ 126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return TRUE; 127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return FALSE; 129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CAPI FileStream * U_EXPORT2 132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucbuf_autodetect(const char* fileName, const char** cp,UConverter** conv, int32_t* signatureLength,UErrorCode* error){ 133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru FileStream* in=NULL; 134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(error==NULL || U_FAILURE(*error)){ 135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return NULL; 136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(conv==NULL || cp==NULL || fileName==NULL){ 138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *error = U_ILLEGAL_ARGUMENT_ERROR; 139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return NULL; 140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* open the file */ 142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru in= T_FileStream_open(fileName,"rb"); 143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(in == NULL){ 145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *error=U_FILE_ACCESS_ERROR; 146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return NULL; 147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(ucbuf_autodetect_fs(in,cp,conv,signatureLength,error)) { 150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return in; 151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_close(*conv); 153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *conv=NULL; 154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru T_FileStream_close(in); 155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return NULL; 156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* fill the uchar buffer */ 160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UCHARBUF* 161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucbuf_fillucbuf( UCHARBUF* buf,UErrorCode* error){ 162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar* pTarget=NULL; 163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar* target=NULL; 164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const char* source=NULL; 165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru char carr[MAX_IN_BUF] = {'\0'}; 166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru char* cbuf = carr; 167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t inputRead=0; 168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t outputWritten=0; 169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t offset=0; 170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const char* sourceLimit =NULL; 171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t cbufSize=0; 172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pTarget = buf->buffer; 173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* check if we arrived here without exhausting the buffer*/ 174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(buf->currentPos<buf->bufLimit){ 175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru offset = (int32_t)(buf->bufLimit-buf->currentPos); 176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru memmove(buf->buffer,buf->currentPos,offset* sizeof(UChar)); 177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if DEBUG 180ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru memset(pTarget+offset,0xff,sizeof(UChar)*(MAX_IN_BUF-offset)); 181ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 182ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(buf->isBuffered){ 183ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cbufSize = MAX_IN_BUF; 184ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* read the file */ 185ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru inputRead=T_FileStream_read(buf->in,cbuf,cbufSize-offset); 186ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buf->remaining-=inputRead; 187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 188ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru }else{ 189ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cbufSize = T_FileStream_size(buf->in); 190ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cbuf = (char*)uprv_malloc(cbufSize); 19185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if (cbuf == NULL) { 19285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho *error = U_MEMORY_ALLOCATION_ERROR; 19385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return NULL; 19485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 195ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru inputRead= T_FileStream_read(buf->in,cbuf,cbufSize); 196ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buf->remaining-=inputRead; 197ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 198ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 199ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* just to be sure...*/ 200ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if ( 0 == inputRead ) 201ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buf->remaining = 0; 202ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 203ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target=pTarget; 204ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* convert the bytes */ 205ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(buf->conv){ 206ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* set the callback to stop */ 207ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UConverterToUCallback toUOldAction ; 208ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru void* toUOldContext; 209ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru void* toUNewContext=NULL; 210ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_setToUCallBack(buf->conv, 211ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCNV_TO_U_CALLBACK_STOP, 212ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru toUNewContext, 213ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &toUOldAction, 214ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (const void**)&toUOldContext, 215ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru error); 216ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* since state is saved in the converter we add offset to source*/ 217ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target = pTarget+offset; 218ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru source = cbuf; 219ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sourceLimit = source + inputRead; 220ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_toUnicode(buf->conv,&target,target+(buf->bufCapacity-offset), 221ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &source,sourceLimit,NULL, 222ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (UBool)(buf->remaining==0),error); 223ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 224ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_FAILURE(*error)){ 22585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho char context[CONTEXT_LEN+1]; 22685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho char preContext[CONTEXT_LEN+1]; 22785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho char postContext[CONTEXT_LEN+1]; 228ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int8_t len = CONTEXT_LEN; 229ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t start=0; 230ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t stop =0; 231ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t pos =0; 232ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* use erro1 to preserve the error code */ 233ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode error1 =U_ZERO_ERROR; 234ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 235ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if( buf->showWarning==TRUE){ 236ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stderr,"\n###WARNING: Encountered abnormal bytes while" 237ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru " converting input stream to target encoding: %s\n", 238ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru u_errorName(*error)); 239ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 240ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 241ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 242ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* now get the context chars */ 243ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_getInvalidChars(buf->conv,context,&len,&error1); 244ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru context[len]= 0 ; /* null terminate the buffer */ 245ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 246ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pos = (int32_t)(source - cbuf - len); 247ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 248ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* for pre-context */ 249ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru start = (pos <=CONTEXT_LEN)? 0 : (pos - (CONTEXT_LEN-1)); 250ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru stop = pos-len; 251ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 252ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru memcpy(preContext,cbuf+start,stop-start); 253ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* null terminate the buffer */ 254ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru preContext[stop-start] = 0; 255ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 256ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* for post-context */ 257ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru start = pos+len; 258ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru stop = (int32_t)(((pos+CONTEXT_LEN)<= (sourceLimit-cbuf) )? (pos+(CONTEXT_LEN-1)) : (sourceLimit-cbuf)); 259ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 260ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru memcpy(postContext,source,stop-start); 261ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* null terminate the buffer */ 262ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru postContext[stop-start] = 0; 263ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 264ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(buf->showWarning ==TRUE){ 265ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* print out the context */ 266ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stderr,"\tPre-context: %s\n",preContext); 267ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stderr,"\tContext: %s\n",context); 268ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stderr,"\tPost-context: %s\n", postContext); 269ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 270ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 271ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* reset the converter */ 272ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_reset(buf->conv); 273ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 274ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* set the call back to substitute 275ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * and restart conversion 276ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 277ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_setToUCallBack(buf->conv, 278ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCNV_TO_U_CALLBACK_SUBSTITUTE, 279ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru toUNewContext, 280ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &toUOldAction, 281ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (const void**)&toUOldContext, 282ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &error1); 283ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 284ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* reset source and target start positions */ 285ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target = pTarget+offset; 286ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru source = cbuf; 287ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 288ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* re convert */ 289ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_toUnicode(buf->conv,&target,target+(buf->bufCapacity-offset), 290ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru &source,sourceLimit,NULL, 291ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (UBool)(buf->remaining==0),&error1); 292ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 293ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 294ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru outputWritten = (int32_t)(target - pTarget); 295ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 296ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 297ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if DEBUG 298ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 299ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int i; 300ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target = pTarget; 301ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(i=0;i<numRead;i++){ 302ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* printf("%c", (char)(*target++));*/ 303ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 304ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 305ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 306ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 307ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru }else{ 308ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru u_charsToUChars(cbuf,target+offset,inputRead); 309ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru outputWritten=((buf->remaining>cbufSize)? cbufSize:inputRead+offset); 310ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 311ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buf->currentPos = pTarget; 312ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buf->bufLimit=pTarget+outputWritten; 313ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *buf->bufLimit=0; /*NUL terminate*/ 314ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(cbuf!=carr){ 315ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_free(cbuf); 316ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 317ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return buf; 318ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 319ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 320ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 321ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 322ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* get a UChar from the stream*/ 323ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2 324ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucbuf_getc(UCHARBUF* buf,UErrorCode* error){ 325ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(error==NULL || U_FAILURE(*error)){ 326ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return FALSE; 327ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 328ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(buf->currentPos>=buf->bufLimit){ 329ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(buf->remaining==0){ 330ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return U_EOF; 331ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 332ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buf=ucbuf_fillucbuf(buf,error); 333ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_FAILURE(*error)){ 334ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return U_EOF; 335ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 336ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 337ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 338ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return *(buf->currentPos++); 339ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 340ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 341ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* get a UChar32 from the stream*/ 342ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2 343ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucbuf_getc32(UCHARBUF* buf,UErrorCode* error){ 344ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t retVal = (int32_t)U_EOF; 345ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(error==NULL || U_FAILURE(*error)){ 346ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return FALSE; 347ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 348ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(buf->currentPos+1>=buf->bufLimit){ 349ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(buf->remaining==0){ 350ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return U_EOF; 351ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 352ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buf=ucbuf_fillucbuf(buf,error); 353ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_FAILURE(*error)){ 354ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return U_EOF; 355ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 356ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 35783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius if(U16_IS_LEAD(*(buf->currentPos))){ 35883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius retVal=U16_GET_SUPPLEMENTARY(buf->currentPos[0],buf->currentPos[1]); 359ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buf->currentPos+=2; 360ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru }else{ 361ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru retVal = *(buf->currentPos++); 362ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 363ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return retVal; 364ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 365ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 366ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* u_unescapeAt() callback to return a UChar*/ 367ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UChar U_CALLCONV 368ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_charAt(int32_t offset, void *context) { 369ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return ((UCHARBUF*) context)->currentPos[offset]; 370ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 371ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 372ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* getc and escape it */ 373ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2 374ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucbuf_getcx32(UCHARBUF* buf,UErrorCode* error) { 375ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t length; 376ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t offset; 377ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar32 c32,c1,c2; 378ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(error==NULL || U_FAILURE(*error)){ 379ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return FALSE; 380ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 381ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* Fill the buffer if it is empty */ 382ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (buf->currentPos >=buf->bufLimit-2) { 383ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucbuf_fillucbuf(buf,error); 384ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 385ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 386ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* Get the next character in the buffer */ 387ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (buf->currentPos < buf->bufLimit) { 388ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c1 = *(buf->currentPos)++; 389ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 390ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c1 = U_EOF; 391ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 392ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 393ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c2 = *(buf->currentPos); 394ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 395ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* If it isn't a backslash, return it */ 396ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (c1 != 0x005C) { 397ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return c1; 398ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 399ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 400ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* Determine the amount of data in the buffer */ 401ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length = (int32_t)(buf->bufLimit - buf->currentPos); 402ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 403ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* The longest escape sequence is \Uhhhhhhhh; make sure 404ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru we have at least that many characters */ 405ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (length < 10) { 406ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 407ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* fill the buffer */ 408ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucbuf_fillucbuf(buf,error); 409ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru length = (int32_t)(buf->bufLimit - buf->buffer); 410ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 411ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 412ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* Process the escape */ 413ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru offset = 0; 414ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c32 = u_unescapeAt(_charAt, &offset, length, (void*)buf); 415ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 416ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* check if u_unescapeAt unescaped and converted 417ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * to c32 or not 418ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 419ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c32==0xFFFFFFFF){ 420ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(buf->showWarning) { 42185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho char context[CONTEXT_LEN+1]; 42285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho int32_t len = CONTEXT_LEN; 42385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(length < len) { 424ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru len = length; 425ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 426ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru context[len]= 0 ; /* null terminate the buffer */ 427ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru u_UCharsToChars( buf->currentPos, context, len); 428ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stderr,"Bad escape: [%c%s]...\n", (int)c1, context); 429ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 430ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *error= U_ILLEGAL_ESCAPE_SEQUENCE; 431ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return c1; 432ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru }else if(c32!=c2 || (c32==0x0075 && c2==0x0075 && c1==0x005C) /* for \u0075 c2=0x0075 and c32==0x0075*/){ 433ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* Update the current buffer position */ 434ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buf->currentPos += offset; 435ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru }else{ 436ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* unescaping failed so we just return 437ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * c1 and not consume the buffer 438ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * this is useful for rules with escapes 439ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * in resouce bundles 440ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * eg: \' \\ \" 441ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 442ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return c1; 443ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 444ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 445ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return c32; 446ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 447ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 448ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CAPI UCHARBUF* U_EXPORT2 449ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucbuf_open(const char* fileName,const char** cp,UBool showWarning, UBool buffered, UErrorCode* error){ 450ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 451ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru FileStream* in = NULL; 452ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t fileSize=0; 453ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const char* knownCp; 454ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(error==NULL || U_FAILURE(*error)){ 455ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return NULL; 456ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 457ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(cp==NULL || fileName==NULL){ 458ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *error = U_ILLEGAL_ARGUMENT_ERROR; 459ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return FALSE; 460ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 461ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (!uprv_strcmp(fileName, "-")) { 462ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru in = T_FileStream_stdin(); 463ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru }else{ 464ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru in = T_FileStream_open(fileName, "rb"); 465ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 466ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 467ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(in!=NULL){ 468ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UCHARBUF* buf =(UCHARBUF*) uprv_malloc(sizeof(UCHARBUF)); 469ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fileSize = T_FileStream_size(in); 470ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(buf == NULL){ 471ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *error = U_MEMORY_ALLOCATION_ERROR; 472ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru T_FileStream_close(in); 473ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return NULL; 474ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 475ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buf->in=in; 476ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buf->conv=NULL; 477ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buf->showWarning = showWarning; 478ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buf->isBuffered = buffered; 479ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buf->signatureLength=0; 480ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(*cp==NULL || **cp=='\0'){ 481ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* don't have code page name... try to autodetect */ 482ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucbuf_autodetect_fs(in,cp,&buf->conv,&buf->signatureLength,error); 483ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru }else if(ucbuf_isCPKnown(*cp)){ 484ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* discard BOM */ 485ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucbuf_autodetect_fs(in,&knownCp,&buf->conv,&buf->signatureLength,error); 486ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 487ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_SUCCESS(*error) && buf->conv==NULL) { 488ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buf->conv=ucnv_open(*cp,error); 489ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 490ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_FAILURE(*error)){ 491ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_close(buf->conv); 492ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_free(buf); 493ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru T_FileStream_close(in); 494ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return NULL; 495ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 496ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 497ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((buf->conv==NULL) && (buf->showWarning==TRUE)){ 498ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stderr,"###WARNING: No converter defined. Using codepage of system.\n"); 499ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 500ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buf->remaining=fileSize-buf->signatureLength; 501ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(buf->isBuffered){ 502ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buf->bufCapacity=MAX_U_BUF; 503ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru }else{ 504ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buf->bufCapacity=buf->remaining+buf->signatureLength+1/*for terminating nul*/; 505ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 506ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buf->buffer=(UChar*) uprv_malloc(U_SIZEOF_UCHAR * buf->bufCapacity ); 507ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (buf->buffer == NULL) { 508ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *error = U_MEMORY_ALLOCATION_ERROR; 50985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho ucbuf_close(buf); 510ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return NULL; 511ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 512ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buf->currentPos=buf->buffer; 513ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buf->bufLimit=buf->buffer; 514ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_FAILURE(*error)){ 515ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stderr, "Could not open codepage [%s]: %s\n", *cp, u_errorName(*error)); 51685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho ucbuf_close(buf); 51785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return NULL; 51885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 51985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho ucbuf_fillucbuf(buf,error); 52085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(U_FAILURE(*error)){ 52185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho ucbuf_close(buf); 522ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return NULL; 523ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 524ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return buf; 525ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 526ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *error =U_FILE_ACCESS_ERROR; 527ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return NULL; 528ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 529ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 530ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 531ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 532ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* TODO: this method will fail if at the 533ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * begining of buffer and the uchar to unget 534ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * is from the previous buffer. Need to implement 535ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * system to take care of that situation. 536ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 537ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CAPI void U_EXPORT2 538ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucbuf_ungetc(int32_t c,UCHARBUF* buf){ 539ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* decrement currentPos pointer 540ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * if not at the begining of buffer 541ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 542ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(buf->currentPos!=buf->buffer){ 543ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(*(buf->currentPos-1)==c){ 544ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buf->currentPos--; 54585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else { 54685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* ungetc failed - did not match. */ 547ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 54885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } else { 54985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* ungetc failed - beginning of buffer. */ 550ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 551ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 552ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 553ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* frees the resources of UChar* buffer */ 554ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 555ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucbuf_closebuf(UCHARBUF* buf){ 556ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_free(buf->buffer); 557ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buf->buffer = NULL; 558ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 559ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 560ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* close the buf and release resources*/ 561ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CAPI void U_EXPORT2 562ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucbuf_close(UCHARBUF* buf){ 563ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(buf!=NULL){ 564ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(buf->conv){ 565ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_close(buf->conv); 566ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 567ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru T_FileStream_close(buf->in); 568ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucbuf_closebuf(buf); 569ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_free(buf); 570ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 571ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 572ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 573ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* rewind the buf and file stream */ 574ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CAPI void U_EXPORT2 575ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucbuf_rewind(UCHARBUF* buf,UErrorCode* error){ 576ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(error==NULL || U_FAILURE(*error)){ 577ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 578ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 579ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(buf){ 580ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buf->currentPos=buf->buffer; 581ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buf->bufLimit=buf->buffer; 582ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru T_FileStream_rewind(buf->in); 583ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buf->remaining=T_FileStream_size(buf->in)-buf->signatureLength; 584ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 585ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_resetToUnicode(buf->conv); 586ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(buf->signatureLength>0) { 587ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar target[1]={ 0 }; 588ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar* pTarget; 589ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru char start[8]; 590ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const char* pStart; 591ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t numRead; 592ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 593ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* read the signature bytes */ 594ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru numRead=T_FileStream_read(buf->in, start, buf->signatureLength); 595ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 596ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* convert and ignore initial U+FEFF, and the buffer overflow */ 597ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pTarget = target; 598ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pStart = start; 599ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucnv_toUnicode(buf->conv, &pTarget, target+1, &pStart, start+numRead, NULL, FALSE, error); 600ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(*error==U_BUFFER_OVERFLOW_ERROR) { 601ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *error=U_ZERO_ERROR; 602ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 603ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 604ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* verify that we successfully read exactly U+FEFF */ 605ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_SUCCESS(*error) && (numRead!=buf->signatureLength || pTarget!=(target+1) || target[0]!=0xfeff)) { 606ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *error=U_INTERNAL_PROGRAM_ERROR; 607ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 608ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 609ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 610ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 611ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 612ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 613ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CAPI int32_t U_EXPORT2 614ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucbuf_size(UCHARBUF* buf){ 615ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(buf){ 616ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(buf->isBuffered){ 617ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return (T_FileStream_size(buf->in)-buf->signatureLength)/ucnv_getMinCharSize(buf->conv); 618ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru }else{ 619ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return (int32_t)(buf->bufLimit - buf->buffer); 620ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 621ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 622ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0; 623ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 624ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 625ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CAPI const UChar* U_EXPORT2 626ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucbuf_getBuffer(UCHARBUF* buf,int32_t* len,UErrorCode* error){ 627ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(error==NULL || U_FAILURE(*error)){ 628ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return NULL; 629ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 630ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(buf==NULL || len==NULL){ 631ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *error = U_ILLEGAL_ARGUMENT_ERROR; 632ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return NULL; 633ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 634ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *len = (int32_t)(buf->bufLimit - buf->buffer); 635ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return buf->buffer; 636ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 637ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 638ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CAPI const char* U_EXPORT2 639ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucbuf_resolveFileName(const char* inputDir, const char* fileName, char* target, int32_t* len, UErrorCode* status){ 640ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t requiredLen = 0; 641ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t dirlen = 0; 642ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t filelen = 0; 643ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(status==NULL || U_FAILURE(*status)){ 644ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return NULL; 645ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 646ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 647ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(inputDir == NULL || fileName == NULL || len==NULL || (target==NULL && *len>0)){ 648ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *status = U_ILLEGAL_ARGUMENT_ERROR; 649ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return NULL; 650ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 651ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 652ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 653ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru dirlen = (int32_t)uprv_strlen(inputDir); 654ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru filelen = (int32_t)uprv_strlen(fileName); 655ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(inputDir[dirlen-1] != U_FILE_SEP_CHAR) { 656ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru requiredLen = dirlen + filelen + 2; 657ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((*len < requiredLen) || target==NULL){ 658ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *len = requiredLen; 659ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *status = U_BUFFER_OVERFLOW_ERROR; 660ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return NULL; 661ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 662ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 663ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target[0] = '\0'; 664ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 665ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * append the input dir to openFileName if the first char in 666ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * filename is not file seperation char and the last char input directory is not '.'. 667ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * This is to support : 668ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * genrb -s. /home/icu/data 669ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * genrb -s. icu/data 670ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The user cannot mix notations like 671ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * genrb -s. /icu/data --- the absolute path specified. -s redundant 672ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * user should use 673ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * genrb -s. icu/data --- start from CWD and look in icu/data dir 674ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 675ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if( (fileName[0] != U_FILE_SEP_CHAR) && (inputDir[dirlen-1] !='.')){ 676ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_strcpy(target, inputDir); 677ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target[dirlen] = U_FILE_SEP_CHAR; 678ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 679ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target[dirlen + 1] = '\0'; 680ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 681ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru requiredLen = dirlen + filelen + 1; 682ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((*len < requiredLen) || target==NULL){ 683ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *len = requiredLen; 684ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *status = U_BUFFER_OVERFLOW_ERROR; 685ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return NULL; 686ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 687ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 688ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_strcpy(target, inputDir); 689ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 690ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 691ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_strcat(target, fileName); 692ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return target; 693ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 694ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 695ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Unicode TR 13 says any of the below chars is 696ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * a new line char in a readline function in addition 697ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * to CR+LF combination which needs to be 698ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * handled seperately 699ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 700ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UBool ucbuf_isCharNewLine(UChar c){ 701ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru switch(c){ 702ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 0x000A: /* LF */ 703ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 0x000D: /* CR */ 704ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 0x000C: /* FF */ 705ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 0x0085: /* NEL */ 706ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 0x2028: /* LS */ 707ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru case 0x2029: /* PS */ 708ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return TRUE; 709ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru default: 710ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return FALSE; 711ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 712ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 713ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 714ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CAPI const UChar* U_EXPORT2 715ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruucbuf_readline(UCHARBUF* buf,int32_t* len,UErrorCode* err){ 716ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar* temp = buf->currentPos; 717ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar* savePos =NULL; 718ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar c=0x0000; 719ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(buf->isBuffered){ 720ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* The input is buffered we have to do more 721ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * for returning a pointer U_TRUNCATED_CHAR_FOUND 722ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 723ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(;;){ 724ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c = *temp++; 725ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(buf->remaining==0){ 726ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return NULL; /* end of file is reached return NULL */ 727ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 728ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(temp>=buf->bufLimit && buf->currentPos == buf->buffer){ 729ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *err= U_TRUNCATED_CHAR_FOUND; 730ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return NULL; 731ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru }else{ 732ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ucbuf_fillucbuf(buf,err); 733ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_FAILURE(*err)){ 734ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return NULL; 735ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 736ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 737ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 738ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Accoding to TR 13 readLine functions must interpret 739ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * CR, CR+LF, LF, NEL, PS, LS or FF as line seperators 740ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 741ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* Windows CR LF */ 742ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c ==0x0d && temp+1<=buf->bufLimit && *(temp+1) == 0x0a ){ 743ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *len = (int32_t)(temp++ - buf->currentPos); 744ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru savePos = buf->currentPos; 745ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buf->currentPos = temp; 746ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return savePos; 747ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 748ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* else */ 749ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 750ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (temp>=buf->bufLimit|| ucbuf_isCharNewLine(c)){ /* Unipad inserts 2028 line separators! */ 751ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *len = (int32_t)(temp - buf->currentPos); 752ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru savePos = buf->currentPos; 753ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buf->currentPos = temp; 754ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return savePos; 755ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 756ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 757ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru }else{ 758ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* we know that all input is read into the internal 759ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * buffer so we can safely return pointers 760ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 761ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(;;){ 762ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c = *temp++; 763ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 764ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(buf->currentPos==buf->bufLimit){ 765ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return NULL; /* end of file is reached return NULL */ 766ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 767ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* Windows CR LF */ 768ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(c ==0x0d && temp+1<=buf->bufLimit && *(temp+1) == 0x0a ){ 769ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *len = (int32_t)(temp++ - buf->currentPos); 770ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru savePos = buf->currentPos; 771ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buf->currentPos = temp; 772ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return savePos; 773ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 774ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* else */ 775ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (temp>=buf->bufLimit|| ucbuf_isCharNewLine(c)) { /* Unipad inserts 2028 line separators! */ 776ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *len = (int32_t)(temp - buf->currentPos); 777ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru savePos = buf->currentPos; 778ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buf->currentPos = temp; 779ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return savePos; 780ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 781ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 782ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 783ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* not reached */ 784ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* A compiler warning will appear if all paths don't contain a return statement. */ 785ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* return NULL;*/ 786ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 787ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 788