/*
*******************************************************************************
*
*   Copyright (C) 1998-2011, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*
* File ucbuf.c
*
* Modification History:
*
*   Date        Name        Description
*   05/10/01    Ram         Creation.
156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org******************************************************************************* 166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*/ 176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/utypes.h" 196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/putil.h" 206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/uchar.h" 216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/ucnv.h" 226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/ucnv_err.h" 236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/ustring.h" 246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/utf16.h" 256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "filestrm.h" 266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "cstring.h" 276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "cmemory.h" 286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "ustrfmt.h" 296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "ucbuf.h" 306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include <stdio.h> 316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if !UCONFIG_NO_CONVERSION 336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define MAX_IN_BUF 1000 366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define MAX_U_BUF 1500 376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define CONTEXT_LEN 20 386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstruct UCHARBUF { 
406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar* buffer; 416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar* currentPos; 426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar* bufLimit; 436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t bufCapacity; 446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t remaining; 456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t signatureLength; 466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org FileStream* in; 476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UConverter* conv; 486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool showWarning; /* makes this API not produce any errors */ 496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool isBuffered; 506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}; 516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI UBool U_EXPORT2 536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgucbuf_autodetect_fs(FileStream* in, const char** cp, UConverter** conv, int32_t* signatureLength, UErrorCode* error){ 546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org char start[8]; 556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t numRead; 566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar target[1]={ 0 }; 586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar* pTarget; 596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char* pStart; 606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* read a few bytes */ 626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org numRead=T_FileStream_read(in, start, sizeof(start)); 636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *cp = ucnv_detectUnicodeSignature(start, numRead, signatureLength, error); 656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* unread the bytes beyond what was consumed for U+FEFF */ 676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org T_FileStream_rewind(in); 686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (*signatureLength > 0) { 696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org T_FileStream_read(in, start, *signatureLength); 706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(*cp==NULL){ 736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *conv =NULL; 746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* open the converter for the detected Unicode charset */ 786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *conv = ucnv_open(*cp,error); 796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* convert and ignore initial U+FEFF, and the buffer overflow */ 816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pTarget = target; 826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pStart = start; 836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_toUnicode(*conv, &pTarget, target+1, &pStart, start+*signatureLength, NULL, FALSE, error); 846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *signatureLength = (int32_t)(pStart - start); 856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(*error==U_BUFFER_OVERFLOW_ERROR) { 
866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *error=U_ZERO_ERROR; 876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* verify that we successfully read exactly U+FEFF */ 906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U_SUCCESS(*error) && (pTarget!=(target+1) || target[0]!=0xfeff)) { 916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *error=U_INTERNAL_PROGRAM_ERROR; 926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return TRUE; 966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic UBool ucbuf_isCPKnown(const char* cp){ 986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(ucnv_compareNames("UTF-8",cp)==0){ 996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return TRUE; 1006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(ucnv_compareNames("UTF-16BE",cp)==0){ 1026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return TRUE; 1036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(ucnv_compareNames("UTF-16LE",cp)==0){ 1056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return TRUE; 1066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(ucnv_compareNames("UTF-16",cp)==0){ 1086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return TRUE; 1096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
if(ucnv_compareNames("UTF-32",cp)==0){ 1116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return TRUE; 1126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(ucnv_compareNames("UTF-32BE",cp)==0){ 1146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return TRUE; 1156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(ucnv_compareNames("UTF-32LE",cp)==0){ 1176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return TRUE; 1186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(ucnv_compareNames("SCSU",cp)==0){ 1206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return TRUE; 1216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(ucnv_compareNames("BOCU-1",cp)==0){ 1236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return TRUE; 1246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(ucnv_compareNames("UTF-7",cp)==0){ 1266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return TRUE; 1276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 1296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 1306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI FileStream * U_EXPORT2 1326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgucbuf_autodetect(const char* fileName, const char** cp,UConverter** conv, int32_t* signatureLength,UErrorCode* error){ 1336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org FileStream* in=NULL; 1346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(error==NULL || 
U_FAILURE(*error)){ 1356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return NULL; 1366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(conv==NULL || cp==NULL || fileName==NULL){ 1386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *error = U_ILLEGAL_ARGUMENT_ERROR; 1396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return NULL; 1406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* open the file */ 1426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org in= T_FileStream_open(fileName,"rb"); 1436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(in == NULL){ 1456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *error=U_FILE_ACCESS_ERROR; 1466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return NULL; 1476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(ucbuf_autodetect_fs(in,cp,conv,signatureLength,error)) { 1506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return in; 1516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 1526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_close(*conv); 1536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *conv=NULL; 1546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org T_FileStream_close(in); 1556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return NULL; 1566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 1586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* fill the uchar buffer */ 
1606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic UCHARBUF* 1616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgucbuf_fillucbuf( UCHARBUF* buf,UErrorCode* error){ 1626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar* pTarget=NULL; 1636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar* target=NULL; 1646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char* source=NULL; 1656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org char carr[MAX_IN_BUF] = {'\0'}; 1666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org char* cbuf = carr; 1676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t inputRead=0; 1686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t outputWritten=0; 1696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t offset=0; 1706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char* sourceLimit =NULL; 1716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t cbufSize=0; 1726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pTarget = buf->buffer; 1736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* check if we arrived here without exhausting the buffer*/ 1746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(buf->currentPos<buf->bufLimit){ 1756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org offset = (int32_t)(buf->bufLimit-buf->currentPos); 1766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org memmove(buf->buffer,buf->currentPos,offset* sizeof(UChar)); 1776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if DEBUG 1806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org memset(pTarget+offset,0xff,sizeof(UChar)*(MAX_IN_BUF-offset)); 1816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif 
1826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(buf->isBuffered){ 1836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cbufSize = MAX_IN_BUF; 1846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* read the file */ 1856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org inputRead=T_FileStream_read(buf->in,cbuf,cbufSize-offset); 1866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org buf->remaining-=inputRead; 1876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org }else{ 1896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cbufSize = T_FileStream_size(buf->in); 1906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cbuf = (char*)uprv_malloc(cbufSize); 1916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (cbuf == NULL) { 1926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *error = U_MEMORY_ALLOCATION_ERROR; 1936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return NULL; 1946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org inputRead= T_FileStream_read(buf->in,cbuf,cbufSize); 1966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org buf->remaining-=inputRead; 1976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* just to be sure...*/ 2006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if ( 0 == inputRead ) 2016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org buf->remaining = 0; 2026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org target=pTarget; 2046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* convert the bytes */ 2056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(buf->conv){ 
2066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* set the callback to stop */ 2076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UConverterToUCallback toUOldAction ; 2086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org void* toUOldContext; 2096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org void* toUNewContext=NULL; 2106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_setToUCallBack(buf->conv, 2116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UCNV_TO_U_CALLBACK_STOP, 2126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org toUNewContext, 2136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org &toUOldAction, 2146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (const void**)&toUOldContext, 2156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org error); 2166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* since state is saved in the converter we add offset to source*/ 2176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org target = pTarget+offset; 2186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org source = cbuf; 2196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org sourceLimit = source + inputRead; 2206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_toUnicode(buf->conv,&target,target+(buf->bufCapacity-offset), 2216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org &source,sourceLimit,NULL, 2226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (UBool)(buf->remaining==0),error); 2236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U_FAILURE(*error)){ 2256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org char context[CONTEXT_LEN+1]; 2266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org char preContext[CONTEXT_LEN+1]; 2276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org char postContext[CONTEXT_LEN+1]; 
2286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int8_t len = CONTEXT_LEN; 2296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t start=0; 2306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t stop =0; 2316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t pos =0; 2326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* use erro1 to preserve the error code */ 2336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode error1 =U_ZERO_ERROR; 2346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if( buf->showWarning==TRUE){ 2366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fprintf(stderr,"\n###WARNING: Encountered abnormal bytes while" 2376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org " converting input stream to target encoding: %s\n", 2386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org u_errorName(*error)); 2396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* now get the context chars */ 2436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_getInvalidChars(buf->conv,context,&len,&error1); 2446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org context[len]= 0 ; /* null terminate the buffer */ 2456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pos = (int32_t)(source - cbuf - len); 2476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* for pre-context */ 2496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org start = (pos <=CONTEXT_LEN)? 
0 : (pos - (CONTEXT_LEN-1)); 2506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org stop = pos-len; 2516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org memcpy(preContext,cbuf+start,stop-start); 2536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* null terminate the buffer */ 2546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org preContext[stop-start] = 0; 2556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* for post-context */ 2576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org start = pos+len; 2586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org stop = (int32_t)(((pos+CONTEXT_LEN)<= (sourceLimit-cbuf) )? (pos+(CONTEXT_LEN-1)) : (sourceLimit-cbuf)); 2596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org memcpy(postContext,source,stop-start); 2616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* null terminate the buffer */ 2626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org postContext[stop-start] = 0; 2636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(buf->showWarning ==TRUE){ 2656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* print out the context */ 2666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fprintf(stderr,"\tPre-context: %s\n",preContext); 2676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fprintf(stderr,"\tContext: %s\n",context); 2686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fprintf(stderr,"\tPost-context: %s\n", postContext); 2696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* reset the converter */ 
2726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_reset(buf->conv); 2736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* set the call back to substitute 2756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * and restart conversion 2766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 2776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_setToUCallBack(buf->conv, 2786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UCNV_TO_U_CALLBACK_SUBSTITUTE, 2796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org toUNewContext, 2806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org &toUOldAction, 2816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (const void**)&toUOldContext, 2826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org &error1); 2836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* reset source and target start positions */ 2856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org target = pTarget+offset; 2866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org source = cbuf; 2876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* re convert */ 2896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_toUnicode(buf->conv,&target,target+(buf->bufCapacity-offset), 2906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org &source,sourceLimit,NULL, 2916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (UBool)(buf->remaining==0),&error1); 2926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org outputWritten = (int32_t)(target - pTarget); 2956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
2966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if DEBUG 2986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 2996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int i; 3006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org target = pTarget; 3016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for(i=0;i<numRead;i++){ 3026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* printf("%c", (char)(*target++));*/ 3036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif 3066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org }else{ 3086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org u_charsToUChars(cbuf,target+offset,inputRead); 3096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org outputWritten=((buf->remaining>cbufSize)? 
cbufSize:inputRead+offset); 3106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org buf->currentPos = pTarget; 3126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org buf->bufLimit=pTarget+outputWritten; 3136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *buf->bufLimit=0; /*NUL terminate*/ 3146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(cbuf!=carr){ 3156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uprv_free(cbuf); 3166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return buf; 3186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 3196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* get a UChar from the stream*/ 3236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI int32_t U_EXPORT2 3246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgucbuf_getc(UCHARBUF* buf,UErrorCode* error){ 3256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(error==NULL || U_FAILURE(*error)){ 3266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 3276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(buf->currentPos>=buf->bufLimit){ 3296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(buf->remaining==0){ 3306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return U_EOF; 3316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org buf=ucbuf_fillucbuf(buf,error); 3336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U_FAILURE(*error)){ 3346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 
U_EOF; 3356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return *(buf->currentPos++); 3396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 3406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* get a UChar32 from the stream*/ 3426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI int32_t U_EXPORT2 3436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgucbuf_getc32(UCHARBUF* buf,UErrorCode* error){ 3446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t retVal = (int32_t)U_EOF; 3456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(error==NULL || U_FAILURE(*error)){ 3466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 3476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(buf->currentPos+1>=buf->bufLimit){ 3496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(buf->remaining==0){ 3506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return U_EOF; 3516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org buf=ucbuf_fillucbuf(buf,error); 3536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U_FAILURE(*error)){ 3546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return U_EOF; 3556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U16_IS_LEAD(*(buf->currentPos))){ 3586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org retVal=U16_GET_SUPPLEMENTARY(buf->currentPos[0],buf->currentPos[1]); 
3596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org buf->currentPos+=2; 3606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org }else{ 3616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org retVal = *(buf->currentPos++); 3626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return retVal; 3646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 3656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* u_unescapeAt() callback to return a UChar*/ 3676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic UChar U_CALLCONV 3686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org_charAt(int32_t offset, void *context) { 3696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return ((UCHARBUF*) context)->currentPos[offset]; 3706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 3716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* getc and escape it */ 3736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI int32_t U_EXPORT2 3746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgucbuf_getcx32(UCHARBUF* buf,UErrorCode* error) { 3756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t length; 3766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t offset; 3776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c32,c1,c2; 3786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(error==NULL || U_FAILURE(*error)){ 3796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 3806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* Fill the buffer if it is empty */ 3826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (buf->currentPos >=buf->bufLimit-2) { 
3836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucbuf_fillucbuf(buf,error); 3846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* Get the next character in the buffer */ 3876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (buf->currentPos < buf->bufLimit) { 3886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c1 = *(buf->currentPos)++; 3896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 3906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c1 = U_EOF; 3916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c2 = *(buf->currentPos); 3946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* If it isn't a backslash, return it */ 3966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (c1 != 0x005C) { 3976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return c1; 3986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* Determine the amount of data in the buffer */ 4016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length = (int32_t)(buf->bufLimit - buf->currentPos); 4026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* The longest escape sequence is \Uhhhhhhhh; make sure 4046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org we have at least that many characters */ 4056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (length < 10) { 4066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
4076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* fill the buffer */ 4086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucbuf_fillucbuf(buf,error); 4096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length = (int32_t)(buf->bufLimit - buf->buffer); 4106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* Process the escape */ 4136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org offset = 0; 4146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c32 = u_unescapeAt(_charAt, &offset, length, (void*)buf); 4156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* check if u_unescapeAt unescaped and converted 4176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * to c32 or not 4186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 4196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(c32==0xFFFFFFFF){ 4206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(buf->showWarning) { 4216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org char context[CONTEXT_LEN+1]; 4226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t len = CONTEXT_LEN; 4236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(length < len) { 4246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org len = length; 4256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org context[len]= 0 ; /* null terminate the buffer */ 4276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org u_UCharsToChars( buf->currentPos, context, len); 4286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fprintf(stderr,"Bad escape: [%c%s]...\n", (int)c1, context); 4296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 
4306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *error= U_ILLEGAL_ESCAPE_SEQUENCE; 4316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return c1; 4326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org }else if(c32!=c2 || (c32==0x0075 && c2==0x0075 && c1==0x005C) /* for \u0075 c2=0x0075 and c32==0x0075*/){ 4336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* Update the current buffer position */ 4346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org buf->currentPos += offset; 4356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org }else{ 4366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* unescaping failed so we just return 4376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * c1 and not consume the buffer 4386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * this is useful for rules with escapes 4396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * in resouce bundles 4406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * eg: \' \\ \" 4416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 4426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return c1; 4436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return c32; 4466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 4476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI UCHARBUF* U_EXPORT2 4496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgucbuf_open(const char* fileName,const char** cp,UBool showWarning, UBool buffered, UErrorCode* error){ 4506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org FileStream* in = NULL; 4526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t fileSize=0; 
4536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char* knownCp; 4546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(error==NULL || U_FAILURE(*error)){ 4556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return NULL; 4566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(cp==NULL || fileName==NULL){ 4586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *error = U_ILLEGAL_ARGUMENT_ERROR; 4596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 4606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (!uprv_strcmp(fileName, "-")) { 4626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org in = T_FileStream_stdin(); 4636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org }else{ 4646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org in = T_FileStream_open(fileName, "rb"); 4656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(in!=NULL){ 4686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UCHARBUF* buf =(UCHARBUF*) uprv_malloc(sizeof(UCHARBUF)); 4696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fileSize = T_FileStream_size(in); 4706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(buf == NULL){ 4716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *error = U_MEMORY_ALLOCATION_ERROR; 4726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org T_FileStream_close(in); 4736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return NULL; 4746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org buf->in=in; 4766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org buf->conv=NULL; 
4776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org buf->showWarning = showWarning; 4786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org buf->isBuffered = buffered; 4796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org buf->signatureLength=0; 4806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(*cp==NULL || **cp=='\0'){ 4816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* don't have code page name... try to autodetect */ 4826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucbuf_autodetect_fs(in,cp,&buf->conv,&buf->signatureLength,error); 4836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org }else if(ucbuf_isCPKnown(*cp)){ 4846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* discard BOM */ 4856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucbuf_autodetect_fs(in,&knownCp,&buf->conv,&buf->signatureLength,error); 4866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U_SUCCESS(*error) && buf->conv==NULL) { 4886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org buf->conv=ucnv_open(*cp,error); 4896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U_FAILURE(*error)){ 4916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_close(buf->conv); 4926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uprv_free(buf); 4936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org T_FileStream_close(in); 4946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return NULL; 4956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if((buf->conv==NULL) && (buf->showWarning==TRUE)){ 4986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fprintf(stderr,"###WARNING: No converter defined. 
Using codepage of system.\n"); 4996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org buf->remaining=fileSize-buf->signatureLength; 5016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(buf->isBuffered){ 5026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org buf->bufCapacity=MAX_U_BUF; 5036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org }else{ 5046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org buf->bufCapacity=buf->remaining+buf->signatureLength+1/*for terminating nul*/; 5056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org buf->buffer=(UChar*) uprv_malloc(U_SIZEOF_UCHAR * buf->bufCapacity ); 5076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (buf->buffer == NULL) { 5086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *error = U_MEMORY_ALLOCATION_ERROR; 5096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucbuf_close(buf); 5106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return NULL; 5116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org buf->currentPos=buf->buffer; 5136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org buf->bufLimit=buf->buffer; 5146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U_FAILURE(*error)){ 5156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fprintf(stderr, "Could not open codepage [%s]: %s\n", *cp, u_errorName(*error)); 5166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucbuf_close(buf); 5176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return NULL; 5186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucbuf_fillucbuf(buf,error); 5206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U_FAILURE(*error)){ 
5216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucbuf_close(buf); 5226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return NULL; 5236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return buf; 5256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *error =U_FILE_ACCESS_ERROR; 5276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return NULL; 5286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 5296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* TODO: this method will fail if at the 5336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * begining of buffer and the uchar to unget 5346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * is from the previous buffer. Need to implement 5356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * system to take care of that situation. 
5366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 5376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI void U_EXPORT2 5386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgucbuf_ungetc(int32_t c,UCHARBUF* buf){ 5396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* decrement currentPos pointer 5406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * if not at the begining of buffer 5416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 5426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(buf->currentPos!=buf->buffer){ 5436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(*(buf->currentPos-1)==c){ 5446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org buf->currentPos--; 5456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 5466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* ungetc failed - did not match. */ 5476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 5496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* ungetc failed - beginning of buffer. 
*/ 5506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 5526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* frees the resources of UChar* buffer */ 5546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic void 5556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgucbuf_closebuf(UCHARBUF* buf){ 5566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uprv_free(buf->buffer); 5576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org buf->buffer = NULL; 5586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 5596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* close the buf and release resources*/ 5616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI void U_EXPORT2 5626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgucbuf_close(UCHARBUF* buf){ 5636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(buf!=NULL){ 5646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(buf->conv){ 5656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_close(buf->conv); 5666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org T_FileStream_close(buf->in); 5686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucbuf_closebuf(buf); 5696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uprv_free(buf); 5706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 5726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* rewind the buf and file stream */ 5746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI void U_EXPORT2 
5756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgucbuf_rewind(UCHARBUF* buf,UErrorCode* error){ 5766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(error==NULL || U_FAILURE(*error)){ 5776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 5786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(buf){ 5806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org buf->currentPos=buf->buffer; 5816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org buf->bufLimit=buf->buffer; 5826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org T_FileStream_rewind(buf->in); 5836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org buf->remaining=T_FileStream_size(buf->in)-buf->signatureLength; 5846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_resetToUnicode(buf->conv); 5866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(buf->signatureLength>0) { 5876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar target[1]={ 0 }; 5886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar* pTarget; 5896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org char start[8]; 5906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char* pStart; 5916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t numRead; 5926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* read the signature bytes */ 5946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org numRead=T_FileStream_read(buf->in, start, buf->signatureLength); 5956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* convert and ignore initial U+FEFF, and the buffer overflow */ 5976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pTarget = target; 
5986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pStart = start; 5996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_toUnicode(buf->conv, &pTarget, target+1, &pStart, start+numRead, NULL, FALSE, error); 6006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(*error==U_BUFFER_OVERFLOW_ERROR) { 6016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *error=U_ZERO_ERROR; 6026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* verify that we successfully read exactly U+FEFF */ 6056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U_SUCCESS(*error) && (numRead!=buf->signatureLength || pTarget!=(target+1) || target[0]!=0xfeff)) { 6066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *error=U_INTERNAL_PROGRAM_ERROR; 6076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 6116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI int32_t U_EXPORT2 6146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgucbuf_size(UCHARBUF* buf){ 6156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(buf){ 6166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(buf->isBuffered){ 6176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return (T_FileStream_size(buf->in)-buf->signatureLength)/ucnv_getMinCharSize(buf->conv); 6186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org }else{ 6196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return (int32_t)(buf->bufLimit - buf->buffer); 6206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 
6216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0; 6236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 6246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI const UChar* U_EXPORT2 6266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgucbuf_getBuffer(UCHARBUF* buf,int32_t* len,UErrorCode* error){ 6276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(error==NULL || U_FAILURE(*error)){ 6286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return NULL; 6296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(buf==NULL || len==NULL){ 6316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *error = U_ILLEGAL_ARGUMENT_ERROR; 6326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return NULL; 6336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *len = (int32_t)(buf->bufLimit - buf->buffer); 6356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return buf->buffer; 6366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 6376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI const char* U_EXPORT2 6396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgucbuf_resolveFileName(const char* inputDir, const char* fileName, char* target, int32_t* len, UErrorCode* status){ 6406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t requiredLen = 0; 6416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t dirlen = 0; 6426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t filelen = 0; 6436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(status==NULL || U_FAILURE(*status)){ 
6446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return NULL; 6456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(inputDir == NULL || fileName == NULL || len==NULL || (target==NULL && *len>0)){ 6486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *status = U_ILLEGAL_ARGUMENT_ERROR; 6496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return NULL; 6506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org dirlen = (int32_t)uprv_strlen(inputDir); 6546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org filelen = (int32_t)uprv_strlen(fileName); 6556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(inputDir[dirlen-1] != U_FILE_SEP_CHAR) { 6566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org requiredLen = dirlen + filelen + 2; 6576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if((*len < requiredLen) || target==NULL){ 6586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *len = requiredLen; 6596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *status = U_BUFFER_OVERFLOW_ERROR; 6606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return NULL; 6616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org target[0] = '\0'; 6646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 6656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * append the input dir to openFileName if the first char in 6666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * filename is not file seperation char and the last char input directory is not '.'. 
6676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * This is to support : 6686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * genrb -s. /home/icu/data 6696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * genrb -s. icu/data 6706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * The user cannot mix notations like 6716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * genrb -s. /icu/data --- the absolute path specified. -s redundant 6726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * user should use 6736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * genrb -s. icu/data --- start from CWD and look in icu/data dir 6746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 6756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if( (fileName[0] != U_FILE_SEP_CHAR) && (inputDir[dirlen-1] !='.')){ 6766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uprv_strcpy(target, inputDir); 6776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org target[dirlen] = U_FILE_SEP_CHAR; 6786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org target[dirlen + 1] = '\0'; 6806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 6816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org requiredLen = dirlen + filelen + 1; 6826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if((*len < requiredLen) || target==NULL){ 6836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *len = requiredLen; 6846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *status = U_BUFFER_OVERFLOW_ERROR; 6856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return NULL; 6866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uprv_strcpy(target, inputDir); 
6896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uprv_strcat(target, fileName); 6926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return target; 6936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 6946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* 6956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Unicode TR 13 says any of the below chars is 6966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * a new line char in a readline function in addition 6976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * to CR+LF combination which needs to be 6986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * handled seperately 6996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 7006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic UBool ucbuf_isCharNewLine(UChar c){ 7016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org switch(c){ 7026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case 0x000A: /* LF */ 7036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case 0x000D: /* CR */ 7046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case 0x000C: /* FF */ 7056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case 0x0085: /* NEL */ 7066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case 0x2028: /* LS */ 7076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case 0x2029: /* PS */ 7086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return TRUE; 7096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org default: 7106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 7116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 7126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 7136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
7146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CAPI const UChar* U_EXPORT2 7156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgucbuf_readline(UCHARBUF* buf,int32_t* len,UErrorCode* err){ 7166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar* temp = buf->currentPos; 7176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar* savePos =NULL; 7186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar c=0x0000; 7196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(buf->isBuffered){ 7206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* The input is buffered we have to do more 7216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * for returning a pointer U_TRUNCATED_CHAR_FOUND 7226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 7236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for(;;){ 7246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c = *temp++; 7256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(buf->remaining==0){ 7266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return NULL; /* end of file is reached return NULL */ 7276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 7286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(temp>=buf->bufLimit && buf->currentPos == buf->buffer){ 7296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *err= U_TRUNCATED_CHAR_FOUND; 7306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return NULL; 7316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org }else{ 7326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucbuf_fillucbuf(buf,err); 7336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U_FAILURE(*err)){ 7346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return NULL; 7356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 7366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 
7376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 7386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Accoding to TR 13 readLine functions must interpret 7396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * CR, CR+LF, LF, NEL, PS, LS or FF as line seperators 7406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 7416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* Windows CR LF */ 7426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(c ==0x0d && temp+1<=buf->bufLimit && *(temp+1) == 0x0a ){ 7436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *len = (int32_t)(temp++ - buf->currentPos); 7446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org savePos = buf->currentPos; 7456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org buf->currentPos = temp; 7466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return savePos; 7476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 7486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* else */ 7496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 7506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (temp>=buf->bufLimit|| ucbuf_isCharNewLine(c)){ /* Unipad inserts 2028 line separators! 
*/ 7516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *len = (int32_t)(temp - buf->currentPos); 7526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org savePos = buf->currentPos; 7536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org buf->currentPos = temp; 7546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return savePos; 7556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 7566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 7576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org }else{ 7586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* we know that all input is read into the internal 7596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * buffer so we can safely return pointers 7606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 7616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for(;;){ 7626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c = *temp++; 7636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 7646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(buf->currentPos==buf->bufLimit){ 7656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return NULL; /* end of file is reached return NULL */ 7666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 7676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* Windows CR LF */ 7686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(c ==0x0d && temp+1<=buf->bufLimit && *(temp+1) == 0x0a ){ 7696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *len = (int32_t)(temp++ - buf->currentPos); 7706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org savePos = buf->currentPos; 7716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org buf->currentPos = temp; 7726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return savePos; 7736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 7746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* else 
*/ 7756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (temp>=buf->bufLimit|| ucbuf_isCharNewLine(c)) { /* Unipad inserts 2028 line separators! */ 7766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *len = (int32_t)(temp - buf->currentPos); 7776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org savePos = buf->currentPos; 7786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org buf->currentPos = temp; 7796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return savePos; 7806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 7816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 7826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 7836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* not reached */ 7846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* A compiler warning will appear if all paths don't contain a return statement. */ 7856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* return NULL;*/ 7866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 7876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif 788