1ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 2ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru********************************************************************** 383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius* Copyright (c) 2004,2011 International Business Machines 4ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Corporation and others. All Rights Reserved. 5ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru********************************************************************** 6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Author: Alan Liu 7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Created: March 19 2004 8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Since: ICU 3.0 9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru********************************************************************** 10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/ 11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "textfile.h" 12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "cmemory.h" 13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "cstring.h" 14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "intltest.h" 15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "util.h" 16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// If the symbol CCP is defined, then the 'name' and 'encoding' 18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// constructor parameters are copied. Otherwise they are aliased. 19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// #define CCP 20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruTextFile::TextFile(const char* _name, const char* _encoding, UErrorCode& ec) : 22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru file(0), 23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru name(0), encoding(0), 24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buffer(0), 25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru capacity(0), 26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru lineNo(0) 27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (U_FAILURE(ec) || _name == 0 || _encoding == 0) { 29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (U_SUCCESS(ec)) { 30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ec = U_ILLEGAL_ARGUMENT_ERROR; 31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#ifdef CCP 36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru name = uprv_malloc(uprv_strlen(_name) + 1); 37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru encoding = uprv_malloc(uprv_strlen(_encoding) + 1); 38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (name == 0 || encoding == 0) { 39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ec = U_MEMORY_ALLOCATION_ERROR; 40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_strcpy(name, _name); 43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_strcpy(encoding, _encoding); 44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#else 45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru name = (char*) _name; 46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru encoding = (char*) _encoding; 47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const char* testDir = IntlTest::getSourceTestData(ec); 50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (U_FAILURE(ec)) { 51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (!ensureCapacity((int32_t)(uprv_strlen(testDir) + uprv_strlen(name) + 1))) { 54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ec = U_MEMORY_ALLOCATION_ERROR; 55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_strcpy(buffer, testDir); 58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_strcat(buffer, name); 59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru file = T_FileStream_open(buffer, "rb"); 61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (file == 0) { 62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ec = U_ILLEGAL_ARGUMENT_ERROR; 63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruTextFile::~TextFile() { 68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (file != 0) T_FileStream_close(file); 69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (buffer != 0) uprv_free(buffer); 70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#ifdef CCP 71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_free(name); 72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_free(encoding); 73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool TextFile::readLine(UnicodeString& line, UErrorCode& ec) { 77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (T_FileStream_eof(file)) { 78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return FALSE; 79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Note: 'buffer' may change after ensureCapacity() is called, 81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // so don't use 82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // p=buffer; *p++=c; 83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // but rather 84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // i=; buffer[i++]=c; 85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t n = 0; 86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for (;;) { 87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int c = T_FileStream_getc(file); // sic: int, not int32_t 88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (c < 0 || c == 0xD || c == 0xA) { 89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // consume 0xA following 0xD 90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (c == 0xD) { 91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru c = T_FileStream_getc(file); 92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (c != 0xA && c >= 0) { 93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru T_FileStream_ungetc(c, file); 94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (!setBuffer(n++, c, ec)) return FALSE; 99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (!setBuffer(n++, 0, ec)) return FALSE; 101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeString str(buffer, encoding); 102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Remove BOM in first line, if present 103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (lineNo == 0 && str[0] == 0xFEFF) { 104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru str.remove(0, 1); 105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++lineNo; 107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru line = str.unescape(); 108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return TRUE; 109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool TextFile::readLineSkippingComments(UnicodeString& line, UErrorCode& ec, 112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool trim) { 113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for (;;) { 114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (!readLine(line, ec)) return FALSE; 115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Skip over white space 116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t pos = 0; 117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ICU_Utility::skipWhitespace(line, pos, TRUE); 118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Ignore blank lines and comment lines 119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (pos == line.length() || line.charAt(pos) == 0x23/*'#'*/) { 120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru continue; 121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Process line 123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (trim) line.remove(0, pos); 124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return TRUE; 125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Set buffer[index] to c, growing buffer if necessary. Return TRUE if 130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * successful. 131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool TextFile::setBuffer(int32_t index, char c, UErrorCode& ec) { 133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (capacity <= index) { 134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (!ensureCapacity(index+1)) { 135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ec = U_MEMORY_ALLOCATION_ERROR; 136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return FALSE; 137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buffer[index] = c; 140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return TRUE; 141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Make sure that 'buffer' has at least 'mincapacity' bytes. 145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Return TRUE upon success. Upon return, 'buffer' may change 146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * value. In any case, previous contents are preserved. 147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 14883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius #define LOWEST_MIN_CAPACITY 64 149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool TextFile::ensureCapacity(int32_t mincapacity) { 150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (capacity >= mincapacity) { 151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return TRUE; 152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Grow by factor of 2 to prevent frequent allocation 155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Note: 'capacity' may be 0 15683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius int32_t i = (capacity < LOWEST_MIN_CAPACITY)? LOWEST_MIN_CAPACITY: capacity; 15783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius while (i < mincapacity) { 158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru i <<= 1; 159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (i < 0) { 160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru i = 0x7FFFFFFF; 161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru mincapacity = i; 165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Simple realloc() no good; contents not preserved 167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Note: 'buffer' may be 0 168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru char* newbuffer = (char*) uprv_malloc(mincapacity); 169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (newbuffer == 0) { 170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return FALSE; 171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (buffer != 0) { 173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_strncpy(newbuffer, buffer, capacity); 174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_free(buffer); 175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buffer = newbuffer; 177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru capacity = mincapacity; 178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return TRUE; 179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 180ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 181