/*
**********************************************************************
* Copyright (c) 2004,2011 International Business Machines
* Corporation and others.  All Rights Reserved.
**********************************************************************
* Author: Alan Liu
* Created: March 19 2004
* Since: ICU 3.0
**********************************************************************
*/
11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "textfile.h"
12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "cmemory.h"
13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "cstring.h"
14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "intltest.h"
15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "util.h"
16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
// If the symbol CCP is defined, then the 'name' and 'encoding'
// constructor parameters are copied.  Otherwise they are aliased (the
// pointers are stored as-is), so the caller's strings must remain valid
// for as long as the TextFile is in use.
// #define CCP
20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruTextFile::TextFile(const char* _name, const char* _encoding, UErrorCode& ec) :
22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    file(0),
23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    name(0), encoding(0),
24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    buffer(0),
25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    capacity(0),
26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    lineNo(0)
27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (U_FAILURE(ec) || _name == 0 || _encoding == 0) {
29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (U_SUCCESS(ec)) {
30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            ec = U_ILLEGAL_ARGUMENT_ERROR;
31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return;
33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#ifdef CCP
36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    name = uprv_malloc(uprv_strlen(_name) + 1);
37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    encoding = uprv_malloc(uprv_strlen(_encoding) + 1);
38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (name == 0 || encoding == 0) {
39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ec = U_MEMORY_ALLOCATION_ERROR;
40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return;
41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uprv_strcpy(name, _name);
43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uprv_strcpy(encoding, _encoding);
44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#else
45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    name = (char*) _name;
46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    encoding = (char*) _encoding;
47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif
48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const char* testDir = IntlTest::getSourceTestData(ec);
50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (U_FAILURE(ec)) {
51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return;
52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (!ensureCapacity((int32_t)(uprv_strlen(testDir) + uprv_strlen(name) + 1))) {
54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ec = U_MEMORY_ALLOCATION_ERROR;
55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return;
56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uprv_strcpy(buffer, testDir);
58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uprv_strcat(buffer, name);
59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    file = T_FileStream_open(buffer, "rb");
61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (file == 0) {
62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ec = U_ILLEGAL_ARGUMENT_ERROR;
63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return;
64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruTextFile::~TextFile() {
68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (file != 0) T_FileStream_close(file);
69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (buffer != 0) uprv_free(buffer);
70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#ifdef CCP
71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uprv_free(name);
72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uprv_free(encoding);
73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif
74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool TextFile::readLine(UnicodeString& line, UErrorCode& ec) {
77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (T_FileStream_eof(file)) {
78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return FALSE;
79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // Note: 'buffer' may change after ensureCapacity() is called,
81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // so don't use
82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //   p=buffer; *p++=c;
83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // but rather
84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    //   i=; buffer[i++]=c;
85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t n = 0;
86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    for (;;) {
87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        int c = T_FileStream_getc(file); // sic: int, not int32_t
88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (c < 0 || c == 0xD || c == 0xA) {
89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            // consume 0xA following 0xD
90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if (c == 0xD) {
91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                c = T_FileStream_getc(file);
92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if (c != 0xA && c >= 0) {
93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    T_FileStream_ungetc(c, file);
94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            break;
97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (!setBuffer(n++, c, ec)) return FALSE;
99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (!setBuffer(n++, 0, ec)) return FALSE;
101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UnicodeString str(buffer, encoding);
102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // Remove BOM in first line, if present
103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (lineNo == 0 && str[0] == 0xFEFF) {
104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        str.remove(0, 1);
105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    ++lineNo;
107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    line = str.unescape();
108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return TRUE;
109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool TextFile::readLineSkippingComments(UnicodeString& line, UErrorCode& ec,
112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                         UBool trim) {
113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    for (;;) {
114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (!readLine(line, ec)) return FALSE;
115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // Skip over white space
116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        int32_t pos = 0;
117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ICU_Utility::skipWhitespace(line, pos, TRUE);
118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // Ignore blank lines and comment lines
119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (pos == line.length() || line.charAt(pos) == 0x23/*'#'*/) {
120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            continue;
121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // Process line
123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (trim) line.remove(0, pos);
124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return TRUE;
125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Set buffer[index] to c, growing buffer if necessary. Return TRUE if
130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * successful.
131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool TextFile::setBuffer(int32_t index, char c, UErrorCode& ec) {
133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (capacity <= index) {
134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (!ensureCapacity(index+1)) {
135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            ec = U_MEMORY_ALLOCATION_ERROR;
136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            return FALSE;
137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    buffer[index] = c;
140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return TRUE;
141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Make sure that 'buffer' has at least 'mincapacity' bytes.
145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Return TRUE upon success. Upon return, 'buffer' may change
146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * value. In any case, previous contents are preserved.
147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
14883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius #define LOWEST_MIN_CAPACITY 64
149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool TextFile::ensureCapacity(int32_t mincapacity) {
150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (capacity >= mincapacity) {
151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return TRUE;
152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // Grow by factor of 2 to prevent frequent allocation
155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // Note: 'capacity' may be 0
15683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    int32_t i = (capacity < LOWEST_MIN_CAPACITY)? LOWEST_MIN_CAPACITY: capacity;
15783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    while (i < mincapacity) {
158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        i <<= 1;
159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (i < 0) {
160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            i = 0x7FFFFFFF;
161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            break;
162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    mincapacity = i;
165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // Simple realloc() no good; contents not preserved
167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // Note: 'buffer' may be 0
168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    char* newbuffer = (char*) uprv_malloc(mincapacity);
169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (newbuffer == 0) {
170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return FALSE;
171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (buffer != 0) {
173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        uprv_strncpy(newbuffer, buffer, capacity);
174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        uprv_free(buffer);
175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    buffer = newbuffer;
177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    capacity = mincapacity;
178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return TRUE;
179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
180ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
181