1/*
2*******************************************************************************
3*
4*   Copyright (C) 1998-2008, International Business Machines
5*   Corporation and others.  All Rights Reserved.
6*
7*******************************************************************************
8*
9* File ucbuf.h
10*
11* Modification History:
12*
13*   Date        Name        Description
14*   05/10/01    Ram         Creation.
15*
16* This API reads in files and returns UChars
17*******************************************************************************
18*/
19
20#include "unicode/ucnv.h"
21#include "filestrm.h"
22
23#if !UCONFIG_NO_CONVERSION
24
25#ifndef UCBUF_H
26#define UCBUF_H 1
27
/**
 * Handle type used by every ucbuf_* function declared below.
 * struct UCHARBUF is only forward-declared in this header, so callers
 * can hold and pass pointers to it but cannot see its members here.
 */
28typedef struct UCHARBUF UCHARBUF;
29/**
30 * End of file value.
 * NOTE(review): this is an unsigned literal while the ucbuf_getc* functions
 * are declared to return int32_t; presumably those functions return this
 * value at end of input and callers compare after conversion - confirm
 * against the implementation.
31 */
32#define U_EOF 0xFFFFFFFF
33/**
34 * Error value if a sequence cannot be unescaped.
 * NOTE(review): presumably returned by ucbuf_getcx32(), the only function
 * declared here whose documentation mentions unescaping - confirm.
35 */
36#define U_ERR 0xFFFFFFFE
37
/**
 * Pairs a UChar pointer with an int32_t length.
 * NOTE(review): presumably `name` points at the text of one line and `len`
 * is its length in UChars; no declaration visible in this header uses
 * ULine, so confirm the meaning and ownership of `name` against its users.
 */
38typedef struct ULine ULine;
39
40struct  ULine {
41    UChar     *name;   /* pointer to UChar data; ownership not stated in this header */
42    int32_t   len;     /* length associated with `name` (units not stated here) */
43};
44
45/**
46 * Opens a UCHARBUF on the named file, using the given code page for conversion
47 * @param fileName  Name of the file to open.
48 * @param codepage  The encoding of the file stream to convert to Unicode.
49 *                  If *codepage is NULL on input the API will try to autodetect
50 *                  popular Unicode encodings.
 *                  NOTE(review): since this is a pointer to a pointer, it
 *                  presumably also receives the detected encoding name on
 *                  output - confirm against the implementation.
51 * @param showWarning Flag to print out warnings to STDOUT
52 * @param buffered  If TRUE performs a buffered read of the input file. If FALSE reads
53 *                  the whole file into memory and converts it.
54 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
55 *        indicates a failure on entry, the function will immediately return.
56 *        On exit the value will indicate the success of the operation.
57 * @return pointer to the newly opened UCHARBUF; release it with ucbuf_close()
58 */
59U_CAPI UCHARBUF* U_EXPORT2
60ucbuf_open(const char* fileName,const char** codepage,UBool showWarning, UBool buffered, UErrorCode* err);
61
62/**
63 * Gets a UTF-16 code unit at the current position from the converted buffer
64 * and increments the current position
65 * @param buf Pointer to UCHARBUF structure
66 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
67 *        indicates a failure on entry, the function will immediately return.
68 *        On exit the value will indicate the success of the operation.
 * @return The code unit read, as an int32_t.
 *         NOTE(review): presumably U_EOF once the input is exhausted -
 *         confirm against the implementation.
69 */
70U_CAPI int32_t U_EXPORT2
71ucbuf_getc(UCHARBUF* buf,UErrorCode* err);
72
73/**
74 * Gets a UTF-32 code point at the current position from the converted buffer
75 * and increments the current position
76 * @param buf Pointer to UCHARBUF structure
77 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
78 *        indicates a failure on entry, the function will immediately return.
79 *        On exit the value will indicate the success of the operation.
 * @return The code point read, as an int32_t.
 *         NOTE(review): presumably U_EOF once the input is exhausted -
 *         confirm against the implementation.
80 */
81U_CAPI int32_t U_EXPORT2
82ucbuf_getc32(UCHARBUF* buf,UErrorCode* err);
83
84/**
85 * Gets a UTF-16 code unit at the current position from the converted buffer after
86 * unescaping and increments the current position. If the escape sequence is for a UTF-32
87 * code point (\\Uxxxxxxxx) then a UTF-32 code point is returned
88 * @param buf Pointer to UCHARBUF structure
89 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
90 *        indicates a failure on entry, the function will immediately return.
91 *        On exit the value will indicate the success of the operation.
 * @return The unescaped code unit or code point, as an int32_t.
 *         NOTE(review): presumably U_ERR when an escape sequence cannot be
 *         unescaped and U_EOF at end of input - confirm against the
 *         implementation.
92 */
93U_CAPI int32_t U_EXPORT2
94ucbuf_getcx32(UCHARBUF* buf,UErrorCode* err);
95
96/**
97 * Gets a pointer to the current position in the internal buffer and the length of the line.
98 * It is imperative to make a copy of the returned buffer before performing operations on it,
 * because the pointer refers to the UCHARBUF's internal storage.
99 * @param buf Pointer to UCHARBUF structure
100 * @param len Output param to receive the length of the buffer returned, up to the end of the line
101 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
102 *        indicates a failure on entry, the function will immediately return.
103 *        On exit the value will indicate the success of the operation.
104 *        Error: U_TRUNCATED_CHAR_FOUND
105 * @return Pointer to the internal buffer, NULL if EOF
106 */
107U_CAPI const UChar* U_EXPORT2
108ucbuf_readline(UCHARBUF* buf,int32_t* len, UErrorCode* err);
109
110
111/**
112 * Resets the buffers and the underlying file stream.
 * NOTE(review): presumably the next read then starts again from the
 * beginning of the input - confirm against the implementation.
113 * @param buf Pointer to UCHARBUF structure
114 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
115 *        indicates a failure on entry, the function will immediately return.
116 *        On exit the value will indicate the success of the operation.
117 */
118U_CAPI void U_EXPORT2
119ucbuf_rewind(UCHARBUF* buf,UErrorCode* err);
120
121/**
122 * Returns a pointer to the internal converted buffer
123 * @param buf Pointer to UCHARBUF structure
124 * @param len Pointer to int32_t to receive the length of the buffer
125 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
126 *        indicates a failure on entry, the function will immediately return.
127 *        On exit the value will indicate the success of the operation.
128 * @return Pointer to the internal UChar buffer (const: callers must not modify it)
129 */
130U_CAPI const UChar* U_EXPORT2
131ucbuf_getBuffer(UCHARBUF* buf,int32_t* len,UErrorCode* err);
132
133/**
134 * Closes the UCHARBUF structure members and cleans up the malloc'ed memory.
 * NOTE(review): presumably buf, and any pointer previously obtained from
 * ucbuf_readline() or ucbuf_getBuffer(), must not be used after this call -
 * confirm against the implementation.
135 * @param buf Pointer to UCHARBUF structure
136 */
137U_CAPI void U_EXPORT2
138ucbuf_close(UCHARBUF* buf);
139
140/**
141 * Rewinds the buffer by one codepoint. Does not rewind over escaped characters.
 * @param ungetChar NOTE(review): presumably the code point most recently read
 *        from buf that is being pushed back - confirm how the implementation
 *        uses this value.
 * @param buf Pointer to UCHARBUF structure
142 */
143U_CAPI void U_EXPORT2
144ucbuf_ungetc(int32_t ungetChar,UCHARBUF* buf);
145
146
147/**
148 * Autodetects the encoding of the file stream. Only Unicode charsets are autodetected.
149 * Some Unicode charsets are stateful and need byte identifiers to be converted also to bring
150 * the converter to correct state for converting the rest of the stream. So the UConverter parameter
151 * is necessary.
152 * If the charset was autodetected, the caller must close both the input FileStream
153 * and the converter.
154 *
155 * @param fileName The name of the file to be opened and have its encoding autodetected
156 * @param conv  Output param to receive the opened converter if autodetected; NULL otherwise.
157 * @param cp Output param to receive the detected encoding
 * @param signatureLength NOTE(review): undocumented output param; presumably
 *        receives the length in bytes of the detected encoding signature -
 *        confirm against the implementation.
158 * @param status is a pointer to a valid <code>UErrorCode</code> value. If this value
159 *        indicates a failure on entry, the function will immediately return.
160 *        On exit the value will indicate the success of the operation.
161 * @return The input FileStream if its charset was autodetected; NULL otherwise.
162 */
163U_CAPI FileStream * U_EXPORT2
164ucbuf_autodetect(const char* fileName, const char** cp,UConverter** conv,
165int32_t* signatureLength, UErrorCode* status);
166
167/**
168 * Autodetects the encoding of the file stream. Only Unicode charsets are autodetected.
169 * Some Unicode charsets are stateful and need byte identifiers to be converted also to bring
170 * the converter to correct state for converting the rest of the stream. So the UConverter parameter
171 * is necessary.
172 * If the charset was autodetected, the caller must close the converter.
173 *
174 * @param in The file stream whose encoding is to be detected
175 * @param conv  Output param to receive the opened converter if autodetected; NULL otherwise.
176 * @param cp Output param to receive the detected encoding
 * @param signatureLength NOTE(review): undocumented output param; presumably
 *        receives the length in bytes of the detected encoding signature -
 *        confirm against the implementation.
177 * @param status is a pointer to a valid <code>UErrorCode</code> value. If this value
178 *        indicates a failure on entry, the function will immediately return.
179 *        On exit the value will indicate the success of the operation.
180 * @return Boolean whether the Unicode charset was autodetected.
181 */
182
183U_CAPI UBool U_EXPORT2
184ucbuf_autodetect_fs(FileStream* in, const char** cp, UConverter** conv, int32_t* signatureLength, UErrorCode* status);
185
186/**
187 * Returns the approximate size in UChars required for converting the file to UChars
 * @param buf Pointer to UCHARBUF structure
 * @return Approximate number of UChars needed; treat it as an estimate, not an exact count
188 */
189U_CAPI int32_t U_EXPORT2
190ucbuf_size(UCHARBUF* buf);
191
/**
 * NOTE(review): this function is undocumented in this header; everything
 * below is inferred from the signature alone and must be confirmed against
 * the implementation.
 * Presumably combines inputDir and fileName into a path written to target.
 * @param inputDir presumably the directory to resolve fileName against
 * @param fileName presumably the file name to resolve
 * @param target   presumably a caller-supplied buffer receiving the result
 * @param len      presumably the capacity of target on input and/or the
 *                 required or written length on output
 * @param status   pointer to a UErrorCode; presumably follows the same
 *                 fail-on-entry convention as the other functions here
 * @return a const char*; presumably the resolved name (possibly target itself)
 */
192U_CAPI const char* U_EXPORT2
193ucbuf_resolveFileName(const char* inputDir, const char* fileName, char* target, int32_t* len, UErrorCode* status);
194
195#endif
196#endif
197
198