ustdio.c revision 51cfa1a9a96cad34675a6415fe86dfdf3f525bb6
1/*
2 ******************************************************************************
3 *
4 *   Copyright (C) 1998-2007, International Business Machines
5 *   Corporation and others.  All Rights Reserved.
6 *
7 ******************************************************************************
8 *
9 * File ustdio.c
10 *
11 * Modification History:
12 *
13 *   Date        Name        Description
14 *   11/18/98    stephen     Creation.
15 *   03/12/99    stephen     Modified for new C API.
16 *   07/19/99    stephen     Fixed read() and gets()
17 ******************************************************************************
18 */
19
20#include "unicode/ustdio.h"
21#include "unicode/putil.h"
22#include "cmemory.h"
23#include "cstring.h"
24#include "ufile.h"
25#include "ufmt_cmn.h"
26#include "unicode/ucnv.h"
27#include "unicode/ustring.h"
28
29#include <string.h>
30
31#define DELIM_LF 0x000A
32#define DELIM_VT 0x000B
33#define DELIM_FF 0x000C
34#define DELIM_CR 0x000D
35#define DELIM_NEL 0x0085
36#define DELIM_LS 0x2028
37#define DELIM_PS 0x2029
38
39/* TODO: is this correct for all codepages? Should we just use \n and let the converter handle it? */
40#ifdef U_WINDOWS
41static const UChar DELIMITERS [] = { DELIM_CR, DELIM_LF, 0x0000 };
42static const uint32_t DELIMITERS_LEN = 2;
43/* TODO: Default newline writing should be detected based upon the converter being used. */
44#else
45static const UChar DELIMITERS [] = { DELIM_LF, 0x0000 };
46static const uint32_t DELIMITERS_LEN = 1;
47#endif
48
/*
 * TRUE when c1 is one of the code units that can start a line terminator:
 * LF, VT, FF, CR, NEL, LS or PS.
 * Note: the argument is evaluated several times; pass side-effect-free
 * expressions only.
 */
#define IS_FIRST_STRING_DELIMITER(c1) \
    (UBool)((c1) == DELIM_LF \
        || (c1) == DELIM_VT \
        || (c1) == DELIM_FF \
        || (c1) == DELIM_CR \
        || (c1) == DELIM_NEL \
        || (c1) == DELIM_LS \
        || (c1) == DELIM_PS)

/* TRUE when c1 may be followed by a second unit of the same terminator (only CR). */
#define CAN_HAVE_COMBINED_STRING_DELIMITER(c1) (UBool)(DELIM_CR == (c1))

/* TRUE when c1 followed by c2 forms the two-unit terminator CR LF. */
#define IS_COMBINED_STRING_DELIMITER(c1, c2) \
    (UBool)(DELIM_CR == (c1) && DELIM_LF == (c2))
57
58
59#if !UCONFIG_NO_TRANSLITERATION
60
61U_CAPI UTransliterator* U_EXPORT2
62u_fsettransliterator(UFILE *file, UFileDirection direction,
63                     UTransliterator *adopt, UErrorCode *status)
64{
65    UTransliterator *old = NULL;
66
67    if(U_FAILURE(*status))
68    {
69        return adopt;
70    }
71
72    if(!file)
73    {
74        *status = U_ILLEGAL_ARGUMENT_ERROR;
75        return adopt;
76    }
77
78    if(direction & U_READ)
79    {
80        /** TODO: implement */
81        *status = U_UNSUPPORTED_ERROR;
82        return adopt;
83    }
84
85    if(adopt == NULL) /* they are clearing it */
86    {
87        if(file->fTranslit != NULL)
88        {
89            /* TODO: Check side */
90            old = file->fTranslit->translit;
91            uprv_free(file->fTranslit->buffer);
92            file->fTranslit->buffer=NULL;
93            uprv_free(file->fTranslit);
94            file->fTranslit=NULL;
95        }
96    }
97    else
98    {
99        if(file->fTranslit == NULL)
100        {
101            file->fTranslit = (UFILETranslitBuffer*) uprv_malloc(sizeof(UFILETranslitBuffer));
102            if(!file->fTranslit)
103            {
104                *status = U_MEMORY_ALLOCATION_ERROR;
105                return adopt;
106            }
107            file->fTranslit->capacity = 0;
108            file->fTranslit->length = 0;
109            file->fTranslit->pos = 0;
110            file->fTranslit->buffer = NULL;
111        }
112        else
113        {
114            old = file->fTranslit->translit;
115            ufile_flush_translit(file);
116        }
117
118        file->fTranslit->translit = adopt;
119    }
120
121    return old;
122}
123
124static const UChar * u_file_translit(UFILE *f, const UChar *src, int32_t *count, UBool flush)
125{
126    int32_t newlen;
127    int32_t junkCount = 0;
128    int32_t textLength;
129    int32_t textLimit;
130    UTransPosition pos;
131    UErrorCode status = U_ZERO_ERROR;
132
133    if(count == NULL)
134    {
135        count = &junkCount;
136    }
137
138    if ((!f)||(!f->fTranslit)||(!f->fTranslit->translit))
139    {
140        /* fast path */
141        return src;
142    }
143
144    /* First: slide over everything */
145    if(f->fTranslit->length > f->fTranslit->pos)
146    {
147        memmove(f->fTranslit->buffer, f->fTranslit->buffer + f->fTranslit->pos,
148            (f->fTranslit->length - f->fTranslit->pos)*sizeof(UChar));
149    }
150    f->fTranslit->length -= f->fTranslit->pos; /* always */
151    f->fTranslit->pos = 0;
152
153    /* Calculate new buffer size needed */
154    newlen = (*count + f->fTranslit->length) * 4;
155
156    if(newlen > f->fTranslit->capacity)
157    {
158        if(f->fTranslit->buffer == NULL)
159        {
160            f->fTranslit->buffer = (UChar*)uprv_malloc(newlen * sizeof(UChar));
161        }
162        else
163        {
164            f->fTranslit->buffer = (UChar*)uprv_realloc(f->fTranslit->buffer, newlen * sizeof(UChar));
165        }
166        f->fTranslit->capacity = newlen;
167    }
168
169    /* Now, copy any data over */
170    u_strncpy(f->fTranslit->buffer + f->fTranslit->length,
171        src,
172        *count);
173    f->fTranslit->length += *count;
174
175    /* Now, translit in place as much as we can  */
176    if(flush == FALSE)
177    {
178        textLength = f->fTranslit->length;
179        pos.contextStart = 0;
180        pos.contextLimit = textLength;
181        pos.start        = 0;
182        pos.limit        = textLength;
183
184        utrans_transIncrementalUChars(f->fTranslit->translit,
185            f->fTranslit->buffer, /* because we shifted */
186            &textLength,
187            f->fTranslit->capacity,
188            &pos,
189            &status);
190
191        /* now: start/limit point to the transliterated text */
192        /* Transliterated is [buffer..pos.start) */
193        *count            = pos.start;
194        f->fTranslit->pos = pos.start;
195        f->fTranslit->length = pos.limit;
196
197        return f->fTranslit->buffer;
198    }
199    else
200    {
201        textLength = f->fTranslit->length;
202        textLimit = f->fTranslit->length;
203
204        utrans_transUChars(f->fTranslit->translit,
205            f->fTranslit->buffer,
206            &textLength,
207            f->fTranslit->capacity,
208            0,
209            &textLimit,
210            &status);
211
212        /* out: converted len */
213        *count = textLimit;
214
215        /* Set pointers to 0 */
216        f->fTranslit->pos = 0;
217        f->fTranslit->length = 0;
218
219        return f->fTranslit->buffer;
220    }
221}
222
223#endif
224
225void
226ufile_flush_translit(UFILE *f)
227{
228#if !UCONFIG_NO_TRANSLITERATION
229    if((!f)||(!f->fTranslit))
230        return;
231#endif
232
233    u_file_write_flush(NULL, 0, f, FALSE, TRUE);
234}
235
236
237void
238ufile_close_translit(UFILE *f)
239{
240#if !UCONFIG_NO_TRANSLITERATION
241    if((!f)||(!f->fTranslit))
242        return;
243#endif
244
245    ufile_flush_translit(f);
246
247#if !UCONFIG_NO_TRANSLITERATION
248    if(f->fTranslit->translit)
249        utrans_close(f->fTranslit->translit);
250
251    if(f->fTranslit->buffer)
252    {
253        uprv_free(f->fTranslit->buffer);
254    }
255
256    uprv_free(f->fTranslit);
257    f->fTranslit = NULL;
258#endif
259}
260
261
262/* Input/output */
263
264U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
265u_fputs(const UChar    *s,
266        UFILE        *f)
267{
268    int32_t count = u_file_write(s, u_strlen(s), f);
269    count += u_file_write(DELIMITERS, DELIMITERS_LEN, f);
270    return count;
271}
272
273U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
274u_fputc(UChar32      uc,
275        UFILE        *f)
276{
277    UChar buf[2];
278    int32_t idx = 0;
279    UBool isError = FALSE;
280
281    U16_APPEND(buf, idx, sizeof(buf)/sizeof(*buf), uc, isError);
282    if (isError) {
283        return U_EOF;
284    }
285    return u_file_write(buf, idx, f) == idx ? uc : U_EOF;
286}
287
288
289U_CFUNC int32_t U_EXPORT2
290u_file_write_flush(const UChar *chars,
291                   int32_t     count,
292                   UFILE       *f,
293                   UBool       flushIO,
294                   UBool       flushTranslit)
295{
296    /* Set up conversion parameters */
297    UErrorCode  status       = U_ZERO_ERROR;
298    const UChar *mySource    = chars;
299    const UChar *mySourceEnd;
300    char        charBuffer[UFILE_CHARBUFFER_SIZE];
301    char        *myTarget   = charBuffer;
302    int32_t     written      = 0;
303    int32_t     numConverted = 0;
304
305    if (count < 0) {
306        count = u_strlen(chars);
307    }
308
309#if !UCONFIG_NO_TRANSLITERATION
310    if((f->fTranslit) && (f->fTranslit->translit))
311    {
312        /* Do the transliteration */
313        mySource = u_file_translit(f, chars, &count, flushTranslit);
314    }
315#endif
316
317    /* Write to a string. */
318    if (!f->fFile) {
319        int32_t charsLeft = (int32_t)(f->str.fLimit - f->str.fPos);
320        if (flushIO && charsLeft > count) {
321            count++;
322        }
323        written = ufmt_min(count, charsLeft);
324        u_strncpy(f->str.fPos, mySource, written);
325        f->str.fPos += written;
326        return written;
327    }
328
329    mySourceEnd = mySource + count;
330
331    /* Perform the conversion in a loop */
332    do {
333        status     = U_ZERO_ERROR;
334        if(f->fConverter != NULL) { /* We have a valid converter */
335            ucnv_fromUnicode(f->fConverter,
336                &myTarget,
337                charBuffer + UFILE_CHARBUFFER_SIZE,
338                &mySource,
339                mySourceEnd,
340                NULL,
341                flushIO,
342                &status);
343        } else { /*weiv: do the invariant conversion */
344            u_UCharsToChars(mySource, myTarget, count);
345            myTarget += count;
346        }
347        numConverted = (int32_t)(myTarget - charBuffer);
348
349        if (numConverted > 0) {
350            /* write the converted bytes */
351            fwrite(charBuffer,
352                sizeof(char),
353                numConverted,
354                f->fFile);
355
356            written     += numConverted;
357        }
358        myTarget     = charBuffer;
359    }
360    while(status == U_BUFFER_OVERFLOW_ERROR);
361
362    /* return # of chars written */
363    return written;
364}
365
366U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
367u_file_write(    const UChar     *chars,
368             int32_t        count,
369             UFILE         *f)
370{
371    return u_file_write_flush(chars,count,f,FALSE,FALSE);
372}
373
374
375/* private function used for buffering input */
376void
377ufile_fill_uchar_buffer(UFILE *f)
378{
379    UErrorCode  status;
380    const char  *mySource;
381    const char  *mySourceEnd;
382    UChar       *myTarget;
383    int32_t     bufferSize;
384    int32_t     maxCPBytes;
385    int32_t     bytesRead;
386    int32_t     availLength;
387    int32_t     dataSize;
388    char        charBuffer[UFILE_CHARBUFFER_SIZE];
389    u_localized_string *str;
390
391    if (f->fFile == NULL) {
392        /* There is nothing to do. It's a string. */
393        return;
394    }
395
396    str = &f->str;
397    dataSize = (int32_t)(str->fLimit - str->fPos);
398    if (f->fFileno == 0 && dataSize > 0) {
399        /* Don't read from stdin too many times. There is still some data. */
400        return;
401    }
402
403    /* shift the buffer if it isn't empty */
404    if(dataSize != 0) {
405        uprv_memmove(f->fUCBuffer, str->fPos, dataSize * sizeof(UChar));
406    }
407
408
409    /* record how much buffer space is available */
410    availLength = UFILE_UCHARBUFFER_SIZE - dataSize;
411
412    /* Determine the # of codepage bytes needed to fill our UChar buffer */
413    /* weiv: if converter is NULL, we use invariant converter with charwidth = 1)*/
414    maxCPBytes = availLength / (f->fConverter!=NULL?(2*ucnv_getMinCharSize(f->fConverter)):1);
415
416    /* Read in the data to convert */
417    if (f->fFileno == 0) {
418        /* Special case. Read from stdin one line at a time. */
419        char *retStr = fgets(charBuffer, ufmt_min(maxCPBytes, UFILE_CHARBUFFER_SIZE), f->fFile);
420        bytesRead = (int32_t)(retStr ? uprv_strlen(charBuffer) : 0);
421    }
422    else {
423        /* A normal file */
424        bytesRead = (int32_t)fread(charBuffer,
425            sizeof(char),
426            ufmt_min(maxCPBytes, UFILE_CHARBUFFER_SIZE),
427            f->fFile);
428    }
429
430    /* Set up conversion parameters */
431    status      = U_ZERO_ERROR;
432    mySource    = charBuffer;
433    mySourceEnd = charBuffer + bytesRead;
434    myTarget    = f->fUCBuffer + dataSize;
435    bufferSize  = UFILE_UCHARBUFFER_SIZE;
436
437    if(f->fConverter != NULL) { /* We have a valid converter */
438        /* Perform the conversion */
439        ucnv_toUnicode(f->fConverter,
440            &myTarget,
441            f->fUCBuffer + bufferSize,
442            &mySource,
443            mySourceEnd,
444            NULL,
445            (UBool)(feof(f->fFile) != 0),
446            &status);
447
448    } else { /*weiv: do the invariant conversion */
449        u_charsToUChars(mySource, myTarget, bytesRead);
450        myTarget += bytesRead;
451    }
452
453    /* update the pointers into our array */
454    str->fPos    = str->fBuffer;
455    str->fLimit  = myTarget;
456}
457
458U_CAPI UChar* U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
459u_fgets(UChar        *s,
460        int32_t       n,
461        UFILE        *f)
462{
463    int32_t dataSize;
464    int32_t count;
465    UChar *alias;
466    const UChar *limit;
467    UChar *sItr;
468    UChar currDelim = 0;
469    u_localized_string *str;
470
471    if (n <= 0) {
472        /* Caller screwed up. We need to write the null terminatior. */
473        return NULL;
474    }
475
476    /* fill the buffer if needed */
477    str = &f->str;
478    if (str->fPos >= str->fLimit) {
479        ufile_fill_uchar_buffer(f);
480    }
481
482    /* subtract 1 from n to compensate for the terminator */
483    --n;
484
485    /* determine the amount of data in the buffer */
486    dataSize = (int32_t)(str->fLimit - str->fPos);
487
488    /* if 0 characters were left, return 0 */
489    if (dataSize == 0)
490        return NULL;
491
492    /* otherwise, iteratively fill the buffer and copy */
493    count = 0;
494    sItr = s;
495    currDelim = 0;
496    while (dataSize > 0 && count < n) {
497        alias = str->fPos;
498
499        /* Find how much to copy */
500        if (dataSize < (n - count)) {
501            limit = str->fLimit;
502        }
503        else {
504            limit = alias + (n - count);
505        }
506
507        if (!currDelim) {
508            /* Copy UChars until we find the first occurrence of a delimiter character */
509            while (alias < limit && !IS_FIRST_STRING_DELIMITER(*alias)) {
510                count++;
511                *(sItr++) = *(alias++);
512            }
513            /* Preserve the newline */
514            if (alias < limit && IS_FIRST_STRING_DELIMITER(*alias)) {
515                if (CAN_HAVE_COMBINED_STRING_DELIMITER(*alias)) {
516                    currDelim = *alias;
517                }
518                else {
519                    currDelim = 1;  /* This isn't a newline, but it's used to say
520                                    that we should break later. We've checked all
521                                    possible newline combinations even across buffer
522                                    boundaries. */
523                }
524                count++;
525                *(sItr++) = *(alias++);
526            }
527        }
528        /* If we have a CRLF combination, preserve that too. */
529        if (alias < limit) {
530            if (currDelim && IS_COMBINED_STRING_DELIMITER(currDelim, *alias)) {
531                count++;
532                *(sItr++) = *(alias++);
533            }
534            currDelim = 1;  /* This isn't a newline, but it's used to say
535                            that we should break later. We've checked all
536                            possible newline combinations even across buffer
537                            boundaries. */
538        }
539
540        /* update the current buffer position */
541        str->fPos = alias;
542
543        /* if we found a delimiter */
544        if (currDelim == 1) {
545            /* break out */
546            break;
547        }
548
549        /* refill the buffer */
550        ufile_fill_uchar_buffer(f);
551
552        /* determine the amount of data in the buffer */
553        dataSize = (int32_t)(str->fLimit - str->fPos);
554    }
555
556    /* add the terminator and return s */
557    *sItr = 0x0000;
558    return s;
559}
560
561U_CFUNC UBool U_EXPORT2
562ufile_getch(UFILE *f, UChar *ch)
563{
564    UBool isValidChar = FALSE;
565
566    *ch = U_EOF;
567    /* if we have an available character in the buffer, return it */
568    if(f->str.fPos < f->str.fLimit){
569        *ch = *(f->str.fPos)++;
570        isValidChar = TRUE;
571    }
572    else if (f) {
573        /* otherwise, fill the buffer and return the next character */
574        if(f->str.fPos >= f->str.fLimit) {
575            ufile_fill_uchar_buffer(f);
576        }
577        if(f->str.fPos < f->str.fLimit) {
578            *ch = *(f->str.fPos)++;
579            isValidChar = TRUE;
580        }
581    }
582    return isValidChar;
583}
584
585U_CAPI UChar U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
586u_fgetc(UFILE        *f)
587{
588    UChar ch;
589    ufile_getch(f, &ch);
590    return ch;
591}
592
593U_CFUNC UBool U_EXPORT2
594ufile_getch32(UFILE *f, UChar32 *c32)
595{
596    UBool isValidChar = FALSE;
597    u_localized_string *str;
598
599    *c32 = U_EOF;
600
601    /* Fill the buffer if it is empty */
602    str = &f->str;
603    if (f && str->fPos + 1 >= str->fLimit) {
604        ufile_fill_uchar_buffer(f);
605    }
606
607    /* Get the next character in the buffer */
608    if (str->fPos < str->fLimit) {
609        *c32 = *(str->fPos)++;
610        if (U_IS_LEAD(*c32)) {
611            if (str->fPos < str->fLimit) {
612                UChar c16 = *(str->fPos)++;
613                *c32 = U16_GET_SUPPLEMENTARY(*c32, c16);
614                isValidChar = TRUE;
615            }
616            else {
617                *c32 = U_EOF;
618            }
619        }
620        else {
621            isValidChar = TRUE;
622        }
623    }
624
625    return isValidChar;
626}
627
628U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
629u_fgetcx(UFILE        *f)
630{
631    UChar32 ch;
632    ufile_getch32(f, &ch);
633    return ch;
634}
635
636U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
637u_fungetc(UChar32        ch,
638    UFILE        *f)
639{
640    u_localized_string *str;
641
642    str = &f->str;
643
644    /* if we're at the beginning of the buffer, sorry! */
645    if (str->fPos == str->fBuffer
646        || (U_IS_LEAD(ch) && (str->fPos - 1) == str->fBuffer))
647    {
648        ch = U_EOF;
649    }
650    else {
651        /* otherwise, put the character back */
652        /* Remember, read them back on in the reverse order. */
653        if (U_IS_LEAD(ch)) {
654            if (*--(str->fPos) != U16_TRAIL(ch)
655                || *--(str->fPos) != U16_LEAD(ch))
656            {
657                ch = U_EOF;
658            }
659        }
660        else if (*--(str->fPos) != ch) {
661            ch = U_EOF;
662        }
663    }
664    return ch;
665}
666
667U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
668u_file_read(    UChar        *chars,
669    int32_t        count,
670    UFILE         *f)
671{
672    int32_t dataSize;
673    int32_t read = 0;
674    u_localized_string *str = &f->str;
675
676    do {
677
678        /* determine the amount of data in the buffer */
679        dataSize = (int32_t)(str->fLimit - str->fPos);
680        if (dataSize <= 0) {
681            /* fill the buffer */
682            ufile_fill_uchar_buffer(f);
683            dataSize = (int32_t)(str->fLimit - str->fPos);
684        }
685
686        /* Make sure that we don't read too much */
687        if (dataSize > (count - read)) {
688            dataSize = count - read;
689        }
690
691        /* copy the current data in the buffer */
692        memcpy(chars + read, str->fPos, dataSize * sizeof(UChar));
693
694        /* update number of items read */
695        read += dataSize;
696
697        /* update the current buffer position */
698        str->fPos += dataSize;
699    }
700    while (dataSize != 0 && read < count);
701
702    return read;
703}
704