1/*
2 ******************************************************************************
3 *
4 *   Copyright (C) 1998-2011, International Business Machines
5 *   Corporation and others.  All Rights Reserved.
6 *
7 ******************************************************************************
8 *
9 * File ustdio.c
10 *
11 * Modification History:
12 *
13 *   Date        Name        Description
14 *   11/18/98    stephen     Creation.
15 *   03/12/99    stephen     Modified for new C API.
16 *   07/19/99    stephen     Fixed read() and gets()
17 ******************************************************************************
18 */
19
20#include "unicode/ustdio.h"
21#include "unicode/putil.h"
22#include "cmemory.h"
23#include "cstring.h"
24#include "ufile.h"
25#include "ufmt_cmn.h"
26#include "unicode/ucnv.h"
27#include "unicode/ustring.h"
28
29#include <string.h>
30
31#define DELIM_LF 0x000A
32#define DELIM_VT 0x000B
33#define DELIM_FF 0x000C
34#define DELIM_CR 0x000D
35#define DELIM_NEL 0x0085
36#define DELIM_LS 0x2028
37#define DELIM_PS 0x2029
38
39/* TODO: is this correct for all codepages? Should we just use \n and let the converter handle it? */
40#if U_PLATFORM_USES_ONLY_WIN32_API
41static const UChar DELIMITERS [] = { DELIM_CR, DELIM_LF, 0x0000 };
42static const uint32_t DELIMITERS_LEN = 2;
43/* TODO: Default newline writing should be detected based upon the converter being used. */
44#else
45static const UChar DELIMITERS [] = { DELIM_LF, 0x0000 };
46static const uint32_t DELIMITERS_LEN = 1;
47#endif
48
49#define IS_FIRST_STRING_DELIMITER(c1) \
50 (UBool)((DELIM_LF <= (c1) && (c1) <= DELIM_CR) \
51        || (c1) == DELIM_NEL \
52        || (c1) == DELIM_LS \
53        || (c1) == DELIM_PS)
54#define CAN_HAVE_COMBINED_STRING_DELIMITER(c1) (UBool)((c1) == DELIM_CR)
55#define IS_COMBINED_STRING_DELIMITER(c1, c2) \
56 (UBool)((c1) == DELIM_CR && (c2) == DELIM_LF)
57
58
59#if !UCONFIG_NO_TRANSLITERATION
60
61U_CAPI UTransliterator* U_EXPORT2
62u_fsettransliterator(UFILE *file, UFileDirection direction,
63                     UTransliterator *adopt, UErrorCode *status)
64{
65    UTransliterator *old = NULL;
66
67    if(U_FAILURE(*status))
68    {
69        return adopt;
70    }
71
72    if(!file)
73    {
74        *status = U_ILLEGAL_ARGUMENT_ERROR;
75        return adopt;
76    }
77
78    if(direction & U_READ)
79    {
80        /** TODO: implement */
81        *status = U_UNSUPPORTED_ERROR;
82        return adopt;
83    }
84
85    if(adopt == NULL) /* they are clearing it */
86    {
87        if(file->fTranslit != NULL)
88        {
89            /* TODO: Check side */
90            old = file->fTranslit->translit;
91            uprv_free(file->fTranslit->buffer);
92            file->fTranslit->buffer=NULL;
93            uprv_free(file->fTranslit);
94            file->fTranslit=NULL;
95        }
96    }
97    else
98    {
99        if(file->fTranslit == NULL)
100        {
101            file->fTranslit = (UFILETranslitBuffer*) uprv_malloc(sizeof(UFILETranslitBuffer));
102            if(!file->fTranslit)
103            {
104                *status = U_MEMORY_ALLOCATION_ERROR;
105                return adopt;
106            }
107            file->fTranslit->capacity = 0;
108            file->fTranslit->length = 0;
109            file->fTranslit->pos = 0;
110            file->fTranslit->buffer = NULL;
111        }
112        else
113        {
114            old = file->fTranslit->translit;
115            ufile_flush_translit(file);
116        }
117
118        file->fTranslit->translit = adopt;
119    }
120
121    return old;
122}
123
124static const UChar * u_file_translit(UFILE *f, const UChar *src, int32_t *count, UBool flush)
125{
126    int32_t newlen;
127    int32_t junkCount = 0;
128    int32_t textLength;
129    int32_t textLimit;
130    UTransPosition pos;
131    UErrorCode status = U_ZERO_ERROR;
132
133    if(count == NULL)
134    {
135        count = &junkCount;
136    }
137
138    if ((!f)||(!f->fTranslit)||(!f->fTranslit->translit))
139    {
140        /* fast path */
141        return src;
142    }
143
144    /* First: slide over everything */
145    if(f->fTranslit->length > f->fTranslit->pos)
146    {
147        memmove(f->fTranslit->buffer, f->fTranslit->buffer + f->fTranslit->pos,
148            (f->fTranslit->length - f->fTranslit->pos)*sizeof(UChar));
149    }
150    f->fTranslit->length -= f->fTranslit->pos; /* always */
151    f->fTranslit->pos = 0;
152
153    /* Calculate new buffer size needed */
154    newlen = (*count + f->fTranslit->length) * 4;
155
156    if(newlen > f->fTranslit->capacity)
157    {
158        if(f->fTranslit->buffer == NULL)
159        {
160            f->fTranslit->buffer = (UChar*)uprv_malloc(newlen * sizeof(UChar));
161        }
162        else
163        {
164            f->fTranslit->buffer = (UChar*)uprv_realloc(f->fTranslit->buffer, newlen * sizeof(UChar));
165        }
166        /* Check for malloc/realloc failure. */
167        if (f->fTranslit->buffer == NULL) {
168        	return NULL;
169        }
170        f->fTranslit->capacity = newlen;
171    }
172
173    /* Now, copy any data over */
174    u_strncpy(f->fTranslit->buffer + f->fTranslit->length,
175        src,
176        *count);
177    f->fTranslit->length += *count;
178
179    /* Now, translit in place as much as we can  */
180    if(flush == FALSE)
181    {
182        textLength = f->fTranslit->length;
183        pos.contextStart = 0;
184        pos.contextLimit = textLength;
185        pos.start        = 0;
186        pos.limit        = textLength;
187
188        utrans_transIncrementalUChars(f->fTranslit->translit,
189            f->fTranslit->buffer, /* because we shifted */
190            &textLength,
191            f->fTranslit->capacity,
192            &pos,
193            &status);
194
195        /* now: start/limit point to the transliterated text */
196        /* Transliterated is [buffer..pos.start) */
197        *count            = pos.start;
198        f->fTranslit->pos = pos.start;
199        f->fTranslit->length = pos.limit;
200
201        return f->fTranslit->buffer;
202    }
203    else
204    {
205        textLength = f->fTranslit->length;
206        textLimit = f->fTranslit->length;
207
208        utrans_transUChars(f->fTranslit->translit,
209            f->fTranslit->buffer,
210            &textLength,
211            f->fTranslit->capacity,
212            0,
213            &textLimit,
214            &status);
215
216        /* out: converted len */
217        *count = textLimit;
218
219        /* Set pointers to 0 */
220        f->fTranslit->pos = 0;
221        f->fTranslit->length = 0;
222
223        return f->fTranslit->buffer;
224    }
225}
226
227#endif
228
229void
230ufile_flush_translit(UFILE *f)
231{
232#if !UCONFIG_NO_TRANSLITERATION
233    if((!f)||(!f->fTranslit))
234        return;
235#endif
236
237    u_file_write_flush(NULL, 0, f, FALSE, TRUE);
238}
239
240
241void
242ufile_flush_io(UFILE *f)
243{
244  if((!f) || (!f->fFile)) {
245    return; /* skip if no file */
246  }
247
248  u_file_write_flush(NULL, 0, f, TRUE, FALSE);
249}
250
251
252void
253ufile_close_translit(UFILE *f)
254{
255#if !UCONFIG_NO_TRANSLITERATION
256    if((!f)||(!f->fTranslit))
257        return;
258#endif
259
260    ufile_flush_translit(f);
261
262#if !UCONFIG_NO_TRANSLITERATION
263    if(f->fTranslit->translit)
264        utrans_close(f->fTranslit->translit);
265
266    if(f->fTranslit->buffer)
267    {
268        uprv_free(f->fTranslit->buffer);
269    }
270
271    uprv_free(f->fTranslit);
272    f->fTranslit = NULL;
273#endif
274}
275
276
277/* Input/output */
278
279U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
280u_fputs(const UChar    *s,
281        UFILE        *f)
282{
283    int32_t count = u_file_write(s, u_strlen(s), f);
284    count += u_file_write(DELIMITERS, DELIMITERS_LEN, f);
285    return count;
286}
287
288U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
289u_fputc(UChar32      uc,
290        UFILE        *f)
291{
292    UChar buf[2];
293    int32_t idx = 0;
294    UBool isError = FALSE;
295
296    U16_APPEND(buf, idx, sizeof(buf)/sizeof(*buf), uc, isError);
297    if (isError) {
298        return U_EOF;
299    }
300    return u_file_write(buf, idx, f) == idx ? uc : U_EOF;
301}
302
303
304U_CFUNC int32_t U_EXPORT2
305u_file_write_flush(const UChar *chars,
306                   int32_t     count,
307                   UFILE       *f,
308                   UBool       flushIO,
309                   UBool       flushTranslit)
310{
311    /* Set up conversion parameters */
312    UErrorCode  status       = U_ZERO_ERROR;
313    const UChar *mySource    = chars;
314    const UChar *mySourceBegin;
315    const UChar *mySourceEnd;
316    char        charBuffer[UFILE_CHARBUFFER_SIZE];
317    char        *myTarget   = charBuffer;
318    int32_t     written      = 0;
319    int32_t     numConverted = 0;
320
321    if (count < 0) {
322        count = u_strlen(chars);
323    }
324
325#if !UCONFIG_NO_TRANSLITERATION
326    if((f->fTranslit) && (f->fTranslit->translit))
327    {
328        /* Do the transliteration */
329        mySource = u_file_translit(f, chars, &count, flushTranslit);
330    }
331#endif
332
333    /* Write to a string. */
334    if (!f->fFile) {
335        int32_t charsLeft = (int32_t)(f->str.fLimit - f->str.fPos);
336        if (flushIO && charsLeft > count) {
337            count++;
338        }
339        written = ufmt_min(count, charsLeft);
340        u_strncpy(f->str.fPos, mySource, written);
341        f->str.fPos += written;
342        return written;
343    }
344
345    mySourceEnd = mySource + count;
346
347    /* Perform the conversion in a loop */
348    do {
349        mySourceBegin = mySource; /* beginning location for this loop */
350        status     = U_ZERO_ERROR;
351        if(f->fConverter != NULL) { /* We have a valid converter */
352            ucnv_fromUnicode(f->fConverter,
353                &myTarget,
354                charBuffer + UFILE_CHARBUFFER_SIZE,
355                &mySource,
356                mySourceEnd,
357                NULL,
358                flushIO,
359                &status);
360        } else { /*weiv: do the invariant conversion */
361            int32_t convertChars = (int32_t) (mySourceEnd - mySource);
362            if (convertChars > UFILE_CHARBUFFER_SIZE) {
363                convertChars = UFILE_CHARBUFFER_SIZE;
364                status = U_BUFFER_OVERFLOW_ERROR;
365            }
366            u_UCharsToChars(mySource, myTarget, convertChars);
367            mySource += convertChars;
368            myTarget += convertChars;
369        }
370        numConverted = (int32_t)(myTarget - charBuffer);
371
372        if (numConverted > 0) {
373            /* write the converted bytes */
374            fwrite(charBuffer,
375                sizeof(char),
376                numConverted,
377                f->fFile);
378
379            written     += (int32_t) (mySource - mySourceBegin);
380        }
381        myTarget     = charBuffer;
382    }
383    while(status == U_BUFFER_OVERFLOW_ERROR);
384
385    /* return # of chars written */
386    return written;
387}
388
389U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
390u_file_write(    const UChar     *chars,
391             int32_t        count,
392             UFILE         *f)
393{
394    return u_file_write_flush(chars,count,f,FALSE,FALSE);
395}
396
397
398/* private function used for buffering input */
399void
400ufile_fill_uchar_buffer(UFILE *f)
401{
402    UErrorCode  status;
403    const char  *mySource;
404    const char  *mySourceEnd;
405    UChar       *myTarget;
406    int32_t     bufferSize;
407    int32_t     maxCPBytes;
408    int32_t     bytesRead;
409    int32_t     availLength;
410    int32_t     dataSize;
411    char        charBuffer[UFILE_CHARBUFFER_SIZE];
412    u_localized_string *str;
413
414    if (f->fFile == NULL) {
415        /* There is nothing to do. It's a string. */
416        return;
417    }
418
419    str = &f->str;
420    dataSize = (int32_t)(str->fLimit - str->fPos);
421    if (f->fFileno == 0 && dataSize > 0) {
422        /* Don't read from stdin too many times. There is still some data. */
423        return;
424    }
425
426    /* shift the buffer if it isn't empty */
427    if(dataSize != 0) {
428        uprv_memmove(f->fUCBuffer, str->fPos, dataSize * sizeof(UChar)); /* not accessing beyond memory */
429    }
430
431
432    /* record how much buffer space is available */
433    availLength = UFILE_UCHARBUFFER_SIZE - dataSize;
434
435    /* Determine the # of codepage bytes needed to fill our UChar buffer */
436    /* weiv: if converter is NULL, we use invariant converter with charwidth = 1)*/
437    maxCPBytes = availLength / (f->fConverter!=NULL?(2*ucnv_getMinCharSize(f->fConverter)):1);
438
439    /* Read in the data to convert */
440    if (f->fFileno == 0) {
441        /* Special case. Read from stdin one line at a time. */
442        char *retStr = fgets(charBuffer, ufmt_min(maxCPBytes, UFILE_CHARBUFFER_SIZE), f->fFile);
443        bytesRead = (int32_t)(retStr ? uprv_strlen(charBuffer) : 0);
444    }
445    else {
446        /* A normal file */
447        bytesRead = (int32_t)fread(charBuffer,
448            sizeof(char),
449            ufmt_min(maxCPBytes, UFILE_CHARBUFFER_SIZE),
450            f->fFile);
451    }
452
453    /* Set up conversion parameters */
454    status      = U_ZERO_ERROR;
455    mySource    = charBuffer;
456    mySourceEnd = charBuffer + bytesRead;
457    myTarget    = f->fUCBuffer + dataSize;
458    bufferSize  = UFILE_UCHARBUFFER_SIZE;
459
460    if(f->fConverter != NULL) { /* We have a valid converter */
461        /* Perform the conversion */
462        ucnv_toUnicode(f->fConverter,
463            &myTarget,
464            f->fUCBuffer + bufferSize,
465            &mySource,
466            mySourceEnd,
467            NULL,
468            (UBool)(feof(f->fFile) != 0),
469            &status);
470
471    } else { /*weiv: do the invariant conversion */
472        u_charsToUChars(mySource, myTarget, bytesRead);
473        myTarget += bytesRead;
474    }
475
476    /* update the pointers into our array */
477    str->fPos    = str->fBuffer;
478    str->fLimit  = myTarget;
479}
480
481U_CAPI UChar* U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
482u_fgets(UChar        *s,
483        int32_t       n,
484        UFILE        *f)
485{
486    int32_t dataSize;
487    int32_t count;
488    UChar *alias;
489    const UChar *limit;
490    UChar *sItr;
491    UChar currDelim = 0;
492    u_localized_string *str;
493
494    if (n <= 0) {
495        /* Caller screwed up. We need to write the null terminatior. */
496        return NULL;
497    }
498
499    /* fill the buffer if needed */
500    str = &f->str;
501    if (str->fPos >= str->fLimit) {
502        ufile_fill_uchar_buffer(f);
503    }
504
505    /* subtract 1 from n to compensate for the terminator */
506    --n;
507
508    /* determine the amount of data in the buffer */
509    dataSize = (int32_t)(str->fLimit - str->fPos);
510
511    /* if 0 characters were left, return 0 */
512    if (dataSize == 0)
513        return NULL;
514
515    /* otherwise, iteratively fill the buffer and copy */
516    count = 0;
517    sItr = s;
518    currDelim = 0;
519    while (dataSize > 0 && count < n) {
520        alias = str->fPos;
521
522        /* Find how much to copy */
523        if (dataSize < (n - count)) {
524            limit = str->fLimit;
525        }
526        else {
527            limit = alias + (n - count);
528        }
529
530        if (!currDelim) {
531            /* Copy UChars until we find the first occurrence of a delimiter character */
532            while (alias < limit && !IS_FIRST_STRING_DELIMITER(*alias)) {
533                count++;
534                *(sItr++) = *(alias++);
535            }
536            /* Preserve the newline */
537            if (alias < limit && IS_FIRST_STRING_DELIMITER(*alias)) {
538                if (CAN_HAVE_COMBINED_STRING_DELIMITER(*alias)) {
539                    currDelim = *alias;
540                }
541                else {
542                    currDelim = 1;  /* This isn't a newline, but it's used to say
543                                    that we should break later. We've checked all
544                                    possible newline combinations even across buffer
545                                    boundaries. */
546                }
547                count++;
548                *(sItr++) = *(alias++);
549            }
550        }
551        /* If we have a CRLF combination, preserve that too. */
552        if (alias < limit) {
553            if (currDelim && IS_COMBINED_STRING_DELIMITER(currDelim, *alias)) {
554                count++;
555                *(sItr++) = *(alias++);
556            }
557            currDelim = 1;  /* This isn't a newline, but it's used to say
558                            that we should break later. We've checked all
559                            possible newline combinations even across buffer
560                            boundaries. */
561        }
562
563        /* update the current buffer position */
564        str->fPos = alias;
565
566        /* if we found a delimiter */
567        if (currDelim == 1) {
568            /* break out */
569            break;
570        }
571
572        /* refill the buffer */
573        ufile_fill_uchar_buffer(f);
574
575        /* determine the amount of data in the buffer */
576        dataSize = (int32_t)(str->fLimit - str->fPos);
577    }
578
579    /* add the terminator and return s */
580    *sItr = 0x0000;
581    return s;
582}
583
584U_CFUNC UBool U_EXPORT2
585ufile_getch(UFILE *f, UChar *ch)
586{
587    UBool isValidChar = FALSE;
588
589    *ch = U_EOF;
590    /* if we have an available character in the buffer, return it */
591    if(f->str.fPos < f->str.fLimit){
592        *ch = *(f->str.fPos)++;
593        isValidChar = TRUE;
594    }
595    else {
596        /* otherwise, fill the buffer and return the next character */
597        if(f->str.fPos >= f->str.fLimit) {
598            ufile_fill_uchar_buffer(f);
599        }
600        if(f->str.fPos < f->str.fLimit) {
601            *ch = *(f->str.fPos)++;
602            isValidChar = TRUE;
603        }
604    }
605    return isValidChar;
606}
607
608U_CAPI UChar U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
609u_fgetc(UFILE        *f)
610{
611    UChar ch;
612    ufile_getch(f, &ch);
613    return ch;
614}
615
616U_CFUNC UBool U_EXPORT2
617ufile_getch32(UFILE *f, UChar32 *c32)
618{
619    UBool isValidChar = FALSE;
620    u_localized_string *str;
621
622    *c32 = U_EOF;
623
624    /* Fill the buffer if it is empty */
625    str = &f->str;
626    if (f && str->fPos + 1 >= str->fLimit) {
627        ufile_fill_uchar_buffer(f);
628    }
629
630    /* Get the next character in the buffer */
631    if (str->fPos < str->fLimit) {
632        *c32 = *(str->fPos)++;
633        if (U_IS_LEAD(*c32)) {
634            if (str->fPos < str->fLimit) {
635                UChar c16 = *(str->fPos)++;
636                *c32 = U16_GET_SUPPLEMENTARY(*c32, c16);
637                isValidChar = TRUE;
638            }
639            else {
640                *c32 = U_EOF;
641            }
642        }
643        else {
644            isValidChar = TRUE;
645        }
646    }
647
648    return isValidChar;
649}
650
651U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
652u_fgetcx(UFILE        *f)
653{
654    UChar32 ch;
655    ufile_getch32(f, &ch);
656    return ch;
657}
658
659U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
660u_fungetc(UChar32        ch,
661    UFILE        *f)
662{
663    u_localized_string *str;
664
665    str = &f->str;
666
667    /* if we're at the beginning of the buffer, sorry! */
668    if (str->fPos == str->fBuffer
669        || (U_IS_LEAD(ch) && (str->fPos - 1) == str->fBuffer))
670    {
671        ch = U_EOF;
672    }
673    else {
674        /* otherwise, put the character back */
675        /* Remember, read them back on in the reverse order. */
676        if (U_IS_LEAD(ch)) {
677            if (*--(str->fPos) != U16_TRAIL(ch)
678                || *--(str->fPos) != U16_LEAD(ch))
679            {
680                ch = U_EOF;
681            }
682        }
683        else if (*--(str->fPos) != ch) {
684            ch = U_EOF;
685        }
686    }
687    return ch;
688}
689
690U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
691u_file_read(    UChar        *chars,
692    int32_t        count,
693    UFILE         *f)
694{
695    int32_t dataSize;
696    int32_t read = 0;
697    u_localized_string *str = &f->str;
698
699    do {
700
701        /* determine the amount of data in the buffer */
702        dataSize = (int32_t)(str->fLimit - str->fPos);
703        if (dataSize <= 0) {
704            /* fill the buffer */
705            ufile_fill_uchar_buffer(f);
706            dataSize = (int32_t)(str->fLimit - str->fPos);
707        }
708
709        /* Make sure that we don't read too much */
710        if (dataSize > (count - read)) {
711            dataSize = count - read;
712        }
713
714        /* copy the current data in the buffer */
715        memcpy(chars + read, str->fPos, dataSize * sizeof(UChar));
716
717        /* update number of items read */
718        read += dataSize;
719
720        /* update the current buffer position */
721        str->fPos += dataSize;
722    }
723    while (dataSize != 0 && read < count);
724
725    return read;
726}
727