1/*
2 ******************************************************************************
3 *
4 *   Copyright (C) 1998-2014, International Business Machines
5 *   Corporation and others.  All Rights Reserved.
6 *
7 ******************************************************************************
8 *
9 * File ustdio.c
10 *
11 * Modification History:
12 *
13 *   Date        Name        Description
14 *   11/18/98    stephen     Creation.
15 *   03/12/99    stephen     Modified for new C API.
16 *   07/19/99    stephen     Fixed read() and gets()
17 ******************************************************************************
18 */
19
20#include "unicode/ustdio.h"
21
22#if !UCONFIG_NO_CONVERSION
23
24#include "unicode/putil.h"
25#include "cmemory.h"
26#include "cstring.h"
27#include "ufile.h"
28#include "ufmt_cmn.h"
29#include "unicode/ucnv.h"
30#include "unicode/ustring.h"
31
32#include <string.h>
33
34#define DELIM_LF 0x000A
35#define DELIM_VT 0x000B
36#define DELIM_FF 0x000C
37#define DELIM_CR 0x000D
38#define DELIM_NEL 0x0085
39#define DELIM_LS 0x2028
40#define DELIM_PS 0x2029
41
42/* TODO: is this correct for all codepages? Should we just use \n and let the converter handle it? */
43#if U_PLATFORM_USES_ONLY_WIN32_API
44static const UChar DELIMITERS [] = { DELIM_CR, DELIM_LF, 0x0000 };
45static const uint32_t DELIMITERS_LEN = 2;
46/* TODO: Default newline writing should be detected based upon the converter being used. */
47#else
48static const UChar DELIMITERS [] = { DELIM_LF, 0x0000 };
49static const uint32_t DELIMITERS_LEN = 1;
50#endif
51
/*
 * Line-terminator classification used when reading lines.
 * NOTE: the macro arguments are evaluated more than once; pass only
 * side-effect-free expressions.
 */

/* TRUE when ch on its own ends a line: LF, VT, FF, CR, NEL, LS or PS. */
#define IS_FIRST_STRING_DELIMITER(ch) \
    (UBool)((DELIM_LF <= (ch) && DELIM_CR >= (ch)) \
        || DELIM_NEL == (ch) \
        || DELIM_LS == (ch) \
        || DELIM_PS == (ch))

/* TRUE when ch may be the first half of a two-unit terminator (only CR can). */
#define CAN_HAVE_COMBINED_STRING_DELIMITER(ch) (UBool)(DELIM_CR == (ch))

/* TRUE when the pair (first, second) is the CR LF sequence. */
#define IS_COMBINED_STRING_DELIMITER(first, second) \
    (UBool)(DELIM_CR == (first) && DELIM_LF == (second))
60
61
62#if !UCONFIG_NO_TRANSLITERATION
63
64U_CAPI UTransliterator* U_EXPORT2
65u_fsettransliterator(UFILE *file, UFileDirection direction,
66                     UTransliterator *adopt, UErrorCode *status)
67{
68    UTransliterator *old = NULL;
69
70    if(U_FAILURE(*status))
71    {
72        return adopt;
73    }
74
75    if(!file)
76    {
77        *status = U_ILLEGAL_ARGUMENT_ERROR;
78        return adopt;
79    }
80
81    if(direction & U_READ)
82    {
83        /** TODO: implement */
84        *status = U_UNSUPPORTED_ERROR;
85        return adopt;
86    }
87
88    if(adopt == NULL) /* they are clearing it */
89    {
90        if(file->fTranslit != NULL)
91        {
92            /* TODO: Check side */
93            old = file->fTranslit->translit;
94            uprv_free(file->fTranslit->buffer);
95            file->fTranslit->buffer=NULL;
96            uprv_free(file->fTranslit);
97            file->fTranslit=NULL;
98        }
99    }
100    else
101    {
102        if(file->fTranslit == NULL)
103        {
104            file->fTranslit = (UFILETranslitBuffer*) uprv_malloc(sizeof(UFILETranslitBuffer));
105            if(!file->fTranslit)
106            {
107                *status = U_MEMORY_ALLOCATION_ERROR;
108                return adopt;
109            }
110            file->fTranslit->capacity = 0;
111            file->fTranslit->length = 0;
112            file->fTranslit->pos = 0;
113            file->fTranslit->buffer = NULL;
114        }
115        else
116        {
117            old = file->fTranslit->translit;
118            ufile_flush_translit(file);
119        }
120
121        file->fTranslit->translit = adopt;
122    }
123
124    return old;
125}
126
127static const UChar * u_file_translit(UFILE *f, const UChar *src, int32_t *count, UBool flush)
128{
129    int32_t newlen;
130    int32_t junkCount = 0;
131    int32_t textLength;
132    int32_t textLimit;
133    UTransPosition pos;
134    UErrorCode status = U_ZERO_ERROR;
135
136    if(count == NULL)
137    {
138        count = &junkCount;
139    }
140
141    if ((!f)||(!f->fTranslit)||(!f->fTranslit->translit))
142    {
143        /* fast path */
144        return src;
145    }
146
147    /* First: slide over everything */
148    if(f->fTranslit->length > f->fTranslit->pos)
149    {
150        memmove(f->fTranslit->buffer, f->fTranslit->buffer + f->fTranslit->pos,
151            (f->fTranslit->length - f->fTranslit->pos)*sizeof(UChar));
152    }
153    f->fTranslit->length -= f->fTranslit->pos; /* always */
154    f->fTranslit->pos = 0;
155
156    /* Calculate new buffer size needed */
157    newlen = (*count + f->fTranslit->length) * 4;
158
159    if(newlen > f->fTranslit->capacity)
160    {
161        if(f->fTranslit->buffer == NULL)
162        {
163            f->fTranslit->buffer = (UChar*)uprv_malloc(newlen * sizeof(UChar));
164        }
165        else
166        {
167            f->fTranslit->buffer = (UChar*)uprv_realloc(f->fTranslit->buffer, newlen * sizeof(UChar));
168        }
169        /* Check for malloc/realloc failure. */
170        if (f->fTranslit->buffer == NULL) {
171        	return NULL;
172        }
173        f->fTranslit->capacity = newlen;
174    }
175
176    /* Now, copy any data over */
177    u_strncpy(f->fTranslit->buffer + f->fTranslit->length,
178        src,
179        *count);
180    f->fTranslit->length += *count;
181
182    /* Now, translit in place as much as we can  */
183    if(flush == FALSE)
184    {
185        textLength = f->fTranslit->length;
186        pos.contextStart = 0;
187        pos.contextLimit = textLength;
188        pos.start        = 0;
189        pos.limit        = textLength;
190
191        utrans_transIncrementalUChars(f->fTranslit->translit,
192            f->fTranslit->buffer, /* because we shifted */
193            &textLength,
194            f->fTranslit->capacity,
195            &pos,
196            &status);
197
198        /* now: start/limit point to the transliterated text */
199        /* Transliterated is [buffer..pos.start) */
200        *count            = pos.start;
201        f->fTranslit->pos = pos.start;
202        f->fTranslit->length = pos.limit;
203
204        return f->fTranslit->buffer;
205    }
206    else
207    {
208        textLength = f->fTranslit->length;
209        textLimit = f->fTranslit->length;
210
211        utrans_transUChars(f->fTranslit->translit,
212            f->fTranslit->buffer,
213            &textLength,
214            f->fTranslit->capacity,
215            0,
216            &textLimit,
217            &status);
218
219        /* out: converted len */
220        *count = textLimit;
221
222        /* Set pointers to 0 */
223        f->fTranslit->pos = 0;
224        f->fTranslit->length = 0;
225
226        return f->fTranslit->buffer;
227    }
228}
229
230#endif
231
232void
233ufile_flush_translit(UFILE *f)
234{
235#if !UCONFIG_NO_TRANSLITERATION
236    if((!f)||(!f->fTranslit))
237        return;
238#endif
239
240    u_file_write_flush(NULL, 0, f, FALSE, TRUE);
241}
242
243
244void
245ufile_flush_io(UFILE *f)
246{
247  if((!f) || (!f->fFile)) {
248    return; /* skip if no file */
249  }
250
251  u_file_write_flush(NULL, 0, f, TRUE, FALSE);
252}
253
254
255void
256ufile_close_translit(UFILE *f)
257{
258#if !UCONFIG_NO_TRANSLITERATION
259    if((!f)||(!f->fTranslit))
260        return;
261#endif
262
263    ufile_flush_translit(f);
264
265#if !UCONFIG_NO_TRANSLITERATION
266    if(f->fTranslit->translit)
267        utrans_close(f->fTranslit->translit);
268
269    if(f->fTranslit->buffer)
270    {
271        uprv_free(f->fTranslit->buffer);
272    }
273
274    uprv_free(f->fTranslit);
275    f->fTranslit = NULL;
276#endif
277}
278
279
280/* Input/output */
281
282U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
283u_fputs(const UChar    *s,
284        UFILE        *f)
285{
286    int32_t count = u_file_write(s, u_strlen(s), f);
287    count += u_file_write(DELIMITERS, DELIMITERS_LEN, f);
288    return count;
289}
290
291U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
292u_fputc(UChar32      uc,
293        UFILE        *f)
294{
295    UChar buf[2];
296    int32_t idx = 0;
297    UBool isError = FALSE;
298
299    U16_APPEND(buf, idx, sizeof(buf)/sizeof(*buf), uc, isError);
300    if (isError) {
301        return U_EOF;
302    }
303    return u_file_write(buf, idx, f) == idx ? uc : U_EOF;
304}
305
306
307U_CFUNC int32_t U_EXPORT2
308u_file_write_flush(const UChar *chars,
309                   int32_t     count,
310                   UFILE       *f,
311                   UBool       flushIO,
312                   UBool       flushTranslit)
313{
314    /* Set up conversion parameters */
315    UErrorCode  status       = U_ZERO_ERROR;
316    const UChar *mySource    = chars;
317    const UChar *mySourceBegin;
318    const UChar *mySourceEnd;
319    char        charBuffer[UFILE_CHARBUFFER_SIZE];
320    char        *myTarget   = charBuffer;
321    int32_t     written      = 0;
322    int32_t     numConverted = 0;
323
324    if (count < 0) {
325        count = u_strlen(chars);
326    }
327
328#if !UCONFIG_NO_TRANSLITERATION
329    if((f->fTranslit) && (f->fTranslit->translit))
330    {
331        /* Do the transliteration */
332        mySource = u_file_translit(f, chars, &count, flushTranslit);
333    }
334#endif
335
336    /* Write to a string. */
337    if (!f->fFile) {
338        int32_t charsLeft = (int32_t)(f->str.fLimit - f->str.fPos);
339        if (flushIO && charsLeft > count) {
340            count++;
341        }
342        written = ufmt_min(count, charsLeft);
343        u_strncpy(f->str.fPos, mySource, written);
344        f->str.fPos += written;
345        return written;
346    }
347
348    mySourceEnd = mySource + count;
349
350    /* Perform the conversion in a loop */
351    do {
352        mySourceBegin = mySource; /* beginning location for this loop */
353        status     = U_ZERO_ERROR;
354        if(f->fConverter != NULL) { /* We have a valid converter */
355            ucnv_fromUnicode(f->fConverter,
356                &myTarget,
357                charBuffer + UFILE_CHARBUFFER_SIZE,
358                &mySource,
359                mySourceEnd,
360                NULL,
361                flushIO,
362                &status);
363        } else { /*weiv: do the invariant conversion */
364            int32_t convertChars = (int32_t) (mySourceEnd - mySource);
365            if (convertChars > UFILE_CHARBUFFER_SIZE) {
366                convertChars = UFILE_CHARBUFFER_SIZE;
367                status = U_BUFFER_OVERFLOW_ERROR;
368            }
369            u_UCharsToChars(mySource, myTarget, convertChars);
370            mySource += convertChars;
371            myTarget += convertChars;
372        }
373        numConverted = (int32_t)(myTarget - charBuffer);
374
375        if (numConverted > 0) {
376            /* write the converted bytes */
377            fwrite(charBuffer,
378                sizeof(char),
379                numConverted,
380                f->fFile);
381
382            written     += (int32_t) (mySource - mySourceBegin);
383        }
384        myTarget     = charBuffer;
385    }
386    while(status == U_BUFFER_OVERFLOW_ERROR);
387
388    /* return # of chars written */
389    return written;
390}
391
392U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
393u_file_write(    const UChar     *chars,
394             int32_t        count,
395             UFILE         *f)
396{
397    return u_file_write_flush(chars,count,f,FALSE,FALSE);
398}
399
400
401/* private function used for buffering input */
402void
403ufile_fill_uchar_buffer(UFILE *f)
404{
405    UErrorCode  status;
406    const char  *mySource;
407    const char  *mySourceEnd;
408    UChar       *myTarget;
409    int32_t     bufferSize;
410    int32_t     maxCPBytes;
411    int32_t     bytesRead;
412    int32_t     availLength;
413    int32_t     dataSize;
414    char        charBuffer[UFILE_CHARBUFFER_SIZE];
415    u_localized_string *str;
416
417    if (f->fFile == NULL) {
418        /* There is nothing to do. It's a string. */
419        return;
420    }
421
422    str = &f->str;
423    dataSize = (int32_t)(str->fLimit - str->fPos);
424    if (f->fFileno == 0 && dataSize > 0) {
425        /* Don't read from stdin too many times. There is still some data. */
426        return;
427    }
428
429    /* shift the buffer if it isn't empty */
430    if(dataSize != 0) {
431        uprv_memmove(f->fUCBuffer, str->fPos, dataSize * sizeof(UChar)); /* not accessing beyond memory */
432    }
433
434
435    /* record how much buffer space is available */
436    availLength = UFILE_UCHARBUFFER_SIZE - dataSize;
437
438    /* Determine the # of codepage bytes needed to fill our UChar buffer */
439    /* weiv: if converter is NULL, we use invariant converter with charwidth = 1)*/
440    maxCPBytes = availLength / (f->fConverter!=NULL?(2*ucnv_getMinCharSize(f->fConverter)):1);
441
442    /* Read in the data to convert */
443    if (f->fFileno == 0) {
444        /* Special case. Read from stdin one line at a time. */
445        char *retStr = fgets(charBuffer, ufmt_min(maxCPBytes, UFILE_CHARBUFFER_SIZE), f->fFile);
446        bytesRead = (int32_t)(retStr ? uprv_strlen(charBuffer) : 0);
447    }
448    else {
449        /* A normal file */
450        bytesRead = (int32_t)fread(charBuffer,
451            sizeof(char),
452            ufmt_min(maxCPBytes, UFILE_CHARBUFFER_SIZE),
453            f->fFile);
454    }
455
456    /* Set up conversion parameters */
457    status      = U_ZERO_ERROR;
458    mySource    = charBuffer;
459    mySourceEnd = charBuffer + bytesRead;
460    myTarget    = f->fUCBuffer + dataSize;
461    bufferSize  = UFILE_UCHARBUFFER_SIZE;
462
463    if(f->fConverter != NULL) { /* We have a valid converter */
464        /* Perform the conversion */
465        ucnv_toUnicode(f->fConverter,
466            &myTarget,
467            f->fUCBuffer + bufferSize,
468            &mySource,
469            mySourceEnd,
470            NULL,
471            (UBool)(feof(f->fFile) != 0),
472            &status);
473
474    } else { /*weiv: do the invariant conversion */
475        u_charsToUChars(mySource, myTarget, bytesRead);
476        myTarget += bytesRead;
477    }
478
479    /* update the pointers into our array */
480    str->fPos    = str->fBuffer;
481    str->fLimit  = myTarget;
482}
483
484U_CAPI UChar* U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
485u_fgets(UChar        *s,
486        int32_t       n,
487        UFILE        *f)
488{
489    int32_t dataSize;
490    int32_t count;
491    UChar *alias;
492    const UChar *limit;
493    UChar *sItr;
494    UChar currDelim = 0;
495    u_localized_string *str;
496
497    if (n <= 0) {
498        /* Caller screwed up. We need to write the null terminatior. */
499        return NULL;
500    }
501
502    /* fill the buffer if needed */
503    str = &f->str;
504    if (str->fPos >= str->fLimit) {
505        ufile_fill_uchar_buffer(f);
506    }
507
508    /* subtract 1 from n to compensate for the terminator */
509    --n;
510
511    /* determine the amount of data in the buffer */
512    dataSize = (int32_t)(str->fLimit - str->fPos);
513
514    /* if 0 characters were left, return 0 */
515    if (dataSize == 0)
516        return NULL;
517
518    /* otherwise, iteratively fill the buffer and copy */
519    count = 0;
520    sItr = s;
521    currDelim = 0;
522    while (dataSize > 0 && count < n) {
523        alias = str->fPos;
524
525        /* Find how much to copy */
526        if (dataSize < (n - count)) {
527            limit = str->fLimit;
528        }
529        else {
530            limit = alias + (n - count);
531        }
532
533        if (!currDelim) {
534            /* Copy UChars until we find the first occurrence of a delimiter character */
535            while (alias < limit && !IS_FIRST_STRING_DELIMITER(*alias)) {
536                count++;
537                *(sItr++) = *(alias++);
538            }
539            /* Preserve the newline */
540            if (alias < limit && IS_FIRST_STRING_DELIMITER(*alias)) {
541                if (CAN_HAVE_COMBINED_STRING_DELIMITER(*alias)) {
542                    currDelim = *alias;
543                }
544                else {
545                    currDelim = 1;  /* This isn't a newline, but it's used to say
546                                    that we should break later. We've checked all
547                                    possible newline combinations even across buffer
548                                    boundaries. */
549                }
550                count++;
551                *(sItr++) = *(alias++);
552            }
553        }
554        /* If we have a CRLF combination, preserve that too. */
555        if (alias < limit) {
556            if (currDelim && IS_COMBINED_STRING_DELIMITER(currDelim, *alias)) {
557                count++;
558                *(sItr++) = *(alias++);
559            }
560            currDelim = 1;  /* This isn't a newline, but it's used to say
561                            that we should break later. We've checked all
562                            possible newline combinations even across buffer
563                            boundaries. */
564        }
565
566        /* update the current buffer position */
567        str->fPos = alias;
568
569        /* if we found a delimiter */
570        if (currDelim == 1) {
571            /* break out */
572            break;
573        }
574
575        /* refill the buffer */
576        ufile_fill_uchar_buffer(f);
577
578        /* determine the amount of data in the buffer */
579        dataSize = (int32_t)(str->fLimit - str->fPos);
580    }
581
582    /* add the terminator and return s */
583    *sItr = 0x0000;
584    return s;
585}
586
587U_CFUNC UBool U_EXPORT2
588ufile_getch(UFILE *f, UChar *ch)
589{
590    UBool isValidChar = FALSE;
591
592    *ch = U_EOF;
593    /* if we have an available character in the buffer, return it */
594    if(f->str.fPos < f->str.fLimit){
595        *ch = *(f->str.fPos)++;
596        isValidChar = TRUE;
597    }
598    else {
599        /* otherwise, fill the buffer and return the next character */
600        if(f->str.fPos >= f->str.fLimit) {
601            ufile_fill_uchar_buffer(f);
602        }
603        if(f->str.fPos < f->str.fLimit) {
604            *ch = *(f->str.fPos)++;
605            isValidChar = TRUE;
606        }
607    }
608    return isValidChar;
609}
610
611U_CAPI UChar U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
612u_fgetc(UFILE        *f)
613{
614    UChar ch;
615    ufile_getch(f, &ch);
616    return ch;
617}
618
619U_CFUNC UBool U_EXPORT2
620ufile_getch32(UFILE *f, UChar32 *c32)
621{
622    UBool isValidChar = FALSE;
623    u_localized_string *str;
624
625    *c32 = U_EOF;
626
627    /* Fill the buffer if it is empty */
628    str = &f->str;
629    if (f && str->fPos + 1 >= str->fLimit) {
630        ufile_fill_uchar_buffer(f);
631    }
632
633    /* Get the next character in the buffer */
634    if (str->fPos < str->fLimit) {
635        *c32 = *(str->fPos)++;
636        if (U_IS_LEAD(*c32)) {
637            if (str->fPos < str->fLimit) {
638                UChar c16 = *(str->fPos)++;
639                *c32 = U16_GET_SUPPLEMENTARY(*c32, c16);
640                isValidChar = TRUE;
641            }
642            else {
643                *c32 = U_EOF;
644            }
645        }
646        else {
647            isValidChar = TRUE;
648        }
649    }
650
651    return isValidChar;
652}
653
654U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
655u_fgetcx(UFILE        *f)
656{
657    UChar32 ch;
658    ufile_getch32(f, &ch);
659    return ch;
660}
661
662U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
663u_fungetc(UChar32        ch,
664    UFILE        *f)
665{
666    u_localized_string *str;
667
668    str = &f->str;
669
670    /* if we're at the beginning of the buffer, sorry! */
671    if (str->fPos == str->fBuffer
672        || (U_IS_LEAD(ch) && (str->fPos - 1) == str->fBuffer))
673    {
674        ch = U_EOF;
675    }
676    else {
677        /* otherwise, put the character back */
678        /* Remember, read them back on in the reverse order. */
679        if (U_IS_LEAD(ch)) {
680            if (*--(str->fPos) != U16_TRAIL(ch)
681                || *--(str->fPos) != U16_LEAD(ch))
682            {
683                ch = U_EOF;
684            }
685        }
686        else if (*--(str->fPos) != ch) {
687            ch = U_EOF;
688        }
689    }
690    return ch;
691}
692
693U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
694u_file_read(    UChar        *chars,
695    int32_t        count,
696    UFILE         *f)
697{
698    int32_t dataSize;
699    int32_t read = 0;
700    u_localized_string *str = &f->str;
701
702    do {
703
704        /* determine the amount of data in the buffer */
705        dataSize = (int32_t)(str->fLimit - str->fPos);
706        if (dataSize <= 0) {
707            /* fill the buffer */
708            ufile_fill_uchar_buffer(f);
709            dataSize = (int32_t)(str->fLimit - str->fPos);
710        }
711
712        /* Make sure that we don't read too much */
713        if (dataSize > (count - read)) {
714            dataSize = count - read;
715        }
716
717        /* copy the current data in the buffer */
718        memcpy(chars + read, str->fPos, dataSize * sizeof(UChar));
719
720        /* update number of items read */
721        read += dataSize;
722
723        /* update the current buffer position */
724        str->fPos += dataSize;
725    }
726    while (dataSize != 0 && read < count);
727
728    return read;
729}
730#endif
731