1/* 2 ****************************************************************************** 3 * 4 * Copyright (C) 1998-2014, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ****************************************************************************** 8 * 9 * File ustdio.c 10 * 11 * Modification History: 12 * 13 * Date Name Description 14 * 11/18/98 stephen Creation. 15 * 03/12/99 stephen Modified for new C API. 16 * 07/19/99 stephen Fixed read() and gets() 17 ****************************************************************************** 18 */ 19 20#include "unicode/ustdio.h" 21 22#if !UCONFIG_NO_CONVERSION 23 24#include "unicode/putil.h" 25#include "cmemory.h" 26#include "cstring.h" 27#include "ufile.h" 28#include "ufmt_cmn.h" 29#include "unicode/ucnv.h" 30#include "unicode/ustring.h" 31 32#include <string.h> 33 34#define DELIM_LF 0x000A 35#define DELIM_VT 0x000B 36#define DELIM_FF 0x000C 37#define DELIM_CR 0x000D 38#define DELIM_NEL 0x0085 39#define DELIM_LS 0x2028 40#define DELIM_PS 0x2029 41 42/* TODO: is this correct for all codepages? Should we just use \n and let the converter handle it? */ 43#if U_PLATFORM_USES_ONLY_WIN32_API 44static const UChar DELIMITERS [] = { DELIM_CR, DELIM_LF, 0x0000 }; 45static const uint32_t DELIMITERS_LEN = 2; 46/* TODO: Default newline writing should be detected based upon the converter being used. */ 47#else 48static const UChar DELIMITERS [] = { DELIM_LF, 0x0000 }; 49static const uint32_t DELIMITERS_LEN = 1; 50#endif 51 52#define IS_FIRST_STRING_DELIMITER(c1) \ 53 (UBool)((DELIM_LF <= (c1) && (c1) <= DELIM_CR) \ 54 || (c1) == DELIM_NEL \ 55 || (c1) == DELIM_LS \ 56 || (c1) == DELIM_PS) 57#define CAN_HAVE_COMBINED_STRING_DELIMITER(c1) (UBool)((c1) == DELIM_CR) 58#define IS_COMBINED_STRING_DELIMITER(c1, c2) \ 59 (UBool)((c1) == DELIM_CR && (c2) == DELIM_LF) 60 61 62#if !UCONFIG_NO_TRANSLITERATION 63 64U_CAPI UTransliterator* U_EXPORT2 65u_fsettransliterator(UFILE *file, UFileDirection direction, 66 UTransliterator *adopt, UErrorCode *status) 67{ 68 UTransliterator *old = NULL; 69 70 if(U_FAILURE(*status)) 71 { 72 return adopt; 73 } 74 75 if(!file) 76 { 77 *status = U_ILLEGAL_ARGUMENT_ERROR; 78 return adopt; 79 } 80 81 if(direction & U_READ) 82 { 83 /** TODO: implement */ 84 *status = U_UNSUPPORTED_ERROR; 85 return adopt; 86 } 87 88 if(adopt == NULL) /* they are clearing it */ 89 { 90 if(file->fTranslit != NULL) 91 { 92 /* TODO: Check side */ 93 old = file->fTranslit->translit; 94 uprv_free(file->fTranslit->buffer); 95 file->fTranslit->buffer=NULL; 96 uprv_free(file->fTranslit); 97 file->fTranslit=NULL; 98 } 99 } 100 else 101 { 102 if(file->fTranslit == NULL) 103 { 104 file->fTranslit = (UFILETranslitBuffer*) uprv_malloc(sizeof(UFILETranslitBuffer)); 105 if(!file->fTranslit) 106 { 107 *status = U_MEMORY_ALLOCATION_ERROR; 108 return adopt; 109 } 110 file->fTranslit->capacity = 0; 111 file->fTranslit->length = 0; 112 file->fTranslit->pos = 0; 113 file->fTranslit->buffer = NULL; 114 } 115 else 116 { 117 old = file->fTranslit->translit; 118 ufile_flush_translit(file); 119 } 120 121 file->fTranslit->translit = adopt; 122 } 123 124 return old; 125} 126 127static const UChar * u_file_translit(UFILE *f, const UChar *src, int32_t *count, UBool flush) 128{ 129 int32_t newlen; 130 int32_t junkCount = 0; 131 int32_t textLength; 132 int32_t textLimit; 133 UTransPosition pos; 134 UErrorCode status = U_ZERO_ERROR; 135 136 if(count == NULL) 137 { 138 count = &junkCount; 139 } 140 141 if ((!f)||(!f->fTranslit)||(!f->fTranslit->translit)) 142 { 143 /* fast path */ 144 return src; 145 } 146 147 /* First: slide over everything */ 148 if(f->fTranslit->length > f->fTranslit->pos) 149 { 150 memmove(f->fTranslit->buffer, f->fTranslit->buffer + f->fTranslit->pos, 151 (f->fTranslit->length - f->fTranslit->pos)*sizeof(UChar)); 152 } 153 f->fTranslit->length -= f->fTranslit->pos; /* always */ 154 f->fTranslit->pos = 0; 155 156 /* Calculate new buffer size needed */ 157 newlen = (*count + f->fTranslit->length) * 4; 158 159 if(newlen > f->fTranslit->capacity) 160 { 161 if(f->fTranslit->buffer == NULL) 162 { 163 f->fTranslit->buffer = (UChar*)uprv_malloc(newlen * sizeof(UChar)); 164 } 165 else 166 { 167 f->fTranslit->buffer = (UChar*)uprv_realloc(f->fTranslit->buffer, newlen * sizeof(UChar)); 168 } 169 /* Check for malloc/realloc failure. */ 170 if (f->fTranslit->buffer == NULL) { 171 return NULL; 172 } 173 f->fTranslit->capacity = newlen; 174 } 175 176 /* Now, copy any data over */ 177 u_strncpy(f->fTranslit->buffer + f->fTranslit->length, 178 src, 179 *count); 180 f->fTranslit->length += *count; 181 182 /* Now, translit in place as much as we can */ 183 if(flush == FALSE) 184 { 185 textLength = f->fTranslit->length; 186 pos.contextStart = 0; 187 pos.contextLimit = textLength; 188 pos.start = 0; 189 pos.limit = textLength; 190 191 utrans_transIncrementalUChars(f->fTranslit->translit, 192 f->fTranslit->buffer, /* because we shifted */ 193 &textLength, 194 f->fTranslit->capacity, 195 &pos, 196 &status); 197 198 /* now: start/limit point to the transliterated text */ 199 /* Transliterated is [buffer..pos.start) */ 200 *count = pos.start; 201 f->fTranslit->pos = pos.start; 202 f->fTranslit->length = pos.limit; 203 204 return f->fTranslit->buffer; 205 } 206 else 207 { 208 textLength = f->fTranslit->length; 209 textLimit = f->fTranslit->length; 210 211 utrans_transUChars(f->fTranslit->translit, 212 f->fTranslit->buffer, 213 &textLength, 214 f->fTranslit->capacity, 215 0, 216 &textLimit, 217 &status); 218 219 /* out: converted len */ 220 *count = textLimit; 221 222 /* Set pointers to 0 */ 223 f->fTranslit->pos = 0; 224 f->fTranslit->length = 0; 225 226 return f->fTranslit->buffer; 227 } 228} 229 230#endif 231 232void 233ufile_flush_translit(UFILE *f) 234{ 235#if !UCONFIG_NO_TRANSLITERATION 236 if((!f)||(!f->fTranslit)) 237 return; 238#endif 239 240 u_file_write_flush(NULL, 0, f, FALSE, TRUE); 241} 242 243 244void 245ufile_flush_io(UFILE *f) 246{ 247 if((!f) || (!f->fFile)) { 248 return; /* skip if no file */ 249 } 250 251 u_file_write_flush(NULL, 0, f, TRUE, FALSE); 252} 253 254 255void 256ufile_close_translit(UFILE *f) 257{ 258#if !UCONFIG_NO_TRANSLITERATION 259 if((!f)||(!f->fTranslit)) 260 return; 261#endif 262 263 ufile_flush_translit(f); 264 265#if !UCONFIG_NO_TRANSLITERATION 266 if(f->fTranslit->translit) 267 utrans_close(f->fTranslit->translit); 268 269 if(f->fTranslit->buffer) 270 { 271 uprv_free(f->fTranslit->buffer); 272 } 273 274 uprv_free(f->fTranslit); 275 f->fTranslit = NULL; 276#endif 277} 278 279 280/* Input/output */ 281 282U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ 283u_fputs(const UChar *s, 284 UFILE *f) 285{ 286 int32_t count = u_file_write(s, u_strlen(s), f); 287 count += u_file_write(DELIMITERS, DELIMITERS_LEN, f); 288 return count; 289} 290 291U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ 292u_fputc(UChar32 uc, 293 UFILE *f) 294{ 295 UChar buf[2]; 296 int32_t idx = 0; 297 UBool isError = FALSE; 298 299 U16_APPEND(buf, idx, sizeof(buf)/sizeof(*buf), uc, isError); 300 if (isError) { 301 return U_EOF; 302 } 303 return u_file_write(buf, idx, f) == idx ? uc : U_EOF; 304} 305 306 307U_CFUNC int32_t U_EXPORT2 308u_file_write_flush(const UChar *chars, 309 int32_t count, 310 UFILE *f, 311 UBool flushIO, 312 UBool flushTranslit) 313{ 314 /* Set up conversion parameters */ 315 UErrorCode status = U_ZERO_ERROR; 316 const UChar *mySource = chars; 317 const UChar *mySourceBegin; 318 const UChar *mySourceEnd; 319 char charBuffer[UFILE_CHARBUFFER_SIZE]; 320 char *myTarget = charBuffer; 321 int32_t written = 0; 322 int32_t numConverted = 0; 323 324 if (count < 0) { 325 count = u_strlen(chars); 326 } 327 328#if !UCONFIG_NO_TRANSLITERATION 329 if((f->fTranslit) && (f->fTranslit->translit)) 330 { 331 /* Do the transliteration */ 332 mySource = u_file_translit(f, chars, &count, flushTranslit); 333 } 334#endif 335 336 /* Write to a string. */ 337 if (!f->fFile) { 338 int32_t charsLeft = (int32_t)(f->str.fLimit - f->str.fPos); 339 if (flushIO && charsLeft > count) { 340 count++; 341 } 342 written = ufmt_min(count, charsLeft); 343 u_strncpy(f->str.fPos, mySource, written); 344 f->str.fPos += written; 345 return written; 346 } 347 348 mySourceEnd = mySource + count; 349 350 /* Perform the conversion in a loop */ 351 do { 352 mySourceBegin = mySource; /* beginning location for this loop */ 353 status = U_ZERO_ERROR; 354 if(f->fConverter != NULL) { /* We have a valid converter */ 355 ucnv_fromUnicode(f->fConverter, 356 &myTarget, 357 charBuffer + UFILE_CHARBUFFER_SIZE, 358 &mySource, 359 mySourceEnd, 360 NULL, 361 flushIO, 362 &status); 363 } else { /*weiv: do the invariant conversion */ 364 int32_t convertChars = (int32_t) (mySourceEnd - mySource); 365 if (convertChars > UFILE_CHARBUFFER_SIZE) { 366 convertChars = UFILE_CHARBUFFER_SIZE; 367 status = U_BUFFER_OVERFLOW_ERROR; 368 } 369 u_UCharsToChars(mySource, myTarget, convertChars); 370 mySource += convertChars; 371 myTarget += convertChars; 372 } 373 numConverted = (int32_t)(myTarget - charBuffer); 374 375 if (numConverted > 0) { 376 /* write the converted bytes */ 377 fwrite(charBuffer, 378 sizeof(char), 379 numConverted, 380 f->fFile); 381 382 written += (int32_t) (mySource - mySourceBegin); 383 } 384 myTarget = charBuffer; 385 } 386 while(status == U_BUFFER_OVERFLOW_ERROR); 387 388 /* return # of chars written */ 389 return written; 390} 391 392U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ 393u_file_write( const UChar *chars, 394 int32_t count, 395 UFILE *f) 396{ 397 return u_file_write_flush(chars,count,f,FALSE,FALSE); 398} 399 400 401/* private function used for buffering input */ 402void 403ufile_fill_uchar_buffer(UFILE *f) 404{ 405 UErrorCode status; 406 const char *mySource; 407 const char *mySourceEnd; 408 UChar *myTarget; 409 int32_t bufferSize; 410 int32_t maxCPBytes; 411 int32_t bytesRead; 412 int32_t availLength; 413 int32_t dataSize; 414 char charBuffer[UFILE_CHARBUFFER_SIZE]; 415 u_localized_string *str; 416 417 if (f->fFile == NULL) { 418 /* There is nothing to do. It's a string. */ 419 return; 420 } 421 422 str = &f->str; 423 dataSize = (int32_t)(str->fLimit - str->fPos); 424 if (f->fFileno == 0 && dataSize > 0) { 425 /* Don't read from stdin too many times. There is still some data. */ 426 return; 427 } 428 429 /* shift the buffer if it isn't empty */ 430 if(dataSize != 0) { 431 uprv_memmove(f->fUCBuffer, str->fPos, dataSize * sizeof(UChar)); /* not accessing beyond memory */ 432 } 433 434 435 /* record how much buffer space is available */ 436 availLength = UFILE_UCHARBUFFER_SIZE - dataSize; 437 438 /* Determine the # of codepage bytes needed to fill our UChar buffer */ 439 /* weiv: if converter is NULL, we use invariant converter with charwidth = 1)*/ 440 maxCPBytes = availLength / (f->fConverter!=NULL?(2*ucnv_getMinCharSize(f->fConverter)):1); 441 442 /* Read in the data to convert */ 443 if (f->fFileno == 0) { 444 /* Special case. Read from stdin one line at a time. */ 445 char *retStr = fgets(charBuffer, ufmt_min(maxCPBytes, UFILE_CHARBUFFER_SIZE), f->fFile); 446 bytesRead = (int32_t)(retStr ? uprv_strlen(charBuffer) : 0); 447 } 448 else { 449 /* A normal file */ 450 bytesRead = (int32_t)fread(charBuffer, 451 sizeof(char), 452 ufmt_min(maxCPBytes, UFILE_CHARBUFFER_SIZE), 453 f->fFile); 454 } 455 456 /* Set up conversion parameters */ 457 status = U_ZERO_ERROR; 458 mySource = charBuffer; 459 mySourceEnd = charBuffer + bytesRead; 460 myTarget = f->fUCBuffer + dataSize; 461 bufferSize = UFILE_UCHARBUFFER_SIZE; 462 463 if(f->fConverter != NULL) { /* We have a valid converter */ 464 /* Perform the conversion */ 465 ucnv_toUnicode(f->fConverter, 466 &myTarget, 467 f->fUCBuffer + bufferSize, 468 &mySource, 469 mySourceEnd, 470 NULL, 471 (UBool)(feof(f->fFile) != 0), 472 &status); 473 474 } else { /*weiv: do the invariant conversion */ 475 u_charsToUChars(mySource, myTarget, bytesRead); 476 myTarget += bytesRead; 477 } 478 479 /* update the pointers into our array */ 480 str->fPos = str->fBuffer; 481 str->fLimit = myTarget; 482} 483 484U_CAPI UChar* U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ 485u_fgets(UChar *s, 486 int32_t n, 487 UFILE *f) 488{ 489 int32_t dataSize; 490 int32_t count; 491 UChar *alias; 492 const UChar *limit; 493 UChar *sItr; 494 UChar currDelim = 0; 495 u_localized_string *str; 496 497 if (n <= 0) { 498 /* Caller screwed up. We need to write the null terminatior. */ 499 return NULL; 500 } 501 502 /* fill the buffer if needed */ 503 str = &f->str; 504 if (str->fPos >= str->fLimit) { 505 ufile_fill_uchar_buffer(f); 506 } 507 508 /* subtract 1 from n to compensate for the terminator */ 509 --n; 510 511 /* determine the amount of data in the buffer */ 512 dataSize = (int32_t)(str->fLimit - str->fPos); 513 514 /* if 0 characters were left, return 0 */ 515 if (dataSize == 0) 516 return NULL; 517 518 /* otherwise, iteratively fill the buffer and copy */ 519 count = 0; 520 sItr = s; 521 currDelim = 0; 522 while (dataSize > 0 && count < n) { 523 alias = str->fPos; 524 525 /* Find how much to copy */ 526 if (dataSize < (n - count)) { 527 limit = str->fLimit; 528 } 529 else { 530 limit = alias + (n - count); 531 } 532 533 if (!currDelim) { 534 /* Copy UChars until we find the first occurrence of a delimiter character */ 535 while (alias < limit && !IS_FIRST_STRING_DELIMITER(*alias)) { 536 count++; 537 *(sItr++) = *(alias++); 538 } 539 /* Preserve the newline */ 540 if (alias < limit && IS_FIRST_STRING_DELIMITER(*alias)) { 541 if (CAN_HAVE_COMBINED_STRING_DELIMITER(*alias)) { 542 currDelim = *alias; 543 } 544 else { 545 currDelim = 1; /* This isn't a newline, but it's used to say 546 that we should break later. We've checked all 547 possible newline combinations even across buffer 548 boundaries. */ 549 } 550 count++; 551 *(sItr++) = *(alias++); 552 } 553 } 554 /* If we have a CRLF combination, preserve that too. */ 555 if (alias < limit) { 556 if (currDelim && IS_COMBINED_STRING_DELIMITER(currDelim, *alias)) { 557 count++; 558 *(sItr++) = *(alias++); 559 } 560 currDelim = 1; /* This isn't a newline, but it's used to say 561 that we should break later. We've checked all 562 possible newline combinations even across buffer 563 boundaries. */ 564 } 565 566 /* update the current buffer position */ 567 str->fPos = alias; 568 569 /* if we found a delimiter */ 570 if (currDelim == 1) { 571 /* break out */ 572 break; 573 } 574 575 /* refill the buffer */ 576 ufile_fill_uchar_buffer(f); 577 578 /* determine the amount of data in the buffer */ 579 dataSize = (int32_t)(str->fLimit - str->fPos); 580 } 581 582 /* add the terminator and return s */ 583 *sItr = 0x0000; 584 return s; 585} 586 587U_CFUNC UBool U_EXPORT2 588ufile_getch(UFILE *f, UChar *ch) 589{ 590 UBool isValidChar = FALSE; 591 592 *ch = U_EOF; 593 /* if we have an available character in the buffer, return it */ 594 if(f->str.fPos < f->str.fLimit){ 595 *ch = *(f->str.fPos)++; 596 isValidChar = TRUE; 597 } 598 else { 599 /* otherwise, fill the buffer and return the next character */ 600 if(f->str.fPos >= f->str.fLimit) { 601 ufile_fill_uchar_buffer(f); 602 } 603 if(f->str.fPos < f->str.fLimit) { 604 *ch = *(f->str.fPos)++; 605 isValidChar = TRUE; 606 } 607 } 608 return isValidChar; 609} 610 611U_CAPI UChar U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ 612u_fgetc(UFILE *f) 613{ 614 UChar ch; 615 ufile_getch(f, &ch); 616 return ch; 617} 618 619U_CFUNC UBool U_EXPORT2 620ufile_getch32(UFILE *f, UChar32 *c32) 621{ 622 UBool isValidChar = FALSE; 623 u_localized_string *str; 624 625 *c32 = U_EOF; 626 627 /* Fill the buffer if it is empty */ 628 str = &f->str; 629 if (f && str->fPos + 1 >= str->fLimit) { 630 ufile_fill_uchar_buffer(f); 631 } 632 633 /* Get the next character in the buffer */ 634 if (str->fPos < str->fLimit) { 635 *c32 = *(str->fPos)++; 636 if (U_IS_LEAD(*c32)) { 637 if (str->fPos < str->fLimit) { 638 UChar c16 = *(str->fPos)++; 639 *c32 = U16_GET_SUPPLEMENTARY(*c32, c16); 640 isValidChar = TRUE; 641 } 642 else { 643 *c32 = U_EOF; 644 } 645 } 646 else { 647 isValidChar = TRUE; 648 } 649 } 650 651 return isValidChar; 652} 653 654U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ 655u_fgetcx(UFILE *f) 656{ 657 UChar32 ch; 658 ufile_getch32(f, &ch); 659 return ch; 660} 661 662U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ 663u_fungetc(UChar32 ch, 664 UFILE *f) 665{ 666 u_localized_string *str; 667 668 str = &f->str; 669 670 /* if we're at the beginning of the buffer, sorry! */ 671 if (str->fPos == str->fBuffer 672 || (U_IS_LEAD(ch) && (str->fPos - 1) == str->fBuffer)) 673 { 674 ch = U_EOF; 675 } 676 else { 677 /* otherwise, put the character back */ 678 /* Remember, read them back on in the reverse order. */ 679 if (U_IS_LEAD(ch)) { 680 if (*--(str->fPos) != U16_TRAIL(ch) 681 || *--(str->fPos) != U16_LEAD(ch)) 682 { 683 ch = U_EOF; 684 } 685 } 686 else if (*--(str->fPos) != ch) { 687 ch = U_EOF; 688 } 689 } 690 return ch; 691} 692 693U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ 694u_file_read( UChar *chars, 695 int32_t count, 696 UFILE *f) 697{ 698 int32_t dataSize; 699 int32_t read = 0; 700 u_localized_string *str = &f->str; 701 702 do { 703 704 /* determine the amount of data in the buffer */ 705 dataSize = (int32_t)(str->fLimit - str->fPos); 706 if (dataSize <= 0) { 707 /* fill the buffer */ 708 ufile_fill_uchar_buffer(f); 709 dataSize = (int32_t)(str->fLimit - str->fPos); 710 } 711 712 /* Make sure that we don't read too much */ 713 if (dataSize > (count - read)) { 714 dataSize = count - read; 715 } 716 717 /* copy the current data in the buffer */ 718 memcpy(chars + read, str->fPos, dataSize * sizeof(UChar)); 719 720 /* update number of items read */ 721 read += dataSize; 722 723 /* update the current buffer position */ 724 str->fPos += dataSize; 725 } 726 while (dataSize != 0 && read < count); 727 728 return read; 729} 730#endif 731