/* ustdio.c revision 51cfa1a9a96cad34675a6415fe86dfdf3f525bb6 */
1/* 2 ****************************************************************************** 3 * 4 * Copyright (C) 1998-2007, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ****************************************************************************** 8 * 9 * File ustdio.c 10 * 11 * Modification History: 12 * 13 * Date Name Description 14 * 11/18/98 stephen Creation. 15 * 03/12/99 stephen Modified for new C API. 16 * 07/19/99 stephen Fixed read() and gets() 17 ****************************************************************************** 18 */ 19 20#include "unicode/ustdio.h" 21#include "unicode/putil.h" 22#include "cmemory.h" 23#include "cstring.h" 24#include "ufile.h" 25#include "ufmt_cmn.h" 26#include "unicode/ucnv.h" 27#include "unicode/ustring.h" 28 29#include <string.h> 30 31#define DELIM_LF 0x000A 32#define DELIM_VT 0x000B 33#define DELIM_FF 0x000C 34#define DELIM_CR 0x000D 35#define DELIM_NEL 0x0085 36#define DELIM_LS 0x2028 37#define DELIM_PS 0x2029 38 39/* TODO: is this correct for all codepages? Should we just use \n and let the converter handle it? */ 40#ifdef U_WINDOWS 41static const UChar DELIMITERS [] = { DELIM_CR, DELIM_LF, 0x0000 }; 42static const uint32_t DELIMITERS_LEN = 2; 43/* TODO: Default newline writing should be detected based upon the converter being used. 
*/ 44#else 45static const UChar DELIMITERS [] = { DELIM_LF, 0x0000 }; 46static const uint32_t DELIMITERS_LEN = 1; 47#endif 48 49#define IS_FIRST_STRING_DELIMITER(c1) \ 50 (UBool)((DELIM_LF <= (c1) && (c1) <= DELIM_CR) \ 51 || (c1) == DELIM_NEL \ 52 || (c1) == DELIM_LS \ 53 || (c1) == DELIM_PS) 54#define CAN_HAVE_COMBINED_STRING_DELIMITER(c1) (UBool)((c1) == DELIM_CR) 55#define IS_COMBINED_STRING_DELIMITER(c1, c2) \ 56 (UBool)((c1) == DELIM_CR && (c2) == DELIM_LF) 57 58 59#if !UCONFIG_NO_TRANSLITERATION 60 61U_CAPI UTransliterator* U_EXPORT2 62u_fsettransliterator(UFILE *file, UFileDirection direction, 63 UTransliterator *adopt, UErrorCode *status) 64{ 65 UTransliterator *old = NULL; 66 67 if(U_FAILURE(*status)) 68 { 69 return adopt; 70 } 71 72 if(!file) 73 { 74 *status = U_ILLEGAL_ARGUMENT_ERROR; 75 return adopt; 76 } 77 78 if(direction & U_READ) 79 { 80 /** TODO: implement */ 81 *status = U_UNSUPPORTED_ERROR; 82 return adopt; 83 } 84 85 if(adopt == NULL) /* they are clearing it */ 86 { 87 if(file->fTranslit != NULL) 88 { 89 /* TODO: Check side */ 90 old = file->fTranslit->translit; 91 uprv_free(file->fTranslit->buffer); 92 file->fTranslit->buffer=NULL; 93 uprv_free(file->fTranslit); 94 file->fTranslit=NULL; 95 } 96 } 97 else 98 { 99 if(file->fTranslit == NULL) 100 { 101 file->fTranslit = (UFILETranslitBuffer*) uprv_malloc(sizeof(UFILETranslitBuffer)); 102 if(!file->fTranslit) 103 { 104 *status = U_MEMORY_ALLOCATION_ERROR; 105 return adopt; 106 } 107 file->fTranslit->capacity = 0; 108 file->fTranslit->length = 0; 109 file->fTranslit->pos = 0; 110 file->fTranslit->buffer = NULL; 111 } 112 else 113 { 114 old = file->fTranslit->translit; 115 ufile_flush_translit(file); 116 } 117 118 file->fTranslit->translit = adopt; 119 } 120 121 return old; 122} 123 124static const UChar * u_file_translit(UFILE *f, const UChar *src, int32_t *count, UBool flush) 125{ 126 int32_t newlen; 127 int32_t junkCount = 0; 128 int32_t textLength; 129 int32_t textLimit; 130 UTransPosition pos; 
131 UErrorCode status = U_ZERO_ERROR; 132 133 if(count == NULL) 134 { 135 count = &junkCount; 136 } 137 138 if ((!f)||(!f->fTranslit)||(!f->fTranslit->translit)) 139 { 140 /* fast path */ 141 return src; 142 } 143 144 /* First: slide over everything */ 145 if(f->fTranslit->length > f->fTranslit->pos) 146 { 147 memmove(f->fTranslit->buffer, f->fTranslit->buffer + f->fTranslit->pos, 148 (f->fTranslit->length - f->fTranslit->pos)*sizeof(UChar)); 149 } 150 f->fTranslit->length -= f->fTranslit->pos; /* always */ 151 f->fTranslit->pos = 0; 152 153 /* Calculate new buffer size needed */ 154 newlen = (*count + f->fTranslit->length) * 4; 155 156 if(newlen > f->fTranslit->capacity) 157 { 158 if(f->fTranslit->buffer == NULL) 159 { 160 f->fTranslit->buffer = (UChar*)uprv_malloc(newlen * sizeof(UChar)); 161 } 162 else 163 { 164 f->fTranslit->buffer = (UChar*)uprv_realloc(f->fTranslit->buffer, newlen * sizeof(UChar)); 165 } 166 f->fTranslit->capacity = newlen; 167 } 168 169 /* Now, copy any data over */ 170 u_strncpy(f->fTranslit->buffer + f->fTranslit->length, 171 src, 172 *count); 173 f->fTranslit->length += *count; 174 175 /* Now, translit in place as much as we can */ 176 if(flush == FALSE) 177 { 178 textLength = f->fTranslit->length; 179 pos.contextStart = 0; 180 pos.contextLimit = textLength; 181 pos.start = 0; 182 pos.limit = textLength; 183 184 utrans_transIncrementalUChars(f->fTranslit->translit, 185 f->fTranslit->buffer, /* because we shifted */ 186 &textLength, 187 f->fTranslit->capacity, 188 &pos, 189 &status); 190 191 /* now: start/limit point to the transliterated text */ 192 /* Transliterated is [buffer..pos.start) */ 193 *count = pos.start; 194 f->fTranslit->pos = pos.start; 195 f->fTranslit->length = pos.limit; 196 197 return f->fTranslit->buffer; 198 } 199 else 200 { 201 textLength = f->fTranslit->length; 202 textLimit = f->fTranslit->length; 203 204 utrans_transUChars(f->fTranslit->translit, 205 f->fTranslit->buffer, 206 &textLength, 207 
f->fTranslit->capacity, 208 0, 209 &textLimit, 210 &status); 211 212 /* out: converted len */ 213 *count = textLimit; 214 215 /* Set pointers to 0 */ 216 f->fTranslit->pos = 0; 217 f->fTranslit->length = 0; 218 219 return f->fTranslit->buffer; 220 } 221} 222 223#endif 224 225void 226ufile_flush_translit(UFILE *f) 227{ 228#if !UCONFIG_NO_TRANSLITERATION 229 if((!f)||(!f->fTranslit)) 230 return; 231#endif 232 233 u_file_write_flush(NULL, 0, f, FALSE, TRUE); 234} 235 236 237void 238ufile_close_translit(UFILE *f) 239{ 240#if !UCONFIG_NO_TRANSLITERATION 241 if((!f)||(!f->fTranslit)) 242 return; 243#endif 244 245 ufile_flush_translit(f); 246 247#if !UCONFIG_NO_TRANSLITERATION 248 if(f->fTranslit->translit) 249 utrans_close(f->fTranslit->translit); 250 251 if(f->fTranslit->buffer) 252 { 253 uprv_free(f->fTranslit->buffer); 254 } 255 256 uprv_free(f->fTranslit); 257 f->fTranslit = NULL; 258#endif 259} 260 261 262/* Input/output */ 263 264U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ 265u_fputs(const UChar *s, 266 UFILE *f) 267{ 268 int32_t count = u_file_write(s, u_strlen(s), f); 269 count += u_file_write(DELIMITERS, DELIMITERS_LEN, f); 270 return count; 271} 272 273U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ 274u_fputc(UChar32 uc, 275 UFILE *f) 276{ 277 UChar buf[2]; 278 int32_t idx = 0; 279 UBool isError = FALSE; 280 281 U16_APPEND(buf, idx, sizeof(buf)/sizeof(*buf), uc, isError); 282 if (isError) { 283 return U_EOF; 284 } 285 return u_file_write(buf, idx, f) == idx ? 
uc : U_EOF; 286} 287 288 289U_CFUNC int32_t U_EXPORT2 290u_file_write_flush(const UChar *chars, 291 int32_t count, 292 UFILE *f, 293 UBool flushIO, 294 UBool flushTranslit) 295{ 296 /* Set up conversion parameters */ 297 UErrorCode status = U_ZERO_ERROR; 298 const UChar *mySource = chars; 299 const UChar *mySourceEnd; 300 char charBuffer[UFILE_CHARBUFFER_SIZE]; 301 char *myTarget = charBuffer; 302 int32_t written = 0; 303 int32_t numConverted = 0; 304 305 if (count < 0) { 306 count = u_strlen(chars); 307 } 308 309#if !UCONFIG_NO_TRANSLITERATION 310 if((f->fTranslit) && (f->fTranslit->translit)) 311 { 312 /* Do the transliteration */ 313 mySource = u_file_translit(f, chars, &count, flushTranslit); 314 } 315#endif 316 317 /* Write to a string. */ 318 if (!f->fFile) { 319 int32_t charsLeft = (int32_t)(f->str.fLimit - f->str.fPos); 320 if (flushIO && charsLeft > count) { 321 count++; 322 } 323 written = ufmt_min(count, charsLeft); 324 u_strncpy(f->str.fPos, mySource, written); 325 f->str.fPos += written; 326 return written; 327 } 328 329 mySourceEnd = mySource + count; 330 331 /* Perform the conversion in a loop */ 332 do { 333 status = U_ZERO_ERROR; 334 if(f->fConverter != NULL) { /* We have a valid converter */ 335 ucnv_fromUnicode(f->fConverter, 336 &myTarget, 337 charBuffer + UFILE_CHARBUFFER_SIZE, 338 &mySource, 339 mySourceEnd, 340 NULL, 341 flushIO, 342 &status); 343 } else { /*weiv: do the invariant conversion */ 344 u_UCharsToChars(mySource, myTarget, count); 345 myTarget += count; 346 } 347 numConverted = (int32_t)(myTarget - charBuffer); 348 349 if (numConverted > 0) { 350 /* write the converted bytes */ 351 fwrite(charBuffer, 352 sizeof(char), 353 numConverted, 354 f->fFile); 355 356 written += numConverted; 357 } 358 myTarget = charBuffer; 359 } 360 while(status == U_BUFFER_OVERFLOW_ERROR); 361 362 /* return # of chars written */ 363 return written; 364} 365 366U_CAPI int32_t U_EXPORT2 /* U_CAPI ... 
U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ 367u_file_write( const UChar *chars, 368 int32_t count, 369 UFILE *f) 370{ 371 return u_file_write_flush(chars,count,f,FALSE,FALSE); 372} 373 374 375/* private function used for buffering input */ 376void 377ufile_fill_uchar_buffer(UFILE *f) 378{ 379 UErrorCode status; 380 const char *mySource; 381 const char *mySourceEnd; 382 UChar *myTarget; 383 int32_t bufferSize; 384 int32_t maxCPBytes; 385 int32_t bytesRead; 386 int32_t availLength; 387 int32_t dataSize; 388 char charBuffer[UFILE_CHARBUFFER_SIZE]; 389 u_localized_string *str; 390 391 if (f->fFile == NULL) { 392 /* There is nothing to do. It's a string. */ 393 return; 394 } 395 396 str = &f->str; 397 dataSize = (int32_t)(str->fLimit - str->fPos); 398 if (f->fFileno == 0 && dataSize > 0) { 399 /* Don't read from stdin too many times. There is still some data. */ 400 return; 401 } 402 403 /* shift the buffer if it isn't empty */ 404 if(dataSize != 0) { 405 uprv_memmove(f->fUCBuffer, str->fPos, dataSize * sizeof(UChar)); 406 } 407 408 409 /* record how much buffer space is available */ 410 availLength = UFILE_UCHARBUFFER_SIZE - dataSize; 411 412 /* Determine the # of codepage bytes needed to fill our UChar buffer */ 413 /* weiv: if converter is NULL, we use invariant converter with charwidth = 1)*/ 414 maxCPBytes = availLength / (f->fConverter!=NULL?(2*ucnv_getMinCharSize(f->fConverter)):1); 415 416 /* Read in the data to convert */ 417 if (f->fFileno == 0) { 418 /* Special case. Read from stdin one line at a time. */ 419 char *retStr = fgets(charBuffer, ufmt_min(maxCPBytes, UFILE_CHARBUFFER_SIZE), f->fFile); 420 bytesRead = (int32_t)(retStr ? 
uprv_strlen(charBuffer) : 0); 421 } 422 else { 423 /* A normal file */ 424 bytesRead = (int32_t)fread(charBuffer, 425 sizeof(char), 426 ufmt_min(maxCPBytes, UFILE_CHARBUFFER_SIZE), 427 f->fFile); 428 } 429 430 /* Set up conversion parameters */ 431 status = U_ZERO_ERROR; 432 mySource = charBuffer; 433 mySourceEnd = charBuffer + bytesRead; 434 myTarget = f->fUCBuffer + dataSize; 435 bufferSize = UFILE_UCHARBUFFER_SIZE; 436 437 if(f->fConverter != NULL) { /* We have a valid converter */ 438 /* Perform the conversion */ 439 ucnv_toUnicode(f->fConverter, 440 &myTarget, 441 f->fUCBuffer + bufferSize, 442 &mySource, 443 mySourceEnd, 444 NULL, 445 (UBool)(feof(f->fFile) != 0), 446 &status); 447 448 } else { /*weiv: do the invariant conversion */ 449 u_charsToUChars(mySource, myTarget, bytesRead); 450 myTarget += bytesRead; 451 } 452 453 /* update the pointers into our array */ 454 str->fPos = str->fBuffer; 455 str->fLimit = myTarget; 456} 457 458U_CAPI UChar* U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ 459u_fgets(UChar *s, 460 int32_t n, 461 UFILE *f) 462{ 463 int32_t dataSize; 464 int32_t count; 465 UChar *alias; 466 const UChar *limit; 467 UChar *sItr; 468 UChar currDelim = 0; 469 u_localized_string *str; 470 471 if (n <= 0) { 472 /* Caller screwed up. We need to write the null terminatior. 
*/ 473 return NULL; 474 } 475 476 /* fill the buffer if needed */ 477 str = &f->str; 478 if (str->fPos >= str->fLimit) { 479 ufile_fill_uchar_buffer(f); 480 } 481 482 /* subtract 1 from n to compensate for the terminator */ 483 --n; 484 485 /* determine the amount of data in the buffer */ 486 dataSize = (int32_t)(str->fLimit - str->fPos); 487 488 /* if 0 characters were left, return 0 */ 489 if (dataSize == 0) 490 return NULL; 491 492 /* otherwise, iteratively fill the buffer and copy */ 493 count = 0; 494 sItr = s; 495 currDelim = 0; 496 while (dataSize > 0 && count < n) { 497 alias = str->fPos; 498 499 /* Find how much to copy */ 500 if (dataSize < (n - count)) { 501 limit = str->fLimit; 502 } 503 else { 504 limit = alias + (n - count); 505 } 506 507 if (!currDelim) { 508 /* Copy UChars until we find the first occurrence of a delimiter character */ 509 while (alias < limit && !IS_FIRST_STRING_DELIMITER(*alias)) { 510 count++; 511 *(sItr++) = *(alias++); 512 } 513 /* Preserve the newline */ 514 if (alias < limit && IS_FIRST_STRING_DELIMITER(*alias)) { 515 if (CAN_HAVE_COMBINED_STRING_DELIMITER(*alias)) { 516 currDelim = *alias; 517 } 518 else { 519 currDelim = 1; /* This isn't a newline, but it's used to say 520 that we should break later. We've checked all 521 possible newline combinations even across buffer 522 boundaries. */ 523 } 524 count++; 525 *(sItr++) = *(alias++); 526 } 527 } 528 /* If we have a CRLF combination, preserve that too. */ 529 if (alias < limit) { 530 if (currDelim && IS_COMBINED_STRING_DELIMITER(currDelim, *alias)) { 531 count++; 532 *(sItr++) = *(alias++); 533 } 534 currDelim = 1; /* This isn't a newline, but it's used to say 535 that we should break later. We've checked all 536 possible newline combinations even across buffer 537 boundaries. 
*/ 538 } 539 540 /* update the current buffer position */ 541 str->fPos = alias; 542 543 /* if we found a delimiter */ 544 if (currDelim == 1) { 545 /* break out */ 546 break; 547 } 548 549 /* refill the buffer */ 550 ufile_fill_uchar_buffer(f); 551 552 /* determine the amount of data in the buffer */ 553 dataSize = (int32_t)(str->fLimit - str->fPos); 554 } 555 556 /* add the terminator and return s */ 557 *sItr = 0x0000; 558 return s; 559} 560 561U_CFUNC UBool U_EXPORT2 562ufile_getch(UFILE *f, UChar *ch) 563{ 564 UBool isValidChar = FALSE; 565 566 *ch = U_EOF; 567 /* if we have an available character in the buffer, return it */ 568 if(f->str.fPos < f->str.fLimit){ 569 *ch = *(f->str.fPos)++; 570 isValidChar = TRUE; 571 } 572 else if (f) { 573 /* otherwise, fill the buffer and return the next character */ 574 if(f->str.fPos >= f->str.fLimit) { 575 ufile_fill_uchar_buffer(f); 576 } 577 if(f->str.fPos < f->str.fLimit) { 578 *ch = *(f->str.fPos)++; 579 isValidChar = TRUE; 580 } 581 } 582 return isValidChar; 583} 584 585U_CAPI UChar U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ 586u_fgetc(UFILE *f) 587{ 588 UChar ch; 589 ufile_getch(f, &ch); 590 return ch; 591} 592 593U_CFUNC UBool U_EXPORT2 594ufile_getch32(UFILE *f, UChar32 *c32) 595{ 596 UBool isValidChar = FALSE; 597 u_localized_string *str; 598 599 *c32 = U_EOF; 600 601 /* Fill the buffer if it is empty */ 602 str = &f->str; 603 if (f && str->fPos + 1 >= str->fLimit) { 604 ufile_fill_uchar_buffer(f); 605 } 606 607 /* Get the next character in the buffer */ 608 if (str->fPos < str->fLimit) { 609 *c32 = *(str->fPos)++; 610 if (U_IS_LEAD(*c32)) { 611 if (str->fPos < str->fLimit) { 612 UChar c16 = *(str->fPos)++; 613 *c32 = U16_GET_SUPPLEMENTARY(*c32, c16); 614 isValidChar = TRUE; 615 } 616 else { 617 *c32 = U_EOF; 618 } 619 } 620 else { 621 isValidChar = TRUE; 622 } 623 } 624 625 return isValidChar; 626} 627 628U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... 
U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ 629u_fgetcx(UFILE *f) 630{ 631 UChar32 ch; 632 ufile_getch32(f, &ch); 633 return ch; 634} 635 636U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ 637u_fungetc(UChar32 ch, 638 UFILE *f) 639{ 640 u_localized_string *str; 641 642 str = &f->str; 643 644 /* if we're at the beginning of the buffer, sorry! */ 645 if (str->fPos == str->fBuffer 646 || (U_IS_LEAD(ch) && (str->fPos - 1) == str->fBuffer)) 647 { 648 ch = U_EOF; 649 } 650 else { 651 /* otherwise, put the character back */ 652 /* Remember, read them back on in the reverse order. */ 653 if (U_IS_LEAD(ch)) { 654 if (*--(str->fPos) != U16_TRAIL(ch) 655 || *--(str->fPos) != U16_LEAD(ch)) 656 { 657 ch = U_EOF; 658 } 659 } 660 else if (*--(str->fPos) != ch) { 661 ch = U_EOF; 662 } 663 } 664 return ch; 665} 666 667U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ 668u_file_read( UChar *chars, 669 int32_t count, 670 UFILE *f) 671{ 672 int32_t dataSize; 673 int32_t read = 0; 674 u_localized_string *str = &f->str; 675 676 do { 677 678 /* determine the amount of data in the buffer */ 679 dataSize = (int32_t)(str->fLimit - str->fPos); 680 if (dataSize <= 0) { 681 /* fill the buffer */ 682 ufile_fill_uchar_buffer(f); 683 dataSize = (int32_t)(str->fLimit - str->fPos); 684 } 685 686 /* Make sure that we don't read too much */ 687 if (dataSize > (count - read)) { 688 dataSize = count - read; 689 } 690 691 /* copy the current data in the buffer */ 692 memcpy(chars + read, str->fPos, dataSize * sizeof(UChar)); 693 694 /* update number of items read */ 695 read += dataSize; 696 697 /* update the current buffer position */ 698 str->fPos += dataSize; 699 } 700 while (dataSize != 0 && read < count); 701 702 return read; 703} 704