/*
*******************************************************************************
*
*   Copyright (C) 1998-2008, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*
* File parse.c
*
* Modification History:
*
*   Date          Name          Description
*   05/26/99      stephen       Creation.
*   02/25/00      weiv          Overhaul to write udata
*   5/10/01       Ram           removed ustdio dependency
*   06/10/2001    Dominic Ludlam <dom@recoil.org> Rewritten
*******************************************************************************
*/

#include "ucol_imp.h"
#include "parse.h"
#include "errmsg.h"
#include "uhash.h"
#include "cmemory.h"
#include "cstring.h"
#include "uinvchar.h"
#include "read.h"
#include "ustr.h"
#include "reslist.h"
#include "rbt_pars.h"
#include "unicode/ustring.h"
#include "unicode/putil.h"
#include <stdio.h>

/* Number of tokens to read ahead of the current stream position */
#define MAX_LOOKAHEAD   3

/* Code points that get special treatment while reading rule files */
#define CR               0x000D
#define LF               0x000A
#define SPACE            0x0020
#define TAB              0x0009
#define ESCAPE           0x005C
#define HASH             0x0023
#define QUOTE            0x0027
#define ZERO             0x0030
#define STARTCOMMAND     0x005B
#define ENDCOMMAND       0x005D
#define OPENSQBRACKET    0x005B
#define CLOSESQBRACKET   0x005D

/*
 * Common signature of the per-type resource parsers in this file:
 * (tag, line on which the resource started, comment attached to it, status).
 */
typedef struct SResource *
ParseResourceFunction(char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status);

/* One slot of the token lookahead ring buffer. */
struct Lookahead
{
     enum   ETokenType type;     /* token type as returned by getNextToken() */
     struct UString    value;    /* token text */
     struct UString    comment;  /* comment text delivered together with the token */
     uint32_t          line;     /* line number reported for the token */
};

/* keep in sync with token defines in read.h */
/* NOTE(review): read.h is not visible here - confirm that the seventh entry
   really corresponds to the last ETokenType value before TOK_TOKEN_COUNT. */
const char *tokenNames[TOK_TOKEN_COUNT] =
{
     "string",             /* A string token, such as "MonthNames" */
     "'{'",                /* An opening brace character */
     "'}'",                /* A closing brace character */
     "','",                /* A comma */
     "':'",                /* A colon */

     "<end of file>",      /* End of the file has been reached successfully */
     "<end of line>"
};
75 76/* Just to store "TRUE" */ 77static const UChar trueValue[] = {0x0054, 0x0052, 0x0055, 0x0045, 0x0000}; 78 79static struct Lookahead lookahead[MAX_LOOKAHEAD + 1]; 80static uint32_t lookaheadPosition; 81static UCHARBUF *buffer; 82 83static struct SRBRoot *bundle; 84static const char *inputdir; 85static uint32_t inputdirLength; 86static const char *outputdir; 87static uint32_t outputdirLength; 88 89static UBool gMakeBinaryCollation = TRUE; 90static UBool gOmitCollationRules = FALSE; 91 92static struct SResource *parseResource(char *tag, const struct UString *comment, UErrorCode *status); 93 94/* The nature of the lookahead buffer: 95 There are MAX_LOOKAHEAD + 1 slots, used as a circular buffer. This provides 96 MAX_LOOKAHEAD lookahead tokens and a slot for the current token and value. 97 When getToken is called, the current pointer is moved to the next slot and the 98 old slot is filled with the next token from the reader by calling getNextToken. 99 The token values are stored in the slot, which means that token values don't 100 survive a call to getToken, ie. 
101 102 UString *value; 103 104 getToken(&value, NULL, status); 105 getToken(NULL, NULL, status); bad - value is now a different string 106*/ 107static void 108initLookahead(UCHARBUF *buf, UErrorCode *status) 109{ 110 static uint32_t initTypeStrings = 0; 111 uint32_t i; 112 113 if (!initTypeStrings) 114 { 115 initTypeStrings = 1; 116 } 117 118 lookaheadPosition = 0; 119 buffer = buf; 120 121 resetLineNumber(); 122 123 for (i = 0; i < MAX_LOOKAHEAD; i++) 124 { 125 lookahead[i].type = getNextToken(buffer, &lookahead[i].value, &lookahead[i].line, &lookahead[i].comment, status); 126 if (U_FAILURE(*status)) 127 { 128 return; 129 } 130 } 131 132 *status = U_ZERO_ERROR; 133} 134 135static void 136cleanupLookahead() 137{ 138 uint32_t i; 139 for (i = 0; i < MAX_LOOKAHEAD; i++) 140 { 141 ustr_deinit(&lookahead[i].value); 142 ustr_deinit(&lookahead[i].comment); 143 } 144 145} 146 147static enum ETokenType 148getToken(struct UString **tokenValue, struct UString* comment, uint32_t *linenumber, UErrorCode *status) 149{ 150 enum ETokenType result; 151 uint32_t i; 152 153 result = lookahead[lookaheadPosition].type; 154 155 if (tokenValue != NULL) 156 { 157 *tokenValue = &lookahead[lookaheadPosition].value; 158 } 159 160 if (linenumber != NULL) 161 { 162 *linenumber = lookahead[lookaheadPosition].line; 163 } 164 165 if (comment != NULL) 166 { 167 ustr_cpy(comment, &(lookahead[lookaheadPosition].comment), status); 168 } 169 170 i = (lookaheadPosition + MAX_LOOKAHEAD) % (MAX_LOOKAHEAD + 1); 171 lookaheadPosition = (lookaheadPosition + 1) % (MAX_LOOKAHEAD + 1); 172 ustr_setlen(&lookahead[i].comment, 0, status); 173 ustr_setlen(&lookahead[i].value, 0, status); 174 lookahead[i].type = getNextToken(buffer, &lookahead[i].value, &lookahead[i].line, &lookahead[i].comment, status); 175 176 /* printf("getToken, returning %s\n", tokenNames[result]); */ 177 178 return result; 179} 180 181static enum ETokenType 182peekToken(uint32_t lookaheadCount, struct UString **tokenValue, uint32_t 
*linenumber, struct UString *comment, UErrorCode *status) 183{ 184 uint32_t i = (lookaheadPosition + lookaheadCount) % (MAX_LOOKAHEAD + 1); 185 186 if (U_FAILURE(*status)) 187 { 188 return TOK_ERROR; 189 } 190 191 if (lookaheadCount >= MAX_LOOKAHEAD) 192 { 193 *status = U_INTERNAL_PROGRAM_ERROR; 194 return TOK_ERROR; 195 } 196 197 if (tokenValue != NULL) 198 { 199 *tokenValue = &lookahead[i].value; 200 } 201 202 if (linenumber != NULL) 203 { 204 *linenumber = lookahead[i].line; 205 } 206 207 if(comment != NULL){ 208 ustr_cpy(comment, &(lookahead[lookaheadPosition].comment), status); 209 } 210 211 return lookahead[i].type; 212} 213 214static void 215expect(enum ETokenType expectedToken, struct UString **tokenValue, struct UString *comment, uint32_t *linenumber, UErrorCode *status) 216{ 217 uint32_t line; 218 219 enum ETokenType token = getToken(tokenValue, comment, &line, status); 220 221 if (linenumber != NULL) 222 { 223 *linenumber = line; 224 } 225 226 if (U_FAILURE(*status)) 227 { 228 return; 229 } 230 231 if (token != expectedToken) 232 { 233 *status = U_INVALID_FORMAT_ERROR; 234 error(line, "expecting %s, got %s", tokenNames[expectedToken], tokenNames[token]); 235 } 236 else 237 { 238 *status = U_ZERO_ERROR; 239 } 240} 241 242static char *getInvariantString(uint32_t *line, struct UString *comment, UErrorCode *status) 243{ 244 struct UString *tokenValue; 245 char *result; 246 uint32_t count; 247 248 expect(TOK_STRING, &tokenValue, comment, line, status); 249 250 if (U_FAILURE(*status)) 251 { 252 return NULL; 253 } 254 255 count = u_strlen(tokenValue->fChars); 256 if(!uprv_isInvariantUString(tokenValue->fChars, count)) { 257 *status = U_INVALID_FORMAT_ERROR; 258 error(*line, "invariant characters required for table keys, binary data, etc."); 259 return NULL; 260 } 261 262 result = uprv_malloc(count+1); 263 264 if (result == NULL) 265 { 266 *status = U_MEMORY_ALLOCATION_ERROR; 267 return NULL; 268 } 269 270 u_UCharsToChars(tokenValue->fChars, result, count+1); 
271 return result; 272} 273 274static struct SResource * 275parseUCARules(char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status) 276{ 277 struct SResource *result = NULL; 278 struct UString *tokenValue; 279 FileStream *file = NULL; 280 char filename[256] = { '\0' }; 281 char cs[128] = { '\0' }; 282 uint32_t line; 283 int len=0; 284 UBool quoted = FALSE; 285 UCHARBUF *ucbuf=NULL; 286 UChar32 c = 0; 287 const char* cp = NULL; 288 UChar *pTarget = NULL; 289 UChar *target = NULL; 290 UChar *targetLimit = NULL; 291 int32_t size = 0; 292 293 expect(TOK_STRING, &tokenValue, NULL, &line, status); 294 295 if(isVerbose()){ 296 printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 297 } 298 299 if (U_FAILURE(*status)) 300 { 301 return NULL; 302 } 303 /* make the filename including the directory */ 304 if (inputdir != NULL) 305 { 306 uprv_strcat(filename, inputdir); 307 308 if (inputdir[inputdirLength - 1] != U_FILE_SEP_CHAR) 309 { 310 uprv_strcat(filename, U_FILE_SEP_STRING); 311 } 312 } 313 314 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength); 315 316 expect(TOK_CLOSE_BRACE, NULL, NULL, NULL, status); 317 318 if (U_FAILURE(*status)) 319 { 320 return NULL; 321 } 322 uprv_strcat(filename, cs); 323 324 if(gOmitCollationRules) { 325 return res_none(); 326 } 327 328 ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status); 329 330 if (U_FAILURE(*status)) { 331 error(line, "An error occured while opening the input file %s\n", filename); 332 return NULL; 333 } 334 335 /* We allocate more space than actually required 336 * since the actual size needed for storing UChars 337 * is not known in UTF-8 byte stream 338 */ 339 size = ucbuf_size(ucbuf) + 1; 340 pTarget = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * size); 341 uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR); 342 target = pTarget; 343 targetLimit = pTarget+size; 344 345 /* read the rules into the buffer */ 346 while (target < targetLimit) 347 { 348 c = 
ucbuf_getc(ucbuf, status); 349 if(c == QUOTE) { 350 quoted = (UBool)!quoted; 351 } 352 /* weiv (06/26/2002): adding the following: 353 * - preserving spaces in commands [...] 354 * - # comments until the end of line 355 */ 356 if (c == STARTCOMMAND && !quoted) 357 { 358 /* preserve commands 359 * closing bracket will be handled by the 360 * append at the end of the loop 361 */ 362 while(c != ENDCOMMAND) { 363 U_APPEND_CHAR32(c, target,len); 364 c = ucbuf_getc(ucbuf, status); 365 } 366 } 367 else if (c == HASH && !quoted) { 368 /* skip comments */ 369 while(c != CR && c != LF) { 370 c = ucbuf_getc(ucbuf, status); 371 } 372 continue; 373 } 374 else if (c == ESCAPE) 375 { 376 c = unescape(ucbuf, status); 377 378 if (c == U_ERR) 379 { 380 uprv_free(pTarget); 381 T_FileStream_close(file); 382 return NULL; 383 } 384 } 385 else if (!quoted && (c == SPACE || c == TAB || c == CR || c == LF)) 386 { 387 /* ignore spaces carriage returns 388 * and line feed unless in the form \uXXXX 389 */ 390 continue; 391 } 392 393 /* Append UChar * after dissembling if c > 0xffff*/ 394 if (c != U_EOF) 395 { 396 U_APPEND_CHAR32(c, target,len); 397 } 398 else 399 { 400 break; 401 } 402 } 403 404 /* terminate the string */ 405 if(target < targetLimit){ 406 *target = 0x0000; 407 } 408 409 result = string_open(bundle, tag, pTarget, (int32_t)(target - pTarget), NULL, status); 410 411 412 ucbuf_close(ucbuf); 413 uprv_free(pTarget); 414 T_FileStream_close(file); 415 416 return result; 417} 418 419static struct SResource * 420parseTransliterator(char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status) 421{ 422 struct SResource *result = NULL; 423 struct UString *tokenValue; 424 FileStream *file = NULL; 425 char filename[256] = { '\0' }; 426 char cs[128] = { '\0' }; 427 uint32_t line; 428 UCHARBUF *ucbuf=NULL; 429 const char* cp = NULL; 430 UChar *pTarget = NULL; 431 const UChar *pSource = NULL; 432 int32_t size = 0; 433 434 expect(TOK_STRING, &tokenValue, NULL, &line, 
status); 435 436 if(isVerbose()){ 437 printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 438 } 439 440 if (U_FAILURE(*status)) 441 { 442 return NULL; 443 } 444 /* make the filename including the directory */ 445 if (inputdir != NULL) 446 { 447 uprv_strcat(filename, inputdir); 448 449 if (inputdir[inputdirLength - 1] != U_FILE_SEP_CHAR) 450 { 451 uprv_strcat(filename, U_FILE_SEP_STRING); 452 } 453 } 454 455 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength); 456 457 expect(TOK_CLOSE_BRACE, NULL, NULL, NULL, status); 458 459 if (U_FAILURE(*status)) 460 { 461 return NULL; 462 } 463 uprv_strcat(filename, cs); 464 465 466 ucbuf = ucbuf_open(filename, &cp, getShowWarning(),FALSE, status); 467 468 if (U_FAILURE(*status)) { 469 error(line, "An error occured while opening the input file %s\n", filename); 470 return NULL; 471 } 472 473 /* We allocate more space than actually required 474 * since the actual size needed for storing UChars 475 * is not known in UTF-8 byte stream 476 */ 477 pSource = ucbuf_getBuffer(ucbuf, &size, status); 478 pTarget = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * (size + 1)); 479 uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR); 480 481#if !UCONFIG_NO_TRANSLITERATION 482 size = utrans_stripRules(pSource, size, pTarget, status); 483#else 484 size = 0; 485 fprintf(stderr, " Warning: writing empty transliteration data ( UCONFIG_NO_TRANSLITERATION ) \n"); 486#endif 487 result = string_open(bundle, tag, pTarget, size, NULL, status); 488 489 ucbuf_close(ucbuf); 490 uprv_free(pTarget); 491 T_FileStream_close(file); 492 493 return result; 494} 495static struct SResource* dependencyArray = NULL; 496 497static struct SResource * 498parseDependency(char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status) 499{ 500 struct SResource *result = NULL; 501 struct SResource *elem = NULL; 502 struct UString *tokenValue; 503 uint32_t line; 504 char filename[256] = { '\0' }; 505 char cs[128] = { '\0' }; 506 507 
expect(TOK_STRING, &tokenValue, NULL, &line, status); 508 509 if(isVerbose()){ 510 printf(" %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 511 } 512 513 if (U_FAILURE(*status)) 514 { 515 return NULL; 516 } 517 /* make the filename including the directory */ 518 if (outputdir != NULL) 519 { 520 uprv_strcat(filename, outputdir); 521 522 if (outputdir[outputdirLength - 1] != U_FILE_SEP_CHAR) 523 { 524 uprv_strcat(filename, U_FILE_SEP_STRING); 525 } 526 } 527 528 u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength); 529 530 if (U_FAILURE(*status)) 531 { 532 return NULL; 533 } 534 uprv_strcat(filename, cs); 535 if(!T_FileStream_file_exists(filename)){ 536 if(isStrict()){ 537 error(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename); 538 }else{ 539 warning(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename); 540 } 541 } 542 if(dependencyArray==NULL){ 543 dependencyArray = array_open(bundle, "%%DEPENDENCY", NULL, status); 544 } 545 if(tag!=NULL){ 546 result = string_open(bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status); 547 } 548 elem = string_open(bundle, NULL, tokenValue->fChars, tokenValue->fLength, comment, status); 549 550 array_add(dependencyArray, elem, status); 551 552 if (U_FAILURE(*status)) 553 { 554 return NULL; 555 } 556 expect(TOK_CLOSE_BRACE, NULL, NULL, NULL, status); 557 return result; 558} 559static struct SResource * 560parseString(char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status) 561{ 562 struct UString *tokenValue; 563 struct SResource *result = NULL; 564 565/* if (tag != NULL && uprv_strcmp(tag, "%%UCARULES") == 0) 566 { 567 return parseUCARules(tag, startline, status); 568 }*/ 569 if(isVerbose()){ 570 printf(" string %s at line %i \n", (tag == NULL) ? 
"(null)" : tag, (int)startline); 571 } 572 expect(TOK_STRING, &tokenValue, NULL, NULL, status); 573 574 if (U_SUCCESS(*status)) 575 { 576 /* create the string now - tokenValue doesn't survive a call to getToken (and therefore 577 doesn't survive expect either) */ 578 579 result = string_open(bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status); 580 if(U_SUCCESS(*status) && result) { 581 expect(TOK_CLOSE_BRACE, NULL, NULL, NULL, status); 582 583 if (U_FAILURE(*status)) 584 { 585 res_close(result); 586 return NULL; 587 } 588 } 589 } 590 591 return result; 592} 593 594static struct SResource * 595parseAlias(char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status) 596{ 597 struct UString *tokenValue; 598 struct SResource *result = NULL; 599 600 expect(TOK_STRING, &tokenValue, NULL, NULL, status); 601 602 if(isVerbose()){ 603 printf(" alias %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 604 } 605 606 if (U_SUCCESS(*status)) 607 { 608 /* create the string now - tokenValue doesn't survive a call to getToken (and therefore 609 doesn't survive expect either) */ 610 611 result = alias_open(bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status); 612 613 expect(TOK_CLOSE_BRACE, NULL, NULL, NULL, status); 614 615 if (U_FAILURE(*status)) 616 { 617 res_close(result); 618 return NULL; 619 } 620 } 621 622 return result; 623} 624 625static struct SResource * 626addCollation(struct SResource *result, uint32_t startline, UErrorCode *status) 627{ 628 struct SResource *member = NULL; 629 struct UString *tokenValue; 630 struct UString comment; 631 enum ETokenType token; 632 char subtag[1024]; 633 UVersionInfo version; 634 UBool override = FALSE; 635 uint32_t line; 636 /* '{' . 
(name resource)* '}' */ 637 version[0]=0; version[1]=0; version[2]=0; version[3]=0; 638 639 for (;;) 640 { 641 ustr_init(&comment); 642 token = getToken(&tokenValue, &comment, &line, status); 643 644 if (token == TOK_CLOSE_BRACE) 645 { 646 return result; 647 } 648 649 if (token != TOK_STRING) 650 { 651 res_close(result); 652 *status = U_INVALID_FORMAT_ERROR; 653 654 if (token == TOK_EOF) 655 { 656 error(startline, "unterminated table"); 657 } 658 else 659 { 660 error(line, "Unexpected token %s", tokenNames[token]); 661 } 662 663 return NULL; 664 } 665 666 u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1); 667 668 if (U_FAILURE(*status)) 669 { 670 res_close(result); 671 return NULL; 672 } 673 674 member = parseResource(subtag, NULL, status); 675 676 if (U_FAILURE(*status)) 677 { 678 res_close(result); 679 return NULL; 680 } 681 682 if (uprv_strcmp(subtag, "Version") == 0) 683 { 684 char ver[40]; 685 int32_t length = member->u.fString.fLength; 686 687 if (length >= (int32_t) sizeof(ver)) 688 { 689 length = (int32_t) sizeof(ver) - 1; 690 } 691 692 u_UCharsToChars(member->u.fString.fChars, ver, length + 1); /* +1 for copying NULL */ 693 u_versionFromString(version, ver); 694 695 table_add(result, member, line, status); 696 697 } 698 else if (uprv_strcmp(subtag, "Override") == 0) 699 { 700 override = FALSE; 701 702 if (u_strncmp(member->u.fString.fChars, trueValue, u_strlen(trueValue)) == 0) 703 { 704 override = TRUE; 705 } 706 table_add(result, member, line, status); 707 708 } 709 else if(uprv_strcmp(subtag, "%%CollationBin")==0) 710 { 711 /* discard duplicate %%CollationBin if any*/ 712 } 713 else if (uprv_strcmp(subtag, "Sequence") == 0) 714 { 715#if UCONFIG_NO_COLLATION 716 warning(line, "Not building collation elements because of UCONFIG_NO_COLLATION, see uconfig.h"); 717#else 718 /* in order to achieve smaller data files, we can direct genrb */ 719 /* to omit collation rules */ 720 if(!gOmitCollationRules) { 721 /* first we add the 
"Sequence", so that we always have rules */ 722 table_add(result, member, line, status); 723 } 724 if(gMakeBinaryCollation) { 725 UErrorCode intStatus = U_ZERO_ERROR; 726 727 /* do the collation elements */ 728 int32_t len = 0; 729 uint8_t *data = NULL; 730 UCollator *coll = NULL; 731 UParseError parseError; 732 /* add sequence */ 733 /*table_add(result, member, line, status);*/ 734 735 coll = ucol_openRules(member->u.fString.fChars, member->u.fString.fLength, 736 UCOL_OFF, UCOL_DEFAULT_STRENGTH,&parseError, &intStatus); 737 738 if (U_SUCCESS(intStatus) && coll != NULL) 739 { 740 len = ucol_cloneBinary(coll, NULL, 0, &intStatus); 741 data = (uint8_t *)uprv_malloc(len); 742 intStatus = U_ZERO_ERROR; 743 len = ucol_cloneBinary(coll, data, len, &intStatus); 744 /*data = ucol_cloneRuleData(coll, &len, &intStatus);*/ 745 746 /* tailoring rules version */ 747 /* This is wrong! */ 748 /*coll->dataInfo.dataVersion[1] = version[0];*/ 749 /* Copy tailoring version. Builder version already */ 750 /* set in ucol_openRules */ 751 ((UCATableHeader *)data)->version[1] = version[0]; 752 ((UCATableHeader *)data)->version[2] = version[1]; 753 ((UCATableHeader *)data)->version[3] = version[2]; 754 755 if (U_SUCCESS(intStatus) && data != NULL) 756 { 757 member = bin_open(bundle, "%%CollationBin", len, data, NULL, NULL, status); 758 /*table_add(bundle->fRoot, member, line, status);*/ 759 table_add(result, member, line, status); 760 uprv_free(data); 761 } 762 else 763 { 764 warning(line, "could not obtain rules from collator"); 765 if(isStrict()){ 766 *status = U_INVALID_FORMAT_ERROR; 767 return NULL; 768 } 769 } 770 771 ucol_close(coll); 772 } 773 else 774 { 775 warning(line, "%%Collation could not be constructed from CollationElements - check context!"); 776 if(isStrict()){ 777 *status = intStatus; 778 return NULL; 779 } 780 } 781 } else { 782 if(isVerbose()) { 783 printf("Not building Collation binary\n"); 784 } 785 } 786#endif 787 } 788 789 /*member = string_open(bundle, subtag, 
tokenValue->fChars, tokenValue->fLength, status);*/ 790 791 /*expect(TOK_CLOSE_BRACE, NULL, NULL, status);*/ 792 793 if (U_FAILURE(*status)) 794 { 795 res_close(result); 796 return NULL; 797 } 798 } 799 800 /* not reached */ 801 /* A compiler warning will appear if all paths don't contain a return statement. */ 802/* *status = U_INTERNAL_PROGRAM_ERROR; 803 return NULL;*/ 804} 805 806static struct SResource * 807parseCollationElements(char *tag, uint32_t startline, UBool newCollation, UErrorCode *status) 808{ 809 struct SResource *result = NULL; 810 struct SResource *member = NULL; 811 struct SResource *collationRes = NULL; 812 struct UString *tokenValue; 813 struct UString comment; 814 enum ETokenType token; 815 char subtag[1024], typeKeyword[1024]; 816 uint32_t line; 817 818 result = table_open(bundle, tag, NULL, status); 819 820 if (result == NULL || U_FAILURE(*status)) 821 { 822 return NULL; 823 } 824 if(isVerbose()){ 825 printf(" collation elements %s at line %i \n", (tag == NULL) ? 
"(null)" : tag, (int)startline); 826 } 827 if(!newCollation) { 828 return addCollation(result, startline, status); 829 } 830 else { 831 for(;;) { 832 ustr_init(&comment); 833 token = getToken(&tokenValue, &comment, &line, status); 834 835 if (token == TOK_CLOSE_BRACE) 836 { 837 return result; 838 } 839 840 if (token != TOK_STRING) 841 { 842 res_close(result); 843 *status = U_INVALID_FORMAT_ERROR; 844 845 if (token == TOK_EOF) 846 { 847 error(startline, "unterminated table"); 848 } 849 else 850 { 851 error(line, "Unexpected token %s", tokenNames[token]); 852 } 853 854 return NULL; 855 } 856 857 u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1); 858 859 if (U_FAILURE(*status)) 860 { 861 res_close(result); 862 return NULL; 863 } 864 865 if (uprv_strcmp(subtag, "default") == 0) 866 { 867 member = parseResource(subtag, NULL, status); 868 869 if (U_FAILURE(*status)) 870 { 871 res_close(result); 872 return NULL; 873 } 874 875 table_add(result, member, line, status); 876 } 877 else 878 { 879 token = peekToken(0, &tokenValue, &line, &comment, status); 880 /* this probably needs to be refactored or recursively use the parser */ 881 /* first we assume that our collation table won't have the explicit type */ 882 /* then, we cannot handle aliases */ 883 if(token == TOK_OPEN_BRACE) { 884 token = getToken(&tokenValue, &comment, &line, status); 885 collationRes = table_open(bundle, subtag, NULL, status); 886 table_add(result, addCollation(collationRes, startline, status), startline, status); 887 } else if(token == TOK_COLON) { /* right now, we'll just try to see if we have aliases */ 888 /* we could have a table too */ 889 token = peekToken(1, &tokenValue, &line, &comment, status); 890 u_UCharsToChars(tokenValue->fChars, typeKeyword, u_strlen(tokenValue->fChars) + 1); 891 if(uprv_strcmp(typeKeyword, "alias") == 0) { 892 member = parseResource(subtag, NULL, status); 893 894 if (U_FAILURE(*status)) 895 { 896 res_close(result); 897 return NULL; 898 } 899 
900 table_add(result, member, line, status); 901 } else { 902 res_close(result); 903 *status = U_INVALID_FORMAT_ERROR; 904 return NULL; 905 } 906 } else { 907 res_close(result); 908 *status = U_INVALID_FORMAT_ERROR; 909 return NULL; 910 } 911 } 912 913 /*member = string_open(bundle, subtag, tokenValue->fChars, tokenValue->fLength, status);*/ 914 915 /*expect(TOK_CLOSE_BRACE, NULL, NULL, status);*/ 916 917 if (U_FAILURE(*status)) 918 { 919 res_close(result); 920 return NULL; 921 } 922 } 923 } 924} 925 926/* Necessary, because CollationElements requires the bundle->fRoot member to be present which, 927 if this weren't special-cased, wouldn't be set until the entire file had been processed. */ 928static struct SResource * 929realParseTable(struct SResource *table, char *tag, uint32_t startline, UErrorCode *status) 930{ 931 struct SResource *member = NULL; 932 struct UString *tokenValue=NULL; 933 struct UString comment; 934 enum ETokenType token; 935 char subtag[1024]; 936 uint32_t line; 937 UBool readToken = FALSE; 938 939 /* '{' . (name resource)* '}' */ 940 if(isVerbose()){ 941 printf(" parsing table %s at line %i \n", (tag == NULL) ? 
"(null)" : tag, (int)startline); 942 } 943 for (;;) 944 { 945 ustr_init(&comment); 946 token = getToken(&tokenValue, &comment, &line, status); 947 948 if (token == TOK_CLOSE_BRACE) 949 { 950 if (!readToken) { 951 warning(startline, "Encountered empty table"); 952 } 953 return table; 954 } 955 956 if (token != TOK_STRING) 957 { 958 *status = U_INVALID_FORMAT_ERROR; 959 960 if (token == TOK_EOF) 961 { 962 error(startline, "unterminated table"); 963 } 964 else 965 { 966 error(line, "unexpected token %s", tokenNames[token]); 967 } 968 969 return NULL; 970 } 971 972 if(uprv_isInvariantUString(tokenValue->fChars, -1)) { 973 u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1); 974 } else { 975 *status = U_INVALID_FORMAT_ERROR; 976 error(line, "invariant characters required for table keys"); 977 return NULL; 978 } 979 980 if (U_FAILURE(*status)) 981 { 982 error(line, "parse error. Stopped parsing tokens with %s", u_errorName(*status)); 983 return NULL; 984 } 985 986 member = parseResource(subtag, &comment, status); 987 988 if (member == NULL || U_FAILURE(*status)) 989 { 990 error(line, "parse error. Stopped parsing resource with %s", u_errorName(*status)); 991 return NULL; 992 } 993 994 table_add(table, member, line, status); 995 996 if (U_FAILURE(*status)) 997 { 998 error(line, "parse error. Stopped parsing table with %s", u_errorName(*status)); 999 return NULL; 1000 } 1001 readToken = TRUE; 1002 ustr_deinit(&comment); 1003 } 1004 1005 /* not reached */ 1006 /* A compiler warning will appear if all paths don't contain a return statement. 
*/ 1007/* *status = U_INTERNAL_PROGRAM_ERROR; 1008 return NULL;*/ 1009} 1010 1011static struct SResource * 1012parseTable(char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status) 1013{ 1014 struct SResource *result; 1015 1016 if (tag != NULL && uprv_strcmp(tag, "CollationElements") == 0) 1017 { 1018 return parseCollationElements(tag, startline, FALSE, status); 1019 } 1020 if (tag != NULL && uprv_strcmp(tag, "collations") == 0) 1021 { 1022 return parseCollationElements(tag, startline, TRUE, status); 1023 } 1024 if(isVerbose()){ 1025 printf(" table %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 1026 } 1027 1028 result = table_open(bundle, tag, comment, status); 1029 1030 if (result == NULL || U_FAILURE(*status)) 1031 { 1032 return NULL; 1033 } 1034 1035 return realParseTable(result, tag, startline, status); 1036} 1037 1038static struct SResource * 1039parseArray(char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status) 1040{ 1041 struct SResource *result = NULL; 1042 struct SResource *member = NULL; 1043 struct UString *tokenValue; 1044 struct UString memberComments; 1045 enum ETokenType token; 1046 UBool readToken = FALSE; 1047 1048 result = array_open(bundle, tag, comment, status); 1049 1050 if (result == NULL || U_FAILURE(*status)) 1051 { 1052 return NULL; 1053 } 1054 if(isVerbose()){ 1055 printf(" array %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 1056 } 1057 1058 ustr_init(&memberComments); 1059 1060 /* '{' . 
resource [','] '}' */ 1061 for (;;) 1062 { 1063 /* reset length */ 1064 ustr_setlen(&memberComments, 0, status); 1065 1066 /* check for end of array, but don't consume next token unless it really is the end */ 1067 token = peekToken(0, &tokenValue, NULL, &memberComments, status); 1068 1069 1070 if (token == TOK_CLOSE_BRACE) 1071 { 1072 getToken(NULL, NULL, NULL, status); 1073 if (!readToken) { 1074 warning(startline, "Encountered empty array"); 1075 } 1076 break; 1077 } 1078 1079 if (token == TOK_EOF) 1080 { 1081 res_close(result); 1082 *status = U_INVALID_FORMAT_ERROR; 1083 error(startline, "unterminated array"); 1084 return NULL; 1085 } 1086 1087 /* string arrays are a special case */ 1088 if (token == TOK_STRING) 1089 { 1090 getToken(&tokenValue, &memberComments, NULL, status); 1091 member = string_open(bundle, NULL, tokenValue->fChars, tokenValue->fLength, &memberComments, status); 1092 } 1093 else 1094 { 1095 member = parseResource(NULL, &memberComments, status); 1096 } 1097 1098 if (member == NULL || U_FAILURE(*status)) 1099 { 1100 res_close(result); 1101 return NULL; 1102 } 1103 1104 array_add(result, member, status); 1105 1106 if (U_FAILURE(*status)) 1107 { 1108 res_close(result); 1109 return NULL; 1110 } 1111 1112 /* eat optional comma if present */ 1113 token = peekToken(0, NULL, NULL, NULL, status); 1114 1115 if (token == TOK_COMMA) 1116 { 1117 getToken(NULL, NULL, NULL, status); 1118 } 1119 1120 if (U_FAILURE(*status)) 1121 { 1122 res_close(result); 1123 return NULL; 1124 } 1125 readToken = TRUE; 1126 } 1127 1128 ustr_deinit(&memberComments); 1129 return result; 1130} 1131 1132static struct SResource * 1133parseIntVector(char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status) 1134{ 1135 struct SResource *result = NULL; 1136 enum ETokenType token; 1137 char *string; 1138 int32_t value; 1139 UBool readToken = FALSE; 1140 char *stopstring; 1141 uint32_t len; 1142 struct UString memberComments; 1143 1144 result = 
intvector_open(bundle, tag, comment, status); 1145 1146 if (result == NULL || U_FAILURE(*status)) 1147 { 1148 return NULL; 1149 } 1150 1151 if(isVerbose()){ 1152 printf(" vector %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 1153 } 1154 ustr_init(&memberComments); 1155 /* '{' . string [','] '}' */ 1156 for (;;) 1157 { 1158 ustr_setlen(&memberComments, 0, status); 1159 1160 /* check for end of array, but don't consume next token unless it really is the end */ 1161 token = peekToken(0, NULL, NULL,&memberComments, status); 1162 1163 if (token == TOK_CLOSE_BRACE) 1164 { 1165 /* it's the end, consume the close brace */ 1166 getToken(NULL, NULL, NULL, status); 1167 if (!readToken) { 1168 warning(startline, "Encountered empty int vector"); 1169 } 1170 ustr_deinit(&memberComments); 1171 return result; 1172 } 1173 1174 string = getInvariantString(NULL, NULL, status); 1175 1176 if (U_FAILURE(*status)) 1177 { 1178 res_close(result); 1179 return NULL; 1180 } 1181 1182 /* For handling illegal char in the Intvector */ 1183 value = uprv_strtoul(string, &stopstring, 0);/* make intvector support decimal,hexdigit,octal digit ranging from -2^31-2^32-1*/ 1184 len=(uint32_t)(stopstring-string); 1185 1186 if(len==uprv_strlen(string)) 1187 { 1188 intvector_add(result, value, status); 1189 uprv_free(string); 1190 token = peekToken(0, NULL, NULL, NULL, status); 1191 } 1192 else 1193 { 1194 uprv_free(string); 1195 *status=U_INVALID_CHAR_FOUND; 1196 } 1197 1198 if (U_FAILURE(*status)) 1199 { 1200 res_close(result); 1201 return NULL; 1202 } 1203 1204 /* the comma is optional (even though it is required to prevent the reader from concatenating 1205 consecutive entries) so that a missing comma on the last entry isn't an error */ 1206 if (token == TOK_COMMA) 1207 { 1208 getToken(NULL, NULL, NULL, status); 1209 } 1210 readToken = TRUE; 1211 } 1212 1213 /* not reached */ 1214 /* A compiler warning will appear if all paths don't contain a return statement. 
*/ 1215/* intvector_close(result, status); 1216 *status = U_INTERNAL_PROGRAM_ERROR; 1217 return NULL;*/ 1218} 1219 1220static struct SResource * 1221parseBinary(char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status) 1222{ 1223 struct SResource *result = NULL; 1224 uint8_t *value; 1225 char *string; 1226 char toConv[3] = {'\0', '\0', '\0'}; 1227 uint32_t count; 1228 uint32_t i; 1229 uint32_t line; 1230 char *stopstring; 1231 uint32_t len; 1232 1233 string = getInvariantString(&line, NULL, status); 1234 1235 if (string == NULL || U_FAILURE(*status)) 1236 { 1237 return NULL; 1238 } 1239 1240 expect(TOK_CLOSE_BRACE, NULL, NULL, NULL, status); 1241 1242 if (U_FAILURE(*status)) 1243 { 1244 uprv_free(string); 1245 return NULL; 1246 } 1247 1248 if(isVerbose()){ 1249 printf(" binary %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 1250 } 1251 1252 count = (uint32_t)uprv_strlen(string); 1253 if (count > 0){ 1254 if((count % 2)==0){ 1255 value = uprv_malloc(sizeof(uint8_t) * count); 1256 1257 if (value == NULL) 1258 { 1259 uprv_free(string); 1260 *status = U_MEMORY_ALLOCATION_ERROR; 1261 return NULL; 1262 } 1263 1264 for (i = 0; i < count; i += 2) 1265 { 1266 toConv[0] = string[i]; 1267 toConv[1] = string[i + 1]; 1268 1269 value[i >> 1] = (uint8_t) uprv_strtoul(toConv, &stopstring, 16); 1270 len=(uint32_t)(stopstring-toConv); 1271 1272 if(len!=uprv_strlen(toConv)) 1273 { 1274 uprv_free(string); 1275 *status=U_INVALID_CHAR_FOUND; 1276 return NULL; 1277 } 1278 } 1279 1280 result = bin_open(bundle, tag, (i >> 1), value,NULL, comment, status); 1281 1282 uprv_free(value); 1283 } 1284 else 1285 { 1286 *status = U_INVALID_CHAR_FOUND; 1287 uprv_free(string); 1288 error(line, "Encountered invalid binary string"); 1289 return NULL; 1290 } 1291 } 1292 else 1293 { 1294 result = bin_open(bundle, tag, 0, NULL, "",comment,status); 1295 warning(startline, "Encountered empty binary tag"); 1296 } 1297 uprv_free(string); 1298 1299 return result; 
1300} 1301 1302static struct SResource * 1303parseInteger(char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status) 1304{ 1305 struct SResource *result = NULL; 1306 int32_t value; 1307 char *string; 1308 char *stopstring; 1309 uint32_t len; 1310 1311 string = getInvariantString(NULL, NULL, status); 1312 1313 if (string == NULL || U_FAILURE(*status)) 1314 { 1315 return NULL; 1316 } 1317 1318 expect(TOK_CLOSE_BRACE, NULL, NULL, NULL, status); 1319 1320 if (U_FAILURE(*status)) 1321 { 1322 uprv_free(string); 1323 return NULL; 1324 } 1325 1326 if(isVerbose()){ 1327 printf(" integer %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 1328 } 1329 1330 if (uprv_strlen(string) <= 0) 1331 { 1332 warning(startline, "Encountered empty integer. Default value is 0."); 1333 } 1334 1335 /* Allow integer support for hexdecimal, octal digit and decimal*/ 1336 /* and handle illegal char in the integer*/ 1337 value = uprv_strtoul(string, &stopstring, 0); 1338 len=(uint32_t)(stopstring-string); 1339 if(len==uprv_strlen(string)) 1340 { 1341 result = int_open(bundle, tag, value, comment, status); 1342 } 1343 else 1344 { 1345 *status=U_INVALID_CHAR_FOUND; 1346 } 1347 uprv_free(string); 1348 1349 return result; 1350} 1351 1352static struct SResource * 1353parseImport(char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status) 1354{ 1355 struct SResource *result; 1356 FileStream *file; 1357 int32_t len; 1358 uint8_t *data; 1359 char *filename; 1360 uint32_t line; 1361 char *fullname = NULL; 1362 int32_t numRead = 0; 1363 filename = getInvariantString(&line, NULL, status); 1364 1365 if (U_FAILURE(*status)) 1366 { 1367 return NULL; 1368 } 1369 1370 expect(TOK_CLOSE_BRACE, NULL, NULL, NULL, status); 1371 1372 if (U_FAILURE(*status)) 1373 { 1374 uprv_free(filename); 1375 return NULL; 1376 } 1377 1378 if(isVerbose()){ 1379 printf(" import %s at line %i \n", (tag == NULL) ? 
"(null)" : tag, (int)startline); 1380 } 1381 1382 /* Open the input file for reading */ 1383 if (inputdir == NULL) 1384 { 1385#if 1 1386 /* 1387 * Always save file file name, even if there's 1388 * no input directory specified. MIGHT BREAK SOMETHING 1389 */ 1390 int32_t filenameLength = uprv_strlen(filename); 1391 1392 fullname = (char *) uprv_malloc(filenameLength + 1); 1393 uprv_strcpy(fullname, filename); 1394#endif 1395 1396 file = T_FileStream_open(filename, "rb"); 1397 } 1398 else 1399 { 1400 1401 int32_t count = (int32_t)uprv_strlen(filename); 1402 1403 if (inputdir[inputdirLength - 1] != U_FILE_SEP_CHAR) 1404 { 1405 fullname = (char *) uprv_malloc(inputdirLength + count + 2); 1406 1407 /* test for NULL */ 1408 if(fullname == NULL) 1409 { 1410 *status = U_MEMORY_ALLOCATION_ERROR; 1411 return NULL; 1412 } 1413 1414 uprv_strcpy(fullname, inputdir); 1415 1416 fullname[inputdirLength] = U_FILE_SEP_CHAR; 1417 fullname[inputdirLength + 1] = '\0'; 1418 1419 uprv_strcat(fullname, filename); 1420 } 1421 else 1422 { 1423 fullname = (char *) uprv_malloc(inputdirLength + count + 1); 1424 1425 /* test for NULL */ 1426 if(fullname == NULL) 1427 { 1428 *status = U_MEMORY_ALLOCATION_ERROR; 1429 return NULL; 1430 } 1431 1432 uprv_strcpy(fullname, inputdir); 1433 uprv_strcat(fullname, filename); 1434 } 1435 1436 file = T_FileStream_open(fullname, "rb"); 1437 1438 } 1439 1440 if (file == NULL) 1441 { 1442 error(line, "couldn't open input file %s", filename); 1443 *status = U_FILE_ACCESS_ERROR; 1444 return NULL; 1445 } 1446 1447 len = T_FileStream_size(file); 1448 data = (uint8_t*)uprv_malloc(len * sizeof(uint8_t)); 1449 /* test for NULL */ 1450 if(data == NULL) 1451 { 1452 *status = U_MEMORY_ALLOCATION_ERROR; 1453 T_FileStream_close (file); 1454 return NULL; 1455 } 1456 1457 numRead = T_FileStream_read (file, data, len); 1458 T_FileStream_close (file); 1459 1460 result = bin_open(bundle, tag, len, data, fullname, comment, status); 1461 1462 uprv_free(data); 1463 
uprv_free(filename); 1464 uprv_free(fullname); 1465 1466 return result; 1467} 1468 1469static struct SResource * 1470parseInclude(char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status) 1471{ 1472 struct SResource *result; 1473 int32_t len=0; 1474 char *filename; 1475 uint32_t line; 1476 UChar *pTarget = NULL; 1477 1478 UCHARBUF *ucbuf; 1479 char *fullname = NULL; 1480 int32_t count = 0; 1481 const char* cp = NULL; 1482 const UChar* uBuffer = NULL; 1483 1484 filename = getInvariantString(&line, NULL, status); 1485 count = (int32_t)uprv_strlen(filename); 1486 1487 if (U_FAILURE(*status)) 1488 { 1489 return NULL; 1490 } 1491 1492 expect(TOK_CLOSE_BRACE, NULL, NULL, NULL, status); 1493 1494 if (U_FAILURE(*status)) 1495 { 1496 uprv_free(filename); 1497 return NULL; 1498 } 1499 1500 if(isVerbose()){ 1501 printf(" include %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 1502 } 1503 1504 fullname = (char *) uprv_malloc(inputdirLength + count + 2); 1505 /* test for NULL */ 1506 if(fullname == NULL) 1507 { 1508 *status = U_MEMORY_ALLOCATION_ERROR; 1509 uprv_free(filename); 1510 return NULL; 1511 } 1512 1513 if(inputdir!=NULL){ 1514 if (inputdir[inputdirLength - 1] != U_FILE_SEP_CHAR) 1515 { 1516 1517 uprv_strcpy(fullname, inputdir); 1518 1519 fullname[inputdirLength] = U_FILE_SEP_CHAR; 1520 fullname[inputdirLength + 1] = '\0'; 1521 1522 uprv_strcat(fullname, filename); 1523 } 1524 else 1525 { 1526 uprv_strcpy(fullname, inputdir); 1527 uprv_strcat(fullname, filename); 1528 } 1529 }else{ 1530 uprv_strcpy(fullname,filename); 1531 } 1532 1533 ucbuf = ucbuf_open(fullname, &cp,getShowWarning(),FALSE,status); 1534 1535 if (U_FAILURE(*status)) { 1536 error(line, "couldn't open input file %s\n", filename); 1537 return NULL; 1538 } 1539 1540 uBuffer = ucbuf_getBuffer(ucbuf,&len,status); 1541 result = string_open(bundle, tag, uBuffer, len, comment, status); 1542 1543 uprv_free(pTarget); 1544 1545 uprv_free(filename); 1546 
uprv_free(fullname); 1547 1548 return result; 1549} 1550 1551 1552 1553 1554 1555U_STRING_DECL(k_type_string, "string", 6); 1556U_STRING_DECL(k_type_binary, "binary", 6); 1557U_STRING_DECL(k_type_bin, "bin", 3); 1558U_STRING_DECL(k_type_table, "table", 5); 1559U_STRING_DECL(k_type_table_no_fallback, "table(nofallback)", 17); 1560U_STRING_DECL(k_type_int, "int", 3); 1561U_STRING_DECL(k_type_integer, "integer", 7); 1562U_STRING_DECL(k_type_array, "array", 5); 1563U_STRING_DECL(k_type_alias, "alias", 5); 1564U_STRING_DECL(k_type_intvector, "intvector", 9); 1565U_STRING_DECL(k_type_import, "import", 6); 1566U_STRING_DECL(k_type_include, "include", 7); 1567U_STRING_DECL(k_type_reserved, "reserved", 8); 1568 1569/* Various non-standard processing plugins that create one or more special resources. */ 1570U_STRING_DECL(k_type_plugin_uca_rules, "process(uca_rules)", 18); 1571U_STRING_DECL(k_type_plugin_collation, "process(collation)", 18); 1572U_STRING_DECL(k_type_plugin_transliterator, "process(transliterator)", 23); 1573U_STRING_DECL(k_type_plugin_dependency, "process(dependency)", 19); 1574 1575typedef enum EResourceType 1576{ 1577 RT_UNKNOWN, 1578 RT_STRING, 1579 RT_BINARY, 1580 RT_TABLE, 1581 RT_TABLE_NO_FALLBACK, 1582 RT_INTEGER, 1583 RT_ARRAY, 1584 RT_ALIAS, 1585 RT_INTVECTOR, 1586 RT_IMPORT, 1587 RT_INCLUDE, 1588 RT_PROCESS_UCA_RULES, 1589 RT_PROCESS_COLLATION, 1590 RT_PROCESS_TRANSLITERATOR, 1591 RT_PROCESS_DEPENDENCY, 1592 RT_RESERVED 1593} EResourceType; 1594 1595static struct { 1596 const char *nameChars; /* only used for debugging */ 1597 const UChar *nameUChars; 1598 ParseResourceFunction *parseFunction; 1599} gResourceTypes[] = { 1600 {"Unknown", NULL, NULL}, 1601 {"string", k_type_string, parseString}, 1602 {"binary", k_type_binary, parseBinary}, 1603 {"table", k_type_table, parseTable}, 1604 {"table(nofallback)", k_type_table_no_fallback, NULL}, /* parseFunction will never be called */ 1605 {"integer", k_type_integer, parseInteger}, 1606 {"array", 
k_type_array, parseArray}, 1607 {"alias", k_type_alias, parseAlias}, 1608 {"intvector", k_type_intvector, parseIntVector}, 1609 {"import", k_type_import, parseImport}, 1610 {"include", k_type_include, parseInclude}, 1611 {"process(uca_rules)", k_type_plugin_uca_rules, parseUCARules}, 1612 {"process(collation)", k_type_plugin_collation, NULL /* not implemented yet */}, 1613 {"process(transliterator)", k_type_plugin_transliterator, parseTransliterator}, 1614 {"process(dependency)", k_type_plugin_dependency, parseDependency}, 1615 {"reserved", NULL, NULL} 1616}; 1617 1618void initParser(UBool omitBinaryCollation, UBool omitCollationRules) 1619{ 1620 uint32_t i; 1621 1622 U_STRING_INIT(k_type_string, "string", 6); 1623 U_STRING_INIT(k_type_binary, "binary", 6); 1624 U_STRING_INIT(k_type_bin, "bin", 3); 1625 U_STRING_INIT(k_type_table, "table", 5); 1626 U_STRING_INIT(k_type_table_no_fallback, "table(nofallback)", 17); 1627 U_STRING_INIT(k_type_int, "int", 3); 1628 U_STRING_INIT(k_type_integer, "integer", 7); 1629 U_STRING_INIT(k_type_array, "array", 5); 1630 U_STRING_INIT(k_type_alias, "alias", 5); 1631 U_STRING_INIT(k_type_intvector, "intvector", 9); 1632 U_STRING_INIT(k_type_import, "import", 6); 1633 U_STRING_INIT(k_type_reserved, "reserved", 8); 1634 U_STRING_INIT(k_type_include, "include", 7); 1635 1636 U_STRING_INIT(k_type_plugin_uca_rules, "process(uca_rules)", 18); 1637 U_STRING_INIT(k_type_plugin_collation, "process(collation)", 18); 1638 U_STRING_INIT(k_type_plugin_transliterator, "process(transliterator)", 23); 1639 U_STRING_INIT(k_type_plugin_dependency, "process(dependency)", 19); 1640 1641 for (i = 0; i < MAX_LOOKAHEAD + 1; i++) 1642 { 1643 ustr_init(&lookahead[i].value); 1644 } 1645 gMakeBinaryCollation = !omitBinaryCollation; 1646 gOmitCollationRules = omitCollationRules; 1647} 1648 1649static U_INLINE UBool isTable(enum EResourceType type) { 1650 return (UBool)(type==RT_TABLE || type==RT_TABLE_NO_FALLBACK); 1651} 1652 1653static enum EResourceType 
1654parseResourceType(UErrorCode *status) 1655{ 1656 struct UString *tokenValue; 1657 struct UString comment; 1658 enum EResourceType result = RT_UNKNOWN; 1659 uint32_t line=0; 1660 ustr_init(&comment); 1661 expect(TOK_STRING, &tokenValue, &comment, &line, status); 1662 1663 if (U_FAILURE(*status)) 1664 { 1665 return RT_UNKNOWN; 1666 } 1667 1668 *status = U_ZERO_ERROR; 1669 1670 /* Search for normal types */ 1671 result=RT_UNKNOWN; 1672 while (++result < RT_RESERVED) { 1673 if (u_strcmp(tokenValue->fChars, gResourceTypes[result].nameUChars) == 0) { 1674 break; 1675 } 1676 } 1677 /* Now search for the aliases */ 1678 if (u_strcmp(tokenValue->fChars, k_type_int) == 0) { 1679 result = RT_INTEGER; 1680 } 1681 else if (u_strcmp(tokenValue->fChars, k_type_bin) == 0) { 1682 result = RT_BINARY; 1683 } 1684 else if (result == RT_RESERVED) { 1685 char tokenBuffer[1024]; 1686 u_austrncpy(tokenBuffer, tokenValue->fChars, sizeof(tokenBuffer)); 1687 tokenBuffer[sizeof(tokenBuffer) - 1] = 0; 1688 *status = U_INVALID_FORMAT_ERROR; 1689 error(line, "unknown resource type '%s'", tokenBuffer); 1690 } 1691 1692 return result; 1693} 1694 1695/* parse a non-top-level resource */ 1696static struct SResource * 1697parseResource(char *tag, const struct UString *comment, UErrorCode *status) 1698{ 1699 enum ETokenType token; 1700 enum EResourceType resType = RT_UNKNOWN; 1701 ParseResourceFunction *parseFunction = NULL; 1702 struct UString *tokenValue; 1703 uint32_t startline; 1704 uint32_t line; 1705 1706 token = getToken(&tokenValue, NULL, &startline, status); 1707 1708 if(isVerbose()){ 1709 printf(" resource %s at line %i \n", (tag == NULL) ? "(null)" : tag, (int)startline); 1710 } 1711 1712 /* name . [ ':' type ] '{' resource '}' */ 1713 /* This function parses from the colon onwards. If the colon is present, parse the 1714 type then try to parse a resource of that type. If there is no explicit type, 1715 work it out using the lookahead tokens. 
*/ 1716 switch (token) 1717 { 1718 case TOK_EOF: 1719 *status = U_INVALID_FORMAT_ERROR; 1720 error(startline, "Unexpected EOF encountered"); 1721 return NULL; 1722 1723 case TOK_ERROR: 1724 *status = U_INVALID_FORMAT_ERROR; 1725 return NULL; 1726 1727 case TOK_COLON: 1728 resType = parseResourceType(status); 1729 expect(TOK_OPEN_BRACE, &tokenValue, NULL, &startline, status); 1730 1731 if (U_FAILURE(*status)) 1732 { 1733 return NULL; 1734 } 1735 1736 break; 1737 1738 case TOK_OPEN_BRACE: 1739 break; 1740 1741 default: 1742 *status = U_INVALID_FORMAT_ERROR; 1743 error(startline, "syntax error while reading a resource, expected '{' or ':'"); 1744 return NULL; 1745 } 1746 1747 if (resType == RT_UNKNOWN) 1748 { 1749 /* No explicit type, so try to work it out. At this point, we've read the first '{'. 1750 We could have any of the following: 1751 { { => array (nested) 1752 { :/} => array 1753 { string , => string array 1754 1755 { string { => table 1756 1757 { string :/{ => table 1758 { string } => string 1759 */ 1760 1761 token = peekToken(0, NULL, &line, NULL,status); 1762 1763 if (U_FAILURE(*status)) 1764 { 1765 return NULL; 1766 } 1767 1768 if (token == TOK_OPEN_BRACE || token == TOK_COLON ||token ==TOK_CLOSE_BRACE ) 1769 { 1770 resType = RT_ARRAY; 1771 } 1772 else if (token == TOK_STRING) 1773 { 1774 token = peekToken(1, NULL, &line, NULL, status); 1775 1776 if (U_FAILURE(*status)) 1777 { 1778 return NULL; 1779 } 1780 1781 switch (token) 1782 { 1783 case TOK_COMMA: resType = RT_ARRAY; break; 1784 case TOK_OPEN_BRACE: resType = RT_TABLE; break; 1785 case TOK_CLOSE_BRACE: resType = RT_STRING; break; 1786 case TOK_COLON: resType = RT_TABLE; break; 1787 default: 1788 *status = U_INVALID_FORMAT_ERROR; 1789 error(line, "Unexpected token after string, expected ',', '{' or '}'"); 1790 return NULL; 1791 } 1792 } 1793 else 1794 { 1795 *status = U_INVALID_FORMAT_ERROR; 1796 error(line, "Unexpected token after '{'"); 1797 return NULL; 1798 } 1799 1800 /* printf("Type guessed as 
%s\n", resourceNames[resType]); */ 1801 } else if(resType == RT_TABLE_NO_FALLBACK) { 1802 *status = U_INVALID_FORMAT_ERROR; 1803 error(startline, "error: %s resource type not valid except on top bundle level", gResourceTypes[resType].nameChars); 1804 return NULL; 1805 } 1806 1807 /* We should now know what we need to parse next, so call the appropriate parser 1808 function and return. */ 1809 parseFunction = gResourceTypes[resType].parseFunction; 1810 if (parseFunction != NULL) { 1811 return parseFunction(tag, startline, comment, status); 1812 } 1813 else { 1814 *status = U_INTERNAL_PROGRAM_ERROR; 1815 error(startline, "internal error: %s resource type found and not handled", gResourceTypes[resType].nameChars); 1816 } 1817 1818 return NULL; 1819} 1820 1821/* parse the top-level resource */ 1822struct SRBRoot * 1823parse(UCHARBUF *buf, const char *inputDir, const char *outputDir, UErrorCode *status) 1824{ 1825 struct UString *tokenValue; 1826 struct UString comment; 1827 uint32_t line; 1828 enum EResourceType bundleType; 1829 enum ETokenType token; 1830 1831 initLookahead(buf, status); 1832 1833 inputdir = inputDir; 1834 inputdirLength = (inputdir != NULL) ? (uint32_t)uprv_strlen(inputdir) : 0; 1835 outputdir = outputDir; 1836 outputdirLength = (outputdir != NULL) ? 
(uint32_t)uprv_strlen(outputdir) : 0; 1837 1838 ustr_init(&comment); 1839 expect(TOK_STRING, &tokenValue, &comment, NULL, status); 1840 1841 bundle = bundle_open(&comment, status); 1842 1843 if (bundle == NULL || U_FAILURE(*status)) 1844 { 1845 return NULL; 1846 } 1847 1848 1849 bundle_setlocale(bundle, tokenValue->fChars, status); 1850 /* The following code is to make Empty bundle work no matter with :table specifer or not */ 1851 token = getToken(NULL, NULL, &line, status); 1852 if(token==TOK_COLON) { 1853 *status=U_ZERO_ERROR; 1854 bundleType=parseResourceType(status); 1855 1856 if(isTable(bundleType)) 1857 { 1858 expect(TOK_OPEN_BRACE, NULL, NULL, &line, status); 1859 } 1860 else 1861 { 1862 *status=U_PARSE_ERROR; 1863 error(line, "parse error. Stopped parsing with %s", u_errorName(*status)); 1864 } 1865 } 1866 else 1867 { 1868 /* not a colon */ 1869 if(token==TOK_OPEN_BRACE) 1870 { 1871 *status=U_ZERO_ERROR; 1872 bundleType=RT_TABLE; 1873 } 1874 else 1875 { 1876 /* neither colon nor open brace */ 1877 *status=U_PARSE_ERROR; 1878 bundleType=RT_UNKNOWN; 1879 error(line, "parse error, did not find open-brace '{' or colon ':', stopped with %s", u_errorName(*status)); 1880 } 1881 } 1882 1883 if (U_FAILURE(*status)) 1884 { 1885 bundle_close(bundle, status); 1886 return NULL; 1887 } 1888 1889 if(bundleType==RT_TABLE_NO_FALLBACK) { 1890 /* 1891 * Parse a top-level table with the table(nofallback) declaration. 1892 * This is the same as a regular table, but also sets the 1893 * URES_ATT_NO_FALLBACK flag in indexes[URES_INDEX_ATTRIBUTES] . 
1894 */ 1895 bundle->noFallback=TRUE; 1896 } 1897 /* top-level tables need not handle special table names like "collations" */ 1898 realParseTable(bundle->fRoot, NULL, line, status); 1899 1900 if(dependencyArray!=NULL){ 1901 table_add(bundle->fRoot, dependencyArray, 0, status); 1902 dependencyArray = NULL; 1903 } 1904 if (U_FAILURE(*status)) 1905 { 1906 bundle_close(bundle, status); 1907 res_close(dependencyArray); 1908 return NULL; 1909 } 1910 1911 if (getToken(NULL, NULL, &line, status) != TOK_EOF) 1912 { 1913 warning(line, "extraneous text after resource bundle (perhaps unmatched braces)"); 1914 if(isStrict()){ 1915 *status = U_INVALID_FORMAT_ERROR; 1916 return NULL; 1917 } 1918 } 1919 1920 cleanupLookahead(); 1921 ustr_deinit(&comment); 1922 return bundle; 1923} 1924 1925