1/* 2********************************************************************** 3* Copyright (C) 2002-2010, International Business Machines 4* Corporation and others. All Rights Reserved. 5********************************************************************** 6* Date Name Description 7* 10/11/02 aliu Creation. 8********************************************************************** 9*/ 10 11#include "unicode/utypes.h" 12#include "unicode/putil.h" 13#include "unicode/uclean.h" 14#include "cmemory.h" 15#include "cstring.h" 16#include "filestrm.h" 17#include "uarrsort.h" 18#include "unewdata.h" 19#include "uoptions.h" 20#include "uprops.h" 21#include "propname.h" 22#include "uassert.h" 23 24#include <stdio.h> 25 26U_NAMESPACE_USE 27 28// TODO: Clean up and comment this code. 29 30//---------------------------------------------------------------------- 31// BEGIN DATA 32// 33// This is the raw data to be output. We define the data structure, 34// then include a machine-generated header that contains the actual 35// data. 36 37#include "unicode/uchar.h" 38#include "unicode/uscript.h" 39#include "unicode/unorm.h" 40#include "unicode/unorm2.h" 41 42class AliasName { 43public: 44 const char* str; 45 int32_t index; 46 47 AliasName(const char* str, int32_t index); 48 49 int compare(const AliasName& other) const; 50 51 UBool operator==(const AliasName& other) const { 52 return compare(other) == 0; 53 } 54 55 UBool operator!=(const AliasName& other) const { 56 return compare(other) != 0; 57 } 58}; 59 60AliasName::AliasName(const char* _str, 61 int32_t _index) : 62 str(_str), 63 index(_index) 64{ 65} 66 67int AliasName::compare(const AliasName& other) const { 68 return uprv_comparePropertyNames(str, other.str); 69} 70 71class Alias { 72public: 73 int32_t enumValue; 74 int32_t nameGroupIndex; 75 76 Alias(int32_t enumValue, 77 int32_t nameGroupIndex); 78 79 int32_t getUniqueNames(int32_t* nameGroupIndices) const; 80}; 81 82Alias::Alias(int32_t anEnumValue, 83 int32_t aNameGroupIndex) : 84 enumValue(anEnumValue), 85 nameGroupIndex(aNameGroupIndex) 86{ 87} 88 89class Property : public Alias { 90public: 91 int32_t valueCount; 92 const Alias* valueList; 93 94 Property(int32_t enumValue, 95 int32_t nameGroupIndex, 96 int32_t valueCount, 97 const Alias* valueList); 98}; 99 100Property::Property(int32_t _enumValue, 101 int32_t _nameGroupIndex, 102 int32_t _valueCount, 103 const Alias* _valueList) : 104 Alias(_enumValue, _nameGroupIndex), 105 valueCount(_valueCount), 106 valueList(_valueList) 107{ 108} 109 110// *** Include the data header *** 111#include "data.h" 112 113/* return a list of unique names, not including "", for this property 114 * @param stringIndices array of at least MAX_NAMES_PER_GROUP 115 * elements, will be filled with indices into STRING_TABLE 116 * @return number of indices, >= 1 117 */ 118int32_t Alias::getUniqueNames(int32_t* stringIndices) const { 119 int32_t count = 0; 120 int32_t i = nameGroupIndex; 121 UBool done = FALSE; 122 while (!done) { 123 int32_t j = NAME_GROUP[i++]; 124 if (j < 0) { 125 done = TRUE; 126 j = -j; 127 } 128 if (j == 0) continue; // omit "" entries 129 UBool dupe = FALSE; 130 for (int32_t k=0; k<count; ++k) { 131 if (stringIndices[k] == j) { 132 dupe = TRUE; 133 break; 134 } 135 // also do a string check for things like "age|Age" 136 if (STRING_TABLE[stringIndices[k]] == STRING_TABLE[j]) { 137 //printf("Found dupe %s|%s\n", 138 // STRING_TABLE[stringIndices[k]].str, 139 // STRING_TABLE[j].str); 140 dupe = TRUE; 141 break; 142 } 143 } 144 if (dupe) continue; // omit duplicates 145 stringIndices[count++] = j; 146 } 147 return count; 148} 149 150// END DATA 151//---------------------------------------------------------------------- 152 153#define MALLOC(type, count) \ 154 (type*) uprv_malloc(sizeof(type) * count) 155 156void die(const char* msg) { 157 fprintf(stderr, "Error: %s\n", msg); 158 exit(1); 159} 160 161//---------------------------------------------------------------------- 162 163/** 164 * A list of Alias objects. 165 */ 166class AliasList { 167public: 168 virtual ~AliasList(); 169 virtual const Alias& operator[](int32_t i) const = 0; 170 virtual int32_t count() const = 0; 171}; 172 173AliasList::~AliasList() {} 174 175/** 176 * A single array. 177 */ 178class AliasArrayList : public AliasList { 179 const Alias* a; 180 int32_t n; 181public: 182 AliasArrayList(const Alias* _a, int32_t _n) { 183 a = _a; 184 n = _n; 185 } 186 virtual const Alias& operator[](int32_t i) const { 187 return a[i]; 188 } 189 virtual int32_t count() const { 190 return n; 191 } 192}; 193 194/** 195 * A single array. 196 */ 197class PropertyArrayList : public AliasList { 198 const Property* a; 199 int32_t n; 200public: 201 PropertyArrayList(const Property* _a, int32_t _n) { 202 a = _a; 203 n = _n; 204 } 205 virtual const Alias& operator[](int32_t i) const { 206 return a[i]; 207 } 208 virtual int32_t count() const { 209 return n; 210 } 211}; 212 213//---------------------------------------------------------------------- 214 215/** 216 * An element in a name index. It maps a name (given by index) into 217 * an enum value. 218 */ 219class NameToEnumEntry { 220public: 221 int32_t nameIndex; 222 int32_t enumValue; 223 NameToEnumEntry(int32_t a, int32_t b) { nameIndex=a; enumValue=b; } 224}; 225 226// Sort function for NameToEnumEntry (sort by name) 227U_CFUNC int32_t 228compareNameToEnumEntry(const void * /*context*/, const void* e1, const void* e2) { 229 return 230 STRING_TABLE[((NameToEnumEntry*)e1)->nameIndex]. 231 compare(STRING_TABLE[((NameToEnumEntry*)e2)->nameIndex]); 232} 233 234//---------------------------------------------------------------------- 235 236/** 237 * An element in an enum index. It maps an enum into a name group entry 238 * (given by index). 239 */ 240class EnumToNameGroupEntry { 241public: 242 int32_t enumValue; 243 int32_t nameGroupIndex; 244 EnumToNameGroupEntry(int32_t a, int32_t b) { enumValue=a; nameGroupIndex=b; } 245 246 // are enumValues contiguous for count entries starting with this one? 247 // ***!!!*** we assume we are in an array and look at neighbors ***!!!*** 248 UBool isContiguous(int32_t count) const { 249 const EnumToNameGroupEntry* p = this; 250 for (int32_t i=1; i<count; ++i) { 251 if (p[i].enumValue != (this->enumValue + i)) { 252 return FALSE; 253 } 254 } 255 return TRUE; 256 } 257}; 258 259// Sort function for EnumToNameGroupEntry (sort by name index) 260U_CFUNC int32_t 261compareEnumToNameGroupEntry(const void * /*context*/, const void* e1, const void* e2) { 262 return ((EnumToNameGroupEntry*)e1)->enumValue - ((EnumToNameGroupEntry*)e2)->enumValue; 263} 264 265//---------------------------------------------------------------------- 266 267/** 268 * An element in the map from enumerated property enums to value maps. 269 */ 270class EnumToValueEntry { 271public: 272 int32_t enumValue; 273 EnumToNameGroupEntry* enumToName; 274 int32_t enumToName_count; 275 NameToEnumEntry* nameToEnum; 276 int32_t nameToEnum_count; 277 278 // are enumValues contiguous for count entries starting with this one? 279 // ***!!!*** we assume we are in an array and look at neighbors ***!!!*** 280 UBool isContiguous(int32_t count) const { 281 const EnumToValueEntry* p = this; 282 for (int32_t i=1; i<count; ++i) { 283 if (p[i].enumValue != (this->enumValue + i)) { 284 return FALSE; 285 } 286 } 287 return TRUE; 288 } 289}; 290 291// Sort function for EnumToValueEntry (sort by enum) 292U_CFUNC int32_t 293compareEnumToValueEntry(const void * /*context*/, const void* e1, const void* e2) { 294 return ((EnumToValueEntry*)e1)->enumValue - ((EnumToValueEntry*)e2)->enumValue; 295} 296 297//---------------------------------------------------------------------- 298// BEGIN Builder 299 300#define IS_VALID_OFFSET(x) (((x)>=0)&&((x)<=MAX_OFFSET)) 301 302class Builder { 303 // header: 304 PropertyAliases header; 305 306 // 0: 307 NonContiguousEnumToOffset* enumToName; 308 int32_t enumToName_size; 309 Offset enumToName_offset; 310 311 // 1: (deleted) 312 313 // 2: 314 NameToEnum* nameToEnum; 315 int32_t nameToEnum_size; 316 Offset nameToEnum_offset; 317 318 // 3: 319 NonContiguousEnumToOffset* enumToValue; 320 int32_t enumToValue_size; 321 Offset enumToValue_offset; 322 323 // 4: 324 ValueMap* valueMap; 325 int32_t valueMap_size; 326 int32_t valueMap_count; 327 Offset valueMap_offset; 328 329 // for any i, one of valueEnumToName[i] or valueNCEnumToName[i] is 330 // NULL and one is not. valueEnumToName_size[i] is the size of 331 // the non-NULL one. i=0..valueMapCount-1 332 // 5a: 333 EnumToOffset** valueEnumToName; 334 // 5b: 335 NonContiguousEnumToOffset** valueNCEnumToName; 336 int32_t* valueEnumToName_size; 337 Offset* valueEnumToName_offset; 338 // 6: 339 // arrays of valueMap_count pointers, sizes, & offsets 340 NameToEnum** valueNameToEnum; 341 int32_t* valueNameToEnum_size; 342 Offset* valueNameToEnum_offset; 343 344 // 98: 345 Offset* nameGroupPool; 346 int32_t nameGroupPool_count; 347 int32_t nameGroupPool_size; 348 Offset nameGroupPool_offset; 349 350 // 99: 351 char* stringPool; 352 int32_t stringPool_count; 353 int32_t stringPool_size; 354 Offset stringPool_offset; 355 Offset* stringPool_offsetArray; // relative to stringPool 356 357 int32_t total_size; // size of everything 358 359 int32_t debug; 360 361public: 362 363 Builder(int32_t debugLevel); 364 ~Builder(); 365 366 void buildTopLevelProperties(const NameToEnumEntry* propName, 367 int32_t propNameCount, 368 const EnumToNameGroupEntry* propEnum, 369 int32_t propEnumCount); 370 371 void buildValues(const EnumToValueEntry* e2v, 372 int32_t count); 373 374 void buildStringPool(const AliasName* propertyNames, 375 int32_t propertyNameCount, 376 const int32_t* nameGroupIndices, 377 int32_t nameGroupIndicesCount); 378 379 void fixup(); 380 381 int8_t* createData(int32_t& length) const; 382 383private: 384 385 static EnumToOffset* buildEnumToOffset(const EnumToNameGroupEntry* e2ng, 386 int32_t count, 387 int32_t& size); 388 static NonContiguousEnumToOffset* 389 buildNCEnumToNameGroup(const EnumToNameGroupEntry* e2ng, 390 int32_t count, 391 int32_t& size); 392 393 static NonContiguousEnumToOffset* 394 buildNCEnumToValue(const EnumToValueEntry* e2v, 395 int32_t count, 396 int32_t& size); 397 398 static NameToEnum* buildNameToEnum(const NameToEnumEntry* nameToEnum, 399 int32_t count, 400 int32_t& size); 401 402 Offset stringIndexToOffset(int32_t index, UBool allowNeg=FALSE) const; 403 void fixupNameToEnum(NameToEnum* n); 404 void fixupEnumToNameGroup(EnumToOffset* e2ng); 405 void fixupNCEnumToNameGroup(NonContiguousEnumToOffset* e2ng); 406 407 void computeOffsets(); 408 void fixupStringPoolOffsets(); 409 void fixupNameGroupPoolOffsets(); 410 void fixupMiscellaneousOffsets(); 411 412 static int32_t align(int32_t a); 413 static void erase(void* p, int32_t size); 414}; 415 416Builder::Builder(int32_t debugLevel) { 417 debug = debugLevel; 418 enumToName = 0; 419 nameToEnum = 0; 420 enumToValue = 0; 421 valueMap_count = 0; 422 valueMap = 0; 423 valueEnumToName = 0; 424 valueNCEnumToName = 0; 425 valueEnumToName_size = 0; 426 valueEnumToName_offset = 0; 427 valueNameToEnum = 0; 428 valueNameToEnum_size = 0; 429 valueNameToEnum_offset = 0; 430 nameGroupPool = 0; 431 stringPool = 0; 432 stringPool_offsetArray = 0; 433} 434 435Builder::~Builder() { 436 uprv_free(enumToName); 437 uprv_free(nameToEnum); 438 uprv_free(enumToValue); 439 uprv_free(valueMap); 440 for (int32_t i=0; i<valueMap_count; ++i) { 441 uprv_free(valueEnumToName[i]); 442 uprv_free(valueNCEnumToName[i]); 443 uprv_free(valueNameToEnum[i]); 444 } 445 uprv_free(valueEnumToName); 446 uprv_free(valueNCEnumToName); 447 uprv_free(valueEnumToName_size); 448 uprv_free(valueEnumToName_offset); 449 uprv_free(valueNameToEnum); 450 uprv_free(valueNameToEnum_size); 451 uprv_free(valueNameToEnum_offset); 452 uprv_free(nameGroupPool); 453 uprv_free(stringPool); 454 uprv_free(stringPool_offsetArray); 455} 456 457int32_t Builder::align(int32_t a) { 458 U_ASSERT(a >= 0); 459 int32_t k = a % sizeof(int32_t); 460 if (k == 0) { 461 return a; 462 } 463 a += sizeof(int32_t) - k; 464 return a; 465} 466 467void Builder::erase(void* p, int32_t size) { 468 U_ASSERT(size >= 0); 469 int8_t* q = (int8_t*) p; 470 while (size--) { 471 *q++ = 0; 472 } 473} 474 475EnumToOffset* Builder::buildEnumToOffset(const EnumToNameGroupEntry* e2ng, 476 int32_t count, 477 int32_t& size) { 478 U_ASSERT(e2ng->isContiguous(count)); 479 size = align(EnumToOffset::getSize(count)); 480 EnumToOffset* result = (EnumToOffset*) uprv_malloc(size); 481 erase(result, size); 482 result->enumStart = e2ng->enumValue; 483 result->enumLimit = e2ng->enumValue + count; 484 Offset* p = result->getOffsetArray(); 485 for (int32_t i=0; i<count; ++i) { 486 // set these to NGI index values 487 // fix them up to NGI offset values 488 U_ASSERT(IS_VALID_OFFSET(e2ng[i].nameGroupIndex)); 489 p[i] = (Offset) e2ng[i].nameGroupIndex; // FIXUP later 490 } 491 return result; 492} 493 494NonContiguousEnumToOffset* 495Builder::buildNCEnumToNameGroup(const EnumToNameGroupEntry* e2ng, 496 int32_t count, 497 int32_t& size) { 498 U_ASSERT(!e2ng->isContiguous(count)); 499 size = align(NonContiguousEnumToOffset::getSize(count)); 500 NonContiguousEnumToOffset* nc = (NonContiguousEnumToOffset*) uprv_malloc(size); 501 erase(nc, size); 502 nc->count = count; 503 EnumValue* e = nc->getEnumArray(); 504 Offset* p = nc->getOffsetArray(); 505 for (int32_t i=0; i<count; ++i) { 506 // set these to NGI index values 507 // fix them up to NGI offset values 508 e[i] = e2ng[i].enumValue; 509 U_ASSERT(IS_VALID_OFFSET(e2ng[i].nameGroupIndex)); 510 p[i] = (Offset) e2ng[i].nameGroupIndex; // FIXUP later 511 } 512 return nc; 513} 514 515NonContiguousEnumToOffset* 516Builder::buildNCEnumToValue(const EnumToValueEntry* e2v, 517 int32_t count, 518 int32_t& size) { 519 U_ASSERT(!e2v->isContiguous(count)); 520 size = align(NonContiguousEnumToOffset::getSize(count)); 521 NonContiguousEnumToOffset* result = (NonContiguousEnumToOffset*) uprv_malloc(size); 522 erase(result, size); 523 result->count = count; 524 EnumValue* e = result->getEnumArray(); 525 for (int32_t i=0; i<count; ++i) { 526 e[i] = e2v[i].enumValue; 527 // offset must be set later 528 } 529 return result; 530} 531 532/** 533 * Given an index into the string pool, return an offset. computeOffsets() 534 * must have been called already. If allowNegative is true, allow negatives 535 * and preserve their sign. 536 */ 537Offset Builder::stringIndexToOffset(int32_t index, UBool allowNegative) const { 538 // Index 0 is ""; we turn this into an Offset of zero 539 if (index == 0) return 0; 540 if (index < 0) { 541 if (allowNegative) { 542 return -Builder::stringIndexToOffset(-index); 543 } else { 544 die("Negative string pool index"); 545 } 546 } else { 547 if (index >= stringPool_count) { 548 die("String pool index too large"); 549 } 550 Offset result = stringPool_offset + stringPool_offsetArray[index]; 551 U_ASSERT(result >= 0 && result < total_size); 552 return result; 553 } 554 return 0; // never executed; make compiler happy 555} 556 557NameToEnum* Builder::buildNameToEnum(const NameToEnumEntry* nameToEnum, 558 int32_t count, 559 int32_t& size) { 560 size = align(NameToEnum::getSize(count)); 561 NameToEnum* n2e = (NameToEnum*) uprv_malloc(size); 562 erase(n2e, size); 563 n2e->count = count; 564 Offset* p = n2e->getNameArray(); 565 EnumValue* e = n2e->getEnumArray(); 566 for (int32_t i=0; i<count; ++i) { 567 // set these to SP index values 568 // fix them up to SP offset values 569 U_ASSERT(IS_VALID_OFFSET(nameToEnum[i].nameIndex)); 570 p[i] = (Offset) nameToEnum[i].nameIndex; // FIXUP later 571 e[i] = nameToEnum[i].enumValue; 572 } 573 return n2e; 574} 575 576 577void Builder::buildTopLevelProperties(const NameToEnumEntry* propName, 578 int32_t propNameCount, 579 const EnumToNameGroupEntry* propEnum, 580 int32_t propEnumCount) { 581 enumToName = buildNCEnumToNameGroup(propEnum, 582 propEnumCount, 583 enumToName_size); 584 nameToEnum = buildNameToEnum(propName, 585 propNameCount, 586 nameToEnum_size); 587} 588 589void Builder::buildValues(const EnumToValueEntry* e2v, 590 int32_t count) { 591 int32_t i; 592 593 U_ASSERT(!e2v->isContiguous(count)); 594 595 valueMap_count = count; 596 597 enumToValue = buildNCEnumToValue(e2v, count, 598 enumToValue_size); 599 600 valueMap_size = align(count * sizeof(ValueMap)); 601 valueMap = (ValueMap*) uprv_malloc(valueMap_size); 602 erase(valueMap, valueMap_size); 603 604 valueEnumToName = MALLOC(EnumToOffset*, count); 605 valueNCEnumToName = MALLOC(NonContiguousEnumToOffset*, count); 606 valueEnumToName_size = MALLOC(int32_t, count); 607 valueEnumToName_offset = MALLOC(Offset, count); 608 valueNameToEnum = MALLOC(NameToEnum*, count); 609 valueNameToEnum_size = MALLOC(int32_t, count); 610 valueNameToEnum_offset = MALLOC(Offset, count); 611 612 for (i=0; i<count; ++i) { 613 UBool isContiguous = 614 e2v[i].enumToName->isContiguous(e2v[i].enumToName_count); 615 valueEnumToName[i] = 0; 616 valueNCEnumToName[i] = 0; 617 if (isContiguous) { 618 valueEnumToName[i] = buildEnumToOffset(e2v[i].enumToName, 619 e2v[i].enumToName_count, 620 valueEnumToName_size[i]); 621 } else { 622 valueNCEnumToName[i] = buildNCEnumToNameGroup(e2v[i].enumToName, 623 e2v[i].enumToName_count, 624 valueEnumToName_size[i]); 625 } 626 valueNameToEnum[i] = 627 buildNameToEnum(e2v[i].nameToEnum, 628 e2v[i].nameToEnum_count, 629 valueNameToEnum_size[i]); 630 } 631} 632 633void Builder::buildStringPool(const AliasName* propertyNames, 634 int32_t propertyNameCount, 635 const int32_t* nameGroupIndices, 636 int32_t nameGroupIndicesCount) { 637 int32_t i; 638 639 nameGroupPool_count = nameGroupIndicesCount; 640 nameGroupPool_size = sizeof(Offset) * nameGroupPool_count; 641 nameGroupPool = MALLOC(Offset, nameGroupPool_count); 642 643 for (i=0; i<nameGroupPool_count; ++i) { 644 // Some indices are negative. 645 int32_t a = nameGroupIndices[i]; 646 if (a < 0) a = -a; 647 U_ASSERT(IS_VALID_OFFSET(a)); 648 nameGroupPool[i] = (Offset) nameGroupIndices[i]; 649 } 650 651 stringPool_count = propertyNameCount; 652 stringPool_size = 0; 653 // first string must be "" -- we skip it 654 U_ASSERT(*propertyNames[0].str == 0); 655 for (i=1 /*sic*/; i<propertyNameCount; ++i) { 656 stringPool_size += (int32_t)(uprv_strlen(propertyNames[i].str) + 1); 657 } 658 stringPool = MALLOC(char, stringPool_size); 659 stringPool_offsetArray = MALLOC(Offset, stringPool_count); 660 Offset soFar = 0; 661 char* p = stringPool; 662 stringPool_offsetArray[0] = -1; // we don't use this entry 663 for (i=1 /*sic*/; i<propertyNameCount; ++i) { 664 const char* str = propertyNames[i].str; 665 int32_t len = (int32_t)uprv_strlen(str); 666 uprv_strcpy(p, str); 667 p += len; 668 *p++ = 0; 669 stringPool_offsetArray[i] = soFar; 670 soFar += (Offset)(len+1); 671 } 672 U_ASSERT(soFar == stringPool_size); 673 U_ASSERT(p == (stringPool + stringPool_size)); 674} 675 676// Confirm that PropertyAliases is a POD (plain old data; see C++ 677// std). The following union will _fail to compile_ if 678// PropertyAliases is _not_ a POD. (Note: We used to use the offsetof 679// macro to check this, but that's not quite right, so that test is 680// commented out -- see below.) 681typedef union { 682 int32_t i; 683 PropertyAliases p; 684} PropertyAliasesPODTest; 685 686void Builder::computeOffsets() { 687 int32_t i; 688 Offset off = sizeof(header); 689 690 if (debug>0) { 691 printf("header \t offset=%4d size=%5d\n", 0, off); 692 } 693 694 // PropertyAliases must have no v-table and must be 695 // padded (if necessary) to the next 32-bit boundary. 696 //U_ASSERT(offsetof(PropertyAliases, enumToName_offset) == 0); // see above 697 U_ASSERT(sizeof(header) % sizeof(int32_t) == 0); 698 699 #define COMPUTE_OFFSET(foo) COMPUTE_OFFSET2(foo,int32_t) 700 701 #define COMPUTE_OFFSET2(foo,type) \ 702 if (debug>0)\ 703 printf(#foo "\t offset=%4d size=%5d\n", off, (int)foo##_size);\ 704 foo##_offset = off;\ 705 U_ASSERT(IS_VALID_OFFSET(off + foo##_size));\ 706 U_ASSERT(foo##_offset % sizeof(type) == 0);\ 707 off = (Offset) (off + foo##_size); 708 709 COMPUTE_OFFSET(enumToName); // 0: 710 COMPUTE_OFFSET(nameToEnum); // 2: 711 COMPUTE_OFFSET(enumToValue); // 3: 712 COMPUTE_OFFSET(valueMap); // 4: 713 714 for (i=0; i<valueMap_count; ++i) { 715 if (debug>0) { 716 printf(" enumToName[%d]\t offset=%4d size=%5d\n", 717 (int)i, off, (int)valueEnumToName_size[i]); 718 } 719 720 valueEnumToName_offset[i] = off; // 5: 721 U_ASSERT(IS_VALID_OFFSET(off + valueEnumToName_size[i])); 722 off = (Offset) (off + valueEnumToName_size[i]); 723 724 if (debug>0) { 725 printf(" nameToEnum[%d]\t offset=%4d size=%5d\n", 726 (int)i, off, (int)valueNameToEnum_size[i]); 727 } 728 729 valueNameToEnum_offset[i] = off; // 6: 730 U_ASSERT(IS_VALID_OFFSET(off + valueNameToEnum_size[i])); 731 off = (Offset) (off + valueNameToEnum_size[i]); 732 } 733 734 // These last two chunks have weaker alignment needs 735 COMPUTE_OFFSET2(nameGroupPool,Offset); // 98: 736 COMPUTE_OFFSET2(stringPool,char); // 99: 737 738 total_size = off; 739 if (debug>0) printf("total size=%5d\n\n", (int)total_size); 740 U_ASSERT(total_size <= (MAX_OFFSET+1)); 741} 742 743void Builder::fixupNameToEnum(NameToEnum* n) { 744 // Fix the string pool offsets in n 745 Offset* p = n->getNameArray(); 746 for (int32_t i=0; i<n->count; ++i) { 747 p[i] = stringIndexToOffset(p[i]); 748 } 749} 750 751void Builder::fixupStringPoolOffsets() { 752 int32_t i; 753 754 // 2: 755 fixupNameToEnum(nameToEnum); 756 757 // 6: 758 for (i=0; i<valueMap_count; ++i) { 759 fixupNameToEnum(valueNameToEnum[i]); 760 } 761 762 // 98: 763 for (i=0; i<nameGroupPool_count; ++i) { 764 nameGroupPool[i] = stringIndexToOffset(nameGroupPool[i], TRUE); 765 } 766} 767 768void Builder::fixupEnumToNameGroup(EnumToOffset* e2ng) { 769 EnumValue i; 770 int32_t j; 771 Offset* p = e2ng->getOffsetArray(); 772 for (i=e2ng->enumStart, j=0; i<e2ng->enumLimit; ++i, ++j) { 773 p[j] = nameGroupPool_offset + sizeof(Offset) * p[j]; 774 } 775} 776 777void Builder::fixupNCEnumToNameGroup(NonContiguousEnumToOffset* e2ng) { 778 int32_t i; 779 /*EnumValue* e = e2ng->getEnumArray();*/ 780 Offset* p = e2ng->getOffsetArray(); 781 for (i=0; i<e2ng->count; ++i) { 782 p[i] = nameGroupPool_offset + sizeof(Offset) * p[i]; 783 } 784} 785 786void Builder::fixupNameGroupPoolOffsets() { 787 int32_t i; 788 789 // 0: 790 fixupNCEnumToNameGroup(enumToName); 791 792 // 1: (deleted) 793 794 // 5: 795 for (i=0; i<valueMap_count; ++i) { 796 // 5a: 797 if (valueEnumToName[i] != 0) { 798 fixupEnumToNameGroup(valueEnumToName[i]); 799 } 800 // 5b: 801 if (valueNCEnumToName[i] != 0) { 802 fixupNCEnumToNameGroup(valueNCEnumToName[i]); 803 } 804 } 805} 806 807void Builder::fixupMiscellaneousOffsets() { 808 int32_t i; 809 810 // header: 811 erase(&header, sizeof(header)); 812 header.enumToName_offset = enumToName_offset; 813 header.nameToEnum_offset = nameToEnum_offset; 814 header.enumToValue_offset = enumToValue_offset; 815 // header meta-info used by Java: 816 U_ASSERT(total_size > 0 && total_size < 0x7FFF); 817 header.total_size = (int16_t) total_size; 818 header.valueMap_offset = valueMap_offset; 819 header.valueMap_count = (int16_t) valueMap_count; 820 header.nameGroupPool_offset = nameGroupPool_offset; 821 header.nameGroupPool_count = (int16_t) nameGroupPool_count; 822 header.stringPool_offset = stringPool_offset; 823 header.stringPool_count = (int16_t) stringPool_count - 1; // don't include "" entry 824 825 U_ASSERT(valueMap_count <= 0x7FFF); 826 U_ASSERT(nameGroupPool_count <= 0x7FFF); 827 U_ASSERT(stringPool_count <= 0x7FFF); 828 829 // 3: 830 Offset* p = enumToValue->getOffsetArray(); 831 /*EnumValue* e = enumToValue->getEnumArray();*/ 832 U_ASSERT(valueMap_count == enumToValue->count); 833 for (i=0; i<valueMap_count; ++i) { 834 p[i] = (Offset)(valueMap_offset + sizeof(ValueMap) * i); 835 } 836 837 // 4: 838 for (i=0; i<valueMap_count; ++i) { 839 ValueMap& v = valueMap[i]; 840 v.enumToName_offset = v.ncEnumToName_offset = 0; 841 if (valueEnumToName[i] != 0) { 842 v.enumToName_offset = valueEnumToName_offset[i]; 843 } 844 if (valueNCEnumToName[i] != 0) { 845 v.ncEnumToName_offset = valueEnumToName_offset[i]; 846 } 847 v.nameToEnum_offset = valueNameToEnum_offset[i]; 848 } 849} 850 851void Builder::fixup() { 852 computeOffsets(); 853 fixupStringPoolOffsets(); 854 fixupNameGroupPoolOffsets(); 855 fixupMiscellaneousOffsets(); 856} 857 858int8_t* Builder::createData(int32_t& length) const { 859 length = total_size; 860 int8_t* result = MALLOC(int8_t, length); 861 862 int8_t* p = result; 863 int8_t* limit = result + length; 864 865 #define APPEND2(x, size) \ 866 U_ASSERT((p+size)<=limit); \ 867 uprv_memcpy(p, x, size); \ 868 p += size 869 870 #define APPEND(x) APPEND2(x, x##_size) 871 872 APPEND2(&header, sizeof(header)); 873 APPEND(enumToName); 874 APPEND(nameToEnum); 875 APPEND(enumToValue); 876 APPEND(valueMap); 877 878 for (int32_t i=0; i<valueMap_count; ++i) { 879 U_ASSERT((valueEnumToName[i] != 0 && valueNCEnumToName[i] == 0) || 880 (valueEnumToName[i] == 0 && valueNCEnumToName[i] != 0)); 881 if (valueEnumToName[i] != 0) { 882 APPEND2(valueEnumToName[i], valueEnumToName_size[i]); 883 } 884 if (valueNCEnumToName[i] != 0) { 885 APPEND2(valueNCEnumToName[i], valueEnumToName_size[i]); 886 } 887 APPEND2(valueNameToEnum[i], valueNameToEnum_size[i]); 888 } 889 890 APPEND(nameGroupPool); 891 APPEND(stringPool); 892 893 if (p != limit) { 894 fprintf(stderr, "p != limit; p = %p, limit = %p", p, limit); 895 exit(1); 896 } 897 return result; 898} 899 900// END Builder 901//---------------------------------------------------------------------- 902 903/* UDataInfo cf. udata.h */ 904static UDataInfo dataInfo = { 905 sizeof(UDataInfo), 906 0, 907 908 U_IS_BIG_ENDIAN, 909 U_CHARSET_FAMILY, 910 sizeof(UChar), 911 0, 912 913 {PNAME_SIG_0, PNAME_SIG_1, PNAME_SIG_2, PNAME_SIG_3}, 914 {PNAME_FORMAT_VERSION, 0, 0, 0}, /* formatVersion */ 915 {VERSION_0, VERSION_1, VERSION_2, VERSION_3} /* Unicode version */ 916}; 917 918class genpname { 919 920 // command-line options 921 UBool useCopyright; 922 UBool verbose; 923 int32_t debug; 924 925public: 926 int MMain(int argc, char *argv[]); 927 928private: 929 NameToEnumEntry* createNameIndex(const AliasList& list, 930 int32_t& nameIndexCount); 931 932 EnumToNameGroupEntry* createEnumIndex(const AliasList& list); 933 934 int32_t writeDataFile(const char *destdir, const Builder&); 935}; 936 937int main(int argc, char *argv[]) { 938 UErrorCode status = U_ZERO_ERROR; 939 u_init(&status); 940 if (U_FAILURE(status) && status != U_FILE_ACCESS_ERROR) { 941 // Note: u_init() will try to open ICU property data. 942 // failures here are expected when building ICU from scratch. 943 // ignore them. 944 fprintf(stderr, "genpname: can not initialize ICU. Status = %s\n", 945 u_errorName(status)); 946 exit(1); 947 } 948 949 genpname app; 950 U_MAIN_INIT_ARGS(argc, argv); 951 int retVal = app.MMain(argc, argv); 952 u_cleanup(); 953 return retVal; 954} 955 956static UOption options[]={ 957 UOPTION_HELP_H, 958 UOPTION_HELP_QUESTION_MARK, 959 UOPTION_COPYRIGHT, 960 UOPTION_DESTDIR, 961 UOPTION_VERBOSE, 962 UOPTION_DEF("debug", 'D', UOPT_REQUIRES_ARG), 963}; 964 965NameToEnumEntry* genpname::createNameIndex(const AliasList& list, 966 int32_t& nameIndexCount) { 967 968 // Build name => enum map 969 970 // This is an n->1 map. There are typically multiple names 971 // mapping to one enum. The name index is sorted in order of the name, 972 // as defined by the uprv_compareAliasNames() function. 973 974 int32_t i, j; 975 int32_t count = list.count(); 976 977 // compute upper limit on number of names in the index 978 int32_t nameIndexCapacity = count * MAX_NAMES_PER_GROUP; 979 NameToEnumEntry* nameIndex = MALLOC(NameToEnumEntry, nameIndexCapacity); 980 981 nameIndexCount = 0; 982 int32_t names[MAX_NAMES_PER_GROUP]; 983 for (i=0; i<count; ++i) { 984 const Alias& p = list[i]; 985 int32_t n = p.getUniqueNames(names); 986 for (j=0; j<n; ++j) { 987 U_ASSERT(nameIndexCount < nameIndexCapacity); 988 nameIndex[nameIndexCount++] = 989 NameToEnumEntry(names[j], p.enumValue); 990 } 991 } 992 993 /* 994 * use a stable sort to ensure consistent results between 995 * genpname.cpp and the propname.cpp swapping code 996 */ 997 UErrorCode errorCode = U_ZERO_ERROR; 998 uprv_sortArray(nameIndex, nameIndexCount, sizeof(nameIndex[0]), 999 compareNameToEnumEntry, NULL, TRUE, &errorCode); 1000 if (debug>1) { 1001 printf("Alias names: %d\n", (int)nameIndexCount); 1002 for (i=0; i<nameIndexCount; ++i) { 1003 printf("%s => %d\n", 1004 STRING_TABLE[nameIndex[i].nameIndex].str, 1005 (int)nameIndex[i].enumValue); 1006 } 1007 printf("\n"); 1008 } 1009 // make sure there are no duplicates. for a sorted list we need 1010 // only compare adjacent items. Alias.getUniqueNames() has 1011 // already eliminated duplicate names for a single property, which 1012 // does occur, so we're checking for duplicate names between two 1013 // properties, which should never occur. 1014 UBool ok = TRUE; 1015 for (i=1; i<nameIndexCount; ++i) { 1016 if (STRING_TABLE[nameIndex[i-1].nameIndex] == 1017 STRING_TABLE[nameIndex[i].nameIndex]) { 1018 printf("Error: Duplicate names in property list: \"%s\", \"%s\"\n", 1019 STRING_TABLE[nameIndex[i-1].nameIndex].str, 1020 STRING_TABLE[nameIndex[i].nameIndex].str); 1021 ok = FALSE; 1022 } 1023 } 1024 if (!ok) { 1025 die("Two or more duplicate names in property list"); 1026 } 1027 1028 return nameIndex; 1029} 1030 1031EnumToNameGroupEntry* genpname::createEnumIndex(const AliasList& list) { 1032 1033 // Build the enum => name map 1034 1035 // This is a 1->n map. Each enum maps to 1 or more names. To 1036 // accomplish this the index entry points to an element of the 1037 // NAME_GROUP array. This is the short name (which may be empty). 1038 // From there, subsequent elements of NAME_GROUP are alternate 1039 // names for this enum, up to and including the first one that is 1040 // negative (negate for actual index). 1041 1042 int32_t i, j, k; 1043 int32_t count = list.count(); 1044 1045 EnumToNameGroupEntry* enumIndex = MALLOC(EnumToNameGroupEntry, count); 1046 for (i=0; i<count; ++i) { 1047 const Alias& p = list[i]; 1048 enumIndex[i] = EnumToNameGroupEntry(p.enumValue, p.nameGroupIndex); 1049 } 1050 1051 UErrorCode errorCode = U_ZERO_ERROR; 1052 uprv_sortArray(enumIndex, count, sizeof(enumIndex[0]), 1053 compareEnumToNameGroupEntry, NULL, FALSE, &errorCode); 1054 if (debug>1) { 1055 printf("Property enums: %d\n", (int)count); 1056 for (i=0; i<count; ++i) { 1057 printf("%d => %d: ", 1058 (int)enumIndex[i].enumValue, 1059 (int)enumIndex[i].nameGroupIndex); 1060 UBool done = FALSE; 1061 for (j=enumIndex[i].nameGroupIndex; !done; ++j) { 1062 k = NAME_GROUP[j]; 1063 if (k < 0) { 1064 k = -k; 1065 done = TRUE; 1066 } 1067 printf("\"%s\"", STRING_TABLE[k].str); 1068 if (!done) printf(", "); 1069 } 1070 printf("\n"); 1071 } 1072 printf("\n"); 1073 } 1074 return enumIndex; 1075} 1076 1077int genpname::MMain(int argc, char* argv[]) 1078{ 1079 int32_t i, j; 1080 UErrorCode status = U_ZERO_ERROR; 1081 1082 u_init(&status); 1083 if (U_FAILURE(status) && status != U_FILE_ACCESS_ERROR) { 1084 fprintf(stderr, "Error: u_init returned %s\n", u_errorName(status)); 1085 status = U_ZERO_ERROR; 1086 } 1087 1088 1089 /* preset then read command line options */ 1090 options[3].value=u_getDataDirectory(); 1091 argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options); 1092 1093 /* error handling, printing usage message */ 1094 if (argc<0) { 1095 fprintf(stderr, 1096 "error in command line argument \"%s\"\n", 1097 argv[-argc]); 1098 } 1099 1100 debug = options[5].doesOccur ? (*options[5].value - '0') : 0; 1101 1102 if (argc!=1 || options[0].doesOccur || options[1].doesOccur || 1103 debug < 0 || debug > 9) { 1104 fprintf(stderr, 1105 "usage: %s [-options]\n" 1106 "\tcreate " PNAME_DATA_NAME "." PNAME_DATA_TYPE "\n" 1107 "options:\n" 1108 "\t-h or -? or --help this usage text\n" 1109 "\t-v or --verbose turn on verbose output\n" 1110 "\t-c or --copyright include a copyright notice\n" 1111 "\t-d or --destdir destination directory, followed by the path\n" 1112 "\t-D or --debug 0..9 emit debugging messages (if > 0)\n", 1113 argv[0]); 1114 return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR; 1115 } 1116 1117 /* get the options values */ 1118 useCopyright=options[2].doesOccur; 1119 verbose = options[4].doesOccur; 1120 1121 // ------------------------------------------------------------ 1122 // Do not sort the string table, instead keep it in data.h order. 1123 // This simplifies data swapping and testing thereof because the string 1124 // table itself need not be sorted during swapping. 1125 // The NameToEnum sorter sorts each such map's string offsets instead. 1126 1127 if (debug>1) { 1128 printf("String pool: %d\n", (int)STRING_COUNT); 1129 for (i=0; i<STRING_COUNT; ++i) { 1130 if (i != 0) { 1131 printf(", "); 1132 } 1133 printf("%s (%d)", STRING_TABLE[i].str, (int)STRING_TABLE[i].index); 1134 } 1135 printf("\n\n"); 1136 } 1137 1138 // ------------------------------------------------------------ 1139 // Create top-level property indices 1140 1141 PropertyArrayList props(PROPERTY, PROPERTY_COUNT); 1142 int32_t propNameCount; 1143 NameToEnumEntry* propName = createNameIndex(props, propNameCount); 1144 EnumToNameGroupEntry* propEnum = createEnumIndex(props); 1145 1146 // ------------------------------------------------------------ 1147 // Create indices for the value list for each enumerated property 1148 1149 // This will have more entries than we need... 1150 EnumToValueEntry* enumToValue = MALLOC(EnumToValueEntry, PROPERTY_COUNT); 1151 int32_t enumToValue_count = 0; 1152 for (i=0, j=0; i<PROPERTY_COUNT; ++i) { 1153 if (PROPERTY[i].valueCount == 0) continue; 1154 AliasArrayList values(PROPERTY[i].valueList, 1155 PROPERTY[i].valueCount); 1156 enumToValue[j].enumValue = PROPERTY[i].enumValue; 1157 enumToValue[j].enumToName = createEnumIndex(values); 1158 enumToValue[j].enumToName_count = PROPERTY[i].valueCount; 1159 enumToValue[j].nameToEnum = createNameIndex(values, 1160 enumToValue[j].nameToEnum_count); 1161 ++j; 1162 } 1163 enumToValue_count = j; 1164 1165 uprv_sortArray(enumToValue, enumToValue_count, sizeof(enumToValue[0]), 1166 compareEnumToValueEntry, NULL, FALSE, &status); 1167 1168 // ------------------------------------------------------------ 1169 // Build PropertyAliases layout in memory 1170 1171 Builder builder(debug); 1172 1173 builder.buildTopLevelProperties(propName, 1174 propNameCount, 1175 propEnum, 1176 PROPERTY_COUNT); 1177 1178 builder.buildValues(enumToValue, 1179 enumToValue_count); 1180 1181 builder.buildStringPool(STRING_TABLE, 1182 STRING_COUNT, 1183 NAME_GROUP, 1184 NAME_GROUP_COUNT); 1185 1186 builder.fixup(); 1187 1188 //////////////////////////////////////////////////////////// 1189 // Write the output file 1190 //////////////////////////////////////////////////////////// 1191 int32_t wlen = writeDataFile(options[3].value, builder); 1192 if (verbose) { 1193 fprintf(stdout, "Output file: %s.%s, %ld bytes\n", 1194 U_ICUDATA_NAME "_" PNAME_DATA_NAME, PNAME_DATA_TYPE, (long)wlen); 1195 } 1196 1197 return 0; // success 1198} 1199 1200int32_t genpname::writeDataFile(const char *destdir, const Builder& builder) { 1201 int32_t length; 1202 int8_t* data = builder.createData(length); 1203 1204 UNewDataMemory *pdata; 1205 UErrorCode status = U_ZERO_ERROR; 1206 1207 pdata = udata_create(destdir, PNAME_DATA_TYPE, PNAME_DATA_NAME, &dataInfo, 1208 useCopyright ? U_COPYRIGHT_STRING : 0, &status); 1209 if (U_FAILURE(status)) { 1210 die("Unable to create data memory"); 1211 } 1212 1213 udata_writeBlock(pdata, data, length); 1214 1215 int32_t dataLength = (int32_t) udata_finish(pdata, &status); 1216 if (U_FAILURE(status)) { 1217 die("Error writing output file"); 1218 } 1219 if (dataLength != length) { 1220 die("Written file doesn't match expected size"); 1221 } 1222 1223 return dataLength; 1224} 1225 1226//eof 1227