1/* 2********************************************************************** 3* Copyright (C) 2002-2006, International Business Machines 4* Corporation and others. All Rights Reserved. 5********************************************************************** 6* Date Name Description 7* 10/11/02 aliu Creation. 8********************************************************************** 9*/ 10 11#include "unicode/utypes.h" 12#include "unicode/putil.h" 13#include "unicode/uclean.h" 14#include "cmemory.h" 15#include "cstring.h" 16#include "filestrm.h" 17#include "uarrsort.h" 18#include "unewdata.h" 19#include "uoptions.h" 20#include "uprops.h" 21#include "propname.h" 22#include "uassert.h" 23 24#include <stdio.h> 25 26U_NAMESPACE_USE 27 28// TODO: Clean up and comment this code. 29 30//---------------------------------------------------------------------- 31// BEGIN DATA 32// 33// This is the raw data to be output. We define the data structure, 34// then include a machine-generated header that contains the actual 35// data. 36 37#include "unicode/uchar.h" 38#include "unicode/uscript.h" 39#include "unicode/unorm.h" 40 41class AliasName { 42public: 43 const char* str; 44 int32_t index; 45 46 AliasName(const char* str, int32_t index); 47 48 int compare(const AliasName& other) const; 49 50 UBool operator==(const AliasName& other) const { 51 return compare(other) == 0; 52 } 53 54 UBool operator!=(const AliasName& other) const { 55 return compare(other) != 0; 56 } 57}; 58 59AliasName::AliasName(const char* _str, 60 int32_t _index) : 61 str(_str), 62 index(_index) 63{ 64} 65 66int AliasName::compare(const AliasName& other) const { 67 return uprv_comparePropertyNames(str, other.str); 68} 69 70class Alias { 71public: 72 int32_t enumValue; 73 int32_t nameGroupIndex; 74 75 Alias(int32_t enumValue, 76 int32_t nameGroupIndex); 77 78 int32_t getUniqueNames(int32_t* nameGroupIndices) const; 79}; 80 81Alias::Alias(int32_t anEnumValue, 82 int32_t aNameGroupIndex) : 83 enumValue(anEnumValue), 84 nameGroupIndex(aNameGroupIndex) 85{ 86} 87 88class Property : public Alias { 89public: 90 int32_t valueCount; 91 const Alias* valueList; 92 93 Property(int32_t enumValue, 94 int32_t nameGroupIndex, 95 int32_t valueCount, 96 const Alias* valueList); 97}; 98 99Property::Property(int32_t _enumValue, 100 int32_t _nameGroupIndex, 101 int32_t _valueCount, 102 const Alias* _valueList) : 103 Alias(_enumValue, _nameGroupIndex), 104 valueCount(_valueCount), 105 valueList(_valueList) 106{ 107} 108 109// *** Include the data header *** 110#include "data.h" 111 112/* return a list of unique names, not including "", for this property 113 * @param stringIndices array of at least MAX_NAMES_PER_GROUP 114 * elements, will be filled with indices into STRING_TABLE 115 * @return number of indices, >= 1 116 */ 117int32_t Alias::getUniqueNames(int32_t* stringIndices) const { 118 int32_t count = 0; 119 int32_t i = nameGroupIndex; 120 UBool done = FALSE; 121 while (!done) { 122 int32_t j = NAME_GROUP[i++]; 123 if (j < 0) { 124 done = TRUE; 125 j = -j; 126 } 127 if (j == 0) continue; // omit "" entries 128 UBool dupe = FALSE; 129 for (int32_t k=0; k<count; ++k) { 130 if (stringIndices[k] == j) { 131 dupe = TRUE; 132 break; 133 } 134 // also do a string check for things like "age|Age" 135 if (STRING_TABLE[stringIndices[k]] == STRING_TABLE[j]) { 136 //printf("Found dupe %s|%s\n", 137 // STRING_TABLE[stringIndices[k]].str, 138 // STRING_TABLE[j].str); 139 dupe = TRUE; 140 break; 141 } 142 } 143 if (dupe) continue; // omit duplicates 144 stringIndices[count++] = j; 145 } 146 return count; 147} 148 149// END DATA 150//---------------------------------------------------------------------- 151 152#define MALLOC(type, count) \ 153 (type*) uprv_malloc(sizeof(type) * count) 154 155void die(const char* msg) { 156 fprintf(stderr, "Error: %s\n", msg); 157 exit(1); 158} 159 160//---------------------------------------------------------------------- 161 162/** 163 * A list of Alias objects. 164 */ 165class AliasList { 166public: 167 virtual ~AliasList(); 168 virtual const Alias& operator[](int32_t i) const = 0; 169 virtual int32_t count() const = 0; 170}; 171 172AliasList::~AliasList() {} 173 174/** 175 * A single array. 176 */ 177class AliasArrayList : public AliasList { 178 const Alias* a; 179 int32_t n; 180public: 181 AliasArrayList(const Alias* _a, int32_t _n) { 182 a = _a; 183 n = _n; 184 } 185 virtual const Alias& operator[](int32_t i) const { 186 return a[i]; 187 } 188 virtual int32_t count() const { 189 return n; 190 } 191}; 192 193/** 194 * A single array. 195 */ 196class PropertyArrayList : public AliasList { 197 const Property* a; 198 int32_t n; 199public: 200 PropertyArrayList(const Property* _a, int32_t _n) { 201 a = _a; 202 n = _n; 203 } 204 virtual const Alias& operator[](int32_t i) const { 205 return a[i]; 206 } 207 virtual int32_t count() const { 208 return n; 209 } 210}; 211 212//---------------------------------------------------------------------- 213 214/** 215 * An element in a name index. It maps a name (given by index) into 216 * an enum value. 217 */ 218class NameToEnumEntry { 219public: 220 int32_t nameIndex; 221 int32_t enumValue; 222 NameToEnumEntry(int32_t a, int32_t b) { nameIndex=a; enumValue=b; } 223}; 224 225// Sort function for NameToEnumEntry (sort by name) 226U_CFUNC int32_t 227compareNameToEnumEntry(const void * /*context*/, const void* e1, const void* e2) { 228 return 229 STRING_TABLE[((NameToEnumEntry*)e1)->nameIndex]. 230 compare(STRING_TABLE[((NameToEnumEntry*)e2)->nameIndex]); 231} 232 233//---------------------------------------------------------------------- 234 235/** 236 * An element in an enum index. It maps an enum into a name group entry 237 * (given by index). 238 */ 239class EnumToNameGroupEntry { 240public: 241 int32_t enumValue; 242 int32_t nameGroupIndex; 243 EnumToNameGroupEntry(int32_t a, int32_t b) { enumValue=a; nameGroupIndex=b; } 244 245 // are enumValues contiguous for count entries starting with this one? 246 // ***!!!*** we assume we are in an array and look at neighbors ***!!!*** 247 UBool isContiguous(int32_t count) const { 248 const EnumToNameGroupEntry* p = this; 249 for (int32_t i=1; i<count; ++i) { 250 if (p[i].enumValue != (this->enumValue + i)) { 251 return FALSE; 252 } 253 } 254 return TRUE; 255 } 256}; 257 258// Sort function for EnumToNameGroupEntry (sort by name index) 259U_CFUNC int32_t 260compareEnumToNameGroupEntry(const void * /*context*/, const void* e1, const void* e2) { 261 return ((EnumToNameGroupEntry*)e1)->enumValue - ((EnumToNameGroupEntry*)e2)->enumValue; 262} 263 264//---------------------------------------------------------------------- 265 266/** 267 * An element in the map from enumerated property enums to value maps. 268 */ 269class EnumToValueEntry { 270public: 271 int32_t enumValue; 272 EnumToNameGroupEntry* enumToName; 273 int32_t enumToName_count; 274 NameToEnumEntry* nameToEnum; 275 int32_t nameToEnum_count; 276 277 // are enumValues contiguous for count entries starting with this one? 278 // ***!!!*** we assume we are in an array and look at neighbors ***!!!*** 279 UBool isContiguous(int32_t count) const { 280 const EnumToValueEntry* p = this; 281 for (int32_t i=1; i<count; ++i) { 282 if (p[i].enumValue != (this->enumValue + i)) { 283 return FALSE; 284 } 285 } 286 return TRUE; 287 } 288}; 289 290// Sort function for EnumToValueEntry (sort by enum) 291U_CFUNC int32_t 292compareEnumToValueEntry(const void * /*context*/, const void* e1, const void* e2) { 293 return ((EnumToValueEntry*)e1)->enumValue - ((EnumToValueEntry*)e2)->enumValue; 294} 295 296//---------------------------------------------------------------------- 297// BEGIN Builder 298 299#define IS_VALID_OFFSET(x) (((x)>=0)&&((x)<=MAX_OFFSET)) 300 301class Builder { 302 // header: 303 PropertyAliases header; 304 305 // 0: 306 NonContiguousEnumToOffset* enumToName; 307 int32_t enumToName_size; 308 Offset enumToName_offset; 309 310 // 1: (deleted) 311 312 // 2: 313 NameToEnum* nameToEnum; 314 int32_t nameToEnum_size; 315 Offset nameToEnum_offset; 316 317 // 3: 318 NonContiguousEnumToOffset* enumToValue; 319 int32_t enumToValue_size; 320 Offset enumToValue_offset; 321 322 // 4: 323 ValueMap* valueMap; 324 int32_t valueMap_size; 325 int32_t valueMap_count; 326 Offset valueMap_offset; 327 328 // for any i, one of valueEnumToName[i] or valueNCEnumToName[i] is 329 // NULL and one is not. valueEnumToName_size[i] is the size of 330 // the non-NULL one. i=0..valueMapCount-1 331 // 5a: 332 EnumToOffset** valueEnumToName; 333 // 5b: 334 NonContiguousEnumToOffset** valueNCEnumToName; 335 int32_t* valueEnumToName_size; 336 Offset* valueEnumToName_offset; 337 // 6: 338 // arrays of valueMap_count pointers, sizes, & offsets 339 NameToEnum** valueNameToEnum; 340 int32_t* valueNameToEnum_size; 341 Offset* valueNameToEnum_offset; 342 343 // 98: 344 Offset* nameGroupPool; 345 int32_t nameGroupPool_count; 346 int32_t nameGroupPool_size; 347 Offset nameGroupPool_offset; 348 349 // 99: 350 char* stringPool; 351 int32_t stringPool_count; 352 int32_t stringPool_size; 353 Offset stringPool_offset; 354 Offset* stringPool_offsetArray; // relative to stringPool 355 356 int32_t total_size; // size of everything 357 358 int32_t debug; 359 360public: 361 362 Builder(int32_t debugLevel); 363 ~Builder(); 364 365 void buildTopLevelProperties(const NameToEnumEntry* propName, 366 int32_t propNameCount, 367 const EnumToNameGroupEntry* propEnum, 368 int32_t propEnumCount); 369 370 void buildValues(const EnumToValueEntry* e2v, 371 int32_t count); 372 373 void buildStringPool(const AliasName* propertyNames, 374 int32_t propertyNameCount, 375 const int32_t* nameGroupIndices, 376 int32_t nameGroupIndicesCount); 377 378 void fixup(); 379 380 int8_t* createData(int32_t& length) const; 381 382private: 383 384 static EnumToOffset* buildEnumToOffset(const EnumToNameGroupEntry* e2ng, 385 int32_t count, 386 int32_t& size); 387 static NonContiguousEnumToOffset* 388 buildNCEnumToNameGroup(const EnumToNameGroupEntry* e2ng, 389 int32_t count, 390 int32_t& size); 391 392 static NonContiguousEnumToOffset* 393 buildNCEnumToValue(const EnumToValueEntry* e2v, 394 int32_t count, 395 int32_t& size); 396 397 static NameToEnum* buildNameToEnum(const NameToEnumEntry* nameToEnum, 398 int32_t count, 399 int32_t& size); 400 401 Offset stringIndexToOffset(int32_t index, UBool allowNeg=FALSE) const; 402 void fixupNameToEnum(NameToEnum* n); 403 void fixupEnumToNameGroup(EnumToOffset* e2ng); 404 void fixupNCEnumToNameGroup(NonContiguousEnumToOffset* e2ng); 405 406 void computeOffsets(); 407 void fixupStringPoolOffsets(); 408 void fixupNameGroupPoolOffsets(); 409 void fixupMiscellaneousOffsets(); 410 411 static int32_t align(int32_t a); 412 static void erase(void* p, int32_t size); 413}; 414 415Builder::Builder(int32_t debugLevel) { 416 debug = debugLevel; 417 enumToName = 0; 418 nameToEnum = 0; 419 enumToValue = 0; 420 valueMap_count = 0; 421 valueMap = 0; 422 valueEnumToName = 0; 423 valueNCEnumToName = 0; 424 valueEnumToName_size = 0; 425 valueEnumToName_offset = 0; 426 valueNameToEnum = 0; 427 valueNameToEnum_size = 0; 428 valueNameToEnum_offset = 0; 429 nameGroupPool = 0; 430 stringPool = 0; 431 stringPool_offsetArray = 0; 432} 433 434Builder::~Builder() { 435 uprv_free(enumToName); 436 uprv_free(nameToEnum); 437 uprv_free(enumToValue); 438 uprv_free(valueMap); 439 for (int32_t i=0; i<valueMap_count; ++i) { 440 uprv_free(valueEnumToName[i]); 441 uprv_free(valueNCEnumToName[i]); 442 uprv_free(valueNameToEnum[i]); 443 } 444 uprv_free(valueEnumToName); 445 uprv_free(valueNCEnumToName); 446 uprv_free(valueEnumToName_size); 447 uprv_free(valueEnumToName_offset); 448 uprv_free(valueNameToEnum); 449 uprv_free(valueNameToEnum_size); 450 uprv_free(valueNameToEnum_offset); 451 uprv_free(nameGroupPool); 452 uprv_free(stringPool); 453 uprv_free(stringPool_offsetArray); 454} 455 456int32_t Builder::align(int32_t a) { 457 U_ASSERT(a >= 0); 458 int32_t k = a % sizeof(int32_t); 459 if (k == 0) { 460 return a; 461 } 462 a += sizeof(int32_t) - k; 463 return a; 464} 465 466void Builder::erase(void* p, int32_t size) { 467 U_ASSERT(size >= 0); 468 int8_t* q = (int8_t*) p; 469 while (size--) { 470 *q++ = 0; 471 } 472} 473 474EnumToOffset* Builder::buildEnumToOffset(const EnumToNameGroupEntry* e2ng, 475 int32_t count, 476 int32_t& size) { 477 U_ASSERT(e2ng->isContiguous(count)); 478 size = align(EnumToOffset::getSize(count)); 479 EnumToOffset* result = (EnumToOffset*) uprv_malloc(size); 480 erase(result, size); 481 result->enumStart = e2ng->enumValue; 482 result->enumLimit = e2ng->enumValue + count; 483 Offset* p = result->getOffsetArray(); 484 for (int32_t i=0; i<count; ++i) { 485 // set these to NGI index values 486 // fix them up to NGI offset values 487 U_ASSERT(IS_VALID_OFFSET(e2ng[i].nameGroupIndex)); 488 p[i] = (Offset) e2ng[i].nameGroupIndex; // FIXUP later 489 } 490 return result; 491} 492 493NonContiguousEnumToOffset* 494Builder::buildNCEnumToNameGroup(const EnumToNameGroupEntry* e2ng, 495 int32_t count, 496 int32_t& size) { 497 U_ASSERT(!e2ng->isContiguous(count)); 498 size = align(NonContiguousEnumToOffset::getSize(count)); 499 NonContiguousEnumToOffset* nc = (NonContiguousEnumToOffset*) uprv_malloc(size); 500 erase(nc, size); 501 nc->count = count; 502 EnumValue* e = nc->getEnumArray(); 503 Offset* p = nc->getOffsetArray(); 504 for (int32_t i=0; i<count; ++i) { 505 // set these to NGI index values 506 // fix them up to NGI offset values 507 e[i] = e2ng[i].enumValue; 508 U_ASSERT(IS_VALID_OFFSET(e2ng[i].nameGroupIndex)); 509 p[i] = (Offset) e2ng[i].nameGroupIndex; // FIXUP later 510 } 511 return nc; 512} 513 514NonContiguousEnumToOffset* 515Builder::buildNCEnumToValue(const EnumToValueEntry* e2v, 516 int32_t count, 517 int32_t& size) { 518 U_ASSERT(!e2v->isContiguous(count)); 519 size = align(NonContiguousEnumToOffset::getSize(count)); 520 NonContiguousEnumToOffset* result = (NonContiguousEnumToOffset*) uprv_malloc(size); 521 erase(result, size); 522 result->count = count; 523 EnumValue* e = result->getEnumArray(); 524 for (int32_t i=0; i<count; ++i) { 525 e[i] = e2v[i].enumValue; 526 // offset must be set later 527 } 528 return result; 529} 530 531/** 532 * Given an index into the string pool, return an offset. computeOffsets() 533 * must have been called already. If allowNegative is true, allow negatives 534 * and preserve their sign. 535 */ 536Offset Builder::stringIndexToOffset(int32_t index, UBool allowNegative) const { 537 // Index 0 is ""; we turn this into an Offset of zero 538 if (index == 0) return 0; 539 if (index < 0) { 540 if (allowNegative) { 541 return -Builder::stringIndexToOffset(-index); 542 } else { 543 die("Negative string pool index"); 544 } 545 } else { 546 if (index >= stringPool_count) { 547 die("String pool index too large"); 548 } 549 Offset result = stringPool_offset + stringPool_offsetArray[index]; 550 U_ASSERT(result >= 0 && result < total_size); 551 return result; 552 } 553 return 0; // never executed; make compiler happy 554} 555 556NameToEnum* Builder::buildNameToEnum(const NameToEnumEntry* nameToEnum, 557 int32_t count, 558 int32_t& size) { 559 size = align(NameToEnum::getSize(count)); 560 NameToEnum* n2e = (NameToEnum*) uprv_malloc(size); 561 erase(n2e, size); 562 n2e->count = count; 563 Offset* p = n2e->getNameArray(); 564 EnumValue* e = n2e->getEnumArray(); 565 for (int32_t i=0; i<count; ++i) { 566 // set these to SP index values 567 // fix them up to SP offset values 568 U_ASSERT(IS_VALID_OFFSET(nameToEnum[i].nameIndex)); 569 p[i] = (Offset) nameToEnum[i].nameIndex; // FIXUP later 570 e[i] = nameToEnum[i].enumValue; 571 } 572 return n2e; 573} 574 575 576void Builder::buildTopLevelProperties(const NameToEnumEntry* propName, 577 int32_t propNameCount, 578 const EnumToNameGroupEntry* propEnum, 579 int32_t propEnumCount) { 580 enumToName = buildNCEnumToNameGroup(propEnum, 581 propEnumCount, 582 enumToName_size); 583 nameToEnum = buildNameToEnum(propName, 584 propNameCount, 585 nameToEnum_size); 586} 587 588void Builder::buildValues(const EnumToValueEntry* e2v, 589 int32_t count) { 590 int32_t i; 591 592 U_ASSERT(!e2v->isContiguous(count)); 593 594 valueMap_count = count; 595 596 enumToValue = buildNCEnumToValue(e2v, count, 597 enumToValue_size); 598 599 valueMap_size = align(count * sizeof(ValueMap)); 600 valueMap = (ValueMap*) uprv_malloc(valueMap_size); 601 erase(valueMap, valueMap_size); 602 603 valueEnumToName = MALLOC(EnumToOffset*, count); 604 valueNCEnumToName = MALLOC(NonContiguousEnumToOffset*, count); 605 valueEnumToName_size = MALLOC(int32_t, count); 606 valueEnumToName_offset = MALLOC(Offset, count); 607 valueNameToEnum = MALLOC(NameToEnum*, count); 608 valueNameToEnum_size = MALLOC(int32_t, count); 609 valueNameToEnum_offset = MALLOC(Offset, count); 610 611 for (i=0; i<count; ++i) { 612 UBool isContiguous = 613 e2v[i].enumToName->isContiguous(e2v[i].enumToName_count); 614 valueEnumToName[i] = 0; 615 valueNCEnumToName[i] = 0; 616 if (isContiguous) { 617 valueEnumToName[i] = buildEnumToOffset(e2v[i].enumToName, 618 e2v[i].enumToName_count, 619 valueEnumToName_size[i]); 620 } else { 621 valueNCEnumToName[i] = buildNCEnumToNameGroup(e2v[i].enumToName, 622 e2v[i].enumToName_count, 623 valueEnumToName_size[i]); 624 } 625 valueNameToEnum[i] = 626 buildNameToEnum(e2v[i].nameToEnum, 627 e2v[i].nameToEnum_count, 628 valueNameToEnum_size[i]); 629 } 630} 631 632void Builder::buildStringPool(const AliasName* propertyNames, 633 int32_t propertyNameCount, 634 const int32_t* nameGroupIndices, 635 int32_t nameGroupIndicesCount) { 636 int32_t i; 637 638 nameGroupPool_count = nameGroupIndicesCount; 639 nameGroupPool_size = sizeof(Offset) * nameGroupPool_count; 640 nameGroupPool = MALLOC(Offset, nameGroupPool_count); 641 642 for (i=0; i<nameGroupPool_count; ++i) { 643 // Some indices are negative. 644 int32_t a = nameGroupIndices[i]; 645 if (a < 0) a = -a; 646 U_ASSERT(IS_VALID_OFFSET(a)); 647 nameGroupPool[i] = (Offset) nameGroupIndices[i]; 648 } 649 650 stringPool_count = propertyNameCount; 651 stringPool_size = 0; 652 // first string must be "" -- we skip it 653 U_ASSERT(*propertyNames[0].str == 0); 654 for (i=1 /*sic*/; i<propertyNameCount; ++i) { 655 stringPool_size += (int32_t)(uprv_strlen(propertyNames[i].str) + 1); 656 } 657 stringPool = MALLOC(char, stringPool_size); 658 stringPool_offsetArray = MALLOC(Offset, stringPool_count); 659 Offset soFar = 0; 660 char* p = stringPool; 661 stringPool_offsetArray[0] = -1; // we don't use this entry 662 for (i=1 /*sic*/; i<propertyNameCount; ++i) { 663 const char* str = propertyNames[i].str; 664 int32_t len = (int32_t)uprv_strlen(str); 665 uprv_strcpy(p, str); 666 p += len; 667 *p++ = 0; 668 stringPool_offsetArray[i] = soFar; 669 soFar += (Offset)(len+1); 670 } 671 U_ASSERT(soFar == stringPool_size); 672 U_ASSERT(p == (stringPool + stringPool_size)); 673} 674 675// Confirm that PropertyAliases is a POD (plain old data; see C++ 676// std). The following union will _fail to compile_ if 677// PropertyAliases is _not_ a POD. (Note: We used to use the offsetof 678// macro to check this, but that's not quite right, so that test is 679// commented out -- see below.) 680typedef union { 681 int32_t i; 682 PropertyAliases p; 683} PropertyAliasesPODTest; 684 685void Builder::computeOffsets() { 686 int32_t i; 687 Offset off = sizeof(header); 688 689 if (debug>0) { 690 printf("header \t offset=%4d size=%5d\n", 0, off); 691 } 692 693 // PropertyAliases must have no v-table and must be 694 // padded (if necessary) to the next 32-bit boundary. 695 //U_ASSERT(offsetof(PropertyAliases, enumToName_offset) == 0); // see above 696 U_ASSERT(sizeof(header) % sizeof(int32_t) == 0); 697 698 #define COMPUTE_OFFSET(foo) COMPUTE_OFFSET2(foo,int32_t) 699 700 #define COMPUTE_OFFSET2(foo,type) \ 701 if (debug>0)\ 702 printf(#foo "\t offset=%4d size=%5d\n", off, (int)foo##_size);\ 703 foo##_offset = off;\ 704 U_ASSERT(IS_VALID_OFFSET(off + foo##_size));\ 705 U_ASSERT(foo##_offset % sizeof(type) == 0);\ 706 off = (Offset) (off + foo##_size); 707 708 COMPUTE_OFFSET(enumToName); // 0: 709 COMPUTE_OFFSET(nameToEnum); // 2: 710 COMPUTE_OFFSET(enumToValue); // 3: 711 COMPUTE_OFFSET(valueMap); // 4: 712 713 for (i=0; i<valueMap_count; ++i) { 714 if (debug>0) { 715 printf(" enumToName[%d]\t offset=%4d size=%5d\n", 716 (int)i, off, (int)valueEnumToName_size[i]); 717 } 718 719 valueEnumToName_offset[i] = off; // 5: 720 U_ASSERT(IS_VALID_OFFSET(off + valueEnumToName_size[i])); 721 off = (Offset) (off + valueEnumToName_size[i]); 722 723 if (debug>0) { 724 printf(" nameToEnum[%d]\t offset=%4d size=%5d\n", 725 (int)i, off, (int)valueNameToEnum_size[i]); 726 } 727 728 valueNameToEnum_offset[i] = off; // 6: 729 U_ASSERT(IS_VALID_OFFSET(off + valueNameToEnum_size[i])); 730 off = (Offset) (off + valueNameToEnum_size[i]); 731 } 732 733 // These last two chunks have weaker alignment needs 734 COMPUTE_OFFSET2(nameGroupPool,Offset); // 98: 735 COMPUTE_OFFSET2(stringPool,char); // 99: 736 737 total_size = off; 738 if (debug>0) printf("total size=%5d\n\n", (int)total_size); 739 U_ASSERT(total_size <= (MAX_OFFSET+1)); 740} 741 742void Builder::fixupNameToEnum(NameToEnum* n) { 743 // Fix the string pool offsets in n 744 Offset* p = n->getNameArray(); 745 for (int32_t i=0; i<n->count; ++i) { 746 p[i] = stringIndexToOffset(p[i]); 747 } 748} 749 750void Builder::fixupStringPoolOffsets() { 751 int32_t i; 752 753 // 2: 754 fixupNameToEnum(nameToEnum); 755 756 // 6: 757 for (i=0; i<valueMap_count; ++i) { 758 fixupNameToEnum(valueNameToEnum[i]); 759 } 760 761 // 98: 762 for (i=0; i<nameGroupPool_count; ++i) { 763 nameGroupPool[i] = stringIndexToOffset(nameGroupPool[i], TRUE); 764 } 765} 766 767void Builder::fixupEnumToNameGroup(EnumToOffset* e2ng) { 768 EnumValue i; 769 int32_t j; 770 Offset* p = e2ng->getOffsetArray(); 771 for (i=e2ng->enumStart, j=0; i<e2ng->enumLimit; ++i, ++j) { 772 p[j] = nameGroupPool_offset + sizeof(Offset) * p[j]; 773 } 774} 775 776void Builder::fixupNCEnumToNameGroup(NonContiguousEnumToOffset* e2ng) { 777 int32_t i; 778 /*EnumValue* e = e2ng->getEnumArray();*/ 779 Offset* p = e2ng->getOffsetArray(); 780 for (i=0; i<e2ng->count; ++i) { 781 p[i] = nameGroupPool_offset + sizeof(Offset) * p[i]; 782 } 783} 784 785void Builder::fixupNameGroupPoolOffsets() { 786 int32_t i; 787 788 // 0: 789 fixupNCEnumToNameGroup(enumToName); 790 791 // 1: (deleted) 792 793 // 5: 794 for (i=0; i<valueMap_count; ++i) { 795 // 5a: 796 if (valueEnumToName[i] != 0) { 797 fixupEnumToNameGroup(valueEnumToName[i]); 798 } 799 // 5b: 800 if (valueNCEnumToName[i] != 0) { 801 fixupNCEnumToNameGroup(valueNCEnumToName[i]); 802 } 803 } 804} 805 806void Builder::fixupMiscellaneousOffsets() { 807 int32_t i; 808 809 // header: 810 erase(&header, sizeof(header)); 811 header.enumToName_offset = enumToName_offset; 812 header.nameToEnum_offset = nameToEnum_offset; 813 header.enumToValue_offset = enumToValue_offset; 814 // header meta-info used by Java: 815 U_ASSERT(total_size > 0 && total_size < 0x7FFF); 816 header.total_size = (int16_t) total_size; 817 header.valueMap_offset = valueMap_offset; 818 header.valueMap_count = (int16_t) valueMap_count; 819 header.nameGroupPool_offset = nameGroupPool_offset; 820 header.nameGroupPool_count = (int16_t) nameGroupPool_count; 821 header.stringPool_offset = stringPool_offset; 822 header.stringPool_count = (int16_t) stringPool_count - 1; // don't include "" entry 823 824 U_ASSERT(valueMap_count <= 0x7FFF); 825 U_ASSERT(nameGroupPool_count <= 0x7FFF); 826 U_ASSERT(stringPool_count <= 0x7FFF); 827 828 // 3: 829 Offset* p = enumToValue->getOffsetArray(); 830 /*EnumValue* e = enumToValue->getEnumArray();*/ 831 U_ASSERT(valueMap_count == enumToValue->count); 832 for (i=0; i<valueMap_count; ++i) { 833 p[i] = (Offset)(valueMap_offset + sizeof(ValueMap) * i); 834 } 835 836 // 4: 837 for (i=0; i<valueMap_count; ++i) { 838 ValueMap& v = valueMap[i]; 839 v.enumToName_offset = v.ncEnumToName_offset = 0; 840 if (valueEnumToName[i] != 0) { 841 v.enumToName_offset = valueEnumToName_offset[i]; 842 } 843 if (valueNCEnumToName[i] != 0) { 844 v.ncEnumToName_offset = valueEnumToName_offset[i]; 845 } 846 v.nameToEnum_offset = valueNameToEnum_offset[i]; 847 } 848} 849 850void Builder::fixup() { 851 computeOffsets(); 852 fixupStringPoolOffsets(); 853 fixupNameGroupPoolOffsets(); 854 fixupMiscellaneousOffsets(); 855} 856 857int8_t* Builder::createData(int32_t& length) const { 858 length = total_size; 859 int8_t* result = MALLOC(int8_t, length); 860 861 int8_t* p = result; 862 int8_t* limit = result + length; 863 864 #define APPEND2(x, size) \ 865 U_ASSERT((p+size)<=limit); \ 866 uprv_memcpy(p, x, size); \ 867 p += size 868 869 #define APPEND(x) APPEND2(x, x##_size) 870 871 APPEND2(&header, sizeof(header)); 872 APPEND(enumToName); 873 APPEND(nameToEnum); 874 APPEND(enumToValue); 875 APPEND(valueMap); 876 877 for (int32_t i=0; i<valueMap_count; ++i) { 878 U_ASSERT((valueEnumToName[i] != 0 && valueNCEnumToName[i] == 0) || 879 (valueEnumToName[i] == 0 && valueNCEnumToName[i] != 0)); 880 if (valueEnumToName[i] != 0) { 881 APPEND2(valueEnumToName[i], valueEnumToName_size[i]); 882 } 883 if (valueNCEnumToName[i] != 0) { 884 APPEND2(valueNCEnumToName[i], valueEnumToName_size[i]); 885 } 886 APPEND2(valueNameToEnum[i], valueNameToEnum_size[i]); 887 } 888 889 APPEND(nameGroupPool); 890 APPEND(stringPool); 891 892 if (p != limit) { 893 fprintf(stderr, "p != limit; p = %p, limit = %p", p, limit); 894 exit(1); 895 } 896 return result; 897} 898 899// END Builder 900//---------------------------------------------------------------------- 901 902/* UDataInfo cf. udata.h */ 903static UDataInfo dataInfo = { 904 sizeof(UDataInfo), 905 0, 906 907 U_IS_BIG_ENDIAN, 908 U_CHARSET_FAMILY, 909 sizeof(UChar), 910 0, 911 912 {PNAME_SIG_0, PNAME_SIG_1, PNAME_SIG_2, PNAME_SIG_3}, 913 {PNAME_FORMAT_VERSION, 0, 0, 0}, /* formatVersion */ 914 {VERSION_0, VERSION_1, VERSION_2, VERSION_3} /* Unicode version */ 915}; 916 917class genpname { 918 919 // command-line options 920 UBool useCopyright; 921 UBool verbose; 922 int32_t debug; 923 924public: 925 int MMain(int argc, char *argv[]); 926 927private: 928 NameToEnumEntry* createNameIndex(const AliasList& list, 929 int32_t& nameIndexCount); 930 931 EnumToNameGroupEntry* createEnumIndex(const AliasList& list); 932 933 int32_t writeDataFile(const char *destdir, const Builder&); 934}; 935 936int main(int argc, char *argv[]) { 937 UErrorCode status = U_ZERO_ERROR; 938 u_init(&status); 939 if (U_FAILURE(status) && status != U_FILE_ACCESS_ERROR) { 940 // Note: u_init() will try to open ICU property data. 941 // failures here are expected when building ICU from scratch. 942 // ignore them. 943 fprintf(stderr, "genpname: can not initialize ICU. Status = %s\n", 944 u_errorName(status)); 945 exit(1); 946 } 947 948 genpname app; 949 U_MAIN_INIT_ARGS(argc, argv); 950 int retVal = app.MMain(argc, argv); 951 u_cleanup(); 952 return retVal; 953} 954 955static UOption options[]={ 956 UOPTION_HELP_H, 957 UOPTION_HELP_QUESTION_MARK, 958 UOPTION_COPYRIGHT, 959 UOPTION_DESTDIR, 960 UOPTION_VERBOSE, 961 UOPTION_DEF("debug", 'D', UOPT_REQUIRES_ARG), 962}; 963 964NameToEnumEntry* genpname::createNameIndex(const AliasList& list, 965 int32_t& nameIndexCount) { 966 967 // Build name => enum map 968 969 // This is an n->1 map. There are typically multiple names 970 // mapping to one enum. The name index is sorted in order of the name, 971 // as defined by the uprv_compareAliasNames() function. 972 973 int32_t i, j; 974 int32_t count = list.count(); 975 976 // compute upper limit on number of names in the index 977 int32_t nameIndexCapacity = count * MAX_NAMES_PER_GROUP; 978 NameToEnumEntry* nameIndex = MALLOC(NameToEnumEntry, nameIndexCapacity); 979 980 nameIndexCount = 0; 981 int32_t names[MAX_NAMES_PER_GROUP]; 982 for (i=0; i<count; ++i) { 983 const Alias& p = list[i]; 984 int32_t n = p.getUniqueNames(names); 985 for (j=0; j<n; ++j) { 986 U_ASSERT(nameIndexCount < nameIndexCapacity); 987 nameIndex[nameIndexCount++] = 988 NameToEnumEntry(names[j], p.enumValue); 989 } 990 } 991 992 /* 993 * use a stable sort to ensure consistent results between 994 * genpname.cpp and the propname.cpp swapping code 995 */ 996 UErrorCode errorCode = U_ZERO_ERROR; 997 uprv_sortArray(nameIndex, nameIndexCount, sizeof(nameIndex[0]), 998 compareNameToEnumEntry, NULL, TRUE, &errorCode); 999 if (debug>1) { 1000 printf("Alias names: %d\n", (int)nameIndexCount); 1001 for (i=0; i<nameIndexCount; ++i) { 1002 printf("%s => %d\n", 1003 STRING_TABLE[nameIndex[i].nameIndex].str, 1004 (int)nameIndex[i].enumValue); 1005 } 1006 printf("\n"); 1007 } 1008 // make sure there are no duplicates. for a sorted list we need 1009 // only compare adjacent items. Alias.getUniqueNames() has 1010 // already eliminated duplicate names for a single property, which 1011 // does occur, so we're checking for duplicate names between two 1012 // properties, which should never occur. 1013 UBool ok = TRUE; 1014 for (i=1; i<nameIndexCount; ++i) { 1015 if (STRING_TABLE[nameIndex[i-1].nameIndex] == 1016 STRING_TABLE[nameIndex[i].nameIndex]) { 1017 printf("Error: Duplicate names in property list: \"%s\", \"%s\"\n", 1018 STRING_TABLE[nameIndex[i-1].nameIndex].str, 1019 STRING_TABLE[nameIndex[i].nameIndex].str); 1020 ok = FALSE; 1021 } 1022 } 1023 if (!ok) { 1024 die("Two or more duplicate names in property list"); 1025 } 1026 1027 return nameIndex; 1028} 1029 1030EnumToNameGroupEntry* genpname::createEnumIndex(const AliasList& list) { 1031 1032 // Build the enum => name map 1033 1034 // This is a 1->n map. Each enum maps to 1 or more names. To 1035 // accomplish this the index entry points to an element of the 1036 // NAME_GROUP array. This is the short name (which may be empty). 1037 // From there, subsequent elements of NAME_GROUP are alternate 1038 // names for this enum, up to and including the first one that is 1039 // negative (negate for actual index). 1040 1041 int32_t i, j, k; 1042 int32_t count = list.count(); 1043 1044 EnumToNameGroupEntry* enumIndex = MALLOC(EnumToNameGroupEntry, count); 1045 for (i=0; i<count; ++i) { 1046 const Alias& p = list[i]; 1047 enumIndex[i] = EnumToNameGroupEntry(p.enumValue, p.nameGroupIndex); 1048 } 1049 1050 UErrorCode errorCode = U_ZERO_ERROR; 1051 uprv_sortArray(enumIndex, count, sizeof(enumIndex[0]), 1052 compareEnumToNameGroupEntry, NULL, FALSE, &errorCode); 1053 if (debug>1) { 1054 printf("Property enums: %d\n", (int)count); 1055 for (i=0; i<count; ++i) { 1056 printf("%d => %d: ", 1057 (int)enumIndex[i].enumValue, 1058 (int)enumIndex[i].nameGroupIndex); 1059 UBool done = FALSE; 1060 for (j=enumIndex[i].nameGroupIndex; !done; ++j) { 1061 k = NAME_GROUP[j]; 1062 if (k < 0) { 1063 k = -k; 1064 done = TRUE; 1065 } 1066 printf("\"%s\"", STRING_TABLE[k].str); 1067 if (!done) printf(", "); 1068 } 1069 printf("\n"); 1070 } 1071 printf("\n"); 1072 } 1073 return enumIndex; 1074} 1075 1076int genpname::MMain(int argc, char* argv[]) 1077{ 1078 int32_t i, j; 1079 UErrorCode status = U_ZERO_ERROR; 1080 1081 u_init(&status); 1082 if (U_FAILURE(status) && status != U_FILE_ACCESS_ERROR) { 1083 fprintf(stderr, "Error: u_init returned %s\n", u_errorName(status)); 1084 status = U_ZERO_ERROR; 1085 } 1086 1087 1088 /* preset then read command line options */ 1089 options[3].value=u_getDataDirectory(); 1090 argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options); 1091 1092 /* error handling, printing usage message */ 1093 if (argc<0) { 1094 fprintf(stderr, 1095 "error in command line argument \"%s\"\n", 1096 argv[-argc]); 1097 } 1098 1099 debug = options[5].doesOccur ? (*options[5].value - '0') : 0; 1100 1101 if (argc!=1 || options[0].doesOccur || options[1].doesOccur || 1102 debug < 0 || debug > 9) { 1103 fprintf(stderr, 1104 "usage: %s [-options]\n" 1105 "\tcreate " PNAME_DATA_NAME "." PNAME_DATA_TYPE "\n" 1106 "options:\n" 1107 "\t-h or -? or --help this usage text\n" 1108 "\t-v or --verbose turn on verbose output\n" 1109 "\t-c or --copyright include a copyright notice\n" 1110 "\t-d or --destdir destination directory, followed by the path\n" 1111 "\t-D or --debug 0..9 emit debugging messages (if > 0)\n", 1112 argv[0]); 1113 return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR; 1114 } 1115 1116 /* get the options values */ 1117 useCopyright=options[2].doesOccur; 1118 verbose = options[4].doesOccur; 1119 1120 // ------------------------------------------------------------ 1121 // Do not sort the string table, instead keep it in data.h order. 1122 // This simplifies data swapping and testing thereof because the string 1123 // table itself need not be sorted during swapping. 1124 // The NameToEnum sorter sorts each such map's string offsets instead. 1125 1126 if (debug>1) { 1127 printf("String pool: %d\n", (int)STRING_COUNT); 1128 for (i=0; i<STRING_COUNT; ++i) { 1129 if (i != 0) { 1130 printf(", "); 1131 } 1132 printf("%s (%d)", STRING_TABLE[i].str, (int)STRING_TABLE[i].index); 1133 } 1134 printf("\n\n"); 1135 } 1136 1137 // ------------------------------------------------------------ 1138 // Create top-level property indices 1139 1140 PropertyArrayList props(PROPERTY, PROPERTY_COUNT); 1141 int32_t propNameCount; 1142 NameToEnumEntry* propName = createNameIndex(props, propNameCount); 1143 EnumToNameGroupEntry* propEnum = createEnumIndex(props); 1144 1145 // ------------------------------------------------------------ 1146 // Create indices for the value list for each enumerated property 1147 1148 // This will have more entries than we need... 1149 EnumToValueEntry* enumToValue = MALLOC(EnumToValueEntry, PROPERTY_COUNT); 1150 int32_t enumToValue_count = 0; 1151 for (i=0, j=0; i<PROPERTY_COUNT; ++i) { 1152 if (PROPERTY[i].valueCount == 0) continue; 1153 AliasArrayList values(PROPERTY[i].valueList, 1154 PROPERTY[i].valueCount); 1155 enumToValue[j].enumValue = PROPERTY[i].enumValue; 1156 enumToValue[j].enumToName = createEnumIndex(values); 1157 enumToValue[j].enumToName_count = PROPERTY[i].valueCount; 1158 enumToValue[j].nameToEnum = createNameIndex(values, 1159 enumToValue[j].nameToEnum_count); 1160 ++j; 1161 } 1162 enumToValue_count = j; 1163 1164 uprv_sortArray(enumToValue, enumToValue_count, sizeof(enumToValue[0]), 1165 compareEnumToValueEntry, NULL, FALSE, &status); 1166 1167 // ------------------------------------------------------------ 1168 // Build PropertyAliases layout in memory 1169 1170 Builder builder(debug); 1171 1172 builder.buildTopLevelProperties(propName, 1173 propNameCount, 1174 propEnum, 1175 PROPERTY_COUNT); 1176 1177 builder.buildValues(enumToValue, 1178 enumToValue_count); 1179 1180 builder.buildStringPool(STRING_TABLE, 1181 STRING_COUNT, 1182 NAME_GROUP, 1183 NAME_GROUP_COUNT); 1184 1185 builder.fixup(); 1186 1187 //////////////////////////////////////////////////////////// 1188 // Write the output file 1189 //////////////////////////////////////////////////////////// 1190 int32_t wlen = writeDataFile(options[3].value, builder); 1191 if (verbose) { 1192 fprintf(stdout, "Output file: %s.%s, %ld bytes\n", 1193 U_ICUDATA_NAME "_" PNAME_DATA_NAME, PNAME_DATA_TYPE, (long)wlen); 1194 } 1195 1196 return 0; // success 1197} 1198 1199int32_t genpname::writeDataFile(const char *destdir, const Builder& builder) { 1200 int32_t length; 1201 int8_t* data = builder.createData(length); 1202 1203 UNewDataMemory *pdata; 1204 UErrorCode status = U_ZERO_ERROR; 1205 1206 pdata = udata_create(destdir, PNAME_DATA_TYPE, PNAME_DATA_NAME, &dataInfo, 1207 useCopyright ? U_COPYRIGHT_STRING : 0, &status); 1208 if (U_FAILURE(status)) { 1209 die("Unable to create data memory"); 1210 } 1211 1212 udata_writeBlock(pdata, data, length); 1213 1214 int32_t dataLength = (int32_t) udata_finish(pdata, &status); 1215 if (U_FAILURE(status)) { 1216 die("Error writing output file"); 1217 } 1218 if (dataLength != length) { 1219 die("Written file doesn't match expected size"); 1220 } 1221 1222 return dataLength; 1223} 1224 1225//eof 1226