1// © 2016 and later: Unicode, Inc. and others. 2// License & terms of use: http://www.unicode.org/copyright.html 3/** 4 * Copyright (c) 1999-2016, International Business Machines Corporation and 5 * others. All Rights Reserved. 6 * 7 * Generator for source/i18n/collunsafe.h 8 * see Makefile 9 */ 10 11#include <stdio.h> 12#include "unicode/uversion.h" 13#include "unicode/uniset.h" 14#include "collationroot.h" 15#include "collationtailoring.h" 16 17/** 18 * Define the type of generator to use. Choose one. 19 */ 20#define SERIALIZE 1 //< Default: use UnicodeSet.serialize() and a new internal c'tor 21#define RANGES 0 //< Enumerate ranges (works, not as fast. No support in collationdatareader.cpp) 22#define PATTERN 0 //< Generate a UnicodeSet pattern (depends on #11891 AND probably slower. No support in collationdatareader.cpp) 23 24int main(int argc, const char *argv[]) { 25 UErrorCode errorCode = U_ZERO_ERROR; 26 27 // Get the unsafeBackwardsSet 28 const CollationCacheEntry *rootEntry = CollationRoot::getRootCacheEntry(errorCode); 29 if(U_FAILURE(errorCode)) { 30 fprintf(stderr, "Err: %s getting root cache entry\n", u_errorName(errorCode)); 31 return 1; 32 } 33 const UVersionInfo &version = rootEntry->tailoring->version; 34 const UnicodeSet *unsafeBackwardSet = rootEntry->tailoring->unsafeBackwardSet; 35 char verString[20]; 36 u_versionToString(version, verString); 37 fprintf(stderr, "Generating data for ICU %s, Collation %s\n", U_ICU_VERSION, verString); 38 int32_t rangeCount = unsafeBackwardSet->getRangeCount(); 39 40#if SERIALIZE 41 fprintf(stderr, ".. serializing\n"); 42 // UnicodeSet serialization 43 44 UErrorCode preflightCode = U_ZERO_ERROR; 45 // preflight 46 int32_t serializedCount = unsafeBackwardSet->serialize(NULL,0,preflightCode); 47 if(U_FAILURE(preflightCode) && preflightCode != U_BUFFER_OVERFLOW_ERROR) { 48 fprintf(stderr, "Err: %s preflighting unicode set\n", u_errorName(preflightCode)); 49 return 1; 50 } 51 uint16_t *serializedData = new uint16_t[serializedCount]; 52 // serialize 53 unsafeBackwardSet->serialize(serializedData, serializedCount, errorCode); 54 if(U_FAILURE(errorCode)) { 55 delete [] serializedData; 56 fprintf(stderr, "Err: %s serializing unicodeset\n", u_errorName(errorCode)); 57 return 1; 58 } 59#endif 60 61#if PATTERN 62 fprintf(stderr,".. pattern. (Note: collationdatareader.cpp does not support this form also see #11891)\n"); 63 // attempt to use pattern 64 65 UnicodeString pattern; 66 UnicodeSet set(*unsafeBackwardSet); 67 set.compact(); 68 set.toPattern(pattern, FALSE); 69 70 if(U_SUCCESS(errorCode)) { 71 // This fails (bug# ?) - which is why this method was abandoned. 72 73 // UnicodeSet usA(pattern, errorCode); 74 // fprintf(stderr, "\n%s:%d: err creating set A %s\n", __FILE__, __LINE__, u_errorName(errorCode)); 75 // return 1; 76 } 77 78 79 const UChar *buf = pattern.getBuffer(); 80 int32_t needed = pattern.length(); 81 82 // print 83 { 84 char buf2[2048]; 85 int32_t len2 = pattern.extract(0, pattern.length(), buf2, "utf-8"); 86 buf2[len2]=0; 87 fprintf(stderr,"===\n%s\n===\n", buf2); 88 } 89 90 const UnicodeString unsafeBackwardPattern(FALSE, buf, needed); 91 if(U_SUCCESS(errorCode)) { 92 //UnicodeSet us(unsafeBackwardPattern, errorCode); 93 // fprintf(stderr, "\n%s:%d: err creating set %s\n", __FILE__, __LINE__, u_errorName(errorCode)); 94 } else { 95 fprintf(stderr, "Uset OK - \n"); 96 } 97#endif 98 99 100 // Generate the output file. 101 102 printf("// collunsafe.h\n"); 103 printf("// %s\n", U_COPYRIGHT_STRING); 104 printf("\n"); 105 printf("// To be included by collationdatareader.cpp, and generated by gencolusb.\n"); 106 printf("// Machine generated, do not edit.\n"); 107 printf("\n"); 108 printf("#ifndef COLLUNSAFE_H\n" 109 "#define COLLUNSAFE_H\n" 110 "\n" 111 "#include \"unicode/utypes.h\"\n" 112 "\n" 113 "#define COLLUNSAFE_ICU_VERSION \"" U_ICU_VERSION "\"\n"); 114 printf("#define COLLUNSAFE_COLL_VERSION \"%s\"\n", verString); 115 116 117 118#if PATTERN 119 printf("#define COLLUNSAFE_PATTERN 1\n"); 120 printf("static const int32_t collunsafe_len = %d;\n", needed); 121 printf("static const UChar collunsafe_pattern[collunsafe_len] = {\n"); 122 for(int i=0;i<needed;i++) { 123 if( (i>0) && (i%8 == 0) ) { 124 printf(" // %d\n", i); 125 } 126 printf("0x%04X", buf[i]); // TODO check 127 if(i != (needed-1)) { 128 printf(", "); 129 } 130 } 131 printf(" //%d\n};\n", (needed-1)); 132#endif 133 134#if RANGE 135 fprintf(stderr, "COLLUNSAFE_RANGE - no code support in collationdatareader.cpp for this\n"); 136 printf("#define COLLUNSAFE_RANGE 1\n"); 137 printf("static const int32_t unsafe_rangeCount = %d;\n", rangeCount); 138 printf("static const UChar32 unsafe_ranges[%d] = { \n", rangeCount*2); 139 for(int32_t i=0;i<rangeCount;i++) { 140 printf(" 0x%04X, 0x%04X, // %d\n", 141 unsafeBackwardSet->getRangeStart(i), 142 unsafeBackwardSet->getRangeEnd(i), 143 i); 144 } 145 printf("};\n"); 146#endif 147 148#if SERIALIZE 149 printf("#define COLLUNSAFE_SERIALIZE 1\n"); 150 printf("static const int32_t unsafe_serializedCount = %d;\n", serializedCount); 151 printf("static const uint16_t unsafe_serializedData[%d] = { \n", serializedCount); 152 for(int32_t i=0;i<serializedCount;i++) { 153 if( (i>0) && (i%8 == 0) ) { 154 printf(" // %d\n", i); 155 } 156 printf("0x%04X", serializedData[i]); // TODO check 157 if(i != (serializedCount-1)) { 158 printf(", "); 159 } 160 } 161 printf("};\n"); 162#endif 163 164 printf("#endif\n"); 165 fflush(stderr); 166 fflush(stdout); 167 return(U_SUCCESS(errorCode)?0:1); 168} 169