1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/**
4 * Copyright (c) 1999-2016, International Business Machines Corporation and
5 * others. All Rights Reserved.
6 *
7 * Generator for source/i18n/collunsafe.h
8 * see Makefile
9 */
10
11#include <stdio.h>
12#include "unicode/uversion.h"
13#include "unicode/uniset.h"
14#include "collationroot.h"
15#include "collationtailoring.h"
16
/**
 * Define the type of generator to use. Choose one by setting exactly one of
 * the macros below to 1 and the others to 0.
 *
 * Each macro is only given its default when it is not already defined, so a
 * build can select another generator from the compiler command line
 * (for example -DSERIALIZE=0 -DRANGES=1) without editing this file.
 */
#ifndef SERIALIZE
#define SERIALIZE 1   ///< Default: use UnicodeSet.serialize() and a new internal c'tor
#endif
#ifndef RANGES
#define RANGES 0      ///< Enumerate ranges (works, not as fast. No support in collationdatareader.cpp)
#endif
#ifndef PATTERN
#define PATTERN 0     ///< Generate a UnicodeSet pattern (depends on #11891 AND probably slower. No support in collationdatareader.cpp)
#endif
23
24int main(int argc, const char *argv[]) {
25    UErrorCode errorCode = U_ZERO_ERROR;
26
27    // Get the unsafeBackwardsSet
28    const CollationCacheEntry *rootEntry = CollationRoot::getRootCacheEntry(errorCode);
29    if(U_FAILURE(errorCode)) {
30      fprintf(stderr, "Err: %s getting root cache entry\n", u_errorName(errorCode));
31      return 1;
32    }
33    const UVersionInfo &version = rootEntry->tailoring->version;
34    const UnicodeSet *unsafeBackwardSet = rootEntry->tailoring->unsafeBackwardSet;
35    char verString[20];
36    u_versionToString(version, verString);
37    fprintf(stderr, "Generating data for ICU %s, Collation %s\n", U_ICU_VERSION, verString);
38    int32_t rangeCount = unsafeBackwardSet->getRangeCount();
39
40#if SERIALIZE
41    fprintf(stderr, ".. serializing\n");
42    // UnicodeSet serialization
43
44    UErrorCode preflightCode = U_ZERO_ERROR;
45    // preflight
46    int32_t serializedCount = unsafeBackwardSet->serialize(NULL,0,preflightCode);
47    if(U_FAILURE(preflightCode) && preflightCode != U_BUFFER_OVERFLOW_ERROR) {
48      fprintf(stderr, "Err: %s preflighting unicode set\n", u_errorName(preflightCode));
49      return 1;
50    }
51    uint16_t *serializedData = new uint16_t[serializedCount];
52    // serialize
53    unsafeBackwardSet->serialize(serializedData, serializedCount, errorCode);
54    if(U_FAILURE(errorCode)) {
55      delete [] serializedData;
56      fprintf(stderr, "Err: %s serializing unicodeset\n", u_errorName(errorCode));
57      return 1;
58    }
59#endif
60
61#if PATTERN
62    fprintf(stderr,".. pattern. (Note: collationdatareader.cpp does not support this form also see #11891)\n");
63    // attempt to use pattern
64
65    UnicodeString pattern;
66    UnicodeSet set(*unsafeBackwardSet);
67    set.compact();
68    set.toPattern(pattern, FALSE);
69
70    if(U_SUCCESS(errorCode)) {
71      // This fails (bug# ?) - which is why this method was abandoned.
72
73      // UnicodeSet usA(pattern, errorCode);
74      // fprintf(stderr, "\n%s:%d: err creating set A %s\n", __FILE__, __LINE__, u_errorName(errorCode));
75      // return 1;
76    }
77
78
79    const UChar *buf = pattern.getBuffer();
80    int32_t needed = pattern.length();
81
82    // print
83    {
84      char buf2[2048];
85      int32_t len2 = pattern.extract(0, pattern.length(), buf2, "utf-8");
86      buf2[len2]=0;
87      fprintf(stderr,"===\n%s\n===\n", buf2);
88    }
89
90    const UnicodeString unsafeBackwardPattern(FALSE, buf, needed);
91  if(U_SUCCESS(errorCode)) {
92    //UnicodeSet us(unsafeBackwardPattern, errorCode);
93    //    fprintf(stderr, "\n%s:%d: err creating set %s\n", __FILE__, __LINE__, u_errorName(errorCode));
94  } else {
95    fprintf(stderr, "Uset OK - \n");
96  }
97#endif
98
99
100  // Generate the output file.
101
102  printf("// collunsafe.h\n");
103  printf("// %s\n", U_COPYRIGHT_STRING);
104  printf("\n");
105  printf("// To be included by collationdatareader.cpp, and generated by gencolusb.\n");
106  printf("// Machine generated, do not edit.\n");
107  printf("\n");
108  printf("#ifndef COLLUNSAFE_H\n"
109         "#define COLLUNSAFE_H\n"
110         "\n"
111         "#include \"unicode/utypes.h\"\n"
112         "\n"
113         "#define COLLUNSAFE_ICU_VERSION \"" U_ICU_VERSION "\"\n");
114  printf("#define COLLUNSAFE_COLL_VERSION \"%s\"\n", verString);
115
116
117
118#if PATTERN
119  printf("#define COLLUNSAFE_PATTERN 1\n");
120  printf("static const int32_t collunsafe_len = %d;\n", needed);
121  printf("static const UChar collunsafe_pattern[collunsafe_len] = {\n");
122  for(int i=0;i<needed;i++) {
123    if( (i>0) && (i%8 == 0) ) {
124      printf(" // %d\n", i);
125    }
126    printf("0x%04X", buf[i]); // TODO check
127    if(i != (needed-1)) {
128      printf(", ");
129    }
130    }
131  printf(" //%d\n};\n", (needed-1));
132#endif
133
134#if RANGE
135    fprintf(stderr, "COLLUNSAFE_RANGE - no code support in collationdatareader.cpp for this\n");
136    printf("#define COLLUNSAFE_RANGE 1\n");
137    printf("static const int32_t unsafe_rangeCount = %d;\n", rangeCount);
138    printf("static const UChar32 unsafe_ranges[%d] = { \n", rangeCount*2);
139    for(int32_t i=0;i<rangeCount;i++) {
140      printf(" 0x%04X, 0x%04X, // %d\n",
141             unsafeBackwardSet->getRangeStart(i),
142             unsafeBackwardSet->getRangeEnd(i),
143             i);
144    }
145    printf("};\n");
146#endif
147
148#if SERIALIZE
149    printf("#define COLLUNSAFE_SERIALIZE 1\n");
150    printf("static const int32_t unsafe_serializedCount = %d;\n", serializedCount);
151    printf("static const uint16_t unsafe_serializedData[%d] = { \n", serializedCount);
152    for(int32_t i=0;i<serializedCount;i++) {
153      if( (i>0) && (i%8 == 0) ) {
154        printf(" // %d\n", i);
155      }
156      printf("0x%04X", serializedData[i]); // TODO check
157      if(i != (serializedCount-1)) {
158        printf(", ");
159      }
160    }
161    printf("};\n");
162#endif
163
164    printf("#endif\n");
165    fflush(stderr);
166    fflush(stdout);
167    return(U_SUCCESS(errorCode)?0:1);
168}
169