1/******************************************************************************
2 *   Copyright (C) 2008, International Business Machines
3 *   Corporation and others.  All Rights Reserved.
4 *******************************************************************************
5 */
6#include "unicode/utypes.h"
7
8#include <stdio.h>
9#include <stdlib.h>
10#include "unicode/utypes.h"
11#include "unicode/putil.h"
12#include "cmemory.h"
13#include "cstring.h"
14#include "filestrm.h"
15#include "toolutil.h"
16#include "unicode/uclean.h"
17#include "unewdata.h"
18#include "putilimp.h"
19#include "pkg_gencmn.h"
20
/* capacity, in bytes, of the static stringStore[] pool that allocString() hands out pieces of */
#define STRING_STORE_SIZE 100000
/* capacity of the static files[] table; addFile() exits once this many items are registered */
#define MAX_FILE_COUNT 2000

/* package name used by createCommonDataFile() when the caller passes name==NULL */
#define COMMON_DATA_NAME U_ICUDATA_NAME
/* data type (output file name suffix) used by createCommonDataFile() when the caller passes type==NULL */
#define DATA_TYPE "dat"
26
27/* ICU package data file format (.dat files) ------------------------------- ***
28
29Description of the data format after the usual ICU data file header
30(UDataInfo etc.).
31
32Format version 1
33
34A .dat package file contains a simple Table of Contents of item names,
35followed by the items themselves:
36
371. ToC table
38
39uint32_t count; - number of items
40UDataOffsetTOCEntry entry[count]; - pair of uint32_t values per item:
41    uint32_t nameOffset; - offset of the item name
42    uint32_t dataOffset; - offset of the item data
43both are byte offsets from the beginning of the data
44
452. item name strings
46
47All item names are stored as char * strings in one block between the ToC table
48and the data items.
49
503. data items
51
52The data items are stored following the item names block.
53Each data item is 16-aligned.
54The data items are stored in the sorted order of their names.
55
Therefore, the top of the name strings block is the offset of the first item,
the length of the last item is the difference between its offset and
the .dat file length, and the length of every earlier item is the difference
between its own offset and the offset of the next item.
60
61----------------------------------------------------------------------------- */
62
63/* UDataInfo cf. udata.h */
/*
 * Data header descriptor for the generated .dat package.
 * It is handed to udata_create() by createCommonDataFile() when the binary
 * package (not the .c table of contents) is written.
 * The values mirror the build platform (endianness, charset family, UChar size)
 * and identify the payload as format "CmnD", format version 1.
 */
static const UDataInfo dataInfo={
    sizeof(UDataInfo),
    0,

    U_IS_BIG_ENDIAN,
    U_CHARSET_FAMILY,
    sizeof(UChar),
    0,

    {0x43, 0x6d, 0x6e, 0x44},     /* dataFormat="CmnD" */
    {1, 0, 0, 0},                 /* formatVersion */
    {3, 0, 0, 0}                  /* dataVersion */
};
77
/* size limit copied from createCommonDataFile()'s max_size; when non-zero, addFile() ignores larger files */
static uint32_t maxSize;

/* fixed pool from which allocString() carves item names; stringTop is the index of the first unused byte */
static char stringStore[STRING_STORE_SIZE];
/* basenameTotal is the running sum of item-name lengths (each including its NUL) accumulated by addFile() */
static uint32_t stringTop=0, basenameTotal=0;

/*
 * One package item.
 *   pathname       - path that is opened to copy the item; in sourceTOC mode it
 *                    holds the C entry point name derived from the item name instead
 *   basename       - item name as stored in the table of contents ("<package>/<file>")
 *   basenameLength - length of basename including the terminating NUL
 *   basenameOffset - byte offset of the name inside the .dat payload
 *   fileSize       - size of the item file in bytes
 *   fileOffset     - byte offset of the item data inside the .dat payload
 */
typedef struct {
    char *pathname, *basename;
    uint32_t basenameLength, basenameOffset, fileSize, fileOffset;
} File;

/* table of registered items (filled by addFile(), sorted by basename before output) and its fill level */
static File files[MAX_FILE_COUNT];
static uint32_t fileCount=0;

/* optional prefix for the symbols emitted into the .c table of contents; never assigned in the code visible here */
static char *symPrefix = NULL;
92
93/* prototypes --------------------------------------------------------------- */
94
/* register one listed item in files[]; unless sourceTOC is set this also opens the file to get its size */
static void
addFile(const char *filename, const char *name, const char *source, UBool sourceTOC, UBool verbose);

/* reserve length bytes in stringStore[]; exits the program when the pool is exhausted */
static char *
allocString(uint32_t length);

/* qsort() comparator: orders File records by basename */
static int
compareFiles(const void *file1, const void *file2);

/* build a heap-allocated "<source><file separator><path>" string using platform file separators */
static char *
pathToFullPath(const char *path, const char *source);

/* map non-tree separator (such as '\') to tree separator ('/') inplace. */
static void
fixDirToTreePath(char *s);
110/* -------------------------------------------------------------------------- */
111
112U_CAPI void U_EXPORT2
113createCommonDataFile(const char *destDir, const char *name, const char *entrypointName, const char *type, const char *source, const char *copyRight,
114                     const char *dataFile, uint32_t max_size, UBool sourceTOC, UBool verbose, char *gencmnFileName) {
115    static char buffer[4096];
116    char line[512];
117    char *s;
118    UErrorCode errorCode=U_ZERO_ERROR;
119    uint32_t i, fileOffset, basenameOffset, length, nread;
120    FileStream *in, *file;
121
122    maxSize = max_size;
123
124    if (destDir == NULL) {
125        destDir = u_getDataDirectory();
126    }
127    if (name == NULL) {
128        name = COMMON_DATA_NAME;
129    }
130    if (type == NULL) {
131        type = DATA_TYPE;
132    }
133    if (source == NULL) {
134        source = ".";
135    }
136
137    if (dataFile == NULL) {
138        in = T_FileStream_stdin();
139    } else {
140        in = T_FileStream_open(dataFile, "r");
141        if(in == NULL) {
142            fprintf(stderr, "gencmn: unable to open input file %s\n", dataFile);
143            exit(U_FILE_ACCESS_ERROR);
144        }
145    }
146
147    if (verbose) {
148        if(sourceTOC) {
149            printf("generating %s_%s.c (table of contents source file)\n", name, type);
150        } else {
151            printf("generating %s.%s (common data file with table of contents)\n", name, type);
152        }
153    }
154
155    /* read the list of files and get their lengths */
156    while(T_FileStream_readLine(in, line, sizeof(line))!=NULL) {
157        /* remove trailing newline characters */
158        s=line;
159        while(*s!=0) {
160            if(*s=='\r' || *s=='\n') {
161                *s=0;
162                break;
163            }
164            ++s;
165        }
166
167        /* check for comment */
168
169        if (*line == '#') {
170            continue;
171        }
172
173        /* add the file */
174#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
175        {
176          char *t;
177          while((t = uprv_strchr(line,U_FILE_ALT_SEP_CHAR))) {
178            *t = U_FILE_SEP_CHAR;
179          }
180        }
181#endif
182        addFile(getLongPathname(line), name, source, sourceTOC, verbose);
183    }
184
185    if(in!=T_FileStream_stdin()) {
186        T_FileStream_close(in);
187    }
188
189    if(fileCount==0) {
190        fprintf(stderr, "gencmn: no files listed in %s\n", dataFile == NULL ? "<stdin>" : dataFile);
191        return;
192    }
193
194    /* sort the files by basename */
195    qsort(files, fileCount, sizeof(File), compareFiles);
196
197    if(!sourceTOC) {
198        UNewDataMemory *out;
199
200        /* determine the offsets of all basenames and files in this common one */
201        basenameOffset=4+8*fileCount;
202        fileOffset=(basenameOffset+(basenameTotal+15))&~0xf;
203        for(i=0; i<fileCount; ++i) {
204            files[i].fileOffset=fileOffset;
205            fileOffset+=(files[i].fileSize+15)&~0xf;
206            files[i].basenameOffset=basenameOffset;
207            basenameOffset+=files[i].basenameLength;
208        }
209
210        /* create the output file */
211        out=udata_create(destDir, type, name,
212                         &dataInfo,
213                         copyRight == NULL ? U_COPYRIGHT_STRING : copyRight,
214                         &errorCode);
215        if(U_FAILURE(errorCode)) {
216            fprintf(stderr, "gencmn: udata_create(-d %s -n %s -t %s) failed - %s\n",
217                destDir, name, type,
218                u_errorName(errorCode));
219            exit(errorCode);
220        }
221
222        /* write the table of contents */
223        udata_write32(out, fileCount);
224        for(i=0; i<fileCount; ++i) {
225            udata_write32(out, files[i].basenameOffset);
226            udata_write32(out, files[i].fileOffset);
227        }
228
229        /* write the basenames */
230        for(i=0; i<fileCount; ++i) {
231            udata_writeString(out, files[i].basename, files[i].basenameLength);
232        }
233        length=4+8*fileCount+basenameTotal;
234
235        /* copy the files */
236        for(i=0; i<fileCount; ++i) {
237            /* pad to 16-align the next file */
238            length&=0xf;
239            if(length!=0) {
240                udata_writePadding(out, 16-length);
241            }
242
243            if (verbose) {
244                printf("adding %s (%ld byte%s)\n", files[i].pathname, (long)files[i].fileSize, files[i].fileSize == 1 ? "" : "s");
245            }
246
247            /* copy the next file */
248            file=T_FileStream_open(files[i].pathname, "rb");
249            if(file==NULL) {
250                fprintf(stderr, "gencmn: unable to open listed file %s\n", files[i].pathname);
251                exit(U_FILE_ACCESS_ERROR);
252            }
253            for(nread = 0;;) {
254                length=T_FileStream_read(file, buffer, sizeof(buffer));
255                if(length <= 0) {
256                    break;
257                }
258                nread += length;
259                udata_writeBlock(out, buffer, length);
260            }
261            T_FileStream_close(file);
262            length=files[i].fileSize;
263
264            if (nread != files[i].fileSize) {
265              fprintf(stderr, "gencmn: unable to read %s properly (got %ld/%ld byte%s)\n", files[i].pathname,  (long)nread, (long)files[i].fileSize, files[i].fileSize == 1 ? "" : "s");
266                exit(U_FILE_ACCESS_ERROR);
267            }
268        }
269
270        /* pad to 16-align the last file (cleaner, avoids growing .dat files in icuswap) */
271        length&=0xf;
272        if(length!=0) {
273            udata_writePadding(out, 16-length);
274        }
275
276        /* finish */
277        udata_finish(out, &errorCode);
278        if(U_FAILURE(errorCode)) {
279            fprintf(stderr, "gencmn: udata_finish() failed - %s\n", u_errorName(errorCode));
280            exit(errorCode);
281        }
282    } else {
283        /* write a .c source file with the table of contents */
284        char *filename;
285        FileStream *out;
286
287        /* create the output filename */
288        filename=s=buffer;
289        uprv_strcpy(filename, destDir);
290        s=filename+uprv_strlen(filename);
291        if(s>filename && *(s-1)!=U_FILE_SEP_CHAR) {
292            *s++=U_FILE_SEP_CHAR;
293        }
294        uprv_strcpy(s, name);
295        if(*(type)!=0) {
296            s+=uprv_strlen(s);
297            *s++='_';
298            uprv_strcpy(s, type);
299        }
300        s+=uprv_strlen(s);
301        uprv_strcpy(s, ".c");
302
303        /* open the output file */
304        out=T_FileStream_open(filename, "w");
305        if (gencmnFileName != NULL) {
306            uprv_strcpy(gencmnFileName, filename);
307        }
308        if(out==NULL) {
309            fprintf(stderr, "gencmn: unable to open .c output file %s\n", filename);
310            exit(U_FILE_ACCESS_ERROR);
311        }
312
313        /* write the source file */
314        sprintf(buffer,
315            "/*\n"
316            " * ICU common data table of contents for %s.%s ,\n"
317            " * Automatically generated by icu/source/tools/gencmn/gencmn .\n"
318            " */\n\n"
319            "#include \"unicode/utypes.h\"\n"
320            "#include \"unicode/udata.h\"\n"
321            "\n"
322            "/* external symbol declarations for data */\n",
323            name, type);
324        T_FileStream_writeLine(out, buffer);
325
326        sprintf(buffer, "extern const char\n    %s%s[]", symPrefix?symPrefix:"", files[0].pathname);
327        T_FileStream_writeLine(out, buffer);
328        for(i=1; i<fileCount; ++i) {
329            sprintf(buffer, ",\n    %s%s[]", symPrefix?symPrefix:"", files[i].pathname);
330            T_FileStream_writeLine(out, buffer);
331        }
332        T_FileStream_writeLine(out, ";\n\n");
333
334        sprintf(
335            buffer,
336            "U_EXPORT struct {\n"
337            "    uint16_t headerSize;\n"
338            "    uint8_t magic1, magic2;\n"
339            "    UDataInfo info;\n"
340            "    char padding[%lu];\n"
341            "    uint32_t count, reserved;\n"
342            "    struct {\n"
343            "        const char *name;\n"
344            "        const void *data;\n"
345            "    } toc[%lu];\n"
346            "} U_EXPORT2 %s_dat = {\n"
347            "    32, 0xda, 0x27, {\n"
348            "        %lu, 0,\n"
349            "        %u, %u, %u, 0,\n"
350            "        {0x54, 0x6f, 0x43, 0x50},\n"
351            "        {1, 0, 0, 0},\n"
352            "        {0, 0, 0, 0}\n"
353            "    },\n"
354            "    \"\", %lu, 0, {\n",
355            (unsigned long)32-4-sizeof(UDataInfo),
356            (unsigned long)fileCount,
357            entrypointName,
358            (unsigned long)sizeof(UDataInfo),
359            U_IS_BIG_ENDIAN,
360            U_CHARSET_FAMILY,
361            U_SIZEOF_UCHAR,
362            (unsigned long)fileCount
363        );
364        T_FileStream_writeLine(out, buffer);
365
366        sprintf(buffer, "        { \"%s\", %s%s }", files[0].basename, symPrefix?symPrefix:"", files[0].pathname);
367        T_FileStream_writeLine(out, buffer);
368        for(i=1; i<fileCount; ++i) {
369            sprintf(buffer, ",\n        { \"%s\", %s%s }", files[i].basename, symPrefix?symPrefix:"", files[i].pathname);
370            T_FileStream_writeLine(out, buffer);
371        }
372
373        T_FileStream_writeLine(out, "\n    }\n};\n");
374        T_FileStream_close(out);
375
376        uprv_free(symPrefix);
377    }
378}
379
380static void
381addFile(const char *filename, const char *name, const char *source, UBool sourceTOC, UBool verbose) {
382    char *s;
383    uint32_t length;
384    char *fullPath = NULL;
385
386    if(fileCount==MAX_FILE_COUNT) {
387        fprintf(stderr, "gencmn: too many files, maximum is %d\n", MAX_FILE_COUNT);
388        exit(U_BUFFER_OVERFLOW_ERROR);
389    }
390
391    if(!sourceTOC) {
392        FileStream *file;
393
394        if(uprv_pathIsAbsolute(filename)) {
395            fprintf(stderr, "gencmn: Error: absolute path encountered. Old style paths are not supported. Use relative paths such as 'fur.res' or 'translit%cfur.res'.\n\tBad path: '%s'\n", U_FILE_SEP_CHAR, filename);
396            exit(U_ILLEGAL_ARGUMENT_ERROR);
397        }
398        fullPath = pathToFullPath(filename, source);
399
400        /* store the pathname */
401        length = (uint32_t)(uprv_strlen(filename) + 1 + uprv_strlen(name) + 1);
402        s=allocString(length);
403        uprv_strcpy(s, name);
404        uprv_strcat(s, U_TREE_ENTRY_SEP_STRING);
405        uprv_strcat(s, filename);
406
407        /* get the basename */
408        fixDirToTreePath(s);
409        files[fileCount].basename=s;
410        files[fileCount].basenameLength=length;
411
412        files[fileCount].pathname=fullPath;
413
414        basenameTotal+=length;
415
416        /* try to open the file */
417        file=T_FileStream_open(fullPath, "rb");
418        if(file==NULL) {
419            fprintf(stderr, "gencmn: unable to open listed file %s\n", fullPath);
420            exit(U_FILE_ACCESS_ERROR);
421        }
422
423        /* get the file length */
424        length=T_FileStream_size(file);
425        if(T_FileStream_error(file) || length<=20) {
426            fprintf(stderr, "gencmn: unable to get length of listed file %s\n", fullPath);
427            exit(U_FILE_ACCESS_ERROR);
428        }
429
430        T_FileStream_close(file);
431
432        /* do not add files that are longer than maxSize */
433        if(maxSize && length>maxSize) {
434            if (verbose) {
435                printf("%s ignored (size %ld > %ld)\n", fullPath, (long)length, (long)maxSize);
436            }
437            return;
438        }
439        files[fileCount].fileSize=length;
440    } else {
441        char *t;
442
443        /* get and store the basename */
444        /* need to include the package name */
445        length = (uint32_t)(uprv_strlen(filename) + 1 + uprv_strlen(name) + 1);
446        s=allocString(length);
447        uprv_strcpy(s, name);
448        uprv_strcat(s, U_TREE_ENTRY_SEP_STRING);
449        uprv_strcat(s, filename);
450        fixDirToTreePath(s);
451        files[fileCount].basename=s;
452
453
454        /* turn the basename into an entry point name and store in the pathname field */
455        t=files[fileCount].pathname=allocString(length);
456        while(--length>0) {
457            if(*s=='.' || *s=='-' || *s=='/') {
458                *t='_';
459            } else {
460                *t=*s;
461            }
462            ++s;
463            ++t;
464        }
465        *t=0;
466    }
467    ++fileCount;
468}
469
470static char *
471allocString(uint32_t length) {
472    uint32_t top=stringTop+length;
473    char *p;
474
475    if(top>STRING_STORE_SIZE) {
476        fprintf(stderr, "gencmn: out of memory\n");
477        exit(U_MEMORY_ALLOCATION_ERROR);
478    }
479    p=stringStore+stringTop;
480    stringTop=top;
481    return p;
482}
483
484static char *
485pathToFullPath(const char *path, const char *source) {
486    int32_t length;
487    int32_t newLength;
488    char *fullPath;
489    int32_t n;
490
491    length = (uint32_t)(uprv_strlen(path) + 1);
492    newLength = (length + 1 + (int32_t)uprv_strlen(source));
493    fullPath = uprv_malloc(newLength);
494    if(source != NULL) {
495        uprv_strcpy(fullPath, source);
496        uprv_strcat(fullPath, U_FILE_SEP_STRING);
497    } else {
498        fullPath[0] = 0;
499    }
500    n = (int32_t)uprv_strlen(fullPath);
501    uprv_strcat(fullPath, path);
502
503#if (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
504#if (U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR)
505    /* replace tree separator (such as '/') with file sep char (such as ':' or '\\') */
506    for(;fullPath[n];n++) {
507        if(fullPath[n] == U_FILE_ALT_SEP_CHAR) {
508            fullPath[n] = U_FILE_SEP_CHAR;
509        }
510    }
511#endif
512#endif
513#if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
514    /* replace tree separator (such as '/') with file sep char (such as ':' or '\\') */
515    for(;fullPath[n];n++) {
516        if(fullPath[n] == U_TREE_ENTRY_SEP_CHAR) {
517            fullPath[n] = U_FILE_SEP_CHAR;
518        }
519    }
520#endif
521    return fullPath;
522}
523
524static int
525compareFiles(const void *file1, const void *file2) {
526    /* sort by basename */
527    return uprv_strcmp(((File *)file1)->basename, ((File *)file2)->basename);
528}
529
/*
 * Rewrite s in place so that every platform file separator, and every
 * alternate file separator, becomes the tree separator U_TREE_ENTRY_SEP_CHAR.
 * Compiles to an empty function when all separators are the same character.
 */
static void
fixDirToTreePath(char *s)
{
#if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR) || ((U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR) && (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR))
    char *cursor;
#endif
#if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
    /* primary file separator -> tree separator */
    cursor = s;
    while((cursor = uprv_strchr(cursor, U_FILE_SEP_CHAR)) != NULL) {
        *cursor = U_TREE_ENTRY_SEP_CHAR;
    }
#endif
#if (U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR) && (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
    /* alternate file separator -> tree separator */
    cursor = s;
    while((cursor = uprv_strchr(cursor, U_FILE_ALT_SEP_CHAR)) != NULL) {
        *cursor = U_TREE_ENTRY_SEP_CHAR;
    }
#endif
}
547