1/******************************************************************************
2 *   Copyright (C) 2008-2010, International Business Machines
3 *   Corporation and others.  All Rights Reserved.
4 *******************************************************************************
5 */
6#include "unicode/utypes.h"
7
8#include <stdio.h>
9#include <stdlib.h>
10#include "unicode/utypes.h"
11#include "unicode/putil.h"
12#include "cmemory.h"
13#include "cstring.h"
14#include "filestrm.h"
15#include "toolutil.h"
16#include "unicode/uclean.h"
17#include "unewdata.h"
18#include "putilimp.h"
19#include "pkg_gencmn.h"
20
/* Capacity, in bytes, of stringStore: the fixed pool that allocString() hands out slices of. */
#define STRING_STORE_SIZE 100000

/* Defaults that createCommonDataFile() substitutes when its name / type arguments are NULL. */
#define COMMON_DATA_NAME U_ICUDATA_NAME
#define DATA_TYPE "dat"
25
26/* ICU package data file format (.dat files) ------------------------------- ***
27
28Description of the data format after the usual ICU data file header
29(UDataInfo etc.).
30
31Format version 1
32
33A .dat package file contains a simple Table of Contents of item names,
34followed by the items themselves:
35
361. ToC table
37
38uint32_t count; - number of items
39UDataOffsetTOCEntry entry[count]; - pair of uint32_t values per item:
40    uint32_t nameOffset; - offset of the item name
41    uint32_t dataOffset; - offset of the item data
42both are byte offsets from the beginning of the data
43
442. item name strings
45
46All item names are stored as char * strings in one block between the ToC table
47and the data items.
48
493. data items
50
51The data items are stored following the item names block.
52Each data item is 16-aligned.
53The data items are stored in the sorted order of their names.
54
Therefore, the top of the name strings block is the offset of the first item,
the length of the last item is the difference between its offset and
the .dat file length, and the length of every other item is the difference
between its offset and the offset of the next item.
59
60----------------------------------------------------------------------------- */
61
/*
 * UDataInfo (cf. udata.h) passed to udata_create() when the .dat file is
 * written. The initializer is positional, so the order of the values must
 * follow the field order declared in udata.h.
 */
static const UDataInfo dataInfo={
    sizeof(UDataInfo),
    0,

    U_IS_BIG_ENDIAN,
    U_CHARSET_FAMILY,
    sizeof(UChar),
    0,

    {0x43, 0x6d, 0x6e, 0x44},     /* dataFormat="CmnD" */
    {1, 0, 0, 0},                 /* formatVersion */
    {3, 0, 0, 0}                  /* dataVersion */
};
76
/* Copy of createCommonDataFile()'s max_size; when non-zero, addFile() ignores files larger than this. */
static uint32_t maxSize;

/* Fixed pool for item names; allocString() returns consecutive slices of it. */
static char stringStore[STRING_STORE_SIZE];
/*
 * stringTop:     index of the first unused byte in stringStore.
 * basenameTotal: running sum of basenameLength values accumulated by addFile()
 *                in data-file mode; used to size the name block of the .dat file.
 */
static uint32_t stringTop=0, basenameTotal=0;

/* One listed item. */
typedef struct {
    /*
     * pathname: in data-file mode, the path used to open the item;
     *           in TOC-source mode, the C symbol name derived from basename.
     * basename: "<package name><tree separator><item name>", the name stored in the ToC.
     */
    char *pathname, *basename;
    /*
     * basenameLength: byte length of basename including its terminating NUL.
     * fileSize:       size of the item file as reported when it was added.
     * basenameOffset, fileOffset: byte offsets assigned when the .dat file is written.
     */
    uint32_t basenameLength, basenameOffset, fileSize, fileOffset;
} File;

/* Number of entries by which addFile() grows the files array. */
#define CHUNK_FILE_COUNT 256
/* Growable table of listed items: fileCount entries are in use, fileMax are allocated. */
static File *files = NULL;
static uint32_t fileCount=0;
static uint32_t fileMax = 0;


/*
 * Optional prefix written in front of each symbol name in the generated TOC
 * source. NOTE(review): nothing in the visible part of this file assigns it,
 * so it stays NULL here.
 */
static char *symPrefix = NULL;
94
95/* prototypes --------------------------------------------------------------- */
96
/* register one listed item in the files table */
static void
addFile(const char *filename, const char *name, const char *source, UBool sourceTOC, UBool verbose);

/* reserve length bytes in stringStore; exits the process when the pool is full */
static char *
allocString(uint32_t length);

/* qsort() comparator: orders File entries by basename */
static int
compareFiles(const void *file1, const void *file2);

/* build a newly allocated "<source><file separator><path>" string */
static char *
pathToFullPath(const char *path, const char *source);

/* map non-tree separator (such as '\') to tree separator ('/') in place. */
static void
fixDirToTreePath(char *s);
112/* -------------------------------------------------------------------------- */
113
114U_CAPI void U_EXPORT2
115createCommonDataFile(const char *destDir, const char *name, const char *entrypointName, const char *type, const char *source, const char *copyRight,
116                     const char *dataFile, uint32_t max_size, UBool sourceTOC, UBool verbose, char *gencmnFileName) {
117    static char buffer[4096];
118    char line[512];
119    char *s;
120    UErrorCode errorCode=U_ZERO_ERROR;
121    uint32_t i, fileOffset, basenameOffset, length, nread;
122    FileStream *in, *file;
123
124    maxSize = max_size;
125
126    if (destDir == NULL) {
127        destDir = u_getDataDirectory();
128    }
129    if (name == NULL) {
130        name = COMMON_DATA_NAME;
131    }
132    if (type == NULL) {
133        type = DATA_TYPE;
134    }
135    if (source == NULL) {
136        source = ".";
137    }
138
139    if (dataFile == NULL) {
140        in = T_FileStream_stdin();
141    } else {
142        in = T_FileStream_open(dataFile, "r");
143        if(in == NULL) {
144            fprintf(stderr, "gencmn: unable to open input file %s\n", dataFile);
145            exit(U_FILE_ACCESS_ERROR);
146        }
147    }
148
149    if (verbose) {
150        if(sourceTOC) {
151            printf("generating %s_%s.c (table of contents source file)\n", name, type);
152        } else {
153            printf("generating %s.%s (common data file with table of contents)\n", name, type);
154        }
155    }
156
157    /* read the list of files and get their lengths */
158    while(T_FileStream_readLine(in, line, sizeof(line))!=NULL) {
159        /* remove trailing newline characters */
160        s=line;
161        while(*s!=0) {
162            if(*s=='\r' || *s=='\n') {
163                *s=0;
164                break;
165            }
166            ++s;
167        }
168
169        /* check for comment */
170
171        if (*line == '#') {
172            continue;
173        }
174
175        /* add the file */
176#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
177        {
178          char *t;
179          while((t = uprv_strchr(line,U_FILE_ALT_SEP_CHAR))) {
180            *t = U_FILE_SEP_CHAR;
181          }
182        }
183#endif
184        addFile(getLongPathname(line), name, source, sourceTOC, verbose);
185    }
186
187    if(in!=T_FileStream_stdin()) {
188        T_FileStream_close(in);
189    }
190
191    if(fileCount==0) {
192        fprintf(stderr, "gencmn: no files listed in %s\n", dataFile == NULL ? "<stdin>" : dataFile);
193        return;
194    }
195
196    /* sort the files by basename */
197    qsort(files, fileCount, sizeof(File), compareFiles);
198
199    if(!sourceTOC) {
200        UNewDataMemory *out;
201
202        /* determine the offsets of all basenames and files in this common one */
203        basenameOffset=4+8*fileCount;
204        fileOffset=(basenameOffset+(basenameTotal+15))&~0xf;
205        for(i=0; i<fileCount; ++i) {
206            files[i].fileOffset=fileOffset;
207            fileOffset+=(files[i].fileSize+15)&~0xf;
208            files[i].basenameOffset=basenameOffset;
209            basenameOffset+=files[i].basenameLength;
210        }
211
212        /* create the output file */
213        out=udata_create(destDir, type, name,
214                         &dataInfo,
215                         copyRight == NULL ? U_COPYRIGHT_STRING : copyRight,
216                         &errorCode);
217        if(U_FAILURE(errorCode)) {
218            fprintf(stderr, "gencmn: udata_create(-d %s -n %s -t %s) failed - %s\n",
219                destDir, name, type,
220                u_errorName(errorCode));
221            exit(errorCode);
222        }
223
224        /* write the table of contents */
225        udata_write32(out, fileCount);
226        for(i=0; i<fileCount; ++i) {
227            udata_write32(out, files[i].basenameOffset);
228            udata_write32(out, files[i].fileOffset);
229        }
230
231        /* write the basenames */
232        for(i=0; i<fileCount; ++i) {
233            udata_writeString(out, files[i].basename, files[i].basenameLength);
234        }
235        length=4+8*fileCount+basenameTotal;
236
237        /* copy the files */
238        for(i=0; i<fileCount; ++i) {
239            /* pad to 16-align the next file */
240            length&=0xf;
241            if(length!=0) {
242                udata_writePadding(out, 16-length);
243            }
244
245            if (verbose) {
246                printf("adding %s (%ld byte%s)\n", files[i].pathname, (long)files[i].fileSize, files[i].fileSize == 1 ? "" : "s");
247            }
248
249            /* copy the next file */
250            file=T_FileStream_open(files[i].pathname, "rb");
251            if(file==NULL) {
252                fprintf(stderr, "gencmn: unable to open listed file %s\n", files[i].pathname);
253                exit(U_FILE_ACCESS_ERROR);
254            }
255            for(nread = 0;;) {
256                length=T_FileStream_read(file, buffer, sizeof(buffer));
257                if(length <= 0) {
258                    break;
259                }
260                nread += length;
261                udata_writeBlock(out, buffer, length);
262            }
263            T_FileStream_close(file);
264            length=files[i].fileSize;
265
266            if (nread != files[i].fileSize) {
267              fprintf(stderr, "gencmn: unable to read %s properly (got %ld/%ld byte%s)\n", files[i].pathname,  (long)nread, (long)files[i].fileSize, files[i].fileSize == 1 ? "" : "s");
268                exit(U_FILE_ACCESS_ERROR);
269            }
270        }
271
272        /* pad to 16-align the last file (cleaner, avoids growing .dat files in icuswap) */
273        length&=0xf;
274        if(length!=0) {
275            udata_writePadding(out, 16-length);
276        }
277
278        /* finish */
279        udata_finish(out, &errorCode);
280        if(U_FAILURE(errorCode)) {
281            fprintf(stderr, "gencmn: udata_finish() failed - %s\n", u_errorName(errorCode));
282            exit(errorCode);
283        }
284    } else {
285        /* write a .c source file with the table of contents */
286        char *filename;
287        FileStream *out;
288
289        /* create the output filename */
290        filename=s=buffer;
291        uprv_strcpy(filename, destDir);
292        s=filename+uprv_strlen(filename);
293        if(s>filename && *(s-1)!=U_FILE_SEP_CHAR) {
294            *s++=U_FILE_SEP_CHAR;
295        }
296        uprv_strcpy(s, name);
297        if(*(type)!=0) {
298            s+=uprv_strlen(s);
299            *s++='_';
300            uprv_strcpy(s, type);
301        }
302        s+=uprv_strlen(s);
303        uprv_strcpy(s, ".c");
304
305        /* open the output file */
306        out=T_FileStream_open(filename, "w");
307        if (gencmnFileName != NULL) {
308            uprv_strcpy(gencmnFileName, filename);
309        }
310        if(out==NULL) {
311            fprintf(stderr, "gencmn: unable to open .c output file %s\n", filename);
312            exit(U_FILE_ACCESS_ERROR);
313        }
314
315        /* write the source file */
316        sprintf(buffer,
317            "/*\n"
318            " * ICU common data table of contents for %s.%s ,\n"
319            " * Automatically generated by icu/source/tools/gencmn/gencmn .\n"
320            " */\n\n"
321            "#include \"unicode/utypes.h\"\n"
322            "#include \"unicode/udata.h\"\n"
323            "\n"
324            "/* external symbol declarations for data */\n",
325            name, type);
326        T_FileStream_writeLine(out, buffer);
327
328        sprintf(buffer, "extern const char\n    %s%s[]", symPrefix?symPrefix:"", files[0].pathname);
329        T_FileStream_writeLine(out, buffer);
330        for(i=1; i<fileCount; ++i) {
331            sprintf(buffer, ",\n    %s%s[]", symPrefix?symPrefix:"", files[i].pathname);
332            T_FileStream_writeLine(out, buffer);
333        }
334        T_FileStream_writeLine(out, ";\n\n");
335
336        sprintf(
337            buffer,
338            "U_EXPORT struct {\n"
339            "    uint16_t headerSize;\n"
340            "    uint8_t magic1, magic2;\n"
341            "    UDataInfo info;\n"
342            "    char padding[%lu];\n"
343            "    uint32_t count, reserved;\n"
344            "    struct {\n"
345            "        const char *name;\n"
346            "        const void *data;\n"
347            "    } toc[%lu];\n"
348            "} U_EXPORT2 %s_dat = {\n"
349            "    32, 0xda, 0x27, {\n"
350            "        %lu, 0,\n"
351            "        %u, %u, %u, 0,\n"
352            "        {0x54, 0x6f, 0x43, 0x50},\n"
353            "        {1, 0, 0, 0},\n"
354            "        {0, 0, 0, 0}\n"
355            "    },\n"
356            "    \"\", %lu, 0, {\n",
357            (unsigned long)32-4-sizeof(UDataInfo),
358            (unsigned long)fileCount,
359            entrypointName,
360            (unsigned long)sizeof(UDataInfo),
361            U_IS_BIG_ENDIAN,
362            U_CHARSET_FAMILY,
363            U_SIZEOF_UCHAR,
364            (unsigned long)fileCount
365        );
366        T_FileStream_writeLine(out, buffer);
367
368        sprintf(buffer, "        { \"%s\", %s%s }", files[0].basename, symPrefix?symPrefix:"", files[0].pathname);
369        T_FileStream_writeLine(out, buffer);
370        for(i=1; i<fileCount; ++i) {
371            sprintf(buffer, ",\n        { \"%s\", %s%s }", files[i].basename, symPrefix?symPrefix:"", files[i].pathname);
372            T_FileStream_writeLine(out, buffer);
373        }
374
375        T_FileStream_writeLine(out, "\n    }\n};\n");
376        T_FileStream_close(out);
377
378        uprv_free(symPrefix);
379    }
380}
381
382static void
383addFile(const char *filename, const char *name, const char *source, UBool sourceTOC, UBool verbose) {
384    char *s;
385    uint32_t length;
386    char *fullPath = NULL;
387
388    if(fileCount==fileMax) {
389      fileMax += CHUNK_FILE_COUNT;
390      files = uprv_realloc(files, fileMax*sizeof(files[0])); /* note: never freed. */
391      if(files==NULL) {
392        fprintf(stderr, "pkgdata/gencmn: Could not allocate %ld bytes for %d files\n", (fileMax*sizeof(files[0])), fileCount);
393        exit(U_MEMORY_ALLOCATION_ERROR);
394      }
395    }
396
397    if(!sourceTOC) {
398        FileStream *file;
399
400        if(uprv_pathIsAbsolute(filename)) {
401            fprintf(stderr, "gencmn: Error: absolute path encountered. Old style paths are not supported. Use relative paths such as 'fur.res' or 'translit%cfur.res'.\n\tBad path: '%s'\n", U_FILE_SEP_CHAR, filename);
402            exit(U_ILLEGAL_ARGUMENT_ERROR);
403        }
404        fullPath = pathToFullPath(filename, source);
405
406        /* store the pathname */
407        length = (uint32_t)(uprv_strlen(filename) + 1 + uprv_strlen(name) + 1);
408        s=allocString(length);
409        uprv_strcpy(s, name);
410        uprv_strcat(s, U_TREE_ENTRY_SEP_STRING);
411        uprv_strcat(s, filename);
412
413        /* get the basename */
414        fixDirToTreePath(s);
415        files[fileCount].basename=s;
416        files[fileCount].basenameLength=length;
417
418        files[fileCount].pathname=fullPath;
419
420        basenameTotal+=length;
421
422        /* try to open the file */
423        file=T_FileStream_open(fullPath, "rb");
424        if(file==NULL) {
425            fprintf(stderr, "gencmn: unable to open listed file %s\n", fullPath);
426            exit(U_FILE_ACCESS_ERROR);
427        }
428
429        /* get the file length */
430        length=T_FileStream_size(file);
431        if(T_FileStream_error(file) || length<=20) {
432            fprintf(stderr, "gencmn: unable to get length of listed file %s\n", fullPath);
433            exit(U_FILE_ACCESS_ERROR);
434        }
435
436        T_FileStream_close(file);
437
438        /* do not add files that are longer than maxSize */
439        if(maxSize && length>maxSize) {
440            if (verbose) {
441                printf("%s ignored (size %ld > %ld)\n", fullPath, (long)length, (long)maxSize);
442            }
443            return;
444        }
445        files[fileCount].fileSize=length;
446    } else {
447        char *t;
448
449        /* get and store the basename */
450        /* need to include the package name */
451        length = (uint32_t)(uprv_strlen(filename) + 1 + uprv_strlen(name) + 1);
452        s=allocString(length);
453        uprv_strcpy(s, name);
454        uprv_strcat(s, U_TREE_ENTRY_SEP_STRING);
455        uprv_strcat(s, filename);
456        fixDirToTreePath(s);
457        files[fileCount].basename=s;
458
459
460        /* turn the basename into an entry point name and store in the pathname field */
461        t=files[fileCount].pathname=allocString(length);
462        while(--length>0) {
463            if(*s=='.' || *s=='-' || *s=='/') {
464                *t='_';
465            } else {
466                *t=*s;
467            }
468            ++s;
469            ++t;
470        }
471        *t=0;
472    }
473    ++fileCount;
474}
475
476static char *
477allocString(uint32_t length) {
478    uint32_t top=stringTop+length;
479    char *p;
480
481    if(top>STRING_STORE_SIZE) {
482        fprintf(stderr, "gencmn: out of memory\n");
483        exit(U_MEMORY_ALLOCATION_ERROR);
484    }
485    p=stringStore+stringTop;
486    stringTop=top;
487    return p;
488}
489
490static char *
491pathToFullPath(const char *path, const char *source) {
492    int32_t length;
493    int32_t newLength;
494    char *fullPath;
495    int32_t n;
496
497    length = (uint32_t)(uprv_strlen(path) + 1);
498    newLength = (length + 1 + (int32_t)uprv_strlen(source));
499    fullPath = uprv_malloc(newLength);
500    if(source != NULL) {
501        uprv_strcpy(fullPath, source);
502        uprv_strcat(fullPath, U_FILE_SEP_STRING);
503    } else {
504        fullPath[0] = 0;
505    }
506    n = (int32_t)uprv_strlen(fullPath);
507    uprv_strcat(fullPath, path);
508
509#if (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
510#if (U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR)
511    /* replace tree separator (such as '/') with file sep char (such as ':' or '\\') */
512    for(;fullPath[n];n++) {
513        if(fullPath[n] == U_FILE_ALT_SEP_CHAR) {
514            fullPath[n] = U_FILE_SEP_CHAR;
515        }
516    }
517#endif
518#endif
519#if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
520    /* replace tree separator (such as '/') with file sep char (such as ':' or '\\') */
521    for(;fullPath[n];n++) {
522        if(fullPath[n] == U_TREE_ENTRY_SEP_CHAR) {
523            fullPath[n] = U_FILE_SEP_CHAR;
524        }
525    }
526#endif
527    return fullPath;
528}
529
530static int
531compareFiles(const void *file1, const void *file2) {
532    /* sort by basename */
533    return uprv_strcmp(((File *)file1)->basename, ((File *)file2)->basename);
534}
535
/*
 * Rewrite s in place so that every file separator (and every alternate file
 * separator, where the platform has a distinct one) becomes the tree entry
 * separator. Compiles to a no-op where all separators are the same character.
 */
static void
fixDirToTreePath(char *s)
{
#if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR) || ((U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR) && (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR))
    char *hit;
#endif
#if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
    hit=uprv_strchr(s, U_FILE_SEP_CHAR);
    while(hit!=NULL) {
        *hit = U_TREE_ENTRY_SEP_CHAR;
        /* the character just written no longer matches, so searching from hit moves on */
        hit=uprv_strchr(hit, U_FILE_SEP_CHAR);
    }
#endif
#if (U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR) && (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
    hit=uprv_strchr(s, U_FILE_ALT_SEP_CHAR);
    while(hit!=NULL) {
        *hit = U_TREE_ENTRY_SEP_CHAR;
        hit=uprv_strchr(hit, U_FILE_ALT_SEP_CHAR);
    }
#endif
}
553