1b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*
2b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru**********************************************************************
350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho*   Copyright (C) 2002-2009, International Business Machines
4b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*   Corporation and others.  All Rights Reserved.
5b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru**********************************************************************
6b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*
7b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* File genbrk.c
8b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/
9b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
10b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------
11b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
12b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//   Tool for generating RuleBasedBreakIterator data files (.brk files).
13b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//   .brk files contain the precompiled rules for standard types
14b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//   of iterators - word, line, sentence, etc.
15b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
16b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//   Usage:  genbrk [options] -r rule-file.txt  -o output-file.brk
17b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
18b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//       options:   -v         verbose
19b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//                  -? or -h   help
20b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
//   The input rule file is a plain text file containing break rules
//    in the input format accepted by RuleBasedBreakIterators.  The
//    file can be encoded as UTF-8, or UTF-16 (either endian), or
//    in the default code page (platform dependent).  UTF-encoded
//    files must include a BOM.
26b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
27b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//--------------------------------------------------------------------
28b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
29b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/utypes.h"
30b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/ucnv.h"
31b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/unistr.h"
32b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/rbbi.h"
33b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uclean.h"
34b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/udata.h"
35b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/putil.h"
36b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
37b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "uoptions.h"
38b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unewdata.h"
39b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "ucmndata.h"
40b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "rbbidata.h"
41b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cmemory.h"
42b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
43b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <stdio.h>
44b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <stdlib.h>
45b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <string.h>
46b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
47b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_NAMESPACE_USE
48b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
49b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic char *progName;
50b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic UOption options[]={
51b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UOPTION_HELP_H,             /* 0 */
52b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UOPTION_HELP_QUESTION_MARK, /* 1 */
53b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UOPTION_VERBOSE,            /* 2 */
54b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    { "rules", NULL, NULL, NULL, 'r', UOPT_REQUIRES_ARG, 0 },   /* 3 */
55b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    { "out",   NULL, NULL, NULL, 'o', UOPT_REQUIRES_ARG, 0 },   /* 4 */
56b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UOPTION_ICUDATADIR,         /* 5 */
57b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UOPTION_DESTDIR,            /* 6 */
58b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UOPTION_COPYRIGHT,          /* 7 */
59b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru};
60b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
61b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid usageAndDie(int retCode) {
62b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        printf("Usage: %s [-v] [-options] -r rule-file -o output-file\n", progName);
63b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        printf("\tRead in break iteration rules text and write out the binary data\n"
64b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            "options:\n"
65b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            "\t-h or -? or --help  this usage text\n"
66b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            "\t-V or --version     show a version message\n"
67b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            "\t-c or --copyright   include a copyright notice\n"
68b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            "\t-v or --verbose     turn on verbose output\n"
69b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            "\t-i or --icudatadir  directory for locating any needed intermediate data files,\n"
70b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            "\t                    followed by path, defaults to %s\n"
71b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            "\t-d or --destdir     destination directory, followed by the path\n",
72b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            u_getDataDirectory());
73b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        exit (retCode);
74b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
75b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
76b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
7750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if UCONFIG_NO_BREAK_ITERATION || UCONFIG_NO_FILE_IO
78b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
79b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* dummy UDataInfo cf. udata.h */
80b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic UDataInfo dummyDataInfo = {
81b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    sizeof(UDataInfo),
82b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    0,
83b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
84b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    U_IS_BIG_ENDIAN,
85b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    U_CHARSET_FAMILY,
86b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    U_SIZEOF_UCHAR,
87b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    0,
88b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
89b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    { 0, 0, 0, 0 },                 /* dummy dataFormat */
90b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    { 0, 0, 0, 0 },                 /* dummy formatVersion */
91b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    { 0, 0, 0, 0 }                  /* dummy dataVersion */
92b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru};
93b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
94b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#else
95b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
96b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
97b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//  Set up the ICU data header, defined in ucmndata.h
98b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
99b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruDataHeader dh ={
100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {sizeof(DataHeader),           // Struct MappedData
101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        0xda,
102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        0x27},
103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    {                               // struct UDataInfo
105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        sizeof(UDataInfo),          //     size
106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        0,                          //     reserved
107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        U_IS_BIG_ENDIAN,
108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        U_CHARSET_FAMILY,
109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        U_SIZEOF_UCHAR,
110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        0,                          //     reserved
111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    { 0x42, 0x72, 0x6b, 0x20 },     //     dataFormat="Brk "
113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    { 0xff, 0, 0, 0 },              //     formatVersion.  Filled in later with values
114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                    //      from the RBBI rule builder.  The  values declared
115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                    //      here should never appear in any real RBBI data.
116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        { 4, 1, 0, 0 }              //   dataVersion (Unicode version)
117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }};
118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif
120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//----------------------------------------------------------------------------
122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//  main      for genbrk
124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//
125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//----------------------------------------------------------------------------
126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint  main(int argc, char **argv) {
127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode  status = U_ZERO_ERROR;
128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const char *ruleFileName;
129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const char *outFileName;
130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const char *outDir = NULL;
131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const char *copyright = NULL;
132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Pick up and check the command line arguments,
135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //    using the standard ICU tool utils option handling.
136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    U_MAIN_INIT_ARGS(argc, argv);
138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    progName = argv[0];
139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);
140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(argc<0) {
141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Unrecognized option
142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        fprintf(stderr, "error in command line argument \"%s\"\n", argv[-argc]);
143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        usageAndDie(U_ILLEGAL_ARGUMENT_ERROR);
144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(options[0].doesOccur || options[1].doesOccur) {
147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //  -? or -h for help.
148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        usageAndDie(0);
149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (!(options[3].doesOccur && options[4].doesOccur)) {
152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        fprintf(stderr, "rule file and output file must both be specified.\n");
153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        usageAndDie(U_ILLEGAL_ARGUMENT_ERROR);
154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ruleFileName = options[3].value;
156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    outFileName  = options[4].value;
157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (options[5].doesOccur) {
159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        u_setDataDirectory(options[5].value);
160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    status = U_ZERO_ERROR;
163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* Combine the directory with the file name */
165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(options[6].doesOccur) {
166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        outDir = options[6].value;
167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (options[7].doesOccur) {
169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        copyright = U_COPYRIGHT_STRING;
170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
17250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if UCONFIG_NO_BREAK_ITERATION || UCONFIG_NO_FILE_IO
173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UNewDataMemory *pData;
175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    char msg[1024];
176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* write message with just the name */
17850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    sprintf(msg, "genbrk writes dummy %s because of UCONFIG_NO_BREAK_ITERATION and/or UCONFIG_NO_FILE_IO, see uconfig.h", outFileName);
179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fprintf(stderr, "%s\n", msg);
180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* write the dummy data file */
182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    pData = udata_create(outDir, NULL, outFileName, &dummyDataInfo, NULL, &status);
183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    udata_writeBlock(pData, msg, strlen(msg));
184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    udata_finish(pData, &status);
185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return (int)status;
186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#else
18850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /* Initialize ICU */
18950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    u_init(&status);
19050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (U_FAILURE(status)) {
19150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fprintf(stderr, "%s: can not initialize ICU.  status = %s\n",
19250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            argv[0], u_errorName(status));
19350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        exit(1);
19450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
19550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    status = U_ZERO_ERROR;
196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //  Read in the rule source file
199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    long        result;
201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    long        ruleFileSize;
202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    FILE        *file;
203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    char        *ruleBufferC;
204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    file = fopen(ruleFileName, "rb");
206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if( file == 0 ) {
207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        fprintf(stderr, "Could not open file \"%s\"\n", ruleFileName);
208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        exit(-1);
209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fseek(file, 0, SEEK_END);
211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ruleFileSize = ftell(file);
212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fseek(file, 0, SEEK_SET);
213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ruleBufferC = new char[ruleFileSize+10];
214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result = (long)fread(ruleBufferC, 1, ruleFileSize, file);
216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (result != ruleFileSize)  {
217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        fprintf(stderr, "Error reading file \"%s\"\n", ruleFileName);
218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        exit (-1);
219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ruleBufferC[ruleFileSize]=0;
221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fclose(file);
222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Look for a Unicode Signature (BOM) on the rule file
225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t        signatureLength;
227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const char *   ruleSourceC = ruleBufferC;
228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const char*    encoding = ucnv_detectUnicodeSignature(
229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                           ruleSourceC, ruleFileSize, &signatureLength, &status);
230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) {
231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        exit(status);
232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(encoding!=NULL ){
234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        ruleSourceC  += signatureLength;
235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        ruleFileSize -= signatureLength;
236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Open a converter to take the rule file to UTF-16
240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UConverter* conv;
242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    conv = ucnv_open(encoding, &status);
243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) {
244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        fprintf(stderr, "ucnv_open: ICU Error \"%s\"\n", u_errorName(status));
245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        exit(status);
246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Convert the rules to UChar.
250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //  Preflight first to determine required buffer size.
251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t destCap = ucnv_toUChars(conv,
253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                       NULL,           //  dest,
254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                       0,              //  destCapacity,
255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                       ruleSourceC,
256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                       ruleFileSize,
257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                       &status);
258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (status != U_BUFFER_OVERFLOW_ERROR) {
259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        fprintf(stderr, "ucnv_toUChars: ICU Error \"%s\"\n", u_errorName(status));
260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        exit(status);
261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    };
262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    status = U_ZERO_ERROR;
264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar *ruleSourceU = new UChar[destCap+1];
265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ucnv_toUChars(conv,
266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  ruleSourceU,     //  dest,
267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  destCap+1,
268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  ruleSourceC,
269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  ruleFileSize,
270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  &status);
271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) {
272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        fprintf(stderr, "ucnv_toUChars: ICU Error \"%s\"\n", u_errorName(status));
273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        exit(status);
274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    };
275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ucnv_close(conv);
276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //  Put the source rules into a UnicodeString
280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString ruleSourceS(FALSE, ruleSourceU, destCap);
282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //  Create the break iterator from the rules
285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //     This will compile the rules.
286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UParseError parseError;
288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    parseError.line = 0;
289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    parseError.offset = 0;
290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(ruleSourceS, parseError, status);
291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) {
292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        fprintf(stderr, "createRuleBasedBreakIterator: ICU Error \"%s\"  at line %d, column %d\n",
293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                u_errorName(status), (int)parseError.line, (int)parseError.offset);
294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        exit(status);
295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    };
296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //  Get the compiled rule data from the break iterator.
300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t        outDataSize;
302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const uint8_t  *outData;
303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    outData = bi->getBinaryRules(outDataSize);
304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Copy the data format version numbers from the RBBI data header into the UDataMemory header.
306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uprv_memcpy(dh.info.formatVersion, ((RBBIDataHeader *)outData)->fFormatVersion, sizeof(dh.info.formatVersion));
307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //  Create the output file
310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    size_t bytesWritten;
312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UNewDataMemory *pData;
313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    pData = udata_create(outDir, NULL, outFileName, &(dh.info), copyright, &status);
314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(U_FAILURE(status)) {
315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        fprintf(stderr, "genbrk: Could not open output file \"%s\", \"%s\"\n",
316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                         outFileName, u_errorName(status));
317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        exit(status);
318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //  Write the data itself.
322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    udata_writeBlock(pData, outData, outDataSize);
323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // finish up
324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    bytesWritten = udata_finish(pData, &status);
325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(U_FAILURE(status)) {
326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        fprintf(stderr, "genbrk: error %d writing the output file\n", status);
327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        exit(status);
328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (bytesWritten != outDataSize) {
331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        fprintf(stderr, "Error writing to output file \"%s\"\n", outFileName);
332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        exit(-1);
333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete bi;
336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete[] ruleSourceU;
337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete[] ruleBufferC;
338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    u_cleanup();
339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    printf("genbrk: tool completed successfully.\n");
342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return 0;
343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
347