/*
**********************************************************************
*   Copyright (C) 2002-2015, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
*
* File genbrk.c
*/
9b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
//--------------------------------------------------------------------
//
//   Tool for generating RuleBasedBreakIterator data files (.brk files).
//   .brk files contain the precompiled rules for standard types
//   of iterators - word, line, sentence, etc.
//
//   Usage:  genbrk [options] -r rule-file.txt  -o output-file.brk
//
//       options:   -v         verbose
//                  -? or -h   help (prints the full list of options)
//
//   The input rule file is a plain text file containing break rules
//    in the input format accepted by RuleBasedBreakIterators.  The
//    file can be encoded as UTF-8 or UTF-16 (either endianness), or
//    in the platform-dependent default code page.  UTF-encoded
//    files must include a BOM.
//
//--------------------------------------------------------------------
28b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
29b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/utypes.h"
30b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/ucnv.h"
31b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/unistr.h"
32b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/rbbi.h"
33b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uclean.h"
34b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/udata.h"
35b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/putil.h"
36b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
37b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "uoptions.h"
38b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unewdata.h"
39b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "ucmndata.h"
40b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "rbbidata.h"
41b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cmemory.h"
42b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
43b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <stdio.h>
44b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <stdlib.h>
45b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <string.h>
46b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
47b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_NAMESPACE_USE
48b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
49b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic char *progName;
50b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic UOption options[]={
51b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UOPTION_HELP_H,             /* 0 */
52b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UOPTION_HELP_QUESTION_MARK, /* 1 */
53b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UOPTION_VERBOSE,            /* 2 */
54b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    { "rules", NULL, NULL, NULL, 'r', UOPT_REQUIRES_ARG, 0 },   /* 3 */
55b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    { "out",   NULL, NULL, NULL, 'o', UOPT_REQUIRES_ARG, 0 },   /* 4 */
56b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UOPTION_ICUDATADIR,         /* 5 */
57b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UOPTION_DESTDIR,            /* 6 */
58b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UOPTION_COPYRIGHT,          /* 7 */
59c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert    UOPTION_QUIET,              /* 8 */
60b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru};
61b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
62b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid usageAndDie(int retCode) {
63b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        printf("Usage: %s [-v] [-options] -r rule-file -o output-file\n", progName);
64b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        printf("\tRead in break iteration rules text and write out the binary data\n"
65b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            "options:\n"
66b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            "\t-h or -? or --help  this usage text\n"
67b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            "\t-V or --version     show a version message\n"
68b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            "\t-c or --copyright   include a copyright notice\n"
69b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            "\t-v or --verbose     turn on verbose output\n"
70c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert            "\t-q or --quiet       do not display warnings and progress\n"
71b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            "\t-i or --icudatadir  directory for locating any needed intermediate data files,\n"
72b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            "\t                    followed by path, defaults to %s\n"
73b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            "\t-d or --destdir     destination directory, followed by the path\n",
74b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            u_getDataDirectory());
75b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        exit (retCode);
76b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
77b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
78b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
#if UCONFIG_NO_BREAK_ITERATION || UCONFIG_NO_FILE_IO

// Break iteration and/or file I/O is configured out: main() only writes a
// placeholder data file, described by this all-zero-format UDataInfo.
/* dummy UDataInfo cf. udata.h */
static UDataInfo dummyDataInfo = {
    sizeof(UDataInfo),              /* size */
    0,                              /* reserved */

    U_IS_BIG_ENDIAN,
    U_CHARSET_FAMILY,
    U_SIZEOF_UCHAR,
    0,                              /* reserved */

    { 0, 0, 0, 0 },                 /* dummy dataFormat */
    { 0, 0, 0, 0 },                 /* dummy formatVersion */
    { 0, 0, 0, 0 }                  /* dummy dataVersion */
};

#else

//
//  Set up the ICU data header, defined in ucmndata.h
//
//  main() passes dh.info to udata_create() when it writes the .brk file,
//  after overwriting dh.info.formatVersion with the version reported by the
//  compiled rules.
//
DataHeader dh ={
    {sizeof(DataHeader),           // Struct MappedData
        0xda,                      //     presumably the two ICU data magic
        0x27},                     //     bytes - see ucmndata.h to confirm

    {                               // struct UDataInfo
        sizeof(UDataInfo),          //     size
        0,                          //     reserved
        U_IS_BIG_ENDIAN,
        U_CHARSET_FAMILY,
        U_SIZEOF_UCHAR,
        0,                          //     reserved

        // Numeric codes rather than character literals are used for the
        // dataFormat bytes below.
        { 0x42, 0x72, 0x6b, 0x20 },     //     dataFormat="Brk "
        { 0xff, 0, 0, 0 },              //     formatVersion.  Filled in later with values
                                        //      from the RBBI rule builder.  The  values declared
                                        //      here should never appear in any real RBBI data.
        { 4, 1, 0, 0 }              //   dataVersion (Unicode version)
    }};

#endif
122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
//----------------------------------------------------------------------------
//
//  main      for genbrk
//
//----------------------------------------------------------------------------
128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint  main(int argc, char **argv) {
129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode  status = U_ZERO_ERROR;
130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const char *ruleFileName;
131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const char *outFileName;
132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const char *outDir = NULL;
133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const char *copyright = NULL;
134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Pick up and check the command line arguments,
137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //    using the standard ICU tool utils option handling.
138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    U_MAIN_INIT_ARGS(argc, argv);
140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    progName = argv[0];
141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);
142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(argc<0) {
143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Unrecognized option
144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        fprintf(stderr, "error in command line argument \"%s\"\n", argv[-argc]);
145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        usageAndDie(U_ILLEGAL_ARGUMENT_ERROR);
146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(options[0].doesOccur || options[1].doesOccur) {
149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //  -? or -h for help.
150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        usageAndDie(0);
151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (!(options[3].doesOccur && options[4].doesOccur)) {
154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        fprintf(stderr, "rule file and output file must both be specified.\n");
155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        usageAndDie(U_ILLEGAL_ARGUMENT_ERROR);
156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ruleFileName = options[3].value;
158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    outFileName  = options[4].value;
159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (options[5].doesOccur) {
161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        u_setDataDirectory(options[5].value);
162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    status = U_ZERO_ERROR;
165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* Combine the directory with the file name */
167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(options[6].doesOccur) {
168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        outDir = options[6].value;
169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (options[7].doesOccur) {
171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        copyright = U_COPYRIGHT_STRING;
172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
17450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if UCONFIG_NO_BREAK_ITERATION || UCONFIG_NO_FILE_IO
175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UNewDataMemory *pData;
177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    char msg[1024];
178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* write message with just the name */
18050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    sprintf(msg, "genbrk writes dummy %s because of UCONFIG_NO_BREAK_ITERATION and/or UCONFIG_NO_FILE_IO, see uconfig.h", outFileName);
181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fprintf(stderr, "%s\n", msg);
182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* write the dummy data file */
184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    pData = udata_create(outDir, NULL, outFileName, &dummyDataInfo, NULL, &status);
185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    udata_writeBlock(pData, msg, strlen(msg));
186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    udata_finish(pData, &status);
187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return (int)status;
188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#else
19050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    /* Initialize ICU */
19150294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    u_init(&status);
19250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    if (U_FAILURE(status)) {
19350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        fprintf(stderr, "%s: can not initialize ICU.  status = %s\n",
19450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho            argv[0], u_errorName(status));
19550294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho        exit(1);
19650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    }
19750294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    status = U_ZERO_ERROR;
198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //  Read in the rule source file
201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    long        result;
203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    long        ruleFileSize;
204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    FILE        *file;
205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    char        *ruleBufferC;
206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    file = fopen(ruleFileName, "rb");
208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if( file == 0 ) {
209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        fprintf(stderr, "Could not open file \"%s\"\n", ruleFileName);
210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        exit(-1);
211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fseek(file, 0, SEEK_END);
213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ruleFileSize = ftell(file);
214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fseek(file, 0, SEEK_SET);
215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ruleBufferC = new char[ruleFileSize+10];
216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    result = (long)fread(ruleBufferC, 1, ruleFileSize, file);
218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (result != ruleFileSize)  {
219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        fprintf(stderr, "Error reading file \"%s\"\n", ruleFileName);
220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        exit (-1);
221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ruleBufferC[ruleFileSize]=0;
223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    fclose(file);
224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Look for a Unicode Signature (BOM) on the rule file
227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t        signatureLength;
229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const char *   ruleSourceC = ruleBufferC;
230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const char*    encoding = ucnv_detectUnicodeSignature(
231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                           ruleSourceC, ruleFileSize, &signatureLength, &status);
232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) {
233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        exit(status);
234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(encoding!=NULL ){
236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        ruleSourceC  += signatureLength;
237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        ruleFileSize -= signatureLength;
238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Open a converter to take the rule file to UTF-16
242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UConverter* conv;
244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    conv = ucnv_open(encoding, &status);
245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) {
246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        fprintf(stderr, "ucnv_open: ICU Error \"%s\"\n", u_errorName(status));
247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        exit(status);
248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Convert the rules to UChar.
252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //  Preflight first to determine required buffer size.
253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t destCap = ucnv_toUChars(conv,
255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                       NULL,           //  dest,
256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                       0,              //  destCapacity,
257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                       ruleSourceC,
258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                       ruleFileSize,
259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                       &status);
260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (status != U_BUFFER_OVERFLOW_ERROR) {
261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        fprintf(stderr, "ucnv_toUChars: ICU Error \"%s\"\n", u_errorName(status));
262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        exit(status);
263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    };
264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    status = U_ZERO_ERROR;
266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar *ruleSourceU = new UChar[destCap+1];
267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ucnv_toUChars(conv,
268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  ruleSourceU,     //  dest,
269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  destCap+1,
270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  ruleSourceC,
271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  ruleFileSize,
272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  &status);
273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) {
274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        fprintf(stderr, "ucnv_toUChars: ICU Error \"%s\"\n", u_errorName(status));
275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        exit(status);
276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    };
277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ucnv_close(conv);
278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //  Put the source rules into a UnicodeString
282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString ruleSourceS(FALSE, ruleSourceU, destCap);
284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //  Create the break iterator from the rules
287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //     This will compile the rules.
288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UParseError parseError;
290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    parseError.line = 0;
291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    parseError.offset = 0;
292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(ruleSourceS, parseError, status);
293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) {
294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        fprintf(stderr, "createRuleBasedBreakIterator: ICU Error \"%s\"  at line %d, column %d\n",
295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                u_errorName(status), (int)parseError.line, (int)parseError.offset);
296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        exit(status);
297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    };
298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //  Get the compiled rule data from the break iterator.
302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t        outDataSize;
304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const uint8_t  *outData;
305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    outData = bi->getBinaryRules(outDataSize);
306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Copy the data format version numbers from the RBBI data header into the UDataMemory header.
308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uprv_memcpy(dh.info.formatVersion, ((RBBIDataHeader *)outData)->fFormatVersion, sizeof(dh.info.formatVersion));
309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //  Create the output file
312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //
313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    size_t bytesWritten;
314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UNewDataMemory *pData;
315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    pData = udata_create(outDir, NULL, outFileName, &(dh.info), copyright, &status);
316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(U_FAILURE(status)) {
317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        fprintf(stderr, "genbrk: Could not open output file \"%s\", \"%s\"\n",
318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                         outFileName, u_errorName(status));
319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        exit(status);
320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    //  Write the data itself.
324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    udata_writeBlock(pData, outData, outDataSize);
325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // finish up
326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    bytesWritten = udata_finish(pData, &status);
327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(U_FAILURE(status)) {
328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        fprintf(stderr, "genbrk: error %d writing the output file\n", status);
329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        exit(status);
330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (bytesWritten != outDataSize) {
333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        fprintf(stderr, "Error writing to output file \"%s\"\n", outFileName);
334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        exit(-1);
335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete bi;
338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete[] ruleSourceU;
339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete[] ruleBufferC;
340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    u_cleanup();
341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
343c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert    if(!options[8].doesOccur) {
344c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert        printf("genbrk: tool completed successfully.\n");
345c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert    }
346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return 0;
347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
351