1b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*****************************************************************************
2b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*
3b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*   Copyright (C) 1999-2011, International Business Machines
4b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*   Corporation and others.  All Rights Reserved.
5b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*
6b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru******************************************************************************/
7b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
8b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*
9b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * uconv(1): an iconv(1)-like converter using ICU.
10b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
11b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Original code by Jonas Utterstr&#x00F6;m <jonas.utterstrom@vittran.norrnod.se>
12b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * contributed in 1999.
13b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
14b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Conversion to the C conversion API and many improvements by
15b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Yves Arrouye <yves@realnames.com>, current maintainer.
16b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
17b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Markus Scherer maintainer from 2003.
18b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * See source code repository history for changes.
19b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
20b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
21b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <unicode/utypes.h>
22b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <unicode/putil.h>
23b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <unicode/ucnv.h>
24b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <unicode/uenum.h>
25b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <unicode/unistr.h>
26b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <unicode/translit.h>
27b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <unicode/uset.h>
28b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <unicode/uclean.h>
29b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
30b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <stdio.h>
31b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <errno.h>
32b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <string.h>
33b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <stdlib.h>
34b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
35b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cmemory.h"
36b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cstring.h"
37b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "ustrfmt.h"
38b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
39b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uwmsg.h"
40b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
41b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_NAMESPACE_USE
42b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
43b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#if (defined(U_WINDOWS) || defined(U_CYGWIN) || defined(U_MINGW)) && !defined(__STRICT_ANSI__)
44b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <io.h>
45b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <fcntl.h>
46b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if defined(U_WINDOWS)
47b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define USE_FILENO_BINARY_MODE 1
48b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* Windows likes to rename Unix-like functions */
49b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifndef fileno
50b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define fileno _fileno
51b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif
52b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifndef setmode
53b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define setmode _setmode
54b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif
55b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifndef O_BINARY
56b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define O_BINARY _O_BINARY
57b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif
58b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif
59b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif
60b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
61b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifdef UCONVMSG_LINK
62b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* below from the README */
63b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/utypes.h"
64b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/udata.h"
65b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CFUNC char uconvmsg_dat[];
66b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif
67b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
/* Number of elements in a true array (not a pointer), as an int32_t.
   The whole expansion is parenthesized so that it behaves as a single
   operand wherever it is used, e.g. after sizeof or next to other
   operators. */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))

/* Default buffer size. */
#define DEFAULT_BUFSZ   4096
/* Name of the resource bundle that holds this tool's messages. */
#define UCONVMSG "uconvmsg"
72b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
73b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic UResourceBundle *gBundle = 0;    /* Bundle containing messages. */
74b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
75b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*
76b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Initialize the message bundle so that message strings can be fetched
77b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * by u_wmsg().
78b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
79b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
80b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
81b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void initMsg(const char *pname) {
82b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    static int ps = 0;
83b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
84b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (!ps) {
85b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        char dataPath[2048];        /* XXX Sloppy: should be PATH_MAX. */
86b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UErrorCode err = U_ZERO_ERROR;
87b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
88b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        ps = 1;
89b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
90b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* Set up our static data - if any */
91b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifdef UCONVMSG_LINK
92b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        udata_setAppData(UCONVMSG, (const void*) uconvmsg_dat, &err);
93b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (U_FAILURE(err)) {
94b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          fprintf(stderr, "%s: warning, problem installing our static resource bundle data uconvmsg: %s - trying anyways.\n",
95b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  pname, u_errorName(err));
96b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          err = U_ZERO_ERROR; /* It may still fail */
97b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
98b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif
99b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* Get messages. */
101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        gBundle = u_wmsg_setPath(UCONVMSG, &err);
102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (U_FAILURE(err)) {
103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            fprintf(stderr,
104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    "%s: warning: couldn't open bundle %s: %s\n",
105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    pname, UCONVMSG, u_errorName(err));
106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifdef UCONVMSG_LINK
107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            fprintf(stderr,
108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    "%s: setAppData was called, internal data %s failed to load\n",
109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        pname, UCONVMSG);
110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif
111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            err = U_ZERO_ERROR;
113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /* that was try #1, try again with a path */
114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            uprv_strcpy(dataPath, u_getDataDirectory());
115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            uprv_strcat(dataPath, U_FILE_SEP_STRING);
116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            uprv_strcat(dataPath, UCONVMSG);
117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            gBundle = u_wmsg_setPath(dataPath, &err);
119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (U_FAILURE(err)) {
120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                fprintf(stderr,
121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    "%s: warning: still couldn't open bundle %s: %s\n",
122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    pname, dataPath, u_errorName(err));
123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                fprintf(stderr, "%s: warning: messages will not be displayed\n", pname);
124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* Mapping of callback names to the callbacks passed to the converter
130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   API. */
131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic struct callback_ent {
133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const char *name;
134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UConverterFromUCallback fromu;
135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const void *fromuctxt;
136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UConverterToUCallback tou;
137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const void *touctxt;
138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} transcode_callbacks[] = {
139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    { "substitute",
140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      UCNV_FROM_U_CALLBACK_SUBSTITUTE, 0,
141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      UCNV_TO_U_CALLBACK_SUBSTITUTE, 0 },
142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    { "skip",
143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      UCNV_FROM_U_CALLBACK_SKIP, 0,
144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      UCNV_TO_U_CALLBACK_SKIP, 0 },
145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    { "stop",
146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      UCNV_FROM_U_CALLBACK_STOP, 0,
147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      UCNV_TO_U_CALLBACK_STOP, 0 },
148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    { "escape",
149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      UCNV_FROM_U_CALLBACK_ESCAPE, 0,
150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      UCNV_TO_U_CALLBACK_ESCAPE, 0},
151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    { "escape-icu",
152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_ICU,
153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_ICU },
154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    { "escape-java",
155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_JAVA,
156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_JAVA },
157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    { "escape-c",
158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_C,
159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_C },
160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    { "escape-xml",
161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX,
162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX },
163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    { "escape-xml-hex",
164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX,
165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX },
166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    { "escape-xml-dec",
167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC,
168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC },
169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    { "escape-unicode", UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_UNICODE,
170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_UNICODE }
171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru};
172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* Return a pointer to a callback record given its name. */
174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const struct callback_ent *findCallback(const char *name) {
176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int i, count =
177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        sizeof(transcode_callbacks) / sizeof(*transcode_callbacks);
178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* We'll do a linear search, there aren't many of them and bsearch()
180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru       may not be that portable. */
181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (i = 0; i < count; ++i) {
183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (!uprv_stricmp(name, transcode_callbacks[i].name)) {
184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return &transcode_callbacks[i];
185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return 0;
189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
/* Print converter information. If lookfor is set, only that converter will
   be printed, otherwise all converters will be printed. If canon is
   nonzero, tags and aliases for each converter are printed too, in the
   format expected for convrtrs.txt(5). */
195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int printConverters(const char *pname, const char *lookfor,
197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool canon)
198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode err = U_ZERO_ERROR;
200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t num;
201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint16_t num_stds;
202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const char **stds;
203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* If there is a specified name, just handle that now. */
205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (lookfor) {
207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (!canon) {
208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            printf("%s\n", lookfor);
209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return 0;
210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /*  Because we are printing a canonical name, we need the
212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            true converter name. We've done that already except for
213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            the default name (because we want to print the exact
214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            name one would get when calling ucnv_getDefaultName()
215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            in non-canon mode). But since we do not know at this
216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            point if we have the default name or something else, we
217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            need to normalize again to the canonical converter
218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            name. */
219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            const char *truename = ucnv_getAlias(lookfor, 0, &err);
221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (U_SUCCESS(err)) {
222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                lookfor = truename;
223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else {
224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                err = U_ZERO_ERROR;
225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* Print converter names. We come here for one of two reasons: we
230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru       are printing all the names (lookfor was null), or we have a
231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru       single converter to print but in canon mode, hence we need to
232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru       get to it in order to print everything. */
233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    num = ucnv_countAvailable();
235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (num <= 0) {
236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        initMsg(pname);
237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        u_wmsg(stderr, "cantGetNames");
238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return -1;
239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (lookfor) {
241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        num = 1;                /* We know where we want to be. */
242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    num_stds = ucnv_countStandards();
245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    stds = (const char **) uprv_malloc(num_stds * sizeof(*stds));
246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (!stds) {
247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        u_wmsg(stderr, "cantGetTag", u_wmsg_errorName(U_MEMORY_ALLOCATION_ERROR));
248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return -1;
249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        uint16_t s;
251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (canon) {
253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            printf("{ ");
254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for (s = 0; s < num_stds; ++s) {
256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            stds[s] = ucnv_getStandard(s, &err);
257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (canon) {
258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                printf("%s ", stds[s]);
259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (U_FAILURE(err)) {
261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                u_wmsg(stderr, "cantGetTag", u_wmsg_errorName(err));
262c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                goto error_cleanup;
263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (canon) {
266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            puts("}");
267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (int32_t i = 0; i < num; i++) {
271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        const char *name;
272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        uint16_t num_aliases;
273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* Set the name either to what we are looking for, or
275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        to the current converter name. */
276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (lookfor) {
278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            name = lookfor;
279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            name = ucnv_getAvailableName(i);
281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* Get all the aliases associated to the name. */
284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        err = U_ZERO_ERROR;
286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        num_aliases = ucnv_countAliases(name, &err);
287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (U_FAILURE(err)) {
288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            printf("%s", name);
289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UnicodeString str(name, "");
291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            putchar('\t');
292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            u_wmsg(stderr, "cantGetAliases", str.getTerminatedBuffer(),
293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                u_wmsg_errorName(err));
294c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            goto error_cleanup;
295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            uint16_t a, s, t;
297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /* Write all the aliases and their tags. */
299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            for (a = 0; a < num_aliases; ++a) {
301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                const char *alias = ucnv_getAlias(name, a, &err);
302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (U_FAILURE(err)) {
304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    UnicodeString str(name, "");
305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    putchar('\t');
306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    u_wmsg(stderr, "cantGetAliases", str.getTerminatedBuffer(),
307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        u_wmsg_errorName(err));
308c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    goto error_cleanup;
309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                /* Print the current alias so that it looks right. */
312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                printf("%s%s%s", (canon ? (a == 0? "" : "\t" ) : "") ,
313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                 alias,
314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                 (canon ? "" : " "));
315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                /* Look (slowly, linear searching) for a tag. */
317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (canon) {
319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    /* -1 to skip the last standard */
320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    for (s = t = 0; s < num_stds-1; ++s) {
321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        UEnumeration *nameEnum = ucnv_openStandardNames(name, stds[s], &err);
322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        if (U_SUCCESS(err)) {
323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            /* List the standard tags */
324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            const char *standardName;
325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            UBool isFirst = TRUE;
326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            UErrorCode enumError = U_ZERO_ERROR;
327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            while ((standardName = uenum_next(nameEnum, NULL, &enumError))) {
328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                /* See if this alias is supported by this standard. */
329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                if (!strcmp(standardName, alias)) {
330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                    if (!t) {
331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                        printf(" {");
332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                        t = 1;
333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                    }
334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                    /* Print a * after the default standard name */
335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                    printf(" %s%s", stds[s], (isFirst ? "*" : ""));
336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                }
337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                isFirst = FALSE;
338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            }
339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        }
340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if (t) {
342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        printf(" }");
343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                /* Terminate this entry. */
346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (canon) {
347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    puts("");
348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                /* Move on. */
351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /* Terminate this entry. */
353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (!canon) {
354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                puts("");
355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* Free temporary data. */
360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uprv_free(stds);
362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* Success. */
364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return 0;
366c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruerror_cleanup:
367c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    uprv_free(stds);
368c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    return -1;
369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* Print all available transliterators. If canon is non zero, print
372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   one transliterator per line. */
373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int printTransliterators(UBool canon)
375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if UCONFIG_NO_TRANSLITERATION
377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    printf("no transliterators available because of UCONFIG_NO_TRANSLITERATION, see uconfig.h\n");
378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return 1;
379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#else
38027f654740f2a26ad62a5c155af9199af9e69b889claireho    UErrorCode status = U_ZERO_ERROR;
38127f654740f2a26ad62a5c155af9199af9e69b889claireho    UEnumeration *ids = utrans_openIDs(&status);
38227f654740f2a26ad62a5c155af9199af9e69b889claireho    int32_t i, numtrans = uenum_count(ids, &status);
383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    char sepchar = canon ? '\n' : ' ';
385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
38627f654740f2a26ad62a5c155af9199af9e69b889claireho    for (i = 0; U_SUCCESS(status)&& (i < numtrans); ++i) {
38727f654740f2a26ad62a5c155af9199af9e69b889claireho    	int32_t len;
38827f654740f2a26ad62a5c155af9199af9e69b889claireho    	const char *nextTrans = uenum_next(ids, &len, &status);
389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
39027f654740f2a26ad62a5c155af9199af9e69b889claireho        printf("%s", nextTrans);
391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (i < numtrans - 1) {
392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            putchar(sepchar);
393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
39627f654740f2a26ad62a5c155af9199af9e69b889claireho    uenum_close(ids);
39727f654740f2a26ad62a5c155af9199af9e69b889claireho
398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* Add a terminating newline if needed. */
399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (sepchar != '\n') {
401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        putchar('\n');
402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* Success. */
405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return 0;
407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif
408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
// Named code unit constants, listed in ascending code point order.
enum {
    uLF  = 0x000a,      // line feed
    uCR  = 0x000d,      // carriage return
    uSP  = 0x0020,      // space
    uNL  = 0x0085,      // next line (NEL)
    uLS  = 0x2028,      // line separator
    uPS  = 0x2029,      // paragraph separator
    uSig = 0xfeff       // Unicode signature (BOM) character
};
419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic inline int32_t
421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QuerugetChunkLimit(const UnicodeString &prev, const UnicodeString &s) {
422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // find one of
423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // CR, LF, CRLF, NL, LS, PS
424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // for paragraph ends (see UAX #13/Unicode 4)
425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // and include it in the chunk
426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // all of these characters are on the BMP
427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // do not include FF or VT in case they are part of a paragraph
428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // (important for bidi contexts)
429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    static const UChar paraEnds[] = {
430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        0xd, 0xa, 0x85, 0x2028, 0x2029
431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    };
432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    enum {
433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        iCR, iLF, iNL, iLS, iPS, iCount
434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    };
435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // first, see if there is a CRLF split between prev and s
437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (prev.endsWith(paraEnds + iCR, 1)) {
438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (s.startsWith(paraEnds + iLF, 1)) {
439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return 1; // split CRLF, include the LF
440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (!s.isEmpty()) {
441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return 0; // complete the last chunk
442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return -1; // wait for actual further contents to arrive
444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const UChar *u = s.getBuffer(), *limit = u + s.length();
448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar c;
449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    while (u < limit) {
451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        c = *u++;
452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (
453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            ((c < uSP) && (c == uCR || c == uLF)) ||
454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            (c == uNL) ||
455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            ((c & uLS) == uLS)
456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        ) {
457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (c == uCR) {
458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // check for CRLF
459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (u == limit) {
460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    return -1; // LF may be in the next chunk
461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else if (*u == uLF) {
462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ++u; // include the LF in this chunk
463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return (int32_t)(u - s.getBuffer());
466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return -1; // continue collecting the chunk
470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
// How a converter relates to the U+FEFF Unicode signature character (BOM).
enum {
    CNV_NO_FEFF   = 0,  // U+FEFF cannot be converted
    CNV_WITH_FEFF = 1,  // U+FEFF can be converted
    CNV_ADDS_FEFF = 2   // the signature is added/detected automatically
};
477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic inline UChar
479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QuerunibbleToHex(uint8_t n) {
480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    n &= 0xf;
481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return
482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        n <= 9 ?
483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            (UChar)(0x30 + n) :
484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            (UChar)((0x61 - 10) + n);
485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// check the converter's Unicode signature properties;
488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// the fromUnicode side of the converter must be in its initial state
489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// and will be reset again if it was used
490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int32_t
491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QuerucnvSigType(UConverter *cnv) {
492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode err;
493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t result;
494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // test if the output charset can convert U+FEFF
496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    USet *set = uset_open(1, 0);
497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    err = U_ZERO_ERROR;
498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &err);
499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_SUCCESS(err) && uset_contains(set, uSig)) {
500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        result = CNV_WITH_FEFF;
501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        result = CNV_NO_FEFF; // an error occurred or U+FEFF cannot be converted
503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uset_close(set);
505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (result == CNV_WITH_FEFF) {
507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // test if the output charset emits a signature anyway
508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        const UChar a[1] = { 0x61 }; // "a"
509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        const UChar *in;
510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        char buffer[20];
512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        char *out;
513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        in = a;
515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        out = buffer;
516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        err = U_ZERO_ERROR;
517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        ucnv_fromUnicode(cnv,
518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            &out, buffer + sizeof(buffer),
519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            &in, a + 1,
520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            NULL, TRUE, &err);
521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        ucnv_resetFromUnicode(cnv);
522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (NULL != ucnv_detectUnicodeSignature(buffer, (int32_t)(out - buffer), NULL, &err) &&
524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            U_SUCCESS(err)
525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        ) {
526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            result = CNV_ADDS_FEFF;
527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return result;
531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruclass ConvertFile {
534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querupublic:
535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ConvertFile() :
536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        buf(NULL), outbuf(NULL), fromoffsets(NULL),
537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        bufsz(0), signature(0) {}
538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    void
540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    setBufferSize(size_t bufferSize) {
541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        bufsz = bufferSize;
542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        buf = new char[2 * bufsz];
544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        outbuf = buf + bufsz;
545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // +1 for an added U+FEFF in the intermediate Unicode buffer
547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        fromoffsets = new int32_t[bufsz + 1];
548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ~ConvertFile() {
551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        delete [] buf;
552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        delete [] fromoffsets;
553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool convertFile(const char *pname,
556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                      const char *fromcpage,
557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                      UConverterToUCallback toucallback,
558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                      const void *touctxt,
559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                      const char *tocpage,
560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                      UConverterFromUCallback fromucallback,
561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                      const void *fromuctxt,
562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                      UBool fallback,
563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                      const char *translit,
564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                      const char *infilestr,
565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                      FILE * outfile, int verbose);
566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruprivate:
567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    friend int main(int argc, char **argv);
568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    char *buf, *outbuf;
570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t *fromoffsets;
571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    size_t bufsz;
573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int8_t signature; // add (1) or remove (-1) a U+FEFF Unicode signature character
574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru};
575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Convert a file from one encoding to another
577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool
578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruConvertFile::convertFile(const char *pname,
579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                         const char *fromcpage,
580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                         UConverterToUCallback toucallback,
581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                         const void *touctxt,
582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                         const char *tocpage,
583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                         UConverterFromUCallback fromucallback,
584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                         const void *fromuctxt,
585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                         UBool fallback,
586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                         const char *translit,
587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                         const char *infilestr,
588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                         FILE * outfile, int verbose)
589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    FILE *infile;
591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool ret = TRUE;
592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UConverter *convfrom = 0;
593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UConverter *convto = 0;
594b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode err = U_ZERO_ERROR;
595b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool flush;
596b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const char *cbufp, *prevbufp;
597b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    char *bufp;
598b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
599b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t infoffset = 0, outfoffset = 0;   /* Where we are in the file, for error reporting. */
600b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const UChar *unibuf, *unibufbp;
602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar *unibufp;
603b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
604b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    size_t rd, wr;
605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_TRANSLITERATION
607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    Transliterator *t = 0;      // Transliterator acting on Unicode data.
608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString chunk;        // One chunk of the text being collected for transformation.
609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif
610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString u;            // String to do the transliteration.
611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t ulen;
612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
613b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // use conversion offsets for error messages
614b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // unless a transliterator is used -
615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // a text transformation will reorder characters in unpredictable ways
616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool useOffsets = TRUE;
617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Open the correct input file or connect to stdin for reading input
619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
620b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (infilestr != 0 && strcmp(infilestr, "-")) {
621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        infile = fopen(infilestr, "rb");
622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (infile == 0) {
623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UnicodeString str1(infilestr, "");
624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            str1.append((UChar32) 0);
625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UnicodeString str2(strerror(errno), "");
626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            str2.append((UChar32) 0);
627b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            initMsg(pname);
628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            u_wmsg(stderr, "cantOpenInputF", str1.getBuffer(), str2.getBuffer());
629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return FALSE;
630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
631b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        infilestr = "-";
633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        infile = stdin;
634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifdef USE_FILENO_BINARY_MODE
635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (setmode(fileno(stdin), O_BINARY) == -1) {
636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            initMsg(pname);
637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            u_wmsg(stderr, "cantSetInBinMode");
638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return FALSE;
639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif
641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (verbose) {
644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        fprintf(stderr, "%s:\n", infilestr);
645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_TRANSLITERATION
648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Create transliterator as needed.
649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (translit != NULL && *translit) {
651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UParseError parse;
652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString str(translit), pestr;
653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* Create from rules or by ID as needed. */
655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        parse.line = -1;
657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (uprv_strchr(translit, ':') || uprv_strchr(translit, '>') || uprv_strchr(translit, '<') || uprv_strchr(translit, '>')) {
659b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            t = Transliterator::createFromRules("Uconv", str, UTRANS_FORWARD, parse, err);
660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
661b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            t = Transliterator::createInstance(translit, UTRANS_FORWARD, err);
662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (U_FAILURE(err)) {
665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            str.append((UChar32) 0);
666b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            initMsg(pname);
667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (parse.line >= 0) {
669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                UChar linebuf[20], offsetbuf[20];
670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                uprv_itou(linebuf, 20, parse.line, 10, 0);
671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                uprv_itou(offsetbuf, 20, parse.offset, 10, 0);
672b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                u_wmsg(stderr, "cantCreateTranslitParseErr", str.getTerminatedBuffer(),
673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    u_wmsg_errorName(err), linebuf, offsetbuf);
674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else {
675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                u_wmsg(stderr, "cantCreateTranslit", str.getTerminatedBuffer(),
676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    u_wmsg_errorName(err));
677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (t) {
680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                delete t;
681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                t = 0;
682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            goto error_exit;
684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
686b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        useOffsets = FALSE;
687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif
689b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
690b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Create codepage converter. If the codepage or its aliases weren't
691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // available, it returns NULL and a failure code. We also set the
692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // callbacks, and return errors in the same way.
693b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    convfrom = ucnv_open(fromcpage, &err);
695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(err)) {
696b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString str(fromcpage, "");
697b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        initMsg(pname);
698b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        u_wmsg(stderr, "cantOpenFromCodeset", str.getTerminatedBuffer(),
699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            u_wmsg_errorName(err));
700b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        goto error_exit;
701b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
702b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ucnv_setToUCallBack(convfrom, toucallback, touctxt, 0, 0, &err);
703b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(err)) {
704b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        initMsg(pname);
705b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        u_wmsg(stderr, "cantSetCallback", u_wmsg_errorName(err));
706b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        goto error_exit;
707b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
708b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
709b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    convto = ucnv_open(tocpage, &err);
710b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(err)) {
711b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString str(tocpage, "");
712b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        initMsg(pname);
713b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        u_wmsg(stderr, "cantOpenToCodeset", str.getTerminatedBuffer(),
714b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            u_wmsg_errorName(err));
715b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        goto error_exit;
716b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
717b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ucnv_setFromUCallBack(convto, fromucallback, fromuctxt, 0, 0, &err);
718b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(err)) {
719b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        initMsg(pname);
720b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        u_wmsg(stderr, "cantSetCallback", u_wmsg_errorName(err));
721b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        goto error_exit;
722b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
723b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ucnv_setFallback(convto, fallback);
724b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
725b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool willexit, fromSawEndOfBytes, toSawEndOfUnicode;
726b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int8_t sig;
727b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
728b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // OK, we can convert now.
729b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    sig = signature;
730b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    rd = 0;
731b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
732b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    do {
733b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        willexit = FALSE;
734b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
735b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // input file offset at the beginning of the next buffer
736b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        infoffset += rd;
737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        rd = fread(buf, 1, bufsz, infile);
739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (ferror(infile) != 0) {
740b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UnicodeString str(strerror(errno));
741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            initMsg(pname);
742b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            u_wmsg(stderr, "cantRead", str.getTerminatedBuffer());
743b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            goto error_exit;
744b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
745b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
746b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Convert the read buffer into the new encoding via Unicode.
747b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // After the call 'unibufp' will be placed behind the last
748b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // character that was converted in the 'unibuf'.
749b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Also the 'cbufp' is positioned behind the last converted
750b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // character.
751b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // At the last conversion in the file, flush should be set to
752b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // true so that we get all characters converted.
753b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //
754b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // The converter must be flushed at the end of conversion so
755b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // that characters on hold also will be written.
756b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
757b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        cbufp = buf;
758b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        flush = (UBool)(rd != bufsz);
759b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
760b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // convert until the input is consumed
761b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        do {
762b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // remember the start of the current byte-to-Unicode conversion
763b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            prevbufp = cbufp;
764b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
765b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            unibuf = unibufp = u.getBuffer((int32_t)bufsz);
766b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
767b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // Use bufsz instead of u.getCapacity() for the targetLimit
768b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // so that we don't overflow fromoffsets[].
769b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            ucnv_toUnicode(convfrom, &unibufp, unibuf + bufsz, &cbufp,
770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                buf + rd, useOffsets ? fromoffsets : NULL, flush, &err);
771b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            ulen = (int32_t)(unibufp - unibuf);
773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            u.releaseBuffer(U_SUCCESS(err) ? ulen : 0);
774b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
775b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // fromSawEndOfBytes indicates that ucnv_toUnicode() is done
776b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // converting all of the input bytes.
777b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // It works like this because ucnv_toUnicode() returns only under the
778b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // following conditions:
779b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // - an error occurred during conversion (an error code is set)
780b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // - the target buffer is filled (the error code indicates an overflow)
781b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // - the source is consumed
782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // That is, if the error code does not indicate a failure,
783b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // not even an overflow, then the source must be consumed entirely.
784b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            fromSawEndOfBytes = (UBool)U_SUCCESS(err);
785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (err == U_BUFFER_OVERFLOW_ERROR) {
787b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                err = U_ZERO_ERROR;
788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else if (U_FAILURE(err)) {
789b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                char pos[32], errorBytes[32];
790b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                int8_t i, length, errorLength;
791b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                UErrorCode localError = U_ZERO_ERROR;
793b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                errorLength = (int8_t)sizeof(errorBytes);
794b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                ucnv_getInvalidChars(convfrom, errorBytes, &errorLength, &localError);
795b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (U_FAILURE(localError) || errorLength == 0) {
796b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    errorLength = 1;
797b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
798b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
799b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // print the input file offset of the start of the error bytes:
800b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // input file offset of the current byte buffer +
801b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // length of the just consumed bytes -
802b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // length of the error bytes
803b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                length =
804b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    (int8_t)sprintf(pos, "%d",
805b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        (int)(infoffset + (cbufp - buf) - errorLength));
806b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
807b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // output the bytes that caused the error
808b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                UnicodeString str;
809b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                for (i = 0; i < errorLength; ++i) {
810b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if (i > 0) {
811b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        str.append((UChar)uSP);
812b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
813b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    str.append(nibbleToHex((uint8_t)errorBytes[i] >> 4));
814b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    str.append(nibbleToHex((uint8_t)errorBytes[i]));
815b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
816b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
817b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                initMsg(pname);
818b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                u_wmsg(stderr, "problemCvtToU",
819b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        UnicodeString(pos, length, "").getTerminatedBuffer(),
820b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        str.getTerminatedBuffer(),
821b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        u_wmsg_errorName(err));
822b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
823b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                willexit = TRUE;
824b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                err = U_ZERO_ERROR; /* reset the error for the rest of the conversion. */
825b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
826b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
827b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // Replaced a check for whether the input was consumed by
828b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // looping until it is; message key "premEndInput" now obsolete.
829b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
830b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (ulen == 0) {
831b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                continue;
832b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
833b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
834b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // remove a U+FEFF Unicode signature character if requested
835b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (sig < 0) {
836b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (u.charAt(0) == uSig) {
837b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    u.remove(0, 1);
838b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
839b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // account for the removed UChar and offset
840b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    --ulen;
841b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
842b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if (useOffsets) {
843b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        // remove an offset from fromoffsets[] as well
844b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        // to keep the array parallel with the UChars
845b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        memmove(fromoffsets, fromoffsets + 1, ulen * 4);
846b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
847b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
848b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
849b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                sig = 0;
850b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
851b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
852b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_TRANSLITERATION
853b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // Transliterate/transform if needed.
854b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
855b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // For transformation, we use chunking code -
856b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // collect Unicode input until, for example, an end-of-line,
857b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // then transform and output-convert that and continue collecting.
858b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // This makes the transformation result independent of the buffer size
859b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // while avoiding the slower keyboard mode.
860b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // The end-of-chunk characters are completely included in the
861b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // transformed string in case they are to be transformed themselves.
862b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (t != NULL) {
863b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                UnicodeString out;
864b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                int32_t chunkLimit;
865b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
866b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                do {
867b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    chunkLimit = getChunkLimit(chunk, u);
868b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if (chunkLimit < 0 && flush && fromSawEndOfBytes) {
869b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        // use all of the rest at the end of the text
870b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        chunkLimit = u.length();
871b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
872b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if (chunkLimit >= 0) {
873b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        // complete the chunk and transform it
874b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        chunk.append(u, 0, chunkLimit);
875b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        u.remove(0, chunkLimit);
876b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        t->transliterate(chunk);
877b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
878b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        // append the transformation result to the result and empty the chunk
879b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        out.append(chunk);
880b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        chunk.remove();
881b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    } else {
882b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        // continue collecting the chunk
883b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        chunk.append(u);
884b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        break;
885b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
886b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } while (!u.isEmpty());
887b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
888b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                u = out;
889b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                ulen = u.length();
890b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
891b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif
892b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
893b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // add a U+FEFF Unicode signature character if requested
894b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // and possible/necessary
895b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (sig > 0) {
896b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (u.charAt(0) != uSig && cnvSigType(convto) == CNV_WITH_FEFF) {
897b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    u.insert(0, (UChar)uSig);
898b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
899b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if (useOffsets) {
900b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        // insert a pseudo-offset into fromoffsets[] as well
901b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        // to keep the array parallel with the UChars
902b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        memmove(fromoffsets + 1, fromoffsets, ulen * 4);
903b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        fromoffsets[0] = -1;
904b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
905b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
906b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // account for the additional UChar and offset
907b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ++ulen;
908b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
909b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                sig = 0;
910b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
911b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
912b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // Convert the Unicode buffer into the destination codepage
913b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // Again 'bufp' will be placed behind the last converted character
914b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // And 'unibufbp' will be placed behind the last converted Unicode character
915b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // At the last conversion flush should be set to true to ensure that
916b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // all characters left get converted
917b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
918b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            unibuf = unibufbp = u.getBuffer();
919b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
920b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            do {
921b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                bufp = outbuf;
922b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
923b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Use fromSawEndOfBytes in addition to the flush flag -
924b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // it indicates whether the intermediate Unicode string
925b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // contains the very last UChars for the very last input bytes.
926b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                ucnv_fromUnicode(convto, &bufp, outbuf + bufsz,
927b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                 &unibufbp,
928b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                 unibuf + ulen,
929b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                 NULL, (UBool)(flush && fromSawEndOfBytes), &err);
930b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
931b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // toSawEndOfUnicode indicates that ucnv_fromUnicode() is done
932b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // converting all of the intermediate UChars.
933b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // See comment for fromSawEndOfBytes.
934b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                toSawEndOfUnicode = (UBool)U_SUCCESS(err);
935b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
936b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (err == U_BUFFER_OVERFLOW_ERROR) {
937b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    err = U_ZERO_ERROR;
938b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else if (U_FAILURE(err)) {
939b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    UChar errorUChars[4];
940b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    const char *errtag;
941b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    char pos[32];
942b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    UChar32 c;
943b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    int8_t i, length, errorLength;
944b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
945b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    UErrorCode localError = U_ZERO_ERROR;
946b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    errorLength = (int8_t)LENGTHOF(errorUChars);
947b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ucnv_getInvalidUChars(convto, errorUChars, &errorLength, &localError);
948b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if (U_FAILURE(localError) || errorLength == 0) {
949b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        // need at least 1 so that we don't access beyond the length of fromoffsets[]
950b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        errorLength = 1;
951b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
952b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
953b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    int32_t ferroffset;
954b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
955b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if (useOffsets) {
956b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        // Unicode buffer offset of the start of the error UChars
957b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        ferroffset = (int32_t)((unibufbp - unibuf) - errorLength);
958b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        if (ferroffset < 0) {
959b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            // approximation - the character started in the previous Unicode buffer
960b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            ferroffset = 0;
961b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        }
962b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
963b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        // get the corresponding byte offset out of fromoffsets[]
964b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        // go back if the offset is not known for some of the UChars
965b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        int32_t fromoffset;
966b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        do {
967b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            fromoffset = fromoffsets[ferroffset];
968b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        } while (fromoffset < 0 && --ferroffset >= 0);
969b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
970b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        // total input file offset =
971b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        // input file offset of the current byte buffer +
972b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        // byte buffer offset of where the current Unicode buffer is converted from +
973b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        // fromoffsets[Unicode offset]
974b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        ferroffset = infoffset + (prevbufp - buf) + fromoffset;
975b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        errtag = "problemCvtFromU";
976b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    } else {
977b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        // Do not use fromoffsets if (t != NULL) because the Unicode text may
978b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        // be different from what the offsets refer to.
979b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
980b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        // output file offset
981b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        ferroffset = (int32_t)(outfoffset + (bufp - outbuf));
982b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        errtag = "problemCvtFromUOut";
983b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
984b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
985b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    length = (int8_t)sprintf(pos, "%u", (int)ferroffset);
986b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
987b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // output the code points that caused the error
988b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    UnicodeString str;
989b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    for (i = 0; i < errorLength;) {
990b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        if (i > 0) {
991b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            str.append((UChar)uSP);
992b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        }
993b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        U16_NEXT(errorUChars, i, errorLength, c);
994b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        if (c >= 0x100000) {
995b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            str.append(nibbleToHex((uint8_t)(c >> 20)));
996b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        }
997b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        if (c >= 0x10000) {
998b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            str.append(nibbleToHex((uint8_t)(c >> 16)));
999b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        }
1000b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        str.append(nibbleToHex((uint8_t)(c >> 12)));
1001b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        str.append(nibbleToHex((uint8_t)(c >> 8)));
1002b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        str.append(nibbleToHex((uint8_t)(c >> 4)));
1003b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        str.append(nibbleToHex((uint8_t)c));
1004b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
1005b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1006b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    initMsg(pname);
1007b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    u_wmsg(stderr, errtag,
1008b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            UnicodeString(pos, length, "").getTerminatedBuffer(),
1009b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            str.getTerminatedBuffer(),
1010b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                           u_wmsg_errorName(err));
1011b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    u_wmsg(stderr, "errorUnicode", str.getTerminatedBuffer());
1012b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1013b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    willexit = TRUE;
1014b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    err = U_ZERO_ERROR; /* reset the error for the rest of the conversion. */
1015b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
1016b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1017b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Replaced a check for whether the intermediate Unicode characters were all consumed by
1018b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // looping until they are; message key "premEnd" now obsolete.
1019b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1020b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Finally, write the converted buffer to the output file
1021b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                size_t outlen = (size_t) (bufp - outbuf);
1022b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                outfoffset += (int32_t)(wr = fwrite(outbuf, 1, outlen, outfile));
1023b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (wr != outlen) {
1024b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    UnicodeString str(strerror(errno));
1025b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    initMsg(pname);
1026b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    u_wmsg(stderr, "cantWrite", str.getTerminatedBuffer());
1027b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    willexit = TRUE;
1028b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
1029b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1030b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (willexit) {
1031b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    goto error_exit;
1032b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
1033b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } while (!toSawEndOfUnicode);
1034b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } while (!fromSawEndOfBytes);
1035b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } while (!flush);           // Stop when we have flushed the
1036b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                // converters (this means that it's
1037b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                // the end of output)
1038b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1039b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    goto normal_exit;
1040b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1041b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruerror_exit:
1042b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ret = FALSE;
1043b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1044b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querunormal_exit:
1045b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Cleanup.
1046b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1047b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ucnv_close(convfrom);
1048b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ucnv_close(convto);
1049b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1050b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_TRANSLITERATION
1051b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete t;
1052b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif
1053b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1054b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (infile != stdin) {
1055b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        fclose(infile);
1056b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1057b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1058b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return ret;
1059b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1060b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1061b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void usage(const char *pname, int ecode) {
1062b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const UChar *msg;
1063b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t msgLen;
1064b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode err = U_ZERO_ERROR;
1065b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    FILE *fp = ecode ? stderr : stdout;
1066b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int res;
1067b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1068b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    initMsg(pname);
1069b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    msg =
1070b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        ures_getStringByKey(gBundle, ecode ? "lcUsageWord" : "ucUsageWord",
1071b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            &msgLen, &err);
1072b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString upname(pname, (int32_t)(uprv_strlen(pname) + 1));
1073b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString mname(msg, msgLen + 1);
1074b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1075b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    res = u_wmsg(fp, "usage", mname.getBuffer(), upname.getBuffer());
1076b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (!ecode) {
1077b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (!res) {
1078b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            fputc('\n', fp);
1079b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1080b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (!u_wmsg(fp, "help")) {
1081b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /* Now dump callbacks and finish. */
1082b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1083b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            int i, count =
1084b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                sizeof(transcode_callbacks) / sizeof(*transcode_callbacks);
1085b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            for (i = 0; i < count; ++i) {
1086b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                fprintf(fp, " %s", transcode_callbacks[i].name);
1087b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1088b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            fputc('\n', fp);
1089b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1090b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1091b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1092b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    exit(ecode);
1093b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1094b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1095b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruextern int
1096b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querumain(int argc, char **argv)
1097b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
1098b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    FILE *outfile;
1099b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int ret = 0;
1100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    size_t bufsz = DEFAULT_BUFSZ;
1102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const char *fromcpage = 0;
1104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const char *tocpage = 0;
1105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const char *translit = 0;
1106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const char *outfilestr = 0;
1107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool fallback = FALSE;
1108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UConverterFromUCallback fromucallback = UCNV_FROM_U_CALLBACK_STOP;
1110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const void *fromuctxt = 0;
1111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UConverterToUCallback toucallback = UCNV_TO_U_CALLBACK_STOP;
1112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const void *touctxt = 0;
1113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    char **iter, **remainArgv, **remainArgvLimit;
1115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    char **end = argv + argc;
1116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const char *pname;
1118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool printConvs = FALSE, printCanon = FALSE, printTranslits = FALSE;
1120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const char *printName = 0;
1121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool verbose = FALSE;
1123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode status = U_ZERO_ERROR;
1124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ConvertFile cf;
1126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* Initialize ICU */
1128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    u_init(&status);
1129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) {
1130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        fprintf(stderr, "%s: can not initialize ICU.  status = %s\n",
1131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            argv[0], u_errorName(status));
1132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        exit(1);
1133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Get and prettify pname.
1136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    pname = uprv_strrchr(*argv, U_FILE_SEP_CHAR);
1137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifdef U_WINDOWS
1138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (!pname) {
1139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        pname = uprv_strrchr(*argv, '/');
1140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif
1142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (!pname) {
1143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        pname = *argv;
1144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
1145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        ++pname;
1146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // First, get the arguments from command-line
1149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // to know the codepages to convert between
1150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    remainArgv = remainArgvLimit = argv + 1;
1152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (iter = argv + 1; iter != end; iter++) {
1153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Check for from charset
1154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (strcmp("-f", *iter) == 0 || !strcmp("--from-code", *iter)) {
1155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            iter++;
1156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (iter != end)
1157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                fromcpage = *iter;
1158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            else
1159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                usage(pname, 1);
1160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (strcmp("-t", *iter) == 0 || !strcmp("--to-code", *iter)) {
1161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            iter++;
1162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (iter != end)
1163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                tocpage = *iter;
1164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            else
1165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                usage(pname, 1);
1166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (strcmp("-x", *iter) == 0) {
1167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            iter++;
1168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (iter != end)
1169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                translit = *iter;
1170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            else
1171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                usage(pname, 1);
1172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (!strcmp("--fallback", *iter)) {
1173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            fallback = TRUE;
1174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (!strcmp("--no-fallback", *iter)) {
1175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            fallback = FALSE;
1176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (strcmp("-b", *iter) == 0 || !strcmp("--block-size", *iter)) {
1177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            iter++;
1178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (iter != end) {
1179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                bufsz = atoi(*iter);
1180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if ((int) bufsz <= 0) {
1181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    initMsg(pname);
1182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    UnicodeString str(*iter);
1183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    initMsg(pname);
1184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    u_wmsg(stderr, "badBlockSize", str.getTerminatedBuffer());
1185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    return 3;
1186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
1187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else {
1188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                usage(pname, 1);
1189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (strcmp("-l", *iter) == 0 || !strcmp("--list", *iter)) {
1191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (printTranslits) {
1192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                usage(pname, 1);
1193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            printConvs = TRUE;
1195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (strcmp("--default-code", *iter) == 0) {
1196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (printTranslits) {
1197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                usage(pname, 1);
1198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            printName = ucnv_getDefaultName();
1200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (strcmp("--list-code", *iter) == 0) {
1201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (printTranslits) {
1202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                usage(pname, 1);
1203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            iter++;
1206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (iter != end) {
1207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                UErrorCode e = U_ZERO_ERROR;
1208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                printName = ucnv_getAlias(*iter, 0, &e);
1209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (U_FAILURE(e) || !printName) {
1210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    UnicodeString str(*iter);
1211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    initMsg(pname);
1212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    u_wmsg(stderr, "noSuchCodeset", str.getTerminatedBuffer());
1213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    return 2;
1214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
1215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else
1216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                usage(pname, 1);
1217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (strcmp("--canon", *iter) == 0) {
1218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            printCanon = TRUE;
1219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (strcmp("-L", *iter) == 0
1220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            || !strcmp("--list-transliterators", *iter)) {
1221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (printConvs) {
1222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                usage(pname, 1);
1223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            printTranslits = TRUE;
1225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (strcmp("-h", *iter) == 0 || !strcmp("-?", *iter)
1226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            || !strcmp("--help", *iter)) {
1227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            usage(pname, 0);
1228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (!strcmp("-c", *iter)) {
1229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            fromucallback = UCNV_FROM_U_CALLBACK_SKIP;
1230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (!strcmp("--to-callback", *iter)) {
1231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            iter++;
1232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (iter != end) {
1233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                const struct callback_ent *cbe = findCallback(*iter);
1234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (cbe) {
1235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    fromucallback = cbe->fromu;
1236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    fromuctxt = cbe->fromuctxt;
1237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else {
1238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    UnicodeString str(*iter);
1239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    initMsg(pname);
1240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer());
1241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    return 4;
1242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
1243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else {
1244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                usage(pname, 1);
1245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (!strcmp("--from-callback", *iter)) {
1247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            iter++;
1248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (iter != end) {
1249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                const struct callback_ent *cbe = findCallback(*iter);
1250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (cbe) {
1251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    toucallback = cbe->tou;
1252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    touctxt = cbe->touctxt;
1253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else {
1254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    UnicodeString str(*iter);
1255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    initMsg(pname);
1256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer());
1257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    return 4;
1258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
1259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else {
1260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                usage(pname, 1);
1261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (!strcmp("-i", *iter)) {
1263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            toucallback = UCNV_TO_U_CALLBACK_SKIP;
1264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (!strcmp("--callback", *iter)) {
1265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            iter++;
1266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (iter != end) {
1267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                const struct callback_ent *cbe = findCallback(*iter);
1268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (cbe) {
1269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    fromucallback = cbe->fromu;
1270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    fromuctxt = cbe->fromuctxt;
1271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    toucallback = cbe->tou;
1272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    touctxt = cbe->touctxt;
1273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else {
1274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    UnicodeString str(*iter);
1275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    initMsg(pname);
1276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer());
1277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    return 4;
1278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
1279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else {
1280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                usage(pname, 1);
1281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (!strcmp("-s", *iter) || !strcmp("--silent", *iter)) {
1283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            verbose = FALSE;
1284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (!strcmp("-v", *iter) || !strcmp("--verbose", *iter)) {
1285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            verbose = TRUE;
1286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (!strcmp("-V", *iter) || !strcmp("--version", *iter)) {
1287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            printf("%s v2.1  ICU " U_ICU_VERSION "\n", pname);
1288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return 0;
1289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (!strcmp("-o", *iter) || !strcmp("--output", *iter)) {
1290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            ++iter;
1291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (iter != end && !outfilestr) {
1292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                outfilestr = *iter;
1293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else {
1294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                usage(pname, 1);
1295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (0 == strcmp("--add-signature", *iter)) {
1297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            cf.signature = 1;
1298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (0 == strcmp("--remove-signature", *iter)) {
1299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            cf.signature = -1;
1300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (**iter == '-' && (*iter)[1]) {
1301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            usage(pname, 1);
1302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
1303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // move a non-option up in argv[]
1304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            *remainArgvLimit++ = *iter;
1305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (printConvs || printName) {
1309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return printConverters(pname, printName, printCanon) ? 2 : 0;
1310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else if (printTranslits) {
1311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return printTransliterators(printCanon) ? 3 : 0;
1312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (!fromcpage || !uprv_strcmp(fromcpage, "-")) {
1315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        fromcpage = ucnv_getDefaultName();
1316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (!tocpage || !uprv_strcmp(tocpage, "-")) {
1318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        tocpage = ucnv_getDefaultName();
1319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Open the correct output file or connect to stdout for reading input
1322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (outfilestr != 0 && strcmp(outfilestr, "-")) {
1323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        outfile = fopen(outfilestr, "wb");
1324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (outfile == 0) {
1325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UnicodeString str1(outfilestr, "");
1326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UnicodeString str2(strerror(errno), "");
1327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            initMsg(pname);
1328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            u_wmsg(stderr, "cantCreateOutputF",
1329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                str1.getBuffer(), str2.getBuffer());
1330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return 1;
1331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
1333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        outfilestr = "-";
1334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        outfile = stdout;
1335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifdef USE_FILENO_BINARY_MODE
1336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (setmode(fileno(outfile), O_BINARY) == -1) {
1337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            u_wmsg(stderr, "cantSetOutBinMode");
1338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            exit(-1);
1339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif
1341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* Loop again on the arguments to find all the input files, and
1344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    convert them. */
1345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    cf.setBufferSize(bufsz);
1347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(remainArgv < remainArgvLimit) {
1349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for (iter = remainArgv; iter != remainArgvLimit; iter++) {
1350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (!cf.convertFile(
1351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    pname, fromcpage, toucallback, touctxt, tocpage,
1352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    fromucallback, fromuctxt, fallback, translit, *iter,
1353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    outfile, verbose)
1354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            ) {
1355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                goto error_exit;
1356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
1359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (!cf.convertFile(
1360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                pname, fromcpage, toucallback, touctxt, tocpage,
1361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                fromucallback, fromuctxt, fallback, translit, 0,
1362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                outfile, verbose)
1363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        ) {
1364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            goto error_exit;
1365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    goto normal_exit;
1369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruerror_exit:
137050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if !UCONFIG_NO_LEGACY_CONVERSION
1371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ret = 1;
137250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#else
137350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fprintf(stderr, "uconv error: UCONFIG_NO_LEGACY_CONVERSION is on. See uconfig.h\n");
137450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif
1375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querunormal_exit:
1376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (outfile != stdout) {
1378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        fclose(outfile);
1379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return ret;
1382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*
1386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Hey, Emacs, please set the following:
1387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
1388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Local Variables:
1389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * indent-tabs-mode: nil
1390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * End:
1391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
1392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1393