1b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*****************************************************************************
2b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*
350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho*   Copyright (C) 1999-2009, International Business Machines
4b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*   Corporation and others.  All Rights Reserved.
5b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*
6b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru******************************************************************************/
7b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
8b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*
9b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * uconv(1): an iconv(1)-like converter using ICU.
10b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
11b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Original code by Jonas Utterstr&#x00F6;m <jonas.utterstrom@vittran.norrnod.se>
12b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * contributed in 1999.
13b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
14b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Conversion to the C conversion API and many improvements by
15b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Yves Arrouye <yves@realnames.com>, current maintainer.
16b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
17b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Markus Scherer maintainer from 2003.
18b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * See source code repository history for changes.
19b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
20b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
21b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <unicode/utypes.h>
22b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <unicode/putil.h>
23b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <unicode/ucnv.h>
24b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <unicode/uenum.h>
25b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <unicode/unistr.h>
26b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <unicode/translit.h>
27b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <unicode/uset.h>
28b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <unicode/uclean.h>
29b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
30b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <stdio.h>
31b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <errno.h>
32b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <string.h>
33b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <stdlib.h>
34b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
35b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cmemory.h"
36b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cstring.h"
37b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "ustrfmt.h"
38b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
39b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uwmsg.h"
40b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
41b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_NAMESPACE_USE
42b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
43b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if (defined(U_WINDOWS) || defined(U_CYGWIN)) && !defined(__STRICT_ANSI__)
44b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <io.h>
45b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <fcntl.h>
46b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if defined(U_WINDOWS)
47b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define USE_FILENO_BINARY_MODE 1
48b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* Windows likes to rename Unix-like functions */
49b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifndef fileno
50b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define fileno _fileno
51b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif
52b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifndef setmode
53b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define setmode _setmode
54b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif
55b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifndef O_BINARY
56b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define O_BINARY _O_BINARY
57b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif
58b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif
59b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif
60b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
61b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifdef UCONVMSG_LINK
62b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* below from the README */
63b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/utypes.h"
64b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/udata.h"
65b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CFUNC char uconvmsg_dat[];
66b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif
67b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
/* Number of elements of a true array (not a pointer), as an int32_t.
   The whole expansion is parenthesized so that it always behaves as a
   single operand, whatever operator the caller puts next to it. */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))

/* Default buffer size. NOTE(review): its users are outside this view;
   presumably the size of the conversion buffers - confirm. */
#define DEFAULT_BUFSZ   4096
/* Name of the message bundle handed to udata_setAppData() and
   u_wmsg_setPath() in initMsg(). */
#define UCONVMSG "uconvmsg"
72b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
73b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic UResourceBundle *gBundle = 0;    /* Bundle containing messages. */
74b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
75b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*
76b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Initialize the message bundle so that message strings can be fetched
77b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * by u_wmsg().
78b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
79b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
80b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
81b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void initMsg(const char *pname) {
82b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    static int ps = 0;
83b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
84b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (!ps) {
85b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        char dataPath[2048];        /* XXX Sloppy: should be PATH_MAX. */
86b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UErrorCode err = U_ZERO_ERROR;
87b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
88b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        ps = 1;
89b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
90b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* Set up our static data - if any */
91b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifdef UCONVMSG_LINK
92b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        udata_setAppData(UCONVMSG, (const void*) uconvmsg_dat, &err);
93b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (U_FAILURE(err)) {
94b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          fprintf(stderr, "%s: warning, problem installing our static resource bundle data uconvmsg: %s - trying anyways.\n",
95b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                  pname, u_errorName(err));
96b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru          err = U_ZERO_ERROR; /* It may still fail */
97b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
98b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif
99b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* Get messages. */
101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        gBundle = u_wmsg_setPath(UCONVMSG, &err);
102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (U_FAILURE(err)) {
103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            fprintf(stderr,
104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    "%s: warning: couldn't open bundle %s: %s\n",
105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    pname, UCONVMSG, u_errorName(err));
106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifdef UCONVMSG_LINK
107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            fprintf(stderr,
108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    "%s: setAppData was called, internal data %s failed to load\n",
109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        pname, UCONVMSG);
110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif
111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            err = U_ZERO_ERROR;
113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /* that was try #1, try again with a path */
114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            uprv_strcpy(dataPath, u_getDataDirectory());
115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            uprv_strcat(dataPath, U_FILE_SEP_STRING);
116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            uprv_strcat(dataPath, UCONVMSG);
117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            gBundle = u_wmsg_setPath(dataPath, &err);
119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (U_FAILURE(err)) {
120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                fprintf(stderr,
121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    "%s: warning: still couldn't open bundle %s: %s\n",
122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    pname, dataPath, u_errorName(err));
123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                fprintf(stderr, "%s: warning: messages will not be displayed\n", pname);
124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* Mapping of callback names to the callbacks passed to the converter
130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   API. */
131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic struct callback_ent {
133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const char *name;
134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UConverterFromUCallback fromu;
135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const void *fromuctxt;
136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UConverterToUCallback tou;
137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const void *touctxt;
138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} transcode_callbacks[] = {
139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    { "substitute",
140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      UCNV_FROM_U_CALLBACK_SUBSTITUTE, 0,
141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      UCNV_TO_U_CALLBACK_SUBSTITUTE, 0 },
142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    { "skip",
143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      UCNV_FROM_U_CALLBACK_SKIP, 0,
144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      UCNV_TO_U_CALLBACK_SKIP, 0 },
145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    { "stop",
146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      UCNV_FROM_U_CALLBACK_STOP, 0,
147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      UCNV_TO_U_CALLBACK_STOP, 0 },
148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    { "escape",
149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      UCNV_FROM_U_CALLBACK_ESCAPE, 0,
150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      UCNV_TO_U_CALLBACK_ESCAPE, 0},
151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    { "escape-icu",
152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_ICU,
153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_ICU },
154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    { "escape-java",
155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_JAVA,
156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_JAVA },
157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    { "escape-c",
158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_C,
159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_C },
160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    { "escape-xml",
161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX,
162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX },
163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    { "escape-xml-hex",
164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX,
165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX },
166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    { "escape-xml-dec",
167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC,
168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC },
169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    { "escape-unicode", UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_UNICODE,
170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru      UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_UNICODE }
171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru};
172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* Return a pointer to a callback record given its name. */
174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const struct callback_ent *findCallback(const char *name) {
176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int i, count =
177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        sizeof(transcode_callbacks) / sizeof(*transcode_callbacks);
178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* We'll do a linear search, there aren't many of them and bsearch()
180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru       may not be that portable. */
181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (i = 0; i < count; ++i) {
183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (!uprv_stricmp(name, transcode_callbacks[i].name)) {
184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return &transcode_callbacks[i];
185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return 0;
189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
/* Print converter information. If lookfor is set, only that converter will
   be printed, otherwise all converters will be printed. If canon is
   non-zero, tags and aliases for each converter are printed too, in the
   format expected for convrtrs.txt(5). */
195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int printConverters(const char *pname, const char *lookfor,
197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool canon)
198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode err = U_ZERO_ERROR;
200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t num;
201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint16_t num_stds;
202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const char **stds;
203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* If there is a specified name, just handle that now. */
205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (lookfor) {
207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (!canon) {
208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            printf("%s\n", lookfor);
209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return 0;
210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /*  Because we are printing a canonical name, we need the
212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            true converter name. We've done that already except for
213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            the default name (because we want to print the exact
214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            name one would get when calling ucnv_getDefaultName()
215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            in non-canon mode). But since we do not know at this
216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            point if we have the default name or something else, we
217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            need to normalize again to the canonical converter
218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            name. */
219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            const char *truename = ucnv_getAlias(lookfor, 0, &err);
221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (U_SUCCESS(err)) {
222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                lookfor = truename;
223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else {
224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                err = U_ZERO_ERROR;
225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* Print converter names. We come here for one of two reasons: we
230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru       are printing all the names (lookfor was null), or we have a
231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru       single converter to print but in canon mode, hence we need to
232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru       get to it in order to print everything. */
233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    num = ucnv_countAvailable();
235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (num <= 0) {
236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        initMsg(pname);
237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        u_wmsg(stderr, "cantGetNames");
238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return -1;
239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (lookfor) {
241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        num = 1;                /* We know where we want to be. */
242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    num_stds = ucnv_countStandards();
245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    stds = (const char **) uprv_malloc(num_stds * sizeof(*stds));
246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (!stds) {
247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        u_wmsg(stderr, "cantGetTag", u_wmsg_errorName(U_MEMORY_ALLOCATION_ERROR));
248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return -1;
249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        uint16_t s;
251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (canon) {
253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            printf("{ ");
254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for (s = 0; s < num_stds; ++s) {
256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            stds[s] = ucnv_getStandard(s, &err);
257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (canon) {
258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                printf("%s ", stds[s]);
259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (U_FAILURE(err)) {
261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                u_wmsg(stderr, "cantGetTag", u_wmsg_errorName(err));
262c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                goto error_cleanup;
263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (canon) {
266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            puts("}");
267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (int32_t i = 0; i < num; i++) {
271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        const char *name;
272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        uint16_t num_aliases;
273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* Set the name either to what we are looking for, or
275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        to the current converter name. */
276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (lookfor) {
278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            name = lookfor;
279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            name = ucnv_getAvailableName(i);
281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* Get all the aliases associated to the name. */
284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        err = U_ZERO_ERROR;
286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        num_aliases = ucnv_countAliases(name, &err);
287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (U_FAILURE(err)) {
288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            printf("%s", name);
289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UnicodeString str(name, "");
291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            putchar('\t');
292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            u_wmsg(stderr, "cantGetAliases", str.getTerminatedBuffer(),
293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                u_wmsg_errorName(err));
294c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            goto error_cleanup;
295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            uint16_t a, s, t;
297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /* Write all the aliases and their tags. */
299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            for (a = 0; a < num_aliases; ++a) {
301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                const char *alias = ucnv_getAlias(name, a, &err);
302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (U_FAILURE(err)) {
304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    UnicodeString str(name, "");
305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    putchar('\t');
306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    u_wmsg(stderr, "cantGetAliases", str.getTerminatedBuffer(),
307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        u_wmsg_errorName(err));
308c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru                    goto error_cleanup;
309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                /* Print the current alias so that it looks right. */
312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                printf("%s%s%s", (canon ? (a == 0? "" : "\t" ) : "") ,
313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                 alias,
314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                 (canon ? "" : " "));
315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                /* Look (slowly, linear searching) for a tag. */
317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (canon) {
319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    /* -1 to skip the last standard */
320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    for (s = t = 0; s < num_stds-1; ++s) {
321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        UEnumeration *nameEnum = ucnv_openStandardNames(name, stds[s], &err);
322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        if (U_SUCCESS(err)) {
323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            /* List the standard tags */
324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            const char *standardName;
325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            UBool isFirst = TRUE;
326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            UErrorCode enumError = U_ZERO_ERROR;
327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            while ((standardName = uenum_next(nameEnum, NULL, &enumError))) {
328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                /* See if this alias is supported by this standard. */
329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                if (!strcmp(standardName, alias)) {
330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                    if (!t) {
331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                        printf(" {");
332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                        t = 1;
333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                    }
334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                    /* Print a * after the default standard name */
335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                    printf(" %s%s", stds[s], (isFirst ? "*" : ""));
336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                }
337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                isFirst = FALSE;
338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            }
339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        }
340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if (t) {
342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        printf(" }");
343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                /* Terminate this entry. */
346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (canon) {
347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    puts("");
348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                /* Move on. */
351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /* Terminate this entry. */
353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (!canon) {
354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                puts("");
355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* Free temporary data. */
360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uprv_free(stds);
362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* Success. */
364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return 0;
366c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruerror_cleanup:
367c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    uprv_free(stds);
368c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    return -1;
369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* Print all available transliterators. If canon is non zero, print
372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru   one transliterator per line. */
373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int printTransliterators(UBool canon)
375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if UCONFIG_NO_TRANSLITERATION
377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    printf("no transliterators available because of UCONFIG_NO_TRANSLITERATION, see uconfig.h\n");
378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return 1;
379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#else
380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t numtrans = utrans_countAvailableIDs(), i;
381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int buflen = 512;
382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    char *buf = (char *) uprv_malloc(buflen);
383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    char staticbuf[512];
384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    char sepchar = canon ? '\n' : ' ';
386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (!buf) {
388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        buf = staticbuf;
389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        buflen = sizeof(staticbuf);
390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (i = 0; i < numtrans; ++i) {
393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        int32_t len = utrans_getAvailableID(i, buf, buflen);
394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (len >= buflen - 1) {
395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (buf != staticbuf) {
396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                buflen <<= 1;
397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (buflen < len) {
398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    buflen = len + 64;
399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                buf = (char *) uprv_realloc(buf, buflen);
401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (!buf) {
402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    buf = staticbuf;
403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    buflen = sizeof(staticbuf);
404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            utrans_getAvailableID(i, buf, buflen);
407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (len >= buflen) {
408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                uprv_strcpy(buf + buflen - 4, "..."); /* Truncate the name. */
409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        printf("%s", buf);
413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (i < numtrans - 1) {
414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            putchar(sepchar);
415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* Add a terminating newline if needed. */
419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (sepchar != '\n') {
421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        putchar('\n');
422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* Free temporary data. */
425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (buf != staticbuf) {
427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        uprv_free(buf);
428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* Success. */
431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return 0;
433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif
434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
// Unicode code points that are referred to by name in this file.
enum {
    uSP = 0x20,         // space
    uCR = 0xd,          // carriage return
    uLF = 0xa,          // line feed
    uNL = 0x85,         // next line (NEL)
    uLS = 0x2028,       // line separator
    uPS = 0x2029,       // paragraph separator
    uSig = 0xfeff       // signature/BOM character
};
445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic inline int32_t
447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QuerugetChunkLimit(const UnicodeString &prev, const UnicodeString &s) {
448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // find one of
449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // CR, LF, CRLF, NL, LS, PS
450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // for paragraph ends (see UAX #13/Unicode 4)
451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // and include it in the chunk
452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // all of these characters are on the BMP
453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // do not include FF or VT in case they are part of a paragraph
454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // (important for bidi contexts)
455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    static const UChar paraEnds[] = {
456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        0xd, 0xa, 0x85, 0x2028, 0x2029
457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    };
458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    enum {
459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        iCR, iLF, iNL, iLS, iPS, iCount
460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    };
461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // first, see if there is a CRLF split between prev and s
463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (prev.endsWith(paraEnds + iCR, 1)) {
464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (s.startsWith(paraEnds + iLF, 1)) {
465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return 1; // split CRLF, include the LF
466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (!s.isEmpty()) {
467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return 0; // complete the last chunk
468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return -1; // wait for actual further contents to arrive
470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const UChar *u = s.getBuffer(), *limit = u + s.length();
474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar c;
475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    while (u < limit) {
477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        c = *u++;
478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (
479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            ((c < uSP) && (c == uCR || c == uLF)) ||
480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            (c == uNL) ||
481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            ((c & uLS) == uLS)
482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        ) {
483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (c == uCR) {
484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // check for CRLF
485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (u == limit) {
486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    return -1; // LF may be in the next chunk
487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else if (*u == uLF) {
488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ++u; // include the LF in this chunk
489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return (int32_t)(u - s.getBuffer());
492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return -1; // continue collecting the chunk
496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
// How a converter deals with the U+FEFF Unicode signature character;
// these are the values returned by cnvSigType().
enum {
    CNV_NO_FEFF,    // cannot convert the U+FEFF Unicode signature character (BOM)
    CNV_WITH_FEFF,  // can convert the U+FEFF signature character
    CNV_ADDS_FEFF   // automatically adds/detects the U+FEFF signature character
};
503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic inline UChar
505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QuerunibbleToHex(uint8_t n) {
506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    n &= 0xf;
507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return
508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        n <= 9 ?
509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            (UChar)(0x30 + n) :
510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            (UChar)((0x61 - 10) + n);
511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// check the converter's Unicode signature properties;
514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// the fromUnicode side of the converter must be in its initial state
515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// and will be reset again if it was used
516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int32_t
517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QuerucnvSigType(UConverter *cnv) {
518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode err;
519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t result;
520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // test if the output charset can convert U+FEFF
522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    USet *set = uset_open(1, 0);
523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    err = U_ZERO_ERROR;
524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &err);
525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_SUCCESS(err) && uset_contains(set, uSig)) {
526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        result = CNV_WITH_FEFF;
527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        result = CNV_NO_FEFF; // an error occurred or U+FEFF cannot be converted
529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uset_close(set);
531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (result == CNV_WITH_FEFF) {
533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // test if the output charset emits a signature anyway
534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        const UChar a[1] = { 0x61 }; // "a"
535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        const UChar *in;
536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        char buffer[20];
538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        char *out;
539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        in = a;
541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        out = buffer;
542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        err = U_ZERO_ERROR;
543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        ucnv_fromUnicode(cnv,
544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            &out, buffer + sizeof(buffer),
545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            &in, a + 1,
546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            NULL, TRUE, &err);
547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        ucnv_resetFromUnicode(cnv);
548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (NULL != ucnv_detectUnicodeSignature(buffer, (int32_t)(out - buffer), NULL, &err) &&
550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            U_SUCCESS(err)
551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        ) {
552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            result = CNV_ADDS_FEFF;
553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return result;
557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruclass ConvertFile {
560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querupublic:
561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ConvertFile() :
562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        buf(NULL), outbuf(NULL), fromoffsets(NULL),
563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        bufsz(0), signature(0) {}
564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    void
566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    setBufferSize(size_t bufferSize) {
567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        bufsz = bufferSize;
568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        buf = new char[2 * bufsz];
570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        outbuf = buf + bufsz;
571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // +1 for an added U+FEFF in the intermediate Unicode buffer
573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        fromoffsets = new int32_t[bufsz + 1];
574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ~ConvertFile() {
577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        delete [] buf;
578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        delete [] fromoffsets;
579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool convertFile(const char *pname,
582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                      const char *fromcpage,
583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                      UConverterToUCallback toucallback,
584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                      const void *touctxt,
585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                      const char *tocpage,
586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                      UConverterFromUCallback fromucallback,
587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                      const void *fromuctxt,
588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                      UBool fallback,
589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                      const char *translit,
590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                      const char *infilestr,
591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                      FILE * outfile, int verbose);
592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruprivate:
593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    friend int main(int argc, char **argv);
594b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
595b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    char *buf, *outbuf;
596b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t *fromoffsets;
597b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
598b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    size_t bufsz;
599b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int8_t signature; // add (1) or remove (-1) a U+FEFF Unicode signature character
600b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru};
601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Convert a file from one encoding to another
603b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool
604b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruConvertFile::convertFile(const char *pname,
605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                         const char *fromcpage,
606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                         UConverterToUCallback toucallback,
607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                         const void *touctxt,
608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                         const char *tocpage,
609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                         UConverterFromUCallback fromucallback,
610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                         const void *fromuctxt,
611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                         UBool fallback,
612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                         const char *translit,
613b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                         const char *infilestr,
614b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                         FILE * outfile, int verbose)
615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    FILE *infile;
617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool ret = TRUE;
618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UConverter *convfrom = 0;
619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UConverter *convto = 0;
620b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode err = U_ZERO_ERROR;
621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool flush;
622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const char *cbufp, *prevbufp;
623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    char *bufp;
624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    uint32_t infoffset = 0, outfoffset = 0;   /* Where we are in the file, for error reporting. */
626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
627b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const UChar *unibuf, *unibufbp;
628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar *unibufp;
629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    size_t rd, wr;
631b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_TRANSLITERATION
633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    Transliterator *t = 0;      // Transliterator acting on Unicode data.
634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString chunk;        // One chunk of the text being collected for transformation.
635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif
636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString u;            // String to do the transliteration.
637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t ulen;
638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // use conversion offsets for error messages
640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // unless a transliterator is used -
641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // a text transformation will reorder characters in unpredictable ways
642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool useOffsets = TRUE;
643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Open the correct input file or connect to stdin for reading input
645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (infilestr != 0 && strcmp(infilestr, "-")) {
647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        infile = fopen(infilestr, "rb");
648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (infile == 0) {
649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UnicodeString str1(infilestr, "");
650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            str1.append((UChar32) 0);
651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UnicodeString str2(strerror(errno), "");
652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            str2.append((UChar32) 0);
653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            initMsg(pname);
654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            u_wmsg(stderr, "cantOpenInputF", str1.getBuffer(), str2.getBuffer());
655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return FALSE;
656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        infilestr = "-";
659b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        infile = stdin;
660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifdef USE_FILENO_BINARY_MODE
661b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (setmode(fileno(stdin), O_BINARY) == -1) {
662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            initMsg(pname);
663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            u_wmsg(stderr, "cantSetInBinMode");
664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return FALSE;
665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
666b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif
667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (verbose) {
670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        fprintf(stderr, "%s:\n", infilestr);
671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
672b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_TRANSLITERATION
674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Create transliterator as needed.
675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (translit != NULL && *translit) {
677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UParseError parse;
678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString str(translit), pestr;
679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* Create from rules or by ID as needed. */
681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        parse.line = -1;
683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (uprv_strchr(translit, ':') || uprv_strchr(translit, '>') || uprv_strchr(translit, '<') || uprv_strchr(translit, '>')) {
685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            t = Transliterator::createFromRules("Uconv", str, UTRANS_FORWARD, parse, err);
686b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            t = Transliterator::createInstance(translit, UTRANS_FORWARD, err);
688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
689b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
690b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (U_FAILURE(err)) {
691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            str.append((UChar32) 0);
692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            initMsg(pname);
693b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (parse.line >= 0) {
695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                UChar linebuf[20], offsetbuf[20];
696b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                uprv_itou(linebuf, 20, parse.line, 10, 0);
697b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                uprv_itou(offsetbuf, 20, parse.offset, 10, 0);
698b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                u_wmsg(stderr, "cantCreateTranslitParseErr", str.getTerminatedBuffer(),
699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    u_wmsg_errorName(err), linebuf, offsetbuf);
700b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else {
701b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                u_wmsg(stderr, "cantCreateTranslit", str.getTerminatedBuffer(),
702b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    u_wmsg_errorName(err));
703b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
704b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
705b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (t) {
706b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                delete t;
707b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                t = 0;
708b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
709b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            goto error_exit;
710b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
711b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
712b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        useOffsets = FALSE;
713b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
714b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif
715b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
716b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Create codepage converter. If the codepage or its aliases weren't
717b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // available, it returns NULL and a failure code. We also set the
718b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // callbacks, and return errors in the same way.
719b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
720b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    convfrom = ucnv_open(fromcpage, &err);
721b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(err)) {
722b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString str(fromcpage, "");
723b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        initMsg(pname);
724b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        u_wmsg(stderr, "cantOpenFromCodeset", str.getTerminatedBuffer(),
725b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            u_wmsg_errorName(err));
726b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        goto error_exit;
727b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
728b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ucnv_setToUCallBack(convfrom, toucallback, touctxt, 0, 0, &err);
729b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(err)) {
730b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        initMsg(pname);
731b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        u_wmsg(stderr, "cantSetCallback", u_wmsg_errorName(err));
732b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        goto error_exit;
733b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
734b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
735b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    convto = ucnv_open(tocpage, &err);
736b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(err)) {
737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString str(tocpage, "");
738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        initMsg(pname);
739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        u_wmsg(stderr, "cantOpenToCodeset", str.getTerminatedBuffer(),
740b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            u_wmsg_errorName(err));
741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        goto error_exit;
742b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
743b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ucnv_setFromUCallBack(convto, fromucallback, fromuctxt, 0, 0, &err);
744b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(err)) {
745b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        initMsg(pname);
746b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        u_wmsg(stderr, "cantSetCallback", u_wmsg_errorName(err));
747b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        goto error_exit;
748b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
749b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ucnv_setFallback(convto, fallback);
750b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
751b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool willexit, fromSawEndOfBytes, toSawEndOfUnicode;
752b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int8_t sig;
753b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
754b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // OK, we can convert now.
755b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    sig = signature;
756b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    rd = 0;
757b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
758b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    do {
759b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        willexit = FALSE;
760b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
761b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // input file offset at the beginning of the next buffer
762b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        infoffset += rd;
763b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
764b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        rd = fread(buf, 1, bufsz, infile);
765b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (ferror(infile) != 0) {
766b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UnicodeString str(strerror(errno));
767b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            initMsg(pname);
768b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            u_wmsg(stderr, "cantRead", str.getTerminatedBuffer());
769b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            goto error_exit;
770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
771b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Convert the read buffer into the new encoding via Unicode.
773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // After the call 'unibufp' will be placed behind the last
774b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // character that was converted in the 'unibuf'.
775b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Also the 'cbufp' is positioned behind the last converted
776b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // character.
777b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // At the last conversion in the file, flush should be set to
778b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // true so that we get all characters converted.
779b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        //
780b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // The converter must be flushed at the end of conversion so
781b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // that characters on hold also will be written.
782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
783b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        cbufp = buf;
784b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        flush = (UBool)(rd != bufsz);
785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // convert until the input is consumed
787b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        do {
788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // remember the start of the current byte-to-Unicode conversion
789b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            prevbufp = cbufp;
790b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
791b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            unibuf = unibufp = u.getBuffer((int32_t)bufsz);
792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
793b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // Use bufsz instead of u.getCapacity() for the targetLimit
794b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // so that we don't overflow fromoffsets[].
795b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            ucnv_toUnicode(convfrom, &unibufp, unibuf + bufsz, &cbufp,
796b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                buf + rd, useOffsets ? fromoffsets : NULL, flush, &err);
797b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
798b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            ulen = (int32_t)(unibufp - unibuf);
799b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            u.releaseBuffer(U_SUCCESS(err) ? ulen : 0);
800b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
801b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // fromSawEndOfBytes indicates that ucnv_toUnicode() is done
802b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // converting all of the input bytes.
803b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // It works like this because ucnv_toUnicode() returns only under the
804b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // following conditions:
805b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // - an error occurred during conversion (an error code is set)
806b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // - the target buffer is filled (the error code indicates an overflow)
807b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // - the source is consumed
808b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // That is, if the error code does not indicate a failure,
809b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // not even an overflow, then the source must be consumed entirely.
810b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            fromSawEndOfBytes = (UBool)U_SUCCESS(err);
811b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
812b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (err == U_BUFFER_OVERFLOW_ERROR) {
813b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                err = U_ZERO_ERROR;
814b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else if (U_FAILURE(err)) {
815b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                char pos[32], errorBytes[32];
816b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                int8_t i, length, errorLength;
817b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
818b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                UErrorCode localError = U_ZERO_ERROR;
819b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                errorLength = (int8_t)sizeof(errorBytes);
820b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                ucnv_getInvalidChars(convfrom, errorBytes, &errorLength, &localError);
821b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (U_FAILURE(localError) || errorLength == 0) {
822b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    errorLength = 1;
823b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
824b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
825b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // print the input file offset of the start of the error bytes:
826b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // input file offset of the current byte buffer +
827b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // length of the just consumed bytes -
828b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // length of the error bytes
829b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                length =
830b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    (int8_t)sprintf(pos, "%d",
831b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        (int)(infoffset + (cbufp - buf) - errorLength));
832b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
833b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // output the bytes that caused the error
834b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                UnicodeString str;
835b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                for (i = 0; i < errorLength; ++i) {
836b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if (i > 0) {
837b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        str.append((UChar)uSP);
838b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
839b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    str.append(nibbleToHex((uint8_t)errorBytes[i] >> 4));
840b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    str.append(nibbleToHex((uint8_t)errorBytes[i]));
841b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
842b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
843b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                initMsg(pname);
844b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                u_wmsg(stderr, "problemCvtToU",
845b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        UnicodeString(pos, length, "").getTerminatedBuffer(),
846b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        str.getTerminatedBuffer(),
847b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        u_wmsg_errorName(err));
848b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
849b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                willexit = TRUE;
850b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                err = U_ZERO_ERROR; /* reset the error for the rest of the conversion. */
851b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
852b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
853b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // Replaced a check for whether the input was consumed by
854b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // looping until it is; message key "premEndInput" now obsolete.
855b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
856b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (ulen == 0) {
857b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                continue;
858b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
859b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
860b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // remove a U+FEFF Unicode signature character if requested
861b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (sig < 0) {
862b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (u.charAt(0) == uSig) {
863b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    u.remove(0, 1);
864b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
865b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // account for the removed UChar and offset
866b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    --ulen;
867b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
868b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if (useOffsets) {
869b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        // remove an offset from fromoffsets[] as well
870b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        // to keep the array parallel with the UChars
871b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        memmove(fromoffsets, fromoffsets + 1, ulen * 4);
872b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
873b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
874b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
875b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                sig = 0;
876b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
877b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
878b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_TRANSLITERATION
879b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // Transliterate/transform if needed.
880b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
881b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // For transformation, we use chunking code -
882b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // collect Unicode input until, for example, an end-of-line,
883b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // then transform and output-convert that and continue collecting.
884b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // This makes the transformation result independent of the buffer size
885b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // while avoiding the slower keyboard mode.
886b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // The end-of-chunk characters are completely included in the
887b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // transformed string in case they are to be transformed themselves.
888b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (t != NULL) {
889b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                UnicodeString out;
890b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                int32_t chunkLimit;
891b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
892b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                do {
893b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    chunkLimit = getChunkLimit(chunk, u);
894b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if (chunkLimit < 0 && flush && fromSawEndOfBytes) {
895b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        // use all of the rest at the end of the text
896b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        chunkLimit = u.length();
897b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
898b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if (chunkLimit >= 0) {
899b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        // complete the chunk and transform it
900b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        chunk.append(u, 0, chunkLimit);
901b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        u.remove(0, chunkLimit);
902b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        t->transliterate(chunk);
903b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
904b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        // append the transformation result to the result and empty the chunk
905b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        out.append(chunk);
906b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        chunk.remove();
907b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    } else {
908b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        // continue collecting the chunk
909b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        chunk.append(u);
910b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        break;
911b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
912b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } while (!u.isEmpty());
913b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
914b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                u = out;
915b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                ulen = u.length();
916b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
917b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif
918b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
919b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // add a U+FEFF Unicode signature character if requested
920b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // and possible/necessary
921b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (sig > 0) {
922b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (u.charAt(0) != uSig && cnvSigType(convto) == CNV_WITH_FEFF) {
923b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    u.insert(0, (UChar)uSig);
924b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
925b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if (useOffsets) {
926b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        // insert a pseudo-offset into fromoffsets[] as well
927b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        // to keep the array parallel with the UChars
928b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        memmove(fromoffsets + 1, fromoffsets, ulen * 4);
929b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        fromoffsets[0] = -1;
930b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
931b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
932b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // account for the additional UChar and offset
933b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ++ulen;
934b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
935b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                sig = 0;
936b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
937b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
938b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // Convert the Unicode buffer into the destination codepage
939b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // Again 'bufp' will be placed behind the last converted character
940b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // And 'unibufp' will be placed behind the last converted unicode character
941b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // At the last conversion flush should be set to true to ensure that
942b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // all characters left get converted
943b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
944b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            unibuf = unibufbp = u.getBuffer();
945b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
946b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            do {
947b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                bufp = outbuf;
948b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
949b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Use fromSawEndOfBytes in addition to the flush flag -
950b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // it indicates whether the intermediate Unicode string
951b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // contains the very last UChars for the very last input bytes.
952b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                ucnv_fromUnicode(convto, &bufp, outbuf + bufsz,
953b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                 &unibufbp,
954b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                 unibuf + ulen,
955b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                 NULL, (UBool)(flush && fromSawEndOfBytes), &err);
956b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
957b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // toSawEndOfUnicode indicates that ucnv_fromUnicode() is done
958b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // converting all of the intermediate UChars.
959b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // See comment for fromSawEndOfBytes.
960b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                toSawEndOfUnicode = (UBool)U_SUCCESS(err);
961b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
962b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (err == U_BUFFER_OVERFLOW_ERROR) {
963b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    err = U_ZERO_ERROR;
964b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else if (U_FAILURE(err)) {
965b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    UChar errorUChars[4];
966b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    const char *errtag;
967b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    char pos[32];
968b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    UChar32 c;
969b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    int8_t i, length, errorLength;
970b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
971b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    UErrorCode localError = U_ZERO_ERROR;
972b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    errorLength = (int8_t)LENGTHOF(errorUChars);
973b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    ucnv_getInvalidUChars(convto, errorUChars, &errorLength, &localError);
974b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if (U_FAILURE(localError) || errorLength == 0) {
975b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        // need at least 1 so that we don't access beyond the length of fromoffsets[]
976b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        errorLength = 1;
977b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
978b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
979b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    int32_t ferroffset;
980b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
981b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    if (useOffsets) {
982b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        // Unicode buffer offset of the start of the error UChars
983b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        ferroffset = (int32_t)((unibufbp - unibuf) - errorLength);
984b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        if (ferroffset < 0) {
985b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            // approximation - the character started in the previous Unicode buffer
986b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            ferroffset = 0;
987b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        }
988b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
989b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        // get the corresponding byte offset out of fromoffsets[]
990b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        // go back if the offset is not known for some of the UChars
991b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        int32_t fromoffset;
992b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        do {
993b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            fromoffset = fromoffsets[ferroffset];
994b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        } while (fromoffset < 0 && --ferroffset >= 0);
995b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
996b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        // total input file offset =
997b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        // input file offset of the current byte buffer +
998b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        // byte buffer offset of where the current Unicode buffer is converted from +
999b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        // fromoffsets[Unicode offset]
1000b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        ferroffset = infoffset + (prevbufp - buf) + fromoffset;
1001b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        errtag = "problemCvtFromU";
1002b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    } else {
1003b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        // Do not use fromoffsets if (t != NULL) because the Unicode text may
1004b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        // be different from what the offsets refer to.
1005b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1006b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        // output file offset
1007b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        ferroffset = (int32_t)(outfoffset + (bufp - outbuf));
1008b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        errtag = "problemCvtFromUOut";
1009b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
1010b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1011b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    length = (int8_t)sprintf(pos, "%u", (int)ferroffset);
1012b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1013b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    // output the code points that caused the error
1014b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    UnicodeString str;
1015b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    for (i = 0; i < errorLength;) {
1016b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        if (i > 0) {
1017b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            str.append((UChar)uSP);
1018b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        }
1019b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        U16_NEXT(errorUChars, i, errorLength, c);
1020b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        if (c >= 0x100000) {
1021b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            str.append(nibbleToHex((uint8_t)(c >> 20)));
1022b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        }
1023b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        if (c >= 0x10000) {
1024b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            str.append(nibbleToHex((uint8_t)(c >> 16)));
1025b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        }
1026b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        str.append(nibbleToHex((uint8_t)(c >> 12)));
1027b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        str.append(nibbleToHex((uint8_t)(c >> 8)));
1028b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        str.append(nibbleToHex((uint8_t)(c >> 4)));
1029b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                        str.append(nibbleToHex((uint8_t)c));
1030b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    }
1031b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1032b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    initMsg(pname);
1033b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    u_wmsg(stderr, errtag,
1034b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            UnicodeString(pos, length, "").getTerminatedBuffer(),
1035b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            str.getTerminatedBuffer(),
1036b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                           u_wmsg_errorName(err));
1037b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    u_wmsg(stderr, "errorUnicode", str.getTerminatedBuffer());
1038b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1039b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    willexit = TRUE;
1040b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    err = U_ZERO_ERROR; /* reset the error for the rest of the conversion. */
1041b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
1042b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1043b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Replaced a check for whether the intermediate Unicode characters were all consumed by
1044b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // looping until they are; message key "premEnd" now obsolete.
1045b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1046b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Finally, write the converted buffer to the output file
1047b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                size_t outlen = (size_t) (bufp - outbuf);
1048b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                outfoffset += (int32_t)(wr = fwrite(outbuf, 1, outlen, outfile));
1049b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (wr != outlen) {
1050b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    UnicodeString str(strerror(errno));
1051b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    initMsg(pname);
1052b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    u_wmsg(stderr, "cantWrite", str.getTerminatedBuffer());
1053b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    willexit = TRUE;
1054b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
1055b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1056b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (willexit) {
1057b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    goto error_exit;
1058b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
1059b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } while (!toSawEndOfUnicode);
1060b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } while (!fromSawEndOfBytes);
1061b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } while (!flush);           // Stop when we have flushed the
1062b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                // converters (this means that it's
1063b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                // the end of output)
1064b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1065b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    goto normal_exit;
1066b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1067b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruerror_exit:
1068b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ret = FALSE;
1069b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1070b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querunormal_exit:
1071b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Cleanup.
1072b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1073b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ucnv_close(convfrom);
1074b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ucnv_close(convto);
1075b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1076b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_TRANSLITERATION
1077b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete t;
1078b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif
1079b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1080b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (infile != stdin) {
1081b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        fclose(infile);
1082b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1083b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1084b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return ret;
1085b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1086b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1087b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void usage(const char *pname, int ecode) {
1088b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const UChar *msg;
1089b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t msgLen;
1090b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode err = U_ZERO_ERROR;
1091b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    FILE *fp = ecode ? stderr : stdout;
1092b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int res;
1093b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1094b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    initMsg(pname);
1095b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    msg =
1096b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        ures_getStringByKey(gBundle, ecode ? "lcUsageWord" : "ucUsageWord",
1097b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                            &msgLen, &err);
1098b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString upname(pname, (int32_t)(uprv_strlen(pname) + 1));
1099b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString mname(msg, msgLen + 1);
1100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    res = u_wmsg(fp, "usage", mname.getBuffer(), upname.getBuffer());
1102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (!ecode) {
1103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (!res) {
1104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            fputc('\n', fp);
1105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (!u_wmsg(fp, "help")) {
1107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            /* Now dump callbacks and finish. */
1108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            int i, count =
1110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                sizeof(transcode_callbacks) / sizeof(*transcode_callbacks);
1111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            for (i = 0; i < count; ++i) {
1112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                fprintf(fp, " %s", transcode_callbacks[i].name);
1113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            fputc('\n', fp);
1115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    exit(ecode);
1119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruextern int
1122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querumain(int argc, char **argv)
1123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
1124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    FILE *outfile;
1125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int ret = 0;
1126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    size_t bufsz = DEFAULT_BUFSZ;
1128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const char *fromcpage = 0;
1130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const char *tocpage = 0;
1131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const char *translit = 0;
1132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const char *outfilestr = 0;
1133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool fallback = FALSE;
1134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UConverterFromUCallback fromucallback = UCNV_FROM_U_CALLBACK_STOP;
1136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const void *fromuctxt = 0;
1137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UConverterToUCallback toucallback = UCNV_TO_U_CALLBACK_STOP;
1138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const void *touctxt = 0;
1139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    char **iter, **remainArgv, **remainArgvLimit;
1141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    char **end = argv + argc;
1142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const char *pname;
1144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool printConvs = FALSE, printCanon = FALSE, printTranslits = FALSE;
1146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    const char *printName = 0;
1147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool verbose = FALSE;
1149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode status = U_ZERO_ERROR;
1150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ConvertFile cf;
1152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* Initialize ICU */
1154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    u_init(&status);
1155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) {
1156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        fprintf(stderr, "%s: can not initialize ICU.  status = %s\n",
1157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            argv[0], u_errorName(status));
1158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        exit(1);
1159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Get and prettify pname.
1162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    pname = uprv_strrchr(*argv, U_FILE_SEP_CHAR);
1163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifdef U_WINDOWS
1164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (!pname) {
1165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        pname = uprv_strrchr(*argv, '/');
1166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif
1168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (!pname) {
1169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        pname = *argv;
1170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
1171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        ++pname;
1172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // First, get the arguments from command-line
1175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // to know the codepages to convert between
1176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    remainArgv = remainArgvLimit = argv + 1;
1178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (iter = argv + 1; iter != end; iter++) {
1179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Check for from charset
1180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (strcmp("-f", *iter) == 0 || !strcmp("--from-code", *iter)) {
1181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            iter++;
1182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (iter != end)
1183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                fromcpage = *iter;
1184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            else
1185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                usage(pname, 1);
1186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (strcmp("-t", *iter) == 0 || !strcmp("--to-code", *iter)) {
1187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            iter++;
1188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (iter != end)
1189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                tocpage = *iter;
1190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            else
1191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                usage(pname, 1);
1192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (strcmp("-x", *iter) == 0) {
1193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            iter++;
1194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (iter != end)
1195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                translit = *iter;
1196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            else
1197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                usage(pname, 1);
1198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (!strcmp("--fallback", *iter)) {
1199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            fallback = TRUE;
1200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (!strcmp("--no-fallback", *iter)) {
1201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            fallback = FALSE;
1202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (strcmp("-b", *iter) == 0 || !strcmp("--block-size", *iter)) {
1203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            iter++;
1204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (iter != end) {
1205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                bufsz = atoi(*iter);
1206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if ((int) bufsz <= 0) {
1207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    initMsg(pname);
1208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    UnicodeString str(*iter);
1209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    initMsg(pname);
1210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    u_wmsg(stderr, "badBlockSize", str.getTerminatedBuffer());
1211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    return 3;
1212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
1213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else {
1214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                usage(pname, 1);
1215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (strcmp("-l", *iter) == 0 || !strcmp("--list", *iter)) {
1217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (printTranslits) {
1218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                usage(pname, 1);
1219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            printConvs = TRUE;
1221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (strcmp("--default-code", *iter) == 0) {
1222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (printTranslits) {
1223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                usage(pname, 1);
1224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            printName = ucnv_getDefaultName();
1226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (strcmp("--list-code", *iter) == 0) {
1227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (printTranslits) {
1228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                usage(pname, 1);
1229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            iter++;
1232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (iter != end) {
1233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                UErrorCode e = U_ZERO_ERROR;
1234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                printName = ucnv_getAlias(*iter, 0, &e);
1235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (U_FAILURE(e) || !printName) {
1236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    UnicodeString str(*iter);
1237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    initMsg(pname);
1238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    u_wmsg(stderr, "noSuchCodeset", str.getTerminatedBuffer());
1239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    return 2;
1240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
1241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else
1242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                usage(pname, 1);
1243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (strcmp("--canon", *iter) == 0) {
1244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            printCanon = TRUE;
1245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (strcmp("-L", *iter) == 0
1246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            || !strcmp("--list-transliterators", *iter)) {
1247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (printConvs) {
1248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                usage(pname, 1);
1249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            printTranslits = TRUE;
1251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (strcmp("-h", *iter) == 0 || !strcmp("-?", *iter)
1252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            || !strcmp("--help", *iter)) {
1253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            usage(pname, 0);
1254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (!strcmp("-c", *iter)) {
1255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            fromucallback = UCNV_FROM_U_CALLBACK_SKIP;
1256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (!strcmp("--to-callback", *iter)) {
1257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            iter++;
1258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (iter != end) {
1259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                const struct callback_ent *cbe = findCallback(*iter);
1260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (cbe) {
1261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    fromucallback = cbe->fromu;
1262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    fromuctxt = cbe->fromuctxt;
1263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else {
1264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    UnicodeString str(*iter);
1265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    initMsg(pname);
1266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer());
1267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    return 4;
1268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
1269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else {
1270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                usage(pname, 1);
1271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (!strcmp("--from-callback", *iter)) {
1273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            iter++;
1274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (iter != end) {
1275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                const struct callback_ent *cbe = findCallback(*iter);
1276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (cbe) {
1277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    toucallback = cbe->tou;
1278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    touctxt = cbe->touctxt;
1279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else {
1280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    UnicodeString str(*iter);
1281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    initMsg(pname);
1282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer());
1283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    return 4;
1284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
1285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else {
1286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                usage(pname, 1);
1287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (!strcmp("-i", *iter)) {
1289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            toucallback = UCNV_TO_U_CALLBACK_SKIP;
1290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (!strcmp("--callback", *iter)) {
1291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            iter++;
1292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (iter != end) {
1293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                const struct callback_ent *cbe = findCallback(*iter);
1294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (cbe) {
1295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    fromucallback = cbe->fromu;
1296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    fromuctxt = cbe->fromuctxt;
1297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    toucallback = cbe->tou;
1298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    touctxt = cbe->touctxt;
1299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                } else {
1300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    UnicodeString str(*iter);
1301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    initMsg(pname);
1302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer());
1303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    return 4;
1304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
1305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else {
1306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                usage(pname, 1);
1307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (!strcmp("-s", *iter) || !strcmp("--silent", *iter)) {
1309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            verbose = FALSE;
1310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (!strcmp("-v", *iter) || !strcmp("--verbose", *iter)) {
1311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            verbose = TRUE;
1312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (!strcmp("-V", *iter) || !strcmp("--version", *iter)) {
1313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            printf("%s v2.1  ICU " U_ICU_VERSION "\n", pname);
1314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return 0;
1315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (!strcmp("-o", *iter) || !strcmp("--output", *iter)) {
1316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            ++iter;
1317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (iter != end && !outfilestr) {
1318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                outfilestr = *iter;
1319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else {
1320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                usage(pname, 1);
1321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (0 == strcmp("--add-signature", *iter)) {
1323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            cf.signature = 1;
1324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (0 == strcmp("--remove-signature", *iter)) {
1325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            cf.signature = -1;
1326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else if (**iter == '-' && (*iter)[1]) {
1327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            usage(pname, 1);
1328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
1329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // move a non-option up in argv[]
1330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            *remainArgvLimit++ = *iter;
1331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (printConvs || printName) {
1335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return printConverters(pname, printName, printCanon) ? 2 : 0;
1336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else if (printTranslits) {
1337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return printTransliterators(printCanon) ? 3 : 0;
1338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (!fromcpage || !uprv_strcmp(fromcpage, "-")) {
1341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        fromcpage = ucnv_getDefaultName();
1342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (!tocpage || !uprv_strcmp(tocpage, "-")) {
1344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        tocpage = ucnv_getDefaultName();
1345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Open the correct output file or connect to stdout for writing output
1348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (outfilestr != 0 && strcmp(outfilestr, "-")) {
1349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        outfile = fopen(outfilestr, "wb");
1350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (outfile == 0) {
1351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UnicodeString str1(outfilestr, "");
1352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UnicodeString str2(strerror(errno), "");
1353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            initMsg(pname);
1354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            u_wmsg(stderr, "cantCreateOutputF",
1355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                str1.getBuffer(), str2.getBuffer());
1356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return 1;
1357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
1359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        outfilestr = "-";
1360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        outfile = stdout;
1361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifdef USE_FILENO_BINARY_MODE
1362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (setmode(fileno(outfile), O_BINARY) == -1) {
1363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            u_wmsg(stderr, "cantSetOutBinMode");
1364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            exit(-1);
1365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif
1367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    /* Loop again on the arguments to find all the input files, and
1370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    convert them. */
1371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    cf.setBufferSize(bufsz);
1373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if(remainArgv < remainArgvLimit) {
1375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        for (iter = remainArgv; iter != remainArgvLimit; iter++) {
1376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (!cf.convertFile(
1377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    pname, fromcpage, toucallback, touctxt, tocpage,
1378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    fromucallback, fromuctxt, fallback, translit, *iter,
1379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    outfile, verbose)
1380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            ) {
1381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                goto error_exit;
1382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
1383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
1385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (!cf.convertFile(
1386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                pname, fromcpage, toucallback, touctxt, tocpage,
1387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                fromucallback, fromuctxt, fallback, translit, 0,
1388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                outfile, verbose)
1389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        ) {
1390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            goto error_exit;
1391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
1392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    goto normal_exit;
1395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruerror_exit:
139650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if !UCONFIG_NO_LEGACY_CONVERSION
1397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ret = 1;
139850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#else
139950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho    fprintf(stderr, "uconv error: UCONFIG_NO_LEGACY_CONVERSION is on. See uconfig.h\n");
140050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif
1401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querunormal_exit:
1402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (outfile != stdout) {
1404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        fclose(outfile);
1405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
1406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return ret;
1408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
1409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
1411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*
1412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Hey, Emacs, please set the following:
1413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
1414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Local Variables:
1415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * indent-tabs-mode: nil
1416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * End:
1417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
1418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
1419