1b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/***************************************************************************** 2b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho* Copyright (C) 1999-2009, International Business Machines 4b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Corporation and others. All Rights Reserved. 5b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 6b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru******************************************************************************/ 7b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 8b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 9b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * uconv(1): an iconv(1)-like converter using ICU. 10b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 11b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Original code by Jonas Utterström <jonas.utterstrom@vittran.norrnod.se> 12b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * contributed in 1999. 13b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 14b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Conversion to the C conversion API and many improvements by 15b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Yves Arrouye <yves@realnames.com>, current maintainer. 16b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 17b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Markus Scherer maintainer from 2003. 18b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * See source code repository history for changes. 
19b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 20b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 21b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <unicode/utypes.h> 22b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <unicode/putil.h> 23b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <unicode/ucnv.h> 24b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <unicode/uenum.h> 25b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <unicode/unistr.h> 26b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <unicode/translit.h> 27b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <unicode/uset.h> 28b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <unicode/uclean.h> 29b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 30b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <stdio.h> 31b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <errno.h> 32b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <string.h> 33b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <stdlib.h> 34b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 35b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cmemory.h" 36b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cstring.h" 37b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "ustrfmt.h" 38b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 39b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uwmsg.h" 40b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 41b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_NAMESPACE_USE 42b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 43b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if (defined(U_WINDOWS) || defined(U_CYGWIN)) && 
!defined(__STRICT_ANSI__) 44b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <io.h> 45b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <fcntl.h> 46b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if defined(U_WINDOWS) 47b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define USE_FILENO_BINARY_MODE 1 48b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* Windows likes to rename Unix-like functions */ 49b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifndef fileno 50b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define fileno _fileno 51b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 52b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifndef setmode 53b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define setmode _setmode 54b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 55b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifndef O_BINARY 56b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define O_BINARY _O_BINARY 57b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 58b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 59b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 60b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 61b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifdef UCONVMSG_LINK 62b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* below from the README */ 63b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/utypes.h" 64b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/udata.h" 65b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CFUNC char uconvmsg_dat[]; 66b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 67b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 68b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define 
LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 69b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 70b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define DEFAULT_BUFSZ 4096 71b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define UCONVMSG "uconvmsg" 72b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 73b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic UResourceBundle *gBundle = 0; /* Bundle containing messages. */ 74b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 75b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 76b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Initialize the message bundle so that message strings can be fetched 77b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * by u_wmsg(). 78b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 79b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 80b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 81b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void initMsg(const char *pname) { 82b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru static int ps = 0; 83b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 84b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!ps) { 85b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char dataPath[2048]; /* XXX Sloppy: should be PATH_MAX. 
*/ 86b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode err = U_ZERO_ERROR; 87b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 88b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ps = 1; 89b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 90b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Set up our static data - if any */ 91b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifdef UCONVMSG_LINK 92b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru udata_setAppData(UCONVMSG, (const void*) uconvmsg_dat, &err); 93b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(err)) { 94b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fprintf(stderr, "%s: warning, problem installing our static resource bundle data uconvmsg: %s - trying anyways.\n", 95b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pname, u_errorName(err)); 96b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru err = U_ZERO_ERROR; /* It may still fail */ 97b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 98b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 99b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Get messages. 
*/ 101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru gBundle = u_wmsg_setPath(UCONVMSG, &err); 102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(err)) { 103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fprintf(stderr, 104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "%s: warning: couldn't open bundle %s: %s\n", 105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pname, UCONVMSG, u_errorName(err)); 106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifdef UCONVMSG_LINK 107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fprintf(stderr, 108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "%s: setAppData was called, internal data %s failed to load\n", 109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pname, UCONVMSG); 110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru err = U_ZERO_ERROR; 113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* that was try #1, try again with a path */ 114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_strcpy(dataPath, u_getDataDirectory()); 115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_strcat(dataPath, U_FILE_SEP_STRING); 116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_strcat(dataPath, UCONVMSG); 117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru gBundle = u_wmsg_setPath(dataPath, &err); 119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(err)) { 120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fprintf(stderr, 121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "%s: warning: still couldn't open bundle %s: %s\n", 122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pname, 
dataPath, u_errorName(err)); 123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fprintf(stderr, "%s: warning: messages will not be displayed\n", pname); 124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* Mapping of callback names to the callbacks passed to the converter 130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru API. */ 131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic struct callback_ent { 133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *name; 134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UConverterFromUCallback fromu; 135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const void *fromuctxt; 136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UConverterToUCallback tou; 137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const void *touctxt; 138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} transcode_callbacks[] = { 139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { "substitute", 140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCNV_FROM_U_CALLBACK_SUBSTITUTE, 0, 141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCNV_TO_U_CALLBACK_SUBSTITUTE, 0 }, 142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { "skip", 143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCNV_FROM_U_CALLBACK_SKIP, 0, 144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCNV_TO_U_CALLBACK_SKIP, 0 }, 145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { "stop", 
146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCNV_FROM_U_CALLBACK_STOP, 0, 147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCNV_TO_U_CALLBACK_STOP, 0 }, 148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { "escape", 149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCNV_FROM_U_CALLBACK_ESCAPE, 0, 150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCNV_TO_U_CALLBACK_ESCAPE, 0}, 151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { "escape-icu", 152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_ICU, 153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_ICU }, 154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { "escape-java", 155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_JAVA, 156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_JAVA }, 157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { "escape-c", 158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_C, 159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_C }, 160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { "escape-xml", 161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX, 162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX }, 163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { "escape-xml-hex", 164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX, 165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX }, 
166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { "escape-xml-dec", 167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC, 168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC }, 169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { "escape-unicode", UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_UNICODE, 170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_UNICODE } 171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}; 172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* Return a pointer to a callback record given its name. */ 174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const struct callback_ent *findCallback(const char *name) { 176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int i, count = 177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sizeof(transcode_callbacks) / sizeof(*transcode_callbacks); 178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* We'll do a linear search, there aren't many of them and bsearch() 180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru may not be that portable. 
*/ 181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i = 0; i < count; ++i) { 183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!uprv_stricmp(name, transcode_callbacks[i].name)) { 184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return &transcode_callbacks[i]; 185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* Print converter information. If lookfor is set, only that converter will 192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru be printed, otherwise all converters will be printed. If canon is non 193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru zero, tags and aliases for each converter are printed too, in the format 194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expected for convrters.txt(5). 
*/ 195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int printConverters(const char *pname, const char *lookfor, 197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool canon) 198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode err = U_ZERO_ERROR; 200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t num; 201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint16_t num_stds; 202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char **stds; 203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* If there is a specified name, just handle that now. */ 205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (lookfor) { 207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!canon) { 208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printf("%s\n", lookfor); 209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Because we are printing a canonical name, we need the 212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru true converter name. We've done that already except for 213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru the default name (because we want to print the exact 214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru name one would get when calling ucnv_getDefaultName() 215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru in non-canon mode). 
But since we do not know at this 216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru point if we have the default name or something else, we 217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru need to normalize again to the canonical converter 218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru name. */ 219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *truename = ucnv_getAlias(lookfor, 0, &err); 221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_SUCCESS(err)) { 222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru lookfor = truename; 223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru err = U_ZERO_ERROR; 225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Print converter names. We come here for one of two reasons: we 230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru are printing all the names (lookfor was null), or we have a 231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru single converter to print but in canon mode, hence we need to 232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru get to it in order to print everything. 
*/ 233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru num = ucnv_countAvailable(); 235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (num <= 0) { 236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru initMsg(pname); 237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_wmsg(stderr, "cantGetNames"); 238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return -1; 239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (lookfor) { 241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru num = 1; /* We know where we want to be. */ 242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru num_stds = ucnv_countStandards(); 245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru stds = (const char **) uprv_malloc(num_stds * sizeof(*stds)); 246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!stds) { 247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_wmsg(stderr, "cantGetTag", u_wmsg_errorName(U_MEMORY_ALLOCATION_ERROR)); 248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return -1; 249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint16_t s; 251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (canon) { 253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printf("{ "); 254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (s = 0; s < num_stds; ++s) { 256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru stds[s] = ucnv_getStandard(s, &err); 
257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (canon) { 258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printf("%s ", stds[s]); 259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(err)) { 261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_wmsg(stderr, "cantGetTag", u_wmsg_errorName(err)); 262c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto error_cleanup; 263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (canon) { 266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru puts("}"); 267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (int32_t i = 0; i < num; i++) { 271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *name; 272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint16_t num_aliases; 273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Set the name either to what we are looking for, or 275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru to the current converter name. 
*/ 276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (lookfor) { 278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru name = lookfor; 279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru name = ucnv_getAvailableName(i); 281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Get all the aliases associated to the name. */ 284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru err = U_ZERO_ERROR; 286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru num_aliases = ucnv_countAliases(name, &err); 287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(err)) { 288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printf("%s", name); 289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString str(name, ""); 291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru putchar('\t'); 292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_wmsg(stderr, "cantGetAliases", str.getTerminatedBuffer(), 293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_wmsg_errorName(err)); 294c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto error_cleanup; 295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint16_t a, s, t; 297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Write all the aliases and their tags. 
*/ 299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (a = 0; a < num_aliases; ++a) { 301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *alias = ucnv_getAlias(name, a, &err); 302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(err)) { 304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString str(name, ""); 305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru putchar('\t'); 306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_wmsg(stderr, "cantGetAliases", str.getTerminatedBuffer(), 307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_wmsg_errorName(err)); 308c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru goto error_cleanup; 309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Print the current alias so that it looks right. */ 312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printf("%s%s%s", (canon ? (a == 0? "" : "\t" ) : "") , 313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru alias, 314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (canon ? "" : " ")); 315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Look (slowly, linear searching) for a tag. 
*/ 317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (canon) { 319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* -1 to skip the last standard */ 320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (s = t = 0; s < num_stds-1; ++s) { 321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UEnumeration *nameEnum = ucnv_openStandardNames(name, stds[s], &err); 322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_SUCCESS(err)) { 323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* List the standard tags */ 324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *standardName; 325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool isFirst = TRUE; 326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode enumError = U_ZERO_ERROR; 327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while ((standardName = uenum_next(nameEnum, NULL, &enumError))) { 328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* See if this alias is supported by this standard. */ 329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!strcmp(standardName, alias)) { 330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!t) { 331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printf(" {"); 332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t = 1; 333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Print a * after the default standard name */ 335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printf(" %s%s", stds[s], (isFirst ? 
"*" : "")); 336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru isFirst = FALSE; 338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (t) { 342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printf(" }"); 343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Terminate this entry. */ 346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (canon) { 347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru puts(""); 348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Move on. */ 351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Terminate this entry. */ 353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!canon) { 354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru puts(""); 355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Free temporary data. 
*/ 360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_free(stds); 362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Success. */ 364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 366c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queruerror_cleanup: 367c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru uprv_free(stds); 368c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return -1; 369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* Print all available transliterators. If canon is non zero, print 372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru one transliterator per line. 
*/ 373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int printTransliterators(UBool canon) 375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if UCONFIG_NO_TRANSLITERATION 377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printf("no transliterators available because of UCONFIG_NO_TRANSLITERATION, see uconfig.h\n"); 378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 1; 379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#else 380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t numtrans = utrans_countAvailableIDs(), i; 381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int buflen = 512; 382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char *buf = (char *) uprv_malloc(buflen); 383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char staticbuf[512]; 384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char sepchar = canon ? 
'\n' : ' '; 386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!buf) { 388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buf = staticbuf; 389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buflen = sizeof(staticbuf); 390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i = 0; i < numtrans; ++i) { 393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t len = utrans_getAvailableID(i, buf, buflen); 394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (len >= buflen - 1) { 395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (buf != staticbuf) { 396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buflen <<= 1; 397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (buflen < len) { 398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buflen = len + 64; 399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buf = (char *) uprv_realloc(buf, buflen); 401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!buf) { 402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buf = staticbuf; 403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buflen = sizeof(staticbuf); 404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru utrans_getAvailableID(i, buf, buflen); 407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (len >= buflen) { 408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_strcpy(buf + buflen - 4, "..."); /* Truncate the name. 
*/ 409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printf("%s", buf); 413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (i < numtrans - 1) { 414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru putchar(sepchar); 415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Add a terminating newline if needed. */ 419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (sepchar != '\n') { 421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru putchar('\n'); 422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Free temporary data. */ 425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (buf != staticbuf) { 427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_free(buf); 428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Success. 
*/ 431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruenum { 437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uSP = 0x20, // space 438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uCR = 0xd, // carriage return 439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uLF = 0xa, // line feed 440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uNL = 0x85, // newline 441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uLS = 0x2028, // line separator 442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uPS = 0x2029, // paragraph separator 443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uSig = 0xfeff // signature/BOM character 444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}; 445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic inline int32_t 447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QuerugetChunkLimit(const UnicodeString &prev, const UnicodeString &s) { 448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // find one of 449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // CR, LF, CRLF, NL, LS, PS 450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // for paragraph ends (see UAX #13/Unicode 4) 451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // and include it in the chunk 452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // all of these characters are on the BMP 453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // do not include FF or VT in case they are part of a paragraph 
454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // (important for bidi contexts) 455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru static const UChar paraEnds[] = { 456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0xd, 0xa, 0x85, 0x2028, 0x2029 457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru }; 458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru enum { 459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru iCR, iLF, iNL, iLS, iPS, iCount 460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru }; 461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // first, see if there is a CRLF split between prev and s 463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (prev.endsWith(paraEnds + iCR, 1)) { 464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (s.startsWith(paraEnds + iLF, 1)) { 465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 1; // split CRLF, include the LF 466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (!s.isEmpty()) { 467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; // complete the last chunk 468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return -1; // wait for actual further contents to arrive 470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *u = s.getBuffer(), *limit = u + s.length(); 474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar c; 475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while (u < limit) { 
477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c = *u++; 478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ( 479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ((c < uSP) && (c == uCR || c == uLF)) || 480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (c == uNL) || 481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ((c & uLS) == uLS) 482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ) { 483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c == uCR) { 484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // check for CRLF 485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (u == limit) { 486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return -1; // LF may be in the next chunk 487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (*u == uLF) { 488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++u; // include the LF in this chunk 489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return (int32_t)(u - s.getBuffer()); 492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return -1; // continue collecting the chunk 496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruenum { 499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CNV_NO_FEFF, // cannot convert the U+FEFF Unicode signature character (BOM) 500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CNV_WITH_FEFF, // can convert the U+FEFF signature character 
501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CNV_ADDS_FEFF // automatically adds/detects the U+FEFF signature character 502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}; 503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic inline UChar 505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QuerunibbleToHex(uint8_t n) { 506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru n &= 0xf; 507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru n <= 9 ? 509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (UChar)(0x30 + n) : 510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (UChar)((0x61 - 10) + n); 511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// check the converter's Unicode signature properties; 514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// the fromUnicode side of the converter must be in its initial state 515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// and will be reset again if it was used 516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int32_t 517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QuerucnvSigType(UConverter *cnv) { 518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode err; 519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t result; 520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // test if the output charset can convert U+FEFF 522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru USet *set = uset_open(1, 0); 523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru err = U_ZERO_ERROR; 
524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &err); 525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_SUCCESS(err) && uset_contains(set, uSig)) { 526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result = CNV_WITH_FEFF; 527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result = CNV_NO_FEFF; // an error occurred or U+FEFF cannot be converted 529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uset_close(set); 531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (result == CNV_WITH_FEFF) { 533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // test if the output charset emits a signature anyway 534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar a[1] = { 0x61 }; // "a" 535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *in; 536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char buffer[20]; 538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char *out; 539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru in = a; 541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru out = buffer; 542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru err = U_ZERO_ERROR; 543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucnv_fromUnicode(cnv, 544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru &out, buffer + sizeof(buffer), 545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru &in, a + 1, 546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru NULL, TRUE, &err); 
547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucnv_resetFromUnicode(cnv); 548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (NULL != ucnv_detectUnicodeSignature(buffer, (int32_t)(out - buffer), NULL, &err) && 550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_SUCCESS(err) 551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ) { 552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result = CNV_ADDS_FEFF; 553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return result; 557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruclass ConvertFile { 560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querupublic: 561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ConvertFile() : 562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buf(NULL), outbuf(NULL), fromoffsets(NULL), 563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bufsz(0), signature(0) {} 564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru void 566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru setBufferSize(size_t bufferSize) { 567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bufsz = bufferSize; 568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buf = new char[2 * bufsz]; 570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru outbuf = buf + bufsz; 571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // +1 for an added U+FEFF in the intermediate Unicode buffer 573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fromoffsets = new int32_t[bufsz + 1]; 574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ~ConvertFile() { 577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete [] buf; 578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete [] fromoffsets; 579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool convertFile(const char *pname, 582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *fromcpage, 583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UConverterToUCallback toucallback, 584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const void *touctxt, 585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *tocpage, 586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UConverterFromUCallback fromucallback, 587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const void *fromuctxt, 588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool fallback, 589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *translit, 590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *infilestr, 591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru FILE * outfile, int verbose); 592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruprivate: 593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru friend int main(int argc, char **argv); 594b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
595b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char *buf, *outbuf; 596b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t *fromoffsets; 597b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 598b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru size_t bufsz; 599b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int8_t signature; // add (1) or remove (-1) a U+FEFF Unicode signature character 600b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}; 601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Convert a file from one encoding to another 603b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool 604b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruConvertFile::convertFile(const char *pname, 605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *fromcpage, 606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UConverterToUCallback toucallback, 607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const void *touctxt, 608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *tocpage, 609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UConverterFromUCallback fromucallback, 610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const void *fromuctxt, 611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool fallback, 612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *translit, 613b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *infilestr, 614b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru FILE * outfile, int verbose) 615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru FILE *infile; 617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool ret = TRUE; 
618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UConverter *convfrom = 0; 619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UConverter *convto = 0; 620b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode err = U_ZERO_ERROR; 621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool flush; 622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *cbufp, *prevbufp; 623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char *bufp; 624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t infoffset = 0, outfoffset = 0; /* Where we are in the file, for error reporting. */ 626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 627b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *unibuf, *unibufbp; 628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar *unibufp; 629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru size_t rd, wr; 631b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_TRANSLITERATION 633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Transliterator *t = 0; // Transliterator acting on Unicode data. 634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString chunk; // One chunk of the text being collected for transformation. 635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString u; // String to do the transliteration. 
637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t ulen; 638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // use conversion offsets for error messages 640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // unless a transliterator is used - 641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // a text transformation will reorder characters in unpredictable ways 642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool useOffsets = TRUE; 643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Open the correct input file or connect to stdin for reading input 645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (infilestr != 0 && strcmp(infilestr, "-")) { 647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru infile = fopen(infilestr, "rb"); 648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (infile == 0) { 649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString str1(infilestr, ""); 650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru str1.append((UChar32) 0); 651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString str2(strerror(errno), ""); 652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru str2.append((UChar32) 0); 653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru initMsg(pname); 654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_wmsg(stderr, "cantOpenInputF", str1.getBuffer(), str2.getBuffer()); 655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru infilestr = 
"-"; 659b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru infile = stdin; 660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifdef USE_FILENO_BINARY_MODE 661b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (setmode(fileno(stdin), O_BINARY) == -1) { 662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru initMsg(pname); 663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_wmsg(stderr, "cantSetInBinMode"); 664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 666b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (verbose) { 670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fprintf(stderr, "%s:\n", infilestr); 671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 672b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_TRANSLITERATION 674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Create transliterator as needed. 675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (translit != NULL && *translit) { 677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UParseError parse; 678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString str(translit), pestr; 679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Create from rules or by ID as needed. 
*/ 681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru parse.line = -1; 683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (uprv_strchr(translit, ':') || uprv_strchr(translit, '>') || uprv_strchr(translit, '<') || uprv_strchr(translit, '>')) { 685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t = Transliterator::createFromRules("Uconv", str, UTRANS_FORWARD, parse, err); 686b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t = Transliterator::createInstance(translit, UTRANS_FORWARD, err); 688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 689b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 690b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(err)) { 691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru str.append((UChar32) 0); 692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru initMsg(pname); 693b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (parse.line >= 0) { 695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar linebuf[20], offsetbuf[20]; 696b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_itou(linebuf, 20, parse.line, 10, 0); 697b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_itou(offsetbuf, 20, parse.offset, 10, 0); 698b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_wmsg(stderr, "cantCreateTranslitParseErr", str.getTerminatedBuffer(), 699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_wmsg_errorName(err), linebuf, offsetbuf); 700b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 701b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_wmsg(stderr, "cantCreateTranslit", 
str.getTerminatedBuffer(), 702b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_wmsg_errorName(err)); 703b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 704b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 705b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (t) { 706b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete t; 707b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t = 0; 708b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 709b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru goto error_exit; 710b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 711b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 712b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru useOffsets = FALSE; 713b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 714b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 715b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 716b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Create codepage converter. If the codepage or its aliases weren't 717b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // available, it returns NULL and a failure code. We also set the 718b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // callbacks, and return errors in the same way. 
719b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 720b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru convfrom = ucnv_open(fromcpage, &err); 721b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(err)) { 722b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString str(fromcpage, ""); 723b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru initMsg(pname); 724b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_wmsg(stderr, "cantOpenFromCodeset", str.getTerminatedBuffer(), 725b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_wmsg_errorName(err)); 726b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru goto error_exit; 727b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 728b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucnv_setToUCallBack(convfrom, toucallback, touctxt, 0, 0, &err); 729b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(err)) { 730b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru initMsg(pname); 731b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_wmsg(stderr, "cantSetCallback", u_wmsg_errorName(err)); 732b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru goto error_exit; 733b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 734b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 735b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru convto = ucnv_open(tocpage, &err); 736b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(err)) { 737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString str(tocpage, ""); 738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru initMsg(pname); 739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_wmsg(stderr, "cantOpenToCodeset", str.getTerminatedBuffer(), 740b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_wmsg_errorName(err)); 
741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru goto error_exit; 742b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 743b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucnv_setFromUCallBack(convto, fromucallback, fromuctxt, 0, 0, &err); 744b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(err)) { 745b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru initMsg(pname); 746b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_wmsg(stderr, "cantSetCallback", u_wmsg_errorName(err)); 747b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru goto error_exit; 748b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 749b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucnv_setFallback(convto, fallback); 750b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 751b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool willexit, fromSawEndOfBytes, toSawEndOfUnicode; 752b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int8_t sig; 753b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 754b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // OK, we can convert now. 
755b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sig = signature; 756b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru rd = 0; 757b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 758b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru do { 759b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru willexit = FALSE; 760b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 761b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // input file offset at the beginning of the next buffer 762b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru infoffset += rd; 763b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 764b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru rd = fread(buf, 1, bufsz, infile); 765b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (ferror(infile) != 0) { 766b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString str(strerror(errno)); 767b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru initMsg(pname); 768b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_wmsg(stderr, "cantRead", str.getTerminatedBuffer()); 769b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru goto error_exit; 770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 771b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Convert the read buffer into the new encoding via Unicode. 773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // After the call 'unibufp' will be placed behind the last 774b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // character that was converted in the 'unibuf'. 775b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Also the 'cbufp' is positioned behind the last converted 776b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // character. 
777b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // At the last conversion in the file, flush should be set to 778b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // true so that we get all characters converted. 779b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 780b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // The converter must be flushed at the end of conversion so 781b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // that characters on hold also will be written. 782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 783b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru cbufp = buf; 784b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru flush = (UBool)(rd != bufsz); 785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // convert until the input is consumed 787b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru do { 788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // remember the start of the current byte-to-Unicode conversion 789b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru prevbufp = cbufp; 790b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 791b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru unibuf = unibufp = u.getBuffer((int32_t)bufsz); 792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 793b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Use bufsz instead of u.getCapacity() for the targetLimit 794b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // so that we don't overflow fromoffsets[]. 795b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucnv_toUnicode(convfrom, &unibufp, unibuf + bufsz, &cbufp, 796b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buf + rd, useOffsets ? 
fromoffsets : NULL, flush, &err); 797b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 798b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ulen = (int32_t)(unibufp - unibuf); 799b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u.releaseBuffer(U_SUCCESS(err) ? ulen : 0); 800b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 801b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // fromSawEndOfBytes indicates that ucnv_toUnicode() is done 802b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // converting all of the input bytes. 803b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // It works like this because ucnv_toUnicode() returns only under the 804b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // following conditions: 805b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // - an error occurred during conversion (an error code is set) 806b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // - the target buffer is filled (the error code indicates an overflow) 807b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // - the source is consumed 808b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // That is, if the error code does not indicate a failure, 809b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // not even an overflow, then the source must be consumed entirely. 
810b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fromSawEndOfBytes = (UBool)U_SUCCESS(err); 811b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 812b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (err == U_BUFFER_OVERFLOW_ERROR) { 813b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru err = U_ZERO_ERROR; 814b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (U_FAILURE(err)) { 815b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char pos[32], errorBytes[32]; 816b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int8_t i, length, errorLength; 817b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 818b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode localError = U_ZERO_ERROR; 819b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errorLength = (int8_t)sizeof(errorBytes); 820b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucnv_getInvalidChars(convfrom, errorBytes, &errorLength, &localError); 821b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(localError) || errorLength == 0) { 822b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errorLength = 1; 823b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 824b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 825b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // print the input file offset of the start of the error bytes: 826b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // input file offset of the current byte buffer + 827b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // length of the just consumed bytes - 828b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // length of the error bytes 829b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length = 830b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (int8_t)sprintf(pos, "%d", 
831b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (int)(infoffset + (cbufp - buf) - errorLength)); 832b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 833b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // output the bytes that caused the error 834b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString str; 835b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i = 0; i < errorLength; ++i) { 836b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (i > 0) { 837b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru str.append((UChar)uSP); 838b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 839b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru str.append(nibbleToHex((uint8_t)errorBytes[i] >> 4)); 840b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru str.append(nibbleToHex((uint8_t)errorBytes[i])); 841b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 842b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 843b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru initMsg(pname); 844b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_wmsg(stderr, "problemCvtToU", 845b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString(pos, length, "").getTerminatedBuffer(), 846b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru str.getTerminatedBuffer(), 847b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_wmsg_errorName(err)); 848b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 849b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru willexit = TRUE; 850b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru err = U_ZERO_ERROR; /* reset the error for the rest of the conversion. 
*/ 851b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 852b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 853b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Replaced a check for whether the input was consumed by 854b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // looping until it is; message key "premEndInput" now obsolete. 855b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 856b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (ulen == 0) { 857b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 858b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 859b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 860b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // remove a U+FEFF Unicode signature character if requested 861b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (sig < 0) { 862b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (u.charAt(0) == uSig) { 863b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u.remove(0, 1); 864b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 865b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // account for the removed UChar and offset 866b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru --ulen; 867b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 868b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (useOffsets) { 869b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // remove an offset from fromoffsets[] as well 870b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // to keep the array parallel with the UChars 871b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru memmove(fromoffsets, fromoffsets + 1, ulen * 4); 872b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 873b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 874b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Queru } 875b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sig = 0; 876b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 877b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 878b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_TRANSLITERATION 879b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Transliterate/transform if needed. 880b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 881b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // For transformation, we use chunking code - 882b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // collect Unicode input until, for example, an end-of-line, 883b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // then transform and output-convert that and continue collecting. 884b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // This makes the transformation result independent of the buffer size 885b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // while avoiding the slower keyboard mode. 886b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // The end-of-chunk characters are completely included in the 887b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // transformed string in case they are to be transformed themselves. 
888b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (t != NULL) { 889b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString out; 890b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t chunkLimit; 891b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 892b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru do { 893b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru chunkLimit = getChunkLimit(chunk, u); 894b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (chunkLimit < 0 && flush && fromSawEndOfBytes) { 895b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // use all of the rest at the end of the text 896b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru chunkLimit = u.length(); 897b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 898b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (chunkLimit >= 0) { 899b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // complete the chunk and transform it 900b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru chunk.append(u, 0, chunkLimit); 901b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u.remove(0, chunkLimit); 902b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t->transliterate(chunk); 903b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 904b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // append the transformation result to the result and empty the chunk 905b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru out.append(chunk); 906b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru chunk.remove(); 907b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 908b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // continue collecting the chunk 909b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru chunk.append(u); 910b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 
911b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 912b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } while (!u.isEmpty()); 913b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 914b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u = out; 915b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ulen = u.length(); 916b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 917b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 918b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 919b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // add a U+FEFF Unicode signature character if requested 920b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // and possible/necessary 921b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (sig > 0) { 922b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (u.charAt(0) != uSig && cnvSigType(convto) == CNV_WITH_FEFF) { 923b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u.insert(0, (UChar)uSig); 924b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 925b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (useOffsets) { 926b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // insert a pseudo-offset into fromoffsets[] as well 927b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // to keep the array parallel with the UChars 928b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru memmove(fromoffsets + 1, fromoffsets, ulen * 4); 929b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fromoffsets[0] = -1; 930b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 931b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 932b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // account for the additional UChar and offset 933b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++ulen; 
934b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 935b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sig = 0; 936b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 937b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 938b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Convert the Unicode buffer into the destination codepage 939b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Again 'bufp' will be placed behind the last converted character 940b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // And 'unibufp' will be placed behind the last converted unicode character 941b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // At the last conversion flush should be set to true to ensure that 942b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // all characters left get converted 943b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 944b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru unibuf = unibufbp = u.getBuffer(); 945b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 946b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru do { 947b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bufp = outbuf; 948b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 949b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Use fromSawEndOfBytes in addition to the flush flag - 950b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // it indicates whether the intermediate Unicode string 951b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // contains the very last UChars for the very last input bytes. 
952b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucnv_fromUnicode(convto, &bufp, outbuf + bufsz, 953b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru &unibufbp, 954b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru unibuf + ulen, 955b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru NULL, (UBool)(flush && fromSawEndOfBytes), &err); 956b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 957b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // toSawEndOfUnicode indicates that ucnv_fromUnicode() is done 958b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // converting all of the intermediate UChars. 959b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // See comment for fromSawEndOfBytes. 960b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru toSawEndOfUnicode = (UBool)U_SUCCESS(err); 961b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 962b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (err == U_BUFFER_OVERFLOW_ERROR) { 963b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru err = U_ZERO_ERROR; 964b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (U_FAILURE(err)) { 965b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar errorUChars[4]; 966b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *errtag; 967b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char pos[32]; 968b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c; 969b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int8_t i, length, errorLength; 970b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 971b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode localError = U_ZERO_ERROR; 972b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errorLength = (int8_t)LENGTHOF(errorUChars); 973b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucnv_getInvalidUChars(convto, 
errorUChars, &errorLength, &localError); 974b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(localError) || errorLength == 0) { 975b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // need at least 1 so that we don't access beyond the length of fromoffsets[] 976b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errorLength = 1; 977b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 978b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 979b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t ferroffset; 980b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 981b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (useOffsets) { 982b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Unicode buffer offset of the start of the error UChars 983b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ferroffset = (int32_t)((unibufbp - unibuf) - errorLength); 984b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (ferroffset < 0) { 985b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // approximation - the character started in the previous Unicode buffer 986b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ferroffset = 0; 987b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 988b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 989b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // get the corresponding byte offset out of fromoffsets[] 990b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // go back if the offset is not known for some of the UChars 991b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t fromoffset; 992b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru do { 993b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fromoffset = fromoffsets[ferroffset]; 994b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } while (fromoffset < 0 && --ferroffset 
>= 0); 995b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 996b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // total input file offset = 997b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // input file offset of the current byte buffer + 998b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // byte buffer offset of where the current Unicode buffer is converted from + 999b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // fromoffsets[Unicode offset] 1000b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ferroffset = infoffset + (prevbufp - buf) + fromoffset; 1001b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errtag = "problemCvtFromU"; 1002b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1003b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Do not use fromoffsets if (t != NULL) because the Unicode text may 1004b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // be different from what the offsets refer to. 
1005b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1006b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // output file offset 1007b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ferroffset = (int32_t)(outfoffset + (bufp - outbuf)); 1008b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errtag = "problemCvtFromUOut"; 1009b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1010b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1011b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length = (int8_t)sprintf(pos, "%u", (int)ferroffset); 1012b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1013b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // output the code points that caused the error 1014b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString str; 1015b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i = 0; i < errorLength;) { 1016b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (i > 0) { 1017b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru str.append((UChar)uSP); 1018b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1019b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U16_NEXT(errorUChars, i, errorLength, c); 1020b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c >= 0x100000) { 1021b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru str.append(nibbleToHex((uint8_t)(c >> 20))); 1022b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1023b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c >= 0x10000) { 1024b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru str.append(nibbleToHex((uint8_t)(c >> 16))); 1025b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1026b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru str.append(nibbleToHex((uint8_t)(c >> 12))); 1027b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
str.append(nibbleToHex((uint8_t)(c >> 8))); 1028b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru str.append(nibbleToHex((uint8_t)(c >> 4))); 1029b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru str.append(nibbleToHex((uint8_t)c)); 1030b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1031b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1032b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru initMsg(pname); 1033b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_wmsg(stderr, errtag, 1034b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString(pos, length, "").getTerminatedBuffer(), 1035b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru str.getTerminatedBuffer(), 1036b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_wmsg_errorName(err)); 1037b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_wmsg(stderr, "errorUnicode", str.getTerminatedBuffer()); 1038b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1039b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru willexit = TRUE; 1040b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru err = U_ZERO_ERROR; /* reset the error for the rest of the conversion. */ 1041b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1042b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1043b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Replaced a check for whether the intermediate Unicode characters were all consumed by 1044b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // looping until they are; message key "premEnd" now obsolete. 
1045b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1046b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Finally, write the converted buffer to the output file 1047b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru size_t outlen = (size_t) (bufp - outbuf); 1048b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru outfoffset += (int32_t)(wr = fwrite(outbuf, 1, outlen, outfile)); 1049b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (wr != outlen) { 1050b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString str(strerror(errno)); 1051b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru initMsg(pname); 1052b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_wmsg(stderr, "cantWrite", str.getTerminatedBuffer()); 1053b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru willexit = TRUE; 1054b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1055b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1056b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (willexit) { 1057b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru goto error_exit; 1058b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1059b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } while (!toSawEndOfUnicode); 1060b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } while (!fromSawEndOfBytes); 1061b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } while (!flush); // Stop when we have flushed the 1062b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // converters (this means that it's 1063b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // the end of output) 1064b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1065b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru goto normal_exit; 1066b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
1067b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruerror_exit: 1068b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ret = FALSE; 1069b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1070b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querunormal_exit: 1071b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Cleanup. 1072b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1073b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucnv_close(convfrom); 1074b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucnv_close(convto); 1075b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1076b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_TRANSLITERATION 1077b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete t; 1078b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 1079b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1080b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (infile != stdin) { 1081b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fclose(infile); 1082b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1083b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1084b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return ret; 1085b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1086b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
/**
 * Print the tool's usage message and terminate the process.
 *
 * The "usage" message is formatted with a localized usage word (resource key
 * "lcUsageWord" when ecode is non-zero, "ucUsageWord" when it is zero) and the
 * program name. When ecode is zero the "help" message is printed as well,
 * followed by the names listed in transcode_callbacks[].
 *
 * @param pname program name; passed to initMsg() and substituted into the
 *              "usage" message. Must be a valid C string.
 * @param ecode exit status handed to exit(). Non-zero selects stderr as the
 *              output stream, zero selects stdout.
 *
 * This function does not return: it always ends in exit(ecode).
 */
static void usage(const char *pname, int ecode) {
    const UChar *msg;
    int32_t msgLen = 0;
    UErrorCode err = U_ZERO_ERROR;
    FILE *fp = ecode ? stderr : stdout;
    int res;

    // NOTE(review): initMsg() presumably sets up gBundle; it is defined
    // outside this block - confirm.
    initMsg(pname);
    msg =
        ures_getStringByKey(gBundle, ecode ? "lcUsageWord" : "ucUsageWord",
                            &msgLen, &err);

    // Only trust msg/msgLen when the lookup succeeded. On failure msgLen may
    // never have been written, so fall back to an empty usage word rather
    // than building a string from an indeterminate length.
    UnicodeString mname;
    if (U_SUCCESS(err) && msg != NULL) {
        mname = UnicodeString(msg, msgLen);
    }
    UnicodeString upname(pname, (int32_t)uprv_strlen(pname));

    // u_wmsg() receives raw UChar pointers through varargs, so both strings
    // must be NUL-terminated; getTerminatedBuffer() guarantees that even for
    // an empty string.
    res = u_wmsg(fp, "usage", mname.getTerminatedBuffer(),
                 upname.getTerminatedBuffer());
    if (!ecode) {
        if (!res) {
            fputc('\n', fp);
        }
        if (!u_wmsg(fp, "help")) {
            /* Now dump callbacks and finish. */

            int i, count =
                sizeof(transcode_callbacks) / sizeof(*transcode_callbacks);
            for (i = 0; i < count; ++i) {
                fprintf(fp, " %s", transcode_callbacks[i].name);
            }
            fputc('\n', fp);
        }
    }

    exit(ecode);
}
1120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruextern int 1122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querumain(int argc, char **argv) 1123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 1124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru FILE *outfile; 1125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int ret = 0; 1126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru size_t bufsz = DEFAULT_BUFSZ; 1128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *fromcpage = 0; 1130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *tocpage = 0; 1131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *translit = 0; 1132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *outfilestr = 0;
1133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool fallback = FALSE; 1134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UConverterFromUCallback fromucallback = UCNV_FROM_U_CALLBACK_STOP; 1136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const void *fromuctxt = 0; 1137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UConverterToUCallback toucallback = UCNV_TO_U_CALLBACK_STOP; 1138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const void *touctxt = 0; 1139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char **iter, **remainArgv, **remainArgvLimit; 1141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char **end = argv + argc; 1142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *pname; 1144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool printConvs = FALSE, printCanon = FALSE, printTranslits = FALSE; 1146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *printName = 0; 1147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool verbose = FALSE; 1149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 1150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ConvertFile cf; 1152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Initialize ICU */ 1154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_init(&status); 1155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 
1156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fprintf(stderr, "%s: can not initialize ICU. status = %s\n", 1157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru argv[0], u_errorName(status)); 1158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru exit(1); 1159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Get and prettify pname. 1162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pname = uprv_strrchr(*argv, U_FILE_SEP_CHAR); 1163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifdef U_WINDOWS 1164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!pname) { 1165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pname = uprv_strrchr(*argv, '/'); 1166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 1168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!pname) { 1169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pname = *argv; 1170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++pname; 1172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // First, get the arguments from command-line 1175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // to know the codepages to convert between 1176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru remainArgv = remainArgvLimit = argv + 1; 1178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (iter = argv + 1; iter != end; iter++) { 
1179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Check for from charset 1180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (strcmp("-f", *iter) == 0 || !strcmp("--from-code", *iter)) { 1181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru iter++; 1182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (iter != end) 1183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fromcpage = *iter; 1184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else 1185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru usage(pname, 1); 1186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (strcmp("-t", *iter) == 0 || !strcmp("--to-code", *iter)) { 1187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru iter++; 1188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (iter != end) 1189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tocpage = *iter; 1190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else 1191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru usage(pname, 1); 1192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (strcmp("-x", *iter) == 0) { 1193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru iter++; 1194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (iter != end) 1195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru translit = *iter; 1196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else 1197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru usage(pname, 1); 1198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (!strcmp("--fallback", *iter)) { 1199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fallback = TRUE; 1200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (!strcmp("--no-fallback", *iter)) { 1201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fallback = FALSE; 
1202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (strcmp("-b", *iter) == 0 || !strcmp("--block-size", *iter)) { 1203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru iter++; 1204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (iter != end) { 1205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bufsz = atoi(*iter); 1206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ((int) bufsz <= 0) { 1207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru initMsg(pname); 1208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString str(*iter); 1209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru initMsg(pname); 1210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_wmsg(stderr, "badBlockSize", str.getTerminatedBuffer()); 1211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 3; 1212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru usage(pname, 1); 1215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (strcmp("-l", *iter) == 0 || !strcmp("--list", *iter)) { 1217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (printTranslits) { 1218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru usage(pname, 1); 1219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printConvs = TRUE; 1221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (strcmp("--default-code", *iter) == 0) { 1222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (printTranslits) { 1223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru usage(pname, 1); 1224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 
1225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printName = ucnv_getDefaultName(); 1226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (strcmp("--list-code", *iter) == 0) { 1227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (printTranslits) { 1228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru usage(pname, 1); 1229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru iter++; 1232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (iter != end) { 1233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode e = U_ZERO_ERROR; 1234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printName = ucnv_getAlias(*iter, 0, &e); 1235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(e) || !printName) { 1236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString str(*iter); 1237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru initMsg(pname); 1238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_wmsg(stderr, "noSuchCodeset", str.getTerminatedBuffer()); 1239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 2; 1240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else 1242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru usage(pname, 1); 1243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (strcmp("--canon", *iter) == 0) { 1244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printCanon = TRUE; 1245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (strcmp("-L", *iter) == 0 1246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru || !strcmp("--list-transliterators", *iter)) { 
1247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (printConvs) { 1248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru usage(pname, 1); 1249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printTranslits = TRUE; 1251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (strcmp("-h", *iter) == 0 || !strcmp("-?", *iter) 1252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru || !strcmp("--help", *iter)) { 1253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru usage(pname, 0); 1254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (!strcmp("-c", *iter)) { 1255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fromucallback = UCNV_FROM_U_CALLBACK_SKIP; 1256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (!strcmp("--to-callback", *iter)) { 1257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru iter++; 1258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (iter != end) { 1259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const struct callback_ent *cbe = findCallback(*iter); 1260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (cbe) { 1261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fromucallback = cbe->fromu; 1262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fromuctxt = cbe->fromuctxt; 1263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString str(*iter); 1265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru initMsg(pname); 1266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer()); 1267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 4; 1268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 
1269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru usage(pname, 1); 1271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (!strcmp("--from-callback", *iter)) { 1273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru iter++; 1274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (iter != end) { 1275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const struct callback_ent *cbe = findCallback(*iter); 1276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (cbe) { 1277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru toucallback = cbe->tou; 1278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru touctxt = cbe->touctxt; 1279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString str(*iter); 1281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru initMsg(pname); 1282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer()); 1283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 4; 1284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru usage(pname, 1); 1287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (!strcmp("-i", *iter)) { 1289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru toucallback = UCNV_TO_U_CALLBACK_SKIP; 1290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (!strcmp("--callback", *iter)) { 1291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru iter++; 
1292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (iter != end) { 1293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const struct callback_ent *cbe = findCallback(*iter); 1294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (cbe) { 1295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fromucallback = cbe->fromu; 1296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fromuctxt = cbe->fromuctxt; 1297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru toucallback = cbe->tou; 1298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru touctxt = cbe->touctxt; 1299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString str(*iter); 1301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru initMsg(pname); 1302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer()); 1303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 4; 1304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru usage(pname, 1); 1307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (!strcmp("-s", *iter) || !strcmp("--silent", *iter)) { 1309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru verbose = FALSE; 1310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (!strcmp("-v", *iter) || !strcmp("--verbose", *iter)) { 1311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru verbose = TRUE; 1312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (!strcmp("-V", *iter) || !strcmp("--version", *iter)) { 1313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printf("%s v2.1 ICU 
" U_ICU_VERSION "\n", pname); 1314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 1315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (!strcmp("-o", *iter) || !strcmp("--output", *iter)) { 1316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++iter; 1317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (iter != end && !outfilestr) { 1318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru outfilestr = *iter; 1319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru usage(pname, 1); 1321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (0 == strcmp("--add-signature", *iter)) { 1323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru cf.signature = 1; 1324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (0 == strcmp("--remove-signature", *iter)) { 1325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru cf.signature = -1; 1326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (**iter == '-' && (*iter)[1]) { 1327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru usage(pname, 1); 1328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // move a non-option up in argv[] 1330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *remainArgvLimit++ = *iter; 1331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (printConvs || printName) { 1335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return printConverters(pname, printName, printCanon) ? 
2 : 0; 1336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (printTranslits) { 1337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return printTransliterators(printCanon) ? 3 : 0; 1338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!fromcpage || !uprv_strcmp(fromcpage, "-")) { 1341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fromcpage = ucnv_getDefaultName(); 1342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!tocpage || !uprv_strcmp(tocpage, "-")) { 1344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tocpage = ucnv_getDefaultName(); 1345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Open the correct output file or connect to stdout for reading input 1348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (outfilestr != 0 && strcmp(outfilestr, "-")) { 1349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru outfile = fopen(outfilestr, "wb"); 1350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (outfile == 0) { 1351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString str1(outfilestr, ""); 1352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString str2(strerror(errno), ""); 1353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru initMsg(pname); 1354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_wmsg(stderr, "cantCreateOutputF", 1355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru str1.getBuffer(), str2.getBuffer()); 1356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 1; 
1357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru outfilestr = "-"; 1360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru outfile = stdout; 1361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifdef USE_FILENO_BINARY_MODE 1362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (setmode(fileno(outfile), O_BINARY) == -1) { 1363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_wmsg(stderr, "cantSetOutBinMode"); 1364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru exit(-1); 1365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 1367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Loop again on the arguments to find all the input files, and 1370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru convert them. 
*/ 1371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru cf.setBufferSize(bufsz); 1373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(remainArgv < remainArgvLimit) { 1375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (iter = remainArgv; iter != remainArgvLimit; iter++) { 1376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!cf.convertFile( 1377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pname, fromcpage, toucallback, touctxt, tocpage, 1378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fromucallback, fromuctxt, fallback, translit, *iter, 1379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru outfile, verbose) 1380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ) { 1381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru goto error_exit; 1382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!cf.convertFile( 1386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pname, fromcpage, toucallback, touctxt, tocpage, 1387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fromucallback, fromuctxt, fallback, translit, 0, 1388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru outfile, verbose) 1389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ) { 1390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru goto error_exit; 1391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru goto 
normal_exit; 1395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruerror_exit: 139650294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if !UCONFIG_NO_LEGACY_CONVERSION 1397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ret = 1; 139850294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#else 139950294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fprintf(stderr, "uconv error: UCONFIG_NO_LEGACY_CONVERSION is on. See uconfig.h\n"); 140050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif 1401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querunormal_exit: 1402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (outfile != stdout) { 1404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fclose(outfile); 1405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return ret; 1408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 1412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Hey, Emacs, please set the following: 1413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 1414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Local Variables: 1415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * indent-tabs-mode: nil 1416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * End: 1417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 1418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1419