1b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/***************************************************************************** 2b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 3b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho* Copyright (C) 1999-2011, International Business Machines 4b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Corporation and others. All Rights Reserved. 5b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 6b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru******************************************************************************/ 7b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 8b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 9b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * uconv(1): an iconv(1)-like converter using ICU. 10b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 11b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Original code by Jonas Utterström <jonas.utterstrom@vittran.norrnod.se> 12b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * contributed in 1999. 13b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 14b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Conversion to the C conversion API and many improvements by 15b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Yves Arrouye <yves@realnames.com>, current maintainer. 16b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 17b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Markus Scherer maintainer from 2003. 18b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * See source code repository history for changes. 
19b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 20b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 21b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <unicode/utypes.h> 22b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <unicode/putil.h> 23b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <unicode/ucnv.h> 24b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <unicode/uenum.h> 25b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <unicode/unistr.h> 26b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <unicode/translit.h> 27b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <unicode/uset.h> 28b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <unicode/uclean.h> 29b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 30b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <stdio.h> 31b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <errno.h> 32b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <string.h> 33b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <stdlib.h> 34b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 35b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cmemory.h" 36b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cstring.h" 37b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "ustrfmt.h" 38b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 39b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uwmsg.h" 40b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 41b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_NAMESPACE_USE 42b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 43b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#if (defined(U_WINDOWS) || defined(U_CYGWIN) || defined(U_MINGW)) && 
!defined(__STRICT_ANSI__) 44b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <io.h> 45b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include <fcntl.h> 46b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if defined(U_WINDOWS) 47b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define USE_FILENO_BINARY_MODE 1 48b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* Windows likes to rename Unix-like functions */ 49b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifndef fileno 50b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define fileno _fileno 51b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 52b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifndef setmode 53b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define setmode _setmode 54b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 55b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifndef O_BINARY 56b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define O_BINARY _O_BINARY 57b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 58b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 59b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 60b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 61b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifdef UCONVMSG_LINK 62b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* below from the README */ 63b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/utypes.h" 64b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/udata.h" 65b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CFUNC char uconvmsg_dat[]; 66b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 67b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 68b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define 
LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 69b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 70b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define DEFAULT_BUFSZ 4096 71b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define UCONVMSG "uconvmsg" 72b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 73b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic UResourceBundle *gBundle = 0; /* Bundle containing messages. */ 74b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 75b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 76b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Initialize the message bundle so that message strings can be fetched 77b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * by u_wmsg(). 78b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 79b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 80b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 81b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void initMsg(const char *pname) { 82b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru static int ps = 0; 83b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 84b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!ps) { 85b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char dataPath[2048]; /* XXX Sloppy: should be PATH_MAX. 
*/ 86b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode err = U_ZERO_ERROR; 87b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 88b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ps = 1; 89b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 90b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Set up our static data - if any */ 91b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifdef UCONVMSG_LINK 92b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru udata_setAppData(UCONVMSG, (const void*) uconvmsg_dat, &err); 93b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(err)) { 94b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fprintf(stderr, "%s: warning, problem installing our static resource bundle data uconvmsg: %s - trying anyways.\n", 95b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pname, u_errorName(err)); 96b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru err = U_ZERO_ERROR; /* It may still fail */ 97b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 98b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 99b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Get messages. 
*/ 101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru gBundle = u_wmsg_setPath(UCONVMSG, &err); 102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(err)) { 103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fprintf(stderr, 104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "%s: warning: couldn't open bundle %s: %s\n", 105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pname, UCONVMSG, u_errorName(err)); 106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifdef UCONVMSG_LINK 107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fprintf(stderr, 108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "%s: setAppData was called, internal data %s failed to load\n", 109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pname, UCONVMSG); 110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru err = U_ZERO_ERROR; 113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* that was try #1, try again with a path */ 114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_strcpy(dataPath, u_getDataDirectory()); 115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_strcat(dataPath, U_FILE_SEP_STRING); 116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_strcat(dataPath, UCONVMSG); 117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru gBundle = u_wmsg_setPath(dataPath, &err); 119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(err)) { 120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fprintf(stderr, 121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru "%s: warning: still couldn't open bundle %s: %s\n", 122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pname, 
dataPath, u_errorName(err)); 123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fprintf(stderr, "%s: warning: messages will not be displayed\n", pname); 124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* Mapping of callback names to the callbacks passed to the converter 130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru API. */ 131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic struct callback_ent { 133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *name; 134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UConverterFromUCallback fromu; 135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const void *fromuctxt; 136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UConverterToUCallback tou; 137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const void *touctxt; 138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} transcode_callbacks[] = { 139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { "substitute", 140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCNV_FROM_U_CALLBACK_SUBSTITUTE, 0, 141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCNV_TO_U_CALLBACK_SUBSTITUTE, 0 }, 142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { "skip", 143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCNV_FROM_U_CALLBACK_SKIP, 0, 144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCNV_TO_U_CALLBACK_SKIP, 0 }, 145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { "stop", 
146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCNV_FROM_U_CALLBACK_STOP, 0, 147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCNV_TO_U_CALLBACK_STOP, 0 }, 148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { "escape", 149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCNV_FROM_U_CALLBACK_ESCAPE, 0, 150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCNV_TO_U_CALLBACK_ESCAPE, 0}, 151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { "escape-icu", 152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_ICU, 153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_ICU }, 154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { "escape-java", 155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_JAVA, 156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_JAVA }, 157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { "escape-c", 158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_C, 159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_C }, 160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { "escape-xml", 161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX, 162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX }, 163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { "escape-xml-hex", 164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX, 165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX }, 
166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { "escape-xml-dec", 167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC, 168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC }, 169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { "escape-unicode", UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_UNICODE, 170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_UNICODE } 171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}; 172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* Return a pointer to a callback record given its name. */ 174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const struct callback_ent *findCallback(const char *name) { 176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int i, count = 177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sizeof(transcode_callbacks) / sizeof(*transcode_callbacks); 178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* We'll do a linear search, there aren't many of them and bsearch() 180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru may not be that portable. 
*/ 181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i = 0; i < count; ++i) { 183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!uprv_stricmp(name, transcode_callbacks[i].name)) { 184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return &transcode_callbacks[i]; 185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* Print converter information. If lookfor is set, only that converter will 192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru be printed, otherwise all converters will be printed. If canon is non 193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru zero, tags and aliases for each converter are printed too, in the format 194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru expected for convrters.txt(5). 
*/

/*
 * pname   - program name, handed to initMsg() when the converter list
 *           cannot be obtained.
 * lookfor - converter name to print, or NULL to print every available
 *           converter.
 * canon   - when non-zero, a header line listing the standards is printed
 *           and each alias is printed on its own line followed by the
 *           standards that know it.
 *
 * Returns 0 on success and -1 on failure. Failures are reported on stderr
 * through u_wmsg().
 */
static int printConverters(const char *pname, const char *lookfor,
    UBool canon)
{
    UErrorCode err = U_ZERO_ERROR;
    int32_t num;
    uint16_t num_stds;
    const char **stds;

    /* If there is a specified name, just handle that now. */

    if (lookfor) {
        if (!canon) {
            printf("%s\n", lookfor);
            return 0;
        } else {
            /*  Because we are printing a canonical name, we need the
                true converter name. We've done that already except for
                the default name (because we want to print the exact
                name one would get when calling ucnv_getDefaultName()
                in non-canon mode). But since we do not know at this
                point if we have the default name or something else, we
                need to normalize again to the canonical converter
                name. */

            const char *truename = ucnv_getAlias(lookfor, 0, &err);
            if (U_SUCCESS(err)) {
                /* Only adopt the canonical name if one was actually
                   returned: a NULL lookfor would make the code below
                   behave as if no name had been requested at all. */
                if (truename) {
                    lookfor = truename;
                }
            } else {
                err = U_ZERO_ERROR;
            }
        }
    }

    /* Print converter names. We come here for one of two reasons: we
       are printing all the names (lookfor was null), or we have a
       single converter to print but in canon mode, hence we need to
       get to it in order to print everything. */

    num = ucnv_countAvailable();
    if (num <= 0) {
        initMsg(pname);
        u_wmsg(stderr, "cantGetNames");
        return -1;
    }
    if (lookfor) {
        num = 1;                /* We know where we want to be. */
    }

    /* Collect the names of all the standards; they are needed for the
       header line and for tagging each alias in canon mode. */

    num_stds = ucnv_countStandards();
    stds = (const char **) uprv_malloc(num_stds * sizeof(*stds));
    if (!stds) {
        u_wmsg(stderr, "cantGetTag", u_wmsg_errorName(U_MEMORY_ALLOCATION_ERROR));
        return -1;
    }

    if (canon) {
        printf("{ ");
    }
    for (uint16_t std = 0; std < num_stds; ++std) {
        stds[std] = ucnv_getStandard(std, &err);
        /* Check for failure before the name is handed to printf(). */
        if (U_FAILURE(err)) {
            u_wmsg(stderr, "cantGetTag", u_wmsg_errorName(err));
            goto error_cleanup;
        }
        if (canon) {
            printf("%s ", stds[std]);
        }
    }
    if (canon) {
        puts("}");
    }

    for (int32_t i = 0; i < num; i++) {
        const char *name;
        uint16_t num_aliases;

        /* Set the name either to what we are looking for, or
           to the current converter name. */

        if (lookfor) {
            name = lookfor;
        } else {
            name = ucnv_getAvailableName(i);
        }

        /* Get all the aliases associated to the name. */

        err = U_ZERO_ERROR;
        num_aliases = ucnv_countAliases(name, &err);
        if (U_FAILURE(err)) {
            printf("%s", name);

            UnicodeString str(name, "");
            putchar('\t');
            u_wmsg(stderr, "cantGetAliases", str.getTerminatedBuffer(),
                u_wmsg_errorName(err));
            goto error_cleanup;
        } else {
            uint16_t a, s, t;

            /* Write all the aliases and their tags. */

            for (a = 0; a < num_aliases; ++a) {
                const char *alias = ucnv_getAlias(name, a, &err);

                if (U_FAILURE(err)) {
                    UnicodeString str(name, "");
                    putchar('\t');
                    u_wmsg(stderr, "cantGetAliases", str.getTerminatedBuffer(),
                        u_wmsg_errorName(err));
                    goto error_cleanup;
                }

                /* Print the current alias so that it looks right. */
                printf("%s%s%s", (canon ? (a == 0? "" : "\t" ) : "") ,
                                 alias,
                                 (canon ? "" : " "));

                /* Look (slowly, linear searching) for a tag. */

                if (canon) {
                    /* -1 to skip the last standard */
                    for (s = t = 0; s < num_stds-1; ++s) {
                        UEnumeration *nameEnum = ucnv_openStandardNames(name, stds[s], &err);
                        if (U_SUCCESS(err)) {
                            /* List the standard tags */
                            const char *standardName;
                            UBool isFirst = TRUE;
                            UErrorCode enumError = U_ZERO_ERROR;
                            while ((standardName = uenum_next(nameEnum, NULL, &enumError))) {
                                /* See if this alias is supported by this standard. */
                                if (!strcmp(standardName, alias)) {
                                    if (!t) {
                                        printf(" {");
                                        t = 1;
                                    }
                                    /* Print a * after the default standard name */
                                    printf(" %s%s", stds[s], (isFirst ? "*" : ""));
                                }
                                isFirst = FALSE;
                            }
                        }
                        /* Release the enumeration; one is opened for every
                           alias/standard pair, so leaving them open leaks
                           on each iteration. */
                        uenum_close(nameEnum);
                    }
                    if (t) {
                        printf(" }");
                    }
                }
                /* Terminate this entry. */
                if (canon) {
                    puts("");
                }

                /* Move on. */
            }
            /* Terminate this entry. */
            if (!canon) {
                puts("");
            }
        }
    }

    /* Free temporary data. */

    uprv_free(stds);

    /* Success. */

    return 0;
error_cleanup:
    uprv_free(stds);
    return -1;
}

/* Print all available transliterators. If canon is non zero, print
   one transliterator per line.
*/ 373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int printTransliterators(UBool canon) 375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if UCONFIG_NO_TRANSLITERATION 377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printf("no transliterators available because of UCONFIG_NO_TRANSLITERATION, see uconfig.h\n"); 378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 1; 379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#else 38027f654740f2a26ad62a5c155af9199af9e69b889claireho UErrorCode status = U_ZERO_ERROR; 38127f654740f2a26ad62a5c155af9199af9e69b889claireho UEnumeration *ids = utrans_openIDs(&status); 38227f654740f2a26ad62a5c155af9199af9e69b889claireho int32_t i, numtrans = uenum_count(ids, &status); 383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char sepchar = canon ? 
'\n' : ' '; 385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 38627f654740f2a26ad62a5c155af9199af9e69b889claireho for (i = 0; U_SUCCESS(status)&& (i < numtrans); ++i) { 38727f654740f2a26ad62a5c155af9199af9e69b889claireho int32_t len; 38827f654740f2a26ad62a5c155af9199af9e69b889claireho const char *nextTrans = uenum_next(ids, &len, &status); 389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 39027f654740f2a26ad62a5c155af9199af9e69b889claireho printf("%s", nextTrans); 391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (i < numtrans - 1) { 392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru putchar(sepchar); 393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 39627f654740f2a26ad62a5c155af9199af9e69b889claireho uenum_close(ids); 39727f654740f2a26ad62a5c155af9199af9e69b889claireho 398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Add a terminating newline if needed. */ 399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (sepchar != '\n') { 401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru putchar('\n'); 402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Success. 
*/ 405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruenum { 411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uSP = 0x20, // space 412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uCR = 0xd, // carriage return 413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uLF = 0xa, // line feed 414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uNL = 0x85, // newline 415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uLS = 0x2028, // line separator 416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uPS = 0x2029, // paragraph separator 417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uSig = 0xfeff // signature/BOM character 418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}; 419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic inline int32_t 421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QuerugetChunkLimit(const UnicodeString &prev, const UnicodeString &s) { 422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // find one of 423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // CR, LF, CRLF, NL, LS, PS 424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // for paragraph ends (see UAX #13/Unicode 4) 425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // and include it in the chunk 426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // all of these characters are on the BMP 427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // do not include FF or VT in case they are part of a paragraph 
428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // (important for bidi contexts) 429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru static const UChar paraEnds[] = { 430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0xd, 0xa, 0x85, 0x2028, 0x2029 431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru }; 432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru enum { 433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru iCR, iLF, iNL, iLS, iPS, iCount 434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru }; 435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // first, see if there is a CRLF split between prev and s 437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (prev.endsWith(paraEnds + iCR, 1)) { 438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (s.startsWith(paraEnds + iLF, 1)) { 439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 1; // split CRLF, include the LF 440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (!s.isEmpty()) { 441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; // complete the last chunk 442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return -1; // wait for actual further contents to arrive 444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *u = s.getBuffer(), *limit = u + s.length(); 448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar c; 449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while (u < limit) { 
451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru c = *u++; 452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ( 453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ((c < uSP) && (c == uCR || c == uLF)) || 454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (c == uNL) || 455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ((c & uLS) == uLS) 456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ) { 457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c == uCR) { 458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // check for CRLF 459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (u == limit) { 460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return -1; // LF may be in the next chunk 461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (*u == uLF) { 462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++u; // include the LF in this chunk 463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return (int32_t)(u - s.getBuffer()); 466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return -1; // continue collecting the chunk 470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruenum { 473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CNV_NO_FEFF, // cannot convert the U+FEFF Unicode signature character (BOM) 474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CNV_WITH_FEFF, // can convert the U+FEFF signature character 
475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CNV_ADDS_FEFF // automatically adds/detects the U+FEFF signature character 476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}; 477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic inline UChar 479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QuerunibbleToHex(uint8_t n) { 480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru n &= 0xf; 481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru n <= 9 ? 483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (UChar)(0x30 + n) : 484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (UChar)((0x61 - 10) + n); 485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// check the converter's Unicode signature properties; 488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// the fromUnicode side of the converter must be in its initial state 489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// and will be reset again if it was used 490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic int32_t 491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QuerucnvSigType(UConverter *cnv) { 492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode err; 493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t result; 494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // test if the output charset can convert U+FEFF 496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru USet *set = uset_open(1, 0); 497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru err = U_ZERO_ERROR; 
498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &err); 499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_SUCCESS(err) && uset_contains(set, uSig)) { 500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result = CNV_WITH_FEFF; 501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result = CNV_NO_FEFF; // an error occurred or U+FEFF cannot be converted 503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uset_close(set); 505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (result == CNV_WITH_FEFF) { 507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // test if the output charset emits a signature anyway 508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar a[1] = { 0x61 }; // "a" 509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *in; 510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char buffer[20]; 512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char *out; 513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru in = a; 515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru out = buffer; 516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru err = U_ZERO_ERROR; 517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucnv_fromUnicode(cnv, 518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru &out, buffer + sizeof(buffer), 519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru &in, a + 1, 520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru NULL, TRUE, &err); 
521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucnv_resetFromUnicode(cnv); 522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (NULL != ucnv_detectUnicodeSignature(buffer, (int32_t)(out - buffer), NULL, &err) && 524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_SUCCESS(err) 525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ) { 526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result = CNV_ADDS_FEFF; 527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return result; 531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruclass ConvertFile { 534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querupublic: 535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ConvertFile() : 536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buf(NULL), outbuf(NULL), fromoffsets(NULL), 537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bufsz(0), signature(0) {} 538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru void 540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru setBufferSize(size_t bufferSize) { 541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bufsz = bufferSize; 542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buf = new char[2 * bufsz]; 544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru outbuf = buf + bufsz; 545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // +1 for an added U+FEFF in the intermediate Unicode buffer 547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fromoffsets = new int32_t[bufsz + 1]; 548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ~ConvertFile() { 551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete [] buf; 552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete [] fromoffsets; 553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool convertFile(const char *pname, 556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *fromcpage, 557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UConverterToUCallback toucallback, 558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const void *touctxt, 559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *tocpage, 560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UConverterFromUCallback fromucallback, 561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const void *fromuctxt, 562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool fallback, 563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *translit, 564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *infilestr, 565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru FILE * outfile, int verbose); 566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruprivate: 567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru friend int main(int argc, char **argv); 568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char *buf, *outbuf; 570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t *fromoffsets; 571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru size_t bufsz; 573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int8_t signature; // add (1) or remove (-1) a U+FEFF Unicode signature character 574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}; 575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Convert a file from one encoding to another 577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool 578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruConvertFile::convertFile(const char *pname, 579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *fromcpage, 580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UConverterToUCallback toucallback, 581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const void *touctxt, 582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *tocpage, 583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UConverterFromUCallback fromucallback, 584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const void *fromuctxt, 585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool fallback, 586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *translit, 587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *infilestr, 588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru FILE * outfile, int verbose) 589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru FILE *infile; 591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool ret = TRUE; 
592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UConverter *convfrom = 0; 593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UConverter *convto = 0; 594b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode err = U_ZERO_ERROR; 595b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool flush; 596b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *cbufp, *prevbufp; 597b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char *bufp; 598b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 599b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uint32_t infoffset = 0, outfoffset = 0; /* Where we are in the file, for error reporting. */ 600b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *unibuf, *unibufbp; 602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar *unibufp; 603b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 604b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru size_t rd, wr; 605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_TRANSLITERATION 607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Transliterator *t = 0; // Transliterator acting on Unicode data. 608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString chunk; // One chunk of the text being collected for transformation. 609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString u; // String to do the transliteration. 
611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t ulen; 612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 613b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // use conversion offsets for error messages 614b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // unless a transliterator is used - 615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // a text transformation will reorder characters in unpredictable ways 616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool useOffsets = TRUE; 617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Open the correct input file or connect to stdin for reading input 619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 620b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (infilestr != 0 && strcmp(infilestr, "-")) { 621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru infile = fopen(infilestr, "rb"); 622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (infile == 0) { 623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString str1(infilestr, ""); 624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru str1.append((UChar32) 0); 625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString str2(strerror(errno), ""); 626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru str2.append((UChar32) 0); 627b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru initMsg(pname); 628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_wmsg(stderr, "cantOpenInputF", str1.getBuffer(), str2.getBuffer()); 629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 631b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru infilestr = 
"-"; 633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru infile = stdin; 634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifdef USE_FILENO_BINARY_MODE 635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (setmode(fileno(stdin), O_BINARY) == -1) { 636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru initMsg(pname); 637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_wmsg(stderr, "cantSetInBinMode"); 638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (verbose) { 644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fprintf(stderr, "%s:\n", infilestr); 645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_TRANSLITERATION 648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Create transliterator as needed. 649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (translit != NULL && *translit) { 651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UParseError parse; 652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString str(translit), pestr; 653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Create from rules or by ID as needed. 
*/ 655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru parse.line = -1; 657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (uprv_strchr(translit, ':') || uprv_strchr(translit, '>') || uprv_strchr(translit, '<') || uprv_strchr(translit, '>')) { 659b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t = Transliterator::createFromRules("Uconv", str, UTRANS_FORWARD, parse, err); 660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 661b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t = Transliterator::createInstance(translit, UTRANS_FORWARD, err); 662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(err)) { 665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru str.append((UChar32) 0); 666b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru initMsg(pname); 667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (parse.line >= 0) { 669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar linebuf[20], offsetbuf[20]; 670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_itou(linebuf, 20, parse.line, 10, 0); 671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_itou(offsetbuf, 20, parse.offset, 10, 0); 672b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_wmsg(stderr, "cantCreateTranslitParseErr", str.getTerminatedBuffer(), 673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_wmsg_errorName(err), linebuf, offsetbuf); 674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_wmsg(stderr, "cantCreateTranslit", 
str.getTerminatedBuffer(), 676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_wmsg_errorName(err)); 677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (t) { 680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete t; 681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t = 0; 682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru goto error_exit; 684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 686b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru useOffsets = FALSE; 687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 689b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 690b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Create codepage converter. If the codepage or its aliases weren't 691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // available, it returns NULL and a failure code. We also set the 692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // callbacks, and return errors in the same way. 
693b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru convfrom = ucnv_open(fromcpage, &err); 695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(err)) { 696b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString str(fromcpage, ""); 697b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru initMsg(pname); 698b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_wmsg(stderr, "cantOpenFromCodeset", str.getTerminatedBuffer(), 699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_wmsg_errorName(err)); 700b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru goto error_exit; 701b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 702b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucnv_setToUCallBack(convfrom, toucallback, touctxt, 0, 0, &err); 703b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(err)) { 704b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru initMsg(pname); 705b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_wmsg(stderr, "cantSetCallback", u_wmsg_errorName(err)); 706b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru goto error_exit; 707b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 708b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 709b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru convto = ucnv_open(tocpage, &err); 710b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(err)) { 711b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString str(tocpage, ""); 712b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru initMsg(pname); 713b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_wmsg(stderr, "cantOpenToCodeset", str.getTerminatedBuffer(), 714b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_wmsg_errorName(err)); 
715b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru goto error_exit; 716b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 717b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucnv_setFromUCallBack(convto, fromucallback, fromuctxt, 0, 0, &err); 718b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(err)) { 719b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru initMsg(pname); 720b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_wmsg(stderr, "cantSetCallback", u_wmsg_errorName(err)); 721b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru goto error_exit; 722b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 723b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucnv_setFallback(convto, fallback); 724b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 725b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool willexit, fromSawEndOfBytes, toSawEndOfUnicode; 726b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int8_t sig; 727b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 728b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // OK, we can convert now. 
729b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sig = signature; 730b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru rd = 0; 731b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 732b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru do { 733b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru willexit = FALSE; 734b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 735b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // input file offset at the beginning of the next buffer 736b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru infoffset += rd; 737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru rd = fread(buf, 1, bufsz, infile); 739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (ferror(infile) != 0) { 740b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString str(strerror(errno)); 741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru initMsg(pname); 742b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_wmsg(stderr, "cantRead", str.getTerminatedBuffer()); 743b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru goto error_exit; 744b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 745b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 746b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Convert the read buffer into the new encoding via Unicode. 747b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // After the call 'unibufp' will be placed behind the last 748b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // character that was converted in the 'unibuf'. 749b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Also the 'cbufp' is positioned behind the last converted 750b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // character. 
751b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // At the last conversion in the file, flush should be set to 752b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // true so that we get all characters converted. 753b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 754b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // The converter must be flushed at the end of conversion so 755b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // that characters on hold also will be written. 756b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 757b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru cbufp = buf; 758b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru flush = (UBool)(rd != bufsz); 759b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 760b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // convert until the input is consumed 761b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru do { 762b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // remember the start of the current byte-to-Unicode conversion 763b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru prevbufp = cbufp; 764b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 765b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru unibuf = unibufp = u.getBuffer((int32_t)bufsz); 766b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 767b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Use bufsz instead of u.getCapacity() for the targetLimit 768b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // so that we don't overflow fromoffsets[]. 769b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucnv_toUnicode(convfrom, &unibufp, unibuf + bufsz, &cbufp, 770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buf + rd, useOffsets ? 
fromoffsets : NULL, flush, &err); 771b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ulen = (int32_t)(unibufp - unibuf); 773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u.releaseBuffer(U_SUCCESS(err) ? ulen : 0); 774b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 775b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // fromSawEndOfBytes indicates that ucnv_toUnicode() is done 776b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // converting all of the input bytes. 777b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // It works like this because ucnv_toUnicode() returns only under the 778b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // following conditions: 779b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // - an error occurred during conversion (an error code is set) 780b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // - the target buffer is filled (the error code indicates an overflow) 781b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // - the source is consumed 782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // That is, if the error code does not indicate a failure, 783b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // not even an overflow, then the source must be consumed entirely. 
784b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fromSawEndOfBytes = (UBool)U_SUCCESS(err); 785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (err == U_BUFFER_OVERFLOW_ERROR) { 787b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru err = U_ZERO_ERROR; 788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (U_FAILURE(err)) { 789b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char pos[32], errorBytes[32]; 790b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int8_t i, length, errorLength; 791b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode localError = U_ZERO_ERROR; 793b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errorLength = (int8_t)sizeof(errorBytes); 794b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucnv_getInvalidChars(convfrom, errorBytes, &errorLength, &localError); 795b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(localError) || errorLength == 0) { 796b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errorLength = 1; 797b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 798b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 799b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // print the input file offset of the start of the error bytes: 800b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // input file offset of the current byte buffer + 801b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // length of the just consumed bytes - 802b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // length of the error bytes 803b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length = 804b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (int8_t)sprintf(pos, "%d", 
805b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (int)(infoffset + (cbufp - buf) - errorLength)); 806b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 807b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // output the bytes that caused the error 808b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString str; 809b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i = 0; i < errorLength; ++i) { 810b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (i > 0) { 811b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru str.append((UChar)uSP); 812b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 813b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru str.append(nibbleToHex((uint8_t)errorBytes[i] >> 4)); 814b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru str.append(nibbleToHex((uint8_t)errorBytes[i])); 815b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 816b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 817b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru initMsg(pname); 818b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_wmsg(stderr, "problemCvtToU", 819b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString(pos, length, "").getTerminatedBuffer(), 820b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru str.getTerminatedBuffer(), 821b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_wmsg_errorName(err)); 822b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 823b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru willexit = TRUE; 824b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru err = U_ZERO_ERROR; /* reset the error for the rest of the conversion. 
*/ 825b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 826b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 827b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Replaced a check for whether the input was consumed by 828b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // looping until it is; message key "premEndInput" now obsolete. 829b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 830b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (ulen == 0) { 831b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 832b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 833b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 834b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // remove a U+FEFF Unicode signature character if requested 835b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (sig < 0) { 836b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (u.charAt(0) == uSig) { 837b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u.remove(0, 1); 838b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 839b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // account for the removed UChar and offset 840b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru --ulen; 841b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 842b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (useOffsets) { 843b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // remove an offset from fromoffsets[] as well 844b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // to keep the array parallel with the UChars 845b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru memmove(fromoffsets, fromoffsets + 1, ulen * 4); 846b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 847b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 848b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Queru } 849b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sig = 0; 850b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 851b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 852b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_TRANSLITERATION 853b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Transliterate/transform if needed. 854b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 855b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // For transformation, we use chunking code - 856b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // collect Unicode input until, for example, an end-of-line, 857b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // then transform and output-convert that and continue collecting. 858b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // This makes the transformation result independent of the buffer size 859b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // while avoiding the slower keyboard mode. 860b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // The end-of-chunk characters are completely included in the 861b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // transformed string in case they are to be transformed themselves. 
862b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (t != NULL) { 863b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString out; 864b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t chunkLimit; 865b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 866b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru do { 867b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru chunkLimit = getChunkLimit(chunk, u); 868b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (chunkLimit < 0 && flush && fromSawEndOfBytes) { 869b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // use all of the rest at the end of the text 870b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru chunkLimit = u.length(); 871b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 872b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (chunkLimit >= 0) { 873b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // complete the chunk and transform it 874b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru chunk.append(u, 0, chunkLimit); 875b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u.remove(0, chunkLimit); 876b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t->transliterate(chunk); 877b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 878b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // append the transformation result to the result and empty the chunk 879b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru out.append(chunk); 880b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru chunk.remove(); 881b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 882b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // continue collecting the chunk 883b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru chunk.append(u); 884b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 
885b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 886b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } while (!u.isEmpty()); 887b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 888b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u = out; 889b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ulen = u.length(); 890b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 891b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 892b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 893b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // add a U+FEFF Unicode signature character if requested 894b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // and possible/necessary 895b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (sig > 0) { 896b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (u.charAt(0) != uSig && cnvSigType(convto) == CNV_WITH_FEFF) { 897b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u.insert(0, (UChar)uSig); 898b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 899b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (useOffsets) { 900b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // insert a pseudo-offset into fromoffsets[] as well 901b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // to keep the array parallel with the UChars 902b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru memmove(fromoffsets + 1, fromoffsets, ulen * 4); 903b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fromoffsets[0] = -1; 904b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 905b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 906b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // account for the additional UChar and offset 907b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++ulen; 
908b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 909b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sig = 0; 910b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 911b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 912b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Convert the Unicode buffer into the destination codepage 913b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Again 'bufp' will be placed behind the last converted character 914b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // And 'unibufp' will be placed behind the last converted unicode character 915b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // At the last conversion flush should be set to true to ensure that 916b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // all characters left get converted 917b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 918b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru unibuf = unibufbp = u.getBuffer(); 919b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 920b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru do { 921b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bufp = outbuf; 922b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 923b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Use fromSawEndOfBytes in addition to the flush flag - 924b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // it indicates whether the intermediate Unicode string 925b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // contains the very last UChars for the very last input bytes. 
926b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucnv_fromUnicode(convto, &bufp, outbuf + bufsz, 927b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru &unibufbp, 928b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru unibuf + ulen, 929b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru NULL, (UBool)(flush && fromSawEndOfBytes), &err); 930b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 931b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // toSawEndOfUnicode indicates that ucnv_fromUnicode() is done 932b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // converting all of the intermediate UChars. 933b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // See comment for fromSawEndOfBytes. 934b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru toSawEndOfUnicode = (UBool)U_SUCCESS(err); 935b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 936b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (err == U_BUFFER_OVERFLOW_ERROR) { 937b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru err = U_ZERO_ERROR; 938b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (U_FAILURE(err)) { 939b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar errorUChars[4]; 940b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *errtag; 941b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char pos[32]; 942b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c; 943b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int8_t i, length, errorLength; 944b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 945b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode localError = U_ZERO_ERROR; 946b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errorLength = (int8_t)LENGTHOF(errorUChars); 947b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucnv_getInvalidUChars(convto, 
errorUChars, &errorLength, &localError); 948b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(localError) || errorLength == 0) { 949b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // need at least 1 so that we don't access beyond the length of fromoffsets[] 950b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errorLength = 1; 951b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 952b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 953b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t ferroffset; 954b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 955b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (useOffsets) { 956b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Unicode buffer offset of the start of the error UChars 957b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ferroffset = (int32_t)((unibufbp - unibuf) - errorLength); 958b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (ferroffset < 0) { 959b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // approximation - the character started in the previous Unicode buffer 960b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ferroffset = 0; 961b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 962b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 963b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // get the corresponding byte offset out of fromoffsets[] 964b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // go back if the offset is not known for some of the UChars 965b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t fromoffset; 966b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru do { 967b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fromoffset = fromoffsets[ferroffset]; 968b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } while (fromoffset < 0 && --ferroffset 
>= 0); 969b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 970b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // total input file offset = 971b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // input file offset of the current byte buffer + 972b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // byte buffer offset of where the current Unicode buffer is converted from + 973b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // fromoffsets[Unicode offset] 974b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ferroffset = infoffset + (prevbufp - buf) + fromoffset; 975b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errtag = "problemCvtFromU"; 976b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 977b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Do not use fromoffsets if (t != NULL) because the Unicode text may 978b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // be different from what the offsets refer to. 
979b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 980b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // output file offset 981b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ferroffset = (int32_t)(outfoffset + (bufp - outbuf)); 982b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru errtag = "problemCvtFromUOut"; 983b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 984b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 985b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length = (int8_t)sprintf(pos, "%u", (int)ferroffset); 986b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 987b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // output the code points that caused the error 988b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString str; 989b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i = 0; i < errorLength;) { 990b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (i > 0) { 991b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru str.append((UChar)uSP); 992b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 993b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U16_NEXT(errorUChars, i, errorLength, c); 994b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c >= 0x100000) { 995b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru str.append(nibbleToHex((uint8_t)(c >> 20))); 996b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 997b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (c >= 0x10000) { 998b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru str.append(nibbleToHex((uint8_t)(c >> 16))); 999b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1000b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru str.append(nibbleToHex((uint8_t)(c >> 12))); 1001b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
str.append(nibbleToHex((uint8_t)(c >> 8))); 1002b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru str.append(nibbleToHex((uint8_t)(c >> 4))); 1003b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru str.append(nibbleToHex((uint8_t)c)); 1004b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1005b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1006b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru initMsg(pname); 1007b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_wmsg(stderr, errtag, 1008b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString(pos, length, "").getTerminatedBuffer(), 1009b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru str.getTerminatedBuffer(), 1010b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_wmsg_errorName(err)); 1011b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_wmsg(stderr, "errorUnicode", str.getTerminatedBuffer()); 1012b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1013b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru willexit = TRUE; 1014b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru err = U_ZERO_ERROR; /* reset the error for the rest of the conversion. */ 1015b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1016b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1017b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Replaced a check for whether the intermediate Unicode characters were all consumed by 1018b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // looping until they are; message key "premEnd" now obsolete. 
1019b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1020b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Finally, write the converted buffer to the output file 1021b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru size_t outlen = (size_t) (bufp - outbuf); 1022b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru outfoffset += (int32_t)(wr = fwrite(outbuf, 1, outlen, outfile)); 1023b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (wr != outlen) { 1024b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString str(strerror(errno)); 1025b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru initMsg(pname); 1026b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_wmsg(stderr, "cantWrite", str.getTerminatedBuffer()); 1027b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru willexit = TRUE; 1028b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1029b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1030b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (willexit) { 1031b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru goto error_exit; 1032b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1033b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } while (!toSawEndOfUnicode); 1034b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } while (!fromSawEndOfBytes); 1035b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } while (!flush); // Stop when we have flushed the 1036b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // converters (this means that it's 1037b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // the end of output) 1038b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1039b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru goto normal_exit; 1040b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
1041b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruerror_exit: 1042b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ret = FALSE; 1043b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1044b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querunormal_exit: 1045b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Cleanup. 1046b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1047b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucnv_close(convfrom); 1048b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucnv_close(convto); 1049b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1050b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_TRANSLITERATION 1051b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete t; 1052b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 1053b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1054b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (infile != stdin) { 1055b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fclose(infile); 1056b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1057b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1058b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return ret; 1059b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1060b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1061b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void usage(const char *pname, int ecode) { 1062b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *msg; 1063b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t msgLen; 1064b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode err = U_ZERO_ERROR; 1065b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru FILE *fp = ecode ? 
stderr : stdout; 1066b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int res; 1067b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1068b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru initMsg(pname); 1069b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru msg = 1070b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ures_getStringByKey(gBundle, ecode ? "lcUsageWord" : "ucUsageWord", 1071b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru &msgLen, &err); 1072b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString upname(pname, (int32_t)(uprv_strlen(pname) + 1)); 1073b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString mname(msg, msgLen + 1); 1074b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1075b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru res = u_wmsg(fp, "usage", mname.getBuffer(), upname.getBuffer()); 1076b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!ecode) { 1077b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!res) { 1078b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fputc('\n', fp); 1079b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1080b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!u_wmsg(fp, "help")) { 1081b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Now dump callbacks and finish. 
*/ 1082b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1083b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int i, count = 1084b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sizeof(transcode_callbacks) / sizeof(*transcode_callbacks); 1085b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i = 0; i < count; ++i) { 1086b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fprintf(fp, " %s", transcode_callbacks[i].name); 1087b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1088b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fputc('\n', fp); 1089b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1090b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1091b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1092b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru exit(ecode); 1093b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1094b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1095b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruextern int 1096b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querumain(int argc, char **argv) 1097b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 1098b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru FILE *outfile; 1099b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int ret = 0; 1100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru size_t bufsz = DEFAULT_BUFSZ; 1102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *fromcpage = 0; 1104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *tocpage = 0; 1105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *translit = 0; 1106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *outfilestr = 0; 
1107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool fallback = FALSE; 1108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UConverterFromUCallback fromucallback = UCNV_FROM_U_CALLBACK_STOP; 1110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const void *fromuctxt = 0; 1111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UConverterToUCallback toucallback = UCNV_TO_U_CALLBACK_STOP; 1112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const void *touctxt = 0; 1113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char **iter, **remainArgv, **remainArgvLimit; 1115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char **end = argv + argc; 1116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *pname; 1118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool printConvs = FALSE, printCanon = FALSE, printTranslits = FALSE; 1120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char *printName = 0; 1121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool verbose = FALSE; 1123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 1124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ConvertFile cf; 1126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Initialize ICU */ 1128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_init(&status); 1129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 
1130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fprintf(stderr, "%s: can not initialize ICU. status = %s\n", 1131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru argv[0], u_errorName(status)); 1132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru exit(1); 1133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Get and prettify pname. 1136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pname = uprv_strrchr(*argv, U_FILE_SEP_CHAR); 1137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifdef U_WINDOWS 1138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!pname) { 1139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pname = uprv_strrchr(*argv, '/'); 1140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 1142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!pname) { 1143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pname = *argv; 1144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++pname; 1146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // First, get the arguments from command-line 1149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // to know the codepages to convert between 1150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru remainArgv = remainArgvLimit = argv + 1; 1152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (iter = argv + 1; iter != end; iter++) { 
1153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Check for from charset 1154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (strcmp("-f", *iter) == 0 || !strcmp("--from-code", *iter)) { 1155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru iter++; 1156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (iter != end) 1157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fromcpage = *iter; 1158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else 1159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru usage(pname, 1); 1160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (strcmp("-t", *iter) == 0 || !strcmp("--to-code", *iter)) { 1161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru iter++; 1162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (iter != end) 1163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tocpage = *iter; 1164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else 1165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru usage(pname, 1); 1166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (strcmp("-x", *iter) == 0) { 1167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru iter++; 1168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (iter != end) 1169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru translit = *iter; 1170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else 1171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru usage(pname, 1); 1172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (!strcmp("--fallback", *iter)) { 1173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fallback = TRUE; 1174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (!strcmp("--no-fallback", *iter)) { 1175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fallback = FALSE; 
1176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (strcmp("-b", *iter) == 0 || !strcmp("--block-size", *iter)) { 1177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru iter++; 1178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (iter != end) { 1179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bufsz = atoi(*iter); 1180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ((int) bufsz <= 0) { 1181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru initMsg(pname); 1182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString str(*iter); 1183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru initMsg(pname); 1184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_wmsg(stderr, "badBlockSize", str.getTerminatedBuffer()); 1185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 3; 1186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru usage(pname, 1); 1189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (strcmp("-l", *iter) == 0 || !strcmp("--list", *iter)) { 1191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (printTranslits) { 1192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru usage(pname, 1); 1193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printConvs = TRUE; 1195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (strcmp("--default-code", *iter) == 0) { 1196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (printTranslits) { 1197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru usage(pname, 1); 1198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 
1199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printName = ucnv_getDefaultName(); 1200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (strcmp("--list-code", *iter) == 0) { 1201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (printTranslits) { 1202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru usage(pname, 1); 1203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru iter++; 1206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (iter != end) { 1207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode e = U_ZERO_ERROR; 1208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printName = ucnv_getAlias(*iter, 0, &e); 1209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(e) || !printName) { 1210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString str(*iter); 1211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru initMsg(pname); 1212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_wmsg(stderr, "noSuchCodeset", str.getTerminatedBuffer()); 1213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 2; 1214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else 1216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru usage(pname, 1); 1217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (strcmp("--canon", *iter) == 0) { 1218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printCanon = TRUE; 1219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (strcmp("-L", *iter) == 0 1220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru || !strcmp("--list-transliterators", *iter)) { 
1221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (printConvs) { 1222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru usage(pname, 1); 1223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printTranslits = TRUE; 1225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (strcmp("-h", *iter) == 0 || !strcmp("-?", *iter) 1226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru || !strcmp("--help", *iter)) { 1227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru usage(pname, 0); 1228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (!strcmp("-c", *iter)) { 1229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fromucallback = UCNV_FROM_U_CALLBACK_SKIP; 1230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (!strcmp("--to-callback", *iter)) { 1231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru iter++; 1232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (iter != end) { 1233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const struct callback_ent *cbe = findCallback(*iter); 1234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (cbe) { 1235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fromucallback = cbe->fromu; 1236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fromuctxt = cbe->fromuctxt; 1237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString str(*iter); 1239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru initMsg(pname); 1240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer()); 1241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 4; 1242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 
1243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru usage(pname, 1); 1245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (!strcmp("--from-callback", *iter)) { 1247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru iter++; 1248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (iter != end) { 1249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const struct callback_ent *cbe = findCallback(*iter); 1250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (cbe) { 1251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru toucallback = cbe->tou; 1252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru touctxt = cbe->touctxt; 1253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString str(*iter); 1255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru initMsg(pname); 1256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer()); 1257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 4; 1258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru usage(pname, 1); 1261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (!strcmp("-i", *iter)) { 1263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru toucallback = UCNV_TO_U_CALLBACK_SKIP; 1264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (!strcmp("--callback", *iter)) { 1265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru iter++; 
1266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (iter != end) { 1267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const struct callback_ent *cbe = findCallback(*iter); 1268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (cbe) { 1269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fromucallback = cbe->fromu; 1270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fromuctxt = cbe->fromuctxt; 1271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru toucallback = cbe->tou; 1272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru touctxt = cbe->touctxt; 1273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString str(*iter); 1275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru initMsg(pname); 1276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer()); 1277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 4; 1278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru usage(pname, 1); 1281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (!strcmp("-s", *iter) || !strcmp("--silent", *iter)) { 1283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru verbose = FALSE; 1284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (!strcmp("-v", *iter) || !strcmp("--verbose", *iter)) { 1285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru verbose = TRUE; 1286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (!strcmp("-V", *iter) || !strcmp("--version", *iter)) { 1287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru printf("%s v2.1 ICU 
" U_ICU_VERSION "\n", pname); 1288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 1289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (!strcmp("-o", *iter) || !strcmp("--output", *iter)) { 1290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++iter; 1291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (iter != end && !outfilestr) { 1292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru outfilestr = *iter; 1293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru usage(pname, 1); 1295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (0 == strcmp("--add-signature", *iter)) { 1297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru cf.signature = 1; 1298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (0 == strcmp("--remove-signature", *iter)) { 1299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru cf.signature = -1; 1300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (**iter == '-' && (*iter)[1]) { 1301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru usage(pname, 1); 1302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // move a non-option up in argv[] 1304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *remainArgvLimit++ = *iter; 1305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (printConvs || printName) { 1309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return printConverters(pname, printName, printCanon) ? 
2 : 0; 1310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (printTranslits) { 1311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return printTransliterators(printCanon) ? 3 : 0; 1312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!fromcpage || !uprv_strcmp(fromcpage, "-")) { 1315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fromcpage = ucnv_getDefaultName(); 1316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!tocpage || !uprv_strcmp(tocpage, "-")) { 1318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tocpage = ucnv_getDefaultName(); 1319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Open the correct output file or connect to stdout for reading input 1322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (outfilestr != 0 && strcmp(outfilestr, "-")) { 1323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru outfile = fopen(outfilestr, "wb"); 1324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (outfile == 0) { 1325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString str1(outfilestr, ""); 1326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString str2(strerror(errno), ""); 1327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru initMsg(pname); 1328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_wmsg(stderr, "cantCreateOutputF", 1329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru str1.getBuffer(), str2.getBuffer()); 1330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 1; 
1331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru outfilestr = "-"; 1334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru outfile = stdout; 1335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifdef USE_FILENO_BINARY_MODE 1336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (setmode(fileno(outfile), O_BINARY) == -1) { 1337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_wmsg(stderr, "cantSetOutBinMode"); 1338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru exit(-1); 1339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 1341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* Loop again on the arguments to find all the input files, and 1344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru convert them. 
*/ 1345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru cf.setBufferSize(bufsz); 1347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if(remainArgv < remainArgvLimit) { 1349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (iter = remainArgv; iter != remainArgvLimit; iter++) { 1350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!cf.convertFile( 1351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pname, fromcpage, toucallback, touctxt, tocpage, 1352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fromucallback, fromuctxt, fallback, translit, *iter, 1353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru outfile, verbose) 1354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ) { 1355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru goto error_exit; 1356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!cf.convertFile( 1360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pname, fromcpage, toucallback, touctxt, tocpage, 1361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fromucallback, fromuctxt, fallback, translit, 0, 1362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru outfile, verbose) 1363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ) { 1364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru goto error_exit; 1365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru goto 
normal_exit; 1369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruerror_exit: 137050294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#if !UCONFIG_NO_LEGACY_CONVERSION 1371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ret = 1; 137250294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#else 137350294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho fprintf(stderr, "uconv error: UCONFIG_NO_LEGACY_CONVERSION is on. See uconfig.h\n"); 137450294ead5e5d23f5bbfed76e00e6b510bd41eee1claireho#endif 1375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querunormal_exit: 1376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (outfile != stdout) { 1378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru fclose(outfile); 1379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return ret; 1382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 1386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Hey, Emacs, please set the following: 1387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 1388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Local Variables: 1389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * indent-tabs-mode: nil 1390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * End: 1391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 1392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1393