1f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius/* 2f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius****************************************************************************** 3f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius* 4f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius* Copyright (C) 2000-2014, International Business Machines 5f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius* Corporation and others. All Rights Reserved. 6f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius* 7f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius****************************************************************************** 8f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius* file name: ucnvmbcs.cpp 9f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius* encoding: US-ASCII 10f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius* tab size: 8 (not used) 11f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius* indentation:4 12f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius* 13f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius* created on: 2000jul03 14f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius* created by: Markus W. Scherer 15f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius* 16f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius* The current code in this file replaces the previous implementation 17f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius* of conversion code from multi-byte codepages to Unicode and back. 18f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius* This implementation supports the following: 19f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius* - legacy variable-length codepages with up to 4 bytes per character 20f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius* - all Unicode code points (up to 0x10ffff) 21f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius* - efficient distinction of unassigned vs. 
illegal byte sequences 22f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius* - it is possible in fromUnicode() to directly deal with simple 23f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius* stateful encodings (used for EBCDIC_STATEFUL) 24f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius* - it is possible to convert Unicode code points 25f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius* to a single zero byte (but not as a fallback except for SBCS) 26f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius* 27f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius* Remaining limitations in fromUnicode: 28f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius* - byte sequences must not have leading zero bytes 29f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius* - except for SBCS codepages: no fallback mapping from Unicode to a zero byte 30f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius* - limitation to up to 4 bytes per character 31f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius* 32f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius* ICU 2.8 (late 2003) adds a secondary data structure which lifts some of these 33f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius* limitations and adds m:n character mappings and other features. 34f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius* See ucnv_ext.h for details. 
35f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius* 36f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius* Change history: 37f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius* 38f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius* 5/6/2001 Ram Moved MBCS_SINGLE_RESULT_FROM_U,MBCS_STAGE_2_FROM_U, 39f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius* MBCS_VALUE_2_FROM_STAGE_2, MBCS_VALUE_4_FROM_STAGE_2 40f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius* macros to ucnvmbcs.h file 41f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius*/ 42f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 43f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius#include "unicode/utypes.h" 44f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 45f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION 46f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 47f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius#include "unicode/ucnv.h" 48f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius#include "unicode/ucnv_cb.h" 49f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius#include "unicode/udata.h" 50f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius#include "unicode/uset.h" 51f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius#include "unicode/utf8.h" 52f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius#include "unicode/utf16.h" 53f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius#include "ucnv_bld.h" 54f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius#include "ucnvmbcs.h" 55f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius#include "ucnv_ext.h" 56f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius#include "ucnv_cnv.h" 57f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius#include "cmemory.h" 58f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius#include "cstring.h" 59f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius#include "umutex.h" 60f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 61f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius/* control 
optimizations according to the platform */ 62f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius#define MBCS_UNROLL_SINGLE_TO_BMP 1 63f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius#define MBCS_UNROLL_SINGLE_FROM_BMP 0 64f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 65f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius/* 66f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * _MBCSHeader versions 5.3 & 4.3 67f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * (Note that the _MBCSHeader version is in addition to the converter formatVersion.) 68f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 69f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * This version is optional. Version 5 is used for incompatible data format changes. 70f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * makeconv will continue to generate version 4 files if possible. 71f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 72f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Changes from version 4: 73f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 74f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * The main difference is an additional _MBCSHeader field with 75f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * - the length (number of uint32_t) of the _MBCSHeader 76f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * - flags for further incompatible data format changes 77f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * - flags for further, backward compatible data format changes 78f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 79f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * The MBCS_OPT_FROM_U flag indicates that most of the fromUnicode data is omitted from 80f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * the file and needs to be reconstituted at load time. 
81f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * This requires a utf8Friendly format with an additional mbcsIndex table for fast 82f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * (and UTF-8-friendly) fromUnicode conversion for Unicode code points up to maxFastUChar. 83f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * (For details about these structures see below, and see ucnvmbcs.h.) 84f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 85f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * utf8Friendly also implies that the fromUnicode mappings are stored in ascending order 86f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * of the Unicode code points. (This requires that the .ucm file has the |0 etc. 87f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * precision markers for all mappings.) 88f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 89f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * All fallbacks have been moved to the extension table, leaving only roundtrips in the 90f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * omitted data that can be reconstituted from the toUnicode data. 91f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 92f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Of the stage 2 table, the part corresponding to maxFastUChar and below is omitted. 93f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * With only roundtrip mappings in the base fromUnicode data, this part is fully 94f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * redundant with the mbcsIndex and will be reconstituted from that (also using the 95f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * stage 1 table which contains the information about how stage 2 was compacted). 
96f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 97f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * The rest of the stage 2 table, the part for code points above maxFastUChar, 98f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * is stored in the file and will be appended to the reconstituted part. 99f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 100f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * The entire fromUBytes array is omitted from the file and will be reconstituted. 101f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * This is done by enumerating all toUnicode roundtrip mappings, performing 102f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * each mapping (using the stage 1 and reconstituted stage 2 tables) and 103f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * writing instead of reading the byte values. 104f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 105f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * _MBCSHeader version 4.3 106f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 107f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Change from version 4.2: 108f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * - Optional utf8Friendly data structures, with 64-entry stage 3 block 109f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * allocation for parts of the BMP, and an additional mbcsIndex in non-SBCS 110f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * files which can be used instead of stages 1 & 2. 111f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Faster lookups for roundtrips from most commonly used characters, 112f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * and lookups from UTF-8 byte sequences with a natural bit distribution. 113f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * See ucnvmbcs.h for more details. 
114f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 115f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Change from version 4.1: 116f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * - Added an optional extension table structure at the end of the .cnv file. 117f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * It is present if the upper bits of the header flags field contain a non-zero 118f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * byte offset to it. 119f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Files that contain only a conversion table and no base table 120f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * use the special outputType MBCS_OUTPUT_EXT_ONLY. 121f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * These contain the base table name between the MBCS header and the extension 122f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * data. 123f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 124f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Change from version 4.0: 125f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * - Replace header.reserved with header.fromUBytesLength so that all 126f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * fields in the data have length. 
127f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 128f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Changes from version 3 (for performance improvements): 129f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * - new bit distribution for state table entries 130f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * - reordered action codes 131f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * - new data structure for single-byte fromUnicode 132f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * + stage 2 only contains indexes 133f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * + stage 3 stores 16 bits per character with classification bits 15..8 134f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * - no multiplier for stage 1 entries 135f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * - stage 2 for non-single-byte codepages contains the index and the flags in 136f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * one 32-bit value 137f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * - 2-byte and 4-byte fromUnicode results are stored directly as 16/32-bit integers 138f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 139f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * For more details about old versions of the MBCS data structure, see 140f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * the corresponding versions of this file. 141f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 142f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Converting stateless codepage data ---------------------------------------*** 143f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * (or codepage data with simple states) to Unicode. 144f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 145f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Data structure and algorithm for converting from complex legacy codepages 146f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * to Unicode. (Designed before 2000-may-22.) 
147f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 148f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * The basic idea is that the structure of legacy codepages can be described 149f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * with state tables. 150f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * When reading a byte stream, each input byte causes a state transition. 151f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Some transitions result in the output of a code point, some result in 152f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * "unassigned" or "illegal" output. 153f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * This is used here for character conversion. 154f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 155f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * The data structure begins with a state table consisting of a row 156f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * per state, with 256 entries (columns) per row for each possible input 157f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * byte value. 158f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Each entry is 32 bits wide, with two formats distinguished by 159f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * the sign bit (bit 31): 160f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 161f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * One format for transitional entries (bit 31 not set) for non-final bytes, and 162f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * one format for final entries (bit 31 set). 163f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Both formats contain the number of the next state in the same bit 164f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * positions. 165f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * State 0 is the initial state. 
166f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 167f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Most of the time, the offset values of subsequent states are added 168f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * up to a scalar value. This value will eventually be the index of 169f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * the Unicode code point in a table that follows the state table. 170f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * The effect is that the code points for final state table rows 171f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * are contiguous. The code points of final state rows follow each other 172f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * in the order of the references to those final states by previous 173f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * states, etc. 174f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 175f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * For some terminal states, the offset is itself the output Unicode 176f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * code point (16 bits for a BMP code point or 20 bits for a supplementary 177f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * code point (stored as code point minus 0x10000 so that 20 bits are enough)). 178f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * For others, the code point in the Unicode table is stored with either 179f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * one or two code units: one for BMP code points, two for a pair of 180f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * surrogates. 181f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * All code points for a final state entry take up the same number of code 182f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * units, regardless of whether they all actually _use_ the same number 183f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * of code units. This is necessary for simple array access. 
184f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 185f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * An additional feature comes in with what in ICU is called "fallback" 186f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * mappings: 187f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 188f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * In addition to round-trippable, precise, 1:1 mappings, there are often 189f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * mappings defined between similar, though not the same, characters. 190f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Typically, such mappings occur only in fromUnicode mapping tables because 191f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Unicode has a superset repertoire of most other codepages. However, it 192f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * is possible to provide such mappings in the toUnicode tables, too. 193f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * In this case, the fallback mappings are partly integrated into the 194f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * general state tables because the structure of the encoding includes their 195f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * byte sequences. 196f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * For final entries in an initial state, fallback mappings are stored in 197f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * the entry itself like with roundtrip mappings. 198f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * For other final entries, they are stored in the code units table if 199f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * the entry is for a pair of code units. 
200f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * For single-unit results in the code units table, there is no space to 201f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * alternatively hold a fallback mapping; in this case, the code unit 202f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * is stored as U+fffe (unassigned), and the fallback mapping needs to 203f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * be looked up by the scalar offset value in a separate table. 204f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 205f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * "Unassigned" state entries really mean "structurally unassigned", 206f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * i.e., such a byte sequence will never have a mapping result. 207f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 208f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * The interpretation of the bits in each entry is as follows: 209f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 210f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Bit 31 not set, not a terminal entry ("transitional"): 211f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 30..24 next state 212f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 23..0 offset delta, to be added up 213f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 214f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Bit 31 set, terminal ("final") entry: 215f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 30..24 next state (regardless of action code) 216f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 23..20 action code: 217f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * action codes 0 and 1 result in precise-mapping Unicode code points 218f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 0 valid byte sequence 219f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 19..16 not used, 0 220f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 15..0 16-bit Unicode BMP code point 
221f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * never U+fffe or U+ffff 222f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 1 valid byte sequence 223f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 19..0 20-bit Unicode supplementary code point 224f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * never U+fffe or U+ffff 225f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 226f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * action codes 2 and 3 result in fallback (unidirectional-mapping) Unicode code points 227f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 2 valid byte sequence (fallback) 228f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 19..16 not used, 0 229f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 15..0 16-bit Unicode BMP code point as fallback result 230f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 3 valid byte sequence (fallback) 231f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 19..0 20-bit Unicode supplementary code point as fallback result 232f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 233f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * action codes 4 and 5 may result in roundtrip/fallback/unassigned/illegal results 234f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * depending on the code units they result in 235f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 4 valid byte sequence 236f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 19..9 not used, 0 237f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 8..0 final offset delta 238f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * pointing to one 16-bit code unit which may be 239f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * fffe unassigned -- look for a fallback for this offset 240f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * ffff illegal 241f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 5 valid byte sequence 242f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 19..9 not used, 0 
243f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 8..0 final offset delta 244f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * pointing to two 16-bit code units 245f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * (typically UTF-16 surrogates) 246f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * the result depends on the first code unit as follows: 247f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 0000..d7ff roundtrip BMP code point (1st alone) 248f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * d800..dbff roundtrip surrogate pair (1st, 2nd) 249f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * dc00..dfff fallback surrogate pair (1st-400, 2nd) 250f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * e000 roundtrip BMP code point (2nd alone) 251f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * e001 fallback BMP code point (2nd alone) 252f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * fffe unassigned 253f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * ffff illegal 254f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * (the final offset deltas are at most 255 * 2, 255f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * times 2 because of storing code unit pairs) 256f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 257f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 6 unassigned byte sequence 258f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 19..16 not used, 0 259f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 15..0 16-bit Unicode BMP code point U+fffe (new with version 2) 260f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * this does not contain a final offset delta because the main 261f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * purpose of this action code is to save scalar offset values; 262f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * therefore, fallback values cannot be assigned to byte 263f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * sequences that result in this action code 
264f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 7 illegal byte sequence 265f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 19..16 not used, 0 266f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 15..0 16-bit Unicode BMP code point U+ffff (new with version 2) 267f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 8 state change only 268f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 19..0 not used, 0 269f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * useful for state changes in simple stateful encodings, 270f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * at Shift-In/Shift-Out codes 271f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 272f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 273f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 9..15 reserved for future use 274f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * current implementations will only perform a state change 275f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * and ignore bits 19..0 276f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 277f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * An encoding with contiguous ranges of unassigned byte sequences, like 278f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Shift-JIS and especially EUC-TW, can be stored efficiently by having 279f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * at least two states for the trail bytes: 280f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * One trail byte state that results in code points, and one that only 281f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * has "unassigned" and "illegal" terminal states. 282f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 283f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Note: partly by accident, this data structure supports simple stateful 284f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * encodings without any additional logic. 
285f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Currently, only simple Shift-In/Shift-Out schemes are handled with 286f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * appropriate state tables (especially EBCDIC_STATEFUL!). 287f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 288f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * MBCS version 2 added: 289f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * unassigned and illegal action codes have U+fffe and U+ffff 290f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * instead of unused bits; this is useful for _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP() 291f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 292f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Converting from Unicode to codepage bytes --------------------------------*** 293f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 294f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * The conversion data structure for fromUnicode is designed for the known 295f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * structure of Unicode. It maps from 21-bit code points (0..0x10ffff) to 296f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * a sequence of 1..4 bytes, in addition to a flag that indicates if there is 297f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * a roundtrip mapping. 298f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 299f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * The lookup is done with a 3-stage trie, using 11/6/4 bits for stage 1/2/3 300f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * like in the character properties table. 301f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * The beginning of the trie is at offsetFromUTable, the beginning of stage 3 302f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * with the resulting bytes is at offsetFromUBytes. 
303f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 304f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Beginning with version 4, single-byte codepages have a significantly different 305f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * trie compared to other codepages. 306f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * In all cases, the entry in stage 1 is directly the index of the block of 307f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 64 entries in stage 2. 308f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 309f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Single-byte lookup: 310f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 311f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Stage 2 only contains 16-bit indexes directly to the 16-blocks in stage 3. 312f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Stage 3 contains one 16-bit word per result: 313f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Bits 15..8 indicate the kind of result: 314f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * f roundtrip result 315f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * c fallback result from private-use code point 316f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 8 fallback result from other code points 317f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 0 unassigned 318f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Bits 7..0 contain the codepage byte. A zero byte is always possible. 319f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 320f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * In version 4.3, the runtime code can build an sbcsIndex for a utf8Friendly 321f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * file. For 2-byte UTF-8 byte sequences and some 3-byte sequences the lookup 322f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * becomes a 2-stage (single-index) trie lookup with 6 bits for stage 3. 
323f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * ASCII code points can be looked up with a linear array access into stage 3. 324f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * See maxFastUChar and other details in ucnvmbcs.h. 325f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 326f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Multi-byte lookup: 327f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 328f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Stage 2 contains a 32-bit word for each 16-block in stage 3: 329f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Bits 31..16 contain flags for which stage 3 entries contain roundtrip results 330f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * test: MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c) 331f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * If this test is false, then a non-zero result will be interpreted as 332f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * a fallback mapping. 333f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Bits 15..0 contain the index to stage 3, which must be multiplied by 16*(bytes per char) 334f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 335f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Stage 3 contains 2, 3, or 4 bytes per result. 336f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 2 or 4 bytes are stored as uint16_t/uint32_t in platform endianness, 337f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * while 3 bytes are stored as bytes in big-endian order. 338f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Leading zero bytes are ignored, and the number of bytes is counted. 339f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * A zero byte mapping result is possible as a roundtrip result. 340f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * For some output types, the actual result is processed from this; 341f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * see ucnv_MBCSFromUnicodeWithOffsets(). 
 *
 * Note that stage 1 always contains 0x440=1088 entries (0x440==0x110000>>10),
 * or (version 3 and up) for BMP-only codepages, it contains 64 entries.
 *
 * In version 4.3, a utf8Friendly file contains an mbcsIndex table.
 * For 2-byte UTF-8 byte sequences and most 3-byte sequences the lookup
 * becomes a 2-stage (single-index) trie lookup with 6 bits for stage 3.
 * ASCII code points can be looked up with a linear array access into stage 3.
 * See maxFastUChar, mbcsIndex and other details in ucnvmbcs.h.
 *
 * In version 3, stage 2 blocks may overlap by multiples of the multiplier
 * for compaction.
 * In version 4, stage 2 blocks (and for single-byte codepages, stage 3 blocks)
 * may overlap by any number of entries.
356f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 357f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * MBCS version 2 added: 358f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * the converter checks for known output types, which allows 359f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * adding new ones without crashing an unaware converter 360f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 361f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 362f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius/** 363f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Callback from ucnv_MBCSEnumToUnicode(), takes 32 mappings from 364f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * consecutive sequences of bytes, starting from the one encoded in value, 365f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * to Unicode code points. (Multiple mappings to reduce per-function call overhead.) 366f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Does not currently support m:n mappings or reverse fallbacks. 367f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * This function will not be called for sequences of bytes with leading zeros. 
368f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 369f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * @param context an opaque pointer, as passed into ucnv_MBCSEnumToUnicode() 370f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * @param value contains 1..4 bytes of the first byte sequence, right-aligned 371f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * @param codePoints resulting Unicode code points, or negative if a byte sequence does 372f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * not map to anything 373f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * @return TRUE to continue enumeration, FALSE to stop 374f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 375f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliustypedef UBool U_CALLCONV 376f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusUConverterEnumToUCallback(const void *context, uint32_t value, UChar32 codePoints[32]); 377f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 378f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusstatic void 379f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusucnv_MBCSLoad(UConverterSharedData *sharedData, 380f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UConverterLoadArgs *pArgs, 381f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius const uint8_t *raw, 382f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UErrorCode *pErrorCode); 383f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 384f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusstatic void 385f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusucnv_MBCSUnload(UConverterSharedData *sharedData); 386f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 387f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusstatic void 388f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusucnv_MBCSOpen(UConverter *cnv, 389f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UConverterLoadArgs *pArgs, 390f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UErrorCode *pErrorCode); 
391f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 392f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusstatic UChar32 393f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusucnv_MBCSGetNextUChar(UConverterToUnicodeArgs *pArgs, 394f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UErrorCode *pErrorCode); 395f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 396f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusstatic void 397f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusucnv_MBCSGetStarters(const UConverter* cnv, 398f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UBool starters[256], 399f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UErrorCode *pErrorCode); 400f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 401f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusstatic const char * 402f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusucnv_MBCSGetName(const UConverter *cnv); 403f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 404f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusstatic void 405f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusucnv_MBCSWriteSub(UConverterFromUnicodeArgs *pArgs, 406f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius int32_t offsetIndex, 407f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UErrorCode *pErrorCode); 408f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 409f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusstatic UChar32 410f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusucnv_MBCSGetNextUChar(UConverterToUnicodeArgs *pArgs, 411f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UErrorCode *pErrorCode); 412f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 413f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusstatic void 414f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusucnv_SBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs, 415f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UConverterToUnicodeArgs *pToUArgs, 416f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UErrorCode *pErrorCode); 
417f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 418f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusstatic void 419f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusucnv_MBCSGetUnicodeSet(const UConverter *cnv, 420f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius const USetAdder *sa, 421f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UConverterUnicodeSet which, 422f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UErrorCode *pErrorCode); 423f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 424f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusstatic void 425f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusucnv_DBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs, 426f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UConverterToUnicodeArgs *pToUArgs, 427f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UErrorCode *pErrorCode); 428f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 429f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusstatic const UConverterImpl _SBCSUTF8Impl={ 430f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UCNV_MBCS, 431f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 432f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ucnv_MBCSLoad, 433f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ucnv_MBCSUnload, 434f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 435f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ucnv_MBCSOpen, 436f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius NULL, 437f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius NULL, 438f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 439f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ucnv_MBCSToUnicodeWithOffsets, 440f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ucnv_MBCSToUnicodeWithOffsets, 441f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ucnv_MBCSFromUnicodeWithOffsets, 442f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ucnv_MBCSFromUnicodeWithOffsets, 443f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ucnv_MBCSGetNextUChar, 
444f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 445f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ucnv_MBCSGetStarters, 446f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ucnv_MBCSGetName, 447f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ucnv_MBCSWriteSub, 448f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius NULL, 449f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ucnv_MBCSGetUnicodeSet, 450f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 451f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius NULL, 452f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ucnv_SBCSFromUTF8 453f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius}; 454f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 455f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusstatic const UConverterImpl _DBCSUTF8Impl={ 456f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UCNV_MBCS, 457f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 458f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ucnv_MBCSLoad, 459f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ucnv_MBCSUnload, 460f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 461f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ucnv_MBCSOpen, 462f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius NULL, 463f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius NULL, 464f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 465f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ucnv_MBCSToUnicodeWithOffsets, 466f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ucnv_MBCSToUnicodeWithOffsets, 467f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ucnv_MBCSFromUnicodeWithOffsets, 468f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ucnv_MBCSFromUnicodeWithOffsets, 469f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ucnv_MBCSGetNextUChar, 470f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 471f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ucnv_MBCSGetStarters, 472f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ucnv_MBCSGetName, 
473f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ucnv_MBCSWriteSub, 474f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius NULL, 475f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ucnv_MBCSGetUnicodeSet, 476f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 477f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius NULL, 478f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ucnv_DBCSFromUTF8 479f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius}; 480f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 481f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusstatic const UConverterImpl _MBCSImpl={ 482f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UCNV_MBCS, 483f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 484f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ucnv_MBCSLoad, 485f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ucnv_MBCSUnload, 486f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 487f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ucnv_MBCSOpen, 488f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius NULL, 489f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius NULL, 490f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 491f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ucnv_MBCSToUnicodeWithOffsets, 492f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ucnv_MBCSToUnicodeWithOffsets, 493f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ucnv_MBCSFromUnicodeWithOffsets, 494f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ucnv_MBCSFromUnicodeWithOffsets, 495f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ucnv_MBCSGetNextUChar, 496f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 497f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ucnv_MBCSGetStarters, 498f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ucnv_MBCSGetName, 499f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ucnv_MBCSWriteSub, 500f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius NULL, 501f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ucnv_MBCSGetUnicodeSet, 
502f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius NULL, 503f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius NULL 504f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius}; 505f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 506f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 507f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius/* Static data is in tools/makeconv/ucnvstat.c for data-based 508f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * converters. Be sure to update it as well. 509f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 510f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 511f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusconst UConverterSharedData _MBCSData={ 512f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius sizeof(UConverterSharedData), 1, 513f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius NULL, NULL, NULL, FALSE, &_MBCSImpl, 514f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 0, UCNV_MBCS_TABLE_INITIALIZER 515f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius}; 516f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 517f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 518f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius/* GB 18030 data ------------------------------------------------------------ */ 519f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 520f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius/* helper macros for linear values for GB 18030 four-byte sequences */ 521f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius#define LINEAR_18030(a, b, c, d) ((((a)*10+(b))*126L+(c))*10L+(d)) 522f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 523f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius#define LINEAR_18030_BASE LINEAR_18030(0x81, 0x30, 0x81, 0x30) 524f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 525f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius#define LINEAR(x) LINEAR_18030(x>>24, (x>>16)&0xff, (x>>8)&0xff, x&0xff) 526f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 
527f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius/* 528f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Some ranges of GB 18030 where both the Unicode code points and the 529f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * GB four-byte sequences are contiguous and are handled algorithmically by 530f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * the special callback functions below. 531f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * The values are start & end of Unicode & GB codes. 532f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 533f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Note that single surrogates are not mapped by GB 18030 534f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * as of the re-released mapping tables from 2000-nov-30. 535f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 536f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusstatic const uint32_t 537f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusgb18030Ranges[14][4]={ 538f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius {0x10000, 0x10FFFF, LINEAR(0x90308130), LINEAR(0xE3329A35)}, 539f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius {0x9FA6, 0xD7FF, LINEAR(0x82358F33), LINEAR(0x8336C738)}, 540f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius {0x0452, 0x1E3E, LINEAR(0x8130D330), LINEAR(0x8135F436)}, 541f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius {0x1E40, 0x200F, LINEAR(0x8135F438), LINEAR(0x8136A531)}, 542f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius {0xE865, 0xF92B, LINEAR(0x8336D030), LINEAR(0x84308534)}, 543f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius {0x2643, 0x2E80, LINEAR(0x8137A839), LINEAR(0x8138FD38)}, 544f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius {0xFA2A, 0xFE2F, LINEAR(0x84309C38), LINEAR(0x84318537)}, 545f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius {0x3CE1, 0x4055, LINEAR(0x8231D438), LINEAR(0x8232AF32)}, 546f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius {0x361B, 0x3917, LINEAR(0x8230A633), 
LINEAR(0x8230F237)}, 547f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius {0x49B8, 0x4C76, LINEAR(0x8234A131), LINEAR(0x8234E733)}, 548f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius {0x4160, 0x4336, LINEAR(0x8232C937), LINEAR(0x8232F837)}, 549f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius {0x478E, 0x4946, LINEAR(0x8233E838), LINEAR(0x82349638)}, 550f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius {0x44D7, 0x464B, LINEAR(0x8233A339), LINEAR(0x8233C931)}, 551f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius {0xFFE6, 0xFFFF, LINEAR(0x8431A234), LINEAR(0x8431A439)} 552f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius}; 553f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 554f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius/* bit flag for UConverter.options indicating GB 18030 special handling */ 555f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius#define _MBCS_OPTION_GB18030 0x8000 556f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 557f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius/* bit flag for UConverter.options indicating KEIS,JEF,JIF special handling */ 558f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius#define _MBCS_OPTION_KEIS 0x01000 559f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius#define _MBCS_OPTION_JEF 0x02000 560f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius#define _MBCS_OPTION_JIPS 0x04000 561f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 562f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius#define KEIS_SO_CHAR_1 0x0A 563f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius#define KEIS_SO_CHAR_2 0x42 564f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius#define KEIS_SI_CHAR_1 0x0A 565f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius#define KEIS_SI_CHAR_2 0x41 566f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 567f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius#define JEF_SO_CHAR 0x28 568f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius#define JEF_SI_CHAR 0x29 569f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 
570f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius#define JIPS_SO_CHAR_1 0x1A 571f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius#define JIPS_SO_CHAR_2 0x70 572f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius#define JIPS_SI_CHAR_1 0x1A 573f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius#define JIPS_SI_CHAR_2 0x71 574f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 575f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusenum SISO_Option { 576f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius SI, 577f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius SO 578f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius}; 579f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliustypedef enum SISO_Option SISO_Option; 580f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 581f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusstatic int32_t getSISOBytes(SISO_Option option, uint32_t cnvOption, uint8_t *value) { 582f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius int32_t SISOLength = 0; 583f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 584f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius switch (option) { 585f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius case SI: 586f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if ((cnvOption&_MBCS_OPTION_KEIS)!=0) { 587f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value[0] = KEIS_SI_CHAR_1; 588f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value[1] = KEIS_SI_CHAR_2; 589f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius SISOLength = 2; 590f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if ((cnvOption&_MBCS_OPTION_JEF)!=0) { 591f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value[0] = JEF_SI_CHAR; 592f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius SISOLength = 1; 593f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if ((cnvOption&_MBCS_OPTION_JIPS)!=0) { 594f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value[0] = JIPS_SI_CHAR_1; 595f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value[1] = 
JIPS_SI_CHAR_2; 596f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius SISOLength = 2; 597f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 598f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value[0] = UCNV_SI; 599f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius SISOLength = 1; 600f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 601f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 602f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius case SO: 603f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if ((cnvOption&_MBCS_OPTION_KEIS)!=0) { 604f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value[0] = KEIS_SO_CHAR_1; 605f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value[1] = KEIS_SO_CHAR_2; 606f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius SISOLength = 2; 607f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if ((cnvOption&_MBCS_OPTION_JEF)!=0) { 608f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value[0] = JEF_SO_CHAR; 609f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius SISOLength = 1; 610f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if ((cnvOption&_MBCS_OPTION_JIPS)!=0) { 611f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value[0] = JIPS_SO_CHAR_1; 612f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value[1] = JIPS_SO_CHAR_2; 613f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius SISOLength = 2; 614f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 615f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value[0] = UCNV_SO; 616f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius SISOLength = 1; 617f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 618f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 619f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius default: 620f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* Should never happen. 
*/ 621f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 622f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 623f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 624f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return SISOLength; 625f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius} 626f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 627f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius/* Miscellaneous ------------------------------------------------------------ */ 628f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 629f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius/* similar to ucnv_MBCSGetNextUChar() but recursive */ 630f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusstatic UBool 631f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusenumToU(UConverterMBCSTable *mbcsTable, int8_t stateProps[], 632f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius int32_t state, uint32_t offset, 633f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius uint32_t value, 634f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UConverterEnumToUCallback *callback, const void *context, 635f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UErrorCode *pErrorCode) { 636f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UChar32 codePoints[32]; 637f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius const int32_t *row; 638f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius const uint16_t *unicodeCodeUnits; 639f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UChar32 anyCodePoints; 640f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius int32_t b, limit; 641f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 642f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius row=mbcsTable->stateTable[state]; 643f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius unicodeCodeUnits=mbcsTable->unicodeCodeUnits; 644f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 645f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value<<=8; 646f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 
anyCodePoints=-1; /* becomes non-negative if there is a mapping */ 647f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 648f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius b=(stateProps[state]&0x38)<<2; 649f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(b==0 && stateProps[state]>=0x40) { 650f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* skip byte sequences with leading zeros because they are not stored in the fromUnicode table */ 651f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius codePoints[0]=U_SENTINEL; 652f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius b=1; 653f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 654f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius limit=((stateProps[state]&7)+1)<<5; 655f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius while(b<limit) { 656f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius int32_t entry=row[b]; 657f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(MBCS_ENTRY_IS_TRANSITION(entry)) { 658f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius int32_t nextState=MBCS_ENTRY_TRANSITION_STATE(entry); 659f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(stateProps[nextState]>=0) { 660f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* recurse to a state with non-ignorable actions */ 661f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(!enumToU( 662f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius mbcsTable, stateProps, nextState, 663f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius offset+MBCS_ENTRY_TRANSITION_OFFSET(entry), 664f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value|(uint32_t)b, 665f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius callback, context, 666f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius pErrorCode)) { 667f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return FALSE; 668f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 669f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 670f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 
codePoints[b&0x1f]=U_SENTINEL; 671f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 672f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UChar32 c; 673f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius int32_t action; 674f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 675f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* 676f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * An if-else-if chain provides more reliable performance for 677f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * the most common cases compared to a switch. 678f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 679f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius action=MBCS_ENTRY_FINAL_ACTION(entry); 680f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(action==MBCS_STATE_VALID_DIRECT_16) { 681f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* output BMP code point */ 682f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); 683f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if(action==MBCS_STATE_VALID_16) { 684f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius int32_t finalOffset=offset+MBCS_ENTRY_FINAL_VALUE_16(entry); 685f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=unicodeCodeUnits[finalOffset]; 686f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(c<0xfffe) { 687f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* output BMP code point */ 688f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 689f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=U_SENTINEL; 690f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 691f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if(action==MBCS_STATE_VALID_16_PAIR) { 692f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius int32_t finalOffset=offset+MBCS_ENTRY_FINAL_VALUE_16(entry); 693f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=unicodeCodeUnits[finalOffset++]; 694f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(c<0xd800) { 
695f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* output BMP code point below 0xd800 */ 696f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if(c<=0xdbff) { 697f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* output roundtrip or fallback supplementary code point */ 698f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=((c&0x3ff)<<10)+unicodeCodeUnits[finalOffset]+(0x10000-0xdc00); 699f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if(c==0xe000) { 700f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* output roundtrip BMP code point above 0xd800 or fallback BMP code point */ 701f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=unicodeCodeUnits[finalOffset]; 702f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 703f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=U_SENTINEL; 704f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 705f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if(action==MBCS_STATE_VALID_DIRECT_20) { 706f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* output supplementary code point */ 707f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=(UChar32)(MBCS_ENTRY_FINAL_VALUE(entry)+0x10000); 708f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 709f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=U_SENTINEL; 710f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 711f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 712f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius codePoints[b&0x1f]=c; 713f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius anyCodePoints&=c; 714f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 715f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(((++b)&0x1f)==0) { 716f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(anyCodePoints>=0) { 717f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(!callback(context, value|(uint32_t)(b-0x20), codePoints)) { 718f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return FALSE; 
719f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 720f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius anyCodePoints=-1; 721f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 722f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 723f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 724f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return TRUE; 725f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius} 726f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 727f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius/* 728f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Only called if stateProps[state]==-1. 729f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * A recursive call may do stateProps[state]|=0x40 if this state is the target of an 730f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * MBCS_STATE_CHANGE_ONLY. 731f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 732f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusstatic int8_t 733f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusgetStateProp(const int32_t (*stateTable)[256], int8_t stateProps[], int state) { 734f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius const int32_t *row; 735f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius int32_t min, max, entry, nextState; 736f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 737f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius row=stateTable[state]; 738f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius stateProps[state]=0; 739f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 740f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* find first non-ignorable state */ 741f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius for(min=0;; ++min) { 742f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius entry=row[min]; 743f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius nextState=MBCS_ENTRY_STATE(entry); 744f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(stateProps[nextState]==-1) { 745f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 
getStateProp(stateTable, stateProps, nextState); 746f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 747f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(MBCS_ENTRY_IS_TRANSITION(entry)) { 748f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(stateProps[nextState]>=0) { 749f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 750f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 751f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if(MBCS_ENTRY_FINAL_ACTION(entry)<MBCS_STATE_UNASSIGNED) { 752f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 753f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 754f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(min==0xff) { 755f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius stateProps[state]=-0x40; /* (int8_t)0xc0 */ 756f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return stateProps[state]; 757f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 758f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 759f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius stateProps[state]|=(int8_t)((min>>5)<<3); 760f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 761f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* find last non-ignorable state */ 762f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius for(max=0xff; min<max; --max) { 763f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius entry=row[max]; 764f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius nextState=MBCS_ENTRY_STATE(entry); 765f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(stateProps[nextState]==-1) { 766f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius getStateProp(stateTable, stateProps, nextState); 767f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 768f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(MBCS_ENTRY_IS_TRANSITION(entry)) { 769f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(stateProps[nextState]>=0) { 770f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 
771f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 772f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if(MBCS_ENTRY_FINAL_ACTION(entry)<MBCS_STATE_UNASSIGNED) { 773f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 774f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 775f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 776f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius stateProps[state]|=(int8_t)(max>>5); 777f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 778f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* recurse further and collect direct-state information */ 779f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius while(min<=max) { 780f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius entry=row[min]; 781f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius nextState=MBCS_ENTRY_STATE(entry); 782f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(stateProps[nextState]==-1) { 783f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius getStateProp(stateTable, stateProps, nextState); 784f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 785f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(MBCS_ENTRY_IS_FINAL(entry)) { 786f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius stateProps[nextState]|=0x40; 787f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(MBCS_ENTRY_FINAL_ACTION(entry)<=MBCS_STATE_FALLBACK_DIRECT_20) { 788f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius stateProps[state]|=0x40; 789f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 790f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 791f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ++min; 792f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 793f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return stateProps[state]; 794f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius} 795f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 796f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius/* 797f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Internal 
function enumerating the toUnicode data of an MBCS converter. 798f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Currently only used for reconstituting data for a MBCS_OPT_NO_FROM_U 799f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * table, but could also be used for a future ucnv_getUnicodeSet() option 800f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * that includes reverse fallbacks (after updating this function's implementation). 801f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Currently only handles roundtrip mappings. 802f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Does not currently handle extensions. 803f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 804f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusstatic void 805f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusucnv_MBCSEnumToUnicode(UConverterMBCSTable *mbcsTable, 806f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UConverterEnumToUCallback *callback, const void *context, 807f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UErrorCode *pErrorCode) { 808f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* 809f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Properties for each state, to speed up the enumeration. 810f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Ignorable actions are unassigned/illegal/state-change-only: 811f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * They do not lead to mappings. 
812f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 813f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Bits 7..6: 814f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 1 direct/initial state (stateful converters have multiple) 815f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 0 non-initial state with transitions or with non-ignorable result actions 816f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * -1 final state with only ignorable actions 817f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 818f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Bits 5..3: 819f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * The lowest byte value with non-ignorable actions is 820f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * value<<5 (rounded down). 821f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 822f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Bits 2..0: 823f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * The highest byte value with non-ignorable actions is 824f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * (value<<5)&0x1f (rounded up). 
825f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 826f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius int8_t stateProps[MBCS_MAX_STATE_COUNT]; 827f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius int32_t state; 828f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 829f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius uprv_memset(stateProps, -1, sizeof(stateProps)); 830f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 831f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* recurse from state 0 and set all stateProps */ 832f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius getStateProp(mbcsTable->stateTable, stateProps, 0); 833f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 834f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius for(state=0; state<mbcsTable->countStates; ++state) { 835f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /*if(stateProps[state]==-1) { 836f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius printf("unused/unreachable <icu:state> %d\n", state); 837f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius }*/ 838f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(stateProps[state]>=0x40) { 839f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* start from each direct state */ 840f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius enumToU( 841f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius mbcsTable, stateProps, state, 0, 0, 842f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius callback, context, 843f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius pErrorCode); 844f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 845f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 846f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius} 847f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 848f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusU_CFUNC void 849f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusucnv_MBCSGetFilteredUnicodeSetForUnicode(const UConverterSharedData *sharedData, 850f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 
const USetAdder *sa, 851f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UConverterUnicodeSet which, 852f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UConverterSetFilter filter, 853f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UErrorCode *pErrorCode) { 854f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius const UConverterMBCSTable *mbcsTable; 855f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius const uint16_t *table; 856f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 857f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius uint32_t st3; 858f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius uint16_t st1, maxStage1, st2; 859f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 860f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UChar32 c; 861f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 862f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* enumerate the from-Unicode trie table */ 863f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius mbcsTable=&sharedData->mbcs; 864f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius table=mbcsTable->fromUnicodeTable; 865f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(mbcsTable->unicodeMask&UCNV_HAS_SUPPLEMENTARY) { 866f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius maxStage1=0x440; 867f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 868f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius maxStage1=0x40; 869f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 870f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 871f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=0; /* keep track of the current code point while enumerating */ 872f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 873f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(mbcsTable->outputType==MBCS_OUTPUT_1) { 874f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius const uint16_t *stage2, *stage3, *results; 875f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius uint16_t minValue; 876f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 
877f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius results=(const uint16_t *)mbcsTable->fromUnicodeBytes; 878f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 879f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* 880f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Set a threshold variable for selecting which mappings to use. 881f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * See ucnv_MBCSSingleFromBMPWithOffsets() and 882f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * MBCS_SINGLE_RESULT_FROM_U() for details. 883f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 884f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(which==UCNV_ROUNDTRIP_SET) { 885f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* use only roundtrips */ 886f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius minValue=0xf00; 887f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else /* UCNV_ROUNDTRIP_AND_FALLBACK_SET */ { 888f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* use all roundtrip and fallback results */ 889f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius minValue=0x800; 890f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 891f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 892f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius for(st1=0; st1<maxStage1; ++st1) { 893f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius st2=table[st1]; 894f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(st2>maxStage1) { 895f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius stage2=table+st2; 896f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius for(st2=0; st2<64; ++st2) { 897f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if((st3=stage2[st2])!=0) { 898f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* read the stage 3 block */ 899f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius stage3=results+st3; 900f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 901f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius do { 902f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 
if(*stage3++>=minValue) { 903f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius sa->add(sa->set, c); 904f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 905f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } while((++c&0xf)!=0); 906f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 907f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c+=16; /* empty stage 3 block */ 908f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 909f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 910f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 911f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c+=1024; /* empty stage 2 block */ 912f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 913f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 914f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 915f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius const uint32_t *stage2; 916f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius const uint8_t *stage3, *bytes; 917f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius uint32_t st3Multiplier; 918f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius uint32_t value; 919f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UBool useFallback; 920f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 921f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius bytes=mbcsTable->fromUnicodeBytes; 922f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 923f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius useFallback=(UBool)(which==UCNV_ROUNDTRIP_AND_FALLBACK_SET); 924f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 925f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius switch(mbcsTable->outputType) { 926f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius case MBCS_OUTPUT_3: 927f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius case MBCS_OUTPUT_4_EUC: 928f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius st3Multiplier=3; 929f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 930f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 
case MBCS_OUTPUT_4: 931f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius st3Multiplier=4; 932f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 933f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius default: 934f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius st3Multiplier=2; 935f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 936f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 937f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 938f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius for(st1=0; st1<maxStage1; ++st1) { 939f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius st2=table[st1]; 940f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(st2>(maxStage1>>1)) { 941f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius stage2=(const uint32_t *)table+st2; 942f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius for(st2=0; st2<64; ++st2) { 943f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if((st3=stage2[st2])!=0) { 944f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* read the stage 3 block */ 945f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius stage3=bytes+st3Multiplier*16*(uint32_t)(uint16_t)st3; 946f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 947f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* get the roundtrip flags for the stage 3 block */ 948f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius st3>>=16; 949f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 950f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* 951f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Add code points for which the roundtrip flag is set, 952f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * or which map to non-zero bytes if we use fallbacks. 953f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * See ucnv_MBCSFromUnicodeWithOffsets() for details. 
954f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 955f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius switch(filter) { 956f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius case UCNV_SET_FILTER_NONE: 957f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius do { 958f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(st3&1) { 959f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius sa->add(sa->set, c); 960f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius stage3+=st3Multiplier; 961f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if(useFallback) { 962f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius uint8_t b=0; 963f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius switch(st3Multiplier) { 964f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius case 4: 965f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius b|=*stage3++; 966f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius case 3: /*fall through*/ 967f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius b|=*stage3++; 968f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius case 2: /*fall through*/ 969f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius b|=stage3[0]|stage3[1]; 970f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius stage3+=2; 971f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius default: 972f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 973f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 974f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(b!=0) { 975f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius sa->add(sa->set, c); 976f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 977f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 978f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius st3>>=1; 979f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } while((++c&0xf)!=0); 980f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 981f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius case UCNV_SET_FILTER_DBCS_ONLY: 982f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* Ignore 
single-byte results (<0x100). */ 983f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius do { 984f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(((st3&1)!=0 || useFallback) && *((const uint16_t *)stage3)>=0x100) { 985f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius sa->add(sa->set, c); 986f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 987f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius st3>>=1; 988f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius stage3+=2; /* +=st3Multiplier */ 989f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } while((++c&0xf)!=0); 990f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 991f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius case UCNV_SET_FILTER_2022_CN: 992f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* Only add code points that map to CNS 11643 planes 1 & 2 for non-EXT ISO-2022-CN. */ 993f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius do { 994f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(((st3&1)!=0 || useFallback) && ((value=*stage3)==0x81 || value==0x82)) { 995f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius sa->add(sa->set, c); 996f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 997f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius st3>>=1; 998f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius stage3+=3; /* +=st3Multiplier */ 999f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } while((++c&0xf)!=0); 1000f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 1001f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius case UCNV_SET_FILTER_SJIS: 1002f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* Only add code points that map to Shift-JIS codes corresponding to JIS X 0208. 
*/ 1003f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius do { 1004f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(((st3&1)!=0 || useFallback) && (value=*((const uint16_t *)stage3))>=0x8140 && value<=0xeffc) { 1005f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius sa->add(sa->set, c); 1006f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 1007f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius st3>>=1; 1008f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius stage3+=2; /* +=st3Multiplier */ 1009f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } while((++c&0xf)!=0); 1010f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 1011f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius case UCNV_SET_FILTER_GR94DBCS: 1012f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* Only add code points that map to ISO 2022 GR 94 DBCS codes (each byte A1..FE). */ 1013f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius do { 1014f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if( ((st3&1)!=0 || useFallback) && 1015f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius (uint16_t)((value=*((const uint16_t *)stage3)) - 0xa1a1)<=(0xfefe - 0xa1a1) && 1016f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius (uint8_t)(value-0xa1)<=(0xfe - 0xa1) 1017f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ) { 1018f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius sa->add(sa->set, c); 1019f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 1020f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius st3>>=1; 1021f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius stage3+=2; /* +=st3Multiplier */ 1022f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } while((++c&0xf)!=0); 1023f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 1024f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius case UCNV_SET_FILTER_HZ: 1025f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* Only add code points that are suitable for HZ DBCS (lead byte A1..FD). 
*/ 1026f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius do { 1027f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if( ((st3&1)!=0 || useFallback) && 1028f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius (uint16_t)((value=*((const uint16_t *)stage3))-0xa1a1)<=(0xfdfe - 0xa1a1) && 1029f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius (uint8_t)(value-0xa1)<=(0xfe - 0xa1) 1030f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ) { 1031f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius sa->add(sa->set, c); 1032f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 1033f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius st3>>=1; 1034f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius stage3+=2; /* +=st3Multiplier */ 1035f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } while((++c&0xf)!=0); 1036f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 1037f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius default: 1038f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *pErrorCode=U_INTERNAL_PROGRAM_ERROR; 1039f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return; 1040f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 1041f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 1042f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c+=16; /* empty stage 3 block */ 1043f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 1044f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 1045f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 1046f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c+=1024; /* empty stage 2 block */ 1047f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 1048f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 1049f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 1050f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 1051f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ucnv_extGetUnicodeSet(sharedData, sa, which, filter, pErrorCode); 1052f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius} 
1053f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 1054f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusU_CFUNC void 1055f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusucnv_MBCSGetUnicodeSetForUnicode(const UConverterSharedData *sharedData, 1056f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius const USetAdder *sa, 1057f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UConverterUnicodeSet which, 1058f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UErrorCode *pErrorCode) { 1059f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ucnv_MBCSGetFilteredUnicodeSetForUnicode( 1060f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius sharedData, sa, which, 1061f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY ? 1062f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UCNV_SET_FILTER_DBCS_ONLY : 1063f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UCNV_SET_FILTER_NONE, 1064f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius pErrorCode); 1065f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius} 1066f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 1067f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusstatic void 1068f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusucnv_MBCSGetUnicodeSet(const UConverter *cnv, 1069f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius const USetAdder *sa, 1070f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UConverterUnicodeSet which, 1071f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UErrorCode *pErrorCode) { 1072f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(cnv->options&_MBCS_OPTION_GB18030) { 1073f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius sa->addRange(sa->set, 0, 0xd7ff); 1074f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius sa->addRange(sa->set, 0xe000, 0x10ffff); 1075f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 1076f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ucnv_MBCSGetUnicodeSetForUnicode(cnv->sharedData, sa, which, pErrorCode); 
1077f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 1078f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius} 1079f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 1080f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius/* conversion extensions for input not in the main table -------------------- */ 1081f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 1082f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius/* 1083f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Hardcoded extension handling for GB 18030. 1084f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Definition of LINEAR macros and gb18030Ranges see near the beginning of the file. 1085f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 1086f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * In the future, conversion extensions may handle m:n mappings and delta tables, 1087f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * see http://source.icu-project.org/repos/icu/icuhtml/trunk/design/conversion/conversion_extensions.html 1088f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 1089f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * If an input character cannot be mapped, then these functions set an error 1090f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * code. The framework will then call the callback function. 
1091f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 1092f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 1093f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius/* 1094f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * @return if(U_FAILURE) return the code point for cnv->fromUChar32 1095f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * else return 0 after output has been written to the target 1096f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 1097f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusstatic UChar32 1098f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius_extFromU(UConverter *cnv, const UConverterSharedData *sharedData, 1099f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UChar32 cp, 1100f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius const UChar **source, const UChar *sourceLimit, 1101f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius uint8_t **target, const uint8_t *targetLimit, 1102f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius int32_t **offsets, int32_t sourceIndex, 1103f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UBool flush, 1104f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UErrorCode *pErrorCode) { 1105f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius const int32_t *cx; 1106f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 1107f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius cnv->useSubChar1=FALSE; 1108f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 1109f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if( (cx=sharedData->mbcs.extIndexes)!=NULL && 1110f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ucnv_extInitialMatchFromU( 1111f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius cnv, cx, 1112f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius cp, source, sourceLimit, 1113f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius (char **)target, (char *)targetLimit, 1114f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius offsets, sourceIndex, 1115f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius flush, 
1116f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius pErrorCode) 1117f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ) { 1118f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return 0; /* an extension mapping handled the input */ 1119f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 1120f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 1121f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* GB 18030 */ 1122f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if((cnv->options&_MBCS_OPTION_GB18030)!=0) { 1123f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius const uint32_t *range; 1124f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius int32_t i; 1125f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 1126f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius range=gb18030Ranges[0]; 1127f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius for(i=0; i<UPRV_LENGTHOF(gb18030Ranges); range+=4, ++i) { 1128f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(range[0]<=(uint32_t)cp && (uint32_t)cp<=range[1]) { 1129f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* found the Unicode code point, output the four-byte sequence for it */ 1130f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius uint32_t linear; 1131f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius char bytes[4]; 1132f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 1133f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* get the linear value of the first GB 18030 code in this range */ 1134f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius linear=range[2]-LINEAR_18030_BASE; 1135f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 1136f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* add the offset from the beginning of the range */ 1137f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius linear+=((uint32_t)cp-range[0]); 1138f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 1139f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* turn this into a four-byte sequence */ 
1140f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius bytes[3]=(char)(0x30+linear%10); linear/=10; 1141f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius bytes[2]=(char)(0x81+linear%126); linear/=126; 1142f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius bytes[1]=(char)(0x30+linear%10); linear/=10; 1143f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius bytes[0]=(char)(0x81+linear); 1144f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 1145f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* output this sequence */ 1146f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ucnv_fromUWriteBytes(cnv, 1147f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius bytes, 4, (char **)target, (char *)targetLimit, 1148f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius offsets, sourceIndex, pErrorCode); 1149f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return 0; 1150f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 1151f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 1152f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 1153f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 1154f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* no mapping */ 1155f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *pErrorCode=U_INVALID_CHAR_FOUND; 1156f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return cp; 1157f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius} 1158f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 1159f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius/* 1160f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Input sequence: cnv->toUBytes[0..length[ 1161f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * @return if(U_FAILURE) return the length (toULength, byteIndex) for the input 1162f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * else return 0 after output has been written to the target 1163f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 1164f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusstatic int8_t 
1165f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius_extToU(UConverter *cnv, const UConverterSharedData *sharedData, 1166f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius int8_t length, 1167f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius const uint8_t **source, const uint8_t *sourceLimit, 1168f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UChar **target, const UChar *targetLimit, 1169f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius int32_t **offsets, int32_t sourceIndex, 1170f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UBool flush, 1171f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UErrorCode *pErrorCode) { 1172f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius const int32_t *cx; 1173f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 1174f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if( (cx=sharedData->mbcs.extIndexes)!=NULL && 1175f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ucnv_extInitialMatchToU( 1176f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius cnv, cx, 1177f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length, (const char **)source, (const char *)sourceLimit, 1178f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius target, targetLimit, 1179f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius offsets, sourceIndex, 1180f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius flush, 1181f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius pErrorCode) 1182f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ) { 1183f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return 0; /* an extension mapping handled the input */ 1184f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 1185f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 1186f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* GB 18030 */ 1187f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(length==4 && (cnv->options&_MBCS_OPTION_GB18030)!=0) { 1188f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius const uint32_t *range; 
1189f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius uint32_t linear; 1190f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius int32_t i; 1191f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 1192f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius linear=LINEAR_18030(cnv->toUBytes[0], cnv->toUBytes[1], cnv->toUBytes[2], cnv->toUBytes[3]); 1193f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius range=gb18030Ranges[0]; 1194f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius for(i=0; i<UPRV_LENGTHOF(gb18030Ranges); range+=4, ++i) { 1195f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(range[2]<=linear && linear<=range[3]) { 1196f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* found the sequence, output the Unicode code point for it */ 1197f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *pErrorCode=U_ZERO_ERROR; 1198f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 1199f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* add the linear difference between the input and start sequences to the start code point */ 1200f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius linear=range[0]+(linear-range[2]); 1201f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 1202f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* output this code point */ 1203f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ucnv_toUWriteCodePoint(cnv, linear, target, targetLimit, offsets, sourceIndex, pErrorCode); 1204f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 1205f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return 0; 1206f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 1207f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 1208f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 1209f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 1210f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* no mapping */ 1211f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *pErrorCode=U_INVALID_CHAR_FOUND; 1212f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return length; 
1213f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius} 1214f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 1215f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius/* EBCDIC swap LF<->NL ------------------------------------------------------ */ 1216f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 1217f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius/* 1218f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * This code modifies a standard EBCDIC<->Unicode mapping table for 1219f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * OS/390 (z/OS) Unix System Services (Open Edition). 1220f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * The difference is in the mapping of Line Feed and New Line control codes: 1221f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Standard EBCDIC maps 1222f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 1223f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * <U000A> \x25 |0 1224f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * <U0085> \x15 |0 1225f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 1226f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * but OS/390 USS EBCDIC swaps the control codes for LF and NL, 1227f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * mapping 1228f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 1229f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * <U000A> \x15 |0 1230f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * <U0085> \x25 |0 1231f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 1232f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * This code modifies a loaded standard EBCDIC<->Unicode mapping table 1233f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * by copying it into allocated memory and swapping the LF and NL values. 1234f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * It allows to support the same EBCDIC charset in both versions without 1235f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * duplicating the entire installed table. 
1236f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 1237f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 1238f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius/* standard EBCDIC codes */ 1239f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius#define EBCDIC_LF 0x25 1240f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius#define EBCDIC_NL 0x15 1241f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 1242f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius/* standard EBCDIC codes with roundtrip flag as stored in Unicode-to-single-byte tables */ 1243f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius#define EBCDIC_RT_LF 0xf25 1244f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius#define EBCDIC_RT_NL 0xf15 1245f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 1246f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius/* Unicode code points */ 1247f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius#define U_LF 0x0a 1248f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius#define U_NL 0x85 1249f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 1250f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusstatic UBool 1251f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius_EBCDICSwapLFNL(UConverterSharedData *sharedData, UErrorCode *pErrorCode) { 1252f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UConverterMBCSTable *mbcsTable; 1253f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 1254f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius const uint16_t *table, *results; 1255f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius const uint8_t *bytes; 1256f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 1257f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius int32_t (*newStateTable)[256]; 1258f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius uint16_t *newResults; 1259f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius uint8_t *p; 1260f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius char *name; 1261f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 
1262f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius uint32_t stage2Entry; 1263f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius uint32_t size, sizeofFromUBytes; 1264f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 1265f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius mbcsTable=&sharedData->mbcs; 1266f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 1267f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius table=mbcsTable->fromUnicodeTable; 1268f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius bytes=mbcsTable->fromUnicodeBytes; 1269f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius results=(const uint16_t *)bytes; 1270f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 1271f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* 1272f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Check that this is an EBCDIC table with SBCS portion - 1273f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * SBCS or EBCDIC_STATEFUL with standard EBCDIC LF and NL mappings. 1274f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 1275f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * If not, ignore the option. Options are always ignored if they do not apply. 
1276f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 1277f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(!( 1278f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius (mbcsTable->outputType==MBCS_OUTPUT_1 || mbcsTable->outputType==MBCS_OUTPUT_2_SISO) && 1279f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius mbcsTable->stateTable[0][EBCDIC_LF]==MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, U_LF) && 1280f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius mbcsTable->stateTable[0][EBCDIC_NL]==MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, U_NL) 1281f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius )) { 1282f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return FALSE; 1283f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 1284f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 1285f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(mbcsTable->outputType==MBCS_OUTPUT_1) { 1286f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(!( 1287f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius EBCDIC_RT_LF==MBCS_SINGLE_RESULT_FROM_U(table, results, U_LF) && 1288f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius EBCDIC_RT_NL==MBCS_SINGLE_RESULT_FROM_U(table, results, U_NL) 1289f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius )) { 1290f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return FALSE; 1291f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 1292f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else /* MBCS_OUTPUT_2_SISO */ { 1293f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius stage2Entry=MBCS_STAGE_2_FROM_U(table, U_LF); 1294f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(!( 1295f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, U_LF)!=0 && 1296f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius EBCDIC_LF==MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, U_LF) 1297f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius )) { 1298f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return FALSE; 
1299f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 1300f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 1301f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius stage2Entry=MBCS_STAGE_2_FROM_U(table, U_NL); 1302f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(!( 1303f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, U_NL)!=0 && 1304f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius EBCDIC_NL==MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, U_NL) 1305f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius )) { 1306f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return FALSE; 1307f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 1308f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 1309f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 1310f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(mbcsTable->fromUBytesLength>0) { 1311f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* 1312f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * We _know_ the number of bytes in the fromUnicodeBytes array 1313f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * starting with header.version 4.1. 1314f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 1315f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius sizeofFromUBytes=mbcsTable->fromUBytesLength; 1316f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 1317f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* 1318f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Otherwise: 1319f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * There used to be code to enumerate the fromUnicode 1320f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * trie and find the highest entry, but it was removed in ICU 3.2 1321f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * because it was not tested and caused a low code coverage number. 1322f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * See Jitterbug 3674. 
1323f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * This affects only some .cnv file formats with a header.version 1324f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * below 4.1, and only when swaplfnl is requested. 1325f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 1326f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * ucnvmbcs.c revision 1.99 is the last one with the 1327f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * ucnv_MBCSSizeofFromUBytes() function. 1328f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 1329f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *pErrorCode=U_INVALID_FORMAT_ERROR; 1330f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return FALSE; 1331f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 1332f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 1333f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* 1334f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * The table has an appropriate format. 1335f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Allocate and build 1336f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * - a modified to-Unicode state table 1337f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * - a modified from-Unicode output array 1338f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * - a converter name string with the swap option appended 1339f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 1340f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius size= 1341f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius mbcsTable->countStates*1024+ 1342f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius sizeofFromUBytes+ 1343f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UCNV_MAX_CONVERTER_NAME_LENGTH+20; 1344f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius p=(uint8_t *)uprv_malloc(size); 1345f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(p==NULL) { 1346f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 
1347f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return FALSE; 1348f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 1349f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 1350f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* copy and modify the to-Unicode state table */ 1351f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius newStateTable=(int32_t (*)[256])p; 1352f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius uprv_memcpy(newStateTable, mbcsTable->stateTable, mbcsTable->countStates*1024); 1353f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 1354f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius newStateTable[0][EBCDIC_LF]=MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, U_NL); 1355f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius newStateTable[0][EBCDIC_NL]=MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, U_LF); 1356f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 1357f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* copy and modify the from-Unicode result table */ 1358f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius newResults=(uint16_t *)newStateTable[mbcsTable->countStates]; 1359f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius uprv_memcpy(newResults, bytes, sizeofFromUBytes); 1360f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 1361f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* conveniently, the table access macros work on the left side of expressions */ 1362f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(mbcsTable->outputType==MBCS_OUTPUT_1) { 1363f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius MBCS_SINGLE_RESULT_FROM_U(table, newResults, U_LF)=EBCDIC_RT_NL; 1364f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius MBCS_SINGLE_RESULT_FROM_U(table, newResults, U_NL)=EBCDIC_RT_LF; 1365f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else /* MBCS_OUTPUT_2_SISO */ { 1366f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius stage2Entry=MBCS_STAGE_2_FROM_U(table, U_LF); 1367f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 
MBCS_VALUE_2_FROM_STAGE_2(newResults, stage2Entry, U_LF)=EBCDIC_NL; 1368f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 1369f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius stage2Entry=MBCS_STAGE_2_FROM_U(table, U_NL); 1370f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius MBCS_VALUE_2_FROM_STAGE_2(newResults, stage2Entry, U_NL)=EBCDIC_LF; 1371f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 1372f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 1373f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* set the canonical converter name */ 1374f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius name=(char *)newResults+sizeofFromUBytes; 1375f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius uprv_strcpy(name, sharedData->staticData->name); 1376f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius uprv_strcat(name, UCNV_SWAP_LFNL_OPTION_STRING); 1377f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 1378f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* set the pointers */ 1379f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius umtx_lock(NULL); 1380f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(mbcsTable->swapLFNLStateTable==NULL) { 1381f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius mbcsTable->swapLFNLStateTable=newStateTable; 1382f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius mbcsTable->swapLFNLFromUnicodeBytes=(uint8_t *)newResults; 1383f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius mbcsTable->swapLFNLName=name; 1384f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 1385f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius newStateTable=NULL; 1386f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 1387f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius umtx_unlock(NULL); 1388f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 1389f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* release the allocated memory if another thread beat us to it */ 1390f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(newStateTable!=NULL) { 
1391f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius uprv_free(newStateTable); 1392f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 1393f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return TRUE; 1394f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius} 1395f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 1396f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius/* reconstitute omitted fromUnicode data ------------------------------------ */ 1397f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 1398f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius/* for details, compare with genmbcs.c MBCSAddFromUnicode() and transformEUC() */ 1399f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusstatic UBool U_CALLCONV 1400f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliuswriteStage3Roundtrip(const void *context, uint32_t value, UChar32 codePoints[32]) { 1401f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UConverterMBCSTable *mbcsTable=(UConverterMBCSTable *)context; 1402f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius const uint16_t *table; 1403f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius uint32_t *stage2; 1404f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius uint8_t *bytes, *p; 1405f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UChar32 c; 1406f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius int32_t i, st3; 1407f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 1408f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius table=mbcsTable->fromUnicodeTable; 1409f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius bytes=(uint8_t *)mbcsTable->fromUnicodeBytes; 1410f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 1411f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* for EUC outputTypes, modify the value like genmbcs.c's transformEUC() */ 1412f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius switch(mbcsTable->outputType) { 1413f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius case MBCS_OUTPUT_3_EUC: 1414f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 
if(value<=0xffff) { 1415f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* short sequences are stored directly */ 1416f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* code set 0 or 1 */ 1417f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if(value<=0x8effff) { 1418f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* code set 2 */ 1419f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value&=0x7fff; 1420f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else /* first byte is 0x8f */ { 1421f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* code set 3 */ 1422f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value&=0xff7f; 1423f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 1424f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 1425f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius case MBCS_OUTPUT_4_EUC: 1426f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(value<=0xffffff) { 1427f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* short sequences are stored directly */ 1428f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* code set 0 or 1 */ 1429f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if(value<=0x8effffff) { 1430f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* code set 2 */ 1431f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value&=0x7fffff; 1432f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else /* first byte is 0x8f */ { 1433f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* code set 3 */ 1434f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value&=0xff7fff; 1435f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 1436f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 1437f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius default: 1438f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 1439f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 1440f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 1441f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius for(i=0; i<=0x1f; 
++value, ++i) { 1442f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=codePoints[i]; 1443f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(c<0) { 1444f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius continue; 1445f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 1446f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 1447f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* locate the stage 2 & 3 data */ 1448f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius stage2=((uint32_t *)table)+table[c>>10]+((c>>4)&0x3f); 1449f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius p=bytes; 1450f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius st3=(int32_t)(uint16_t)*stage2*16+(c&0xf); 1451f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 1452f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* write the codepage bytes into stage 3 */ 1453f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius switch(mbcsTable->outputType) { 1454f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius case MBCS_OUTPUT_3: 1455f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius case MBCS_OUTPUT_4_EUC: 1456f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius p+=st3*3; 1457f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius p[0]=(uint8_t)(value>>16); 1458f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius p[1]=(uint8_t)(value>>8); 1459f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius p[2]=(uint8_t)value; 1460f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 1461f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius case MBCS_OUTPUT_4: 1462f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ((uint32_t *)p)[st3]=value; 1463f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 1464f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius default: 1465f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* 2 bytes per character */ 1466f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ((uint16_t *)p)[st3]=(uint16_t)value; 1467f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 
1468f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 1469f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 1470f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* set the roundtrip flag */ 1471f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *stage2|=(1UL<<(16+(c&0xf))); 1472f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 1473f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return TRUE; 1474f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 1475f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 1476f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusstatic void 1477f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusreconstituteData(UConverterMBCSTable *mbcsTable, 1478f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius uint32_t stage1Length, uint32_t stage2Length, 1479f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius uint32_t fullStage2Length, /* lengths are numbers of units, not bytes */ 1480f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UErrorCode *pErrorCode) { 1481f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius uint16_t *stage1; 1482f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius uint32_t *stage2; 1483f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius uint32_t dataLength=stage1Length*2+fullStage2Length*4+mbcsTable->fromUBytesLength; 1484f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius mbcsTable->reconstitutedData=(uint8_t *)uprv_malloc(dataLength); 1485f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(mbcsTable->reconstitutedData==NULL) { 1486f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 1487f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return; 1488f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 1489f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius uprv_memset(mbcsTable->reconstitutedData, 0, dataLength); 1490f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 1491f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* copy existing data and reroute the pointers */ 
1492f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius stage1=(uint16_t *)mbcsTable->reconstitutedData; 1493f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius uprv_memcpy(stage1, mbcsTable->fromUnicodeTable, stage1Length*2); 1494f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 1495f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius stage2=(uint32_t *)(stage1+stage1Length); 1496f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius uprv_memcpy(stage2+(fullStage2Length-stage2Length), 1497f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius mbcsTable->fromUnicodeTable+stage1Length, 1498f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius stage2Length*4); 1499f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 1500f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius mbcsTable->fromUnicodeTable=stage1; 1501f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius mbcsTable->fromUnicodeBytes=(uint8_t *)(stage2+fullStage2Length); 1502f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 1503f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* indexes into stage 2 count from the bottom of the fromUnicodeTable */ 1504f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius stage2=(uint32_t *)stage1; 1505f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 1506f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* reconstitute the initial part of stage 2 from the mbcsIndex */ 1507f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius { 1508f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius int32_t stageUTF8Length=((int32_t)mbcsTable->maxFastUChar+1)>>6; 1509f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius int32_t stageUTF8Index=0; 1510f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius int32_t st1, st2, st3, i; 1511f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 1512f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius for(st1=0; stageUTF8Index<stageUTF8Length; ++st1) { 1513f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius st2=stage1[st1]; 1514f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 
if(st2!=(int32_t)stage1Length/2) { 1515f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* each stage 2 block has 64 entries corresponding to 16 entries in the mbcsIndex */ 1516f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius for(i=0; i<16; ++i) { 1517f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius st3=mbcsTable->mbcsIndex[stageUTF8Index++]; 1518f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(st3!=0) { 1519f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* an stage 2 entry's index is per stage 3 16-block, not per stage 3 entry */ 1520f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius st3>>=4; 1521f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* 1522f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 4 stage 2 entries point to 4 consecutive stage 3 16-blocks which are 1523f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * allocated together as a single 64-block for access from the mbcsIndex 1524f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 1525f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius stage2[st2++]=st3++; 1526f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius stage2[st2++]=st3++; 1527f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius stage2[st2++]=st3++; 1528f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius stage2[st2++]=st3; 1529f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 1530f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* no stage 3 block, skip */ 1531f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius st2+=4; 1532f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 1533f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 1534f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 1535f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* no stage 2 block, skip */ 1536f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius stageUTF8Index+=16; 1537f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 1538f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 
/* (the statements up to the next closing brace at column 0 end a function that begins above) */
    }

    /* reconstitute fromUnicodeBytes with roundtrips from toUnicode data */
    ucnv_MBCSEnumToUnicode(mbcsTable, writeStage3Roundtrip, mbcsTable, pErrorCode);
}

/* MBCS setup functions ----------------------------------------------------- */

/*
 * Fill in sharedData->mbcs from the raw MBCS data that starts with an
 * _MBCSHeader at raw.
 *
 * sharedData  converter data whose mbcs table and impl pointer are set here;
 *             its staticData is read for the name, conversion type,
 *             byte-length limits and unicodeMask
 * pArgs       load arguments; nestedLoads, onlyTestIsLoadable, reserved,
 *             options and pkg are read and forwarded when a base table
 *             has to be loaded
 * raw         pointer to the _MBCSHeader and the table data that follows it
 * pErrorCode  set to U_INVALID_TABLE_FORMAT, U_INVALID_TABLE_FILE or
 *             U_MEMORY_ALLOCATION_ERROR when loading fails; errors from
 *             ucnv_load() and reconstituteData() are passed through
 *
 * Two kinds of files are handled:
 * - extension-only files (MBCS_OUTPUT_EXT_ONLY): the base table named right
 *   after the header is loaded with ucnv_load(), its table is copied, and the
 *   copy is adjusted for the extension converter
 * - files with their own base table: the table pointers are set up from the
 *   offsets stored in the header
 *
 * When pArgs->onlyTestIsLoadable is set, the function returns as soon as the
 * format has been validated, without setting up the table.
 */
static void
ucnv_MBCSLoad(UConverterSharedData *sharedData,
          UConverterLoadArgs *pArgs,
          const uint8_t *raw,
          UErrorCode *pErrorCode) {
    UDataInfo info;
    UConverterMBCSTable *mbcsTable=&sharedData->mbcs;
    _MBCSHeader *header=(_MBCSHeader *)raw;
    uint32_t offset;
    uint32_t headerLength;
    UBool noFromU=FALSE;

    /*
     * Determine the header length from the header version.
     * Version 4.x uses a fixed length; version 5.3 and higher stores the length
     * in the options word and is accepted only if no unknown incompatible
     * option bits are set. Everything else is rejected.
     */
    if(header->version[0]==4) {
        headerLength=MBCS_HEADER_V4_LENGTH;
    } else if(header->version[0]==5 && header->version[1]>=3 &&
              (header->options&MBCS_OPT_UNKNOWN_INCOMPATIBLE_MASK)==0) {
        headerLength=header->options&MBCS_OPT_LENGTH_MASK;
        noFromU=(UBool)((header->options&MBCS_OPT_NO_FROM_U)!=0);
    } else {
        *pErrorCode=U_INVALID_TABLE_FORMAT;
        return;
    }

    /* the low byte of flags is the output type */
    mbcsTable->outputType=(uint8_t)header->flags;
    /* the no-fromUnicode option is not accepted together with MBCS_OUTPUT_1 */
    if(noFromU && mbcsTable->outputType==MBCS_OUTPUT_1) {
        *pErrorCode=U_INVALID_TABLE_FORMAT;
        return;
    }

    /* extension data, header version 4.2 and higher */
    /* the remaining bits of flags are the byte offset from raw to the extension indexes; 0 means none */
    offset=header->flags>>8;
    if(offset!=0) {
        mbcsTable->extIndexes=(const int32_t *)(raw+offset);
    }

    if(mbcsTable->outputType==MBCS_OUTPUT_EXT_ONLY) {
        UConverterLoadArgs args=UCNV_LOAD_ARGS_INITIALIZER;
        UConverterSharedData *baseSharedData;
        const int32_t *extIndexes;
        const char *baseName;

        /* extension-only file, load the base table and set values appropriately */
        if((extIndexes=mbcsTable->extIndexes)==NULL) {
            /* extension-only file without extension */
            *pErrorCode=U_INVALID_TABLE_FORMAT;
            return;
        }

        if(pArgs->nestedLoads!=1) {
            /* an extension table must not be loaded as a base table */
            *pErrorCode=U_INVALID_TABLE_FILE;
            return;
        }

        /* load the base table */
        /* the base table name is read from headerLength*4 bytes after the start of the header */
        baseName=(const char *)header+headerLength*4;
        if(0==uprv_strcmp(baseName, sharedData->staticData->name)) {
            /* forbid loading this same extension-only file */
            *pErrorCode=U_INVALID_TABLE_FORMAT;
            return;
        }

        /* TODO parse package name out of the prefix of the base name in the extension .cnv file? */
        /* nestedLoads=2 marks this as a nested load, which the nestedLoads!=1 check above rejects for extension-only files */
        args.size=sizeof(UConverterLoadArgs);
        args.nestedLoads=2;
        args.onlyTestIsLoadable=pArgs->onlyTestIsLoadable;
        args.reserved=pArgs->reserved;
        args.options=pArgs->options;
        args.pkg=pArgs->pkg;
        args.name=baseName;
        baseSharedData=ucnv_load(&args, pErrorCode);
        if(U_FAILURE(*pErrorCode)) {
            return;
        }
        /* the base must be an MBCS converter that does not itself have a base; release it again if not */
        if( baseSharedData->staticData->conversionType!=UCNV_MBCS ||
            baseSharedData->mbcs.baseSharedData!=NULL
        ) {
            ucnv_unload(baseSharedData);
            *pErrorCode=U_INVALID_TABLE_FORMAT;
            return;
        }
        if(pArgs->onlyTestIsLoadable) {
            /*
             * Exit as soon as we know that we can load the converter
             * and the format is valid and supported.
             * The worst that can happen in the following code is a memory
             * allocation error.
             */
            ucnv_unload(baseSharedData);
            return;
        }

        /* copy the base table data */
        uprv_memcpy(mbcsTable, &baseSharedData->mbcs, sizeof(UConverterMBCSTable));

        /* overwrite values with relevant ones for the extension converter */
        /* (extIndexes was saved in a local before the memcpy overwrote mbcsTable->extIndexes) */
        mbcsTable->baseSharedData=baseSharedData;
        mbcsTable->extIndexes=extIndexes;

        /*
         * It would be possible to share the swapLFNL data with a base converter,
         * but the generated name would have to be different, and the memory
         * would have to be free'd only once.
         * It is easier to just create the data for the extension converter
         * separately when it is requested.
         */
        mbcsTable->swapLFNLStateTable=NULL;
        mbcsTable->swapLFNLFromUnicodeBytes=NULL;
        mbcsTable->swapLFNLName=NULL;

        /*
         * The reconstitutedData must be deleted only when the base converter
         * is unloaded.
         */
        mbcsTable->reconstitutedData=NULL;

        /*
         * Set a special, runtime-only outputType if the extension converter
         * is a DBCS version of a base converter that also maps single bytes.
         */
        if( sharedData->staticData->conversionType==UCNV_DBCS ||
                (sharedData->staticData->conversionType==UCNV_MBCS &&
                 sharedData->staticData->minBytesPerChar>=2)
        ) {
            if(baseSharedData->mbcs.outputType==MBCS_OUTPUT_2_SISO) {
                /* the base converter is SI/SO-stateful */
                int32_t entry;

                /* get the dbcs state from the state table entry for SO=0x0e */
                entry=mbcsTable->stateTable[0][0xe];
                if( MBCS_ENTRY_IS_FINAL(entry) &&
                    MBCS_ENTRY_FINAL_ACTION(entry)==MBCS_STATE_CHANGE_ONLY &&
                    MBCS_ENTRY_FINAL_STATE(entry)!=0
                ) {
                    mbcsTable->dbcsOnlyState=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry);

                    mbcsTable->outputType=MBCS_OUTPUT_DBCS_ONLY;
                }
            } else if(
                baseSharedData->staticData->conversionType==UCNV_MBCS &&
                baseSharedData->staticData->minBytesPerChar==1 &&
                baseSharedData->staticData->maxBytesPerChar==2 &&
                mbcsTable->countStates<=127
            ) {
                /* non-stateful base converter, need to modify the state table */
                int32_t (*newStateTable)[256];
                int32_t *state;
                int32_t i, count;

                /* allocate a new state table and copy the base state table contents */
                /* each state is a row of 256 int32_t entries = 1024 bytes; one extra row is added */
                count=mbcsTable->countStates;
                newStateTable=(int32_t (*)[256])uprv_malloc((count+1)*1024);
                if(newStateTable==NULL) {
                    ucnv_unload(baseSharedData);
                    *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
                    return;
                }

                uprv_memcpy(newStateTable, mbcsTable->stateTable, count*1024);

                /* change all final single-byte entries to go to a new all-illegal state */
                state=newStateTable[0];
                for(i=0; i<256; ++i) {
                    if(MBCS_ENTRY_IS_FINAL(state[i])) {
                        state[i]=MBCS_ENTRY_TRANSITION(count, 0);
                    }
                }

                /* build the new all-illegal state */
                state=newStateTable[count];
                for(i=0; i<256; ++i) {
                    state[i]=MBCS_ENTRY_FINAL(0, MBCS_STATE_ILLEGAL, 0);
                }
                /* stateTableOwned makes ucnv_MBCSUnload() free this allocated copy */
                mbcsTable->stateTable=(const int32_t (*)[256])newStateTable;
                mbcsTable->countStates=(uint8_t)(count+1);
                mbcsTable->stateTableOwned=TRUE;

                mbcsTable->outputType=MBCS_OUTPUT_DBCS_ONLY;
            }
        }

        /*
         * unlike below for files with base tables, do not get the unicodeMask
         * from the sharedData; instead, use the base table's unicodeMask,
         * which we copied in the memcpy above;
         * this is necessary because the static data unicodeMask, especially
         * the UCNV_HAS_SUPPLEMENTARY flag, is part of the base table data
         */
    } else {
        /* conversion file with a base table; an additional extension table is optional */
        /* make sure that the output type is known */
        switch(mbcsTable->outputType) {
        case MBCS_OUTPUT_1:
        case MBCS_OUTPUT_2:
        case MBCS_OUTPUT_3:
        case MBCS_OUTPUT_4:
        case MBCS_OUTPUT_3_EUC:
        case MBCS_OUTPUT_4_EUC:
        case MBCS_OUTPUT_2_SISO:
            /* OK */
            break;
        default:
            *pErrorCode=U_INVALID_TABLE_FORMAT;
            return;
        }
        if(pArgs->onlyTestIsLoadable) {
            /*
             * Exit as soon as we know that we can load the converter
             * and the format is valid and supported.
             * The worst that can happen in the following code is a memory
             * allocation error.
             */
            return;
        }

        /*
         * Set up the table pointers: the state table starts headerLength*4 bytes
         * into raw, the toUnicode fallbacks follow the state table rows, and the
         * remaining tables are located via byte offsets stored in the header.
         */
        mbcsTable->countStates=(uint8_t)header->countStates;
        mbcsTable->countToUFallbacks=header->countToUFallbacks;
        mbcsTable->stateTable=(const int32_t (*)[256])(raw+headerLength*4);
        mbcsTable->toUFallbacks=(const _MBCSToUFallback *)(mbcsTable->stateTable+header->countStates);
        mbcsTable->unicodeCodeUnits=(const uint16_t *)(raw+header->offsetToUCodeUnits);

        mbcsTable->fromUnicodeTable=(const uint16_t *)(raw+header->offsetFromUTable);
        mbcsTable->fromUnicodeBytes=(const uint8_t *)(raw+header->offsetFromUBytes);
        mbcsTable->fromUBytesLength=header->fromUBytesLength;

        /*
         * converter versions 6.1 and up contain a unicodeMask that is
         * used here to select the most efficient function implementations
         */
        info.size=sizeof(UDataInfo);
        udata_getInfo((UDataMemory *)sharedData->dataMemory, &info);
        if(info.formatVersion[0]>6 || (info.formatVersion[0]==6 && info.formatVersion[1]>=1)) {
            /* mask off possible future extensions to be safe */
            mbcsTable->unicodeMask=(uint8_t)(sharedData->staticData->unicodeMask&3);
        } else {
            /* for older versions, assume worst case: contains anything possible (prevent over-optimizations) */
            mbcsTable->unicodeMask=UCNV_HAS_SUPPLEMENTARY|UCNV_HAS_SURROGATES;
        }

        /*
         * _MBCSHeader.version 4.3 adds utf8Friendly data structures.
         * Check for the header version, SBCS vs. MBCS, and for whether the
         * data structures are optimized for code points as high as what the
         * runtime code is designed for.
         * The implementation does not handle mapping tables with entries for
         * unpaired surrogates.
         */
        if( header->version[1]>=3 &&
            (mbcsTable->unicodeMask&UCNV_HAS_SURROGATES)==0 &&
            (mbcsTable->countStates==1 ?
                (header->version[2]>=(SBCS_FAST_MAX>>8)) :
                (header->version[2]>=(MBCS_FAST_MAX>>8))
            )
        ) {
            mbcsTable->utf8Friendly=TRUE;

            if(mbcsTable->countStates==1) {
                /*
                 * SBCS: Stage 3 is allocated in 64-entry blocks for U+0000..SBCS_FAST_MAX or higher.
                 * Build a table with indexes to each block, to be used instead of
                 * the regular stage 1/2 table.
                 */
                int32_t i;
                for(i=0; i<(SBCS_FAST_LIMIT>>6); ++i) {
                    mbcsTable->sbcsIndex[i]=mbcsTable->fromUnicodeTable[mbcsTable->fromUnicodeTable[i>>4]+((i<<2)&0x3c)];
                }
                /* set SBCS_FAST_MAX to reflect the reach of sbcsIndex[] even if header->version[2]>(SBCS_FAST_MAX>>8) */
                mbcsTable->maxFastUChar=SBCS_FAST_MAX;
            } else {
                /*
                 * MBCS: Stage 3 is allocated in 64-entry blocks for U+0000..MBCS_FAST_MAX or higher.
                 * The .cnv file is prebuilt with an additional stage table with indexes
                 * to each block.
                 */
                /* the index is located after the fromUnicodeBytes, or directly at that offset when noFromU is set */
                mbcsTable->mbcsIndex=(const uint16_t *)
                    (mbcsTable->fromUnicodeBytes+
                     (noFromU ? 0 : mbcsTable->fromUBytesLength));
                mbcsTable->maxFastUChar=(((UChar)header->version[2])<<8)|0xff;
            }
        }

        /* calculate a bit set of 4 ASCII characters per bit that round-trip to ASCII bytes */
        {
            uint32_t asciiRoundtrips=0xffffffff;
            int32_t i;

            /* clear the bit for a group of 4 as soon as one byte in it is not a direct mapping to the same code point */
            for(i=0; i<0x80; ++i) {
                if(mbcsTable->stateTable[0][i]!=MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, i)) {
                    asciiRoundtrips&=~((uint32_t)1<<(i>>2));
                }
            }
            mbcsTable->asciiRoundtrips=asciiRoundtrips;
        }

        if(noFromU) {
            /* the fromUnicode data is not stored in the file; compute the stage lengths and rebuild it */
            uint32_t stage1Length=
                mbcsTable->unicodeMask&UCNV_HAS_SUPPLEMENTARY ?
                    0x440 : 0x40;
            uint32_t stage2Length=
                (header->offsetFromUBytes-header->offsetFromUTable)/4-
                stage1Length/2;
            reconstituteData(mbcsTable, stage1Length, stage2Length, header->fullStage2Length, pErrorCode);
        }
    }

    /* Set the impl pointer here so that it is set for both extension-only and base tables. */
    if(mbcsTable->utf8Friendly) {
        if(mbcsTable->countStates==1) {
            sharedData->impl=&_SBCSUTF8Impl;
        } else {
            if(mbcsTable->outputType==MBCS_OUTPUT_2) {
                sharedData->impl=&_DBCSUTF8Impl;
            }
        }
    }

    if(mbcsTable->outputType==MBCS_OUTPUT_DBCS_ONLY || mbcsTable->outputType==MBCS_OUTPUT_2_SISO) {
        /*
         * MBCS_OUTPUT_DBCS_ONLY: No SBCS mappings, therefore ASCII does not roundtrip.
         * MBCS_OUTPUT_2_SISO: Bypass the ASCII fastpath to handle prevLength correctly.
         */
        mbcsTable->asciiRoundtrips=0;
    }
}

/*
 * Release what the MBCS table of sharedData holds at unload time:
 * the swapLFNL state table if one was created, the state table if this
 * converter owns it (stateTableOwned), the reference to the base converter
 * of an extension converter, and the reconstituted fromUnicode data.
 * Each item is released only if it is present.
 */
static void
ucnv_MBCSUnload(UConverterSharedData *sharedData) {
    UConverterMBCSTable *mbcsTable=&sharedData->mbcs;

    if(mbcsTable->swapLFNLStateTable!=NULL) {
        uprv_free(mbcsTable->swapLFNLStateTable);
    }
    if(mbcsTable->stateTableOwned) {
        /* cast away const: the table pointer is stored as const but was allocated by this converter */
        uprv_free((void *)mbcsTable->stateTable);
    }
    if(mbcsTable->baseSharedData!=NULL) {
        ucnv_unload(mbcsTable->baseSharedData);
    }
    if(mbcsTable->reconstitutedData!=NULL) {
        uprv_free(mbcsTable->reconstitutedData);
    }
}

static void
1891f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusucnv_MBCSOpen(UConverter *cnv, 1892f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UConverterLoadArgs *pArgs, 1893f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UErrorCode *pErrorCode) { 1894f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UConverterMBCSTable *mbcsTable; 1895f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius const int32_t *extIndexes; 1896f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius uint8_t outputType; 1897f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius int8_t maxBytesPerUChar; 1898f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 1899f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(pArgs->onlyTestIsLoadable) { 1900f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return; 1901f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 1902f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 1903f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius mbcsTable=&cnv->sharedData->mbcs; 1904f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius outputType=mbcsTable->outputType; 1905f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 1906f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(outputType==MBCS_OUTPUT_DBCS_ONLY) { 1907f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* the swaplfnl option does not apply, remove it */ 1908f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius cnv->options=pArgs->options&=~UCNV_OPTION_SWAP_LFNL; 1909f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 1910f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 1911f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if((pArgs->options&UCNV_OPTION_SWAP_LFNL)!=0) { 1912f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* do this because double-checked locking is broken */ 1913f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UBool isCached; 1914f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 1915f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius umtx_lock(NULL); 
1916f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius isCached=mbcsTable->swapLFNLStateTable!=NULL; 1917f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius umtx_unlock(NULL); 1918f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 1919f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(!isCached) { 1920f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(!_EBCDICSwapLFNL(cnv->sharedData, pErrorCode)) { 1921f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(U_FAILURE(*pErrorCode)) { 1922f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return; /* something went wrong */ 1923f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 1924f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 1925f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* the option does not apply, remove it */ 1926f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius cnv->options=pArgs->options&=~UCNV_OPTION_SWAP_LFNL; 1927f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 1928f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 1929f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 1930f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 1931f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(uprv_strstr(pArgs->name, "18030")!=NULL) { 1932f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(uprv_strstr(pArgs->name, "gb18030")!=NULL || uprv_strstr(pArgs->name, "GB18030")!=NULL) { 1933f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* set a flag for GB 18030 mode, which changes the callback behavior */ 1934f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius cnv->options|=_MBCS_OPTION_GB18030; 1935f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 1936f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if((uprv_strstr(pArgs->name, "KEIS")!=NULL) || (uprv_strstr(pArgs->name, "keis")!=NULL)) { 1937f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* set a flag for KEIS converter, which changes the SI/SO character sequence */ 1938f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 
cnv->options|=_MBCS_OPTION_KEIS; 1939f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if((uprv_strstr(pArgs->name, "JEF")!=NULL) || (uprv_strstr(pArgs->name, "jef")!=NULL)) { 1940f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* set a flag for JEF converter, which changes the SI/SO character sequence */ 1941f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius cnv->options|=_MBCS_OPTION_JEF; 1942f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if((uprv_strstr(pArgs->name, "JIPS")!=NULL) || (uprv_strstr(pArgs->name, "jips")!=NULL)) { 1943f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* set a flag for JIPS converter, which changes the SI/SO character sequence */ 1944f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius cnv->options|=_MBCS_OPTION_JIPS; 1945f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 1946f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 1947f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* fix maxBytesPerUChar depending on outputType and options etc. 
*/ 1948f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(outputType==MBCS_OUTPUT_2_SISO) { 1949f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius cnv->maxBytesPerUChar=3; /* SO+DBCS */ 1950f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 1951f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 1952f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius extIndexes=mbcsTable->extIndexes; 1953f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(extIndexes!=NULL) { 1954f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius maxBytesPerUChar=(int8_t)UCNV_GET_MAX_BYTES_PER_UCHAR(extIndexes); 1955f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(outputType==MBCS_OUTPUT_2_SISO) { 1956f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ++maxBytesPerUChar; /* SO + multiple DBCS */ 1957f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 1958f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 1959f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(maxBytesPerUChar>cnv->maxBytesPerUChar) { 1960f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius cnv->maxBytesPerUChar=maxBytesPerUChar; 1961f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 1962f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 1963f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 1964f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius#if 0 1965f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* 1966f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * documentation of UConverter fields used for status 1967f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * all of these fields are (re)set to 0 by ucnv_bld.c and ucnv_reset() 1968f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 1969f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 1970f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* toUnicode */ 1971f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius cnv->toUnicodeStatus=0; /* offset */ 1972f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius cnv->mode=0; /* state */ 
1973f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius cnv->toULength=0; /* byteIndex */ 1974f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 1975f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* fromUnicode */ 1976f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius cnv->fromUChar32=0; 1977f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius cnv->fromUnicodeStatus=1; /* prevLength */ 1978f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius#endif 1979f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius} 1980f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 1981f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusstatic const char * 1982f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusucnv_MBCSGetName(const UConverter *cnv) { 1983f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0 && cnv->sharedData->mbcs.swapLFNLName!=NULL) { 1984f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return cnv->sharedData->mbcs.swapLFNLName; 1985f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 1986f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return cnv->sharedData->staticData->name; 1987f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 1988f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius} 1989f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 1990f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius/* MBCS-to-Unicode conversion functions ------------------------------------- */ 1991f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 1992f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusstatic UChar32 1993f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusucnv_MBCSGetFallback(UConverterMBCSTable *mbcsTable, uint32_t offset) { 1994f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius const _MBCSToUFallback *toUFallbacks; 1995f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius uint32_t i, start, limit; 1996f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 1997f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 
limit=mbcsTable->countToUFallbacks; 1998f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(limit>0) { 1999f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* do a binary search for the fallback mapping */ 2000f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius toUFallbacks=mbcsTable->toUFallbacks; 2001f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius start=0; 2002f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius while(start<limit-1) { 2003f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius i=(start+limit)/2; 2004f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(offset<toUFallbacks[i].offset) { 2005f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius limit=i; 2006f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 2007f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius start=i; 2008f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 2009f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 2010f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 2011f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* did we really find it? */ 2012f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(offset==toUFallbacks[start].offset) { 2013f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return toUFallbacks[start].codePoint; 2014f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 2015f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 2016f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 2017f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return 0xfffe; 2018f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius} 2019f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 2020f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius/* This version of ucnv_MBCSToUnicodeWithOffsets() is optimized for single-byte, single-state codepages. 
*/
/*
 * Every source byte is looked up on its own in row 0 of the state table.
 *
 * Reads the converter and the source/target/offsets pointers from pArgs and
 * writes the advanced source, target and offsets pointers back before
 * returning. When pArgs->offsets is not NULL, one source index is stored for
 * each UChar written by this function.
 *
 * *pErrorCode is set to
 *  - U_BUFFER_OVERFLOW_ERROR when the target is full while input remains
 *    (a trail surrogate that no longer fits is parked in
 *    cnv->UCharErrorBuffer),
 *  - U_ILLEGAL_CHAR_FOUND when a byte maps to MBCS_STATE_ILLEGAL,
 *  - whatever _extToU() reports for bytes without a direct mapping.
 * The loop stops at the first such error.
 */
static void
ucnv_MBCSSingleToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
                                UErrorCode *pErrorCode) {
    /* local copies of the caller's state */
    UConverter *cnv=pArgs->converter;
    const uint8_t *src=(const uint8_t *)pArgs->source;
    const uint8_t *srcEnd=(const uint8_t *)pArgs->sourceLimit;
    UChar *dst=pArgs->target;
    const UChar *dstEnd=pArgs->targetLimit;
    int32_t *offs=pArgs->offsets;

    const int32_t (*table)[256];

    /* index, counted from the source pointer on entry, of the byte being converted */
    int32_t byteIndex=0;

    int32_t entry, value20;
    UChar trail;
    uint8_t action;

    /* with the swaplfnl option the converter uses the swapped state table */
    if((cnv->options&UCNV_OPTION_SWAP_LFNL)==0) {
        table=cnv->sharedData->mbcs.stateTable;
    } else {
        table=(const int32_t (*)[256])cnv->sharedData->mbcs.swapLFNLStateTable;
    }

    /* conversion loop */
    while(src<srcEnd) {
        /*
         * Stop if there is input left but no room for even one UChar.
         * Output of more than one code unit (surrogate pair, extension
         * output) is checked separately where it is produced.
         */
        if(dst>=dstEnd) {
            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
            break;
        }

        entry=table[0][*src++];
        /* MBCS_ENTRY_IS_FINAL(entry) */

        /* most common case first: a valid, direct BMP mapping */
        if(MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) {
            *dst++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
            if(offs!=NULL) {
                *offs++=byteIndex;
            }

            ++byteIndex;
            continue;
        }

        /*
         * Rarer actions. An if-else-if chain provides more reliable
         * performance for the most common cases compared to a switch.
         */
        action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry));

        if(action==MBCS_STATE_VALID_DIRECT_20 ||
            (action==MBCS_STATE_FALLBACK_DIRECT_20 && UCNV_TO_U_USE_FALLBACK(cnv))
        ) {
            /* 20-bit value: upper 10 bits -> lead surrogate, lower 10 bits -> trail surrogate */
            value20=MBCS_ENTRY_FINAL_VALUE(entry);
            trail=(UChar)(0xdc00|(UChar)(value20&0x3ff));

            *dst++=(UChar)(0xd800|(UChar)(value20>>10));
            if(offs!=NULL) {
                *offs++=byteIndex;
            }

            if(dst>=dstEnd) {
                /* no room for the trail surrogate: hand it over via the error buffer */
                cnv->UCharErrorBuffer[0]=trail;
                cnv->UCharErrorBufferLength=1;
                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
                break;
            }

            *dst++=trail;
            if(offs!=NULL) {
                *offs++=byteIndex;
            }

            ++byteIndex;
            continue;
        }

        if(action==MBCS_STATE_FALLBACK_DIRECT_16) {
            if(UCNV_TO_U_USE_FALLBACK(cnv)) {
                /* BMP fallback mapping */
                *dst++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry);
                if(offs!=NULL) {
                    *offs++=byteIndex;
                }

                ++byteIndex;
                continue;
            }
            /* fallbacks not used: handled below like an unassigned byte */
        } else if(action==MBCS_STATE_ILLEGAL) {
            /* callback(illegal) */
            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
        } else if(action!=MBCS_STATE_UNASSIGNED) {
            /* reserved, must never occur: skip the byte without output */
            ++byteIndex;
            continue;
        }

        if(U_FAILURE(*pErrorCode)) {
            /* callback(illegal) */
            break;
        }

        /*
         * No direct mapping for this byte: try an extension mapping.
         * pArgs->source remembers where the lookup started so that the
         * number of additional bytes consumed by _extToU() can be added to
         * the index afterwards (the leading 1 is the byte already read).
         */
        pArgs->source=(const char *)src;
        cnv->toUBytes[0]=*(src-1);
        cnv->toULength=_extToU(cnv, cnv->sharedData,
                               1, &src, srcEnd,
                               &dst, dstEnd,
                               &offs, byteIndex,
                               pArgs->flush,
                               pErrorCode);
        byteIndex+=1+(int32_t)(src-(const uint8_t *)pArgs->source);

        if(U_FAILURE(*pErrorCode)) {
            /* not mappable or buffer overflow */
            break;
        }
    }

    /* write back the updated pointers */
    pArgs->source=(const char *)src;
    pArgs->target=dst;
    pArgs->offsets=offs;
}
2166f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 2167f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius/* 2168f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * This version of ucnv_MBCSSingleToUnicodeWithOffsets() is optimized for single-byte, single-state codepages 2169f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * that only map to and from the BMP. 2170f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * In addition to single-byte optimizations, the offset calculations 2171f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * become much easier. 2172f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 2173f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusstatic void 2174f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusucnv_MBCSSingleToBMPWithOffsets(UConverterToUnicodeArgs *pArgs, 2175f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UErrorCode *pErrorCode) { 2176f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UConverter *cnv; 2177f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius const uint8_t *source, *sourceLimit, *lastSource; 2178f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UChar *target; 2179f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius int32_t targetCapacity, length; 2180f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius int32_t *offsets; 2181f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 2182f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius const int32_t (*stateTable)[256]; 2183f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 2184f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius int32_t sourceIndex; 2185f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 2186f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius int32_t entry; 2187f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius uint8_t action; 2188f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 2189f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* set up the local pointers */ 2190f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius cnv=pArgs->converter; 
2191f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius source=(const uint8_t *)pArgs->source; 2192f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius sourceLimit=(const uint8_t *)pArgs->sourceLimit; 2193f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius target=pArgs->target; 2194f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target); 2195f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius offsets=pArgs->offsets; 2196f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 2197f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) { 2198f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius stateTable=(const int32_t (*)[256])cnv->sharedData->mbcs.swapLFNLStateTable; 2199f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 2200f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius stateTable=cnv->sharedData->mbcs.stateTable; 2201f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 2202f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 2203f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* sourceIndex=-1 if the current character began in the previous buffer */ 2204f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius sourceIndex=0; 2205f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius lastSource=source; 2206f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 2207f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* 2208f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * since the conversion here is 1:1 UChar:uint8_t, we need only one counter 2209f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * for the minimum of the sourceLength and targetCapacity 2210f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 2211f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length=(int32_t)(sourceLimit-source); 2212f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(length<targetCapacity) { 2213f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius targetCapacity=length; 
2214f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 2215f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 2216f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius#if MBCS_UNROLL_SINGLE_TO_BMP 2217f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* unrolling makes it faster on Pentium III/Windows 2000 */ 2218f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* unroll the loop with the most common case */ 2219f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusunrolled: 2220f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(targetCapacity>=16) { 2221f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius int32_t count, loops, oredEntries; 2222f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 2223f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius loops=count=targetCapacity>>4; 2224f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius do { 2225f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius oredEntries=entry=stateTable[0][*source++]; 2226f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); 2227f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius oredEntries|=entry=stateTable[0][*source++]; 2228f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); 2229f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius oredEntries|=entry=stateTable[0][*source++]; 2230f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); 2231f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius oredEntries|=entry=stateTable[0][*source++]; 2232f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); 2233f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius oredEntries|=entry=stateTable[0][*source++]; 2234f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); 2235f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius oredEntries|=entry=stateTable[0][*source++]; 
2236f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); 2237f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius oredEntries|=entry=stateTable[0][*source++]; 2238f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); 2239f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius oredEntries|=entry=stateTable[0][*source++]; 2240f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); 2241f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius oredEntries|=entry=stateTable[0][*source++]; 2242f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); 2243f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius oredEntries|=entry=stateTable[0][*source++]; 2244f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); 2245f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius oredEntries|=entry=stateTable[0][*source++]; 2246f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); 2247f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius oredEntries|=entry=stateTable[0][*source++]; 2248f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); 2249f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius oredEntries|=entry=stateTable[0][*source++]; 2250f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); 2251f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius oredEntries|=entry=stateTable[0][*source++]; 2252f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); 2253f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius oredEntries|=entry=stateTable[0][*source++]; 2254f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); 
2255f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius oredEntries|=entry=stateTable[0][*source++]; 2256f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); 2257f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 2258f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* were all 16 entries really valid? */ 2259f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(!MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(oredEntries)) { 2260f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* no, return to the first of these 16 */ 2261f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius source-=16; 2262f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius target-=16; 2263f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 2264f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 2265f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } while(--count>0); 2266f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius count=loops-count; 2267f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius targetCapacity-=16*count; 2268f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 2269f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(offsets!=NULL) { 2270f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius lastSource+=16*count; 2271f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius while(count>0) { 2272f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *offsets++=sourceIndex++; 2273f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *offsets++=sourceIndex++; 2274f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *offsets++=sourceIndex++; 2275f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *offsets++=sourceIndex++; 2276f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *offsets++=sourceIndex++; 2277f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *offsets++=sourceIndex++; 2278f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *offsets++=sourceIndex++; 2279f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *offsets++=sourceIndex++; 
2280f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *offsets++=sourceIndex++; 2281f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *offsets++=sourceIndex++; 2282f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *offsets++=sourceIndex++; 2283f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *offsets++=sourceIndex++; 2284f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *offsets++=sourceIndex++; 2285f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *offsets++=sourceIndex++; 2286f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *offsets++=sourceIndex++; 2287f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *offsets++=sourceIndex++; 2288f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius --count; 2289f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 2290f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 2291f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 2292f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius#endif 2293f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 2294f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* conversion loop */ 2295f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius while(targetCapacity > 0 && source < sourceLimit) { 2296f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius entry=stateTable[0][*source++]; 2297f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* MBCS_ENTRY_IS_FINAL(entry) */ 2298f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 2299f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* test the most common case first */ 2300f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) { 2301f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* output BMP code point */ 2302f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); 2303f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius --targetCapacity; 2304f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius continue; 2305f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 
2306f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 2307f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* 2308f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * An if-else-if chain provides more reliable performance for 2309f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * the most common cases compared to a switch. 2310f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 2311f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry)); 2312f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(action==MBCS_STATE_FALLBACK_DIRECT_16) { 2313f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(UCNV_TO_U_USE_FALLBACK(cnv)) { 2314f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* output BMP code point */ 2315f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); 2316f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius --targetCapacity; 2317f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius continue; 2318f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 2319f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if(action==MBCS_STATE_UNASSIGNED) { 2320f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* just fall through */ 2321f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if(action==MBCS_STATE_ILLEGAL) { 2322f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* callback(illegal) */ 2323f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *pErrorCode=U_ILLEGAL_CHAR_FOUND; 2324f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 2325f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* reserved, must never occur */ 2326f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius continue; 2327f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 2328f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 2329f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* set offsets since the start or the last extension */ 2330f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 
if(offsets!=NULL) { 2331f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius int32_t count=(int32_t)(source-lastSource); 2332f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 2333f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* predecrement: do not set the offset for the callback-causing character */ 2334f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius while(--count>0) { 2335f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *offsets++=sourceIndex++; 2336f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 2337f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* offset and sourceIndex are now set for the current character */ 2338f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 2339f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 2340f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(U_FAILURE(*pErrorCode)) { 2341f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* callback(illegal) */ 2342f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 2343f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else /* unassigned sequences indicated with byteIndex>0 */ { 2344f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* try an extension mapping */ 2345f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius lastSource=source; 2346f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius cnv->toUBytes[0]=*(source-1); 2347f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius cnv->toULength=_extToU(cnv, cnv->sharedData, 2348f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 1, &source, sourceLimit, 2349f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius &target, pArgs->targetLimit, 2350f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius &offsets, sourceIndex, 2351f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius pArgs->flush, 2352f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius pErrorCode); 2353f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius sourceIndex+=1+(int32_t)(source-lastSource); 2354f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 
2355f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(U_FAILURE(*pErrorCode)) { 2356f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* not mappable or buffer overflow */ 2357f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 2358f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 2359f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 2360f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* recalculate the targetCapacity after an extension mapping */ 2361f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius targetCapacity=(int32_t)(pArgs->targetLimit-target); 2362f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length=(int32_t)(sourceLimit-source); 2363f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(length<targetCapacity) { 2364f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius targetCapacity=length; 2365f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 2366f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 2367f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 2368f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius#if MBCS_UNROLL_SINGLE_TO_BMP 2369f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* unrolling makes it faster on Pentium III/Windows 2000 */ 2370f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius goto unrolled; 2371f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius#endif 2372f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 2373f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 2374f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(U_SUCCESS(*pErrorCode) && source<sourceLimit && target>=pArgs->targetLimit) { 2375f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* target is full */ 2376f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 2377f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 2378f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 2379f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* set offsets since the start or the last callback */ 
2380f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(offsets!=NULL) { 2381f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius size_t count=source-lastSource; 2382f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius while(count>0) { 2383f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *offsets++=sourceIndex++; 2384f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius --count; 2385f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 2386f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 2387f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 2388f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* write back the updated pointers */ 2389f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius pArgs->source=(const char *)source; 2390f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius pArgs->target=target; 2391f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius pArgs->offsets=offsets; 2392f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius} 2393f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 2394f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusstatic UBool 2395f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliushasValidTrailBytes(const int32_t (*stateTable)[256], uint8_t state) { 2396f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius const int32_t *row=stateTable[state]; 2397f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius int32_t b, entry; 2398f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* First test for final entries in this state for some commonly valid byte values. 
*/ 2399f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius entry=row[0xa1]; 2400f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if( !MBCS_ENTRY_IS_TRANSITION(entry) && 2401f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius MBCS_ENTRY_FINAL_ACTION(entry)!=MBCS_STATE_ILLEGAL 2402f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ) { 2403f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return TRUE; 2404f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 2405f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius entry=row[0x41]; 2406f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if( !MBCS_ENTRY_IS_TRANSITION(entry) && 2407f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius MBCS_ENTRY_FINAL_ACTION(entry)!=MBCS_STATE_ILLEGAL 2408f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ) { 2409f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return TRUE; 2410f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 2411f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* Then test for final entries in this state. */ 2412f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius for(b=0; b<=0xff; ++b) { 2413f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius entry=row[b]; 2414f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if( !MBCS_ENTRY_IS_TRANSITION(entry) && 2415f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius MBCS_ENTRY_FINAL_ACTION(entry)!=MBCS_STATE_ILLEGAL 2416f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ) { 2417f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return TRUE; 2418f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 2419f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 2420f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* Then recurse for transition entries. 
*/ 2421f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius for(b=0; b<=0xff; ++b) { 2422f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius entry=row[b]; 2423f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if( MBCS_ENTRY_IS_TRANSITION(entry) && 2424f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius hasValidTrailBytes(stateTable, (uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry)) 2425f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ) { 2426f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return TRUE; 2427f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 2428f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 2429f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return FALSE; 2430f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius} 2431f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 2432f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius/* 2433f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Is byte b a single/lead byte in this state? 2434f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Recurse for transition states, because here we don't want to say that 2435f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * b is a lead byte if all byte sequences that start with b are illegal. 
2436f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 2437f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusstatic UBool 2438f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusisSingleOrLead(const int32_t (*stateTable)[256], uint8_t state, UBool isDBCSOnly, uint8_t b) { 2439f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius const int32_t *row=stateTable[state]; 2440f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius int32_t entry=row[b]; 2441f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(MBCS_ENTRY_IS_TRANSITION(entry)) { /* lead byte */ 2442f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return hasValidTrailBytes(stateTable, (uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry)); 2443f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 2444f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius uint8_t action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry)); 2445f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(action==MBCS_STATE_CHANGE_ONLY && isDBCSOnly) { 2446f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return FALSE; /* SI/SO are illegal for DBCS-only conversion */ 2447f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 2448f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return action!=MBCS_STATE_ILLEGAL; 2449f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 2450f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 2451f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius} 2452f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 2453f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusU_CFUNC void 2454f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusucnv_MBCSToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, 2455f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UErrorCode *pErrorCode) { 2456f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UConverter *cnv; 2457f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius const uint8_t *source, *sourceLimit; 2458f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UChar *target; 
2459f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius const UChar *targetLimit; 2460f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius int32_t *offsets; 2461f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 2462f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius const int32_t (*stateTable)[256]; 2463f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius const uint16_t *unicodeCodeUnits; 2464f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 2465f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius uint32_t offset; 2466f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius uint8_t state; 2467f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius int8_t byteIndex; 2468f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius uint8_t *bytes; 2469f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 2470f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius int32_t sourceIndex, nextSourceIndex; 2471f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 2472f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius int32_t entry; 2473f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UChar c; 2474f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius uint8_t action; 2475f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 2476f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* use optimized function if possible */ 2477f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius cnv=pArgs->converter; 2478f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 2479f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(cnv->preToULength>0) { 2480f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* 2481f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * pass sourceIndex=-1 because we continue from an earlier buffer 2482f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * in the future, this may change with continuous offsets 2483f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 2484f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ucnv_extContinueMatchToU(cnv, pArgs, -1, pErrorCode); 
2485f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 2486f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(U_FAILURE(*pErrorCode) || cnv->preToULength<0) { 2487f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return; 2488f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 2489f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 2490f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 2491f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(cnv->sharedData->mbcs.countStates==1) { 2492f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(!(cnv->sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) { 2493f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ucnv_MBCSSingleToBMPWithOffsets(pArgs, pErrorCode); 2494f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 2495f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ucnv_MBCSSingleToUnicodeWithOffsets(pArgs, pErrorCode); 2496f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 2497f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return; 2498f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 2499f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 2500f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* set up the local pointers */ 2501f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius source=(const uint8_t *)pArgs->source; 2502f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius sourceLimit=(const uint8_t *)pArgs->sourceLimit; 2503f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius target=pArgs->target; 2504f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius targetLimit=pArgs->targetLimit; 2505f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius offsets=pArgs->offsets; 2506f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 2507f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) { 2508f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius stateTable=(const int32_t (*)[256])cnv->sharedData->mbcs.swapLFNLStateTable; 2509f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 
else { 2510f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius stateTable=cnv->sharedData->mbcs.stateTable; 2511f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 2512f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius unicodeCodeUnits=cnv->sharedData->mbcs.unicodeCodeUnits; 2513f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 2514f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* get the converter state from UConverter */ 2515f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius offset=cnv->toUnicodeStatus; 2516f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius byteIndex=cnv->toULength; 2517f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius bytes=cnv->toUBytes; 2518f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 2519f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* 2520f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * if we are in the SBCS state for a DBCS-only converter, 2521f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * then load the DBCS state from the MBCS data 2522f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * (dbcsOnlyState==0 if it is not a DBCS-only converter) 2523f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 2524f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if((state=(uint8_t)(cnv->mode))==0) { 2525f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius state=cnv->sharedData->mbcs.dbcsOnlyState; 2526f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 2527f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 2528f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* sourceIndex=-1 if the current character began in the previous buffer */ 2529f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius sourceIndex=byteIndex==0 ? 
0 : -1; 2530f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius nextSourceIndex=0; 2531f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 2532f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* conversion loop */ 2533f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius while(source<sourceLimit) { 2534f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* 2535f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * This following test is to see if available input would overflow the output. 2536f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * It does not catch output of more than one code unit that 2537f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * overflows as a result of a surrogate pair or callback output 2538f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * from the last source byte. 2539f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Therefore, those situations also test for overflows and will 2540f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * then break the loop, too. 
2541f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 2542f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(target>=targetLimit) { 2543f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* target is full */ 2544f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 2545f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 2546f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 2547f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 2548f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(byteIndex==0) { 2549f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* optimized loop for 1/2-byte input and BMP output */ 2550f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(offsets==NULL) { 2551f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius do { 2552f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius entry=stateTable[state][*source]; 2553f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(MBCS_ENTRY_IS_TRANSITION(entry)) { 2554f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius state=(uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry); 2555f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius offset=MBCS_ENTRY_TRANSITION_OFFSET(entry); 2556f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 2557f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ++source; 2558f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if( source<sourceLimit && 2559f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius MBCS_ENTRY_IS_FINAL(entry=stateTable[state][*source]) && 2560f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius MBCS_ENTRY_FINAL_ACTION(entry)==MBCS_STATE_VALID_16 && 2561f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius (c=unicodeCodeUnits[offset+MBCS_ENTRY_FINAL_VALUE_16(entry)])<0xfffe 2562f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ) { 2563f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ++source; 2564f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *target++=c; 2565f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 
state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */ 2566f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius offset=0; 2567f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 2568f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* set the state and leave the optimized loop */ 2569f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius bytes[0]=*(source-1); 2570f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius byteIndex=1; 2571f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 2572f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 2573f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 2574f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) { 2575f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* output BMP code point */ 2576f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ++source; 2577f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); 2578f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */ 2579f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 2580f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* leave the optimized loop */ 2581f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 2582f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 2583f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 2584f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } while(source<sourceLimit && target<targetLimit); 2585f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else /* offsets!=NULL */ { 2586f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius do { 2587f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius entry=stateTable[state][*source]; 2588f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(MBCS_ENTRY_IS_TRANSITION(entry)) { 2589f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius state=(uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry); 
2590f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius offset=MBCS_ENTRY_TRANSITION_OFFSET(entry); 2591f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 2592f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ++source; 2593f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if( source<sourceLimit && 2594f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius MBCS_ENTRY_IS_FINAL(entry=stateTable[state][*source]) && 2595f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius MBCS_ENTRY_FINAL_ACTION(entry)==MBCS_STATE_VALID_16 && 2596f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius (c=unicodeCodeUnits[offset+MBCS_ENTRY_FINAL_VALUE_16(entry)])<0xfffe 2597f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ) { 2598f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ++source; 2599f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *target++=c; 2600f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(offsets!=NULL) { 2601f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *offsets++=sourceIndex; 2602f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius sourceIndex=(nextSourceIndex+=2); 2603f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 2604f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */ 2605f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius offset=0; 2606f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 2607f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* set the state and leave the optimized loop */ 2608f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ++nextSourceIndex; 2609f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius bytes[0]=*(source-1); 2610f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius byteIndex=1; 2611f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 2612f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 2613f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 2614f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) { 
2615f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* output BMP code point */ 2616f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ++source; 2617f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); 2618f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(offsets!=NULL) { 2619f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *offsets++=sourceIndex; 2620f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius sourceIndex=++nextSourceIndex; 2621f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 2622f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */ 2623f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 2624f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* leave the optimized loop */ 2625f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 2626f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 2627f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 2628f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } while(source<sourceLimit && target<targetLimit); 2629f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 2630f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 2631f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* 2632f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * these tests and break statements could be put inside the loop 2633f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * if C had "break outerLoop" like Java 2634f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 2635f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(source>=sourceLimit) { 2636f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 2637f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 2638f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(target>=targetLimit) { 2639f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* target is full */ 2640f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 
2641f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 2642f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 2643f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 2644f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ++nextSourceIndex; 2645f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius bytes[byteIndex++]=*source++; 2646f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else /* byteIndex>0 */ { 2647f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ++nextSourceIndex; 2648f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius entry=stateTable[state][bytes[byteIndex++]=*source++]; 2649f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 2650f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 2651f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(MBCS_ENTRY_IS_TRANSITION(entry)) { 2652f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius state=(uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry); 2653f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius offset+=MBCS_ENTRY_TRANSITION_OFFSET(entry); 2654f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius continue; 2655f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 2656f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 2657f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* save the previous state for proper extension mapping with SI/SO-stateful converters */ 2658f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius cnv->mode=state; 2659f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 2660f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* set the next state early so that we can reuse the entry variable */ 2661f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */ 2662f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 2663f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* 2664f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * An if-else-if chain provides more reliable performance for 2665f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * the most 
common cases compared to a switch. 2666f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 2667f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry)); 2668f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(action==MBCS_STATE_VALID_16) { 2669f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius offset+=MBCS_ENTRY_FINAL_VALUE_16(entry); 2670f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=unicodeCodeUnits[offset]; 2671f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(c<0xfffe) { 2672f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* output BMP code point */ 2673f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *target++=c; 2674f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(offsets!=NULL) { 2675f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *offsets++=sourceIndex; 2676f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 2677f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius byteIndex=0; 2678f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if(c==0xfffe) { 2679f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(UCNV_TO_U_USE_FALLBACK(cnv) && (entry=(int32_t)ucnv_MBCSGetFallback(&cnv->sharedData->mbcs, offset))!=0xfffe) { 2680f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* output fallback BMP code point */ 2681f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *target++=(UChar)entry; 2682f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(offsets!=NULL) { 2683f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *offsets++=sourceIndex; 2684f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 2685f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius byteIndex=0; 2686f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 2687f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 2688f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* callback(illegal) */ 2689f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *pErrorCode=U_ILLEGAL_CHAR_FOUND; 
2690f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 2691f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if(action==MBCS_STATE_VALID_DIRECT_16) { 2692f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* output BMP code point */ 2693f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); 2694f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(offsets!=NULL) { 2695f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *offsets++=sourceIndex; 2696f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 2697f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius byteIndex=0; 2698f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if(action==MBCS_STATE_VALID_16_PAIR) { 2699f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius offset+=MBCS_ENTRY_FINAL_VALUE_16(entry); 2700f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=unicodeCodeUnits[offset++]; 2701f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(c<0xd800) { 2702f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* output BMP code point below 0xd800 */ 2703f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *target++=c; 2704f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(offsets!=NULL) { 2705f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *offsets++=sourceIndex; 2706f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 2707f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius byteIndex=0; 2708f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if(UCNV_TO_U_USE_FALLBACK(cnv) ? 
c<=0xdfff : c<=0xdbff) { 2709f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* output roundtrip or fallback surrogate pair */ 2710f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *target++=(UChar)(c&0xdbff); 2711f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(offsets!=NULL) { 2712f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *offsets++=sourceIndex; 2713f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 2714f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius byteIndex=0; 2715f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(target<targetLimit) { 2716f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *target++=unicodeCodeUnits[offset]; 2717f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(offsets!=NULL) { 2718f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *offsets++=sourceIndex; 2719f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 2720f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 2721f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* target overflow */ 2722f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius cnv->UCharErrorBuffer[0]=unicodeCodeUnits[offset]; 2723f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius cnv->UCharErrorBufferLength=1; 2724f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 2725f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 2726f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius offset=0; 2727f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 2728f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 2729f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if(UCNV_TO_U_USE_FALLBACK(cnv) ? 
(c&0xfffe)==0xe000 : c==0xe000) { 2730f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* output roundtrip BMP code point above 0xd800 or fallback BMP code point */ 2731f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *target++=unicodeCodeUnits[offset]; 2732f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(offsets!=NULL) { 2733f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *offsets++=sourceIndex; 2734f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 2735f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius byteIndex=0; 2736f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if(c==0xffff) { 2737f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* callback(illegal) */ 2738f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *pErrorCode=U_ILLEGAL_CHAR_FOUND; 2739f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 2740f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if(action==MBCS_STATE_VALID_DIRECT_20 || 2741f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius (action==MBCS_STATE_FALLBACK_DIRECT_20 && UCNV_TO_U_USE_FALLBACK(cnv)) 2742f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ) { 2743f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius entry=MBCS_ENTRY_FINAL_VALUE(entry); 2744f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* output surrogate pair */ 2745f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *target++=(UChar)(0xd800|(UChar)(entry>>10)); 2746f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(offsets!=NULL) { 2747f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *offsets++=sourceIndex; 2748f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 2749f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius byteIndex=0; 2750f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=(UChar)(0xdc00|(UChar)(entry&0x3ff)); 2751f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(target<targetLimit) { 2752f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *target++=c; 2753f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(offsets!=NULL) 
{ 2754f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *offsets++=sourceIndex; 2755f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 2756f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 2757f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* target overflow */ 2758f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius cnv->UCharErrorBuffer[0]=c; 2759f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius cnv->UCharErrorBufferLength=1; 2760f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 2761f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 2762f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius offset=0; 2763f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 2764f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 2765f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if(action==MBCS_STATE_CHANGE_ONLY) { 2766f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* 2767f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * This serves as a state change without any output. 2768f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * It is useful for reading simple stateful encodings, 2769f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * for example using just Shift-In/Shift-Out codes. 2770f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * The 21 unused bits may later be used for more sophisticated 2771f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * state transitions. 
2772f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 2773f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(cnv->sharedData->mbcs.dbcsOnlyState==0) { 2774f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius byteIndex=0; 2775f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 2776f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* SI/SO are illegal for DBCS-only conversion */ 2777f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius state=(uint8_t)(cnv->mode); /* restore the previous state */ 2778f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 2779f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* callback(illegal) */ 2780f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *pErrorCode=U_ILLEGAL_CHAR_FOUND; 2781f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 2782f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if(action==MBCS_STATE_FALLBACK_DIRECT_16) { 2783f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(UCNV_TO_U_USE_FALLBACK(cnv)) { 2784f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* output BMP code point */ 2785f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); 2786f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(offsets!=NULL) { 2787f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *offsets++=sourceIndex; 2788f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 2789f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius byteIndex=0; 2790f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 2791f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if(action==MBCS_STATE_UNASSIGNED) { 2792f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* just fall through */ 2793f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if(action==MBCS_STATE_ILLEGAL) { 2794f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* callback(illegal) */ 2795f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *pErrorCode=U_ILLEGAL_CHAR_FOUND; 
2796f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 2797f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* reserved, must never occur */ 2798f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius byteIndex=0; 2799f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 2800f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 2801f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* end of action codes: prepare for a new character */ 2802f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius offset=0; 2803f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 2804f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(byteIndex==0) { 2805f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius sourceIndex=nextSourceIndex; 2806f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if(U_FAILURE(*pErrorCode)) { 2807f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* callback(illegal) */ 2808f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(byteIndex>1) { 2809f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* 2810f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Ticket 5691: consistent illegal sequences: 2811f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * - We include at least the first byte in the illegal sequence. 2812f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * - If any of the non-initial bytes could be the start of a character, 2813f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * we stop the illegal sequence before the first one of those. 
2814f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 2815f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UBool isDBCSOnly=(UBool)(cnv->sharedData->mbcs.dbcsOnlyState!=0); 2816f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius int8_t i; 2817f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius for(i=1; 2818f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius i<byteIndex && !isSingleOrLead(stateTable, state, isDBCSOnly, bytes[i]); 2819f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ++i) {} 2820f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(i<byteIndex) { 2821f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* Back out some bytes. */ 2822f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius int8_t backOutDistance=byteIndex-i; 2823f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius int32_t bytesFromThisBuffer=(int32_t)(source-(const uint8_t *)pArgs->source); 2824f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius byteIndex=i; /* length of reported illegal byte sequence */ 2825f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(backOutDistance<=bytesFromThisBuffer) { 2826f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius source-=backOutDistance; 2827f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 2828f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* Back out bytes from the previous buffer: Need to replay them. */ 2829f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius cnv->preToULength=(int8_t)(bytesFromThisBuffer-backOutDistance); 2830f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* preToULength is negative! 
*/ 2831f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius uprv_memcpy(cnv->preToU, bytes+i, -cnv->preToULength); 2832f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius source=(const uint8_t *)pArgs->source; 2833f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 2834f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 2835f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 2836f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 2837f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else /* unassigned sequences indicated with byteIndex>0 */ { 2838f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* try an extension mapping */ 2839f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius pArgs->source=(const char *)source; 2840f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius byteIndex=_extToU(cnv, cnv->sharedData, 2841f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius byteIndex, &source, sourceLimit, 2842f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius &target, targetLimit, 2843f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius &offsets, sourceIndex, 2844f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius pArgs->flush, 2845f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius pErrorCode); 2846f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius sourceIndex=nextSourceIndex+=(int32_t)(source-(const uint8_t *)pArgs->source); 2847f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 2848f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(U_FAILURE(*pErrorCode)) { 2849f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* not mappable or buffer overflow */ 2850f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 2851f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 2852f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 2853f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 2854f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 2855f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* set the converter state back into UConverter */ 
2856f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius cnv->toUnicodeStatus=offset; 2857f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius cnv->mode=state; 2858f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius cnv->toULength=byteIndex; 2859f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 2860f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* write back the updated pointers */ 2861f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius pArgs->source=(const char *)source; 2862f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius pArgs->target=target; 2863f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius pArgs->offsets=offsets; 2864f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius} 2865f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 2866f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius/* 2867f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * This version of ucnv_MBCSGetNextUChar() is optimized for single-byte, single-state codepages. 2868f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * We still need a conversion loop in case we find reserved action codes, which are to be ignored. 
2869f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 2870f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusstatic UChar32 2871f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusucnv_MBCSSingleGetNextUChar(UConverterToUnicodeArgs *pArgs, 2872f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UErrorCode *pErrorCode) { 2873f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UConverter *cnv; 2874f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius const int32_t (*stateTable)[256]; 2875f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius const uint8_t *source, *sourceLimit; 2876f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 2877f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius int32_t entry; 2878f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius uint8_t action; 2879f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 2880f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* set up the local pointers */ 2881f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius cnv=pArgs->converter; 2882f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius source=(const uint8_t *)pArgs->source; 2883f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius sourceLimit=(const uint8_t *)pArgs->sourceLimit; 2884f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) { 2885f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius stateTable=(const int32_t (*)[256])cnv->sharedData->mbcs.swapLFNLStateTable; 2886f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 2887f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius stateTable=cnv->sharedData->mbcs.stateTable; 2888f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 2889f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 2890f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* conversion loop */ 2891f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius while(source<sourceLimit) { 2892f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius entry=stateTable[0][*source++]; 2893f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 
/* MBCS_ENTRY_IS_FINAL(entry) */ 2894f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 2895f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* write back the updated pointer early so that we can return directly */ 2896f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius pArgs->source=(const char *)source; 2897f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 2898f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) { 2899f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* output BMP code point */ 2900f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return (UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); 2901f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 2902f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 2903f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* 2904f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * An if-else-if chain provides more reliable performance for 2905f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * the most common cases compared to a switch. 
2906f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 2907f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry)); 2908f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if( action==MBCS_STATE_VALID_DIRECT_20 || 2909f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius (action==MBCS_STATE_FALLBACK_DIRECT_20 && UCNV_TO_U_USE_FALLBACK(cnv)) 2910f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ) { 2911f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* output supplementary code point */ 2912f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return (UChar32)(MBCS_ENTRY_FINAL_VALUE(entry)+0x10000); 2913f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if(action==MBCS_STATE_FALLBACK_DIRECT_16) { 2914f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(UCNV_TO_U_USE_FALLBACK(cnv)) { 2915f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* output BMP code point */ 2916f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return (UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); 2917f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 2918f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if(action==MBCS_STATE_UNASSIGNED) { 2919f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* just fall through */ 2920f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if(action==MBCS_STATE_ILLEGAL) { 2921f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* callback(illegal) */ 2922f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *pErrorCode=U_ILLEGAL_CHAR_FOUND; 2923f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 2924f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* reserved, must never occur */ 2925f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius continue; 2926f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 2927f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 2928f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(U_FAILURE(*pErrorCode)) { 2929f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* 
callback(illegal) */ 2930f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 2931f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else /* unassigned sequence */ { 2932f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* defer to the generic implementation */ 2933f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius pArgs->source=(const char *)source-1; 2934f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return UCNV_GET_NEXT_UCHAR_USE_TO_U; 2935f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 2936f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 2937f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 2938f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* no output because of empty input or only state changes */ 2939f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 2940f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return 0xffff; 2941f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius} 2942f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 2943f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius/* 2944f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Version of _MBCSToUnicodeWithOffsets() optimized for single-character 2945f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * conversion without offset handling. 2946f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 2947f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * When a character does not have a mapping to Unicode, then we return to the 2948f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * generic ucnv_getNextUChar() code for extension/GB 18030 and error/callback 2949f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * handling. 2950f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * We also defer to the generic code in other complicated cases and have them 2951f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * ultimately handled by _MBCSToUnicodeWithOffsets() itself. 
2952f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 2953f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * All normal mappings and errors are handled here. 2954f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 2955f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusstatic UChar32 2956f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusucnv_MBCSGetNextUChar(UConverterToUnicodeArgs *pArgs, 2957f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UErrorCode *pErrorCode) { 2958f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UConverter *cnv; 2959f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius const uint8_t *source, *sourceLimit, *lastSource; 2960f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 2961f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius const int32_t (*stateTable)[256]; 2962f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius const uint16_t *unicodeCodeUnits; 2963f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 2964f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius uint32_t offset; 2965f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius uint8_t state; 2966f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 2967f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius int32_t entry; 2968f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UChar32 c; 2969f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius uint8_t action; 2970f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 2971f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* use optimized function if possible */ 2972f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius cnv=pArgs->converter; 2973f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 2974f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(cnv->preToULength>0) { 2975f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* use the generic code in ucnv_getNextUChar() to continue with a partial match */ 2976f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return UCNV_GET_NEXT_UCHAR_USE_TO_U; 2977f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 
2978f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 2979f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(cnv->sharedData->mbcs.unicodeMask&UCNV_HAS_SURROGATES) { 2980f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* 2981f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Using the generic ucnv_getNextUChar() code lets us deal correctly 2982f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * with the rare case of a codepage that maps single surrogates 2983f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * without adding the complexity to this already complicated function here. 2984f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 2985f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return UCNV_GET_NEXT_UCHAR_USE_TO_U; 2986f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if(cnv->sharedData->mbcs.countStates==1) { 2987f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return ucnv_MBCSSingleGetNextUChar(pArgs, pErrorCode); 2988f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 2989f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 2990f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* set up the local pointers */ 2991f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius source=lastSource=(const uint8_t *)pArgs->source; 2992f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius sourceLimit=(const uint8_t *)pArgs->sourceLimit; 2993f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 2994f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) { 2995f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius stateTable=(const int32_t (*)[256])cnv->sharedData->mbcs.swapLFNLStateTable; 2996f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 2997f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius stateTable=cnv->sharedData->mbcs.stateTable; 2998f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 2999f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius unicodeCodeUnits=cnv->sharedData->mbcs.unicodeCodeUnits; 
3000f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 3001f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* get the converter state from UConverter */ 3002f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius offset=cnv->toUnicodeStatus; 3003f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 3004f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* 3005f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * if we are in the SBCS state for a DBCS-only converter, 3006f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * then load the DBCS state from the MBCS data 3007f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * (dbcsOnlyState==0 if it is not a DBCS-only converter) 3008f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 3009f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if((state=(uint8_t)(cnv->mode))==0) { 3010f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius state=cnv->sharedData->mbcs.dbcsOnlyState; 3011f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 3012f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 3013f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* conversion loop */ 3014f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=U_SENTINEL; 3015f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius while(source<sourceLimit) { 3016f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius entry=stateTable[state][*source++]; 3017f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(MBCS_ENTRY_IS_TRANSITION(entry)) { 3018f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius state=(uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry); 3019f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius offset+=MBCS_ENTRY_TRANSITION_OFFSET(entry); 3020f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 3021f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* optimization for 1/2-byte input and BMP output */ 3022f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if( source<sourceLimit && 3023f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 
MBCS_ENTRY_IS_FINAL(entry=stateTable[state][*source]) && 3024f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius MBCS_ENTRY_FINAL_ACTION(entry)==MBCS_STATE_VALID_16 && 3025f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius (c=unicodeCodeUnits[offset+MBCS_ENTRY_FINAL_VALUE_16(entry)])<0xfffe 3026f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ) { 3027f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ++source; 3028f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */ 3029f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* output BMP code point */ 3030f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 3031f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 3032f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 3033f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* save the previous state for proper extension mapping with SI/SO-stateful converters */ 3034f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius cnv->mode=state; 3035f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 3036f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* set the next state early so that we can reuse the entry variable */ 3037f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */ 3038f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 3039f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* 3040f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * An if-else-if chain provides more reliable performance for 3041f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * the most common cases compared to a switch. 
3042f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 3043f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry)); 3044f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(action==MBCS_STATE_VALID_DIRECT_16) { 3045f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* output BMP code point */ 3046f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); 3047f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 3048f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if(action==MBCS_STATE_VALID_16) { 3049f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius offset+=MBCS_ENTRY_FINAL_VALUE_16(entry); 3050f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=unicodeCodeUnits[offset]; 3051f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(c<0xfffe) { 3052f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* output BMP code point */ 3053f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 3054f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if(c==0xfffe) { 3055f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(UCNV_TO_U_USE_FALLBACK(cnv) && (c=ucnv_MBCSGetFallback(&cnv->sharedData->mbcs, offset))!=0xfffe) { 3056f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 3057f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 3058f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 3059f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* callback(illegal) */ 3060f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *pErrorCode=U_ILLEGAL_CHAR_FOUND; 3061f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 3062f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if(action==MBCS_STATE_VALID_16_PAIR) { 3063f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius offset+=MBCS_ENTRY_FINAL_VALUE_16(entry); 3064f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=unicodeCodeUnits[offset++]; 3065f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(c<0xd800) { 
3066f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* output BMP code point below 0xd800 */ 3067f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 3068f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if(UCNV_TO_U_USE_FALLBACK(cnv) ? c<=0xdfff : c<=0xdbff) { 3069f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* output roundtrip or fallback supplementary code point */ 3070f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=((c&0x3ff)<<10)+unicodeCodeUnits[offset]+(0x10000-0xdc00); 3071f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 3072f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if(UCNV_TO_U_USE_FALLBACK(cnv) ? (c&0xfffe)==0xe000 : c==0xe000) { 3073f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* output roundtrip BMP code point above 0xd800 or fallback BMP code point */ 3074f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=unicodeCodeUnits[offset]; 3075f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 3076f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if(c==0xffff) { 3077f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* callback(illegal) */ 3078f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *pErrorCode=U_ILLEGAL_CHAR_FOUND; 3079f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 3080f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if(action==MBCS_STATE_VALID_DIRECT_20 || 3081f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius (action==MBCS_STATE_FALLBACK_DIRECT_20 && UCNV_TO_U_USE_FALLBACK(cnv)) 3082f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ) { 3083f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* output supplementary code point */ 3084f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=(UChar32)(MBCS_ENTRY_FINAL_VALUE(entry)+0x10000); 3085f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 3086f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if(action==MBCS_STATE_CHANGE_ONLY) { 3087f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* 
3088f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * This serves as a state change without any output. 3089f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * It is useful for reading simple stateful encodings, 3090f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * for example using just Shift-In/Shift-Out codes. 3091f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * The 21 unused bits may later be used for more sophisticated 3092f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * state transitions. 3093f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 3094f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(cnv->sharedData->mbcs.dbcsOnlyState!=0) { 3095f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* SI/SO are illegal for DBCS-only conversion */ 3096f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius state=(uint8_t)(cnv->mode); /* restore the previous state */ 3097f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 3098f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* callback(illegal) */ 3099f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *pErrorCode=U_ILLEGAL_CHAR_FOUND; 3100f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 3101f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if(action==MBCS_STATE_FALLBACK_DIRECT_16) { 3102f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(UCNV_TO_U_USE_FALLBACK(cnv)) { 3103f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* output BMP code point */ 3104f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); 3105f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 3106f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 3107f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if(action==MBCS_STATE_UNASSIGNED) { 3108f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* just fall through */ 3109f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if(action==MBCS_STATE_ILLEGAL) { 3110f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* 
callback(illegal) */ 3111f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *pErrorCode=U_ILLEGAL_CHAR_FOUND; 3112f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 3113f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* reserved (must never occur), or only state change */ 3114f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius offset=0; 3115f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius lastSource=source; 3116f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius continue; 3117f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 3118f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 3119f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* end of action codes: prepare for a new character */ 3120f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius offset=0; 3121f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 3122f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(U_FAILURE(*pErrorCode)) { 3123f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* callback(illegal) */ 3124f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 3125f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else /* unassigned sequence */ { 3126f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* defer to the generic implementation */ 3127f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius cnv->toUnicodeStatus=0; 3128f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius cnv->mode=state; 3129f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius pArgs->source=(const char *)lastSource; 3130f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return UCNV_GET_NEXT_UCHAR_USE_TO_U; 3131f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 3132f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 3133f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 3134f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 3135f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(c<0) { 3136f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(U_SUCCESS(*pErrorCode) && source==sourceLimit && lastSource<source) { 
3137f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* incomplete character byte sequence */ 3138f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius uint8_t *bytes=cnv->toUBytes; 3139f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius cnv->toULength=(int8_t)(source-lastSource); 3140f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius do { 3141f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *bytes++=*lastSource++; 3142f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } while(lastSource<source); 3143f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *pErrorCode=U_TRUNCATED_CHAR_FOUND; 3144f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if(U_FAILURE(*pErrorCode)) { 3145f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* callback(illegal) */ 3146f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* 3147f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Ticket 5691: consistent illegal sequences: 3148f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * - We include at least the first byte in the illegal sequence. 3149f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * - If any of the non-initial bytes could be the start of a character, 3150f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * we stop the illegal sequence before the first one of those. 
3151f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 3152f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UBool isDBCSOnly=(UBool)(cnv->sharedData->mbcs.dbcsOnlyState!=0); 3153f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius uint8_t *bytes=cnv->toUBytes; 3154f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *bytes++=*lastSource++; /* first byte */ 3155f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(lastSource==source) { 3156f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius cnv->toULength=1; 3157f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else /* lastSource<source: multi-byte character */ { 3158f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius int8_t i; 3159f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius for(i=1; 3160f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius lastSource<source && !isSingleOrLead(stateTable, state, isDBCSOnly, *lastSource); 3161f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ++i 3162f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ) { 3163f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *bytes++=*lastSource++; 3164f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 3165f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius cnv->toULength=i; 3166f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius source=lastSource; 3167f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 3168f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 3169f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* no output because of empty input or only state changes */ 3170f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 3171f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 3172f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=0xffff; 3173f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 3174f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 3175f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* set the converter state back into UConverter, ready for a new character */ 
3176f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius cnv->toUnicodeStatus=0; 3177f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius cnv->mode=state; 3178f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 3179f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* write back the updated pointer */ 3180f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius pArgs->source=(const char *)source; 3181f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return c; 3182f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius} 3183f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 3184f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius#if 0 3185f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius/* 3186f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Code disabled 2002dec09 (ICU 2.4) because it is not currently used in ICU. markus 3187f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Removal improves code coverage. 3188f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 3189f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius/** 3190f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * This version of ucnv_MBCSSimpleGetNextUChar() is optimized for single-byte, single-state codepages. 3191f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * It does not handle the EBCDIC swaplfnl option (set in UConverter). 3192f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * It does not handle conversion extensions (_extToU()). 
3193f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 3194f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusU_CFUNC UChar32 3195f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusucnv_MBCSSingleSimpleGetNextUChar(UConverterSharedData *sharedData, 3196f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius uint8_t b, UBool useFallback) { 3197f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius int32_t entry; 3198f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius uint8_t action; 3199f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 3200f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius entry=sharedData->mbcs.stateTable[0][b]; 3201f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* MBCS_ENTRY_IS_FINAL(entry) */ 3202f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 3203f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) { 3204f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* output BMP code point */ 3205f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return (UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); 3206f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 3207f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 3208f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* 3209f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * An if-else-if chain provides more reliable performance for 3210f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * the most common cases compared to a switch. 
3211f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 3212f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry)); 3213f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(action==MBCS_STATE_VALID_DIRECT_20) { 3214f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* output supplementary code point */ 3215f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return 0x10000+MBCS_ENTRY_FINAL_VALUE(entry); 3216f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if(action==MBCS_STATE_FALLBACK_DIRECT_16) { 3217f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(!TO_U_USE_FALLBACK(useFallback)) { 3218f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return 0xfffe; 3219f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 3220f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* output BMP code point */ 3221f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return (UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); 3222f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if(action==MBCS_STATE_FALLBACK_DIRECT_20) { 3223f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(!TO_U_USE_FALLBACK(useFallback)) { 3224f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return 0xfffe; 3225f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 3226f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* output supplementary code point */ 3227f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return 0x10000+MBCS_ENTRY_FINAL_VALUE(entry); 3228f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if(action==MBCS_STATE_UNASSIGNED) { 3229f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return 0xfffe; 3230f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if(action==MBCS_STATE_ILLEGAL) { 3231f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return 0xffff; 3232f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 3233f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* reserved, must never occur */ 
3234f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return 0xffff; 3235f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 3236f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius} 3237f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius#endif 3238f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 3239f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius/* 3240f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * This is a simple version of _MBCSGetNextUChar() that is used 3241f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * by other converter implementations. 3242f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * It only returns an "assigned" result if it consumes the entire input. 3243f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * It does not use state from the converter, nor error codes. 3244f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * It does not handle the EBCDIC swaplfnl option (set in UConverter). 3245f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * It handles conversion extensions but not GB 18030. 
3246f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 3247f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Return value: 3248f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * U+fffe unassigned 3249f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * U+ffff illegal 3250f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * otherwise the Unicode code point 3251f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 3252f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusU_CFUNC UChar32 3253f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusucnv_MBCSSimpleGetNextUChar(UConverterSharedData *sharedData, 3254f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius const char *source, int32_t length, 3255f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UBool useFallback) { 3256f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius const int32_t (*stateTable)[256]; 3257f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius const uint16_t *unicodeCodeUnits; 3258f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 3259f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius uint32_t offset; 3260f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius uint8_t state, action; 3261f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 3262f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UChar32 c; 3263f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius int32_t i, entry; 3264f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 3265f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(length<=0) { 3266f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* no input at all: "illegal" */ 3267f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return 0xffff; 3268f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 3269f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 3270f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius#if 0 3271f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius/* 3272f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Code disabled 2002dec09 (ICU 2.4) because it is not currently used in ICU. 
markus 3273f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * TODO In future releases, verify that this function is never called for SBCS 3274f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * conversions, i.e., that sharedData->mbcs.countStates==1 is still true. 3275f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Removal improves code coverage. 3276f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 3277f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* use optimized function if possible */ 3278f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(sharedData->mbcs.countStates==1) { 3279f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(length==1) { 3280f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return ucnv_MBCSSingleSimpleGetNextUChar(sharedData, (uint8_t)*source, useFallback); 3281f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 3282f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return 0xffff; /* illegal: more than a single byte for an SBCS converter */ 3283f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 3284f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 3285f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius#endif 3286f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 3287f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* set up the local pointers */ 3288f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius stateTable=sharedData->mbcs.stateTable; 3289f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius unicodeCodeUnits=sharedData->mbcs.unicodeCodeUnits; 3290f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 3291f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* converter state */ 3292f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius offset=0; 3293f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius state=sharedData->mbcs.dbcsOnlyState; 3294f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 3295f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* conversion loop */ 
3296f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius for(i=0;;) { 3297f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius entry=stateTable[state][(uint8_t)source[i++]]; 3298f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(MBCS_ENTRY_IS_TRANSITION(entry)) { 3299f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius state=(uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry); 3300f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius offset+=MBCS_ENTRY_TRANSITION_OFFSET(entry); 3301f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 3302f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(i==length) { 3303f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return 0xffff; /* truncated character */ 3304f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 3305f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 3306f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* 3307f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * An if-else-if chain provides more reliable performance for 3308f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * the most common cases compared to a switch. 
3309f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 3310f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry)); 3311f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(action==MBCS_STATE_VALID_16) { 3312f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius offset+=MBCS_ENTRY_FINAL_VALUE_16(entry); 3313f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=unicodeCodeUnits[offset]; 3314f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(c!=0xfffe) { 3315f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* done */ 3316f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if(UCNV_TO_U_USE_FALLBACK(cnv)) { 3317f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=ucnv_MBCSGetFallback(&sharedData->mbcs, offset); 3318f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* else done with 0xfffe */ 3319f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 3320f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 3321f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if(action==MBCS_STATE_VALID_DIRECT_16) { 3322f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* output BMP code point */ 3323f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); 3324f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 3325f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if(action==MBCS_STATE_VALID_16_PAIR) { 3326f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius offset+=MBCS_ENTRY_FINAL_VALUE_16(entry); 3327f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=unicodeCodeUnits[offset++]; 3328f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(c<0xd800) { 3329f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* output BMP code point below 0xd800 */ 3330f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if(UCNV_TO_U_USE_FALLBACK(cnv) ? 
c<=0xdfff : c<=0xdbff) { 3331f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* output roundtrip or fallback supplementary code point */ 3332f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=(UChar32)(((c&0x3ff)<<10)+unicodeCodeUnits[offset]+(0x10000-0xdc00)); 3333f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if(UCNV_TO_U_USE_FALLBACK(cnv) ? (c&0xfffe)==0xe000 : c==0xe000) { 3334f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* output roundtrip BMP code point above 0xd800 or fallback BMP code point */ 3335f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=unicodeCodeUnits[offset]; 3336f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if(c==0xffff) { 3337f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return 0xffff; 3338f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 3339f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=0xfffe; 3340f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 3341f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 3342f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if(action==MBCS_STATE_VALID_DIRECT_20) { 3343f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* output supplementary code point */ 3344f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=0x10000+MBCS_ENTRY_FINAL_VALUE(entry); 3345f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 3346f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if(action==MBCS_STATE_FALLBACK_DIRECT_16) { 3347f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(!TO_U_USE_FALLBACK(useFallback)) { 3348f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=0xfffe; 3349f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 3350f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 3351f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* output BMP code point */ 3352f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); 3353f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 
3354f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if(action==MBCS_STATE_FALLBACK_DIRECT_20) { 3355f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(!TO_U_USE_FALLBACK(useFallback)) { 3356f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=0xfffe; 3357f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 3358f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 3359f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* output supplementary code point */ 3360f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=0x10000+MBCS_ENTRY_FINAL_VALUE(entry); 3361f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 3362f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if(action==MBCS_STATE_UNASSIGNED) { 3363f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=0xfffe; 3364f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 3365f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 3366f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 3367f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* 3368f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * forbid MBCS_STATE_CHANGE_ONLY for this function, 3369f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * and MBCS_STATE_ILLEGAL and reserved action codes 3370f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 3371f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return 0xffff; 3372f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 3373f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 3374f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 3375f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(i!=length) { 3376f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* illegal for this function: not all input consumed */ 3377f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return 0xffff; 3378f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 3379f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 3380f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(c==0xfffe) { 
3381f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* try an extension mapping */ 3382f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius const int32_t *cx=sharedData->mbcs.extIndexes; 3383f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(cx!=NULL) { 3384f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return ucnv_extSimpleMatchToU(cx, source, length, useFallback); 3385f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 3386f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 3387f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 3388f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return c; 3389f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius} 3390f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 3391f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius/* MBCS-from-Unicode conversion functions ----------------------------------- */ 3392f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 3393f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius/* This version of ucnv_MBCSFromUnicodeWithOffsets() is optimized for double-byte codepages. 
*/ 3394f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusstatic void 3395f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusucnv_MBCSDoubleFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs, 3396f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UErrorCode *pErrorCode) { 3397f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UConverter *cnv; 3398f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius const UChar *source, *sourceLimit; 3399f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius uint8_t *target; 3400f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius int32_t targetCapacity; 3401f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius int32_t *offsets; 3402f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 3403f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius const uint16_t *table; 3404f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius const uint16_t *mbcsIndex; 3405f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius const uint8_t *bytes; 3406f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 3407f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UChar32 c; 3408f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 3409f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius int32_t sourceIndex, nextSourceIndex; 3410f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 3411f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius uint32_t stage2Entry; 3412f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius uint32_t asciiRoundtrips; 3413f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius uint32_t value; 3414f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius uint8_t unicodeMask; 3415f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 3416f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* use optimized function if possible */ 3417f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius cnv=pArgs->converter; 3418f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius unicodeMask=cnv->sharedData->mbcs.unicodeMask; 3419f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 
3420f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* set up the local pointers */ 3421f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius source=pArgs->source; 3422f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius sourceLimit=pArgs->sourceLimit; 3423f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius target=(uint8_t *)pArgs->target; 3424f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target); 3425f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius offsets=pArgs->offsets; 3426f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 3427f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius table=cnv->sharedData->mbcs.fromUnicodeTable; 3428f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius mbcsIndex=cnv->sharedData->mbcs.mbcsIndex; 3429f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) { 3430f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius bytes=cnv->sharedData->mbcs.swapLFNLFromUnicodeBytes; 3431f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 3432f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius bytes=cnv->sharedData->mbcs.fromUnicodeBytes; 3433f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 3434f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius asciiRoundtrips=cnv->sharedData->mbcs.asciiRoundtrips; 3435f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 3436f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* get the converter state from UConverter */ 3437f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=cnv->fromUChar32; 3438f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 3439f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* sourceIndex=-1 if the current character began in the previous buffer */ 3440f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius sourceIndex= c==0 ? 
0 : -1; 3441f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius nextSourceIndex=0; 3442f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 3443f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* conversion loop */ 3444f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(c!=0 && targetCapacity>0) { 3445f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius goto getTrail; 3446f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 3447f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 3448f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius while(source<sourceLimit) { 3449f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* 3450f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * This following test is to see if available input would overflow the output. 3451f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * It does not catch output of more than one byte that 3452f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * overflows as a result of a multi-byte character or callback output 3453f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * from the last source character. 3454f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Therefore, those situations also test for overflows and will 3455f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * then break the loop, too. 3456f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 3457f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(targetCapacity>0) { 3458f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* 3459f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Get a correct Unicode code point: 3460f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * a single UChar for a BMP code point or 3461f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * a matched surrogate pair for a "supplementary code point". 
3462f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 3463f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=*source++; 3464f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ++nextSourceIndex; 3465f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(c<=0x7f && IS_ASCII_ROUNDTRIP(c, asciiRoundtrips)) { 3466f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *target++=(uint8_t)c; 3467f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(offsets!=NULL) { 3468f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *offsets++=sourceIndex; 3469f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius sourceIndex=nextSourceIndex; 3470f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 3471f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius --targetCapacity; 3472f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=0; 3473f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius continue; 3474f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 3475f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* 3476f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * utf8Friendly table: Test for <=0xd7ff rather than <=MBCS_FAST_MAX 3477f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * to avoid dealing with surrogates. 3478f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * MBCS_FAST_MAX must be >=0xd7ff. 3479f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 3480f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(c<=0xd7ff) { 3481f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value=DBCS_RESULT_FROM_MOST_BMP(mbcsIndex, (const uint16_t *)bytes, c); 3482f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* There are only roundtrips (!=0) and no-mapping (==0) entries. 
*/ 3483f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(value==0) { 3484f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius goto unassigned; 3485f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 3486f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* output the value */ 3487f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 3488f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* 3489f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * This also tests if the codepage maps single surrogates. 3490f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * If it does, then surrogates are not paired but mapped separately. 3491f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Note that in this case unmatched surrogates are not detected. 3492f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 3493f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(U16_IS_SURROGATE(c) && !(unicodeMask&UCNV_HAS_SURROGATES)) { 3494f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(U16_IS_SURROGATE_LEAD(c)) { 3495f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusgetTrail: 3496f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(source<sourceLimit) { 3497f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* test the following code unit */ 3498f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UChar trail=*source; 3499f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(U16_IS_TRAIL(trail)) { 3500f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ++source; 3501f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ++nextSourceIndex; 3502f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=U16_GET_SUPPLEMENTARY(c, trail); 3503f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(!(unicodeMask&UCNV_HAS_SUPPLEMENTARY)) { 3504f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* BMP-only codepages are stored without stage 1 entries for supplementary code points */ 3505f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* callback(unassigned) */ 
3506f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius goto unassigned; 3507f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 3508f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* convert this supplementary code point */ 3509f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* exit this condition tree */ 3510f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 3511f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* this is an unmatched lead code unit (1st surrogate) */ 3512f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* callback(illegal) */ 3513f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *pErrorCode=U_ILLEGAL_CHAR_FOUND; 3514f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 3515f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 3516f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 3517f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* no more input */ 3518f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 3519f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 3520f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 3521f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* this is an unmatched trail code unit (2nd surrogate) */ 3522f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* callback(illegal) */ 3523f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *pErrorCode=U_ILLEGAL_CHAR_FOUND; 3524f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 3525f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 3526f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 3527f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 3528f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* convert the Unicode code point in c into codepage bytes */ 3529f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius stage2Entry=MBCS_STAGE_2_FROM_U(table, c); 3530f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 3531f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* get the bytes and the length for the output */ 
3532f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* MBCS_OUTPUT_2 */ 3533f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value=MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c); 3534f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 3535f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* is this code point assigned, or do we use fallbacks? */ 3536f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(!(MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c) || 3537f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius (UCNV_FROM_U_USE_FALLBACK(cnv, c) && value!=0)) 3538f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ) { 3539f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* 3540f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * We allow a 0 byte output if the "assigned" bit is set for this entry. 3541f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * There is no way with this data structure for fallback output 3542f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * to be a zero byte. 3543f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 3544f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 3545f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusunassigned: 3546f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* try an extension mapping */ 3547f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius pArgs->source=source; 3548f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=_extFromU(cnv, cnv->sharedData, 3549f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c, &source, sourceLimit, 3550f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius &target, target+targetCapacity, 3551f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius &offsets, sourceIndex, 3552f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius pArgs->flush, 3553f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius pErrorCode); 3554f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius nextSourceIndex+=(int32_t)(source-pArgs->source); 3555f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 
3556f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(U_FAILURE(*pErrorCode)) { 3557f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* not mappable or buffer overflow */ 3558f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 3559f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 3560f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* a mapping was written to the target, continue */ 3561f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 3562f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* recalculate the targetCapacity after an extension mapping */ 3563f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius targetCapacity=(int32_t)(pArgs->targetLimit-(char *)target); 3564f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 3565f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* normal end of conversion: prepare for a new character */ 3566f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius sourceIndex=nextSourceIndex; 3567f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius continue; 3568f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 3569f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 3570f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 3571f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 3572f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* write the output character bytes from value and length */ 3573f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* from the first if in the loop we know that targetCapacity>0 */ 3574f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(value<=0xff) { 3575f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* this is easy because we know that there is enough space */ 3576f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *target++=(uint8_t)value; 3577f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(offsets!=NULL) { 3578f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *offsets++=sourceIndex; 3579f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 
3580f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius --targetCapacity; 3581f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else /* length==2 */ { 3582f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *target++=(uint8_t)(value>>8); 3583f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(2<=targetCapacity) { 3584f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *target++=(uint8_t)value; 3585f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(offsets!=NULL) { 3586f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *offsets++=sourceIndex; 3587f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *offsets++=sourceIndex; 3588f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 3589f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius targetCapacity-=2; 3590f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 3591f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(offsets!=NULL) { 3592f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *offsets++=sourceIndex; 3593f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 3594f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius cnv->charErrorBuffer[0]=(char)value; 3595f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius cnv->charErrorBufferLength=1; 3596f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 3597f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* target overflow */ 3598f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius targetCapacity=0; 3599f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 3600f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=0; 3601f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 3602f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 3603f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 3604f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 3605f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* normal end of conversion: prepare for a new character */ 3606f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=0; 
3607f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius sourceIndex=nextSourceIndex; 3608f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius continue; 3609f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 3610f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* target is full */ 3611f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 3612f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 3613f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 3614f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 3615f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 3616f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* set the converter state back into UConverter */ 3617f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius cnv->fromUChar32=c; 3618f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 3619f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* write back the updated pointers */ 3620f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius pArgs->source=source; 3621f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius pArgs->target=(char *)target; 3622f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius pArgs->offsets=offsets; 3623f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius} 3624f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 3625f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius/* This version of ucnv_MBCSFromUnicodeWithOffsets() is optimized for single-byte codepages. 
*/
/*
 * Converts the UTF-16 text in pArgs->source..pArgs->sourceLimit to single
 * bytes written at pArgs->target, stopping at pArgs->targetLimit.
 *
 * Each code point is looked up with MBCS_SINGLE_RESULT_FROM_U(). A result
 * at or above the acceptance threshold is written as one byte; anything
 * else is handed to _extFromU() to try an extension mapping.
 *
 * A code unit left over from a previous call is read from cnv->fromUChar32
 * on entry, and whatever is pending on exit is stored back there.
 *
 * If pArgs->offsets is not NULL, the source index of the originating
 * character is recorded for every byte written here (-1 if that character
 * began in the previous buffer).
 *
 * *pErrorCode is set to U_ILLEGAL_CHAR_FOUND for an unmatched surrogate and
 * to U_BUFFER_OVERFLOW_ERROR when input remains but the target is full;
 * _extFromU() may set it as well.
 *
 * pArgs->source, pArgs->target and pArgs->offsets are updated on return.
 */
static void
ucnv_MBCSSingleFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
                                  UErrorCode *pErrorCode) {
    /* working copies of the caller's arguments */
    UConverter *cnv=pArgs->converter;
    const UChar *source=pArgs->source;
    const UChar *sourceLimit=pArgs->sourceLimit;
    uint8_t *target=(uint8_t *)pArgs->target;
    int32_t targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
    int32_t *offsets=pArgs->offsets;

    /* lookup data; the result array depends on the LF/NL swap option */
    const uint16_t *table=cnv->sharedData->mbcs.fromUnicodeTable;
    const uint16_t *results=
        (cnv->options&UCNV_OPTION_SWAP_LFNL)!=0 ?
            (uint16_t *)cnv->sharedData->mbcs.swapLFNLFromUnicodeBytes :
            (uint16_t *)cnv->sharedData->mbcs.fromUnicodeBytes;

    /*
     * Acceptance threshold for lookup results:
     * 0x800 accepts all roundtrip and fallback results,
     * 0xc00 accepts only roundtrips and fallbacks from private-use characters.
     */
    uint16_t minValue=(uint16_t)(cnv->useFallback ? 0x800 : 0xc00);
    UBool hasSupplementary=(UBool)(cnv->sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY);

    /* pending code unit carried over from the previous call, or 0 */
    UChar32 c=cnv->fromUChar32;

    /* sourceIndex is -1 when the current character began in the previous buffer */
    int32_t sourceIndex= c!=0 ? -1 : 0;
    int32_t nextSourceIndex=0;

    uint16_t value;
    UChar trail;

    /* resume a surrogate pair that was split across buffers */
    if(c!=0 && targetCapacity>0) {
        goto getTrail;
    }

    while(source<sourceLimit) {
        /*
         * Only the "no room for even one byte" case is caught here;
         * overflow caused by extension output is reported by _extFromU().
         */
        if(targetCapacity<=0) {
            /* target is full */
            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
            break;
        }

        /* fetch one code unit and, if needed, complete it to a code point */
        c=*source++;
        ++nextSourceIndex;
        if(U16_IS_SURROGATE(c)) {
            if(!U16_IS_SURROGATE_LEAD(c)) {
                /* unmatched trail (2nd) surrogate: callback(illegal) */
                *pErrorCode=U_ILLEGAL_CHAR_FOUND;
                break;
            }
getTrail:
            if(source>=sourceLimit) {
                /* no more input; c stays pending for the next call */
                break;
            }
            /* look at the following code unit */
            trail=*source;
            if(!U16_IS_TRAIL(trail)) {
                /* unmatched lead (1st) surrogate: callback(illegal) */
                *pErrorCode=U_ILLEGAL_CHAR_FOUND;
                break;
            }
            ++source;
            ++nextSourceIndex;
            c=U16_GET_SUPPLEMENTARY(c, trail);
            if(!hasSupplementary) {
                /* BMP-only codepages have no stage 1 entries for supplementary code points */
                goto unassigned;
            }
            /* otherwise look up the supplementary code point below */
        }

        /* map the code point in c to its single-byte result */
        value=MBCS_SINGLE_RESULT_FROM_U(table, results, c);

        if(value>=minValue) {
            /* assigned: exactly one byte, and there is known to be room for it */
            *target++=(uint8_t)value;
            if(offsets!=NULL) {
                *offsets++=sourceIndex;
            }
            --targetCapacity;

            /* done with this character, start the next one */
            c=0;
            sourceIndex=nextSourceIndex;
            continue;
        }

unassigned:
        /* not in the base table: try an extension mapping */
        pArgs->source=source;
        c=_extFromU(cnv, cnv->sharedData,
                    c, &source, sourceLimit,
                    &target, target+targetCapacity,
                    &offsets, sourceIndex,
                    pArgs->flush,
                    pErrorCode);
        /* account for any extra input the extension code consumed */
        nextSourceIndex+=(int32_t)(source-pArgs->source);

        if(U_FAILURE(*pErrorCode)) {
            /* not mappable or buffer overflow */
            break;
        }

        /* a mapping was written; the extension code moved target, so recompute */
        targetCapacity=(int32_t)(pArgs->targetLimit-(char *)target);

        /* done with this character, start the next one */
        sourceIndex=nextSourceIndex;
    }

    /* store the pending state back into the converter */
    cnv->fromUChar32=c;

    /* publish the advanced pointers to the caller */
    pArgs->source=source;
    pArgs->target=(char *)target;
    pArgs->offsets=offsets;
}

/*
 * This version of ucnv_MBCSFromUnicode() is optimized for single-byte codepages
 * that map only to and from the BMP.
 * In addition to single-byte/state optimizations, the offset calculations
 * become much easier.
 * It would be possible to use the sbcsIndex for UTF-8-friendly tables,
 * but measurements have shown that this diminishes performance
 * in more cases than it improves it.
 * See SVN revision 21013 (2007-feb-06) for the last version with #if switches
 * for various MBCS and SBCS optimizations.
3801f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 3802f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusstatic void 3803f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusucnv_MBCSSingleFromBMPWithOffsets(UConverterFromUnicodeArgs *pArgs, 3804f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UErrorCode *pErrorCode) { 3805f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UConverter *cnv; 3806f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius const UChar *source, *sourceLimit, *lastSource; 3807f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius uint8_t *target; 3808f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius int32_t targetCapacity, length; 3809f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius int32_t *offsets; 3810f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 3811f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius const uint16_t *table; 3812f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius const uint16_t *results; 3813f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 3814f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UChar32 c; 3815f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 3816f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius int32_t sourceIndex; 3817f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 3818f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius uint32_t asciiRoundtrips; 3819f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius uint16_t value, minValue; 3820f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 3821f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* set up the local pointers */ 3822f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius cnv=pArgs->converter; 3823f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius source=pArgs->source; 3824f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius sourceLimit=pArgs->sourceLimit; 3825f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius target=(uint8_t *)pArgs->target; 3826f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 
targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target); 3827f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius offsets=pArgs->offsets; 3828f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 3829f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius table=cnv->sharedData->mbcs.fromUnicodeTable; 3830f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) { 3831f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius results=(uint16_t *)cnv->sharedData->mbcs.swapLFNLFromUnicodeBytes; 3832f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 3833f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius results=(uint16_t *)cnv->sharedData->mbcs.fromUnicodeBytes; 3834f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 3835f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius asciiRoundtrips=cnv->sharedData->mbcs.asciiRoundtrips; 3836f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 3837f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(cnv->useFallback) { 3838f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* use all roundtrip and fallback results */ 3839f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius minValue=0x800; 3840f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 3841f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* use only roundtrips and fallbacks from private-use characters */ 3842f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius minValue=0xc00; 3843f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 3844f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 3845f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* get the converter state from UConverter */ 3846f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=cnv->fromUChar32; 3847f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 3848f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* sourceIndex=-1 if the current character began in the previous buffer */ 3849f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius sourceIndex= c==0 ? 
0 : -1; 3850f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius lastSource=source; 3851f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 3852f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* 3853f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * since the conversion here is 1:1 UChar:uint8_t, we need only one counter 3854f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * for the minimum of the sourceLength and targetCapacity 3855f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 3856f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length=(int32_t)(sourceLimit-source); 3857f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(length<targetCapacity) { 3858f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius targetCapacity=length; 3859f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 3860f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 3861f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* conversion loop */ 3862f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(c!=0 && targetCapacity>0) { 3863f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius goto getTrail; 3864f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 3865f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 3866f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius#if MBCS_UNROLL_SINGLE_FROM_BMP 3867f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* unrolling makes it slower on Pentium III/Windows 2000?! 
*/ 3868f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* unroll the loop with the most common case */ 3869f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusunrolled: 3870f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(targetCapacity>=4) { 3871f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius int32_t count, loops; 3872f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius uint16_t andedValues; 3873f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 3874f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius loops=count=targetCapacity>>2; 3875f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius do { 3876f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=*source++; 3877f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius andedValues=value=MBCS_SINGLE_RESULT_FROM_U(table, results, c); 3878f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *target++=(uint8_t)value; 3879f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=*source++; 3880f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius andedValues&=value=MBCS_SINGLE_RESULT_FROM_U(table, results, c); 3881f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *target++=(uint8_t)value; 3882f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=*source++; 3883f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius andedValues&=value=MBCS_SINGLE_RESULT_FROM_U(table, results, c); 3884f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *target++=(uint8_t)value; 3885f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=*source++; 3886f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius andedValues&=value=MBCS_SINGLE_RESULT_FROM_U(table, results, c); 3887f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *target++=(uint8_t)value; 3888f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 3889f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* were all 4 entries really valid? 
*/ 3890f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(andedValues<minValue) { 3891f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* no, return to the first of these 4 */ 3892f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius source-=4; 3893f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius target-=4; 3894f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 3895f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 3896f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } while(--count>0); 3897f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius count=loops-count; 3898f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius targetCapacity-=4*count; 3899f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 3900f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(offsets!=NULL) { 3901f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius lastSource+=4*count; 3902f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius while(count>0) { 3903f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *offsets++=sourceIndex++; 3904f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *offsets++=sourceIndex++; 3905f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *offsets++=sourceIndex++; 3906f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *offsets++=sourceIndex++; 3907f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius --count; 3908f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 3909f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 3910f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 3911f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=0; 3912f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 3913f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius#endif 3914f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 3915f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius while(targetCapacity>0) { 3916f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* 3917f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Get a correct Unicode code point: 
3918f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * a single UChar for a BMP code point or 3919f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * a matched surrogate pair for a "supplementary code point". 3920f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 3921f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=*source++; 3922f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* 3923f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Do not immediately check for single surrogates: 3924f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Assume that they are unassigned and check for them in that case. 3925f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * This speeds up the conversion of assigned characters. 3926f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 3927f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* convert the Unicode code point in c into codepage bytes */ 3928f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(c<=0x7f && IS_ASCII_ROUNDTRIP(c, asciiRoundtrips)) { 3929f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *target++=(uint8_t)c; 3930f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius --targetCapacity; 3931f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=0; 3932f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius continue; 3933f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 3934f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value=MBCS_SINGLE_RESULT_FROM_U(table, results, c); 3935f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* is this code point assigned, or do we use fallbacks? 
*/ 3936f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(value>=minValue) { 3937f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* assigned, write the output character bytes from value and length */ 3938f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* length==1 */ 3939f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* this is easy because we know that there is enough space */ 3940f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *target++=(uint8_t)value; 3941f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius --targetCapacity; 3942f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 3943f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* normal end of conversion: prepare for a new character */ 3944f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=0; 3945f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius continue; 3946f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if(!U16_IS_SURROGATE(c)) { 3947f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* normal, unassigned BMP character */ 3948f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if(U16_IS_SURROGATE_LEAD(c)) { 3949f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusgetTrail: 3950f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(source<sourceLimit) { 3951f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* test the following code unit */ 3952f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UChar trail=*source; 3953f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(U16_IS_TRAIL(trail)) { 3954f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ++source; 3955f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=U16_GET_SUPPLEMENTARY(c, trail); 3956f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* this codepage does not map supplementary code points */ 3957f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* callback(unassigned) */ 3958f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 3959f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* this is an 
unmatched lead code unit (1st surrogate) */ 3960f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* callback(illegal) */ 3961f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *pErrorCode=U_ILLEGAL_CHAR_FOUND; 3962f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 3963f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 3964f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 3965f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* no more input */ 3966f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if (pArgs->flush) { 3967f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *pErrorCode=U_TRUNCATED_CHAR_FOUND; 3968f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 3969f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 3970f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 3971f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 3972f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* this is an unmatched trail code unit (2nd surrogate) */ 3973f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* callback(illegal) */ 3974f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *pErrorCode=U_ILLEGAL_CHAR_FOUND; 3975f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 3976f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 3977f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 3978f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* c does not have a mapping */ 3979f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 3980f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* get the number of code units for c to correctly advance sourceIndex */ 3981f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length=U16_LENGTH(c); 3982f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 3983f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* set offsets since the start or the last extension */ 3984f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(offsets!=NULL) { 3985f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius int32_t 
count=(int32_t)(source-lastSource); 3986f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 3987f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* do not set the offset for this character */ 3988f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius count-=length; 3989f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 3990f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius while(count>0) { 3991f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *offsets++=sourceIndex++; 3992f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius --count; 3993f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 3994f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* offsets and sourceIndex are now set for the current character */ 3995f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 3996f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 3997f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* try an extension mapping */ 3998f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius lastSource=source; 3999f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=_extFromU(cnv, cnv->sharedData, 4000f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c, &source, sourceLimit, 4001f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius &target, (const uint8_t *)(pArgs->targetLimit), 4002f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius &offsets, sourceIndex, 4003f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius pArgs->flush, 4004f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius pErrorCode); 4005f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius sourceIndex+=length+(int32_t)(source-lastSource); 4006f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius lastSource=source; 4007f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 4008f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(U_FAILURE(*pErrorCode)) { 4009f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* not mappable or buffer overflow */ 4010f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 
4011f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 4012f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* a mapping was written to the target, continue */ 4013f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 4014f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* recalculate the targetCapacity after an extension mapping */ 4015f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius targetCapacity=(int32_t)(pArgs->targetLimit-(char *)target); 4016f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length=(int32_t)(sourceLimit-source); 4017f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(length<targetCapacity) { 4018f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius targetCapacity=length; 4019f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4020f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4021f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 4022f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius#if MBCS_UNROLL_SINGLE_FROM_BMP 4023f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* unrolling makes it slower on Pentium III/Windows 2000?! 
*/ 4024f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius goto unrolled; 4025f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius#endif 4026f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4027f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 4028f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(U_SUCCESS(*pErrorCode) && source<sourceLimit && target>=(uint8_t *)pArgs->targetLimit) { 4029f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* target is full */ 4030f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 4031f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4032f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 4033f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* set offsets since the start or the last callback */ 4034f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(offsets!=NULL) { 4035f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius size_t count=source-lastSource; 4036f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if (count > 0 && *pErrorCode == U_TRUNCATED_CHAR_FOUND) { 4037f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* 4038f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius Caller gave us a partial supplementary character, 4039f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius which this function couldn't convert in any case. 4040f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius The callback will handle the offset. 
4041f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 4042f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius count--; 4043f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4044f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius while(count>0) { 4045f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *offsets++=sourceIndex++; 4046f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius --count; 4047f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4048f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4049f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 4050f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* set the converter state back into UConverter */ 4051f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius cnv->fromUChar32=c; 4052f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 4053f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* write back the updated pointers */ 4054f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius pArgs->source=source; 4055f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius pArgs->target=(char *)target; 4056f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius pArgs->offsets=offsets; 4057f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius} 4058f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 4059f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusU_CFUNC void 4060f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusucnv_MBCSFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs, 4061f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UErrorCode *pErrorCode) { 4062f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UConverter *cnv; 4063f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius const UChar *source, *sourceLimit; 4064f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius uint8_t *target; 4065f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius int32_t targetCapacity; 4066f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius int32_t *offsets; 4067f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 
4068f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius const uint16_t *table; 4069f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius const uint16_t *mbcsIndex; 4070f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius const uint8_t *p, *bytes; 4071f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius uint8_t outputType; 4072f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 4073f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UChar32 c; 4074f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 4075f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius int32_t prevSourceIndex, sourceIndex, nextSourceIndex; 4076f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 4077f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius uint32_t stage2Entry; 4078f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius uint32_t asciiRoundtrips; 4079f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius uint32_t value; 4080f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* Shift-In and Shift-Out byte sequences differ by encoding scheme. 
*/ 4081f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius uint8_t siBytes[2] = {0, 0}; 4082f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius uint8_t soBytes[2] = {0, 0}; 4083f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius uint8_t siLength, soLength; 4084f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius int32_t length = 0, prevLength; 4085f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius uint8_t unicodeMask; 4086f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 4087f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius cnv=pArgs->converter; 4088f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 4089f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(cnv->preFromUFirstCP>=0) { 4090f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* 4091f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * pass sourceIndex=-1 because we continue from an earlier buffer 4092f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * in the future, this may change with continuous offsets 4093f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 4094f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ucnv_extContinueMatchFromU(cnv, pArgs, -1, pErrorCode); 4095f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 4096f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(U_FAILURE(*pErrorCode) || cnv->preFromULength<0) { 4097f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return; 4098f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4099f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4100f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 4101f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* use optimized function if possible */ 4102f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius outputType=cnv->sharedData->mbcs.outputType; 4103f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius unicodeMask=cnv->sharedData->mbcs.unicodeMask; 4104f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(outputType==MBCS_OUTPUT_1 && !(unicodeMask&UCNV_HAS_SURROGATES)) { 
4105f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(!(unicodeMask&UCNV_HAS_SUPPLEMENTARY)) { 4106f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ucnv_MBCSSingleFromBMPWithOffsets(pArgs, pErrorCode); 4107f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 4108f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ucnv_MBCSSingleFromUnicodeWithOffsets(pArgs, pErrorCode); 4109f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4110f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return; 4111f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if(outputType==MBCS_OUTPUT_2 && cnv->sharedData->mbcs.utf8Friendly) { 4112f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ucnv_MBCSDoubleFromUnicodeWithOffsets(pArgs, pErrorCode); 4113f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return; 4114f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4115f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 4116f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* set up the local pointers */ 4117f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius source=pArgs->source; 4118f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius sourceLimit=pArgs->sourceLimit; 4119f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius target=(uint8_t *)pArgs->target; 4120f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target); 4121f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius offsets=pArgs->offsets; 4122f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 4123f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius table=cnv->sharedData->mbcs.fromUnicodeTable; 4124f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(cnv->sharedData->mbcs.utf8Friendly) { 4125f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius mbcsIndex=cnv->sharedData->mbcs.mbcsIndex; 4126f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 4127f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius mbcsIndex=NULL; 4128f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 
4129f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) { 4130f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius bytes=cnv->sharedData->mbcs.swapLFNLFromUnicodeBytes; 4131f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 4132f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius bytes=cnv->sharedData->mbcs.fromUnicodeBytes; 4133f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4134f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius asciiRoundtrips=cnv->sharedData->mbcs.asciiRoundtrips; 4135f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 4136f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* get the converter state from UConverter */ 4137f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=cnv->fromUChar32; 4138f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 4139f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(outputType==MBCS_OUTPUT_2_SISO) { 4140f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius prevLength=cnv->fromUnicodeStatus; 4141f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(prevLength==0) { 4142f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* set the real value */ 4143f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius prevLength=1; 4144f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4145f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 4146f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* prevent fromUnicodeStatus from being set to something non-0 */ 4147f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius prevLength=0; 4148f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4149f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 4150f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* sourceIndex=-1 if the current character began in the previous buffer */ 4151f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius prevSourceIndex=-1; 4152f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius sourceIndex= c==0 ? 
0 : -1; 4153f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius nextSourceIndex=0; 4154f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 4155f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* Get the SI/SO character for the converter */ 4156f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius siLength = getSISOBytes(SI, cnv->options, siBytes); 4157f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius soLength = getSISOBytes(SO, cnv->options, soBytes); 4158f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 4159f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* conversion loop */ 4160f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* 4161f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * This is another piece of ugly code: 4162f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * A goto into the loop if the converter state contains a first surrogate 4163f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * from the previous function call. 4164f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * It saves me to check in each loop iteration a check of if(c==0) 4165f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * and duplicating the trail-surrogate-handling code in the else 4166f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * branch of that check. 4167f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * I could not find any other way to get around this other than 4168f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * using a function call for the conversion and callback, which would 4169f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * be even more inefficient. 
4170f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 4171f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Markus Scherer 2000-jul-19 4172f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 4173f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(c!=0 && targetCapacity>0) { 4174f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius goto getTrail; 4175f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4176f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 4177f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius while(source<sourceLimit) { 4178f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* 4179f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * This following test is to see if available input would overflow the output. 4180f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * It does not catch output of more than one byte that 4181f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * overflows as a result of a multi-byte character or callback output 4182f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * from the last source character. 4183f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Therefore, those situations also test for overflows and will 4184f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * then break the loop, too. 4185f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 4186f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(targetCapacity>0) { 4187f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* 4188f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Get a correct Unicode code point: 4189f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * a single UChar for a BMP code point or 4190f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * a matched surrogate pair for a "supplementary code point". 
4191f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 4192f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=*source++; 4193f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ++nextSourceIndex; 4194f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(c<=0x7f && IS_ASCII_ROUNDTRIP(c, asciiRoundtrips)) { 4195f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *target++=(uint8_t)c; 4196f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(offsets!=NULL) { 4197f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *offsets++=sourceIndex; 4198f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius prevSourceIndex=sourceIndex; 4199f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius sourceIndex=nextSourceIndex; 4200f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4201f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius --targetCapacity; 4202f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=0; 4203f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius continue; 4204f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4205f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* 4206f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * utf8Friendly table: Test for <=0xd7ff rather than <=MBCS_FAST_MAX 4207f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * to avoid dealing with surrogates. 4208f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * MBCS_FAST_MAX must be >=0xd7ff. 4209f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 4210f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(c<=0xd7ff && mbcsIndex!=NULL) { 4211f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value=mbcsIndex[c>>6]; 4212f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 4213f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* get the bytes and the length for the output (copied from below and adapted for utf8Friendly data) */ 4214f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* There are only roundtrips (!=0) and no-mapping (==0) entries. 
*/ 4215f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius switch(outputType) { 4216f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius case MBCS_OUTPUT_2: 4217f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value=((const uint16_t *)bytes)[value +(c&0x3f)]; 4218f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(value<=0xff) { 4219f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(value==0) { 4220f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius goto unassigned; 4221f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 4222f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length=1; 4223f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4224f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 4225f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length=2; 4226f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4227f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 4228f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius case MBCS_OUTPUT_2_SISO: 4229f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* 1/2-byte stateful with Shift-In/Shift-Out */ 4230f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* 4231f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Save the old state in the converter object 4232f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * right here, then change the local prevLength state variable if necessary. 4233f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Then, if this character turns out to be unassigned or a fallback that 4234f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * is not taken, the callback code must not save the new state in the converter 4235f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * because the new state is for a character that is not output. 4236f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * However, the callback must still restore the state from the converter 4237f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * in case the callback function changed it for its output. 
4238f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 4239f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius cnv->fromUnicodeStatus=prevLength; /* save the old state */ 4240f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value=((const uint16_t *)bytes)[value +(c&0x3f)]; 4241f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(value<=0xff) { 4242f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(value==0) { 4243f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius goto unassigned; 4244f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if(prevLength<=1) { 4245f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length=1; 4246f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 4247f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* change from double-byte mode to single-byte */ 4248f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if (siLength == 1) { 4249f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value|=(uint32_t)siBytes[0]<<8; 4250f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length = 2; 4251f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if (siLength == 2) { 4252f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value|=(uint32_t)siBytes[1]<<8; 4253f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value|=(uint32_t)siBytes[0]<<16; 4254f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length = 3; 4255f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4256f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius prevLength=1; 4257f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4258f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 4259f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(prevLength==2) { 4260f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length=2; 4261f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 4262f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* change from single-byte mode to double-byte */ 4263f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if (soLength == 1) { 
4264f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value|=(uint32_t)soBytes[0]<<16; 4265f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length = 3; 4266f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if (soLength == 2) { 4267f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value|=(uint32_t)soBytes[1]<<16; 4268f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value|=(uint32_t)soBytes[0]<<24; 4269f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length = 4; 4270f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4271f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius prevLength=2; 4272f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4273f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4274f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 4275f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius case MBCS_OUTPUT_DBCS_ONLY: 4276f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* table with single-byte results, but only DBCS mappings used */ 4277f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value=((const uint16_t *)bytes)[value +(c&0x3f)]; 4278f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(value<=0xff) { 4279f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* no mapping or SBCS result, not taken for DBCS-only */ 4280f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius goto unassigned; 4281f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 4282f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length=2; 4283f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4284f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 4285f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius case MBCS_OUTPUT_3: 4286f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius p=bytes+(value+(c&0x3f))*3; 4287f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2]; 4288f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(value<=0xff) { 4289f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 
if(value==0) { 4290f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius goto unassigned; 4291f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 4292f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length=1; 4293f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4294f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if(value<=0xffff) { 4295f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length=2; 4296f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 4297f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length=3; 4298f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4299f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 4300f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius case MBCS_OUTPUT_4: 4301f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value=((const uint32_t *)bytes)[value +(c&0x3f)]; 4302f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(value<=0xff) { 4303f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(value==0) { 4304f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius goto unassigned; 4305f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 4306f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length=1; 4307f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4308f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if(value<=0xffff) { 4309f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length=2; 4310f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if(value<=0xffffff) { 4311f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length=3; 4312f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 4313f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length=4; 4314f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4315f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 4316f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius case MBCS_OUTPUT_3_EUC: 4317f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value=((const uint16_t *)bytes)[value +(c&0x3f)]; 
4318f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* EUC 16-bit fixed-length representation */ 4319f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(value<=0xff) { 4320f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(value==0) { 4321f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius goto unassigned; 4322f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 4323f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length=1; 4324f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4325f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if((value&0x8000)==0) { 4326f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value|=0x8e8000; 4327f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length=3; 4328f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if((value&0x80)==0) { 4329f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value|=0x8f0080; 4330f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length=3; 4331f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 4332f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length=2; 4333f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4334f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 4335f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius case MBCS_OUTPUT_4_EUC: 4336f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius p=bytes+(value+(c&0x3f))*3; 4337f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2]; 4338f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* EUC 16-bit fixed-length representation applied to the first two bytes */ 4339f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(value<=0xff) { 4340f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(value==0) { 4341f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius goto unassigned; 4342f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 4343f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length=1; 4344f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 
4345f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if(value<=0xffff) { 4346f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length=2; 4347f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if((value&0x800000)==0) { 4348f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value|=0x8e800000; 4349f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length=4; 4350f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if((value&0x8000)==0) { 4351f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value|=0x8f008000; 4352f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length=4; 4353f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 4354f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length=3; 4355f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4356f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 4357f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius default: 4358f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* must not occur */ 4359f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* 4360f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * To avoid compiler warnings that value & length may be 4361f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * used without having been initialized, we set them here. 4362f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * In reality, this is unreachable code. 4363f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Not having a default branch also causes warnings with 4364f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * some compilers. 
4365f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 4366f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value=0; 4367f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length=0; 4368f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 4369f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4370f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* output the value */ 4371f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 4372f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* 4373f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * This also tests if the codepage maps single surrogates. 4374f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * If it does, then surrogates are not paired but mapped separately. 4375f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Note that in this case unmatched surrogates are not detected. 4376f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 4377f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(U16_IS_SURROGATE(c) && !(unicodeMask&UCNV_HAS_SURROGATES)) { 4378f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(U16_IS_SURROGATE_LEAD(c)) { 4379f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusgetTrail: 4380f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(source<sourceLimit) { 4381f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* test the following code unit */ 4382f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UChar trail=*source; 4383f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(U16_IS_TRAIL(trail)) { 4384f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ++source; 4385f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ++nextSourceIndex; 4386f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=U16_GET_SUPPLEMENTARY(c, trail); 4387f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(!(unicodeMask&UCNV_HAS_SUPPLEMENTARY)) { 4388f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* BMP-only codepages are stored without stage 1 entries for supplementary code points */ 
4389f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius cnv->fromUnicodeStatus=prevLength; /* save the old state */ 4390f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* callback(unassigned) */ 4391f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius goto unassigned; 4392f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4393f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* convert this supplementary code point */ 4394f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* exit this condition tree */ 4395f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 4396f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* this is an unmatched lead code unit (1st surrogate) */ 4397f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* callback(illegal) */ 4398f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *pErrorCode=U_ILLEGAL_CHAR_FOUND; 4399f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 4400f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4401f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 4402f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* no more input */ 4403f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 4404f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4405f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 4406f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* this is an unmatched trail code unit (2nd surrogate) */ 4407f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* callback(illegal) */ 4408f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *pErrorCode=U_ILLEGAL_CHAR_FOUND; 4409f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 4410f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4411f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4412f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 4413f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* convert the Unicode code point in c into codepage bytes */ 4414f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 
4415f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* 4416f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * The basic lookup is a triple-stage compact array (trie) lookup. 4417f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * For details see the beginning of this file. 4418f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 4419f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Single-byte codepages are handled with a different data structure 4420f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * by _MBCSSingle... functions. 4421f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 4422f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * The result consists of a 32-bit value from stage 2 and 4423f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * a pointer to as many bytes as are stored per character. 4424f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * The pointer points to the character's bytes in stage 3. 4425f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Bits 15..0 of the stage 2 entry contain the stage 3 index 4426f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * for that pointer, while bits 31..16 are flags for which of 4427f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * the 16 characters in the block are roundtrip-assigned. 4428f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 4429f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * For 2-byte and 4-byte codepages, the bytes are stored as uint16_t 4430f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * respectively as uint32_t, in the platform encoding. 4431f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * For 3-byte codepages, the bytes are always stored in big-endian order. 
4432f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 4433f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * For EUC encodings that use only either 0x8e or 0x8f as the first 4434f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * byte of their longest byte sequences, the first two bytes in 4435f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * this third stage indicate with their 7th bits whether these bytes 4436f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * are to be written directly or actually need to be preceded by 4437f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * one of the two Single-Shift codes. With this, the third stage 4438f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * stores one byte fewer per character than the actual maximum length of 4439f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * EUC byte sequences. 4440f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 4441f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Other than that, leading zero bytes are removed and the other 4442f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * bytes output. A single zero byte may be output if the "assigned" 4443f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * bit in stage 2 was on. 4444f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * The data structure does not support zero byte output as a fallback, 4445f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * and also does not allow output of leading zeros. 
4446f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 4447f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius stage2Entry=MBCS_STAGE_2_FROM_U(table, c); 4448f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 4449f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* get the bytes and the length for the output */ 4450f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius switch(outputType) { 4451f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius case MBCS_OUTPUT_2: 4452f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value=MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c); 4453f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(value<=0xff) { 4454f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length=1; 4455f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 4456f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length=2; 4457f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4458f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 4459f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius case MBCS_OUTPUT_2_SISO: 4460f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* 1/2-byte stateful with Shift-In/Shift-Out */ 4461f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* 4462f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Save the old state in the converter object 4463f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * right here, then change the local prevLength state variable if necessary. 4464f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Then, if this character turns out to be unassigned or a fallback that 4465f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * is not taken, the callback code must not save the new state in the converter 4466f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * because the new state is for a character that is not output. 
4467f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * However, the callback must still restore the state from the converter 4468f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * in case the callback function changed it for its output. 4469f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 4470f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius cnv->fromUnicodeStatus=prevLength; /* save the old state */ 4471f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value=MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c); 4472f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(value<=0xff) { 4473f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(value==0 && MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c)==0) { 4474f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* no mapping, leave value==0 */ 4475f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length=0; 4476f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if(prevLength<=1) { 4477f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length=1; 4478f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 4479f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* change from double-byte mode to single-byte */ 4480f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if (siLength == 1) { 4481f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value|=(uint32_t)siBytes[0]<<8; 4482f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length = 2; 4483f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if (siLength == 2) { 4484f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value|=(uint32_t)siBytes[1]<<8; 4485f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value|=(uint32_t)siBytes[0]<<16; 4486f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length = 3; 4487f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4488f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius prevLength=1; 4489f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4490f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 
4491f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(prevLength==2) { 4492f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length=2; 4493f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 4494f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* change from single-byte mode to double-byte */ 4495f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if (soLength == 1) { 4496f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value|=(uint32_t)soBytes[0]<<16; 4497f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length = 3; 4498f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if (soLength == 2) { 4499f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value|=(uint32_t)soBytes[1]<<16; 4500f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value|=(uint32_t)soBytes[0]<<24; 4501f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length = 4; 4502f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4503f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius prevLength=2; 4504f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4505f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4506f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 4507f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius case MBCS_OUTPUT_DBCS_ONLY: 4508f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* table with single-byte results, but only DBCS mappings used */ 4509f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value=MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c); 4510f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(value<=0xff) { 4511f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* no mapping or SBCS result, not taken for DBCS-only */ 4512f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value=stage2Entry=0; /* stage2Entry=0 to reset roundtrip flags */ 4513f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length=0; 4514f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 4515f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length=2; 
4516f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4517f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 4518f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius case MBCS_OUTPUT_3: 4519f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius p=MBCS_POINTER_3_FROM_STAGE_2(bytes, stage2Entry, c); 4520f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2]; 4521f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(value<=0xff) { 4522f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length=1; 4523f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if(value<=0xffff) { 4524f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length=2; 4525f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 4526f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length=3; 4527f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4528f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 4529f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius case MBCS_OUTPUT_4: 4530f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value=MBCS_VALUE_4_FROM_STAGE_2(bytes, stage2Entry, c); 4531f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(value<=0xff) { 4532f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length=1; 4533f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if(value<=0xffff) { 4534f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length=2; 4535f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if(value<=0xffffff) { 4536f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length=3; 4537f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 4538f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length=4; 4539f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4540f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 4541f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius case MBCS_OUTPUT_3_EUC: 4542f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value=MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c); 
4543f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* EUC 16-bit fixed-length representation */ 4544f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(value<=0xff) { 4545f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length=1; 4546f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if((value&0x8000)==0) { 4547f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value|=0x8e8000; 4548f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length=3; 4549f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if((value&0x80)==0) { 4550f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value|=0x8f0080; 4551f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length=3; 4552f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 4553f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length=2; 4554f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4555f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 4556f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius case MBCS_OUTPUT_4_EUC: 4557f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius p=MBCS_POINTER_3_FROM_STAGE_2(bytes, stage2Entry, c); 4558f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2]; 4559f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* EUC 16-bit fixed-length representation applied to the first two bytes */ 4560f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(value<=0xff) { 4561f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length=1; 4562f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if(value<=0xffff) { 4563f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length=2; 4564f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if((value&0x800000)==0) { 4565f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value|=0x8e800000; 4566f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length=4; 4567f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if((value&0x8000)==0) { 
4568f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value|=0x8f008000; 4569f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length=4; 4570f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 4571f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length=3; 4572f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4573f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 4574f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius default: 4575f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* must not occur */ 4576f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* 4577f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * To avoid compiler warnings that value & length may be 4578f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * used without having been initialized, we set them here. 4579f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * In reality, this is unreachable code. 4580f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Not having a default branch also causes warnings with 4581f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * some compilers. 4582f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 4583f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value=stage2Entry=0; /* stage2Entry=0 to reset roundtrip flags */ 4584f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length=0; 4585f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 4586f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4587f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 4588f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* is this code point assigned, or do we use fallbacks? 
*/ 4589f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(!(MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c)!=0 || 4590f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius (UCNV_FROM_U_USE_FALLBACK(cnv, c) && value!=0)) 4591f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ) { 4592f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* 4593f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * We allow a 0 byte output if the "assigned" bit is set for this entry. 4594f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * There is no way with this data structure for fallback output 4595f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * to be a zero byte. 4596f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 4597f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 4598f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusunassigned: 4599f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* try an extension mapping */ 4600f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius pArgs->source=source; 4601f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=_extFromU(cnv, cnv->sharedData, 4602f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c, &source, sourceLimit, 4603f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius &target, target+targetCapacity, 4604f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius &offsets, sourceIndex, 4605f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius pArgs->flush, 4606f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius pErrorCode); 4607f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius nextSourceIndex+=(int32_t)(source-pArgs->source); 4608f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius prevLength=cnv->fromUnicodeStatus; /* restore SISO state */ 4609f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 4610f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(U_FAILURE(*pErrorCode)) { 4611f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* not mappable or buffer overflow */ 4612f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 
4613f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 4614f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* a mapping was written to the target, continue */ 4615f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 4616f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* recalculate the targetCapacity after an extension mapping */ 4617f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius targetCapacity=(int32_t)(pArgs->targetLimit-(char *)target); 4618f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 4619f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* normal end of conversion: prepare for a new character */ 4620f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(offsets!=NULL) { 4621f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius prevSourceIndex=sourceIndex; 4622f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius sourceIndex=nextSourceIndex; 4623f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4624f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius continue; 4625f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4626f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4627f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4628f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 4629f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* write the output character bytes from value and length */ 4630f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* from the first if in the loop we know that targetCapacity>0 */ 4631f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(length<=targetCapacity) { 4632f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(offsets==NULL) { 4633f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius switch(length) { 4634f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* each branch falls through to the next one */ 4635f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius case 4: 4636f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *target++=(uint8_t)(value>>24); 4637f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 
case 3: /*fall through*/ 4638f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *target++=(uint8_t)(value>>16); 4639f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius case 2: /*fall through*/ 4640f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *target++=(uint8_t)(value>>8); 4641f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius case 1: /*fall through*/ 4642f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *target++=(uint8_t)value; 4643f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius default: 4644f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* will never occur */ 4645f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 4646f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4647f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 4648f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius switch(length) { 4649f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* each branch falls through to the next one */ 4650f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius case 4: 4651f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *target++=(uint8_t)(value>>24); 4652f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *offsets++=sourceIndex; 4653f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius case 3: /*fall through*/ 4654f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *target++=(uint8_t)(value>>16); 4655f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *offsets++=sourceIndex; 4656f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius case 2: /*fall through*/ 4657f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *target++=(uint8_t)(value>>8); 4658f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *offsets++=sourceIndex; 4659f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius case 1: /*fall through*/ 4660f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *target++=(uint8_t)value; 4661f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *offsets++=sourceIndex; 4662f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius default: 
4663f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* will never occur */ 4664f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 4665f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4666f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4667f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius targetCapacity-=length; 4668f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 4669f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius uint8_t *charErrorBuffer; 4670f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 4671f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* 4672f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * We actually do this backwards here: 4673f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * In order to save an intermediate variable, we output 4674f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * first to the overflow buffer what does not fit into the 4675f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * regular target. 4676f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 4677f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* we know that 1<=targetCapacity<length<=4 */ 4678f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length-=targetCapacity; 4679f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius charErrorBuffer=(uint8_t *)cnv->charErrorBuffer; 4680f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius switch(length) { 4681f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* each branch falls through to the next one */ 4682f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius case 3: 4683f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *charErrorBuffer++=(uint8_t)(value>>16); 4684f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius case 2: /*fall through*/ 4685f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *charErrorBuffer++=(uint8_t)(value>>8); 4686f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius case 1: /*fall through*/ 4687f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *charErrorBuffer=(uint8_t)value; 
4688f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius default: 4689f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* will never occur */ 4690f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 4691f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4692f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius cnv->charErrorBufferLength=(int8_t)length; 4693f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 4694f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* now output what fits into the regular target */ 4695f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value>>=8*length; /* length was reduced by targetCapacity */ 4696f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius switch(targetCapacity) { 4697f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* each branch falls through to the next one */ 4698f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius case 3: 4699f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *target++=(uint8_t)(value>>16); 4700f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(offsets!=NULL) { 4701f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *offsets++=sourceIndex; 4702f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4703f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius case 2: /*fall through*/ 4704f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *target++=(uint8_t)(value>>8); 4705f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(offsets!=NULL) { 4706f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *offsets++=sourceIndex; 4707f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4708f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius case 1: /*fall through*/ 4709f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *target++=(uint8_t)value; 4710f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(offsets!=NULL) { 4711f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *offsets++=sourceIndex; 4712f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4713f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius default: 
4714f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* will never occur */ 4715f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 4716f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4717f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 4718f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* target overflow */ 4719f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius targetCapacity=0; 4720f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 4721f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=0; 4722f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 4723f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4724f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 4725f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* normal end of conversion: prepare for a new character */ 4726f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=0; 4727f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(offsets!=NULL) { 4728f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius prevSourceIndex=sourceIndex; 4729f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius sourceIndex=nextSourceIndex; 4730f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4731f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius continue; 4732f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 4733f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* target is full */ 4734f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 4735f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 4736f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4737f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4738f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 4739f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* 4740f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * the end of the input stream and detection of truncated input 4741f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * are handled by the framework, but 
for EBCDIC_STATEFUL conversion 4742f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * we need to emit an SI at the very end 4743f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 4744f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * conditions: 4745f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * successful 4746f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * EBCDIC_STATEFUL in DBCS mode 4747f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * end of input and no truncated input 4748f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 4749f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if( U_SUCCESS(*pErrorCode) && 4750f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius outputType==MBCS_OUTPUT_2_SISO && prevLength==2 && 4751f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius pArgs->flush && source>=sourceLimit && c==0 4752f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ) { 4753f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* EBCDIC_STATEFUL ending with DBCS: emit an SI to return the output stream to SBCS */ 4754f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(targetCapacity>0) { 4755f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *target++=(uint8_t)siBytes[0]; 4756f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if (siLength == 2) { 4757f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if (targetCapacity<2) { 4758f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius cnv->charErrorBuffer[0]=(uint8_t)siBytes[1]; 4759f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius cnv->charErrorBufferLength=1; 4760f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 4761f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 4762f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *target++=(uint8_t)siBytes[1]; 4763f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4764f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4765f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(offsets!=NULL) { 
4766f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* set the last source character's index (sourceIndex points at sourceLimit now) */ 4767f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *offsets++=prevSourceIndex; 4768f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4769f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 4770f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* target is full */ 4771f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius cnv->charErrorBuffer[0]=(uint8_t)siBytes[0]; 4772f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if (siLength == 2) { 4773f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius cnv->charErrorBuffer[1]=(uint8_t)siBytes[1]; 4774f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4775f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius cnv->charErrorBufferLength=siLength; 4776f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 4777f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4778f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius prevLength=1; /* we switched into SBCS */ 4779f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4780f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 4781f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* set the converter state back into UConverter */ 4782f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius cnv->fromUChar32=c; 4783f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius cnv->fromUnicodeStatus=prevLength; 4784f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 4785f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* write back the updated pointers */ 4786f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius pArgs->source=source; 4787f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius pArgs->target=(char *)target; 4788f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius pArgs->offsets=offsets; 4789f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius} 4790f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 
4791f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius/* 4792f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * This is another simple conversion function for internal use by other 4793f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * conversion implementations. 4794f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * It does not use the converter state nor call callbacks. 4795f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * It does not handle the EBCDIC swaplfnl option (set in UConverter). 4796f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * It handles conversion extensions but not GB 18030. 4797f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 4798f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * It converts one single Unicode code point into codepage bytes, encoded 4799f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * as one 32-bit value. The function returns the number of bytes in *pValue: 4800f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 1..4 the number of bytes in *pValue 4801f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 0 unassigned (*pValue undefined) 4802f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * -1 illegal (currently not used, *pValue undefined) 4803f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 4804f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * *pValue will contain the resulting bytes with the last byte in bits 7..0, 4805f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * the second to last byte in bits 15..8, etc. 4806f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Currently, the function assumes but does not check that 0<=c<=0x10ffff. 
4807f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 4808f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusU_CFUNC int32_t 4809f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusucnv_MBCSFromUChar32(UConverterSharedData *sharedData, 4810f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UChar32 c, uint32_t *pValue, 4811f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UBool useFallback) { 4812f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius const int32_t *cx; 4813f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius const uint16_t *table; 4814f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius#if 0 4815f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius/* #if 0 because this is not currently used in ICU - reduce code, increase code coverage */ 4816f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius const uint8_t *p; 4817f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius#endif 4818f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius uint32_t stage2Entry; 4819f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius uint32_t value; 4820f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius int32_t length; 4821f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 4822f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* BMP-only codepages are stored without stage 1 entries for supplementary code points */ 4823f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(c<=0xffff || (sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) { 4824f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius table=sharedData->mbcs.fromUnicodeTable; 4825f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 4826f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* convert the Unicode code point in c into codepage bytes (same as in _MBCSFromUnicodeWithOffsets) */ 4827f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(sharedData->mbcs.outputType==MBCS_OUTPUT_1) { 4828f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value=MBCS_SINGLE_RESULT_FROM_U(table, (uint16_t *)sharedData->mbcs.fromUnicodeBytes, c); 
4829f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* is this code point assigned, or do we use fallbacks? */ 4830f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(useFallback ? value>=0x800 : value>=0xc00) { 4831f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *pValue=value&0xff; 4832f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return 1; 4833f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4834f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else /* outputType!=MBCS_OUTPUT_1 */ { 4835f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius stage2Entry=MBCS_STAGE_2_FROM_U(table, c); 4836f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 4837f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* get the bytes and the length for the output */ 4838f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius switch(sharedData->mbcs.outputType) { 4839f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius case MBCS_OUTPUT_2: 4840f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c); 4841f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(value<=0xff) { 4842f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length=1; 4843f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 4844f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length=2; 4845f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4846f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 4847f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius#if 0 4848f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius/* #if 0 because this is not currently used in ICU - reduce code, increase code coverage */ 4849f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius case MBCS_OUTPUT_DBCS_ONLY: 4850f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* table with single-byte results, but only DBCS mappings used */ 4851f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, 
stage2Entry, c); 4852f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(value<=0xff) { 4853f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* no mapping or SBCS result, not taken for DBCS-only */ 4854f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value=stage2Entry=0; /* stage2Entry=0 to reset roundtrip flags */ 4855f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length=0; 4856f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 4857f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length=2; 4858f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4859f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 4860f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius case MBCS_OUTPUT_3: 4861f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius p=MBCS_POINTER_3_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c); 4862f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2]; 4863f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(value<=0xff) { 4864f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length=1; 4865f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if(value<=0xffff) { 4866f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length=2; 4867f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 4868f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length=3; 4869f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4870f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 4871f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius case MBCS_OUTPUT_4: 4872f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value=MBCS_VALUE_4_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c); 4873f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(value<=0xff) { 4874f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length=1; 4875f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if(value<=0xffff) { 4876f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length=2; 
4877f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if(value<=0xffffff) { 4878f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length=3; 4879f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 4880f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length=4; 4881f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4882f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 4883f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius case MBCS_OUTPUT_3_EUC: 4884f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c); 4885f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* EUC 16-bit fixed-length representation */ 4886f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(value<=0xff) { 4887f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length=1; 4888f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if((value&0x8000)==0) { 4889f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value|=0x8e8000; 4890f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length=3; 4891f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if((value&0x80)==0) { 4892f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value|=0x8f0080; 4893f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length=3; 4894f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 4895f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length=2; 4896f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4897f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 4898f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius case MBCS_OUTPUT_4_EUC: 4899f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius p=MBCS_POINTER_3_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c); 4900f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2]; 4901f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* EUC 16-bit fixed-length representation applied to the first two bytes */ 
4902f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(value<=0xff) { 4903f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length=1; 4904f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if(value<=0xffff) { 4905f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length=2; 4906f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if((value&0x800000)==0) { 4907f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value|=0x8e800000; 4908f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length=4; 4909f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if((value&0x8000)==0) { 4910f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value|=0x8f008000; 4911f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length=4; 4912f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 4913f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length=3; 4914f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4915f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 4916f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius#endif 4917f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius default: 4918f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* must not occur */ 4919f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return -1; 4920f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4921f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 4922f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* is this code point assigned, or do we use fallbacks? */ 4923f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if( MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c) || 4924f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius (FROM_U_USE_FALLBACK(useFallback, c) && value!=0) 4925f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ) { 4926f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* 4927f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * We allow a 0 byte output if the "assigned" bit is set for this entry. 
4928f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * There is no way with this data structure for fallback output 4929f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * to be a zero byte. 4930f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 4931f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* assigned */ 4932f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *pValue=value; 4933f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return length; 4934f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4935f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4936f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4937f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 4938f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius cx=sharedData->mbcs.extIndexes; 4939f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(cx!=NULL) { 4940f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length=ucnv_extSimpleMatchFromU(cx, c, pValue, useFallback); 4941f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return length>=0 ? length : -length; /* return abs(length); */ 4942f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4943f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 4944f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* unassigned */ 4945f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return 0; 4946f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius} 4947f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 4948f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 4949f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius#if 0 4950f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius/* 4951f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * This function has been moved to ucnv2022.c for inlining. 
4952f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * This implementation is here only for documentation purposes 4953f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 4954f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 4955f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius/** 4956f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * This version of ucnv_MBCSFromUChar32() is optimized for single-byte codepages. 4957f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * It does not handle the EBCDIC swaplfnl option (set in UConverter). 4958f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * It does not handle conversion extensions (_extFromU()). 4959f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 4960f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * It returns the codepage byte for the code point, or -1 if it is unassigned. 4961f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 4962f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusU_CFUNC int32_t 4963f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusucnv_MBCSSingleFromUChar32(UConverterSharedData *sharedData, 4964f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UChar32 c, 4965f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UBool useFallback) { 4966f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius const uint16_t *table; 4967f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius int32_t value; 4968f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 4969f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* BMP-only codepages are stored without stage 1 entries for supplementary code points */ 4970f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(c>=0x10000 && !(sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) { 4971f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return -1; 4972f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4973f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 4974f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* convert the Unicode code point in c into codepage 
bytes (same as in _MBCSFromUnicodeWithOffsets) */ 4975f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius table=sharedData->mbcs.fromUnicodeTable; 4976f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 4977f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* get the byte for the output */ 4978f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value=MBCS_SINGLE_RESULT_FROM_U(table, (uint16_t *)sharedData->mbcs.fromUnicodeBytes, c); 4979f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* is this code point assigned, or do we use fallbacks? */ 4980f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(useFallback ? value>=0x800 : value>=0xc00) { 4981f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return value&0xff; 4982f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 4983f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return -1; 4984f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 4985f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius} 4986f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius#endif 4987f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 4988f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius/* MBCS-from-UTF-8 conversion functions ------------------------------------- */ 4989f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 4990f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius/* minimum code point values for n-byte UTF-8 sequences, n=0..4 */ 4991f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusstatic const UChar32 4992f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusutf8_minLegal[5]={ 0, 0, 0x80, 0x800, 0x10000 }; 4993f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 4994f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius/* offsets for n-byte UTF-8 sequences that were calculated with ((lead<<6)+trail)<<6+trail... 
*/ 4995f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusstatic const UChar32 4996f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusutf8_offsets[7]={ 0, 0, 0x3080, 0xE2080, 0x3C82080 }; 4997f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 4998f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusstatic void 4999f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusucnv_SBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs, 5000f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UConverterToUnicodeArgs *pToUArgs, 5001f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UErrorCode *pErrorCode) { 5002f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UConverter *utf8, *cnv; 5003f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius const uint8_t *source, *sourceLimit; 5004f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius uint8_t *target; 5005f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius int32_t targetCapacity; 5006f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 5007f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius const uint16_t *table, *sbcsIndex; 5008f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius const uint16_t *results; 5009f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 5010f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius int8_t oldToULength, toULength, toULimit; 5011f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 5012f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UChar32 c; 5013f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius uint8_t b, t1, t2; 5014f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 5015f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius uint32_t asciiRoundtrips; 5016f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius uint16_t value, minValue; 5017f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UBool hasSupplementary; 5018f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 5019f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* set up the local pointers */ 5020f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 
utf8=pToUArgs->converter; 5021f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius cnv=pFromUArgs->converter; 5022f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius source=(uint8_t *)pToUArgs->source; 5023f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius sourceLimit=(uint8_t *)pToUArgs->sourceLimit; 5024f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius target=(uint8_t *)pFromUArgs->target; 5025f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target); 5026f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 5027f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius table=cnv->sharedData->mbcs.fromUnicodeTable; 5028f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius sbcsIndex=cnv->sharedData->mbcs.sbcsIndex; 5029f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) { 5030f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius results=(uint16_t *)cnv->sharedData->mbcs.swapLFNLFromUnicodeBytes; 5031f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 5032f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius results=(uint16_t *)cnv->sharedData->mbcs.fromUnicodeBytes; 5033f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 5034f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius asciiRoundtrips=cnv->sharedData->mbcs.asciiRoundtrips; 5035f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 5036f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(cnv->useFallback) { 5037f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* use all roundtrip and fallback results */ 5038f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius minValue=0x800; 5039f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 5040f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* use only roundtrips and fallbacks from private-use characters */ 5041f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius minValue=0xc00; 5042f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 
5043f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius hasSupplementary=(UBool)(cnv->sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY); 5044f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 5045f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* get the converter state from the UTF-8 UConverter */ 5046f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=(UChar32)utf8->toUnicodeStatus; 5047f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(c!=0) { 5048f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius toULength=oldToULength=utf8->toULength; 5049f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius toULimit=(int8_t)utf8->mode; 5050f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 5051f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius toULength=oldToULength=toULimit=0; 5052f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 5053f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 5054f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* 5055f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Make sure that the last byte sequence before sourceLimit is complete 5056f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * or runs into a lead byte. 5057f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Do not go back into the bytes that will be read for finishing a partial 5058f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * sequence from the previous buffer. 5059f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * In the conversion loop compare source with sourceLimit only once 5060f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * per multi-byte character. 
5061f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 5062f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius { 5063f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius int32_t i, length; 5064f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 5065f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length=(int32_t)(sourceLimit-source) - (toULimit-oldToULength); 5066f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius for(i=0; i<3 && i<length;) { 5067f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius b=*(sourceLimit-i-1); 5068f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(U8_IS_TRAIL(b)) { 5069f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ++i; 5070f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 5071f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(i<U8_COUNT_TRAIL_BYTES(b)) { 5072f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* exit the conversion loop before the lead byte if there are not enough trail bytes for it */ 5073f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius sourceLimit-=i+1; 5074f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 5075f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 5076f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 5077f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 5078f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 5079f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 5080f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(c!=0 && targetCapacity>0) { 5081f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius utf8->toUnicodeStatus=0; 5082f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius utf8->toULength=0; 5083f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius goto moreBytes; 5084f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* 5085f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Note: We could avoid the goto by duplicating some of the moreBytes 5086f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * code, but only up to the point of collecting a complete UTF-8 
5087f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * sequence; then recurse for the toUBytes[toULength] 5088f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * and then continue with normal conversion. 5089f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 5090f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * If so, move this code to just after initializing the minimum 5091f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * set of local variables for reading the UTF-8 input 5092f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * (utf8, source, target, limits but not cnv, table, minValue, etc.). 5093f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * 5094f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Potential advantages: 5095f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * - avoid the goto 5096f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * - oldToULength could become a local variable in just those code blocks 5097f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * that deal with buffer boundaries 5098f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * - possibly faster if the goto prevents some compiler optimizations 5099f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * (this would need measuring to confirm) 5100f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Disadvantage: 5101f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * - code duplication 5102f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 5103f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 5104f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 5105f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* conversion loop */ 5106f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius while(source<sourceLimit) { 5107f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(targetCapacity>0) { 5108f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius b=*source++; 5109f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if((int8_t)b>=0) { 
5110f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* convert ASCII */ 5111f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(IS_ASCII_ROUNDTRIP(b, asciiRoundtrips)) { 5112f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *target++=(uint8_t)b; 5113f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius --targetCapacity; 5114f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius continue; 5115f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 5116f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=b; 5117f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value=SBCS_RESULT_FROM_UTF8(sbcsIndex, results, 0, c); 5118f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 5119f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 5120f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(b<0xe0) { 5121f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if( /* handle U+0080..U+07FF inline */ 5122f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius b>=0xc2 && 5123f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius (t1=(uint8_t)(*source-0x80)) <= 0x3f 5124f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ) { 5125f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=b&0x1f; 5126f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ++source; 5127f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value=SBCS_RESULT_FROM_UTF8(sbcsIndex, results, c, t1); 5128f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(value>=minValue) { 5129f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *target++=(uint8_t)value; 5130f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius --targetCapacity; 5131f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius continue; 5132f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 5133f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=(c<<6)|t1; 5134f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 5135f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 5136f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=-1; 
5137f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 5138f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if(b==0xe0) { 5139f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if( /* handle U+0800..U+0FFF inline */ 5140f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius (t1=(uint8_t)(source[0]-0x80)) <= 0x3f && t1 >= 0x20 && 5141f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius (t2=(uint8_t)(source[1]-0x80)) <= 0x3f 5142f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ) { 5143f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=t1; 5144f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius source+=2; 5145f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value=SBCS_RESULT_FROM_UTF8(sbcsIndex, results, c, t2); 5146f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(value>=minValue) { 5147f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *target++=(uint8_t)value; 5148f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius --targetCapacity; 5149f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius continue; 5150f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 5151f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=(c<<6)|t2; 5152f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 5153f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 5154f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=-1; 5155f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 5156f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 5157f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=-1; 5158f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 5159f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 5160f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(c<0) { 5161f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* handle "complicated" and error cases, and continuing partial characters */ 5162f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius oldToULength=0; 5163f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius toULength=1; 
5164f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius toULimit=U8_COUNT_TRAIL_BYTES(b)+1; 5165f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=b; 5166f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusmoreBytes: 5167f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius while(toULength<toULimit) { 5168f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* 5169f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * The sourceLimit may have been adjusted before the conversion loop 5170f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * to stop before a truncated sequence. 5171f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Here we need to use the real limit in case we have two truncated 5172f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * sequences at the end. 5173f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * See ticket #7492. 5174f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 5175f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(source<(uint8_t *)pToUArgs->sourceLimit) { 5176f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius b=*source; 5177f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(U8_IS_TRAIL(b)) { 5178f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ++source; 5179f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ++toULength; 5180f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=(c<<6)+b; 5181f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 5182f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; /* sequence too short, stop with toULength<toULimit */ 5183f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 5184f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 5185f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* store the partial UTF-8 character, compatible with the regular UTF-8 converter */ 5186f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius source-=(toULength-oldToULength); 5187f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius while(oldToULength<toULength) { 
5188f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius utf8->toUBytes[oldToULength++]=*source++; 5189f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 5190f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius utf8->toUnicodeStatus=c; 5191f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius utf8->toULength=toULength; 5192f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius utf8->mode=toULimit; 5193f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius pToUArgs->source=(char *)source; 5194f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius pFromUArgs->target=(char *)target; 5195f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return; 5196f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 5197f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 5198f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 5199f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if( toULength==toULimit && /* consumed all trail bytes */ 5200f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius (toULength==3 || toULength==2) && /* BMP */ 5201f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius (c-=utf8_offsets[toULength])>=utf8_minLegal[toULength] && 5202f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius (c<=0xd7ff || 0xe000<=c) /* not a surrogate */ 5203f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ) { 5204f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value=MBCS_SINGLE_RESULT_FROM_U(table, results, c); 5205f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if( 5206f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius toULength==toULimit && toULength==4 && 5207f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius (0x10000<=(c-=utf8_offsets[4]) && c<=0x10ffff) 5208f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ) { 5209f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* supplementary code point */ 5210f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(!hasSupplementary) { 5211f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* BMP-only codepages are stored without stage 1 entries for 
supplementary code points */ 5212f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value=0; 5213f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 5214f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value=MBCS_SINGLE_RESULT_FROM_U(table, results, c); 5215f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 5216f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 5217f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* error handling: illegal UTF-8 byte sequence */ 5218f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius source-=(toULength-oldToULength); 5219f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius while(oldToULength<toULength) { 5220f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius utf8->toUBytes[oldToULength++]=*source++; 5221f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 5222f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius utf8->toULength=toULength; 5223f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius pToUArgs->source=(char *)source; 5224f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius pFromUArgs->target=(char *)target; 5225f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *pErrorCode=U_ILLEGAL_CHAR_FOUND; 5226f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return; 5227f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 5228f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 5229f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 5230f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 5231f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(value>=minValue) { 5232f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* output the mapping for c */ 5233f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *target++=(uint8_t)value; 5234f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius --targetCapacity; 5235f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 5236f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* value<minValue means c is unassigned (unmappable) */ 
5237f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* 5238f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Try an extension mapping. 5239f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Pass in no source because we don't have UTF-16 input. 5240f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * If we have a partial match on c, we will return and revert 5241f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * to UTF-8->UTF-16->charset conversion. 5242f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 5243f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius static const UChar nul=0; 5244f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius const UChar *noSource=&nul; 5245f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=_extFromU(cnv, cnv->sharedData, 5246f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c, &noSource, noSource, 5247f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius &target, target+targetCapacity, 5248f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius NULL, -1, 5249f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius pFromUArgs->flush, 5250f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius pErrorCode); 5251f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 5252f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(U_FAILURE(*pErrorCode)) { 5253f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* not mappable or buffer overflow */ 5254f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius cnv->fromUChar32=c; 5255f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 5256f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if(cnv->preFromUFirstCP>=0) { 5257f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* 5258f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Partial match, return and revert to pivoting. 
5259f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * In normal from-UTF-16 conversion, we would just continue 5260f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * but then exit the loop because the extension match would 5261f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * have consumed the source. 5262f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 5263f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *pErrorCode=U_USING_DEFAULT_WARNING; 5264f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 5265f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 5266f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* a mapping was written to the target, continue */ 5267f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 5268f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* recalculate the targetCapacity after an extension mapping */ 5269f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius targetCapacity=(int32_t)(pFromUArgs->targetLimit-(char *)target); 5270f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 5271f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 5272f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 5273f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* target is full */ 5274f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 5275f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 5276f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 5277f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 5278f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 5279f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* 5280f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * The sourceLimit may have been adjusted before the conversion loop 5281f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * to stop before a truncated sequence. 5282f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * If so, then collect the truncated sequence now. 
5283f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 5284f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(U_SUCCESS(*pErrorCode) && 5285f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius cnv->preFromUFirstCP<0 && 5286f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius source<(sourceLimit=(uint8_t *)pToUArgs->sourceLimit)) { 5287f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=utf8->toUBytes[0]=b=*source++; 5288f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius toULength=1; 5289f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius toULimit=U8_COUNT_TRAIL_BYTES(b)+1; 5290f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius while(source<sourceLimit) { 5291f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius utf8->toUBytes[toULength++]=b=*source++; 5292f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=(c<<6)+b; 5293f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 5294f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius utf8->toUnicodeStatus=c; 5295f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius utf8->toULength=toULength; 5296f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius utf8->mode=toULimit; 5297f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 5298f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 5299f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* write back the updated pointers */ 5300f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius pToUArgs->source=(char *)source; 5301f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius pFromUArgs->target=(char *)target; 5302f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius} 5303f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 5304f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusstatic void 5305f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusucnv_DBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs, 5306f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UConverterToUnicodeArgs *pToUArgs, 5307f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UErrorCode *pErrorCode) { 
5308f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UConverter *utf8, *cnv; 5309f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius const uint8_t *source, *sourceLimit; 5310f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius uint8_t *target; 5311f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius int32_t targetCapacity; 5312f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 5313f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius const uint16_t *table, *mbcsIndex; 5314f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius const uint16_t *results; 5315f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 5316f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius int8_t oldToULength, toULength, toULimit; 5317f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 5318f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UChar32 c; 5319f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius uint8_t b, t1, t2; 5320f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 5321f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius uint32_t stage2Entry; 5322f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius uint32_t asciiRoundtrips; 5323f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius uint16_t value; 5324f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UBool hasSupplementary; 5325f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 5326f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* set up the local pointers */ 5327f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius utf8=pToUArgs->converter; 5328f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius cnv=pFromUArgs->converter; 5329f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius source=(uint8_t *)pToUArgs->source; 5330f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius sourceLimit=(uint8_t *)pToUArgs->sourceLimit; 5331f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius target=(uint8_t *)pFromUArgs->target; 5332f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target); 
5333f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 5334f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius table=cnv->sharedData->mbcs.fromUnicodeTable; 5335f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius mbcsIndex=cnv->sharedData->mbcs.mbcsIndex; 5336f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) { 5337f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius results=(uint16_t *)cnv->sharedData->mbcs.swapLFNLFromUnicodeBytes; 5338f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 5339f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius results=(uint16_t *)cnv->sharedData->mbcs.fromUnicodeBytes; 5340f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 5341f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius asciiRoundtrips=cnv->sharedData->mbcs.asciiRoundtrips; 5342f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 5343f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius hasSupplementary=(UBool)(cnv->sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY); 5344f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 5345f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* get the converter state from the UTF-8 UConverter */ 5346f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=(UChar32)utf8->toUnicodeStatus; 5347f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(c!=0) { 5348f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius toULength=oldToULength=utf8->toULength; 5349f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius toULimit=(int8_t)utf8->mode; 5350f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 5351f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius toULength=oldToULength=toULimit=0; 5352f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 5353f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 5354f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* 5355f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Make sure that the last byte sequence before sourceLimit is complete 
5356f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * or runs into a lead byte. 5357f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Do not go back into the bytes that will be read for finishing a partial 5358f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * sequence from the previous buffer. 5359f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * In the conversion loop compare source with sourceLimit only once 5360f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * per multi-byte character. 5361f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 5362f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius { 5363f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius int32_t i, length; 5364f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 5365f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length=(int32_t)(sourceLimit-source) - (toULimit-oldToULength); 5366f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius for(i=0; i<3 && i<length;) { 5367f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius b=*(sourceLimit-i-1); 5368f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(U8_IS_TRAIL(b)) { 5369f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ++i; 5370f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 5371f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(i<U8_COUNT_TRAIL_BYTES(b)) { 5372f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* exit the conversion loop before the lead byte if there are not enough trail bytes for it */ 5373f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius sourceLimit-=i+1; 5374f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 5375f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 5376f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 5377f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 5378f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 5379f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 5380f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(c!=0 && targetCapacity>0) { 
5381f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius utf8->toUnicodeStatus=0; 5382f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius utf8->toULength=0; 5383f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius goto moreBytes; 5384f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* See note in ucnv_SBCSFromUTF8() about this goto. */ 5385f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 5386f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 5387f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* conversion loop */ 5388f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius while(source<sourceLimit) { 5389f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(targetCapacity>0) { 5390f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius b=*source++; 5391f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if((int8_t)b>=0) { 5392f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* convert ASCII */ 5393f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(IS_ASCII_ROUNDTRIP(b, asciiRoundtrips)) { 5394f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *target++=b; 5395f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius --targetCapacity; 5396f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius continue; 5397f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 5398f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value=DBCS_RESULT_FROM_UTF8(mbcsIndex, results, 0, b); 5399f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(value==0) { 5400f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=b; 5401f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius goto unassigned; 5402f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 5403f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 5404f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 5405f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(b>0xe0) { 5406f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if( /* handle U+1000..U+D7FF inline */ 5407f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 
(((t1=(uint8_t)(source[0]-0x80), b<0xed) && (t1 <= 0x3f)) || 5408f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius (b==0xed && (t1 <= 0x1f))) && 5409f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius (t2=(uint8_t)(source[1]-0x80)) <= 0x3f 5410f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ) { 5411f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=((b&0xf)<<6)|t1; 5412f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius source+=2; 5413f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value=DBCS_RESULT_FROM_UTF8(mbcsIndex, results, c, t2); 5414f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(value==0) { 5415f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=(c<<6)|t2; 5416f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius goto unassigned; 5417f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 5418f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 5419f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=-1; 5420f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 5421f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if(b<0xe0) { 5422f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if( /* handle U+0080..U+07FF inline */ 5423f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius b>=0xc2 && 5424f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius (t1=(uint8_t)(*source-0x80)) <= 0x3f 5425f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ) { 5426f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=b&0x1f; 5427f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ++source; 5428f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value=DBCS_RESULT_FROM_UTF8(mbcsIndex, results, c, t1); 5429f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(value==0) { 5430f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=(c<<6)|t1; 5431f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius goto unassigned; 5432f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 5433f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 
5434f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=-1; 5435f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 5436f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 5437f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=-1; 5438f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 5439f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 5440f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(c<0) { 5441f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* handle "complicated" and error cases, and continuing partial characters */ 5442f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius oldToULength=0; 5443f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius toULength=1; 5444f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius toULimit=U8_COUNT_TRAIL_BYTES(b)+1; 5445f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=b; 5446f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusmoreBytes: 5447f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius while(toULength<toULimit) { 5448f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* 5449f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * The sourceLimit may have been adjusted before the conversion loop 5450f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * to stop before a truncated sequence. 5451f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Here we need to use the real limit in case we have two truncated 5452f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * sequences at the end. 5453f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * See ticket #7492. 
5454f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 5455f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(source<(uint8_t *)pToUArgs->sourceLimit) { 5456f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius b=*source; 5457f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(U8_IS_TRAIL(b)) { 5458f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ++source; 5459f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ++toULength; 5460f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=(c<<6)+b; 5461f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 5462f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; /* sequence too short, stop with toULength<toULimit */ 5463f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 5464f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 5465f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* store the partial UTF-8 character, compatible with the regular UTF-8 converter */ 5466f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius source-=(toULength-oldToULength); 5467f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius while(oldToULength<toULength) { 5468f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius utf8->toUBytes[oldToULength++]=*source++; 5469f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 5470f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius utf8->toUnicodeStatus=c; 5471f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius utf8->toULength=toULength; 5472f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius utf8->mode=toULimit; 5473f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius pToUArgs->source=(char *)source; 5474f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius pFromUArgs->target=(char *)target; 5475f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return; 5476f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 5477f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 5478f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 5479f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if( toULength==toULimit 
&& /* consumed all trail bytes */ 5480f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius (toULength==3 || toULength==2) && /* BMP */ 5481f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius (c-=utf8_offsets[toULength])>=utf8_minLegal[toULength] && 5482f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius (c<=0xd7ff || 0xe000<=c) /* not a surrogate */ 5483f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ) { 5484f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius stage2Entry=MBCS_STAGE_2_FROM_U(table, c); 5485f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if( 5486f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius toULength==toULimit && toULength==4 && 5487f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius (0x10000<=(c-=utf8_offsets[4]) && c<=0x10ffff) 5488f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ) { 5489f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* supplementary code point */ 5490f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(!hasSupplementary) { 5491f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* BMP-only codepages are stored without stage 1 entries for supplementary code points */ 5492f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius stage2Entry=0; 5493f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 5494f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius stage2Entry=MBCS_STAGE_2_FROM_U(table, c); 5495f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 5496f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 5497f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* error handling: illegal UTF-8 byte sequence */ 5498f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius source-=(toULength-oldToULength); 5499f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius while(oldToULength<toULength) { 5500f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius utf8->toUBytes[oldToULength++]=*source++; 5501f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 5502f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 
utf8->toULength=toULength; 5503f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius pToUArgs->source=(char *)source; 5504f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius pFromUArgs->target=(char *)target; 5505f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *pErrorCode=U_ILLEGAL_CHAR_FOUND; 5506f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return; 5507f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 5508f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 5509f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* get the bytes and the length for the output */ 5510f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* MBCS_OUTPUT_2 */ 5511f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius value=MBCS_VALUE_2_FROM_STAGE_2(results, stage2Entry, c); 5512f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 5513f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* is this code point assigned, or do we use fallbacks? */ 5514f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(!(MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c) || 5515f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius (UCNV_FROM_U_USE_FALLBACK(cnv, c) && value!=0)) 5516f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ) { 5517f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius goto unassigned; 5518f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 5519f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 5520f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 5521f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 5522f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* write the output character bytes from value and length */ 5523f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* from the first if in the loop we know that targetCapacity>0 */ 5524f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(value<=0xff) { 5525f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* this is easy because we know that there is enough space */ 5526f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 
*target++=(uint8_t)value; 5527f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius --targetCapacity; 5528f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else /* length==2 */ { 5529f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *target++=(uint8_t)(value>>8); 5530f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(2<=targetCapacity) { 5531f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *target++=(uint8_t)value; 5532f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius targetCapacity-=2; 5533f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 5534f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius cnv->charErrorBuffer[0]=(char)value; 5535f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius cnv->charErrorBufferLength=1; 5536f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 5537f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* target overflow */ 5538f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 5539f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 5540f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 5541f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 5542f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius continue; 5543f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 5544f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusunassigned: 5545f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius { 5546f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* 5547f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Try an extension mapping. 5548f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Pass in no source because we don't have UTF-16 input. 5549f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * If we have a partial match on c, we will return and revert 5550f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * to UTF-8->UTF-16->charset conversion. 
5551f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 5552f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius static const UChar nul=0; 5553f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius const UChar *noSource=&nul; 5554f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=_extFromU(cnv, cnv->sharedData, 5555f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c, &noSource, noSource, 5556f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius &target, target+targetCapacity, 5557f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius NULL, -1, 5558f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius pFromUArgs->flush, 5559f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius pErrorCode); 5560f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 5561f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(U_FAILURE(*pErrorCode)) { 5562f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* not mappable or buffer overflow */ 5563f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius cnv->fromUChar32=c; 5564f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 5565f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if(cnv->preFromUFirstCP>=0) { 5566f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* 5567f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Partial match, return and revert to pivoting. 5568f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * In normal from-UTF-16 conversion, we would just continue 5569f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * but then exit the loop because the extension match would 5570f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * have consumed the source. 
5571f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 5572f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *pErrorCode=U_USING_DEFAULT_WARNING; 5573f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 5574f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 5575f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* a mapping was written to the target, continue */ 5576f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 5577f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* recalculate the targetCapacity after an extension mapping */ 5578f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius targetCapacity=(int32_t)(pFromUArgs->targetLimit-(char *)target); 5579f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius continue; 5580f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 5581f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 5582f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 5583f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* target is full */ 5584f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 5585f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 5586f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 5587f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 5588f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 5589f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* 5590f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * The sourceLimit may have been adjusted before the conversion loop 5591f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * to stop before a truncated sequence. 5592f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * If so, then collect the truncated sequence now. 
5593f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 5594f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(U_SUCCESS(*pErrorCode) && 5595f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius cnv->preFromUFirstCP<0 && 5596f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius source<(sourceLimit=(uint8_t *)pToUArgs->sourceLimit)) { 5597f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=utf8->toUBytes[0]=b=*source++; 5598f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius toULength=1; 5599f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius toULimit=U8_COUNT_TRAIL_BYTES(b)+1; 5600f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius while(source<sourceLimit) { 5601f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius utf8->toUBytes[toULength++]=b=*source++; 5602f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius c=(c<<6)+b; 5603f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 5604f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius utf8->toUnicodeStatus=c; 5605f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius utf8->toULength=toULength; 5606f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius utf8->mode=toULimit; 5607f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 5608f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 5609f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* write back the updated pointers */ 5610f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius pToUArgs->source=(char *)source; 5611f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius pFromUArgs->target=(char *)target; 5612f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius} 5613f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 5614f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius/* miscellaneous ------------------------------------------------------------ */ 5615f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 5616f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusstatic void 5617f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusucnv_MBCSGetStarters(const UConverter* cnv, 
5618f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UBool starters[256], 5619f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UErrorCode *) { 5620f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius const int32_t *state0; 5621f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius int i; 5622f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 5623f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius state0=cnv->sharedData->mbcs.stateTable[cnv->sharedData->mbcs.dbcsOnlyState]; 5624f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius for(i=0; i<256; ++i) { 5625f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* all bytes that cause a state transition from state 0 are lead bytes */ 5626f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius starters[i]= (UBool)MBCS_ENTRY_IS_TRANSITION(state0[i]); 5627f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 5628f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius} 5629f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 5630f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius/* 5631f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * This is an internal function that allows other converter implementations 5632f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * to check whether a byte is a lead byte. 
5633f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius */ 5634f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusU_CFUNC UBool 5635f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusucnv_MBCSIsLeadByte(UConverterSharedData *sharedData, char byte) { 5636f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return (UBool)MBCS_ENTRY_IS_TRANSITION(sharedData->mbcs.stateTable[0][(uint8_t)byte]); 5637f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius} 5638f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 5639f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusstatic void 5640f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusucnv_MBCSWriteSub(UConverterFromUnicodeArgs *pArgs, 5641f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius int32_t offsetIndex, 5642f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UErrorCode *pErrorCode) { 5643f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius UConverter *cnv=pArgs->converter; 5644f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius char *p, *subchar; 5645f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius char buffer[4]; 5646f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius int32_t length; 5647f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 5648f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* first, select between subChar and subChar1 */ 5649f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if( cnv->subChar1!=0 && 5650f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius (cnv->sharedData->mbcs.extIndexes!=NULL ? 
5651f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius cnv->useSubChar1 : 5652f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius (cnv->invalidUCharBuffer[0]<=0xff)) 5653f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ) { 5654f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* select subChar1 if it is set (not 0) and the unmappable Unicode code point is up to U+00ff (IBM MBCS behavior) */ 5655f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius subchar=(char *)&cnv->subChar1; 5656f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length=1; 5657f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else { 5658f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* select subChar in all other cases */ 5659f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius subchar=(char *)cnv->subChars; 5660f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length=cnv->subCharLen; 5661f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 5662f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 5663f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* reset the selector for the next code point */ 5664f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius cnv->useSubChar1=FALSE; 5665f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 5666f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if (cnv->sharedData->mbcs.outputType == MBCS_OUTPUT_2_SISO) { 5667f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius p=buffer; 5668f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 5669f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* fromUnicodeStatus contains prevLength */ 5670f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius switch(length) { 5671f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius case 1: 5672f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(cnv->fromUnicodeStatus==2) { 5673f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* DBCS mode and SBCS sub char: change to SBCS */ 5674f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius cnv->fromUnicodeStatus=1; 
5675f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *p++=UCNV_SI; 5676f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 5677f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *p++=subchar[0]; 5678f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 5679f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius case 2: 5680f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(cnv->fromUnicodeStatus<=1) { 5681f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* SBCS mode and DBCS sub char: change to DBCS */ 5682f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius cnv->fromUnicodeStatus=2; 5683f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *p++=UCNV_SO; 5684f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 5685f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *p++=subchar[0]; 5686f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *p++=subchar[1]; 5687f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius break; 5688f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius default: 5689f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 5690f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return; 5691f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 5692f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius subchar=buffer; 5693f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius length=(int32_t)(p-buffer); 5694f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 5695f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 5696f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ucnv_cbFromUWriteBytes(pArgs, subchar, length, offsetIndex, pErrorCode); 5697f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius} 5698f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 5699f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusU_CFUNC UConverterType 5700f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusucnv_MBCSGetType(const UConverter* converter) { 5701f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius /* SBCS, DBCS, and EBCDIC_STATEFUL are replaced by 
MBCS, but here we cheat a little */ 5702f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if(converter->sharedData->mbcs.countStates==1) { 5703f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return (UConverterType)UCNV_SBCS; 5704f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if((converter->sharedData->mbcs.outputType&0xff)==MBCS_OUTPUT_2_SISO) { 5705f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return (UConverterType)UCNV_EBCDIC_STATEFUL; 5706f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } else if(converter->sharedData->staticData->minBytesPerChar==2 && converter->sharedData->staticData->maxBytesPerChar==2) { 5707f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return (UConverterType)UCNV_DBCS; 5708f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 5709f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return (UConverterType)UCNV_MBCS; 5710f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius} 5711f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 5712f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ 5713