16f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* 26f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org****************************************************************************** 36f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* 46f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* Copyright (C) 2000-2013, International Business Machines 56f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* Corporation and others. All Rights Reserved. 66f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* 76f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org****************************************************************************** 86f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* file name: ucnvmbcs.c 96f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* encoding: US-ASCII 106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* tab size: 8 (not used) 116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* indentation:4 126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* 136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* created on: 2000jul03 146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* created by: Markus W. Scherer 156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* 166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* The current code in this file replaces the previous implementation 176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* of conversion code from multi-byte codepages to Unicode and back. 186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* This implementation supports the following: 196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* - legacy variable-length codepages with up to 4 bytes per character 206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* - all Unicode code points (up to 0x10ffff) 216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* - efficient distinction of unassigned vs. 
illegal byte sequences 226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* - it is possible in fromUnicode() to directly deal with simple 236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* stateful encodings (used for EBCDIC_STATEFUL) 246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* - it is possible to convert Unicode code points 256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* to a single zero byte (but not as a fallback except for SBCS) 266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* 276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* Remaining limitations in fromUnicode: 286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* - byte sequences must not have leading zero bytes 296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* - except for SBCS codepages: no fallback mapping from Unicode to a zero byte 306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* - limitation to up to 4 bytes per character 316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* 326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* ICU 2.8 (late 2003) adds a secondary data structure which lifts some of these 336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* limitations and adds m:n character mappings and other features. 346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* See ucnv_ext.h for details. 
356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* 366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* Change history: 376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* 386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* 5/6/2001 Ram Moved MBCS_SINGLE_RESULT_FROM_U,MBCS_STAGE_2_FROM_U, 396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* MBCS_VALUE_2_FROM_STAGE_2, MBCS_VALUE_4_FROM_STAGE_2 406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* macros to ucnvmbcs.h file 416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*/ 426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/utypes.h" 446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION 466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/ucnv.h" 486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/ucnv_cb.h" 496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/udata.h" 506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/uset.h" 516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/utf8.h" 526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/utf16.h" 536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "ucnv_bld.h" 546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "ucnvmbcs.h" 556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "ucnv_ext.h" 566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "ucnv_cnv.h" 576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "cmemory.h" 586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "cstring.h" 
596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "cmutex.h" 606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* control optimizations according to the platform */ 626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define MBCS_UNROLL_SINGLE_TO_BMP 1 636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define MBCS_UNROLL_SINGLE_FROM_BMP 0 646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* 666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * _MBCSHeader versions 5.3 & 4.3 676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * (Note that the _MBCSHeader version is in addition to the converter formatVersion.) 686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * This version is optional. Version 5 is used for incompatible data format changes. 706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * makeconv will continue to generate version 4 files if possible. 
716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Changes from version 4: 736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * The main difference is an additional _MBCSHeader field with 756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * - the length (number of uint32_t) of the _MBCSHeader 766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * - flags for further incompatible data format changes 776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * - flags for further, backward compatible data format changes 786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * The MBCS_OPT_FROM_U flag indicates that most of the fromUnicode data is omitted from 806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * the file and needs to be reconstituted at load time. 816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * This requires a utf8Friendly format with an additional mbcsIndex table for fast 826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * (and UTF-8-friendly) fromUnicode conversion for Unicode code points up to maxFastUChar. 836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * (For details about these structures see below, and see ucnvmbcs.h.) 846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * utf8Friendly also implies that the fromUnicode mappings are stored in ascending order 866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * of the Unicode code points. (This requires that the .ucm file has the |0 etc. 876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * precision markers for all mappings.) 
886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * All fallbacks have been moved to the extension table, leaving only roundtrips in the 906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * omitted data that can be reconstituted from the toUnicode data. 916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Of the stage 2 table, the part corresponding to maxFastUChar and below is omitted. 936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * With only roundtrip mappings in the base fromUnicode data, this part is fully 946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * redundant with the mbcsIndex and will be reconstituted from that (also using the 956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * stage 1 table which contains the information about how stage 2 was compacted). 966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * The rest of the stage 2 table, the part for code points above maxFastUChar, 986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * is stored in the file and will be appended to the reconstituted part. 996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 1006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * The entire fromUBytes array is omitted from the file and will be reconstituted. 1016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * This is done by enumerating all toUnicode roundtrip mappings, performing 1026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * each mapping (using the stage 1 and reconstituted stage 2 tables) and 1036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * writing instead of reading the byte values. 
1046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 1056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * _MBCSHeader version 4.3 1066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 1076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Change from version 4.2: 1086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * - Optional utf8Friendly data structures, with 64-entry stage 3 block 1096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * allocation for parts of the BMP, and an additional mbcsIndex in non-SBCS 1106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * files which can be used instead of stages 1 & 2. 1116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Faster lookups for roundtrips from most commonly used characters, 1126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * and lookups from UTF-8 byte sequences with a natural bit distribution. 1136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * See ucnvmbcs.h for more details. 1146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 1156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Change from version 4.1: 1166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * - Added an optional extension table structure at the end of the .cnv file. 1176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * It is present if the upper bits of the header flags field contain a non-zero 1186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * byte offset to it. 1196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Files that contain only a conversion table and no base table 1206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * use the special outputType MBCS_OUTPUT_EXT_ONLY. 1216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * These contain the base table name between the MBCS header and the extension 1226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * data. 
1236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 1246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Change from version 4.0: 1256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * - Replace header.reserved with header.fromUBytesLength so that all 1266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * fields in the data have length. 1276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 1286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Changes from version 3 (for performance improvements): 1296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * - new bit distribution for state table entries 1306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * - reordered action codes 1316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * - new data structure for single-byte fromUnicode 1326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * + stage 2 only contains indexes 1336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * + stage 3 stores 16 bits per character with classification bits 15..8 1346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * - no multiplier for stage 1 entries 1356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * - stage 2 for non-single-byte codepages contains the index and the flags in 1366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * one 32-bit value 1376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * - 2-byte and 4-byte fromUnicode results are stored directly as 16/32-bit integers 1386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 1396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * For more details about old versions of the MBCS data structure, see 1406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * the corresponding versions of this file. 
1416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 1426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Converting stateless codepage data ---------------------------------------*** 1436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * (or codepage data with simple states) to Unicode. 1446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 1456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Data structure and algorithm for converting from complex legacy codepages 1466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * to Unicode. (Designed before 2000-may-22.) 1476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 1486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * The basic idea is that the structure of legacy codepages can be described 1496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * with state tables. 1506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * When reading a byte stream, each input byte causes a state transition. 1516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Some transitions result in the output of a code point, some result in 1526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * "unassigned" or "illegal" output. 1536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * This is used here for character conversion. 1546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 1556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * The data structure begins with a state table consisting of a row 1566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * per state, with 256 entries (columns) per row for each possible input 1576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * byte value. 
1586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Each entry is 32 bits wide, with two formats distinguished by 1596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * the sign bit (bit 31): 1606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 1616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * One format for transitional entries (bit 31 not set) for non-final bytes, and 1626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * one format for final entries (bit 31 set). 1636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Both formats contain the number of the next state in the same bit 1646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * positions. 1656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * State 0 is the initial state. 1666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 1676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Most of the time, the offset values of subsequent states are added 1686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * up to a scalar value. This value will eventually be the index of 1696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * the Unicode code point in a table that follows the state table. 1706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * The effect is that the code points for final state table rows 1716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * are contiguous. The code points of final state rows follow each other 1726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * in the order of the references to those final states by previous 1736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * states, etc. 
1746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 1756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * For some terminal states, the offset is itself the output Unicode 1766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * code point (16 bits for a BMP code point or 20 bits for a supplementary 1776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * code point (stored as code point minus 0x10000 so that 20 bits are enough)). 1786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * For others, the code point in the Unicode table is stored with either 1796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * one or two code units: one for BMP code points, two for a pair of 1806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * surrogates. 1816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * All code points for a final state entry take up the same number of code 1826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * units, regardless of whether they all actually _use_ the same number 1836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * of code units. This is necessary for simple array access. 1846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 1856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * An additional feature comes in with what in ICU is called "fallback" 1866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * mappings: 1876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 1886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * In addition to round-trippable, precise, 1:1 mappings, there are often 1896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * mappings defined between similar, though not the same, characters. 
1906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Typically, such mappings occur only in fromUnicode mapping tables because 1916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Unicode has a superset repertoire of most other codepages. However, it 1926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * is possible to provide such mappings in the toUnicode tables, too. 1936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * In this case, the fallback mappings are partly integrated into the 1946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * general state tables because the structure of the encoding includes their 1956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * byte sequences. 1966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * For final entries in an initial state, fallback mappings are stored in 1976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * the entry itself like with roundtrip mappings. 1986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * For other final entries, they are stored in the code units table if 1996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * the entry is for a pair of code units. 2006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * For single-unit results in the code units table, there is no space to 2016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * alternatively hold a fallback mapping; in this case, the code unit 2026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * is stored as U+fffe (unassigned), and the fallback mapping needs to 2036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * be looked up by the scalar offset value in a separate table. 
2046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 2056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * "Unassigned" state entries really mean "structurally unassigned", 2066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * i.e., such a byte sequence will never have a mapping result. 2076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 2086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * The interpretation of the bits in each entry is as follows: 2096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 2106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Bit 31 not set, not a terminal entry ("transitional"): 2116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 30..24 next state 2126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 23..0 offset delta, to be added up 2136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 2146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Bit 31 set, terminal ("final") entry: 2156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 30..24 next state (regardless of action code) 2166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 23..20 action code: 2176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * action codes 0 and 1 result in precise-mapping Unicode code points 2186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 0 valid byte sequence 2196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 19..16 not used, 0 2206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 15..0 16-bit Unicode BMP code point 2216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * never U+fffe or U+ffff 2226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 1 valid byte sequence 2236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 19..0 20-bit Unicode supplementary code point 2246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * never U+fffe or U+ffff 
2256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 2266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * action codes 2 and 3 result in fallback (unidirectional-mapping) Unicode code points 2276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 2 valid byte sequence (fallback) 2286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 19..16 not used, 0 2296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 15..0 16-bit Unicode BMP code point as fallback result 2306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 3 valid byte sequence (fallback) 2316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 19..0 20-bit Unicode supplementary code point as fallback result 2326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 2336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * action codes 4 and 5 may result in roundtrip/fallback/unassigned/illegal results 2346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * depending on the code units they result in 2356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 4 valid byte sequence 2366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 19..9 not used, 0 2376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 8..0 final offset delta 2386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * pointing to one 16-bit code unit which may be 2396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * fffe unassigned -- look for a fallback for this offset 2406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * ffff illegal 2416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 5 valid byte sequence 2426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 19..9 not used, 0 2436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 8..0 final offset delta 2446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * pointing to two 16-bit code units 
2456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * (typically UTF-16 surrogates) 2466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * the result depends on the first code unit as follows: 2476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 0000..d7ff roundtrip BMP code point (1st alone) 2486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * d800..dbff roundtrip surrogate pair (1st, 2nd) 2496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * dc00..dfff fallback surrogate pair (1st-400, 2nd) 2506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * e000 roundtrip BMP code point (2nd alone) 2516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * e001 fallback BMP code point (2nd alone) 2526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * fffe unassigned 2536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * ffff illegal 2546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * (the final offset deltas are at most 255 * 2, 2556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * times 2 because of storing code unit pairs) 2566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 2576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 6 unassigned byte sequence 2586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 19..16 not used, 0 2596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 15..0 16-bit Unicode BMP code point U+fffe (new with version 2) 2606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * this does not contain a final offset delta because the main 2616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * purpose of this action code is to save scalar offset values; 2626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * therefore, fallback values cannot be assigned to byte 2636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * sequences that result in this action code 
2646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 7 illegal byte sequence 2656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 19..16 not used, 0 2666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 15..0 16-bit Unicode BMP code point U+ffff (new with version 2) 2676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 8 state change only 2686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 19..0 not used, 0 2696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * useful for state changes in simple stateful encodings, 2706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * at Shift-In/Shift-Out codes 2716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 2726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 2736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 9..15 reserved for future use 2746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * current implementations will only perform a state change 2756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * and ignore bits 19..0 2766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 2776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * An encoding with contiguous ranges of unassigned byte sequences, like 2786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Shift-JIS and especially EUC-TW, can be stored efficiently by having 2796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * at least two states for the trail bytes: 2806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * One trail byte state that results in code points, and one that only 2816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * has "unassigned" and "illegal" terminal states. 
2826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 2836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Note: partly by accident, this data structure supports simple stateful 2846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * encodings without any additional logic. 2856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Currently, only simple Shift-In/Shift-Out schemes are handled with 2866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * appropriate state tables (especially EBCDIC_STATEFUL!). 2876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 2886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * MBCS version 2 added: 2896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * unassigned and illegal action codes have U+fffe and U+ffff 2906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * instead of unused bits; this is useful for _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP() 2916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 2926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Converting from Unicode to codepage bytes --------------------------------*** 2936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 2946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * The conversion data structure for fromUnicode is designed for the known 2956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * structure of Unicode. It maps from 21-bit code points (0..0x10ffff) to 2966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * a sequence of 1..4 bytes, in addition to a flag that indicates if there is 2976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * a roundtrip mapping. 
 *
 * The lookup is done with a 3-stage trie, using 11/6/4 bits for stage 1/2/3
 * like in the character properties table.
 * The beginning of the trie is at offsetFromUTable, the beginning of stage 3
 * with the resulting bytes is at offsetFromUBytes.
 *
 * Beginning with version 4, single-byte codepages have a significantly different
 * trie compared to other codepages.
 * In all cases, the entry in stage 1 is directly the index of the block of
 * 64 entries in stage 2.
 *
 * Single-byte lookup:
 *
 * Stage 2 only contains 16-bit indexes directly to the 16-blocks in stage 3.
 * Stage 3 contains one 16-bit word per result:
 * Bits 15..8 indicate the kind of result:
 *    f  roundtrip result
 *    c  fallback result from private-use code point
 *    8  fallback result from other code points
 *    0  unassigned
 * Bits 7..0 contain the codepage byte. A zero byte is always possible.
 *
 * In version 4.3, the runtime code can build an sbcsIndex for a utf8Friendly
 * file. For 2-byte UTF-8 byte sequences and some 3-byte sequences the lookup
 * becomes a 2-stage (single-index) trie lookup with 6 bits for stage 3.
 * ASCII code points can be looked up with a linear array access into stage 3.
 * See maxFastUChar and other details in ucnvmbcs.h.
 *
 * Multi-byte lookup:
 *
 * Stage 2 contains a 32-bit word for each 16-block in stage 3:
 * Bits 31..16 contain flags for which stage 3 entries contain roundtrip results
 *             test: MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c)
 *             If this test is false, then a non-zero result will be interpreted as
 *             a fallback mapping.
 * Bits 15..0  contain the index to stage 3, which must be multiplied by 16*(bytes per char)
 *
 * Stage 3 contains 2, 3, or 4 bytes per result.
 * 2 or 4 bytes are stored as uint16_t/uint32_t in platform endianness,
 * while 3 bytes are stored as bytes in big-endian order.
 * Leading zero bytes are ignored, and the number of bytes is counted.
 * A zero byte mapping result is possible as a roundtrip result.
 * For some output types, the actual result is processed from this;
 * see ucnv_MBCSFromUnicodeWithOffsets().
 *
 * Note that stage 1 always contains 0x440=1088 entries (0x440==0x110000>>10),
 * or (version 3 and up) for BMP-only codepages, it contains 64 entries.
 *
 * In version 4.3, a utf8Friendly file contains an mbcsIndex table.
 * For 2-byte UTF-8 byte sequences and most 3-byte sequences the lookup
 * becomes a 2-stage (single-index) trie lookup with 6 bits for stage 3.
 * ASCII code points can be looked up with a linear array access into stage 3.
 * See maxFastUChar, mbcsIndex and other details in ucnvmbcs.h.
 *
 * In version 3, stage 2 blocks may overlap by multiples of the multiplier
 * for compaction.
 * In version 4, stage 2 blocks (and for single-byte codepages, stage 3 blocks)
 * may overlap by any number of entries.
3566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 3576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * MBCS version 2 added: 3586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * the converter checks for known output types, which allows 3596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * adding new ones without crashing an unaware converter 3606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 3616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const UConverterImpl _SBCSUTF8Impl; 3636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const UConverterImpl _DBCSUTF8Impl; 3646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* GB 18030 data ------------------------------------------------------------ */ 3666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* helper macros for linear values for GB 18030 four-byte sequences */ 3686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define LINEAR_18030(a, b, c, d) ((((a)*10+(b))*126L+(c))*10L+(d)) 3696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define LINEAR_18030_BASE LINEAR_18030(0x81, 0x30, 0x81, 0x30) 3716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define LINEAR(x) LINEAR_18030(x>>24, (x>>16)&0xff, (x>>8)&0xff, x&0xff) 3736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* 3756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Some ranges of GB 18030 where both the Unicode code points and the 3766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * GB four-byte sequences are contiguous and are handled 
algorithmically by 3776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * the special callback functions below. 3786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * The values are start & end of Unicode & GB codes. 3796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 3806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Note that single surrogates are not mapped by GB 18030 3816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * as of the re-released mapping tables from 2000-nov-30. 3826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 3836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const uint32_t 3846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orggb18030Ranges[14][4]={ 3856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org {0x10000, 0x10FFFF, LINEAR(0x90308130), LINEAR(0xE3329A35)}, 3866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org {0x9FA6, 0xD7FF, LINEAR(0x82358F33), LINEAR(0x8336C738)}, 3876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org {0x0452, 0x1E3E, LINEAR(0x8130D330), LINEAR(0x8135F436)}, 3886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org {0x1E40, 0x200F, LINEAR(0x8135F438), LINEAR(0x8136A531)}, 3896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org {0xE865, 0xF92B, LINEAR(0x8336D030), LINEAR(0x84308534)}, 3906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org {0x2643, 0x2E80, LINEAR(0x8137A839), LINEAR(0x8138FD38)}, 3916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org {0xFA2A, 0xFE2F, LINEAR(0x84309C38), LINEAR(0x84318537)}, 3926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org {0x3CE1, 0x4055, LINEAR(0x8231D438), LINEAR(0x8232AF32)}, 3936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org {0x361B, 0x3917, LINEAR(0x8230A633), LINEAR(0x8230F237)}, 3946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org {0x49B8, 0x4C76, LINEAR(0x8234A131), LINEAR(0x8234E733)}, 
3956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org {0x4160, 0x4336, LINEAR(0x8232C937), LINEAR(0x8232F837)}, 3966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org {0x478E, 0x4946, LINEAR(0x8233E838), LINEAR(0x82349638)}, 3976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org {0x44D7, 0x464B, LINEAR(0x8233A339), LINEAR(0x8233C931)}, 3986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org {0xFFE6, 0xFFFF, LINEAR(0x8431A234), LINEAR(0x8431A439)} 3996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}; 4006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* bit flag for UConverter.options indicating GB 18030 special handling */ 4026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define _MBCS_OPTION_GB18030 0x8000 4036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* bit flag for UConverter.options indicating KEIS,JEF,JIF special handling */ 4056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define _MBCS_OPTION_KEIS 0x01000 4066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define _MBCS_OPTION_JEF 0x02000 4076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define _MBCS_OPTION_JIPS 0x04000 4086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define KEIS_SO_CHAR_1 0x0A 4106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define KEIS_SO_CHAR_2 0x42 4116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define KEIS_SI_CHAR_1 0x0A 4126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define KEIS_SI_CHAR_2 0x41 4136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define JEF_SO_CHAR 0x28 4156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define JEF_SI_CHAR 0x29 
4166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define JIPS_SO_CHAR_1 0x1A 4186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define JIPS_SO_CHAR_2 0x70 4196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define JIPS_SI_CHAR_1 0x1A 4206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define JIPS_SI_CHAR_2 0x71 4216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgenum SISO_Option { 4236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org SI, 4246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org SO 4256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}; 4266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgtypedef enum SISO_Option SISO_Option; 4276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic int32_t getSISOBytes(SISO_Option option, uint32_t cnvOption, uint8_t *value) { 4296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t SISOLength = 0; 4306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org switch (option) { 4326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case SI: 4336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if ((cnvOption&_MBCS_OPTION_KEIS)!=0) { 4346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value[0] = KEIS_SI_CHAR_1; 4356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value[1] = KEIS_SI_CHAR_2; 4366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org SISOLength = 2; 4376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if ((cnvOption&_MBCS_OPTION_JEF)!=0) { 4386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value[0] = JEF_SI_CHAR; 4396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org SISOLength = 1; 
4406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if ((cnvOption&_MBCS_OPTION_JIPS)!=0) { 4416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value[0] = JIPS_SI_CHAR_1; 4426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value[1] = JIPS_SI_CHAR_2; 4436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org SISOLength = 2; 4446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 4456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value[0] = UCNV_SI; 4466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org SISOLength = 1; 4476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 4496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case SO: 4506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if ((cnvOption&_MBCS_OPTION_KEIS)!=0) { 4516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value[0] = KEIS_SO_CHAR_1; 4526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value[1] = KEIS_SO_CHAR_2; 4536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org SISOLength = 2; 4546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if ((cnvOption&_MBCS_OPTION_JEF)!=0) { 4556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value[0] = JEF_SO_CHAR; 4566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org SISOLength = 1; 4576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if ((cnvOption&_MBCS_OPTION_JIPS)!=0) { 4586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value[0] = JIPS_SO_CHAR_1; 4596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value[1] = JIPS_SO_CHAR_2; 4606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org SISOLength = 2; 4616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 4626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value[0] = UCNV_SO; 4636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org SISOLength = 
1; 4646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 4666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org default: 4676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* Should never happen. */ 4686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 4696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return SISOLength; 4726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 4736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* Miscellaneous ------------------------------------------------------------ */ 4756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/** 4776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Callback from ucnv_MBCSEnumToUnicode(), takes 32 mappings from 4786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * consecutive sequences of bytes, starting from the one encoded in value, 4796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * to Unicode code points. (Multiple mappings to reduce per-function call overhead.) 4806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Does not currently support m:n mappings or reverse fallbacks. 4816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * This function will not be called for sequences of bytes with leading zeros. 
4826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 4836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param context an opaque pointer, as passed into ucnv_MBCSEnumToUnicode() 4846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param value contains 1..4 bytes of the first byte sequence, right-aligned 4856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @param codePoints resulting Unicode code points, or negative if a byte sequence does 4866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * not map to anything 4876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @return TRUE to continue enumeration, FALSE to stop 4886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 4896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgtypedef UBool U_CALLCONV 4906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUConverterEnumToUCallback(const void *context, uint32_t value, UChar32 codePoints[32]); 4916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* similar to ucnv_MBCSGetNextUChar() but recursive */ 4936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic UBool 4946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgenumToU(UConverterMBCSTable *mbcsTable, int8_t stateProps[], 4956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t state, uint32_t offset, 4966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint32_t value, 4976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UConverterEnumToUCallback *callback, const void *context, 4986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode *pErrorCode) { 4996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 codePoints[32]; 5006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const int32_t *row; 5016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const uint16_t *unicodeCodeUnits; 
5026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 anyCodePoints; 5036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t b, limit; 5046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org row=mbcsTable->stateTable[state]; 5066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org unicodeCodeUnits=mbcsTable->unicodeCodeUnits; 5076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value<<=8; 5096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org anyCodePoints=-1; /* becomes non-negative if there is a mapping */ 5106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org b=(stateProps[state]&0x38)<<2; 5126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(b==0 && stateProps[state]>=0x40) { 5136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* skip byte sequences with leading zeros because they are not stored in the fromUnicode table */ 5146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org codePoints[0]=U_SENTINEL; 5156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org b=1; 5166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org limit=((stateProps[state]&7)+1)<<5; 5186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while(b<limit) { 5196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t entry=row[b]; 5206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(MBCS_ENTRY_IS_TRANSITION(entry)) { 5216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t nextState=MBCS_ENTRY_TRANSITION_STATE(entry); 5226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(stateProps[nextState]>=0) { 5236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* recurse to a state with non-ignorable actions */ 
5246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(!enumToU( 5256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org mbcsTable, stateProps, nextState, 5266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org offset+MBCS_ENTRY_TRANSITION_OFFSET(entry), 5276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value|(uint32_t)b, 5286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org callback, context, 5296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pErrorCode)) { 5306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 5316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org codePoints[b&0x1f]=U_SENTINEL; 5346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 5356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c; 5366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t action; 5376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 5396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * An if-else-if chain provides more reliable performance for 5406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * the most common cases compared to a switch. 
5416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 5426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org action=MBCS_ENTRY_FINAL_ACTION(entry); 5436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(action==MBCS_STATE_VALID_DIRECT_16) { 5446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* output BMP code point */ 5456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); 5466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(action==MBCS_STATE_VALID_16) { 5476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t finalOffset=offset+MBCS_ENTRY_FINAL_VALUE_16(entry); 5486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=unicodeCodeUnits[finalOffset]; 5496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(c<0xfffe) { 5506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* output BMP code point */ 5516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 5526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=U_SENTINEL; 5536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(action==MBCS_STATE_VALID_16_PAIR) { 5556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t finalOffset=offset+MBCS_ENTRY_FINAL_VALUE_16(entry); 5566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=unicodeCodeUnits[finalOffset++]; 5576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(c<0xd800) { 5586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* output BMP code point below 0xd800 */ 5596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(c<=0xdbff) { 5606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* output roundtrip or fallback supplementary code point */ 5616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=((c&0x3ff)<<10)+unicodeCodeUnits[finalOffset]+(0x10000-0xdc00); 
5626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(c==0xe000) { 5636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* output roundtrip BMP code point above 0xd800 or fallback BMP code point */ 5646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=unicodeCodeUnits[finalOffset]; 5656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 5666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=U_SENTINEL; 5676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(action==MBCS_STATE_VALID_DIRECT_20) { 5696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* output supplementary code point */ 5706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=(UChar32)(MBCS_ENTRY_FINAL_VALUE(entry)+0x10000); 5716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 5726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=U_SENTINEL; 5736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org codePoints[b&0x1f]=c; 5766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org anyCodePoints&=c; 5776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(((++b)&0x1f)==0) { 5796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(anyCodePoints>=0) { 5806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(!callback(context, value|(uint32_t)(b-0x20), codePoints)) { 5816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 5826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org anyCodePoints=-1; 5846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 
5866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return TRUE; 5886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 5896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* 5916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Only called if stateProps[state]==-1. 5926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * A recursive call may do stateProps[state]|=0x40 if this state is the target of an 5936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * MBCS_STATE_CHANGE_ONLY. 5946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 5956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic int8_t 5966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orggetStateProp(const int32_t (*stateTable)[256], int8_t stateProps[], int state) { 5976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const int32_t *row; 5986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t min, max, entry, nextState; 5996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org row=stateTable[state]; 6016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org stateProps[state]=0; 6026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* find first non-ignorable state */ 6046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for(min=0;; ++min) { 6056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org entry=row[min]; 6066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org nextState=MBCS_ENTRY_STATE(entry); 6076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(stateProps[nextState]==-1) { 6086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org getStateProp(stateTable, stateProps, nextState); 
6096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(MBCS_ENTRY_IS_TRANSITION(entry)) { 6116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(stateProps[nextState]>=0) { 6126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 6136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(MBCS_ENTRY_FINAL_ACTION(entry)<MBCS_STATE_UNASSIGNED) { 6156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 6166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(min==0xff) { 6186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org stateProps[state]=-0x40; /* (int8_t)0xc0 */ 6196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return stateProps[state]; 6206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org stateProps[state]|=(int8_t)((min>>5)<<3); 6236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* find last non-ignorable state */ 6256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for(max=0xff; min<max; --max) { 6266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org entry=row[max]; 6276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org nextState=MBCS_ENTRY_STATE(entry); 6286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(stateProps[nextState]==-1) { 6296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org getStateProp(stateTable, stateProps, nextState); 6306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(MBCS_ENTRY_IS_TRANSITION(entry)) { 6326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
if(stateProps[nextState]>=0) { 6336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 6346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(MBCS_ENTRY_FINAL_ACTION(entry)<MBCS_STATE_UNASSIGNED) { 6366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 6376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org stateProps[state]|=(int8_t)(max>>5); 6406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* recurse further and collect direct-state information */ 6426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while(min<=max) { 6436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org entry=row[min]; 6446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org nextState=MBCS_ENTRY_STATE(entry); 6456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(stateProps[nextState]==-1) { 6466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org getStateProp(stateTable, stateProps, nextState); 6476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(MBCS_ENTRY_IS_FINAL(entry)) { 6496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org stateProps[nextState]|=0x40; 6506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(MBCS_ENTRY_FINAL_ACTION(entry)<=MBCS_STATE_FALLBACK_DIRECT_20) { 6516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org stateProps[state]|=0x40; 6526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++min; 6556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
return stateProps[state]; 6576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 6586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* 6606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Internal function enumerating the toUnicode data of an MBCS converter. 6616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Currently only used for reconstituting data for a MBCS_OPT_NO_FROM_U 6626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * table, but could also be used for a future ucnv_getUnicodeSet() option 6636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * that includes reverse fallbacks (after updating this function's implementation). 6646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Currently only handles roundtrip mappings. 6656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Does not currently handle extensions. 6666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 6676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic void 6686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgucnv_MBCSEnumToUnicode(UConverterMBCSTable *mbcsTable, 6696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UConverterEnumToUCallback *callback, const void *context, 6706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode *pErrorCode) { 6716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 6726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Properties for each state, to speed up the enumeration. 6736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Ignorable actions are unassigned/illegal/state-change-only: 6746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * They do not lead to mappings. 
6756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 6766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Bits 7..6: 6776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 1 direct/initial state (stateful converters have multiple) 6786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 0 non-initial state with transitions or with non-ignorable result actions 6796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * -1 final state with only ignorable actions 6806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 6816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Bits 5..3: 6826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * The lowest byte value with non-ignorable actions is 6836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * value<<5 (rounded down). 6846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 6856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Bits 2..0: 6866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * The highest byte value with non-ignorable actions is 6876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * (value<<5)&0x1f (rounded up). 
6886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 6896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int8_t stateProps[MBCS_MAX_STATE_COUNT]; 6906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t state; 6916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uprv_memset(stateProps, -1, sizeof(stateProps)); 6936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* recurse from state 0 and set all stateProps */ 6956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org getStateProp(mbcsTable->stateTable, stateProps, 0); 6966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for(state=0; state<mbcsTable->countStates; ++state) { 6986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /*if(stateProps[state]==-1) { 6996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org printf("unused/unreachable <icu:state> %d\n", state); 7006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org }*/ 7016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(stateProps[state]>=0x40) { 7026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* start from each direct state */ 7036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org enumToU( 7046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org mbcsTable, stateProps, state, 0, 0, 7056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org callback, context, 7066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pErrorCode); 7076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 7086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 7096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 7106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 7116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CFUNC void 
ucnv_MBCSGetFilteredUnicodeSetForUnicode(const UConverterSharedData *sharedData,
                                         const USetAdder *sa,
                                         UConverterUnicodeSet which,
                                         UConverterSetFilter filter,
                                         UErrorCode *pErrorCode) {
    /*
     * Walks the from-Unicode trie of sharedData->mbcs and calls
     * sa->add(sa->set, c) for every code point c that has a mapping
     * selected by "which" (roundtrips only, or roundtrips and fallbacks)
     * and accepted by "filter". Afterwards the extension table is handled
     * by ucnv_extGetUnicodeSet().
     *
     * For multi-byte output types an unknown filter value sets
     * *pErrorCode=U_INTERNAL_PROGRAM_ERROR and returns early, without
     * calling ucnv_extGetUnicodeSet(). The single-byte (MBCS_OUTPUT_1)
     * branch does not look at "filter" at all.
     */
    const UConverterMBCSTable *mbcsTable;
    const uint16_t *table;

    uint32_t st3;
    uint16_t st1, maxStage1, st2;

    UChar32 c;

    /* enumerate the from-Unicode trie table */
    mbcsTable=&sharedData->mbcs;
    table=mbcsTable->fromUnicodeTable;
    if(mbcsTable->unicodeMask&UCNV_HAS_SUPPLEMENTARY) {
        maxStage1=0x440;
    } else {
        maxStage1=0x40;
    }

    c=0; /* keep track of the current code point while enumerating */

    if(mbcsTable->outputType==MBCS_OUTPUT_1) {
        /* single-byte output: stage 2 and the results are 16-bit units */
        const uint16_t *stage2, *stage3, *results;
        uint16_t minValue;

        results=(const uint16_t *)mbcsTable->fromUnicodeBytes;

        /*
         * Set a threshold variable for selecting which mappings to use.
         * See ucnv_MBCSSingleFromBMPWithOffsets() and
         * MBCS_SINGLE_RESULT_FROM_U() for details.
         */
        if(which==UCNV_ROUNDTRIP_SET) {
            /* use only roundtrips */
            minValue=0xf00;
        } else /* UCNV_ROUNDTRIP_AND_FALLBACK_SET */ {
            /* use all roundtrip and fallback results */
            minValue=0x800;
        }

        /* each stage 1 entry covers 64 stage 2 entries of 16 code points each (1024 code points) */
        for(st1=0; st1<maxStage1; ++st1) {
            st2=table[st1];
            if(st2>maxStage1) {
                stage2=table+st2;
                /* st2 is reused from here on as the index inside the stage 2 block */
                for(st2=0; st2<64; ++st2) {
                    if((st3=stage2[st2])!=0) {
                        /* read the stage 3 block */
                        stage3=results+st3;

                        /* 16 results per stage 3 block; the loop ends when c reaches the next multiple of 16 */
                        do {
                            if(*stage3++>=minValue) {
                                sa->add(sa->set, c);
                            }
                        } while((++c&0xf)!=0);
                    } else {
                        c+=16; /* empty stage 3 block */
                    }
                }
            } else {
                c+=1024; /* empty stage 2 block */
            }
        }
    } else {
        /* multi-byte output: stage 2 entries are 32-bit, results are st3Multiplier bytes each */
        const uint32_t *stage2;
        const uint8_t *stage3, *bytes;
        uint32_t st3Multiplier;
        uint32_t value;
        UBool useFallback;

        bytes=mbcsTable->fromUnicodeBytes;

        useFallback=(UBool)(which==UCNV_ROUNDTRIP_AND_FALLBACK_SET);

        /* number of bytes stored per code point in stage 3 */
        switch(mbcsTable->outputType) {
        case MBCS_OUTPUT_3:
        case MBCS_OUTPUT_4_EUC:
            st3Multiplier=3;
            break;
        case MBCS_OUTPUT_4:
            st3Multiplier=4;
            break;
        default:
            st3Multiplier=2;
            break;
        }

        for(st1=0; st1<maxStage1; ++st1) {
            st2=table[st1];
            /* stage 1 values are compared against half of maxStage1 here because they index 32-bit units */
            if(st2>(maxStage1>>1)) {
                stage2=(const uint32_t *)table+st2;
                for(st2=0; st2<64; ++st2) {
                    if((st3=stage2[st2])!=0) {
                        /* read the stage 3 block */
                        /* low 16 bits of the stage 2 entry: stage 3 block index */
                        stage3=bytes+st3Multiplier*16*(uint32_t)(uint16_t)st3;

                        /* get the roundtrip flags for the stage 3 block */
                        /* high 16 bits: one flag per code point, consumed from bit 0 upwards */
                        st3>>=16;

                        /*
                         * Add code points for which the roundtrip flag is set,
                         * or which map to non-zero bytes if we use fallbacks.
                         * See ucnv_MBCSFromUnicodeWithOffsets() for details.
                         */
                        switch(filter) {
                        case UCNV_SET_FILTER_NONE:
                            /*
                             * stage3 is only read in the useFallback branch; when
                             * useFallback is false it is never dereferenced in this
                             * loop, so not advancing it for a cleared roundtrip flag
                             * is harmless.
                             */
                            do {
                                if(st3&1) {
                                    sa->add(sa->set, c);
                                    stage3+=st3Multiplier;
                                } else if(useFallback) {
                                    /* OR together all st3Multiplier result bytes; non-zero means a fallback exists */
                                    uint8_t b=0;
                                    switch(st3Multiplier) {
                                    case 4:
                                        b|=*stage3++;
                                    case 3: /*fall through*/
                                        b|=*stage3++;
                                    case 2: /*fall through*/
                                        b|=stage3[0]|stage3[1];
                                        stage3+=2;
                                    default:
                                        break;
                                    }
                                    if(b!=0) {
                                        sa->add(sa->set, c);
                                    }
                                }
                                st3>>=1;
                            } while((++c&0xf)!=0);
                            break;
                        case UCNV_SET_FILTER_DBCS_ONLY:
                            /* Ignore single-byte results (<0x100). */
                            /* NOTE(review): steps by a hard-coded 2 and reads 16-bit units, so presumably only used with 2-byte results - confirm against callers */
                            do {
                                if(((st3&1)!=0 || useFallback) && *((const uint16_t *)stage3)>=0x100) {
                                    sa->add(sa->set, c);
                                }
                                st3>>=1;
                                stage3+=2; /* +=st3Multiplier */
                            } while((++c&0xf)!=0);
                            break;
                        case UCNV_SET_FILTER_2022_CN:
                            /* Only add code points that map to CNS 11643 planes 1 & 2 for non-EXT ISO-2022-CN. */
                            /* only the first of the three result bytes is inspected (0x81 or 0x82) */
                            do {
                                if(((st3&1)!=0 || useFallback) && ((value=*stage3)==0x81 || value==0x82)) {
                                    sa->add(sa->set, c);
                                }
                                st3>>=1;
                                stage3+=3; /* +=st3Multiplier */
                            } while((++c&0xf)!=0);
                            break;
                        case UCNV_SET_FILTER_SJIS:
                            /* Only add code points that map to Shift-JIS codes corresponding to JIS X 0208. */
                            do {
                                if(((st3&1)!=0 || useFallback) && (value=*((const uint16_t *)stage3))>=0x8140 && value<=0xeffc) {
                                    sa->add(sa->set, c);
                                }
                                st3>>=1;
                                stage3+=2; /* +=st3Multiplier */
                            } while((++c&0xf)!=0);
                            break;
                        case UCNV_SET_FILTER_GR94DBCS:
                            /* Only add code points that map to ISO 2022 GR 94 DBCS codes (each byte A1..FE). */
                            /* first test: whole 16-bit value in A1A1..FEFE; second test: low byte in A1..FE */
                            do {
                                if( ((st3&1)!=0 || useFallback) &&
                                    (uint16_t)((value=*((const uint16_t *)stage3)) - 0xa1a1)<=(0xfefe - 0xa1a1) &&
                                    (uint8_t)(value-0xa1)<=(0xfe - 0xa1)
                                ) {
                                    sa->add(sa->set, c);
                                }
                                st3>>=1;
                                stage3+=2; /* +=st3Multiplier */
                            } while((++c&0xf)!=0);
                            break;
                        case UCNV_SET_FILTER_HZ:
                            /* Only add code points that are suitable for HZ DBCS (lead byte A1..FD). */
                            /* same shape as the GR94DBCS test, with the upper bound lowered to FDFE */
                            do {
                                if( ((st3&1)!=0 || useFallback) &&
                                    (uint16_t)((value=*((const uint16_t *)stage3))-0xa1a1)<=(0xfdfe - 0xa1a1) &&
                                    (uint8_t)(value-0xa1)<=(0xfe - 0xa1)
                                ) {
                                    sa->add(sa->set, c);
                                }
                                st3>>=1;
                                stage3+=2; /* +=st3Multiplier */
                            } while((++c&0xf)!=0);
                            break;
                        default:
                            /* unknown filter value: report and stop without touching the extension table */
                            *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
                            return;
                        }
                    } else {
                        c+=16; /* empty stage 3 block */
                    }
                }
            } else {
                c+=1024; /* empty stage 2 block */
            }
        }
    }

    /* mappings that live in the extension table are handled separately */
    ucnv_extGetUnicodeSet(sharedData, sa, which, filter, pErrorCode);
}

/*
 * Unfiltered variant of ucnv_MBCSGetFilteredUnicodeSetForUnicode():
 * uses UCNV_SET_FILTER_DBCS_ONLY when the table's outputType is
 * MBCS_OUTPUT_DBCS_ONLY, and UCNV_SET_FILTER_NONE otherwise.
 */
U_CFUNC void
ucnv_MBCSGetUnicodeSetForUnicode(const UConverterSharedData *sharedData,
                                 const USetAdder *sa,
                                 UConverterUnicodeSet which,
                                 UErrorCode *pErrorCode) {
    ucnv_MBCSGetFilteredUnicodeSetForUnicode(
        sharedData, sa, which,
        sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY ?
            UCNV_SET_FILTER_DBCS_ONLY :
            UCNV_SET_FILTER_NONE,
        pErrorCode);
}

/*
 * Converter-level entry point. With the GB 18030 option set, the ranges
 * 0..0xd7ff and 0xe000..0x10ffff are added directly (everything except
 * 0xd800..0xdfff), independent of "which"; otherwise the converter's
 * shared data is enumerated.
 */
static void
ucnv_MBCSGetUnicodeSet(const UConverter *cnv,
                   const USetAdder *sa,
                   UConverterUnicodeSet which,
                   UErrorCode *pErrorCode) {
    if(cnv->options&_MBCS_OPTION_GB18030) {
        sa->addRange(sa->set, 0, 0xd7ff);
        sa->addRange(sa->set, 0xe000, 0x10ffff);
    } else {
        ucnv_MBCSGetUnicodeSetForUnicode(cnv->sharedData, sa, which, pErrorCode);
    }
}

/* conversion extensions for input not in the main table -------------------- */

/*
 * Hardcoded extension handling for GB 18030.
 * Definition of LINEAR macros and gb18030Ranges see near the beginning of the file.
 *
 * In the future, conversion extensions may handle m:n mappings and delta tables,
 * see http://source.icu-project.org/repos/icu/icuhtml/trunk/design/conversion/conversion_extensions.html
 *
 * If an input character cannot be mapped, then these functions set an error
 * code. The framework will then call the callback function.
9546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 9556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 9566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* 9576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @return if(U_FAILURE) return the code point for cnv->fromUChar32 9586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * else return 0 after output has been written to the target 9596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 9606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic UChar32 9616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org_extFromU(UConverter *cnv, const UConverterSharedData *sharedData, 9626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 cp, 9636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UChar **source, const UChar *sourceLimit, 9646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint8_t **target, const uint8_t *targetLimit, 9656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t **offsets, int32_t sourceIndex, 9666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool flush, 9676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode *pErrorCode) { 9686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const int32_t *cx; 9696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 9706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->useSubChar1=FALSE; 9716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 9726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if( (cx=sharedData->mbcs.extIndexes)!=NULL && 9736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_extInitialMatchFromU( 9746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv, cx, 9756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cp, source, sourceLimit, 9766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (char **)target, (char *)targetLimit, 
9776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org offsets, sourceIndex, 9786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org flush, 9796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pErrorCode) 9806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ) { 9816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0; /* an extension mapping handled the input */ 9826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 9836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 9846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* GB 18030 */ 9856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if((cnv->options&_MBCS_OPTION_GB18030)!=0) { 9866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const uint32_t *range; 9876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t i; 9886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 9896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org range=gb18030Ranges[0]; 9906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for(i=0; i<sizeof(gb18030Ranges)/sizeof(gb18030Ranges[0]); range+=4, ++i) { 9916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(range[0]<=(uint32_t)cp && (uint32_t)cp<=range[1]) { 9926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* found the Unicode code point, output the four-byte sequence for it */ 9936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint32_t linear; 9946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org char bytes[4]; 9956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 9966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* get the linear value of the first GB 18030 code in this range */ 9976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org linear=range[2]-LINEAR_18030_BASE; 9986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 9996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* add the offset from the beginning 
of the range */ 10006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org linear+=((uint32_t)cp-range[0]); 10016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 10026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* turn this into a four-byte sequence */ 10036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org bytes[3]=(char)(0x30+linear%10); linear/=10; 10046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org bytes[2]=(char)(0x81+linear%126); linear/=126; 10056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org bytes[1]=(char)(0x30+linear%10); linear/=10; 10066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org bytes[0]=(char)(0x81+linear); 10076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 10086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* output this sequence */ 10096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_fromUWriteBytes(cnv, 10106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org bytes, 4, (char **)target, (char *)targetLimit, 10116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org offsets, sourceIndex, pErrorCode); 10126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0; 10136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 10146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 10156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 10166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 10176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* no mapping */ 10186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pErrorCode=U_INVALID_CHAR_FOUND; 10196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return cp; 10206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 10216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 10226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* 10236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Input sequence: 
cnv->toUBytes[0..length[ 10246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * @return if(U_FAILURE) return the length (toULength, byteIndex) for the input 10256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * else return 0 after output has been written to the target 10266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 10276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic int8_t 10286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org_extToU(UConverter *cnv, const UConverterSharedData *sharedData, 10296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int8_t length, 10306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const uint8_t **source, const uint8_t *sourceLimit, 10316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar **target, const UChar *targetLimit, 10326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t **offsets, int32_t sourceIndex, 10336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool flush, 10346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode *pErrorCode) { 10356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const int32_t *cx; 10366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 10376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if( (cx=sharedData->mbcs.extIndexes)!=NULL && 10386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_extInitialMatchToU( 10396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv, cx, 10406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length, (const char **)source, (const char *)sourceLimit, 10416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org target, targetLimit, 10426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org offsets, sourceIndex, 10436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org flush, 10446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pErrorCode) 
10456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ) { 10466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0; /* an extension mapping handled the input */ 10476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 10486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 10496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* GB 18030 */ 10506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(length==4 && (cnv->options&_MBCS_OPTION_GB18030)!=0) { 10516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const uint32_t *range; 10526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint32_t linear; 10536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t i; 10546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 10556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org linear=LINEAR_18030(cnv->toUBytes[0], cnv->toUBytes[1], cnv->toUBytes[2], cnv->toUBytes[3]); 10566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org range=gb18030Ranges[0]; 10576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for(i=0; i<sizeof(gb18030Ranges)/sizeof(gb18030Ranges[0]); range+=4, ++i) { 10586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(range[2]<=linear && linear<=range[3]) { 10596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* found the sequence, output the Unicode code point for it */ 10606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pErrorCode=U_ZERO_ERROR; 10616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 10626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* add the linear difference between the input and start sequences to the start code point */ 10636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org linear=range[0]+(linear-range[2]); 10646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 10656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* output this code point */ 
10666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_toUWriteCodePoint(cnv, linear, target, targetLimit, offsets, sourceIndex, pErrorCode); 10676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 10686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0; 10696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 10706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 10716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 10726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 10736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* no mapping */ 10746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pErrorCode=U_INVALID_CHAR_FOUND; 10756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return length; 10766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 10776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 10786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* EBCDIC swap LF<->NL ------------------------------------------------------ */ 10796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 10806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* 10816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * This code modifies a standard EBCDIC<->Unicode mapping table for 10826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * OS/390 (z/OS) Unix System Services (Open Edition). 
10836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * The difference is in the mapping of Line Feed and New Line control codes: 10846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Standard EBCDIC maps 10856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 10866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * <U000A> \x25 |0 10876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * <U0085> \x15 |0 10886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 10896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * but OS/390 USS EBCDIC swaps the control codes for LF and NL, 10906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * mapping 10916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 10926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * <U000A> \x15 |0 10936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * <U0085> \x25 |0 10946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 10956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * This code modifies a loaded standard EBCDIC<->Unicode mapping table 10966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * by copying it into allocated memory and swapping the LF and NL values. 10976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * It allows to support the same EBCDIC charset in both versions without 10986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * duplicating the entire installed table. 
10996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 11006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 11016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* standard EBCDIC codes */ 11026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define EBCDIC_LF 0x25 11036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define EBCDIC_NL 0x15 11046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 11056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* standard EBCDIC codes with roundtrip flag as stored in Unicode-to-single-byte tables */ 11066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define EBCDIC_RT_LF 0xf25 11076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define EBCDIC_RT_NL 0xf15 11086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 11096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* Unicode code points */ 11106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define U_LF 0x0a 11116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define U_NL 0x85 11126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 11136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic UBool 11146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org_EBCDICSwapLFNL(UConverterSharedData *sharedData, UErrorCode *pErrorCode) { 11156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UConverterMBCSTable *mbcsTable; 11166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 11176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const uint16_t *table, *results; 11186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const uint8_t *bytes; 11196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 11206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t (*newStateTable)[256]; 11216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint16_t *newResults; 11226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint8_t 
*p; 11236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org char *name; 11246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 11256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint32_t stage2Entry; 11266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint32_t size, sizeofFromUBytes; 11276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 11286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org mbcsTable=&sharedData->mbcs; 11296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 11306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org table=mbcsTable->fromUnicodeTable; 11316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org bytes=mbcsTable->fromUnicodeBytes; 11326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org results=(const uint16_t *)bytes; 11336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 11346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 11356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Check that this is an EBCDIC table with SBCS portion - 11366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * SBCS or EBCDIC_STATEFUL with standard EBCDIC LF and NL mappings. 11376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 11386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * If not, ignore the option. Options are always ignored if they do not apply. 
11396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 11406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(!( 11416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (mbcsTable->outputType==MBCS_OUTPUT_1 || mbcsTable->outputType==MBCS_OUTPUT_2_SISO) && 11426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org mbcsTable->stateTable[0][EBCDIC_LF]==MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, U_LF) && 11436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org mbcsTable->stateTable[0][EBCDIC_NL]==MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, U_NL) 11446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org )) { 11456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 11466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 11476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 11486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(mbcsTable->outputType==MBCS_OUTPUT_1) { 11496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(!( 11506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org EBCDIC_RT_LF==MBCS_SINGLE_RESULT_FROM_U(table, results, U_LF) && 11516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org EBCDIC_RT_NL==MBCS_SINGLE_RESULT_FROM_U(table, results, U_NL) 11526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org )) { 11536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 11546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 11556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else /* MBCS_OUTPUT_2_SISO */ { 11566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org stage2Entry=MBCS_STAGE_2_FROM_U(table, U_LF); 11576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(!( 11586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, U_LF)!=0 && 11596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org EBCDIC_LF==MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, U_LF) 
11606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org )) { 11616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 11626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 11636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 11646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org stage2Entry=MBCS_STAGE_2_FROM_U(table, U_NL); 11656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(!( 11666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, U_NL)!=0 && 11676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org EBCDIC_NL==MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, U_NL) 11686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org )) { 11696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 11706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 11716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 11726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 11736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(mbcsTable->fromUBytesLength>0) { 11746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 11756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * We _know_ the number of bytes in the fromUnicodeBytes array 11766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * starting with header.version 4.1. 
11776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 11786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org sizeofFromUBytes=mbcsTable->fromUBytesLength; 11796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 11806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 11816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Otherwise: 11826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * There used to be code to enumerate the fromUnicode 11836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * trie and find the highest entry, but it was removed in ICU 3.2 11846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * because it was not tested and caused a low code coverage number. 11856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * See Jitterbug 3674. 11866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * This affects only some .cnv file formats with a header.version 11876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * below 4.1, and only when swaplfnl is requested. 11886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 11896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * ucnvmbcs.c revision 1.99 is the last one with the 11906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * ucnv_MBCSSizeofFromUBytes() function. 11916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 11926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pErrorCode=U_INVALID_FORMAT_ERROR; 11936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 11946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 11956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 11966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 11976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * The table has an appropriate format. 
11986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Allocate and build 11996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * - a modified to-Unicode state table 12006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * - a modified from-Unicode output array 12016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * - a converter name string with the swap option appended 12026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 12036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org size= 12046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org mbcsTable->countStates*1024+ 12056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org sizeofFromUBytes+ 12066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UCNV_MAX_CONVERTER_NAME_LENGTH+20; 12076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org p=(uint8_t *)uprv_malloc(size); 12086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(p==NULL) { 12096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 12106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 12116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 12126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 12136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* copy and modify the to-Unicode state table */ 12146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org newStateTable=(int32_t (*)[256])p; 12156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uprv_memcpy(newStateTable, mbcsTable->stateTable, mbcsTable->countStates*1024); 12166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 12176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org newStateTable[0][EBCDIC_LF]=MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, U_NL); 12186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org newStateTable[0][EBCDIC_NL]=MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, U_LF); 
12196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 12206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* copy and modify the from-Unicode result table */ 12216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org newResults=(uint16_t *)newStateTable[mbcsTable->countStates]; 12226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uprv_memcpy(newResults, bytes, sizeofFromUBytes); 12236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 12246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* conveniently, the table access macros work on the left side of expressions */ 12256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(mbcsTable->outputType==MBCS_OUTPUT_1) { 12266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org MBCS_SINGLE_RESULT_FROM_U(table, newResults, U_LF)=EBCDIC_RT_NL; 12276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org MBCS_SINGLE_RESULT_FROM_U(table, newResults, U_NL)=EBCDIC_RT_LF; 12286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else /* MBCS_OUTPUT_2_SISO */ { 12296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org stage2Entry=MBCS_STAGE_2_FROM_U(table, U_LF); 12306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org MBCS_VALUE_2_FROM_STAGE_2(newResults, stage2Entry, U_LF)=EBCDIC_NL; 12316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 12326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org stage2Entry=MBCS_STAGE_2_FROM_U(table, U_NL); 12336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org MBCS_VALUE_2_FROM_STAGE_2(newResults, stage2Entry, U_NL)=EBCDIC_LF; 12346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 12356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 12366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* set the canonical converter name */ 12376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org name=(char *)newResults+sizeofFromUBytes; 
12386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uprv_strcpy(name, sharedData->staticData->name); 12396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uprv_strcat(name, UCNV_SWAP_LFNL_OPTION_STRING); 12406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 12416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* set the pointers */ 12426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org umtx_lock(NULL); 12436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(mbcsTable->swapLFNLStateTable==NULL) { 12446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org mbcsTable->swapLFNLStateTable=newStateTable; 12456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org mbcsTable->swapLFNLFromUnicodeBytes=(uint8_t *)newResults; 12466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org mbcsTable->swapLFNLName=name; 12476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 12486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org newStateTable=NULL; 12496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 12506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org umtx_unlock(NULL); 12516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 12526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* release the allocated memory if another thread beat us to it */ 12536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(newStateTable!=NULL) { 12546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uprv_free(newStateTable); 12556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 12566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return TRUE; 12576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 12586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 12596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* reconstitute omitted fromUnicode data ------------------------------------ */ 12606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
12616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* for details, compare with genmbcs.c MBCSAddFromUnicode() and transformEUC() */ 12626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic UBool U_CALLCONV 12636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgwriteStage3Roundtrip(const void *context, uint32_t value, UChar32 codePoints[32]) { 12646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UConverterMBCSTable *mbcsTable=(UConverterMBCSTable *)context; 12656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const uint16_t *table; 12666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint32_t *stage2; 12676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint8_t *bytes, *p; 12686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c; 12696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t i, st3; 12706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 12716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org table=mbcsTable->fromUnicodeTable; 12726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org bytes=(uint8_t *)mbcsTable->fromUnicodeBytes; 12736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 12746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* for EUC outputTypes, modify the value like genmbcs.c's transformEUC() */ 12756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org switch(mbcsTable->outputType) { 12766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case MBCS_OUTPUT_3_EUC: 12776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(value<=0xffff) { 12786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* short sequences are stored directly */ 12796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* code set 0 or 1 */ 12806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(value<=0x8effff) { 12816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* code set 2 */ 
12826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value&=0x7fff; 12836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else /* first byte is 0x8f */ { 12846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* code set 3 */ 12856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value&=0xff7f; 12866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 12876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 12886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case MBCS_OUTPUT_4_EUC: 12896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(value<=0xffffff) { 12906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* short sequences are stored directly */ 12916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* code set 0 or 1 */ 12926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(value<=0x8effffff) { 12936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* code set 2 */ 12946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value&=0x7fffff; 12956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else /* first byte is 0x8f */ { 12966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* code set 3 */ 12976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value&=0xff7fff; 12986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 12996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 13006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org default: 13016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 13026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 13036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 13046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for(i=0; i<=0x1f; ++value, ++i) { 13056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=codePoints[i]; 13066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(c<0) { 
13076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org continue; 13086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 13096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 13106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* locate the stage 2 & 3 data */ 13116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org stage2=((uint32_t *)table)+table[c>>10]+((c>>4)&0x3f); 13126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org p=bytes; 13136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org st3=(int32_t)(uint16_t)*stage2*16+(c&0xf); 13146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 13156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* write the codepage bytes into stage 3 */ 13166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org switch(mbcsTable->outputType) { 13176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case MBCS_OUTPUT_3: 13186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case MBCS_OUTPUT_4_EUC: 13196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org p+=st3*3; 13206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org p[0]=(uint8_t)(value>>16); 13216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org p[1]=(uint8_t)(value>>8); 13226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org p[2]=(uint8_t)value; 13236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 13246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case MBCS_OUTPUT_4: 13256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ((uint32_t *)p)[st3]=value; 13266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 13276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org default: 13286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 2 bytes per character */ 13296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ((uint16_t *)p)[st3]=(uint16_t)value; 13306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 
13316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 13326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 13336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* set the roundtrip flag */ 13346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *stage2|=(1UL<<(16+(c&0xf))); 13356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 13366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return TRUE; 13376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 13386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 13396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic void 13406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgreconstituteData(UConverterMBCSTable *mbcsTable, 13416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint32_t stage1Length, uint32_t stage2Length, 13426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint32_t fullStage2Length, /* lengths are numbers of units, not bytes */ 13436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode *pErrorCode) { 13446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint16_t *stage1; 13456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint32_t *stage2; 13466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint32_t dataLength=stage1Length*2+fullStage2Length*4+mbcsTable->fromUBytesLength; 13476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org mbcsTable->reconstitutedData=(uint8_t *)uprv_malloc(dataLength); 13486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(mbcsTable->reconstitutedData==NULL) { 13496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 13506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 13516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 13526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uprv_memset(mbcsTable->reconstitutedData, 0, dataLength); 
13536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 13546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* copy existing data and reroute the pointers */ 13556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org stage1=(uint16_t *)mbcsTable->reconstitutedData; 13566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uprv_memcpy(stage1, mbcsTable->fromUnicodeTable, stage1Length*2); 13576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 13586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org stage2=(uint32_t *)(stage1+stage1Length); 13596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uprv_memcpy(stage2+(fullStage2Length-stage2Length), 13606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org mbcsTable->fromUnicodeTable+stage1Length, 13616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org stage2Length*4); 13626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 13636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org mbcsTable->fromUnicodeTable=stage1; 13646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org mbcsTable->fromUnicodeBytes=(uint8_t *)(stage2+fullStage2Length); 13656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 13666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* indexes into stage 2 count from the bottom of the fromUnicodeTable */ 13676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org stage2=(uint32_t *)stage1; 13686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 13696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* reconstitute the initial part of stage 2 from the mbcsIndex */ 13706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 13716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t stageUTF8Length=((int32_t)mbcsTable->maxFastUChar+1)>>6; 13726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t stageUTF8Index=0; 13736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t st1, st2, 
st3, i; 13746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 13756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for(st1=0; stageUTF8Index<stageUTF8Length; ++st1) { 13766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org st2=stage1[st1]; 13776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(st2!=stage1Length/2) { 13786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* each stage 2 block has 64 entries corresponding to 16 entries in the mbcsIndex */ 13796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for(i=0; i<16; ++i) { 13806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org st3=mbcsTable->mbcsIndex[stageUTF8Index++]; 13816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(st3!=0) { 13826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* an stage 2 entry's index is per stage 3 16-block, not per stage 3 entry */ 13836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org st3>>=4; 13846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 13856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 4 stage 2 entries point to 4 consecutive stage 3 16-blocks which are 13866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * allocated together as a single 64-block for access from the mbcsIndex 13876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 13886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org stage2[st2++]=st3++; 13896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org stage2[st2++]=st3++; 13906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org stage2[st2++]=st3++; 13916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org stage2[st2++]=st3; 13926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 13936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* no stage 3 block, skip */ 13946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org st2+=4; 13956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 
13966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 13976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 13986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* no stage 2 block, skip */ 13996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org stageUTF8Index+=16; 14006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 14016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 14026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 14036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 14046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* reconstitute fromUnicodeBytes with roundtrips from toUnicode data */ 14056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_MBCSEnumToUnicode(mbcsTable, writeStage3Roundtrip, mbcsTable, pErrorCode); 14066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 14076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 14086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* MBCS setup functions ----------------------------------------------------- */ 14096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 14106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic void 14116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgucnv_MBCSLoad(UConverterSharedData *sharedData, 14126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UConverterLoadArgs *pArgs, 14136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const uint8_t *raw, 14146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode *pErrorCode) { 14156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UDataInfo info; 14166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UConverterMBCSTable *mbcsTable=&sharedData->mbcs; 14176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org _MBCSHeader *header=(_MBCSHeader *)raw; 14186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint32_t offset; 
14196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint32_t headerLength; 14206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool noFromU=FALSE; 14216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 14226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(header->version[0]==4) { 14236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org headerLength=MBCS_HEADER_V4_LENGTH; 14246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(header->version[0]==5 && header->version[1]>=3 && 14256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (header->options&MBCS_OPT_UNKNOWN_INCOMPATIBLE_MASK)==0) { 14266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org headerLength=header->options&MBCS_OPT_LENGTH_MASK; 14276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org noFromU=(UBool)((header->options&MBCS_OPT_NO_FROM_U)!=0); 14286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 14296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pErrorCode=U_INVALID_TABLE_FORMAT; 14306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 14316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 14326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 14336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org mbcsTable->outputType=(uint8_t)header->flags; 14346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(noFromU && mbcsTable->outputType==MBCS_OUTPUT_1) { 14356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pErrorCode=U_INVALID_TABLE_FORMAT; 14366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 14376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 14386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 14396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* extension data, header version 4.2 and higher */ 14406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org offset=header->flags>>8; 
14416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(offset!=0) { 14426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org mbcsTable->extIndexes=(const int32_t *)(raw+offset); 14436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 14446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 14456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(mbcsTable->outputType==MBCS_OUTPUT_EXT_ONLY) { 14466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UConverterLoadArgs args={ 0 }; 14476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UConverterSharedData *baseSharedData; 14486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const int32_t *extIndexes; 14496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char *baseName; 14506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 14516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* extension-only file, load the base table and set values appropriately */ 14526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if((extIndexes=mbcsTable->extIndexes)==NULL) { 14536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* extension-only file without extension */ 14546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pErrorCode=U_INVALID_TABLE_FORMAT; 14556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 14566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 14576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 14586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(pArgs->nestedLoads!=1) { 14596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* an extension table must not be loaded as a base table */ 14606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pErrorCode=U_INVALID_TABLE_FILE; 14616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 14626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 
14636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 14646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* load the base table */ 14656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org baseName=(const char *)header+headerLength*4; 14666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(0==uprv_strcmp(baseName, sharedData->staticData->name)) { 14676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* forbid loading this same extension-only file */ 14686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pErrorCode=U_INVALID_TABLE_FORMAT; 14696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 14706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 14716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 14726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* TODO parse package name out of the prefix of the base name in the extension .cnv file? */ 14736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org args.size=sizeof(UConverterLoadArgs); 14746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org args.nestedLoads=2; 14756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org args.onlyTestIsLoadable=pArgs->onlyTestIsLoadable; 14766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org args.reserved=pArgs->reserved; 14776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org args.options=pArgs->options; 14786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org args.pkg=pArgs->pkg; 14796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org args.name=baseName; 14806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org baseSharedData=ucnv_load(&args, pErrorCode); 14816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U_FAILURE(*pErrorCode)) { 14826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 14836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 14846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if( 
baseSharedData->staticData->conversionType!=UCNV_MBCS || 14856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org baseSharedData->mbcs.baseSharedData!=NULL 14866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ) { 14876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_unload(baseSharedData); 14886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pErrorCode=U_INVALID_TABLE_FORMAT; 14896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 14906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 14916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(pArgs->onlyTestIsLoadable) { 14926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 14936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Exit as soon as we know that we can load the converter 14946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * and the format is valid and supported. 14956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * The worst that can happen in the following code is a memory 14966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * allocation error. 
14976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 14986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_unload(baseSharedData); 14996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 15006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 15016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 15026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* copy the base table data */ 15036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uprv_memcpy(mbcsTable, &baseSharedData->mbcs, sizeof(UConverterMBCSTable)); 15046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 15056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* overwrite values with relevant ones for the extension converter */ 15066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org mbcsTable->baseSharedData=baseSharedData; 15076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org mbcsTable->extIndexes=extIndexes; 15086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 15096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 15106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * It would be possible to share the swapLFNL data with a base converter, 15116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * but the generated name would have to be different, and the memory 15126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * would have to be free'd only once. 15136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * It is easier to just create the data for the extension converter 15146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * separately when it is requested. 
15156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 15166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org mbcsTable->swapLFNLStateTable=NULL; 15176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org mbcsTable->swapLFNLFromUnicodeBytes=NULL; 15186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org mbcsTable->swapLFNLName=NULL; 15196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 15206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 15216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * The reconstitutedData must be deleted only when the base converter 15226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * is unloaded. 15236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 15246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org mbcsTable->reconstitutedData=NULL; 15256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 15266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 15276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Set a special, runtime-only outputType if the extension converter 15286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * is a DBCS version of a base converter that also maps single bytes. 
15296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 15306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if( sharedData->staticData->conversionType==UCNV_DBCS || 15316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (sharedData->staticData->conversionType==UCNV_MBCS && 15326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org sharedData->staticData->minBytesPerChar>=2) 15336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ) { 15346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(baseSharedData->mbcs.outputType==MBCS_OUTPUT_2_SISO) { 15356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* the base converter is SI/SO-stateful */ 15366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t entry; 15376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 15386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* get the dbcs state from the state table entry for SO=0x0e */ 15396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org entry=mbcsTable->stateTable[0][0xe]; 15406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if( MBCS_ENTRY_IS_FINAL(entry) && 15416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org MBCS_ENTRY_FINAL_ACTION(entry)==MBCS_STATE_CHANGE_ONLY && 15426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org MBCS_ENTRY_FINAL_STATE(entry)!=0 15436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ) { 15446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org mbcsTable->dbcsOnlyState=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry); 15456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 15466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org mbcsTable->outputType=MBCS_OUTPUT_DBCS_ONLY; 15476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 15486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if( 15496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org baseSharedData->staticData->conversionType==UCNV_MBCS && 
15506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org baseSharedData->staticData->minBytesPerChar==1 && 15516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org baseSharedData->staticData->maxBytesPerChar==2 && 15526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org mbcsTable->countStates<=127 15536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ) { 15546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* non-stateful base converter, need to modify the state table */ 15556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t (*newStateTable)[256]; 15566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t *state; 15576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t i, count; 15586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 15596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* allocate a new state table and copy the base state table contents */ 15606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org count=mbcsTable->countStates; 15616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org newStateTable=(int32_t (*)[256])uprv_malloc((count+1)*1024); 15626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(newStateTable==NULL) { 15636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_unload(baseSharedData); 15646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 15656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 15666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 15676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 15686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uprv_memcpy(newStateTable, mbcsTable->stateTable, count*1024); 15696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 15706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* change all final single-byte entries to go to a new all-illegal state */ 
15716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org state=newStateTable[0]; 15726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for(i=0; i<256; ++i) { 15736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(MBCS_ENTRY_IS_FINAL(state[i])) { 15746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org state[i]=MBCS_ENTRY_TRANSITION(count, 0); 15756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 15766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 15776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 15786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* build the new all-illegal state */ 15796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org state=newStateTable[count]; 15806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for(i=0; i<256; ++i) { 15816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org state[i]=MBCS_ENTRY_FINAL(0, MBCS_STATE_ILLEGAL, 0); 15826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 15836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org mbcsTable->stateTable=(const int32_t (*)[256])newStateTable; 15846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org mbcsTable->countStates=(uint8_t)(count+1); 15856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org mbcsTable->stateTableOwned=TRUE; 15866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 15876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org mbcsTable->outputType=MBCS_OUTPUT_DBCS_ONLY; 15886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 15896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 15906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 15916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 15926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * unlike below for files with base tables, do not get the unicodeMask 15936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * from the sharedData; instead, use the 
base table's unicodeMask, 15946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * which we copied in the memcpy above; 15956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * this is necessary because the static data unicodeMask, especially 15966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * the UCNV_HAS_SUPPLEMENTARY flag, is part of the base table data 15976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 15986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 15996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* conversion file with a base table; an additional extension table is optional */ 16006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* make sure that the output type is known */ 16016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org switch(mbcsTable->outputType) { 16026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case MBCS_OUTPUT_1: 16036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case MBCS_OUTPUT_2: 16046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case MBCS_OUTPUT_3: 16056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case MBCS_OUTPUT_4: 16066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case MBCS_OUTPUT_3_EUC: 16076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case MBCS_OUTPUT_4_EUC: 16086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case MBCS_OUTPUT_2_SISO: 16096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* OK */ 16106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 16116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org default: 16126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pErrorCode=U_INVALID_TABLE_FORMAT; 16136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 16146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 16156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(pArgs->onlyTestIsLoadable) { 
16166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 16176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Exit as soon as we know that we can load the converter 16186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * and the format is valid and supported. 16196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * The worst that can happen in the following code is a memory 16206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * allocation error. 16216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 16226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 16236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 16246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 16256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org mbcsTable->countStates=(uint8_t)header->countStates; 16266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org mbcsTable->countToUFallbacks=header->countToUFallbacks; 16276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org mbcsTable->stateTable=(const int32_t (*)[256])(raw+headerLength*4); 16286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org mbcsTable->toUFallbacks=(const _MBCSToUFallback *)(mbcsTable->stateTable+header->countStates); 16296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org mbcsTable->unicodeCodeUnits=(const uint16_t *)(raw+header->offsetToUCodeUnits); 16306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 16316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org mbcsTable->fromUnicodeTable=(const uint16_t *)(raw+header->offsetFromUTable); 16326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org mbcsTable->fromUnicodeBytes=(const uint8_t *)(raw+header->offsetFromUBytes); 16336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org mbcsTable->fromUBytesLength=header->fromUBytesLength; 16346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 16356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 
16366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * converter versions 6.1 and up contain a unicodeMask that is 16376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * used here to select the most efficient function implementations 16386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 16396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org info.size=sizeof(UDataInfo); 16406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org udata_getInfo((UDataMemory *)sharedData->dataMemory, &info); 16416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(info.formatVersion[0]>6 || (info.formatVersion[0]==6 && info.formatVersion[1]>=1)) { 16426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* mask off possible future extensions to be safe */ 16436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org mbcsTable->unicodeMask=(uint8_t)(sharedData->staticData->unicodeMask&3); 16446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 16456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* for older versions, assume worst case: contains anything possible (prevent over-optimizations) */ 16466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org mbcsTable->unicodeMask=UCNV_HAS_SUPPLEMENTARY|UCNV_HAS_SURROGATES; 16476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 16486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 16496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 16506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * _MBCSHeader.version 4.3 adds utf8Friendly data structures. 16516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Check for the header version, SBCS vs. MBCS, and for whether the 16526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * data structures are optimized for code points as high as what the 16536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * runtime code is designed for. 
16546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * The implementation does not handle mapping tables with entries for 16556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * unpaired surrogates. 16566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 16576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if( header->version[1]>=3 && 16586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (mbcsTable->unicodeMask&UCNV_HAS_SURROGATES)==0 && 16596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (mbcsTable->countStates==1 ? 16606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (header->version[2]>=(SBCS_FAST_MAX>>8)) : 16616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (header->version[2]>=(MBCS_FAST_MAX>>8)) 16626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ) 16636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ) { 16646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org mbcsTable->utf8Friendly=TRUE; 16656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 16666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(mbcsTable->countStates==1) { 16676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 16686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * SBCS: Stage 3 is allocated in 64-entry blocks for U+0000..SBCS_FAST_MAX or higher. 16696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Build a table with indexes to each block, to be used instead of 16706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * the regular stage 1/2 table. 
16716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 16726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t i; 16736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for(i=0; i<(SBCS_FAST_LIMIT>>6); ++i) { 16746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org mbcsTable->sbcsIndex[i]=mbcsTable->fromUnicodeTable[mbcsTable->fromUnicodeTable[i>>4]+((i<<2)&0x3c)]; 16756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 16766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* set SBCS_FAST_MAX to reflect the reach of sbcsIndex[] even if header->version[2]>(SBCS_FAST_MAX>>8) */ 16776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org mbcsTable->maxFastUChar=SBCS_FAST_MAX; 16786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 16796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 16806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * MBCS: Stage 3 is allocated in 64-entry blocks for U+0000..MBCS_FAST_MAX or higher. 16816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * The .cnv file is prebuilt with an additional stage table with indexes 16826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * to each block. 16836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 16846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org mbcsTable->mbcsIndex=(const uint16_t *) 16856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (mbcsTable->fromUnicodeBytes+ 16866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (noFromU ? 
0 : mbcsTable->fromUBytesLength)); 16876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org mbcsTable->maxFastUChar=(((UChar)header->version[2])<<8)|0xff; 16886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 16896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 16906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 16916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* calculate a bit set of 4 ASCII characters per bit that round-trip to ASCII bytes */ 16926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 16936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint32_t asciiRoundtrips=0xffffffff; 16946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t i; 16956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 16966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for(i=0; i<0x80; ++i) { 16976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(mbcsTable->stateTable[0][i]!=MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, i)) { 16986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org asciiRoundtrips&=~((uint32_t)1<<(i>>2)); 16996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 17006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 17016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org mbcsTable->asciiRoundtrips=asciiRoundtrips; 17026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 17036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 17046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(noFromU) { 17056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint32_t stage1Length= 17066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org mbcsTable->unicodeMask&UCNV_HAS_SUPPLEMENTARY ? 
17076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 0x440 : 0x40; 17086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint32_t stage2Length= 17096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (header->offsetFromUBytes-header->offsetFromUTable)/4- 17106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org stage1Length/2; 17116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org reconstituteData(mbcsTable, stage1Length, stage2Length, header->fullStage2Length, pErrorCode); 17126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 17136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 17146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 17156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* Set the impl pointer here so that it is set for both extension-only and base tables. */ 17166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(mbcsTable->utf8Friendly) { 17176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(mbcsTable->countStates==1) { 17186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org sharedData->impl=&_SBCSUTF8Impl; 17196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 17206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(mbcsTable->outputType==MBCS_OUTPUT_2) { 17216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org sharedData->impl=&_DBCSUTF8Impl; 17226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 17236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 17246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 17256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 17266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(mbcsTable->outputType==MBCS_OUTPUT_DBCS_ONLY || mbcsTable->outputType==MBCS_OUTPUT_2_SISO) { 17276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 17286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * MBCS_OUTPUT_DBCS_ONLY: No SBCS mappings, therefore 
ASCII does not roundtrip. 17296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * MBCS_OUTPUT_2_SISO: Bypass the ASCII fastpath to handle prevLength correctly. 17306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 17316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org mbcsTable->asciiRoundtrips=0; 17326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 17336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 17346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 17356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic void 17366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgucnv_MBCSUnload(UConverterSharedData *sharedData) { 17376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UConverterMBCSTable *mbcsTable=&sharedData->mbcs; 17386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 17396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(mbcsTable->swapLFNLStateTable!=NULL) { 17406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uprv_free(mbcsTable->swapLFNLStateTable); 17416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 17426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(mbcsTable->stateTableOwned) { 17436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uprv_free((void *)mbcsTable->stateTable); 17446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 17456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(mbcsTable->baseSharedData!=NULL) { 17466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_unload(mbcsTable->baseSharedData); 17476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 17486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(mbcsTable->reconstitutedData!=NULL) { 17496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uprv_free(mbcsTable->reconstitutedData); 17506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 
17516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 17526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 17536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic void 17546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgucnv_MBCSOpen(UConverter *cnv, 17556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UConverterLoadArgs *pArgs, 17566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode *pErrorCode) { 17576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UConverterMBCSTable *mbcsTable; 17586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const int32_t *extIndexes; 17596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint8_t outputType; 17606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int8_t maxBytesPerUChar; 17616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 17626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(pArgs->onlyTestIsLoadable) { 17636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 17646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 17656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 17666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org mbcsTable=&cnv->sharedData->mbcs; 17676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org outputType=mbcsTable->outputType; 17686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 17696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(outputType==MBCS_OUTPUT_DBCS_ONLY) { 17706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* the swaplfnl option does not apply, remove it */ 17716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->options=pArgs->options&=~UCNV_OPTION_SWAP_LFNL; 17726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 17736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 17746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if((pArgs->options&UCNV_OPTION_SWAP_LFNL)!=0) { 
17756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* do this because double-checked locking is broken */ 17766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool isCached; 17776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 17786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org umtx_lock(NULL); 17796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org isCached=mbcsTable->swapLFNLStateTable!=NULL; 17806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org umtx_unlock(NULL); 17816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 17826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(!isCached) { 17836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(!_EBCDICSwapLFNL(cnv->sharedData, pErrorCode)) { 17846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U_FAILURE(*pErrorCode)) { 17856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; /* something went wrong */ 17866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 17876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 17886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* the option does not apply, remove it */ 17896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->options=pArgs->options&=~UCNV_OPTION_SWAP_LFNL; 17906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 17916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 17926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 17936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 17946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(uprv_strstr(pArgs->name, "18030")!=NULL) { 17956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(uprv_strstr(pArgs->name, "gb18030")!=NULL || uprv_strstr(pArgs->name, "GB18030")!=NULL) { 17966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* set a flag for GB 18030 mode, which changes the callback behavior */ 
17976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->options|=_MBCS_OPTION_GB18030; 17986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 17996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if((uprv_strstr(pArgs->name, "KEIS")!=NULL) || (uprv_strstr(pArgs->name, "keis")!=NULL)) { 18006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* set a flag for KEIS converter, which changes the SI/SO character sequence */ 18016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->options|=_MBCS_OPTION_KEIS; 18026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if((uprv_strstr(pArgs->name, "JEF")!=NULL) || (uprv_strstr(pArgs->name, "jef")!=NULL)) { 18036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* set a flag for JEF converter, which changes the SI/SO character sequence */ 18046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->options|=_MBCS_OPTION_JEF; 18056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if((uprv_strstr(pArgs->name, "JIPS")!=NULL) || (uprv_strstr(pArgs->name, "jips")!=NULL)) { 18066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* set a flag for JIPS converter, which changes the SI/SO character sequence */ 18076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->options|=_MBCS_OPTION_JIPS; 18086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 18096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 18106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* fix maxBytesPerUChar depending on outputType and options etc. 
*/ 18116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(outputType==MBCS_OUTPUT_2_SISO) { 18126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->maxBytesPerUChar=3; /* SO+DBCS */ 18136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 18146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 18156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org extIndexes=mbcsTable->extIndexes; 18166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(extIndexes!=NULL) { 18176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org maxBytesPerUChar=(int8_t)UCNV_GET_MAX_BYTES_PER_UCHAR(extIndexes); 18186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(outputType==MBCS_OUTPUT_2_SISO) { 18196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++maxBytesPerUChar; /* SO + multiple DBCS */ 18206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 18216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 18226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(maxBytesPerUChar>cnv->maxBytesPerUChar) { 18236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->maxBytesPerUChar=maxBytesPerUChar; 18246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 18256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 18266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 18276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if 0 18286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 18296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * documentation of UConverter fields used for status 18306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * all of these fields are (re)set to 0 by ucnv_bld.c and ucnv_reset() 18316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 18326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 18336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* toUnicode */ 
18346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->toUnicodeStatus=0; /* offset */ 18356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->mode=0; /* state */ 18366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->toULength=0; /* byteIndex */ 18376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 18386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* fromUnicode */ 18396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->fromUChar32=0; 18406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->fromUnicodeStatus=1; /* prevLength */ 18416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif 18426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 18436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 18446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const char * 18456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgucnv_MBCSGetName(const UConverter *cnv) { 18466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0 && cnv->sharedData->mbcs.swapLFNLName!=NULL) { 18476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return cnv->sharedData->mbcs.swapLFNLName; 18486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 18496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return cnv->sharedData->staticData->name; 18506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 18516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 18526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 18536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* MBCS-to-Unicode conversion functions ------------------------------------- */ 18546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 18556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic UChar32 18566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgucnv_MBCSGetFallback(UConverterMBCSTable 
*mbcsTable, uint32_t offset) { 18576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const _MBCSToUFallback *toUFallbacks; 18586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint32_t i, start, limit; 18596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 18606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org limit=mbcsTable->countToUFallbacks; 18616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(limit>0) { 18626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* do a binary search for the fallback mapping */ 18636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org toUFallbacks=mbcsTable->toUFallbacks; 18646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org start=0; 18656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while(start<limit-1) { 18666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org i=(start+limit)/2; 18676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(offset<toUFallbacks[i].offset) { 18686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org limit=i; 18696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 18706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org start=i; 18716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 18726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 18736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 18746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* did we really find it? 
*/ 18756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(offset==toUFallbacks[start].offset) { 18766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return toUFallbacks[start].codePoint; 18776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 18786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 18796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 18806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0xfffe; 18816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 18826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 18836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* This version of ucnv_MBCSToUnicodeWithOffsets() is optimized for single-byte, single-state codepages. */ 18846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic void 18856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgucnv_MBCSSingleToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, 18866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode *pErrorCode) { 18876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UConverter *cnv; 18886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const uint8_t *source, *sourceLimit; 18896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar *target; 18906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UChar *targetLimit; 18916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t *offsets; 18926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 18936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const int32_t (*stateTable)[256]; 18946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 18956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t sourceIndex; 18966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 18976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t entry; 18986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar c; 
18996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint8_t action; 19006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 19016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* set up the local pointers */ 19026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv=pArgs->converter; 19036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org source=(const uint8_t *)pArgs->source; 19046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org sourceLimit=(const uint8_t *)pArgs->sourceLimit; 19056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org target=pArgs->target; 19066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org targetLimit=pArgs->targetLimit; 19076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org offsets=pArgs->offsets; 19086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 19096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) { 19106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org stateTable=(const int32_t (*)[256])cnv->sharedData->mbcs.swapLFNLStateTable; 19116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 19126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org stateTable=cnv->sharedData->mbcs.stateTable; 19136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 19146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 19156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* sourceIndex=-1 if the current character began in the previous buffer */ 19166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org sourceIndex=0; 19176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 19186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* conversion loop */ 19196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while(source<sourceLimit) { 19206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 19216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * This 
following test is to see if available input would overflow the output. 19226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * It does not catch output of more than one code unit that 19236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * overflows as a result of a surrogate pair or callback output 19246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * from the last source byte. 19256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Therefore, those situations also test for overflows and will 19266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * then break the loop, too. 19276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 19286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(target>=targetLimit) { 19296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* target is full */ 19306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 19316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 19326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 19336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 19346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org entry=stateTable[0][*source++]; 19356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* MBCS_ENTRY_IS_FINAL(entry) */ 19366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 19376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* test the most common case first */ 19386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) { 19396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* output BMP code point */ 19406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); 19416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(offsets!=NULL) { 19426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *offsets++=sourceIndex; 
19436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 19446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 19456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* normal end of action codes: prepare for a new character */ 19466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++sourceIndex; 19476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org continue; 19486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 19496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 19506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 19516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * An if-else-if chain provides more reliable performance for 19526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * the most common cases compared to a switch. 19536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 19546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry)); 19556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(action==MBCS_STATE_VALID_DIRECT_20 || 19566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (action==MBCS_STATE_FALLBACK_DIRECT_20 && UCNV_TO_U_USE_FALLBACK(cnv)) 19576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ) { 19586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org entry=MBCS_ENTRY_FINAL_VALUE(entry); 19596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* output surrogate pair */ 19606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *target++=(UChar)(0xd800|(UChar)(entry>>10)); 19616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(offsets!=NULL) { 19626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *offsets++=sourceIndex; 19636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 19646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=(UChar)(0xdc00|(UChar)(entry&0x3ff)); 19656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
if(target<targetLimit) { 19666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *target++=c; 19676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(offsets!=NULL) { 19686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *offsets++=sourceIndex; 19696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 19706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 19716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* target overflow */ 19726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->UCharErrorBuffer[0]=c; 19736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->UCharErrorBufferLength=1; 19746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 19756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 19766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 19776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 19786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++sourceIndex; 19796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org continue; 19806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(action==MBCS_STATE_FALLBACK_DIRECT_16) { 19816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(UCNV_TO_U_USE_FALLBACK(cnv)) { 19826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* output BMP code point */ 19836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); 19846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(offsets!=NULL) { 19856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *offsets++=sourceIndex; 19866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 19876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 19886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++sourceIndex; 19896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org continue; 
19906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 19916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(action==MBCS_STATE_UNASSIGNED) { 19926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* just fall through */ 19936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(action==MBCS_STATE_ILLEGAL) { 19946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* callback(illegal) */ 19956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pErrorCode=U_ILLEGAL_CHAR_FOUND; 19966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 19976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* reserved, must never occur */ 19986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++sourceIndex; 19996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org continue; 20006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 20016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 20026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U_FAILURE(*pErrorCode)) { 20036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* callback(illegal) */ 20046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 20056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else /* unassigned sequences indicated with byteIndex>0 */ { 20066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* try an extension mapping */ 20076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pArgs->source=(const char *)source; 20086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->toUBytes[0]=*(source-1); 20096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->toULength=_extToU(cnv, cnv->sharedData, 20106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1, &source, sourceLimit, 20116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org &target, targetLimit, 20126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org &offsets, sourceIndex, 
20136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pArgs->flush, 20146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pErrorCode); 20156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org sourceIndex+=1+(int32_t)(source-(const uint8_t *)pArgs->source); 20166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 20176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U_FAILURE(*pErrorCode)) { 20186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* not mappable or buffer overflow */ 20196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 20206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 20216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 20226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 20236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 20246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* write back the updated pointers */ 20256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pArgs->source=(const char *)source; 20266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pArgs->target=target; 20276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pArgs->offsets=offsets; 20286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 20296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 20306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* 20316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * This version of ucnv_MBCSSingleToUnicodeWithOffsets() is optimized for single-byte, single-state codepages 20326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * that only map to and from the BMP. 20336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * In addition to single-byte optimizations, the offset calculations 20346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * become much easier. 
20356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 20366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic void 20376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgucnv_MBCSSingleToBMPWithOffsets(UConverterToUnicodeArgs *pArgs, 20386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode *pErrorCode) { 20396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UConverter *cnv; 20406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const uint8_t *source, *sourceLimit, *lastSource; 20416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar *target; 20426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t targetCapacity, length; 20436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t *offsets; 20446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 20456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const int32_t (*stateTable)[256]; 20466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 20476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t sourceIndex; 20486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 20496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t entry; 20506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint8_t action; 20516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 20526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* set up the local pointers */ 20536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv=pArgs->converter; 20546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org source=(const uint8_t *)pArgs->source; 20556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org sourceLimit=(const uint8_t *)pArgs->sourceLimit; 20566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org target=pArgs->target; 20576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target); 
20586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org offsets=pArgs->offsets; 20596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 20606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) { 20616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org stateTable=(const int32_t (*)[256])cnv->sharedData->mbcs.swapLFNLStateTable; 20626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 20636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org stateTable=cnv->sharedData->mbcs.stateTable; 20646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 20656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 20666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* sourceIndex=-1 if the current character began in the previous buffer */ 20676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org sourceIndex=0; 20686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org lastSource=source; 20696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 20706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 20716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * since the conversion here is 1:1 UChar:uint8_t, we need only one counter 20726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * for the minimum of the sourceLength and targetCapacity 20736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 20746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=(int32_t)(sourceLimit-source); 20756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(length<targetCapacity) { 20766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org targetCapacity=length; 20776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 20786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 20796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if MBCS_UNROLL_SINGLE_TO_BMP 20806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 
unrolling makes it faster on Pentium III/Windows 2000 */ 20816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* unroll the loop with the most common case */ 20826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgunrolled: 20836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(targetCapacity>=16) { 20846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t count, loops, oredEntries; 20856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 20866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org loops=count=targetCapacity>>4; 20876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org do { 20886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org oredEntries=entry=stateTable[0][*source++]; 20896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); 20906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org oredEntries|=entry=stateTable[0][*source++]; 20916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); 20926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org oredEntries|=entry=stateTable[0][*source++]; 20936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); 20946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org oredEntries|=entry=stateTable[0][*source++]; 20956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); 20966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org oredEntries|=entry=stateTable[0][*source++]; 20976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); 20986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org oredEntries|=entry=stateTable[0][*source++]; 20996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); 
21006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org oredEntries|=entry=stateTable[0][*source++]; 21016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); 21026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org oredEntries|=entry=stateTable[0][*source++]; 21036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); 21046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org oredEntries|=entry=stateTable[0][*source++]; 21056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); 21066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org oredEntries|=entry=stateTable[0][*source++]; 21076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); 21086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org oredEntries|=entry=stateTable[0][*source++]; 21096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); 21106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org oredEntries|=entry=stateTable[0][*source++]; 21116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); 21126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org oredEntries|=entry=stateTable[0][*source++]; 21136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); 21146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org oredEntries|=entry=stateTable[0][*source++]; 21156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); 21166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org oredEntries|=entry=stateTable[0][*source++]; 21176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); 
21186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org oredEntries|=entry=stateTable[0][*source++]; 21196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); 21206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 21216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* were all 16 entries really valid? */ 21226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(!MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(oredEntries)) { 21236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* no, return to the first of these 16 */ 21246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org source-=16; 21256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org target-=16; 21266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 21276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 21286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } while(--count>0); 21296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org count=loops-count; 21306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org targetCapacity-=16*count; 21316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 21326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(offsets!=NULL) { 21336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org lastSource+=16*count; 21346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while(count>0) { 21356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *offsets++=sourceIndex++; 21366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *offsets++=sourceIndex++; 21376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *offsets++=sourceIndex++; 21386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *offsets++=sourceIndex++; 21396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *offsets++=sourceIndex++; 21406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *offsets++=sourceIndex++; 
21416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *offsets++=sourceIndex++; 21426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *offsets++=sourceIndex++; 21436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *offsets++=sourceIndex++; 21446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *offsets++=sourceIndex++; 21456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *offsets++=sourceIndex++; 21466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *offsets++=sourceIndex++; 21476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *offsets++=sourceIndex++; 21486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *offsets++=sourceIndex++; 21496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *offsets++=sourceIndex++; 21506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *offsets++=sourceIndex++; 21516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org --count; 21526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 21536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 21546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 21556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif 21566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 21576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* conversion loop */ 21586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while(targetCapacity > 0 && source < sourceLimit) { 21596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org entry=stateTable[0][*source++]; 21606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* MBCS_ENTRY_IS_FINAL(entry) */ 21616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 21626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* test the most common case first */ 21636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) { 21646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 
output BMP code point */ 21656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); 21666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org --targetCapacity; 21676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org continue; 21686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 21696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 21706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 21716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * An if-else-if chain provides more reliable performance for 21726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * the most common cases compared to a switch. 21736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 21746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry)); 21756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(action==MBCS_STATE_FALLBACK_DIRECT_16) { 21766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(UCNV_TO_U_USE_FALLBACK(cnv)) { 21776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* output BMP code point */ 21786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); 21796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org --targetCapacity; 21806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org continue; 21816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 21826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(action==MBCS_STATE_UNASSIGNED) { 21836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* just fall through */ 21846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(action==MBCS_STATE_ILLEGAL) { 21856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* callback(illegal) */ 21866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pErrorCode=U_ILLEGAL_CHAR_FOUND; 
21876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 21886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* reserved, must never occur */ 21896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org continue; 21906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 21916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 21926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* set offsets since the start or the last extension */ 21936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(offsets!=NULL) { 21946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t count=(int32_t)(source-lastSource); 21956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 21966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* predecrement: do not set the offset for the callback-causing character */ 21976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while(--count>0) { 21986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *offsets++=sourceIndex++; 21996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 22006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* offset and sourceIndex are now set for the current character */ 22016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 22026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 22036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U_FAILURE(*pErrorCode)) { 22046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* callback(illegal) */ 22056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 22066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else /* unassigned sequences indicated with byteIndex>0 */ { 22076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* try an extension mapping */ 22086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org lastSource=source; 22096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->toUBytes[0]=*(source-1); 
22106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->toULength=_extToU(cnv, cnv->sharedData, 22116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1, &source, sourceLimit, 22126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org &target, pArgs->targetLimit, 22136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org &offsets, sourceIndex, 22146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pArgs->flush, 22156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pErrorCode); 22166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org sourceIndex+=1+(int32_t)(source-lastSource); 22176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 22186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U_FAILURE(*pErrorCode)) { 22196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* not mappable or buffer overflow */ 22206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 22216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 22226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 22236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* recalculate the targetCapacity after an extension mapping */ 22246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org targetCapacity=(int32_t)(pArgs->targetLimit-target); 22256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=(int32_t)(sourceLimit-source); 22266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(length<targetCapacity) { 22276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org targetCapacity=length; 22286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 22296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 22306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 22316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if MBCS_UNROLL_SINGLE_TO_BMP 22326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* unrolling makes it faster on Pentium III/Windows 2000 */ 
22336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org goto unrolled; 22346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif 22356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 22366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 22376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U_SUCCESS(*pErrorCode) && source<sourceLimit && target>=pArgs->targetLimit) { 22386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* target is full */ 22396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 22406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 22416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 22426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* set offsets since the start or the last callback */ 22436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(offsets!=NULL) { 22446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org size_t count=source-lastSource; 22456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while(count>0) { 22466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *offsets++=sourceIndex++; 22476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org --count; 22486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 22496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 22506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 22516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* write back the updated pointers */ 22526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pArgs->source=(const char *)source; 22536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pArgs->target=target; 22546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pArgs->offsets=offsets; 22556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 22566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
22576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic UBool 22586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orghasValidTrailBytes(const int32_t (*stateTable)[256], uint8_t state) { 22596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const int32_t *row=stateTable[state]; 22606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t b, entry; 22616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* First test for final entries in this state for some commonly valid byte values. */ 22626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org entry=row[0xa1]; 22636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if( !MBCS_ENTRY_IS_TRANSITION(entry) && 22646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org MBCS_ENTRY_FINAL_ACTION(entry)!=MBCS_STATE_ILLEGAL 22656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ) { 22666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return TRUE; 22676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 22686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org entry=row[0x41]; 22696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if( !MBCS_ENTRY_IS_TRANSITION(entry) && 22706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org MBCS_ENTRY_FINAL_ACTION(entry)!=MBCS_STATE_ILLEGAL 22716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ) { 22726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return TRUE; 22736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 22746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* Then test for final entries in this state. 
*/ 22756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for(b=0; b<=0xff; ++b) { 22766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org entry=row[b]; 22776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if( !MBCS_ENTRY_IS_TRANSITION(entry) && 22786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org MBCS_ENTRY_FINAL_ACTION(entry)!=MBCS_STATE_ILLEGAL 22796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ) { 22806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return TRUE; 22816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 22826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 22836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* Then recurse for transition entries. */ 22846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for(b=0; b<=0xff; ++b) { 22856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org entry=row[b]; 22866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if( MBCS_ENTRY_IS_TRANSITION(entry) && 22876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org hasValidTrailBytes(stateTable, (uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry)) 22886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ) { 22896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return TRUE; 22906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 22916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 22926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 22936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 22946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 22956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* 22966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Is byte b a single/lead byte in this state? 
22976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Recurse for transition states, because here we don't want to say that 22986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * b is a lead byte if all byte sequences that start with b are illegal. 22996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 23006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic UBool 23016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgisSingleOrLead(const int32_t (*stateTable)[256], uint8_t state, UBool isDBCSOnly, uint8_t b) { 23026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const int32_t *row=stateTable[state]; 23036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t entry=row[b]; 23046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(MBCS_ENTRY_IS_TRANSITION(entry)) { /* lead byte */ 23056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return hasValidTrailBytes(stateTable, (uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry)); 23066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 23076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint8_t action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry)); 23086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(action==MBCS_STATE_CHANGE_ONLY && isDBCSOnly) { 23096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; /* SI/SO are illegal for DBCS-only conversion */ 23106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 23116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return action!=MBCS_STATE_ILLEGAL; 23126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 23136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 23146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 23156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 23166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CFUNC void 
23176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgucnv_MBCSToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, 23186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode *pErrorCode) { 23196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UConverter *cnv; 23206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const uint8_t *source, *sourceLimit; 23216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar *target; 23226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UChar *targetLimit; 23236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t *offsets; 23246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 23256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const int32_t (*stateTable)[256]; 23266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const uint16_t *unicodeCodeUnits; 23276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 23286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint32_t offset; 23296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint8_t state; 23306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int8_t byteIndex; 23316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint8_t *bytes; 23326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 23336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t sourceIndex, nextSourceIndex; 23346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 23356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t entry; 23366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar c; 23376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint8_t action; 23386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 23396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* use optimized function if possible */ 23406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv=pArgs->converter; 
23416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 23426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(cnv->preToULength>0) { 23436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 23446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * pass sourceIndex=-1 because we continue from an earlier buffer 23456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * in the future, this may change with continuous offsets 23466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 23476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_extContinueMatchToU(cnv, pArgs, -1, pErrorCode); 23486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 23496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U_FAILURE(*pErrorCode) || cnv->preToULength<0) { 23506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 23516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 23526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 23536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 23546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(cnv->sharedData->mbcs.countStates==1) { 23556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(!(cnv->sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) { 23566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_MBCSSingleToBMPWithOffsets(pArgs, pErrorCode); 23576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 23586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_MBCSSingleToUnicodeWithOffsets(pArgs, pErrorCode); 23596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 23606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 23616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 23626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 23636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* set up the local pointers */ 
23646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org source=(const uint8_t *)pArgs->source; 23656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org sourceLimit=(const uint8_t *)pArgs->sourceLimit; 23666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org target=pArgs->target; 23676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org targetLimit=pArgs->targetLimit; 23686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org offsets=pArgs->offsets; 23696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 23706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) { 23716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org stateTable=(const int32_t (*)[256])cnv->sharedData->mbcs.swapLFNLStateTable; 23726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 23736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org stateTable=cnv->sharedData->mbcs.stateTable; 23746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 23756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org unicodeCodeUnits=cnv->sharedData->mbcs.unicodeCodeUnits; 23766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 23776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* get the converter state from UConverter */ 23786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org offset=cnv->toUnicodeStatus; 23796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org byteIndex=cnv->toULength; 23806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org bytes=cnv->toUBytes; 23816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 23826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 23836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * if we are in the SBCS state for a DBCS-only converter, 23846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * then load the DBCS state from the MBCS data 23856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 
(dbcsOnlyState==0 if it is not a DBCS-only converter) 23866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 23876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if((state=(uint8_t)(cnv->mode))==0) { 23886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org state=cnv->sharedData->mbcs.dbcsOnlyState; 23896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 23906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 23916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* sourceIndex=-1 if the current character began in the previous buffer */ 23926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org sourceIndex=byteIndex==0 ? 0 : -1; 23936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org nextSourceIndex=0; 23946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 23956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* conversion loop */ 23966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while(source<sourceLimit) { 23976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 23986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * This following test is to see if available input would overflow the output. 23996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * It does not catch output of more than one code unit that 24006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * overflows as a result of a surrogate pair or callback output 24016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * from the last source byte. 24026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Therefore, those situations also test for overflows and will 24036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * then break the loop, too. 
24046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 24056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(target>=targetLimit) { 24066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* target is full */ 24076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 24086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 24096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 24106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 24116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(byteIndex==0) { 24126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* optimized loop for 1/2-byte input and BMP output */ 24136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(offsets==NULL) { 24146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org do { 24156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org entry=stateTable[state][*source]; 24166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(MBCS_ENTRY_IS_TRANSITION(entry)) { 24176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org state=(uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry); 24186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org offset=MBCS_ENTRY_TRANSITION_OFFSET(entry); 24196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 24206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++source; 24216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if( source<sourceLimit && 24226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org MBCS_ENTRY_IS_FINAL(entry=stateTable[state][*source]) && 24236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org MBCS_ENTRY_FINAL_ACTION(entry)==MBCS_STATE_VALID_16 && 24246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (c=unicodeCodeUnits[offset+MBCS_ENTRY_FINAL_VALUE_16(entry)])<0xfffe 24256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ) { 
24266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++source; 24276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *target++=c; 24286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */ 24296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org offset=0; 24306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 24316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* set the state and leave the optimized loop */ 24326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org bytes[0]=*(source-1); 24336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org byteIndex=1; 24346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 24356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 24366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 24376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) { 24386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* output BMP code point */ 24396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++source; 24406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); 24416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */ 24426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 24436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* leave the optimized loop */ 24446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 24456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 24466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 24476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } while(source<sourceLimit && target<targetLimit); 24486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else /* offsets!=NULL */ { 
24496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org do { 24506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org entry=stateTable[state][*source]; 24516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(MBCS_ENTRY_IS_TRANSITION(entry)) { 24526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org state=(uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry); 24536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org offset=MBCS_ENTRY_TRANSITION_OFFSET(entry); 24546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 24556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++source; 24566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if( source<sourceLimit && 24576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org MBCS_ENTRY_IS_FINAL(entry=stateTable[state][*source]) && 24586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org MBCS_ENTRY_FINAL_ACTION(entry)==MBCS_STATE_VALID_16 && 24596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (c=unicodeCodeUnits[offset+MBCS_ENTRY_FINAL_VALUE_16(entry)])<0xfffe 24606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ) { 24616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++source; 24626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *target++=c; 24636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(offsets!=NULL) { 24646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *offsets++=sourceIndex; 24656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org sourceIndex=(nextSourceIndex+=2); 24666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 24676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */ 24686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org offset=0; 24696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 24706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* set the state and leave the optimized loop */ 
24716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++nextSourceIndex; 24726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org bytes[0]=*(source-1); 24736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org byteIndex=1; 24746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 24756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 24766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 24776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) { 24786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* output BMP code point */ 24796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++source; 24806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); 24816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(offsets!=NULL) { 24826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *offsets++=sourceIndex; 24836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org sourceIndex=++nextSourceIndex; 24846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 24856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */ 24866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 24876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* leave the optimized loop */ 24886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 24896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 24906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 24916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } while(source<sourceLimit && target<targetLimit); 24926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 24936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 24946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 
24956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * these tests and break statements could be put inside the loop 24966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * if C had "break outerLoop" like Java 24976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 24986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(source>=sourceLimit) { 24996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 25006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 25016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(target>=targetLimit) { 25026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* target is full */ 25036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 25046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 25056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 25066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 25076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++nextSourceIndex; 25086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org bytes[byteIndex++]=*source++; 25096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else /* byteIndex>0 */ { 25106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++nextSourceIndex; 25116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org entry=stateTable[state][bytes[byteIndex++]=*source++]; 25126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 25136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 25146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(MBCS_ENTRY_IS_TRANSITION(entry)) { 25156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org state=(uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry); 25166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org offset+=MBCS_ENTRY_TRANSITION_OFFSET(entry); 25176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org continue; 
25186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 25196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 25206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* save the previous state for proper extension mapping with SI/SO-stateful converters */ 25216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->mode=state; 25226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 25236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* set the next state early so that we can reuse the entry variable */ 25246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */ 25256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 25266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 25276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * An if-else-if chain provides more reliable performance for 25286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * the most common cases compared to a switch. 
25296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 25306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry)); 25316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(action==MBCS_STATE_VALID_16) { 25326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org offset+=MBCS_ENTRY_FINAL_VALUE_16(entry); 25336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=unicodeCodeUnits[offset]; 25346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(c<0xfffe) { 25356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* output BMP code point */ 25366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *target++=c; 25376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(offsets!=NULL) { 25386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *offsets++=sourceIndex; 25396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 25406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org byteIndex=0; 25416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(c==0xfffe) { 25426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(UCNV_TO_U_USE_FALLBACK(cnv) && (entry=(int32_t)ucnv_MBCSGetFallback(&cnv->sharedData->mbcs, offset))!=0xfffe) { 25436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* output fallback BMP code point */ 25446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *target++=(UChar)entry; 25456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(offsets!=NULL) { 25466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *offsets++=sourceIndex; 25476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 25486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org byteIndex=0; 25496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 25506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 25516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* callback(illegal) */ 
25526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pErrorCode=U_ILLEGAL_CHAR_FOUND; 25536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 25546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(action==MBCS_STATE_VALID_DIRECT_16) { 25556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* output BMP code point */ 25566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); 25576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(offsets!=NULL) { 25586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *offsets++=sourceIndex; 25596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 25606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org byteIndex=0; 25616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(action==MBCS_STATE_VALID_16_PAIR) { 25626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org offset+=MBCS_ENTRY_FINAL_VALUE_16(entry); 25636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=unicodeCodeUnits[offset++]; 25646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(c<0xd800) { 25656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* output BMP code point below 0xd800 */ 25666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *target++=c; 25676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(offsets!=NULL) { 25686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *offsets++=sourceIndex; 25696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 25706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org byteIndex=0; 25716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(UCNV_TO_U_USE_FALLBACK(cnv) ? 
c<=0xdfff : c<=0xdbff) { 25726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* output roundtrip or fallback surrogate pair */ 25736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *target++=(UChar)(c&0xdbff); 25746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(offsets!=NULL) { 25756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *offsets++=sourceIndex; 25766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 25776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org byteIndex=0; 25786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(target<targetLimit) { 25796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *target++=unicodeCodeUnits[offset]; 25806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(offsets!=NULL) { 25816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *offsets++=sourceIndex; 25826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 25836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 25846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* target overflow */ 25856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->UCharErrorBuffer[0]=unicodeCodeUnits[offset]; 25866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->UCharErrorBufferLength=1; 25876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 25886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 25896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org offset=0; 25906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 25916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 25926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(UCNV_TO_U_USE_FALLBACK(cnv) ? 
(c&0xfffe)==0xe000 : c==0xe000) { 25936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* output roundtrip BMP code point above 0xd800 or fallback BMP code point */ 25946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *target++=unicodeCodeUnits[offset]; 25956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(offsets!=NULL) { 25966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *offsets++=sourceIndex; 25976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 25986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org byteIndex=0; 25996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(c==0xffff) { 26006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* callback(illegal) */ 26016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pErrorCode=U_ILLEGAL_CHAR_FOUND; 26026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 26036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(action==MBCS_STATE_VALID_DIRECT_20 || 26046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (action==MBCS_STATE_FALLBACK_DIRECT_20 && UCNV_TO_U_USE_FALLBACK(cnv)) 26056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ) { 26066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org entry=MBCS_ENTRY_FINAL_VALUE(entry); 26076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* output surrogate pair */ 26086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *target++=(UChar)(0xd800|(UChar)(entry>>10)); 26096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(offsets!=NULL) { 26106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *offsets++=sourceIndex; 26116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 26126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org byteIndex=0; 26136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=(UChar)(0xdc00|(UChar)(entry&0x3ff)); 26146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
if(target<targetLimit) { 26156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *target++=c; 26166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(offsets!=NULL) { 26176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *offsets++=sourceIndex; 26186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 26196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 26206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* target overflow */ 26216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->UCharErrorBuffer[0]=c; 26226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->UCharErrorBufferLength=1; 26236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 26246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 26256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org offset=0; 26266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 26276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 26286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(action==MBCS_STATE_CHANGE_ONLY) { 26296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 26306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * This serves as a state change without any output. 26316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * It is useful for reading simple stateful encodings, 26326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * for example using just Shift-In/Shift-Out codes. 26336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * The 21 unused bits may later be used for more sophisticated 26346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * state transitions. 
26356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 26366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(cnv->sharedData->mbcs.dbcsOnlyState==0) { 26376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org byteIndex=0; 26386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 26396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* SI/SO are illegal for DBCS-only conversion */ 26406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org state=(uint8_t)(cnv->mode); /* restore the previous state */ 26416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 26426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* callback(illegal) */ 26436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pErrorCode=U_ILLEGAL_CHAR_FOUND; 26446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 26456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(action==MBCS_STATE_FALLBACK_DIRECT_16) { 26466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(UCNV_TO_U_USE_FALLBACK(cnv)) { 26476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* output BMP code point */ 26486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); 26496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(offsets!=NULL) { 26506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *offsets++=sourceIndex; 26516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 26526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org byteIndex=0; 26536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 26546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(action==MBCS_STATE_UNASSIGNED) { 26556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* just fall through */ 26566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(action==MBCS_STATE_ILLEGAL) { 26576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
/* callback(illegal) */ 26586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pErrorCode=U_ILLEGAL_CHAR_FOUND; 26596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 26606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* reserved, must never occur */ 26616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org byteIndex=0; 26626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 26636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 26646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* end of action codes: prepare for a new character */ 26656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org offset=0; 26666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 26676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(byteIndex==0) { 26686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org sourceIndex=nextSourceIndex; 26696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(U_FAILURE(*pErrorCode)) { 26706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* callback(illegal) */ 26716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(byteIndex>1) { 26726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 26736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Ticket 5691: consistent illegal sequences: 26746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * - We include at least the first byte in the illegal sequence. 26756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * - If any of the non-initial bytes could be the start of a character, 26766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * we stop the illegal sequence before the first one of those. 
26776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 26786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool isDBCSOnly=(UBool)(cnv->sharedData->mbcs.dbcsOnlyState!=0); 26796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int8_t i; 26806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for(i=1; 26816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org i<byteIndex && !isSingleOrLead(stateTable, state, isDBCSOnly, bytes[i]); 26826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++i) {} 26836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(i<byteIndex) { 26846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* Back out some bytes. */ 26856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int8_t backOutDistance=byteIndex-i; 26866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t bytesFromThisBuffer=(int32_t)(source-(const uint8_t *)pArgs->source); 26876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org byteIndex=i; /* length of reported illegal byte sequence */ 26886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(backOutDistance<=bytesFromThisBuffer) { 26896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org source-=backOutDistance; 26906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 26916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* Back out bytes from the previous buffer: Need to replay them. */ 26926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->preToULength=(int8_t)(bytesFromThisBuffer-backOutDistance); 26936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* preToULength is negative! 
*/ 26946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uprv_memcpy(cnv->preToU, bytes+i, -cnv->preToULength); 26956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org source=(const uint8_t *)pArgs->source; 26966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 26976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 26986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 26996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 27006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else /* unassigned sequences indicated with byteIndex>0 */ { 27016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* try an extension mapping */ 27026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pArgs->source=(const char *)source; 27036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org byteIndex=_extToU(cnv, cnv->sharedData, 27046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org byteIndex, &source, sourceLimit, 27056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org &target, targetLimit, 27066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org &offsets, sourceIndex, 27076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pArgs->flush, 27086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pErrorCode); 27096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org sourceIndex=nextSourceIndex+=(int32_t)(source-(const uint8_t *)pArgs->source); 27106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 27116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U_FAILURE(*pErrorCode)) { 27126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* not mappable or buffer overflow */ 27136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 27146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 27156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 27166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 
27176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 27186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* set the converter state back into UConverter */ 27196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->toUnicodeStatus=offset; 27206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->mode=state; 27216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->toULength=byteIndex; 27226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 27236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* write back the updated pointers */ 27246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pArgs->source=(const char *)source; 27256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pArgs->target=target; 27266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pArgs->offsets=offsets; 27276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 27286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 27296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* 27306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * This version of ucnv_MBCSGetNextUChar() is optimized for single-byte, single-state codepages. 27316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * We still need a conversion loop in case we find reserved action codes, which are to be ignored. 
27326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 27336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic UChar32 27346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgucnv_MBCSSingleGetNextUChar(UConverterToUnicodeArgs *pArgs, 27356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode *pErrorCode) { 27366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UConverter *cnv; 27376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const int32_t (*stateTable)[256]; 27386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const uint8_t *source, *sourceLimit; 27396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 27406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t entry; 27416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint8_t action; 27426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 27436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* set up the local pointers */ 27446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv=pArgs->converter; 27456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org source=(const uint8_t *)pArgs->source; 27466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org sourceLimit=(const uint8_t *)pArgs->sourceLimit; 27476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) { 27486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org stateTable=(const int32_t (*)[256])cnv->sharedData->mbcs.swapLFNLStateTable; 27496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 27506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org stateTable=cnv->sharedData->mbcs.stateTable; 27516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 27526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 27536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* conversion loop */ 27546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
while(source<sourceLimit) { 27556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org entry=stateTable[0][*source++]; 27566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* MBCS_ENTRY_IS_FINAL(entry) */ 27576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 27586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* write back the updated pointer early so that we can return directly */ 27596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pArgs->source=(const char *)source; 27606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 27616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) { 27626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* output BMP code point */ 27636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return (UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); 27646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 27656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 27666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 27676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * An if-else-if chain provides more reliable performance for 27686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * the most common cases compared to a switch. 
27696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 27706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry)); 27716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if( action==MBCS_STATE_VALID_DIRECT_20 || 27726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (action==MBCS_STATE_FALLBACK_DIRECT_20 && UCNV_TO_U_USE_FALLBACK(cnv)) 27736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ) { 27746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* output supplementary code point */ 27756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return (UChar32)(MBCS_ENTRY_FINAL_VALUE(entry)+0x10000); 27766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(action==MBCS_STATE_FALLBACK_DIRECT_16) { 27776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(UCNV_TO_U_USE_FALLBACK(cnv)) { 27786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* output BMP code point */ 27796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return (UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); 27806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 27816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(action==MBCS_STATE_UNASSIGNED) { 27826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* just fall through */ 27836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(action==MBCS_STATE_ILLEGAL) { 27846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* callback(illegal) */ 27856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pErrorCode=U_ILLEGAL_CHAR_FOUND; 27866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 27876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* reserved, must never occur */ 27886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org continue; 27896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 
27906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 27916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U_FAILURE(*pErrorCode)) { 27926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* callback(illegal) */ 27936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 27946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else /* unassigned sequence */ { 27956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* defer to the generic implementation */ 27966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pArgs->source=(const char *)source-1; 27976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return UCNV_GET_NEXT_UCHAR_USE_TO_U; 27986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 27996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 28006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 28016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* no output because of empty input or only state changes */ 28026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 28036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0xffff; 28046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 28056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 28066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* 28076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Version of _MBCSToUnicodeWithOffsets() optimized for single-character 28086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * conversion without offset handling. 
28096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 28106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * When a character does not have a mapping to Unicode, then we return to the 28116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * generic ucnv_getNextUChar() code for extension/GB 18030 and error/callback 28126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * handling. 28136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * We also defer to the generic code in other complicated cases and have them 28146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * ultimately handled by _MBCSToUnicodeWithOffsets() itself. 28156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 28166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * All normal mappings and errors are handled here. 28176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 28186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic UChar32 28196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgucnv_MBCSGetNextUChar(UConverterToUnicodeArgs *pArgs, 28206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode *pErrorCode) { 28216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UConverter *cnv; 28226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const uint8_t *source, *sourceLimit, *lastSource; 28236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 28246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const int32_t (*stateTable)[256]; 28256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const uint16_t *unicodeCodeUnits; 28266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 28276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint32_t offset; 28286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint8_t state; 28296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 28306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t entry; 
28316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c; 28326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint8_t action; 28336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 28346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* use optimized function if possible */ 28356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv=pArgs->converter; 28366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 28376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(cnv->preToULength>0) { 28386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* use the generic code in ucnv_getNextUChar() to continue with a partial match */ 28396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return UCNV_GET_NEXT_UCHAR_USE_TO_U; 28406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 28416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 28426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(cnv->sharedData->mbcs.unicodeMask&UCNV_HAS_SURROGATES) { 28436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 28446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Using the generic ucnv_getNextUChar() code lets us deal correctly 28456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * with the rare case of a codepage that maps single surrogates 28466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * without adding the complexity to this already complicated function here. 
28476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 28486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return UCNV_GET_NEXT_UCHAR_USE_TO_U; 28496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(cnv->sharedData->mbcs.countStates==1) { 28506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return ucnv_MBCSSingleGetNextUChar(pArgs, pErrorCode); 28516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 28526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 28536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* set up the local pointers */ 28546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org source=lastSource=(const uint8_t *)pArgs->source; 28556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org sourceLimit=(const uint8_t *)pArgs->sourceLimit; 28566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 28576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) { 28586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org stateTable=(const int32_t (*)[256])cnv->sharedData->mbcs.swapLFNLStateTable; 28596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 28606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org stateTable=cnv->sharedData->mbcs.stateTable; 28616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 28626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org unicodeCodeUnits=cnv->sharedData->mbcs.unicodeCodeUnits; 28636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 28646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* get the converter state from UConverter */ 28656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org offset=cnv->toUnicodeStatus; 28666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 28676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 28686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * if we are in the SBCS state for a 
DBCS-only converter, 28696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * then load the DBCS state from the MBCS data 28706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * (dbcsOnlyState==0 if it is not a DBCS-only converter) 28716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 28726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if((state=(uint8_t)(cnv->mode))==0) { 28736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org state=cnv->sharedData->mbcs.dbcsOnlyState; 28746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 28756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 28766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* conversion loop */ 28776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=U_SENTINEL; 28786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while(source<sourceLimit) { 28796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org entry=stateTable[state][*source++]; 28806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(MBCS_ENTRY_IS_TRANSITION(entry)) { 28816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org state=(uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry); 28826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org offset+=MBCS_ENTRY_TRANSITION_OFFSET(entry); 28836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 28846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* optimization for 1/2-byte input and BMP output */ 28856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if( source<sourceLimit && 28866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org MBCS_ENTRY_IS_FINAL(entry=stateTable[state][*source]) && 28876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org MBCS_ENTRY_FINAL_ACTION(entry)==MBCS_STATE_VALID_16 && 28886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (c=unicodeCodeUnits[offset+MBCS_ENTRY_FINAL_VALUE_16(entry)])<0xfffe 
28896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ) { 28906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++source; 28916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */ 28926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* output BMP code point */ 28936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 28946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 28956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 28966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* save the previous state for proper extension mapping with SI/SO-stateful converters */ 28976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->mode=state; 28986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 28996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* set the next state early so that we can reuse the entry variable */ 29006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */ 29016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 29026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 29036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * An if-else-if chain provides more reliable performance for 29046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * the most common cases compared to a switch. 
29056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 29066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry)); 29076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(action==MBCS_STATE_VALID_DIRECT_16) { 29086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* output BMP code point */ 29096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); 29106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 29116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(action==MBCS_STATE_VALID_16) { 29126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org offset+=MBCS_ENTRY_FINAL_VALUE_16(entry); 29136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=unicodeCodeUnits[offset]; 29146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(c<0xfffe) { 29156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* output BMP code point */ 29166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 29176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(c==0xfffe) { 29186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(UCNV_TO_U_USE_FALLBACK(cnv) && (c=ucnv_MBCSGetFallback(&cnv->sharedData->mbcs, offset))!=0xfffe) { 29196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 29206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 29216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 29226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* callback(illegal) */ 29236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pErrorCode=U_ILLEGAL_CHAR_FOUND; 29246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 29256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(action==MBCS_STATE_VALID_16_PAIR) { 29266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org offset+=MBCS_ENTRY_FINAL_VALUE_16(entry); 
29276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=unicodeCodeUnits[offset++]; 29286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(c<0xd800) { 29296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* output BMP code point below 0xd800 */ 29306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 29316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(UCNV_TO_U_USE_FALLBACK(cnv) ? c<=0xdfff : c<=0xdbff) { 29326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* output roundtrip or fallback supplementary code point */ 29336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=((c&0x3ff)<<10)+unicodeCodeUnits[offset]+(0x10000-0xdc00); 29346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 29356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(UCNV_TO_U_USE_FALLBACK(cnv) ? (c&0xfffe)==0xe000 : c==0xe000) { 29366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* output roundtrip BMP code point above 0xd800 or fallback BMP code point */ 29376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=unicodeCodeUnits[offset]; 29386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 29396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(c==0xffff) { 29406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* callback(illegal) */ 29416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pErrorCode=U_ILLEGAL_CHAR_FOUND; 29426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 29436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(action==MBCS_STATE_VALID_DIRECT_20 || 29446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (action==MBCS_STATE_FALLBACK_DIRECT_20 && UCNV_TO_U_USE_FALLBACK(cnv)) 29456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ) { 29466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* output supplementary code point */ 
29476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=(UChar32)(MBCS_ENTRY_FINAL_VALUE(entry)+0x10000); 29486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 29496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(action==MBCS_STATE_CHANGE_ONLY) { 29506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 29516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * This serves as a state change without any output. 29526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * It is useful for reading simple stateful encodings, 29536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * for example using just Shift-In/Shift-Out codes. 29546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * The 21 unused bits may later be used for more sophisticated 29556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * state transitions. 29566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 29576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(cnv->sharedData->mbcs.dbcsOnlyState!=0) { 29586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* SI/SO are illegal for DBCS-only conversion */ 29596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org state=(uint8_t)(cnv->mode); /* restore the previous state */ 29606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 29616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* callback(illegal) */ 29626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pErrorCode=U_ILLEGAL_CHAR_FOUND; 29636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 29646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(action==MBCS_STATE_FALLBACK_DIRECT_16) { 29656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(UCNV_TO_U_USE_FALLBACK(cnv)) { 29666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* output BMP code point */ 29676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
c=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); 29686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 29696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 29706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(action==MBCS_STATE_UNASSIGNED) { 29716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* just fall through */ 29726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(action==MBCS_STATE_ILLEGAL) { 29736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* callback(illegal) */ 29746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pErrorCode=U_ILLEGAL_CHAR_FOUND; 29756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 29766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* reserved (must never occur), or only state change */ 29776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org offset=0; 29786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org lastSource=source; 29796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org continue; 29806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 29816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 29826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* end of action codes: prepare for a new character */ 29836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org offset=0; 29846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 29856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U_FAILURE(*pErrorCode)) { 29866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* callback(illegal) */ 29876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 29886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else /* unassigned sequence */ { 29896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* defer to the generic implementation */ 29906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->toUnicodeStatus=0; 
29916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->mode=state; 29926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pArgs->source=(const char *)lastSource; 29936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return UCNV_GET_NEXT_UCHAR_USE_TO_U; 29946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 29956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 29966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 29976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 29986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(c<0) { 29996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U_SUCCESS(*pErrorCode) && source==sourceLimit && lastSource<source) { 30006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* incomplete character byte sequence */ 30016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint8_t *bytes=cnv->toUBytes; 30026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->toULength=(int8_t)(source-lastSource); 30036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org do { 30046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *bytes++=*lastSource++; 30056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } while(lastSource<source); 30066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pErrorCode=U_TRUNCATED_CHAR_FOUND; 30076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(U_FAILURE(*pErrorCode)) { 30086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* callback(illegal) */ 30096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 30106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Ticket 5691: consistent illegal sequences: 30116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * - We include at least the first byte in the illegal sequence. 
30126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * - If any of the non-initial bytes could be the start of a character, 30136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * we stop the illegal sequence before the first one of those. 30146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 30156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool isDBCSOnly=(UBool)(cnv->sharedData->mbcs.dbcsOnlyState!=0); 30166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint8_t *bytes=cnv->toUBytes; 30176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *bytes++=*lastSource++; /* first byte */ 30186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(lastSource==source) { 30196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->toULength=1; 30206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else /* lastSource<source: multi-byte character */ { 30216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int8_t i; 30226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for(i=1; 30236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org lastSource<source && !isSingleOrLead(stateTable, state, isDBCSOnly, *lastSource); 30246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++i 30256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ) { 30266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *bytes++=*lastSource++; 30276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 30286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->toULength=i; 30296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org source=lastSource; 30306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 30316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 30326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* no output because of empty input or only state changes */ 30336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 30346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 30356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=0xffff; 30366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 30376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 30386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* set the converter state back into UConverter, ready for a new character */ 30396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->toUnicodeStatus=0; 30406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->mode=state; 30416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 30426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* write back the updated pointer */ 30436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pArgs->source=(const char *)source; 30446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return c; 30456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 30466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 30476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if 0 30486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* 30496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Code disabled 2002dec09 (ICU 2.4) because it is not currently used in ICU. markus 30506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Removal improves code coverage. 30516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 30526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/** 30536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * This version of ucnv_MBCSSimpleGetNextUChar() is optimized for single-byte, single-state codepages. 30546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * It does not handle the EBCDIC swaplfnl option (set in UConverter). 30556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * It does not handle conversion extensions (_extToU()). 
30566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 30576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CFUNC UChar32 30586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgucnv_MBCSSingleSimpleGetNextUChar(UConverterSharedData *sharedData, 30596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint8_t b, UBool useFallback) { 30606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t entry; 30616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint8_t action; 30626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 30636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org entry=sharedData->mbcs.stateTable[0][b]; 30646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* MBCS_ENTRY_IS_FINAL(entry) */ 30656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 30666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) { 30676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* output BMP code point */ 30686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return (UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); 30696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 30706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 30716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 30726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * An if-else-if chain provides more reliable performance for 30736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * the most common cases compared to a switch. 
30746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 30756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry)); 30766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(action==MBCS_STATE_VALID_DIRECT_20) { 30776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* output supplementary code point */ 30786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0x10000+MBCS_ENTRY_FINAL_VALUE(entry); 30796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(action==MBCS_STATE_FALLBACK_DIRECT_16) { 30806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(!TO_U_USE_FALLBACK(useFallback)) { 30816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0xfffe; 30826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 30836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* output BMP code point */ 30846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return (UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); 30856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(action==MBCS_STATE_FALLBACK_DIRECT_20) { 30866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(!TO_U_USE_FALLBACK(useFallback)) { 30876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0xfffe; 30886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 30896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* output supplementary code point */ 30906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0x10000+MBCS_ENTRY_FINAL_VALUE(entry); 30916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(action==MBCS_STATE_UNASSIGNED) { 30926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0xfffe; 30936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(action==MBCS_STATE_ILLEGAL) { 30946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0xffff; 
30956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 30966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* reserved, must never occur */ 30976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0xffff; 30986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 30996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 31006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif 31016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 31026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* 31036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * This is a simple version of _MBCSGetNextUChar() that is used 31046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * by other converter implementations. 31056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * It only returns an "assigned" result if it consumes the entire input. 31066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * It does not use state from the converter, nor error codes. 31076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * It does not handle the EBCDIC swaplfnl option (set in UConverter). 31086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * It handles conversion extensions but not GB 18030. 
31096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 31106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Return value: 31116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * U+fffe unassigned 31126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * U+ffff illegal 31136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * otherwise the Unicode code point 31146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 31156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CFUNC UChar32 31166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgucnv_MBCSSimpleGetNextUChar(UConverterSharedData *sharedData, 31176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char *source, int32_t length, 31186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool useFallback) { 31196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const int32_t (*stateTable)[256]; 31206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const uint16_t *unicodeCodeUnits; 31216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 31226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint32_t offset; 31236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint8_t state, action; 31246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 31256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c; 31266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t i, entry; 31276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 31286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(length<=0) { 31296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* no input at all: "illegal" */ 31306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0xffff; 31316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 31326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 31336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if 0 
31346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* 31356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Code disabled 2002dec09 (ICU 2.4) because it is not currently used in ICU. markus 31366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * TODO In future releases, verify that this function is never called for SBCS 31376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * conversions, i.e., that sharedData->mbcs.countStates==1 is still true. 31386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Removal improves code coverage. 31396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 31406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* use optimized function if possible */ 31416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(sharedData->mbcs.countStates==1) { 31426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(length==1) { 31436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return ucnv_MBCSSingleSimpleGetNextUChar(sharedData, (uint8_t)*source, useFallback); 31446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 31456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0xffff; /* illegal: more than a single byte for an SBCS converter */ 31466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 31476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 31486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif 31496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 31506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* set up the local pointers */ 31516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org stateTable=sharedData->mbcs.stateTable; 31526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org unicodeCodeUnits=sharedData->mbcs.unicodeCodeUnits; 31536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 31546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* converter state */ 
31556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org offset=0; 31566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org state=sharedData->mbcs.dbcsOnlyState; 31576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 31586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* conversion loop */ 31596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for(i=0;;) { 31606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org entry=stateTable[state][(uint8_t)source[i++]]; 31616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(MBCS_ENTRY_IS_TRANSITION(entry)) { 31626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org state=(uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry); 31636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org offset+=MBCS_ENTRY_TRANSITION_OFFSET(entry); 31646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 31656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(i==length) { 31666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0xffff; /* truncated character */ 31676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 31686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 31696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 31706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * An if-else-if chain provides more reliable performance for 31716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * the most common cases compared to a switch. 
31726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 31736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry)); 31746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(action==MBCS_STATE_VALID_16) { 31756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org offset+=MBCS_ENTRY_FINAL_VALUE_16(entry); 31766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=unicodeCodeUnits[offset]; 31776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(c!=0xfffe) { 31786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* done */ 31796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(UCNV_TO_U_USE_FALLBACK(cnv)) { 31806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=ucnv_MBCSGetFallback(&sharedData->mbcs, offset); 31816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* else done with 0xfffe */ 31826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 31836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 31846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(action==MBCS_STATE_VALID_DIRECT_16) { 31856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* output BMP code point */ 31866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); 31876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 31886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(action==MBCS_STATE_VALID_16_PAIR) { 31896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org offset+=MBCS_ENTRY_FINAL_VALUE_16(entry); 31906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=unicodeCodeUnits[offset++]; 31916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(c<0xd800) { 31926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* output BMP code point below 0xd800 */ 31936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else 
if(UCNV_TO_U_USE_FALLBACK(cnv) ? c<=0xdfff : c<=0xdbff) { 31946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* output roundtrip or fallback supplementary code point */ 31956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=(UChar32)(((c&0x3ff)<<10)+unicodeCodeUnits[offset]+(0x10000-0xdc00)); 31966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(UCNV_TO_U_USE_FALLBACK(cnv) ? (c&0xfffe)==0xe000 : c==0xe000) { 31976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* output roundtrip BMP code point above 0xd800 or fallback BMP code point */ 31986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=unicodeCodeUnits[offset]; 31996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(c==0xffff) { 32006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0xffff; 32016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 32026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=0xfffe; 32036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 32046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 32056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(action==MBCS_STATE_VALID_DIRECT_20) { 32066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* output supplementary code point */ 32076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=0x10000+MBCS_ENTRY_FINAL_VALUE(entry); 32086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 32096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(action==MBCS_STATE_FALLBACK_DIRECT_16) { 32106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(!TO_U_USE_FALLBACK(useFallback)) { 32116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=0xfffe; 32126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 32136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 32146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* output BMP code 
point */ 32156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); 32166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 32176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(action==MBCS_STATE_FALLBACK_DIRECT_20) { 32186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(!TO_U_USE_FALLBACK(useFallback)) { 32196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=0xfffe; 32206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 32216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 32226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* output supplementary code point */ 32236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=0x10000+MBCS_ENTRY_FINAL_VALUE(entry); 32246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 32256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(action==MBCS_STATE_UNASSIGNED) { 32266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=0xfffe; 32276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 32286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 32296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 32306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 32316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * forbid MBCS_STATE_CHANGE_ONLY for this function, 32326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * and MBCS_STATE_ILLEGAL and reserved action codes 32336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 32346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0xffff; 32356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 32366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 32376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 32386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(i!=length) { 
32396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* illegal for this function: not all input consumed */ 32406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0xffff; 32416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 32426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 32436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(c==0xfffe) { 32446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* try an extension mapping */ 32456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const int32_t *cx=sharedData->mbcs.extIndexes; 32466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(cx!=NULL) { 32476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return ucnv_extSimpleMatchToU(cx, source, length, useFallback); 32486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 32496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 32506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 32516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return c; 32526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 32536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 32546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* MBCS-from-Unicode conversion functions ----------------------------------- */ 32556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 32566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* This version of ucnv_MBCSFromUnicodeWithOffsets() is optimized for double-byte codepages. 
*/ 32576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic void 32586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgucnv_MBCSDoubleFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs, 32596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode *pErrorCode) { 32606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UConverter *cnv; 32616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UChar *source, *sourceLimit; 32626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint8_t *target; 32636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t targetCapacity; 32646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t *offsets; 32656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 32666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const uint16_t *table; 32676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const uint16_t *mbcsIndex; 32686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const uint8_t *bytes; 32696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 32706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c; 32716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 32726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t sourceIndex, nextSourceIndex; 32736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 32746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint32_t stage2Entry; 32756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint32_t asciiRoundtrips; 32766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint32_t value; 32776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint8_t unicodeMask; 32786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 32796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* use optimized function if possible */ 32806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv=pArgs->converter; 
32816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org unicodeMask=cnv->sharedData->mbcs.unicodeMask; 32826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 32836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* set up the local pointers */ 32846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org source=pArgs->source; 32856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org sourceLimit=pArgs->sourceLimit; 32866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org target=(uint8_t *)pArgs->target; 32876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target); 32886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org offsets=pArgs->offsets; 32896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 32906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org table=cnv->sharedData->mbcs.fromUnicodeTable; 32916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org mbcsIndex=cnv->sharedData->mbcs.mbcsIndex; 32926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) { 32936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org bytes=cnv->sharedData->mbcs.swapLFNLFromUnicodeBytes; 32946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 32956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org bytes=cnv->sharedData->mbcs.fromUnicodeBytes; 32966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 32976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org asciiRoundtrips=cnv->sharedData->mbcs.asciiRoundtrips; 32986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 32996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* get the converter state from UConverter */ 33006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=cnv->fromUChar32; 33016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 33026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* sourceIndex=-1 
if the current character began in the previous buffer */ 33036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org sourceIndex= c==0 ? 0 : -1; 33046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org nextSourceIndex=0; 33056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 33066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* conversion loop */ 33076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(c!=0 && targetCapacity>0) { 33086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org goto getTrail; 33096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 33106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 33116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while(source<sourceLimit) { 33126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 33136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * This following test is to see if available input would overflow the output. 33146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * It does not catch output of more than one byte that 33156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * overflows as a result of a multi-byte character or callback output 33166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * from the last source character. 33176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Therefore, those situations also test for overflows and will 33186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * then break the loop, too. 
33196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 33206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(targetCapacity>0) { 33216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 33226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Get a correct Unicode code point: 33236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * a single UChar for a BMP code point or 33246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * a matched surrogate pair for a "supplementary code point". 33256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 33266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=*source++; 33276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++nextSourceIndex; 33286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(c<=0x7f && IS_ASCII_ROUNDTRIP(c, asciiRoundtrips)) { 33296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *target++=(uint8_t)c; 33306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(offsets!=NULL) { 33316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *offsets++=sourceIndex; 33326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org sourceIndex=nextSourceIndex; 33336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 33346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org --targetCapacity; 33356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=0; 33366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org continue; 33376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 33386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 33396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * utf8Friendly table: Test for <=0xd7ff rather than <=MBCS_FAST_MAX 33406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * to avoid dealing with surrogates. 33416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * MBCS_FAST_MAX must be >=0xd7ff. 
33426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 33436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(c<=0xd7ff) { 33446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value=DBCS_RESULT_FROM_MOST_BMP(mbcsIndex, (const uint16_t *)bytes, c); 33456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* There are only roundtrips (!=0) and no-mapping (==0) entries. */ 33466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(value==0) { 33476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org goto unassigned; 33486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 33496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* output the value */ 33506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 33516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 33526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * This also tests if the codepage maps single surrogates. 33536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * If it does, then surrogates are not paired but mapped separately. 33546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Note that in this case unmatched surrogates are not detected. 
33556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 33566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U16_IS_SURROGATE(c) && !(unicodeMask&UCNV_HAS_SURROGATES)) { 33576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U16_IS_SURROGATE_LEAD(c)) { 33586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orggetTrail: 33596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(source<sourceLimit) { 33606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* test the following code unit */ 33616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar trail=*source; 33626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U16_IS_TRAIL(trail)) { 33636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++source; 33646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++nextSourceIndex; 33656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=U16_GET_SUPPLEMENTARY(c, trail); 33666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(!(unicodeMask&UCNV_HAS_SUPPLEMENTARY)) { 33676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* BMP-only codepages are stored without stage 1 entries for supplementary code points */ 33686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* callback(unassigned) */ 33696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org goto unassigned; 33706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 33716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* convert this supplementary code point */ 33726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* exit this condition tree */ 33736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 33746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* this is an unmatched lead code unit (1st surrogate) */ 33756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* callback(illegal) */ 33766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
*pErrorCode=U_ILLEGAL_CHAR_FOUND; 33776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 33786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 33796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 33806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* no more input */ 33816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 33826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 33836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 33846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* this is an unmatched trail code unit (2nd surrogate) */ 33856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* callback(illegal) */ 33866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pErrorCode=U_ILLEGAL_CHAR_FOUND; 33876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 33886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 33896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 33906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 33916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* convert the Unicode code point in c into codepage bytes */ 33926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org stage2Entry=MBCS_STAGE_2_FROM_U(table, c); 33936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 33946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* get the bytes and the length for the output */ 33956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* MBCS_OUTPUT_2 */ 33966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value=MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c); 33976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 33986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* is this code point assigned, or do we use fallbacks? 
*/ 33996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(!(MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c) || 34006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (UCNV_FROM_U_USE_FALLBACK(cnv, c) && value!=0)) 34016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ) { 34026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 34036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * We allow a 0 byte output if the "assigned" bit is set for this entry. 34046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * There is no way with this data structure for fallback output 34056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * to be a zero byte. 34066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 34076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 34086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgunassigned: 34096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* try an extension mapping */ 34106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pArgs->source=source; 34116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=_extFromU(cnv, cnv->sharedData, 34126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c, &source, sourceLimit, 34136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org &target, target+targetCapacity, 34146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org &offsets, sourceIndex, 34156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pArgs->flush, 34166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pErrorCode); 34176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org nextSourceIndex+=(int32_t)(source-pArgs->source); 34186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 34196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U_FAILURE(*pErrorCode)) { 34206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* not mappable or buffer overflow */ 
34216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 34226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 34236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* a mapping was written to the target, continue */ 34246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 34256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* recalculate the targetCapacity after an extension mapping */ 34266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org targetCapacity=(int32_t)(pArgs->targetLimit-(char *)target); 34276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 34286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* normal end of conversion: prepare for a new character */ 34296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org sourceIndex=nextSourceIndex; 34306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org continue; 34316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 34326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 34336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 34346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 34356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* write the output character bytes from value and length */ 34366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* from the first if in the loop we know that targetCapacity>0 */ 34376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(value<=0xff) { 34386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* this is easy because we know that there is enough space */ 34396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *target++=(uint8_t)value; 34406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(offsets!=NULL) { 34416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *offsets++=sourceIndex; 34426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 
34436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org --targetCapacity; 34446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else /* length==2 */ { 34456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *target++=(uint8_t)(value>>8); 34466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(2<=targetCapacity) { 34476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *target++=(uint8_t)value; 34486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(offsets!=NULL) { 34496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *offsets++=sourceIndex; 34506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *offsets++=sourceIndex; 34516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 34526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org targetCapacity-=2; 34536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 34546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(offsets!=NULL) { 34556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *offsets++=sourceIndex; 34566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 34576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->charErrorBuffer[0]=(char)value; 34586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->charErrorBufferLength=1; 34596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 34606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* target overflow */ 34616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org targetCapacity=0; 34626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 34636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=0; 34646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 34656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 34666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 34676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
34686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* normal end of conversion: prepare for a new character */ 34696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=0; 34706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org sourceIndex=nextSourceIndex; 34716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org continue; 34726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 34736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* target is full */ 34746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 34756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 34766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 34776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 34786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 34796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* set the converter state back into UConverter */ 34806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->fromUChar32=c; 34816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 34826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* write back the updated pointers */ 34836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pArgs->source=source; 34846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pArgs->target=(char *)target; 34856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pArgs->offsets=offsets; 34866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 34876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 34886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* This version of ucnv_MBCSFromUnicodeWithOffsets() is optimized for single-byte codepages. 
*/ 34896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic void 34906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgucnv_MBCSSingleFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs, 34916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode *pErrorCode) { 34926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UConverter *cnv; 34936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UChar *source, *sourceLimit; 34946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint8_t *target; 34956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t targetCapacity; 34966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t *offsets; 34976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 34986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const uint16_t *table; 34996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const uint16_t *results; 35006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 35016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c; 35026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 35036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t sourceIndex, nextSourceIndex; 35046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 35056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint16_t value, minValue; 35066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool hasSupplementary; 35076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 35086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* set up the local pointers */ 35096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv=pArgs->converter; 35106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org source=pArgs->source; 35116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org sourceLimit=pArgs->sourceLimit; 35126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org target=(uint8_t *)pArgs->target; 
35136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target); 35146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org offsets=pArgs->offsets; 35156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 35166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org table=cnv->sharedData->mbcs.fromUnicodeTable; 35176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) { 35186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org results=(uint16_t *)cnv->sharedData->mbcs.swapLFNLFromUnicodeBytes; 35196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 35206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org results=(uint16_t *)cnv->sharedData->mbcs.fromUnicodeBytes; 35216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 35226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 35236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(cnv->useFallback) { 35246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* use all roundtrip and fallback results */ 35256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org minValue=0x800; 35266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 35276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* use only roundtrips and fallbacks from private-use characters */ 35286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org minValue=0xc00; 35296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 35306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org hasSupplementary=(UBool)(cnv->sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY); 35316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 35326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* get the converter state from UConverter */ 35336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=cnv->fromUChar32; 
35346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 35356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* sourceIndex=-1 if the current character began in the previous buffer */ 35366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org sourceIndex= c==0 ? 0 : -1; 35376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org nextSourceIndex=0; 35386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 35396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* conversion loop */ 35406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(c!=0 && targetCapacity>0) { 35416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org goto getTrail; 35426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 35436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 35446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while(source<sourceLimit) { 35456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 35466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * This following test is to see if available input would overflow the output. 35476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * It does not catch output of more than one byte that 35486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * overflows as a result of a multi-byte character or callback output 35496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * from the last source character. 35506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Therefore, those situations also test for overflows and will 35516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * then break the loop, too. 
35526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 35536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(targetCapacity>0) { 35546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 35556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Get a correct Unicode code point: 35566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * a single UChar for a BMP code point or 35576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * a matched surrogate pair for a "supplementary code point". 35586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 35596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=*source++; 35606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++nextSourceIndex; 35616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U16_IS_SURROGATE(c)) { 35626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U16_IS_SURROGATE_LEAD(c)) { 35636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orggetTrail: 35646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(source<sourceLimit) { 35656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* test the following code unit */ 35666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar trail=*source; 35676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U16_IS_TRAIL(trail)) { 35686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++source; 35696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++nextSourceIndex; 35706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=U16_GET_SUPPLEMENTARY(c, trail); 35716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(!hasSupplementary) { 35726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* BMP-only codepages are stored without stage 1 entries for supplementary code points */ 35736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* callback(unassigned) */ 
35746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org goto unassigned; 35756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 35766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* convert this supplementary code point */ 35776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* exit this condition tree */ 35786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 35796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* this is an unmatched lead code unit (1st surrogate) */ 35806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* callback(illegal) */ 35816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pErrorCode=U_ILLEGAL_CHAR_FOUND; 35826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 35836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 35846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 35856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* no more input */ 35866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 35876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 35886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 35896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* this is an unmatched trail code unit (2nd surrogate) */ 35906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* callback(illegal) */ 35916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pErrorCode=U_ILLEGAL_CHAR_FOUND; 35926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 35936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 35946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 35956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 35966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* convert the Unicode code point in c into codepage bytes */ 35976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value=MBCS_SINGLE_RESULT_FROM_U(table, 
results, c); 35986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 35996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* is this code point assigned, or do we use fallbacks? */ 36006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(value>=minValue) { 36016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* assigned, write the output character bytes from value and length */ 36026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* length==1 */ 36036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* this is easy because we know that there is enough space */ 36046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *target++=(uint8_t)value; 36056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(offsets!=NULL) { 36066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *offsets++=sourceIndex; 36076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 36086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org --targetCapacity; 36096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 36106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* normal end of conversion: prepare for a new character */ 36116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=0; 36126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org sourceIndex=nextSourceIndex; 36136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { /* unassigned */ 36146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgunassigned: 36156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* try an extension mapping */ 36166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pArgs->source=source; 36176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=_extFromU(cnv, cnv->sharedData, 36186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c, &source, sourceLimit, 36196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org &target, target+targetCapacity, 
36206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org &offsets, sourceIndex, 36216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pArgs->flush, 36226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pErrorCode); 36236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org nextSourceIndex+=(int32_t)(source-pArgs->source); 36246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 36256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U_FAILURE(*pErrorCode)) { 36266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* not mappable or buffer overflow */ 36276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 36286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 36296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* a mapping was written to the target, continue */ 36306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 36316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* recalculate the targetCapacity after an extension mapping */ 36326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org targetCapacity=(int32_t)(pArgs->targetLimit-(char *)target); 36336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 36346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* normal end of conversion: prepare for a new character */ 36356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org sourceIndex=nextSourceIndex; 36366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 36376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 36386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 36396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* target is full */ 36406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 36416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 36426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 
36436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 36446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 36456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* set the converter state back into UConverter */ 36466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->fromUChar32=c; 36476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 36486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* write back the updated pointers */ 36496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pArgs->source=source; 36506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pArgs->target=(char *)target; 36516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pArgs->offsets=offsets; 36526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 36536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org

/*
 * This version of ucnv_MBCSFromUnicode() is optimized for single-byte codepages
 * that map only to and from the BMP.
 * In addition to single-byte/state optimizations, the offset calculations
 * become much easier.
 * It would be possible to use the sbcsIndex for UTF-8-friendly tables,
 * but measurements have shown that this diminishes performance
 * in more cases than it improves it.
 * See SVN revision 21013 (2007-feb-06) for the last version with #if switches
 * for various MBCS and SBCS optimizations.
 *
 * Contract, as far as it is visible in this function:
 *
 * pArgs      - in/out conversion arguments.
 *              Read: converter, source, sourceLimit, target, targetLimit,
 *              offsets (may be NULL, in which case no offsets are written)
 *              and flush.
 *              Written back on exit: source, target and offsets, advanced past
 *              everything that was consumed/produced.
 * pErrorCode - in/out error code. This function itself sets
 *              U_ILLEGAL_CHAR_FOUND for an unmatched lead or trail surrogate,
 *              U_TRUNCATED_CHAR_FOUND for a lead surrogate at the end of the
 *              input when pArgs->flush is set, and
 *              U_BUFFER_OVERFLOW_ERROR when the target is full while source
 *              units remain. _extFromU() may set further codes.
 *
 * The code point that was being processed when the loop stopped (0 after a
 * character was written normally) is stored in cnv->fromUChar32; a non-zero
 * value found there on entry is continued at the getTrail label.
 */
static void
ucnv_MBCSSingleFromBMPWithOffsets(UConverterFromUnicodeArgs *pArgs,
                                  UErrorCode *pErrorCode) {
    UConverter *cnv;
    const UChar *source, *sourceLimit, *lastSource;
    uint8_t *target;
    int32_t targetCapacity, length;
    int32_t *offsets;

    const uint16_t *table;
    const uint16_t *results;

    UChar32 c;

    int32_t sourceIndex;

    uint32_t asciiRoundtrips;
    uint16_t value, minValue;

    /* set up the local pointers */
    cnv=pArgs->converter;
    source=pArgs->source;
    sourceLimit=pArgs->sourceLimit;
    target=(uint8_t *)pArgs->target;
    targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
    offsets=pArgs->offsets;

    table=cnv->sharedData->mbcs.fromUnicodeTable;
    if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) {
        results=(uint16_t *)cnv->sharedData->mbcs.swapLFNLFromUnicodeBytes;
    } else {
        results=(uint16_t *)cnv->sharedData->mbcs.fromUnicodeBytes;
    }
    asciiRoundtrips=cnv->sharedData->mbcs.asciiRoundtrips;

    if(cnv->useFallback) {
        /* use all roundtrip and fallback results */
        minValue=0x800;
    } else {
        /* use only roundtrips and fallbacks from private-use characters */
        minValue=0xc00;
    }

    /* get the converter state from UConverter */
    c=cnv->fromUChar32;

    /* sourceIndex=-1 if the current character began in the previous buffer */
    sourceIndex= c==0 ? 0 : -1;
    /*
     * lastSource marks the first source unit for which no offset has been
     * written yet; offsets are emitted in batches of (source-lastSource).
     */
    lastSource=source;

    /*
     * since the conversion here is 1:1 UChar:uint8_t, we need only one counter
     * for the minimum of the sourceLength and targetCapacity
     */
    length=(int32_t)(sourceLimit-source);
    if(length<targetCapacity) {
        targetCapacity=length;
    }

    /* conversion loop */
    if(c!=0 && targetCapacity>0) {
        /* continue with the code point left over from the previous call */
        goto getTrail;
    }

#if MBCS_UNROLL_SINGLE_FROM_BMP
    /* unrolling makes it slower on Pentium III/Windows 2000?! */
    /* unroll the loop with the most common case */
unrolled:
    if(targetCapacity>=4) {
        int32_t count, loops;
        uint16_t andedValues;

        loops=count=targetCapacity>>2;
        do {
            c=*source++;
            andedValues=value=MBCS_SINGLE_RESULT_FROM_U(table, results, c);
            *target++=(uint8_t)value;
            c=*source++;
            andedValues&=value=MBCS_SINGLE_RESULT_FROM_U(table, results, c);
            *target++=(uint8_t)value;
            c=*source++;
            andedValues&=value=MBCS_SINGLE_RESULT_FROM_U(table, results, c);
            *target++=(uint8_t)value;
            c=*source++;
            andedValues&=value=MBCS_SINGLE_RESULT_FROM_U(table, results, c);
            *target++=(uint8_t)value;

            /* were all 4 entries really valid? */
            if(andedValues<minValue) {
                /* no, return to the first of these 4 */
                source-=4;
                target-=4;
                break;
            }
        } while(--count>0);
        /* count becomes the number of groups of 4 that were fully converted */
        count=loops-count;
        targetCapacity-=4*count;

        if(offsets!=NULL) {
            lastSource+=4*count;
            while(count>0) {
                *offsets++=sourceIndex++;
                *offsets++=sourceIndex++;
                *offsets++=sourceIndex++;
                *offsets++=sourceIndex++;
                --count;
            }
        }

        c=0;
    }
#endif

    while(targetCapacity>0) {
        /*
         * Get a correct Unicode code point:
         * a single UChar for a BMP code point or
         * a matched surrogate pair for a "supplementary code point".
         */
        c=*source++;
        /*
         * Do not immediately check for single surrogates:
         * Assume that they are unassigned and check for them in that case.
         * This speeds up the conversion of assigned characters.
         */
        /* convert the Unicode code point in c into codepage bytes */
        if(c<=0x7f && IS_ASCII_ROUNDTRIP(c, asciiRoundtrips)) {
            *target++=(uint8_t)c;
            --targetCapacity;
            c=0;
            continue;
        }
        value=MBCS_SINGLE_RESULT_FROM_U(table, results, c);
        /* is this code point assigned, or do we use fallbacks? */
        if(value>=minValue) {
            /* assigned, write the output character bytes from value and length */
            /* length==1 */
            /* this is easy because we know that there is enough space */
            *target++=(uint8_t)value;
            --targetCapacity;

            /* normal end of conversion: prepare for a new character */
            c=0;
            continue;
        } else if(!U16_IS_SURROGATE(c)) {
            /* normal, unassigned BMP character */
        } else if(U16_IS_SURROGATE_LEAD(c)) {
getTrail:
            if(source<sourceLimit) {
                /* test the following code unit */
                UChar trail=*source;
                if(U16_IS_TRAIL(trail)) {
                    ++source;
                    c=U16_GET_SUPPLEMENTARY(c, trail);
                    /* this codepage does not map supplementary code points */
                    /* callback(unassigned) */
                } else {
                    /* this is an unmatched lead code unit (1st surrogate) */
                    /* callback(illegal) */
                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
                    break;
                }
            } else {
                /* no more input */
                if (pArgs->flush) {
                    *pErrorCode=U_TRUNCATED_CHAR_FOUND;
                }
                break;
            }
        } else {
            /* this is an unmatched trail code unit (2nd surrogate) */
            /* callback(illegal) */
            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
            break;
        }

        /* c does not have a mapping */

        /* get the number of code units for c to correctly advance sourceIndex */
        length=U16_LENGTH(c);

        /* set offsets since the start or the last extension */
        if(offsets!=NULL) {
            int32_t count=(int32_t)(source-lastSource);

            /* do not set the offset for this character */
            count-=length;

            while(count>0) {
                *offsets++=sourceIndex++;
                --count;
            }
            /* offsets and sourceIndex are now set for the current character */
        }

        /* try an extension mapping */
        lastSource=source;
        c=_extFromU(cnv, cnv->sharedData,
                    c, &source, sourceLimit,
                    &target, (const uint8_t *)(pArgs->targetLimit),
                    &offsets, sourceIndex,
                    pArgs->flush,
                    pErrorCode);
        /* account for c itself plus whatever _extFromU() consumed beyond it */
        sourceIndex+=length+(int32_t)(source-lastSource);
        lastSource=source;

        if(U_FAILURE(*pErrorCode)) {
            /* not mappable or buffer overflow */
            break;
        } else {
            /* a mapping was written to the target, continue */

            /* recalculate the targetCapacity after an extension mapping */
            targetCapacity=(int32_t)(pArgs->targetLimit-(char *)target);
            length=(int32_t)(sourceLimit-source);
            if(length<targetCapacity) {
                targetCapacity=length;
            }
        }

#if MBCS_UNROLL_SINGLE_FROM_BMP
        /* unrolling makes it slower on Pentium III/Windows 2000?! */
        goto unrolled;
#endif
    }

    if(U_SUCCESS(*pErrorCode) && source<sourceLimit && target>=(uint8_t *)pArgs->targetLimit) {
        /* target is full */
        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
    }

    /* set offsets since the start or the last callback */
    if(offsets!=NULL) {
        size_t count=source-lastSource;
        if (count > 0 && *pErrorCode == U_TRUNCATED_CHAR_FOUND) {
            /*
            Caller gave us a partial supplementary character,
            which this function couldn't convert in any case.
            The callback will handle the offset.
            */
            count--;
        }
        while(count>0) {
            *offsets++=sourceIndex++;
            --count;
        }
    }

    /* set the converter state back into UConverter */
    cnv->fromUChar32=c;

    /* write back the updated pointers */
    pArgs->source=source;
    pArgs->target=(char *)target;
    pArgs->offsets=offsets;
}

39216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 39226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CFUNC void 39236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgucnv_MBCSFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs, 39246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode *pErrorCode) { 39256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UConverter *cnv; 39266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UChar *source, *sourceLimit; 39276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint8_t *target; 39286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t
targetCapacity; 39296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t *offsets; 39306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 39316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const uint16_t *table; 39326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const uint16_t *mbcsIndex; 39336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const uint8_t *p, *bytes; 39346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint8_t outputType; 39356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 39366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c; 39376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 39386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t prevSourceIndex, sourceIndex, nextSourceIndex; 39396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 39406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint32_t stage2Entry; 39416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint32_t asciiRoundtrips; 39426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint32_t value; 39436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* Shift-In and Shift-Out byte sequences differ by encoding scheme. 
*/ 39446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint8_t siBytes[2] = {0, 0}; 39456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint8_t soBytes[2] = {0, 0}; 39466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint8_t siLength, soLength; 39476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t length = 0, prevLength; 39486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint8_t unicodeMask; 39496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 39506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv=pArgs->converter; 39516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 39526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(cnv->preFromUFirstCP>=0) { 39536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 39546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * pass sourceIndex=-1 because we continue from an earlier buffer 39556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * in the future, this may change with continuous offsets 39566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 39576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_extContinueMatchFromU(cnv, pArgs, -1, pErrorCode); 39586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 39596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U_FAILURE(*pErrorCode) || cnv->preFromULength<0) { 39606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 39616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 39626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 39636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 39646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* use optimized function if possible */ 39656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org outputType=cnv->sharedData->mbcs.outputType; 39666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
unicodeMask=cnv->sharedData->mbcs.unicodeMask; 39676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(outputType==MBCS_OUTPUT_1 && !(unicodeMask&UCNV_HAS_SURROGATES)) { 39686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(!(unicodeMask&UCNV_HAS_SUPPLEMENTARY)) { 39696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_MBCSSingleFromBMPWithOffsets(pArgs, pErrorCode); 39706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 39716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_MBCSSingleFromUnicodeWithOffsets(pArgs, pErrorCode); 39726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 39736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 39746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(outputType==MBCS_OUTPUT_2 && cnv->sharedData->mbcs.utf8Friendly) { 39756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_MBCSDoubleFromUnicodeWithOffsets(pArgs, pErrorCode); 39766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 39776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 39786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 39796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* set up the local pointers */ 39806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org source=pArgs->source; 39816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org sourceLimit=pArgs->sourceLimit; 39826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org target=(uint8_t *)pArgs->target; 39836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target); 39846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org offsets=pArgs->offsets; 39856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 39866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org table=cnv->sharedData->mbcs.fromUnicodeTable; 39876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
if(cnv->sharedData->mbcs.utf8Friendly) { 39886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org mbcsIndex=cnv->sharedData->mbcs.mbcsIndex; 39896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 39906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org mbcsIndex=NULL; 39916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 39926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) { 39936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org bytes=cnv->sharedData->mbcs.swapLFNLFromUnicodeBytes; 39946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 39956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org bytes=cnv->sharedData->mbcs.fromUnicodeBytes; 39966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 39976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org asciiRoundtrips=cnv->sharedData->mbcs.asciiRoundtrips; 39986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 39996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* get the converter state from UConverter */ 40006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=cnv->fromUChar32; 40016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 40026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(outputType==MBCS_OUTPUT_2_SISO) { 40036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org prevLength=cnv->fromUnicodeStatus; 40046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(prevLength==0) { 40056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* set the real value */ 40066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org prevLength=1; 40076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 40086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 40096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* prevent fromUnicodeStatus from being set to something non-0 */ 
40106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org prevLength=0; 40116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 40126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 40136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* sourceIndex=-1 if the current character began in the previous buffer */ 40146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org prevSourceIndex=-1; 40156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org sourceIndex= c==0 ? 0 : -1; 40166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org nextSourceIndex=0; 40176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 40186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* Get the SI/SO character for the converter */ 40196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org siLength = getSISOBytes(SI, cnv->options, siBytes); 40206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org soLength = getSISOBytes(SO, cnv->options, soBytes); 40216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 40226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* conversion loop */ 40236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 40246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * This is another piece of ugly code: 40256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * A goto into the loop if the converter state contains a first surrogate 40266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * from the previous function call. 40276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * It saves me to check in each loop iteration a check of if(c==0) 40286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * and duplicating the trail-surrogate-handling code in the else 40296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * branch of that check. 
40306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * I could not find any other way to get around this other than 40316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * using a function call for the conversion and callback, which would 40326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * be even more inefficient. 40336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 40346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Markus Scherer 2000-jul-19 40356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 40366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(c!=0 && targetCapacity>0) { 40376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org goto getTrail; 40386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 40396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 40406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while(source<sourceLimit) { 40416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 40426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * This following test is to see if available input would overflow the output. 40436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * It does not catch output of more than one byte that 40446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * overflows as a result of a multi-byte character or callback output 40456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * from the last source character. 40466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Therefore, those situations also test for overflows and will 40476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * then break the loop, too. 
40486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 40496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(targetCapacity>0) { 40506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 40516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Get a correct Unicode code point: 40526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * a single UChar for a BMP code point or 40536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * a matched surrogate pair for a "supplementary code point". 40546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 40556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=*source++; 40566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++nextSourceIndex; 40576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(c<=0x7f && IS_ASCII_ROUNDTRIP(c, asciiRoundtrips)) { 40586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *target++=(uint8_t)c; 40596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(offsets!=NULL) { 40606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *offsets++=sourceIndex; 40616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org prevSourceIndex=sourceIndex; 40626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org sourceIndex=nextSourceIndex; 40636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 40646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org --targetCapacity; 40656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=0; 40666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org continue; 40676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 40686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 40696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * utf8Friendly table: Test for <=0xd7ff rather than <=MBCS_FAST_MAX 40706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * to avoid dealing with surrogates. 
40716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * MBCS_FAST_MAX must be >=0xd7ff. 40726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 40736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(c<=0xd7ff && mbcsIndex!=NULL) { 40746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value=mbcsIndex[c>>6]; 40756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 40766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* get the bytes and the length for the output (copied from below and adapted for utf8Friendly data) */ 40776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* There are only roundtrips (!=0) and no-mapping (==0) entries. */ 40786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org switch(outputType) { 40796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case MBCS_OUTPUT_2: 40806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value=((const uint16_t *)bytes)[value +(c&0x3f)]; 40816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(value<=0xff) { 40826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(value==0) { 40836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org goto unassigned; 40846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 40856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=1; 40866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 40876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 40886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=2; 40896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 40906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 40916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case MBCS_OUTPUT_2_SISO: 40926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 1/2-byte stateful with Shift-In/Shift-Out */ 40936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 
40946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Save the old state in the converter object 40956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * right here, then change the local prevLength state variable if necessary. 40966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Then, if this character turns out to be unassigned or a fallback that 40976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * is not taken, the callback code must not save the new state in the converter 40986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * because the new state is for a character that is not output. 40996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * However, the callback must still restore the state from the converter 41006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * in case the callback function changed it for its output. 41016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 41026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->fromUnicodeStatus=prevLength; /* save the old state */ 41036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value=((const uint16_t *)bytes)[value +(c&0x3f)]; 41046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(value<=0xff) { 41056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(value==0) { 41066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org goto unassigned; 41076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(prevLength<=1) { 41086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=1; 41096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 41106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* change from double-byte mode to single-byte */ 41116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (siLength == 1) { 41126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value|=(uint32_t)siBytes[0]<<8; 
41136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length = 2; 41146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if (siLength == 2) { 41156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value|=(uint32_t)siBytes[1]<<8; 41166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value|=(uint32_t)siBytes[0]<<16; 41176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length = 3; 41186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 41196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org prevLength=1; 41206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 41216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 41226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(prevLength==2) { 41236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=2; 41246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 41256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* change from single-byte mode to double-byte */ 41266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (soLength == 1) { 41276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value|=(uint32_t)soBytes[0]<<16; 41286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length = 3; 41296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if (soLength == 2) { 41306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value|=(uint32_t)soBytes[1]<<16; 41316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value|=(uint32_t)soBytes[0]<<24; 41326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length = 4; 41336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 41346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org prevLength=2; 41356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 41366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 41376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 
41386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case MBCS_OUTPUT_DBCS_ONLY: 41396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* table with single-byte results, but only DBCS mappings used */ 41406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value=((const uint16_t *)bytes)[value +(c&0x3f)]; 41416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(value<=0xff) { 41426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* no mapping or SBCS result, not taken for DBCS-only */ 41436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org goto unassigned; 41446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 41456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=2; 41466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 41476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 41486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case MBCS_OUTPUT_3: 41496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org p=bytes+(value+(c&0x3f))*3; 41506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2]; 41516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(value<=0xff) { 41526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(value==0) { 41536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org goto unassigned; 41546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 41556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=1; 41566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 41576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(value<=0xffff) { 41586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=2; 41596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 41606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=3; 41616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 
41626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 41636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case MBCS_OUTPUT_4: 41646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value=((const uint32_t *)bytes)[value +(c&0x3f)]; 41656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(value<=0xff) { 41666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(value==0) { 41676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org goto unassigned; 41686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 41696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=1; 41706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 41716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(value<=0xffff) { 41726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=2; 41736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(value<=0xffffff) { 41746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=3; 41756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 41766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=4; 41776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 41786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 41796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case MBCS_OUTPUT_3_EUC: 41806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value=((const uint16_t *)bytes)[value +(c&0x3f)]; 41816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* EUC 16-bit fixed-length representation */ 41826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(value<=0xff) { 41836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(value==0) { 41846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org goto unassigned; 41856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 41866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=1; 
41876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 41886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if((value&0x8000)==0) { 41896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value|=0x8e8000; 41906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=3; 41916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if((value&0x80)==0) { 41926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value|=0x8f0080; 41936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=3; 41946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 41956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=2; 41966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 41976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 41986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case MBCS_OUTPUT_4_EUC: 41996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org p=bytes+(value+(c&0x3f))*3; 42006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2]; 42016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* EUC 16-bit fixed-length representation applied to the first two bytes */ 42026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(value<=0xff) { 42036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(value==0) { 42046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org goto unassigned; 42056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 42066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=1; 42076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 42086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(value<=0xffff) { 42096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=2; 42106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if((value&0x800000)==0) { 
42116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value|=0x8e800000; 42126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=4; 42136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if((value&0x8000)==0) { 42146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value|=0x8f008000; 42156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=4; 42166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 42176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=3; 42186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 42196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 42206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org default: 42216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* must not occur */ 42226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 42236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * To avoid compiler warnings that value & length may be 42246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * used without having been initialized, we set them here. 42256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * In reality, this is unreachable code. 42266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Not having a default branch also causes warnings with 42276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * some compilers. 
42286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 42296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value=0; 42306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=0; 42316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 42326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 42336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* output the value */ 42346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 42356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 42366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * This also tests if the codepage maps single surrogates. 42376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * If it does, then surrogates are not paired but mapped separately. 42386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Note that in this case unmatched surrogates are not detected. 42396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 42406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U16_IS_SURROGATE(c) && !(unicodeMask&UCNV_HAS_SURROGATES)) { 42416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U16_IS_SURROGATE_LEAD(c)) { 42426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orggetTrail: 42436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(source<sourceLimit) { 42446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* test the following code unit */ 42456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar trail=*source; 42466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U16_IS_TRAIL(trail)) { 42476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++source; 42486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++nextSourceIndex; 42496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=U16_GET_SUPPLEMENTARY(c, trail); 42506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
if(!(unicodeMask&UCNV_HAS_SUPPLEMENTARY)) { 42516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* BMP-only codepages are stored without stage 1 entries for supplementary code points */ 42526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->fromUnicodeStatus=prevLength; /* save the old state */ 42536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* callback(unassigned) */ 42546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org goto unassigned; 42556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 42566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* convert this supplementary code point */ 42576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* exit this condition tree */ 42586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 42596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* this is an unmatched lead code unit (1st surrogate) */ 42606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* callback(illegal) */ 42616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pErrorCode=U_ILLEGAL_CHAR_FOUND; 42626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 42636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 42646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 42656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* no more input */ 42666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 42676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 42686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 42696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* this is an unmatched trail code unit (2nd surrogate) */ 42706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* callback(illegal) */ 42716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pErrorCode=U_ILLEGAL_CHAR_FOUND; 42726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 
42736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 42746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 42756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 42766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* convert the Unicode code point in c into codepage bytes */ 42776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 42786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 42796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * The basic lookup is a triple-stage compact array (trie) lookup. 42806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * For details see the beginning of this file. 42816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 42826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Single-byte codepages are handled with a different data structure 42836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * by _MBCSSingle... functions. 42846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 42856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * The result consists of a 32-bit value from stage 2 and 42866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * a pointer to as many bytes as are stored per character. 42876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * The pointer points to the character's bytes in stage 3. 42886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Bits 15..0 of the stage 2 entry contain the stage 3 index 42896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * for that pointer, while bits 31..16 are flags for which of 42906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * the 16 characters in the block are roundtrip-assigned. 
42916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 42926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * For 2-byte and 4-byte codepages, the bytes are stored as uint16_t 42936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * respectively as uint32_t, in the platform encoding. 42946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * For 3-byte codepages, the bytes are always stored in big-endian order. 42956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 42966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * For EUC encodings that use only either 0x8e or 0x8f as the first 42976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * byte of their longest byte sequences, the first two bytes in 42986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * this third stage indicate with their 7th bits whether these bytes 42996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * are to be written directly or actually need to be preceded by 43006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * one of the two Single-Shift codes. With this, the third stage 43016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * stores one byte fewer per character than the actual maximum length of 43026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * EUC byte sequences. 43036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 43046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Other than that, leading zero bytes are removed and the other 43056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * bytes output. A single zero byte may be output if the "assigned" 43066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * bit in stage 2 was on. 
43076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * The data structure does not support zero byte output as a fallback, 43086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * and also does not allow output of leading zeros. 43096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 43106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org stage2Entry=MBCS_STAGE_2_FROM_U(table, c); 43116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 43126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* get the bytes and the length for the output */ 43136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org switch(outputType) { 43146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case MBCS_OUTPUT_2: 43156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value=MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c); 43166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(value<=0xff) { 43176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=1; 43186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 43196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=2; 43206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 43216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 43226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case MBCS_OUTPUT_2_SISO: 43236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 1/2-byte stateful with Shift-In/Shift-Out */ 43246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 43256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Save the old state in the converter object 43266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * right here, then change the local prevLength state variable if necessary. 
43276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Then, if this character turns out to be unassigned or a fallback that 43286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * is not taken, the callback code must not save the new state in the converter 43296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * because the new state is for a character that is not output. 43306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * However, the callback must still restore the state from the converter 43316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * in case the callback function changed it for its output. 43326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 43336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->fromUnicodeStatus=prevLength; /* save the old state */ 43346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value=MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c); 43356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(value<=0xff) { 43366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(value==0 && MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c)==0) { 43376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* no mapping, leave value==0 */ 43386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=0; 43396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(prevLength<=1) { 43406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=1; 43416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 43426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* change from double-byte mode to single-byte */ 43436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (siLength == 1) { 43446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value|=(uint32_t)siBytes[0]<<8; 43456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length = 2; 43466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if 
(siLength == 2) { 43476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value|=(uint32_t)siBytes[1]<<8; 43486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value|=(uint32_t)siBytes[0]<<16; 43496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length = 3; 43506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 43516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org prevLength=1; 43526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 43536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 43546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(prevLength==2) { 43556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=2; 43566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 43576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* change from single-byte mode to double-byte */ 43586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (soLength == 1) { 43596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value|=(uint32_t)soBytes[0]<<16; 43606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length = 3; 43616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if (soLength == 2) { 43626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value|=(uint32_t)soBytes[1]<<16; 43636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value|=(uint32_t)soBytes[0]<<24; 43646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length = 4; 43656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 43666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org prevLength=2; 43676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 43686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 43696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 43706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case MBCS_OUTPUT_DBCS_ONLY: 43716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* table 
with single-byte results, but only DBCS mappings used */ 43726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value=MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c); 43736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(value<=0xff) { 43746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* no mapping or SBCS result, not taken for DBCS-only */ 43756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value=stage2Entry=0; /* stage2Entry=0 to reset roundtrip flags */ 43766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=0; 43776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 43786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=2; 43796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 43806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 43816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case MBCS_OUTPUT_3: 43826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org p=MBCS_POINTER_3_FROM_STAGE_2(bytes, stage2Entry, c); 43836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2]; 43846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(value<=0xff) { 43856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=1; 43866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(value<=0xffff) { 43876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=2; 43886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 43896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=3; 43906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 43916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 43926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case MBCS_OUTPUT_4: 43936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value=MBCS_VALUE_4_FROM_STAGE_2(bytes, stage2Entry, c); 
43946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(value<=0xff) { 43956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=1; 43966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(value<=0xffff) { 43976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=2; 43986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(value<=0xffffff) { 43996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=3; 44006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 44016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=4; 44026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 44036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 44046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case MBCS_OUTPUT_3_EUC: 44056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value=MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c); 44066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* EUC 16-bit fixed-length representation */ 44076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(value<=0xff) { 44086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=1; 44096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if((value&0x8000)==0) { 44106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value|=0x8e8000; 44116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=3; 44126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if((value&0x80)==0) { 44136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value|=0x8f0080; 44146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=3; 44156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 44166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=2; 44176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 44186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 
44196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case MBCS_OUTPUT_4_EUC: 44206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org p=MBCS_POINTER_3_FROM_STAGE_2(bytes, stage2Entry, c); 44216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2]; 44226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* EUC 16-bit fixed-length representation applied to the first two bytes */ 44236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(value<=0xff) { 44246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=1; 44256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(value<=0xffff) { 44266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=2; 44276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if((value&0x800000)==0) { 44286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value|=0x8e800000; 44296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=4; 44306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if((value&0x8000)==0) { 44316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value|=0x8f008000; 44326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=4; 44336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 44346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=3; 44356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 44366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 44376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org default: 44386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* must not occur */ 44396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 44406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * To avoid compiler warnings that value & length may be 44416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * used without having been initialized, we set them here. 
44426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * In reality, this is unreachable code. 44436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Not having a default branch also causes warnings with 44446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * some compilers. 44456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 44466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value=stage2Entry=0; /* stage2Entry=0 to reset roundtrip flags */ 44476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=0; 44486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 44496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 44506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 44516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* is this code point assigned, or do we use fallbacks? */ 44526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(!(MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c)!=0 || 44536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (UCNV_FROM_U_USE_FALLBACK(cnv, c) && value!=0)) 44546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ) { 44556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 44566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * We allow a 0 byte output if the "assigned" bit is set for this entry. 44576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * There is no way with this data structure for fallback output 44586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * to be a zero byte. 
44596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 44606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 44616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgunassigned: 44626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* try an extension mapping */ 44636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pArgs->source=source; 44646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=_extFromU(cnv, cnv->sharedData, 44656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c, &source, sourceLimit, 44666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org &target, target+targetCapacity, 44676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org &offsets, sourceIndex, 44686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pArgs->flush, 44696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pErrorCode); 44706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org nextSourceIndex+=(int32_t)(source-pArgs->source); 44716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org prevLength=cnv->fromUnicodeStatus; /* restore SISO state */ 44726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 44736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U_FAILURE(*pErrorCode)) { 44746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* not mappable or buffer overflow */ 44756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 44766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 44776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* a mapping was written to the target, continue */ 44786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 44796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* recalculate the targetCapacity after an extension mapping */ 44806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org targetCapacity=(int32_t)(pArgs->targetLimit-(char *)target); 
44816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 44826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* normal end of conversion: prepare for a new character */ 44836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(offsets!=NULL) { 44846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org prevSourceIndex=sourceIndex; 44856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org sourceIndex=nextSourceIndex; 44866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 44876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org continue; 44886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 44896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 44906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 44916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 44926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* write the output character bytes from value and length */ 44936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* from the first if in the loop we know that targetCapacity>0 */ 44946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(length<=targetCapacity) { 44956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(offsets==NULL) { 44966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org switch(length) { 44976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* each branch falls through to the next one */ 44986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case 4: 44996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *target++=(uint8_t)(value>>24); 45006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case 3: /*fall through*/ 45016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *target++=(uint8_t)(value>>16); 45026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case 2: /*fall through*/ 45036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *target++=(uint8_t)(value>>8); 
45046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case 1: /*fall through*/ 45056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *target++=(uint8_t)value; 45066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org default: 45076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* will never occur */ 45086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 45096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 45106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 45116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org switch(length) { 45126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* each branch falls through to the next one */ 45136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case 4: 45146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *target++=(uint8_t)(value>>24); 45156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *offsets++=sourceIndex; 45166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case 3: /*fall through*/ 45176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *target++=(uint8_t)(value>>16); 45186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *offsets++=sourceIndex; 45196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case 2: /*fall through*/ 45206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *target++=(uint8_t)(value>>8); 45216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *offsets++=sourceIndex; 45226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case 1: /*fall through*/ 45236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *target++=(uint8_t)value; 45246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *offsets++=sourceIndex; 45256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org default: 45266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* will never occur */ 45276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 
45286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 45296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 45306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org targetCapacity-=length; 45316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 45326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint8_t *charErrorBuffer; 45336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 45346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 45356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * We actually do this backwards here: 45366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * In order to save an intermediate variable, we output 45376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * first to the overflow buffer what does not fit into the 45386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * regular target. 45396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 45406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* we know that 1<=targetCapacity<length<=4 */ 45416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length-=targetCapacity; 45426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org charErrorBuffer=(uint8_t *)cnv->charErrorBuffer; 45436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org switch(length) { 45446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* each branch falls through to the next one */ 45456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case 3: 45466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *charErrorBuffer++=(uint8_t)(value>>16); 45476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case 2: /*fall through*/ 45486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *charErrorBuffer++=(uint8_t)(value>>8); 45496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case 1: /*fall through*/ 45506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
*charErrorBuffer=(uint8_t)value; 45516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org default: 45526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* will never occur */ 45536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 45546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 45556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->charErrorBufferLength=(int8_t)length; 45566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 45576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* now output what fits into the regular target */ 45586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value>>=8*length; /* length was reduced by targetCapacity */ 45596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org switch(targetCapacity) { 45606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* each branch falls through to the next one */ 45616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case 3: 45626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *target++=(uint8_t)(value>>16); 45636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(offsets!=NULL) { 45646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *offsets++=sourceIndex; 45656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 45666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case 2: /*fall through*/ 45676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *target++=(uint8_t)(value>>8); 45686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(offsets!=NULL) { 45696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *offsets++=sourceIndex; 45706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 45716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case 1: /*fall through*/ 45726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *target++=(uint8_t)value; 45736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(offsets!=NULL) { 
45746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *offsets++=sourceIndex; 45756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 45766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org default: 45776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* will never occur */ 45786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 45796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 45806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 45816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* target overflow */ 45826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org targetCapacity=0; 45836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 45846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=0; 45856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 45866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 45876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 45886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* normal end of conversion: prepare for a new character */ 45896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=0; 45906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(offsets!=NULL) { 45916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org prevSourceIndex=sourceIndex; 45926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org sourceIndex=nextSourceIndex; 45936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 45946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org continue; 45956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 45966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* target is full */ 45976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 45986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 45996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
} 46006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 46016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 46026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 46036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * the end of the input stream and detection of truncated input 46046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * are handled by the framework, but for EBCDIC_STATEFUL conversion 46056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * we need to emit an SI at the very end 46066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 46076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * conditions: 46086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * successful 46096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * EBCDIC_STATEFUL in DBCS mode 46106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * end of input and no truncated input 46116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 46126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if( U_SUCCESS(*pErrorCode) && 46136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org outputType==MBCS_OUTPUT_2_SISO && prevLength==2 && 46146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pArgs->flush && source>=sourceLimit && c==0 46156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ) { 46166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* EBCDIC_STATEFUL ending with DBCS: emit an SI to return the output stream to SBCS */ 46176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(targetCapacity>0) { 46186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *target++=(uint8_t)siBytes[0]; 46196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (siLength == 2) { 46206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (targetCapacity<2) { 46216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
cnv->charErrorBuffer[0]=(uint8_t)siBytes[1]; 46226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->charErrorBufferLength=1; 46236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 46246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 46256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *target++=(uint8_t)siBytes[1]; 46266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 46276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 46286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(offsets!=NULL) { 46296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* set the last source character's index (sourceIndex points at sourceLimit now) */ 46306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *offsets++=prevSourceIndex; 46316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 46326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 46336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* target is full */ 46346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->charErrorBuffer[0]=(uint8_t)siBytes[0]; 46356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (siLength == 2) { 46366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->charErrorBuffer[1]=(uint8_t)siBytes[1]; 46376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 46386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->charErrorBufferLength=siLength; 46396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 46406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 46416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org prevLength=1; /* we switched into SBCS */ 46426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 46436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 46446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* set the converter 
state back into UConverter */ 46456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->fromUChar32=c; 46466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->fromUnicodeStatus=prevLength; 46476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 46486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* write back the updated pointers */ 46496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pArgs->source=source; 46506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pArgs->target=(char *)target; 46516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pArgs->offsets=offsets; 46526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 46536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 46546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* 46556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * This is another simple conversion function for internal use by other 46566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * conversion implementations. 46576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * It does not use the converter state nor call callbacks. 46586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * It does not handle the EBCDIC swaplfnl option (set in UConverter). 46596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * It handles conversion extensions but not GB 18030. 46606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 46616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * It converts one single Unicode code point into codepage bytes, encoded 46626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * as one 32-bit value. 
The function returns the number of bytes in *pValue: 46636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 1..4 the number of bytes in *pValue 46646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 0 unassigned (*pValue undefined) 46656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * -1 illegal (currently not used, *pValue undefined) 46666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 46676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * *pValue will contain the resulting bytes with the last byte in bits 7..0, 46686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * the second to last byte in bits 15..8, etc. 46696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Currently, the function assumes but does not check that 0<=c<=0x10ffff. 46706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 46716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CFUNC int32_t 46726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgucnv_MBCSFromUChar32(UConverterSharedData *sharedData, 46736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c, uint32_t *pValue, 46746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool useFallback) { 46756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const int32_t *cx; 46766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const uint16_t *table; 46776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if 0 46786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* #if 0 because this is not currently used in ICU - reduce code, increase code coverage */ 46796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const uint8_t *p; 46806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif 46816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint32_t stage2Entry; 46826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint32_t value; 
46836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t length; 46846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 46856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* BMP-only codepages are stored without stage 1 entries for supplementary code points */ 46866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(c<=0xffff || (sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) { 46876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org table=sharedData->mbcs.fromUnicodeTable; 46886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 46896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* convert the Unicode code point in c into codepage bytes (same as in _MBCSFromUnicodeWithOffsets) */ 46906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(sharedData->mbcs.outputType==MBCS_OUTPUT_1) { 46916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value=MBCS_SINGLE_RESULT_FROM_U(table, (uint16_t *)sharedData->mbcs.fromUnicodeBytes, c); 46926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* is this code point assigned, or do we use fallbacks? */ 46936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(useFallback ? 
value>=0x800 : value>=0xc00) { 46946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pValue=value&0xff; 46956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 1; 46966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 46976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else /* outputType!=MBCS_OUTPUT_1 */ { 46986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org stage2Entry=MBCS_STAGE_2_FROM_U(table, c); 46996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 47006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* get the bytes and the length for the output */ 47016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org switch(sharedData->mbcs.outputType) { 47026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case MBCS_OUTPUT_2: 47036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c); 47046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(value<=0xff) { 47056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=1; 47066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 47076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=2; 47086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 47096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 47106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if 0 47116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* #if 0 because this is not currently used in ICU - reduce code, increase code coverage */ 47126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case MBCS_OUTPUT_DBCS_ONLY: 47136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* table with single-byte results, but only DBCS mappings used */ 47146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c); 
47156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(value<=0xff) { 47166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* no mapping or SBCS result, not taken for DBCS-only */ 47176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value=stage2Entry=0; /* stage2Entry=0 to reset roundtrip flags */ 47186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=0; 47196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 47206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=2; 47216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 47226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 47236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case MBCS_OUTPUT_3: 47246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org p=MBCS_POINTER_3_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c); 47256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2]; 47266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(value<=0xff) { 47276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=1; 47286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(value<=0xffff) { 47296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=2; 47306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 47316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=3; 47326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 47336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 47346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case MBCS_OUTPUT_4: 47356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value=MBCS_VALUE_4_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c); 47366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(value<=0xff) { 47376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=1; 
47386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(value<=0xffff) { 47396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=2; 47406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(value<=0xffffff) { 47416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=3; 47426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 47436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=4; 47446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 47456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 47466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case MBCS_OUTPUT_3_EUC: 47476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c); 47486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* EUC 16-bit fixed-length representation */ 47496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(value<=0xff) { 47506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=1; 47516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if((value&0x8000)==0) { 47526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value|=0x8e8000; 47536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=3; 47546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if((value&0x80)==0) { 47556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value|=0x8f0080; 47566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=3; 47576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 47586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=2; 47596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 47606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 47616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case MBCS_OUTPUT_4_EUC: 
47626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org p=MBCS_POINTER_3_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c); 47636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2]; 47646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* EUC 16-bit fixed-length representation applied to the first two bytes */ 47656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(value<=0xff) { 47666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=1; 47676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(value<=0xffff) { 47686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=2; 47696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if((value&0x800000)==0) { 47706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value|=0x8e800000; 47716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=4; 47726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if((value&0x8000)==0) { 47736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value|=0x8f008000; 47746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=4; 47756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 47766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=3; 47776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 47786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 47796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif 47806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org default: 47816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* must not occur */ 47826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return -1; 47836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 47846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 47856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* is this code point assigned, or do 
we use fallbacks? */ 47866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if( MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c) || 47876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (FROM_U_USE_FALLBACK(useFallback, c) && value!=0) 47886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ) { 47896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 47906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * We allow a 0 byte output if the "assigned" bit is set for this entry. 47916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * There is no way with this data structure for fallback output 47926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * to be a zero byte. 47936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 47946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* assigned */ 47956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pValue=value; 47966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return length; 47976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 47986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 47996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 48006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 48016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cx=sharedData->mbcs.extIndexes; 48026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(cx!=NULL) { 48036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=ucnv_extSimpleMatchFromU(cx, c, pValue, useFallback); 48046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return length>=0 ? 
length : -length; /* return abs(length); */ 48056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 48066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 48076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* unassigned */ 48086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0; 48096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 48106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 48116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 48126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if 0 48136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* 48146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * This function has been moved to ucnv2022.c for inlining. 48156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * This implementation is here only for documentation purposes 48166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 48176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 48186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/** 48196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * This version of ucnv_MBCSFromUChar32() is optimized for single-byte codepages. 48206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * It does not handle the EBCDIC swaplfnl option (set in UConverter). 48216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * It does not handle conversion extensions (_extFromU()). 48226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 48236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * It returns the codepage byte for the code point, or -1 if it is unassigned. 
48246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 48256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CFUNC int32_t 48266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgucnv_MBCSSingleFromUChar32(UConverterSharedData *sharedData, 48276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c, 48286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool useFallback) { 48296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const uint16_t *table; 48306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t value; 48316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 48326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* BMP-only codepages are stored without stage 1 entries for supplementary code points */ 48336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(c>=0x10000 && !(sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) { 48346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return -1; 48356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 48366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 48376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* convert the Unicode code point in c into codepage bytes (same as in _MBCSFromUnicodeWithOffsets) */ 48386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org table=sharedData->mbcs.fromUnicodeTable; 48396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 48406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* get the byte for the output */ 48416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value=MBCS_SINGLE_RESULT_FROM_U(table, (uint16_t *)sharedData->mbcs.fromUnicodeBytes, c); 48426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* is this code point assigned, or do we use fallbacks? */ 48436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(useFallback ? 
value>=0x800 : value>=0xc00) { 48446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return value&0xff; 48456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 48466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return -1; 48476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 48486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 48496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif 48506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 48516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* MBCS-from-UTF-8 conversion functions ------------------------------------- */ 48526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 48536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* minimum code point values for n-byte UTF-8 sequences, n=0..4 */ 48546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const UChar32 48556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgutf8_minLegal[5]={ 0, 0, 0x80, 0x800, 0x10000 }; 48566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 48576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* offsets for n-byte UTF-8 sequences that were calculated with ((lead<<6)+trail)<<6+trail... 
*/ 48586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const UChar32 48596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgutf8_offsets[7]={ 0, 0, 0x3080, 0xE2080, 0x3C82080 }; 48606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 48616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic void 48626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgucnv_SBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs, 48636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UConverterToUnicodeArgs *pToUArgs, 48646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode *pErrorCode) { 48656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UConverter *utf8, *cnv; 48666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const uint8_t *source, *sourceLimit; 48676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint8_t *target; 48686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t targetCapacity; 48696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 48706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const uint16_t *table, *sbcsIndex; 48716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const uint16_t *results; 48726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 48736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int8_t oldToULength, toULength, toULimit; 48746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 48756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 c; 48766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint8_t b, t1, t2; 48776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 48786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint32_t asciiRoundtrips; 48796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint16_t value, minValue; 48806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool hasSupplementary; 
48816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 48826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* set up the local pointers */ 48836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utf8=pToUArgs->converter; 48846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv=pFromUArgs->converter; 48856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org source=(uint8_t *)pToUArgs->source; 48866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org sourceLimit=(uint8_t *)pToUArgs->sourceLimit; 48876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org target=(uint8_t *)pFromUArgs->target; 48886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target); 48896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 48906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org table=cnv->sharedData->mbcs.fromUnicodeTable; 48916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org sbcsIndex=cnv->sharedData->mbcs.sbcsIndex; 48926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) { 48936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org results=(uint16_t *)cnv->sharedData->mbcs.swapLFNLFromUnicodeBytes; 48946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 48956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org results=(uint16_t *)cnv->sharedData->mbcs.fromUnicodeBytes; 48966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 48976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org asciiRoundtrips=cnv->sharedData->mbcs.asciiRoundtrips; 48986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 48996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(cnv->useFallback) { 49006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* use all roundtrip and fallback results */ 49016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org minValue=0x800; 
49026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 49036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* use only roundtrips and fallbacks from private-use characters */ 49046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org minValue=0xc00; 49056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 49066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org hasSupplementary=(UBool)(cnv->sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY); 49076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 49086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* get the converter state from the UTF-8 UConverter */ 49096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=(UChar32)utf8->toUnicodeStatus; 49106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(c!=0) { 49116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org toULength=oldToULength=utf8->toULength; 49126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org toULimit=(int8_t)utf8->mode; 49136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 49146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org toULength=oldToULength=toULimit=0; 49156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 49166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 49176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 49186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Make sure that the last byte sequence before sourceLimit is complete 49196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * or runs into a lead byte. 49206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Do not go back into the bytes that will be read for finishing a partial 49216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * sequence from the previous buffer. 
49226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * In the conversion loop compare source with sourceLimit only once 49236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * per multi-byte character. 49246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 49256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 49266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t i, length; 49276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 49286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=(int32_t)(sourceLimit-source) - (toULimit-oldToULength); 49296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for(i=0; i<3 && i<length;) { 49306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org b=*(sourceLimit-i-1); 49316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U8_IS_TRAIL(b)) { 49326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++i; 49336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 49346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(i<U8_COUNT_TRAIL_BYTES(b)) { 49356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* exit the conversion loop before the lead byte if there are not enough trail bytes for it */ 49366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org sourceLimit-=i+1; 49376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 49386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 49396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 49406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 49416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 49426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 49436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(c!=0 && targetCapacity>0) { 49446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utf8->toUnicodeStatus=0; 49456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utf8->toULength=0; 
49466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org goto moreBytes; 49476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 49486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Note: We could avoid the goto by duplicating some of the moreBytes 49496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * code, but only up to the point of collecting a complete UTF-8 49506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * sequence; then recurse for the toUBytes[toULength] 49516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * and then continue with normal conversion. 49526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 49536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * If so, move this code to just after initializing the minimum 49546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * set of local variables for reading the UTF-8 input 49556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * (utf8, source, target, limits but not cnv, table, minValue, etc.). 
49566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 49576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Potential advantages: 49586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * - avoid the goto 49596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * - oldToULength could become a local variable in just those code blocks 49606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * that deal with buffer boundaries 49616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * - possibly faster if the goto prevents some compiler optimizations 49626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * (this would need measuring to confirm) 49636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Disadvantage: 49646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * - code duplication 49656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 49666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 49676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 49686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* conversion loop */ 49696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while(source<sourceLimit) { 49706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(targetCapacity>0) { 49716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org b=*source++; 49726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if((int8_t)b>=0) { 49736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* convert ASCII */ 49746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(IS_ASCII_ROUNDTRIP(b, asciiRoundtrips)) { 49756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *target++=(uint8_t)b; 49766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org --targetCapacity; 49776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org continue; 49786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 
49796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=b; 49806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value=SBCS_RESULT_FROM_UTF8(sbcsIndex, results, 0, c); 49816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 49826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 49836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(b<0xe0) { 49846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if( /* handle U+0080..U+07FF inline */ 49856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org b>=0xc2 && 49866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (t1=(uint8_t)(*source-0x80)) <= 0x3f 49876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ) { 49886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=b&0x1f; 49896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++source; 49906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value=SBCS_RESULT_FROM_UTF8(sbcsIndex, results, c, t1); 49916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(value>=minValue) { 49926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *target++=(uint8_t)value; 49936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org --targetCapacity; 49946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org continue; 49956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 49966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=(c<<6)|t1; 49976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 49986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 49996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=-1; 50006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 50016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(b==0xe0) { 50026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if( /* handle U+0800..U+0FFF inline */ 50036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
(t1=(uint8_t)(source[0]-0x80)) <= 0x3f && t1 >= 0x20 && 50046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (t2=(uint8_t)(source[1]-0x80)) <= 0x3f 50056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ) { 50066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=t1; 50076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org source+=2; 50086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value=SBCS_RESULT_FROM_UTF8(sbcsIndex, results, c, t2); 50096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(value>=minValue) { 50106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *target++=(uint8_t)value; 50116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org --targetCapacity; 50126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org continue; 50136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 50146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=(c<<6)|t2; 50156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 50166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 50176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=-1; 50186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 50196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 50206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=-1; 50216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 50226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 50236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(c<0) { 50246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* handle "complicated" and error cases, and continuing partial characters */ 50256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org oldToULength=0; 50266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org toULength=1; 50276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org toULimit=U8_COUNT_TRAIL_BYTES(b)+1; 
50286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=b; 50296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgmoreBytes: 50306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while(toULength<toULimit) { 50316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 50326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * The sourceLimit may have been adjusted before the conversion loop 50336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * to stop before a truncated sequence. 50346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Here we need to use the real limit in case we have two truncated 50356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * sequences at the end. 50366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * See ticket #7492. 50376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 50386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(source<(uint8_t *)pToUArgs->sourceLimit) { 50396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org b=*source; 50406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U8_IS_TRAIL(b)) { 50416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++source; 50426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++toULength; 50436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=(c<<6)+b; 50446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 50456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; /* sequence too short, stop with toULength<toULimit */ 50466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 50476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 50486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* store the partial UTF-8 character, compatible with the regular UTF-8 converter */ 50496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org source-=(toULength-oldToULength); 
50506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while(oldToULength<toULength) { 50516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utf8->toUBytes[oldToULength++]=*source++; 50526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 50536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utf8->toUnicodeStatus=c; 50546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utf8->toULength=toULength; 50556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utf8->mode=toULimit; 50566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pToUArgs->source=(char *)source; 50576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pFromUArgs->target=(char *)target; 50586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 50596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 50606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 50616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 50626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if( toULength==toULimit && /* consumed all trail bytes */ 50636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (toULength==3 || toULength==2) && /* BMP */ 50646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (c-=utf8_offsets[toULength])>=utf8_minLegal[toULength] && 50656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (c<=0xd7ff || 0xe000<=c) /* not a surrogate */ 50666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ) { 50676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value=MBCS_SINGLE_RESULT_FROM_U(table, results, c); 50686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if( 50696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org toULength==toULimit && toULength==4 && 50706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (0x10000<=(c-=utf8_offsets[4]) && c<=0x10ffff) 50716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ) { 
50726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* supplementary code point */ 50736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(!hasSupplementary) { 50746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* BMP-only codepages are stored without stage 1 entries for supplementary code points */ 50756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value=0; 50766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 50776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value=MBCS_SINGLE_RESULT_FROM_U(table, results, c); 50786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 50796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 50806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* error handling: illegal UTF-8 byte sequence */ 50816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org source-=(toULength-oldToULength); 50826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while(oldToULength<toULength) { 50836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utf8->toUBytes[oldToULength++]=*source++; 50846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 50856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utf8->toULength=toULength; 50866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pToUArgs->source=(char *)source; 50876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pFromUArgs->target=(char *)target; 50886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pErrorCode=U_ILLEGAL_CHAR_FOUND; 50896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 50906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 50916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 50926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 50936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 50946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(value>=minValue) { 
50956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* output the mapping for c */ 50966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *target++=(uint8_t)value; 50976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org --targetCapacity; 50986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 50996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* value<minValue means c is unassigned (unmappable) */ 51006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 51016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Try an extension mapping. 51026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Pass in no source because we don't have UTF-16 input. 51036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * If we have a partial match on c, we will return and revert 51046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * to UTF-8->UTF-16->charset conversion. 51056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 51066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org static const UChar nul=0; 51076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UChar *noSource=&nul; 51086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=_extFromU(cnv, cnv->sharedData, 51096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c, &noSource, noSource, 51106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org &target, target+targetCapacity, 51116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org NULL, -1, 51126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pFromUArgs->flush, 51136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pErrorCode); 51146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 51156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U_FAILURE(*pErrorCode)) { 51166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* not mappable or buffer overflow */ 
51176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->fromUChar32=c; 51186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 51196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(cnv->preFromUFirstCP>=0) { 51206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 51216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Partial match, return and revert to pivoting. 51226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * In normal from-UTF-16 conversion, we would just continue 51236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * but then exit the loop because the extension match would 51246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * have consumed the source. 51256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 51266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pErrorCode=U_USING_DEFAULT_WARNING; 51276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 51286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 51296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* a mapping was written to the target, continue */ 51306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 51316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* recalculate the targetCapacity after an extension mapping */ 51326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org targetCapacity=(int32_t)(pFromUArgs->targetLimit-(char *)target); 51336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 51346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 51356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 51366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* target is full */ 51376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 51386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 
51396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 51406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 51416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 51426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 51436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * The sourceLimit may have been adjusted before the conversion loop 51446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * to stop before a truncated sequence. 51456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * If so, then collect the truncated sequence now. 51466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 51476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U_SUCCESS(*pErrorCode) && 51486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->preFromUFirstCP<0 && 51496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org source<(sourceLimit=(uint8_t *)pToUArgs->sourceLimit)) { 51506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=utf8->toUBytes[0]=b=*source++; 51516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org toULength=1; 51526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org toULimit=U8_COUNT_TRAIL_BYTES(b)+1; 51536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while(source<sourceLimit) { 51546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utf8->toUBytes[toULength++]=b=*source++; 51556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org c=(c<<6)+b; 51566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 51576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utf8->toUnicodeStatus=c; 51586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utf8->toULength=toULength; 51596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utf8->mode=toULimit; 51606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 51616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
/* write back the updated pointers */
    pToUArgs->source=(char *)source;
    pFromUArgs->target=(char *)target;
}

/*
 * Convert UTF-8 input directly into the byte output of a double-byte
 * converter, without producing UTF-16 in between.
 *
 * pFromUArgs  supplies the output side: the target converter (whose
 *             fromUnicode tables are used), target and targetLimit, and flush.
 *             target is advanced past the bytes written.
 * pToUArgs    supplies the input side: the UTF-8 converter (whose
 *             toUnicodeStatus/toULength/mode/toUBytes fields carry a partial
 *             sequence between calls), source and sourceLimit.
 *             source is advanced past the bytes consumed.
 * pErrorCode  is set here to
 *             - U_BUFFER_OVERFLOW_ERROR when the target has no room left,
 *             - U_ILLEGAL_CHAR_FOUND for an ill-formed UTF-8 sequence
 *               (the offending bytes are copied to utf8->toUBytes),
 *             - U_USING_DEFAULT_WARNING when _extFromU() reports a partial
 *               match (cnv->preFromUFirstCP>=0),
 *             or to whatever _extFromU() stores in it.
 *
 * Each result is a 16-bit value: values up to 0xff are written as one byte,
 * larger values as two bytes (high byte first).
 *
 * NOTE(review): DBCS_RESULT_FROM_UTF8, MBCS_STAGE_2_FROM_U,
 * MBCS_VALUE_2_FROM_STAGE_2, MBCS_FROM_U_IS_ROUNDTRIP, IS_ASCII_ROUNDTRIP,
 * utf8_offsets[] and utf8_minLegal[] are defined outside this excerpt;
 * the comments below describe only how their results are used here.
 */
static void
ucnv_DBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
                  UConverterToUnicodeArgs *pToUArgs,
                  UErrorCode *pErrorCode) {
    UConverter *utf8, *cnv;
    const uint8_t *source, *sourceLimit;
    uint8_t *target;
    int32_t targetCapacity;

    const uint16_t *table, *mbcsIndex;
    const uint16_t *results;

    /*
     * State of the UTF-8 sequence currently being collected:
     * oldToULength = bytes that came from a previous buffer,
     * toULength    = bytes collected so far,
     * toULimit     = total bytes expected for this lead byte.
     */
    int8_t oldToULength, toULength, toULimit;

    UChar32 c;
    uint8_t b, t1, t2;

    uint32_t stage2Entry;
    uint32_t asciiRoundtrips;
    uint16_t value;
    UBool hasSupplementary;

    /* set up the local pointers */
    utf8=pToUArgs->converter;
    cnv=pFromUArgs->converter;
    source=(uint8_t *)pToUArgs->source;
    sourceLimit=(uint8_t *)pToUArgs->sourceLimit;
    target=(uint8_t *)pFromUArgs->target;
    targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target);

    table=cnv->sharedData->mbcs.fromUnicodeTable;
    mbcsIndex=cnv->sharedData->mbcs.mbcsIndex;
    /* the LF/NL-swapping option selects an alternate result array */
    if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) {
        results=(uint16_t *)cnv->sharedData->mbcs.swapLFNLFromUnicodeBytes;
    } else {
        results=(uint16_t *)cnv->sharedData->mbcs.fromUnicodeBytes;
    }
    asciiRoundtrips=cnv->sharedData->mbcs.asciiRoundtrips;

    hasSupplementary=(UBool)(cnv->sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY);

    /* get the converter state from the UTF-8 UConverter */
    c=(UChar32)utf8->toUnicodeStatus;
    if(c!=0) {
        /* a partial sequence was stored earlier; mode holds its expected total length */
        toULength=oldToULength=utf8->toULength;
        toULimit=(int8_t)utf8->mode;
    } else {
        toULength=oldToULength=toULimit=0;
    }

    /*
     * Make sure that the last byte sequence before sourceLimit is complete
     * or runs into a lead byte.
     * Do not go back into the bytes that will be read for finishing a partial
     * sequence from the previous buffer.
     * In the conversion loop compare source with sourceLimit only once
     * per multi-byte character.
     */
    {
        int32_t i, length;

        /* bytes available for look-back, excluding those needed to finish the stored partial sequence */
        length=(int32_t)(sourceLimit-source) - (toULimit-oldToULength);
        /* walk backwards over at most 3 trail bytes looking for their lead byte */
        for(i=0; i<3 && i<length;) {
            b=*(sourceLimit-i-1);
            if(U8_IS_TRAIL(b)) {
                ++i;
            } else {
                if(i<U8_COUNT_TRAIL_BYTES(b)) {
                    /* exit the conversion loop before the lead byte if there are not enough trail bytes for it */
                    sourceLimit-=i+1;
                }
                break;
            }
        }
    }

    if(c!=0 && targetCapacity>0) {
        /* resume the stored partial sequence: clear the saved state and jump into the trail-byte collector */
        utf8->toUnicodeStatus=0;
        utf8->toULength=0;
        goto moreBytes;
        /* See note in ucnv_SBCSFromUTF8() about this goto. */
    }

    /* conversion loop */
    while(source<sourceLimit) {
        if(targetCapacity>0) {
            b=*source++;
            if((int8_t)b>=0) {
                /* convert ASCII */
                if(IS_ASCII_ROUNDTRIP(b, asciiRoundtrips)) {
                    /* the byte is copied to the output unchanged */
                    *target++=b;
                    --targetCapacity;
                    continue;
                } else {
                    value=DBCS_RESULT_FROM_UTF8(mbcsIndex, results, 0, b);
                    if(value==0) {
                        /* a zero result is treated as "no mapping" */
                        c=b;
                        goto unassigned;
                    }
                }
            } else {
                /*
                 * Lead bytes 0xe1..0xed and 0xc2..0xdf are tried inline below.
                 * Everything else, including b==0xe0, sets c=-1 and is handled
                 * by the general path further down.
                 * The reads of source[0]/source[1] are not compared against
                 * sourceLimit here; see the sourceLimit adjustment before the loop.
                 */
                if(b>0xe0) {
                    if( /* handle U+1000..U+D7FF inline */
                        /* for lead 0xed only t1<=0x1f is accepted, which keeps the result at or below U+D7FF */
                        (((t1=(uint8_t)(source[0]-0x80), b<0xed) && (t1 <= 0x3f)) ||
                                                        (b==0xed && (t1 <= 0x1f))) &&
                        (t2=(uint8_t)(source[1]-0x80)) <= 0x3f
                    ) {
                        /* c holds the code point without its lowest 6 bits; t2 supplies those */
                        c=((b&0xf)<<6)|t1;
                        source+=2;
                        value=DBCS_RESULT_FROM_UTF8(mbcsIndex, results, c, t2);
                        if(value==0) {
                            /* assemble the full code point for the extension lookup */
                            c=(c<<6)|t2;
                            goto unassigned;
                        }
                    } else {
                        c=-1;
                    }
                } else if(b<0xe0) {
                    if( /* handle U+0080..U+07FF inline */
                        /* b>=0xc2 rejects 0x80..0xc1 as a two-byte lead here */
                        b>=0xc2 &&
                        (t1=(uint8_t)(*source-0x80)) <= 0x3f
                    ) {
                        c=b&0x1f;
                        ++source;
                        value=DBCS_RESULT_FROM_UTF8(mbcsIndex, results, c, t1);
                        if(value==0) {
                            /* assemble the full code point for the extension lookup */
                            c=(c<<6)|t1;
                            goto unassigned;
                        }
                    } else {
                        c=-1;
                    }
                } else {
                    c=-1;
                }

                if(c<0) {
                    /* handle "complicated" and error cases, and continuing partial characters */
                    oldToULength=0;
                    toULength=1;
                    toULimit=U8_COUNT_TRAIL_BYTES(b)+1;
                    c=b;
moreBytes:
                    /* entered from above, or via goto when resuming a sequence stored by an earlier call */
                    while(toULength<toULimit) {
                        /*
                         * The sourceLimit may have been adjusted before the conversion loop
                         * to stop before a truncated sequence.
                         * Here we need to use the real limit in case we have two truncated
                         * sequences at the end.
                         * See ticket #7492.
                         */
                        if(source<(uint8_t *)pToUArgs->sourceLimit) {
                            b=*source;
                            if(U8_IS_TRAIL(b)) {
                                ++source;
                                ++toULength;
                                /* raw accumulation; the lead/trail marker bits are subtracted later via utf8_offsets[] */
                                c=(c<<6)+b;
                            } else {
                                break; /* sequence too short, stop with toULength<toULimit */
                            }
                        } else {
                            /* store the partial UTF-8 character, compatible with the regular UTF-8 converter */
                            /* rewind to the first byte read from this buffer and copy from there */
                            source-=(toULength-oldToULength);
                            while(oldToULength<toULength) {
                                utf8->toUBytes[oldToULength++]=*source++;
                            }
                            utf8->toUnicodeStatus=c;
                            utf8->toULength=toULength;
                            utf8->mode=toULimit;
                            pToUArgs->source=(char *)source;
                            pFromUArgs->target=(char *)target;
                            return;
                        }
                    }

                    /* note: the conditions below modify c (c-=...) as a side effect while validating it */
                    if( toULength==toULimit && /* consumed all trail bytes */
                        (toULength==3 || toULength==2) && /* BMP */
                        (c-=utf8_offsets[toULength])>=utf8_minLegal[toULength] &&
                        (c<=0xd7ff || 0xe000<=c) /* not a surrogate */
                    ) {
                        stage2Entry=MBCS_STAGE_2_FROM_U(table, c);
                    } else if(
                        toULength==toULimit && toULength==4 &&
                        (0x10000<=(c-=utf8_offsets[4]) && c<=0x10ffff)
                    ) {
                        /* supplementary code point */
                        if(!hasSupplementary) {
                            /* BMP-only codepages are stored without stage 1 entries for supplementary code points */
                            stage2Entry=0;
                        } else {
                            stage2Entry=MBCS_STAGE_2_FROM_U(table, c);
                        }
                    } else {
                        /* error handling: illegal UTF-8 byte sequence */
                        /* hand the offending bytes to the UTF-8 converter's toUBytes and report the error */
                        source-=(toULength-oldToULength);
                        while(oldToULength<toULength) {
                            utf8->toUBytes[oldToULength++]=*source++;
                        }
                        utf8->toULength=toULength;
                        pToUArgs->source=(char *)source;
                        pFromUArgs->target=(char *)target;
                        *pErrorCode=U_ILLEGAL_CHAR_FOUND;
                        return;
                    }

                    /* get the bytes and the length for the output */
                    /* MBCS_OUTPUT_2 */
                    value=MBCS_VALUE_2_FROM_STAGE_2(results, stage2Entry, c);

                    /* is this code point assigned, or do we use fallbacks? */
                    if(!(MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c) ||
                         (UCNV_FROM_U_USE_FALLBACK(cnv, c) && value!=0))
                    ) {
                        goto unassigned;
                    }
                }
            }

            /* write the output character bytes from value and length */
            /* from the first if in the loop we know that targetCapacity>0 */
            if(value<=0xff) {
                /* this is easy because we know that there is enough space */
                *target++=(uint8_t)value;
                --targetCapacity;
            } else /* length==2 */ {
                *target++=(uint8_t)(value>>8);
                if(2<=targetCapacity) {
                    *target++=(uint8_t)value;
                    targetCapacity-=2;
                } else {
                    /* only the first byte fit: keep the second byte in the converter's charErrorBuffer */
                    cnv->charErrorBuffer[0]=(char)value;
                    cnv->charErrorBufferLength=1;

                    /* target overflow */
                    *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
                    break;
                }
            }
            continue;

unassigned:
            {
                /*
                 * Try an extension mapping.
                 * Pass in no source because we don't have UTF-16 input.
                 * If we have a partial match on c, we will return and revert
                 * to UTF-8->UTF-16->charset conversion.
                 */
                static const UChar nul=0;
                const UChar *noSource=&nul;
                c=_extFromU(cnv, cnv->sharedData,
                            c, &noSource, noSource,
                            &target, target+targetCapacity,
                            NULL, -1,
                            pFromUArgs->flush,
                            pErrorCode);

                if(U_FAILURE(*pErrorCode)) {
                    /* not mappable or buffer overflow */
                    cnv->fromUChar32=c;
                    break;
                } else if(cnv->preFromUFirstCP>=0) {
                    /*
                     * Partial match, return and revert to pivoting.
                     * In normal from-UTF-16 conversion, we would just continue
                     * but then exit the loop because the extension match would
                     * have consumed the source.
                     */
                    *pErrorCode=U_USING_DEFAULT_WARNING;
                    break;
                } else {
                    /* a mapping was written to the target, continue */

                    /* recalculate the targetCapacity after an extension mapping */
                    targetCapacity=(int32_t)(pFromUArgs->targetLimit-(char *)target);
                    continue;
                }
            }
        } else {
            /* target is full */
            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
            break;
        }
    }

    /*
     * The sourceLimit may have been adjusted before the conversion loop
     * to stop before a truncated sequence.
     * If so, then collect the truncated sequence now.
     */
    /* skipped after a failure or a partial extension match; sourceLimit is reset to the real limit inside the condition */
    if(U_SUCCESS(*pErrorCode) &&
            cnv->preFromUFirstCP<0 &&
            source<(sourceLimit=(uint8_t *)pToUArgs->sourceLimit)) {
        c=utf8->toUBytes[0]=b=*source++;
        toULength=1;
        toULimit=U8_COUNT_TRAIL_BYTES(b)+1;
        while(source<sourceLimit) {
            utf8->toUBytes[toULength++]=b=*source++;
            c=(c<<6)+b;
        }
        /* save the partial sequence in the UTF-8 converter for the next call */
        utf8->toUnicodeStatus=c;
        utf8->toULength=toULength;
        utf8->mode=toULimit;
    }

    /* write back the updated pointers */
    pToUArgs->source=(char *)source;
    pFromUArgs->target=(char *)target;
}

/* miscellaneous ------------------------------------------------------------ */

54796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic void 54806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgucnv_MBCSGetStarters(const UConverter* cnv, 54816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool starters[256], 54826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode *pErrorCode) { 54836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const int32_t *state0; 54846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int i; 54856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 54866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org state0=cnv->sharedData->mbcs.stateTable[cnv->sharedData->mbcs.dbcsOnlyState]; 54876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for(i=0; i<256; ++i) { 54886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* all bytes that cause a state transition from state 0 are lead bytes */ 54896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org starters[i]= (UBool)MBCS_ENTRY_IS_TRANSITION(state0[i]); 54906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 54916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 54926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 54936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* 54946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * This is an internal function that allows other converter implementations 54956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * to check whether a byte is a lead byte. 
54966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 54976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CFUNC UBool 54986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgucnv_MBCSIsLeadByte(UConverterSharedData *sharedData, char byte) { 54996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return (UBool)MBCS_ENTRY_IS_TRANSITION(sharedData->mbcs.stateTable[0][(uint8_t)byte]); 55006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 55016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 55026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic void 55036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgucnv_MBCSWriteSub(UConverterFromUnicodeArgs *pArgs, 55046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t offsetIndex, 55056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode *pErrorCode) { 55066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UConverter *cnv=pArgs->converter; 55076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org char *p, *subchar; 55086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org char buffer[4]; 55096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t length; 55106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 55116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* first, select between subChar and subChar1 */ 55126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if( cnv->subChar1!=0 && 55136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (cnv->sharedData->mbcs.extIndexes!=NULL ? 
55146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->useSubChar1 : 55156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (cnv->invalidUCharBuffer[0]<=0xff)) 55166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ) { 55176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* select subChar1 if it is set (not 0) and the unmappable Unicode code point is up to U+00ff (IBM MBCS behavior) */ 55186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org subchar=(char *)&cnv->subChar1; 55196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=1; 55206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 55216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* select subChar in all other cases */ 55226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org subchar=(char *)cnv->subChars; 55236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=cnv->subCharLen; 55246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 55256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 55266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* reset the selector for the next code point */ 55276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->useSubChar1=FALSE; 55286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 55296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (cnv->sharedData->mbcs.outputType == MBCS_OUTPUT_2_SISO) { 55306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org p=buffer; 55316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 55326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* fromUnicodeStatus contains prevLength */ 55336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org switch(length) { 55346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case 1: 55356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(cnv->fromUnicodeStatus==2) { 55366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 
DBCS mode and SBCS sub char: change to SBCS */ 55376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->fromUnicodeStatus=1; 55386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *p++=UCNV_SI; 55396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 55406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *p++=subchar[0]; 55416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 55426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org case 2: 55436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(cnv->fromUnicodeStatus<=1) { 55446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* SBCS mode and DBCS sub char: change to DBCS */ 55456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org cnv->fromUnicodeStatus=2; 55466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *p++=UCNV_SO; 55476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 55486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *p++=subchar[0]; 55496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *p++=subchar[1]; 55506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org break; 55516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org default: 55526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 55536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 55546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 55556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org subchar=buffer; 55566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=(int32_t)(p-buffer); 55576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 55586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 55596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_cbFromUWriteBytes(pArgs, subchar, length, offsetIndex, pErrorCode); 55606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 
55616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 55626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CFUNC UConverterType 55636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgucnv_MBCSGetType(const UConverter* converter) { 55646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* SBCS, DBCS, and EBCDIC_STATEFUL are replaced by MBCS, but here we cheat a little */ 55656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(converter->sharedData->mbcs.countStates==1) { 55666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return (UConverterType)UCNV_SBCS; 55676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if((converter->sharedData->mbcs.outputType&0xff)==MBCS_OUTPUT_2_SISO) { 55686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return (UConverterType)UCNV_EBCDIC_STATEFUL; 55696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(converter->sharedData->staticData->minBytesPerChar==2 && converter->sharedData->staticData->maxBytesPerChar==2) { 55706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return (UConverterType)UCNV_DBCS; 55716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 55726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return (UConverterType)UCNV_MBCS; 55736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 55746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 55756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const UConverterImpl _SBCSUTF8Impl={ 55766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UCNV_MBCS, 55776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 55786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_MBCSLoad, 55796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_MBCSUnload, 55806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 55816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_MBCSOpen, 
55826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org NULL, 55836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org NULL, 55846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 55856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_MBCSToUnicodeWithOffsets, 55866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_MBCSToUnicodeWithOffsets, 55876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_MBCSFromUnicodeWithOffsets, 55886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_MBCSFromUnicodeWithOffsets, 55896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_MBCSGetNextUChar, 55906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 55916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_MBCSGetStarters, 55926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_MBCSGetName, 55936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_MBCSWriteSub, 55946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org NULL, 55956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_MBCSGetUnicodeSet, 55966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 55976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org NULL, 55986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_SBCSFromUTF8 55996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}; 56006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 56016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const UConverterImpl _DBCSUTF8Impl={ 56026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UCNV_MBCS, 56036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 56046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_MBCSLoad, 56056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_MBCSUnload, 56066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 56076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_MBCSOpen, 
56086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org NULL, 56096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org NULL, 56106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 56116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_MBCSToUnicodeWithOffsets, 56126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_MBCSToUnicodeWithOffsets, 56136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_MBCSFromUnicodeWithOffsets, 56146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_MBCSFromUnicodeWithOffsets, 56156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_MBCSGetNextUChar, 56166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 56176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_MBCSGetStarters, 56186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_MBCSGetName, 56196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_MBCSWriteSub, 56206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org NULL, 56216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_MBCSGetUnicodeSet, 56226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 56236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org NULL, 56246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_DBCSFromUTF8 56256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}; 56266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 56276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic const UConverterImpl _MBCSImpl={ 56286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UCNV_MBCS, 56296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 56306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_MBCSLoad, 56316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_MBCSUnload, 56326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 56336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_MBCSOpen, 
56346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org NULL, 56356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org NULL, 56366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 56376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_MBCSToUnicodeWithOffsets, 56386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_MBCSToUnicodeWithOffsets, 56396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_MBCSFromUnicodeWithOffsets, 56406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_MBCSFromUnicodeWithOffsets, 56416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_MBCSGetNextUChar, 56426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 56436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_MBCSGetStarters, 56446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_MBCSGetName, 56456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_MBCSWriteSub, 56466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org NULL, 56476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ucnv_MBCSGetUnicodeSet 56486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}; 56496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 56506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 56516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* Static data is in tools/makeconv/ucnvstat.c for data-based 56526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * converters. Be sure to update it as well. 
56536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 56546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 56556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgconst UConverterSharedData _MBCSData={ 56566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org sizeof(UConverterSharedData), 1, 56576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org NULL, NULL, NULL, FALSE, &_MBCSImpl, 56586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 0 56596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}; 56606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 56616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ 5662