1Index: source/common/ucnv2022.cpp 2=================================================================== 3--- source/common/ucnv2022.cpp (revision 259715) 4+++ source/common/ucnv2022.cpp (working copy) 5@@ -167,13 +167,19 @@ 6 * all versions, not just JIS7 and JIS8. 7 * - ICU does not distinguish between different versions of JIS X 0208. 8 */ 9+#if UCONFIG_NO_NON_HTML5_CONVERSION 10+enum { MAX_JA_VERSION=0 }; /* jpCharsetMasks[] then holds only its version-0 entry */ 11+#else /* !UCONFIG_NO_NON_HTML5_CONVERSION */ 12 enum { MAX_JA_VERSION=4 }; 13+#endif /* UCONFIG_NO_NON_HTML5_CONVERSION */ 14 static const uint16_t jpCharsetMasks[MAX_JA_VERSION+1]={ 15 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT), 16+#if !UCONFIG_NO_NON_HTML5_CONVERSION /* masks for versions 1..4 */ 17 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212), 18 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7), 19 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7), 20 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7) 21+#endif /* !UCONFIG_NO_NON_HTML5_CONVERSION */ 22 }; 23 24 typedef enum { 25@@ -361,14 +367,25 @@ 26 }; 27 28 29+/* Enable ISO-2022-{KR,CN,CN-Ext} for now. 30+ * TODO(jshin): Disable it when we know what to do about 'replacement' 31+ * encodings. 
See http://crbug.com/277037 and 32+ * https://codereview.chromium.org/145973021/ 33+ */ 34+#ifndef U_ENABLE_ISO_2022_KR_CN 35+#define U_ENABLE_ISO_2022_KR_CN 1 /* tested by value with #if: define as 0 to compile out the guarded KR/CN code */ 36+#endif 37+ 38 /* Type def for refactoring changeState_2022 code*/ 39 typedef enum{ 40 #ifdef U_ENABLE_GENERIC_ISO_2022 41 ISO_2022=0, 42 #endif 43 ISO_2022_JP=1, 44+#if U_ENABLE_ISO_2022_KR_CN 45 ISO_2022_KR=2, 46 ISO_2022_CN=3 47+#endif /* U_ENABLE_ISO_2022_KR_CN */ 48 } Variant2022; 49 50 /*********** ISO 2022 Converter Protos ***********/ 51@@ -485,24 +502,28 @@ 52 /* prevent indexing beyond jpCharsetMasks[] */ 53 myConverterData->version = version = 0; 54 } 55+#if !UCONFIG_NO_NON_HTML5_CONVERSION 56 if(jpCharsetMasks[version]&CSM(ISO8859_7)) { 57 myConverterData->myConverterArray[ISO8859_7] = 58 ucnv_loadSharedData("ISO8859_7", &stackPieces, &stackArgs, errorCode); 59 } 60+#endif /* !UCONFIG_NO_NON_HTML5_CONVERSION */ 61 myConverterData->myConverterArray[JISX208] = 62 ucnv_loadSharedData("Shift-JIS", &stackPieces, &stackArgs, errorCode); 63+#if !UCONFIG_NO_NON_HTML5_CONVERSION 64 if(jpCharsetMasks[version]&CSM(JISX212)) { 65 myConverterData->myConverterArray[JISX212] = 66 ucnv_loadSharedData("jisx-212", &stackPieces, &stackArgs, errorCode); 67 } 68 if(jpCharsetMasks[version]&CSM(GB2312)) { 69 myConverterData->myConverterArray[GB2312] = 70- ucnv_loadSharedData("ibm-5478", &stackPieces, &stackArgs, errorCode); /* gb_2312_80-1 */ 71+ ucnv_loadSharedData("noop-gb2312_gl", &stackPieces, &stackArgs, errorCode); /* gb_2312_80-1 */ 72 } 73 if(jpCharsetMasks[version]&CSM(KSC5601)) { 74 myConverterData->myConverterArray[KSC5601] = 75 ucnv_loadSharedData("ksc_5601", &stackPieces, &stackArgs, errorCode); 76 } 77+#endif /* !UCONFIG_NO_NON_HTML5_CONVERSION */ 78 79 /* set the function pointers to appropriate funtions */ 80 cnv->sharedData=(UConverterSharedData*)(&_ISO2022JPData); 81@@ -513,6 +534,7 @@ 82 myConverterData->name[len]=(char)(myConverterData->version+(int)'0'); 83 myConverterData->name[len+1]='\0'; 84 } 85+#if U_ENABLE_ISO_2022_KR_CN 86 else if(myLocale[0]=='k' && (myLocale[1]=='o'|| myLocale[1]=='r') 
&& 87 (myLocale[2]=='_' || myLocale[2]=='\0')) 88 { 89@@ -558,13 +580,13 @@ 90 91 /* open the required converters and cache them */ 92 myConverterData->myConverterArray[GB2312_1] = 93- ucnv_loadSharedData("ibm-5478", &stackPieces, &stackArgs, errorCode); 94+ ucnv_loadSharedData("noop-gb2312_gl", &stackPieces, &stackArgs, errorCode); /* gb_2312_80-1 */ 95 if(version==1) { 96 myConverterData->myConverterArray[ISO_IR_165] = 97- ucnv_loadSharedData("iso-ir-165", &stackPieces, &stackArgs, errorCode); 98+ ucnv_loadSharedData("noop-iso-ir-165", &stackPieces, &stackArgs, errorCode); 99 } 100 myConverterData->myConverterArray[CNS_11643] = 101- ucnv_loadSharedData("cns-11643-1992", &stackPieces, &stackArgs, errorCode); 102+ ucnv_loadSharedData("noop-cns-11643", &stackPieces, &stackArgs, errorCode); 103 104 105 /* set the function pointers to appropriate funtions */ 106@@ -582,6 +604,7 @@ 107 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=2"); 108 } 109 } 110+#endif // U_ENABLE_ISO_2022_KR_CN 111 else{ 112 #ifdef U_ENABLE_GENERIC_ISO_2022 113 myConverterData->isFirstBuffer = TRUE; 114Index: source/common/ucnvbocu.cpp 115=================================================================== 116--- source/common/ucnvbocu.cpp (revision 259715) 117+++ source/common/ucnvbocu.cpp (working copy) 118@@ -19,7 +19,7 @@ 119 120 #include "unicode/utypes.h" 121 122-#if !UCONFIG_NO_CONVERSION 123+#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION /* also off when non-HTML5 converters are disabled */ 124 125 #include "unicode/ucnv.h" 126 #include "unicode/ucnv_cb.h" 127Index: source/common/ucnvisci.c 128=================================================================== 129--- source/common/ucnvisci.c (revision 259715) 130+++ source/common/ucnvisci.c (working copy) 131@@ -17,7 +17,7 @@ 132 133 #include "unicode/utypes.h" 134 135-#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION 136+#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION /* also off when non-HTML5 converters are disabled */ 137 138 #include "unicode/ucnv.h" 139 #include 
"unicode/ucnv_cb.h" 140Index: source/common/ucnvscsu.c 141=================================================================== 142--- source/common/ucnvscsu.c (revision 259715) 143+++ source/common/ucnvscsu.c (working copy) 144@@ -21,7 +21,7 @@ 145 146 #include "unicode/utypes.h" 147 148-#if !UCONFIG_NO_CONVERSION 149+#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION /* also off when non-HTML5 converters are disabled */ 150 151 #include "unicode/ucnv.h" 152 #include "unicode/ucnv_cb.h" 153Index: source/common/ucnv_u7.c 154=================================================================== 155--- source/common/ucnv_u7.c (revision 259715) 156+++ source/common/ucnv_u7.c (working copy) 157@@ -16,7 +16,7 @@ 158 159 #include "unicode/utypes.h" 160 161-#if !UCONFIG_NO_CONVERSION 162+#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION /* also off when non-HTML5 converters are disabled */ 163 164 #include "unicode/ucnv.h" 165 #include "ucnv_bld.h" 166Index: source/common/unicode/uconfig.h 167=================================================================== 168--- source/common/unicode/uconfig.h (revision 259715) 169+++ source/common/unicode/uconfig.h (working copy) 170@@ -265,6 +265,14 @@ 171 #endif 172 173 /** 174+ * This switch turns off all the converters NOT listed in 175+ * the encoding standard : http://encoding.spec.whatwg.org 176+ */ 177+#ifndef UCONFIG_NO_NON_HTML5_CONVERSION 178+#define UCONFIG_NO_NON_HTML5_CONVERSION 0 /* default 0: the non-HTML5 converters stay enabled */ 179+#endif 180+ 181+/** 182 * \def UCONFIG_NO_LEGACY_CONVERSION 183 * This switch turns off all converters except for 184 * - Unicode charsets (UTF-7/8/16/32, CESU-8, SCSU, BOCU-1) 185Index: source/common/ucnv_bld.cpp 186=================================================================== 187--- source/common/ucnv_bld.cpp (revision 259715) 188+++ source/common/ucnv_bld.cpp (working copy) 189@@ -79,16 +79,25 @@ 190 &_HZData, 191 #endif 192 193+#if UCONFIG_NO_NON_HTML5_CONVERSION 194+ NULL, /* empty slot in place of &_SCSUData */ 195+#else /* !UCONFIG_NO_NON_HTML5_CONVERSION */ 196 &_SCSUData, 197+#endif /* UCONFIG_NO_NON_HTML5_CONVERSION */ 198 199-#if UCONFIG_NO_LEGACY_CONVERSION 200+ 201+#if UCONFIG_NO_LEGACY_CONVERSION || 
UCONFIG_NO_NON_HTML5_CONVERSION 202 NULL, 203 #else 204 &_ISCIIData, 205 #endif 206 207 &_ASCIIData, 208+#if UCONFIG_NO_NON_HTML5_CONVERSION 209+ NULL, NULL, &_UTF16Data, &_UTF32Data, NULL, NULL, /* UTF-7, BOCU-1, CESU-8 and IMAP slots left empty */ 210+#else /* !UCONFIG_NO_NON_HTML5_CONVERSION */ 211 &_UTF7Data, &_Bocu1Data, &_UTF16Data, &_UTF32Data, &_CESU8Data, &_IMAPData, 212+#endif /* UCONFIG_NO_NON_HTML5_CONVERSION */ 213 214 #if UCONFIG_NO_LEGACY_CONVERSION 215 NULL, 216Index: source/common/ucnv_u8.c 217=================================================================== 218--- source/common/ucnv_u8.c (revision 259715) 219+++ source/common/ucnv_u8.c (working copy) 220@@ -87,6 +87,15 @@ 221 static const uint32_t 222 utf8_minChar32[7]={ 0, 0, 0x80, 0x800, 0x10000, 0xffffffff, 0xffffffff }; 223 224+static UBool hasCESU8Data(const UConverter *cnv) /* tells whether cnv is backed by the CESU-8 converter data */ 225+{ 226+#if UCONFIG_NO_NON_HTML5_CONVERSION 227+ return FALSE; /* CESU-8 is compiled out, so no converter can match */ 228+#else /* !UCONFIG_NO_NON_HTML5_CONVERSION */ 229+ return (UBool)(cnv->sharedData == &_CESU8Data); 230+#endif /* UCONFIG_NO_NON_HTML5_CONVERSION */ 231+} 232+ 233 static void ucnv_toUnicode_UTF8 (UConverterToUnicodeArgs * args, 234 UErrorCode * err) 235 { 236@@ -96,10 +105,10 @@ 237 const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit; 238 const UChar *targetLimit = args->targetLimit; 239 unsigned char *toUBytes = cnv->toUBytes; 240- UBool isCESU8 = (UBool)(cnv->sharedData == &_CESU8Data); 241+ UBool isCESU8 = hasCESU8Data(cnv); 242 uint32_t ch, ch2 = 0; 243 int32_t i, inBytes; 244- 245+ 246 /* Restore size of current sequence */ 247 if (cnv->toUnicodeStatus && myTarget < targetLimit) 248 { 249@@ -226,7 +235,7 @@ 250 const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit; 251 const UChar *targetLimit = args->targetLimit; 252 unsigned char *toUBytes = cnv->toUBytes; 253- UBool isCESU8 = (UBool)(cnv->sharedData == &_CESU8Data); 254+ UBool isCESU8 = hasCESU8Data(cnv); 255 uint32_t ch, ch2 = 0; 256 int32_t i, inBytes; 257 258@@ -357,7 +366,7 @@ 259 UChar32 ch; 260 uint8_t tempBuf[4]; 261 int32_t indexToWrite; 262- UBool isNotCESU8 = (UBool)(cnv->sharedData != &_CESU8Data); 263+ UBool isNotCESU8 = !hasCESU8Data(cnv); 264 265 if 
(cnv->fromUChar32 && myTarget < targetLimit) 266 { 267@@ -473,7 +482,7 @@ 268 int32_t offsetNum, nextSourceIndex; 269 int32_t indexToWrite; 270 uint8_t tempBuf[4]; 271- UBool isNotCESU8 = (UBool)(cnv->sharedData != &_CESU8Data); 272+ UBool isNotCESU8 = !hasCESU8Data(cnv); 273 274 if (cnv->fromUChar32 && myTarget < targetLimit) 275 { 276Index: source/common/unicode/urename.h 277=================================================================== 278--- source/common/unicode/urename.h (revision 259715) 279+++ source/common/unicode/urename.h (working copy) 280@@ -73,12 +73,16 @@ 281 #define UDataMemory_setData U_ICU_ENTRY_POINT_RENAME(UDataMemory_setData) 282 #define UDatamemory_assign U_ICU_ENTRY_POINT_RENAME(UDatamemory_assign) 283 #define _ASCIIData U_ICU_ENTRY_POINT_RENAME(_ASCIIData) 284+#if !UCONFIG_NO_NON_HTML5_CONVERSION 285 #define _Bocu1Data U_ICU_ENTRY_POINT_RENAME(_Bocu1Data) 286 #define _CESU8Data U_ICU_ENTRY_POINT_RENAME(_CESU8Data) 287+#endif /* !UCONFIG_NO_NON_HTML5_CONVERSION */ 288 #define _CompoundTextData U_ICU_ENTRY_POINT_RENAME(_CompoundTextData) 289 #define _HZData U_ICU_ENTRY_POINT_RENAME(_HZData) 290+#if !UCONFIG_NO_NON_HTML5_CONVERSION 291 #define _IMAPData U_ICU_ENTRY_POINT_RENAME(_IMAPData) 292 #define _ISCIIData U_ICU_ENTRY_POINT_RENAME(_ISCIIData) 293+#endif /* !UCONFIG_NO_NON_HTML5_CONVERSION */ 294 #define _ISO2022Data U_ICU_ENTRY_POINT_RENAME(_ISO2022Data) 295 #define _LMBCSData1 U_ICU_ENTRY_POINT_RENAME(_LMBCSData1) 296 #define _LMBCSData11 U_ICU_ENTRY_POINT_RENAME(_LMBCSData11) 297@@ -94,14 +98,18 @@ 298 #define _LMBCSData8 U_ICU_ENTRY_POINT_RENAME(_LMBCSData8) 299 #define _Latin1Data U_ICU_ENTRY_POINT_RENAME(_Latin1Data) 300 #define _MBCSData U_ICU_ENTRY_POINT_RENAME(_MBCSData) 301+#if !UCONFIG_NO_NON_HTML5_CONVERSION 302 #define _SCSUData U_ICU_ENTRY_POINT_RENAME(_SCSUData) 303+#endif /* !UCONFIG_NO_NON_HTML5_CONVERSION */ 304 #define _UTF16BEData U_ICU_ENTRY_POINT_RENAME(_UTF16BEData) 305 #define _UTF16Data U_ICU_ENTRY_POINT_RENAME(_UTF16Data) 306 #define _UTF16LEData U_ICU_ENTRY_POINT_RENAME(_UTF16LEData) 307 #define _UTF32BEData 
U_ICU_ENTRY_POINT_RENAME(_UTF32BEData) 308 #define _UTF32Data U_ICU_ENTRY_POINT_RENAME(_UTF32Data) 309 #define _UTF32LEData U_ICU_ENTRY_POINT_RENAME(_UTF32LEData) 310+#if !UCONFIG_NO_NON_HTML5_CONVERSION 311 #define _UTF7Data U_ICU_ENTRY_POINT_RENAME(_UTF7Data) 312+#endif /* !UCONFIG_NO_NON_HTML5_CONVERSION */ 313 #define _UTF8Data U_ICU_ENTRY_POINT_RENAME(_UTF8Data) 314 #define cmemory_cleanup U_ICU_ENTRY_POINT_RENAME(cmemory_cleanup) 315 #define cmemory_inUse U_ICU_ENTRY_POINT_RENAME(cmemory_inUse) 316Index: source/common/ucnv_cnv.h 317=================================================================== 318--- source/common/ucnv_cnv.h (revision 259715) 319+++ source/common/ucnv_cnv.h (working copy) 320@@ -259,8 +259,13 @@ 321 _ISO2022Data, 322 _LMBCSData1,_LMBCSData2, _LMBCSData3, _LMBCSData4, _LMBCSData5, _LMBCSData6, 323 _LMBCSData8,_LMBCSData11,_LMBCSData16,_LMBCSData17,_LMBCSData18,_LMBCSData19, 324+#if !UCONFIG_NO_NON_HTML5_CONVERSION 325 _HZData,_ISCIIData, _SCSUData, _ASCIIData, 326 _UTF7Data, _Bocu1Data, _UTF16Data, _UTF32Data, _CESU8Data, _IMAPData, _CompoundTextData; 327+#else /* UCONFIG_NO_NON_HTML5_CONVERSION */ 328+ _HZData, _ASCIIData, 329+ _UTF16Data, _UTF32Data, _CompoundTextData; /* ISCII, SCSU, UTF-7, BOCU-1, CESU-8 and IMAP are not declared here */ 330+#endif /* !UCONFIG_NO_NON_HTML5_CONVERSION */ 331 332 U_CDECL_END 333 334