1Index: source/common/ucnv2022.cpp
2===================================================================
3--- source/common/ucnv2022.cpp	(revision 259715)
4+++ source/common/ucnv2022.cpp	(working copy)
5@@ -167,13 +167,19 @@
6  *   all versions, not just JIS7 and JIS8.
7  * - ICU does not distinguish between different versions of JIS X 0208.
8  */
9+#if UCONFIG_NO_NON_HTML5_CONVERSION
10+enum { MAX_JA_VERSION=0 };
11+#else
12 enum { MAX_JA_VERSION=4 };
13+#endif
14 static const uint16_t jpCharsetMasks[MAX_JA_VERSION+1]={
15     CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT),
16+#if !UCONFIG_NO_NON_HTML5_CONVERSION
17     CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212),
18     CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7),
19     CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7),
20     CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7)
21+#endif
22 };
23 
24 typedef enum {
25@@ -361,14 +367,25 @@
26 };
27 
28 
29+/* Enable ISO-2022-{KR,CN,CN-Ext} for now. NOTE: the guards test this macro
30+ * with #ifdef, so defining it to 0 does NOT disable them. TODO(jshin): Disable
31+ * them when we know what to do about 'replacement' encodings.
32+ * See http://crbug.com/277037 and https://codereview.chromium.org/145973021/
33+ */
34+#ifndef U_ENABLE_ISO_2022_KR_CN
35+#define U_ENABLE_ISO_2022_KR_CN 1
36+#endif
37+
38 /* Type def for refactoring changeState_2022 code*/
39 typedef enum{
40 #ifdef U_ENABLE_GENERIC_ISO_2022
41     ISO_2022=0,
42 #endif
43     ISO_2022_JP=1,
44+#ifdef U_ENABLE_ISO_2022_KR_CN
45     ISO_2022_KR=2,
46     ISO_2022_CN=3
47+#endif
48 } Variant2022;
49 
50 /*********** ISO 2022 Converter Protos ***********/
51@@ -485,24 +502,28 @@
52                 /* prevent indexing beyond jpCharsetMasks[] */
53                 myConverterData->version = version = 0;
54             }
55+#if !UCONFIG_NO_NON_HTML5_CONVERSION
56             if(jpCharsetMasks[version]&CSM(ISO8859_7)) {
57                 myConverterData->myConverterArray[ISO8859_7] =
58                     ucnv_loadSharedData("ISO8859_7", &stackPieces, &stackArgs, errorCode);
59             }
60+#endif
61             myConverterData->myConverterArray[JISX208] =
62                 ucnv_loadSharedData("Shift-JIS", &stackPieces, &stackArgs, errorCode);
63+#if !UCONFIG_NO_NON_HTML5_CONVERSION
64             if(jpCharsetMasks[version]&CSM(JISX212)) {
65                 myConverterData->myConverterArray[JISX212] =
66                     ucnv_loadSharedData("jisx-212", &stackPieces, &stackArgs, errorCode);
67             }
68             if(jpCharsetMasks[version]&CSM(GB2312)) {
69                 myConverterData->myConverterArray[GB2312] =
70-                    ucnv_loadSharedData("ibm-5478", &stackPieces, &stackArgs, errorCode);   /* gb_2312_80-1 */
71+                    ucnv_loadSharedData("noop-gb2312_gl", &stackPieces, &stackArgs, errorCode);   /* gb_2312_80-1 */
72             }
73             if(jpCharsetMasks[version]&CSM(KSC5601)) {
74                 myConverterData->myConverterArray[KSC5601] =
75                     ucnv_loadSharedData("ksc_5601", &stackPieces, &stackArgs, errorCode);
76             }
77+#endif
78 
79             /* set the function pointers to appropriate funtions */
80             cnv->sharedData=(UConverterSharedData*)(&_ISO2022JPData);
81@@ -513,6 +534,7 @@
82             myConverterData->name[len]=(char)(myConverterData->version+(int)'0');
83             myConverterData->name[len+1]='\0';
84         }
85+#ifdef U_ENABLE_ISO_2022_KR_CN
86         else if(myLocale[0]=='k' && (myLocale[1]=='o'|| myLocale[1]=='r') &&
87             (myLocale[2]=='_' || myLocale[2]=='\0'))
88         {
89@@ -558,13 +580,13 @@
90 
91             /* open the required converters and cache them */
92             myConverterData->myConverterArray[GB2312_1] =
93-                ucnv_loadSharedData("ibm-5478", &stackPieces, &stackArgs, errorCode);
94+                ucnv_loadSharedData("noop-gb2312_gl", &stackPieces, &stackArgs, errorCode);
95             if(version==1) {
96                 myConverterData->myConverterArray[ISO_IR_165] =
97-                    ucnv_loadSharedData("iso-ir-165", &stackPieces, &stackArgs, errorCode);
98+                    ucnv_loadSharedData("noop-iso-ir-165", &stackPieces, &stackArgs, errorCode);
99             }
100             myConverterData->myConverterArray[CNS_11643] =
101-                ucnv_loadSharedData("cns-11643-1992", &stackPieces, &stackArgs, errorCode);
102+                ucnv_loadSharedData("noop-cns-11643", &stackPieces, &stackArgs, errorCode);
103 
104 
105             /* set the function pointers to appropriate funtions */
106@@ -582,6 +604,7 @@
107                 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=2");
108             }
109         }
110+#endif // U_ENABLE_ISO_2022_KR_CN
111         else{
112 #ifdef U_ENABLE_GENERIC_ISO_2022
113             myConverterData->isFirstBuffer = TRUE;
114Index: source/common/ucnvbocu.cpp
115===================================================================
116--- source/common/ucnvbocu.cpp	(revision 259715)
117+++ source/common/ucnvbocu.cpp	(working copy)
118@@ -19,7 +19,7 @@
119 
120 #include "unicode/utypes.h"
121 
122-#if !UCONFIG_NO_CONVERSION
123+#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION
124 
125 #include "unicode/ucnv.h"
126 #include "unicode/ucnv_cb.h"
127Index: source/common/ucnvisci.c
128===================================================================
129--- source/common/ucnvisci.c	(revision 259715)
130+++ source/common/ucnvisci.c	(working copy)
131@@ -17,7 +17,7 @@
132 
133 #include "unicode/utypes.h"
134 
135-#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
136+#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION
137 
138 #include "unicode/ucnv.h"
139 #include "unicode/ucnv_cb.h"
140Index: source/common/ucnvscsu.c
141===================================================================
142--- source/common/ucnvscsu.c	(revision 259715)
143+++ source/common/ucnvscsu.c	(working copy)
144@@ -21,7 +21,7 @@
145 
146 #include "unicode/utypes.h"
147 
148-#if !UCONFIG_NO_CONVERSION
149+#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION
150 
151 #include "unicode/ucnv.h"
152 #include "unicode/ucnv_cb.h"
153Index: source/common/ucnv_u7.c
154===================================================================
155--- source/common/ucnv_u7.c	(revision 259715)
156+++ source/common/ucnv_u7.c	(working copy)
157@@ -16,7 +16,7 @@
158 
159 #include "unicode/utypes.h"
160 
161-#if !UCONFIG_NO_CONVERSION
162+#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION
163 
164 #include "unicode/ucnv.h"
165 #include "ucnv_bld.h"
166Index: source/common/unicode/uconfig.h
167===================================================================
168--- source/common/unicode/uconfig.h	(revision 259715)
169+++ source/common/unicode/uconfig.h	(working copy)
170@@ -265,6 +265,14 @@
171 #endif
172 
173 /**
174+ * This switch turns off all converters that are NOT listed in
175+ * the WHATWG Encoding Standard: http://encoding.spec.whatwg.org
176+ */
177+#ifndef UCONFIG_NO_NON_HTML5_CONVERSION
178+#define UCONFIG_NO_NON_HTML5_CONVERSION 0
179+#endif
180+
181+/**
182  * \def UCONFIG_NO_LEGACY_CONVERSION
183  * This switch turns off all converters except for
184  * - Unicode charsets (UTF-7/8/16/32, CESU-8, SCSU, BOCU-1)
185Index: source/common/ucnv_bld.cpp
186===================================================================
187--- source/common/ucnv_bld.cpp	(revision 259715)
188+++ source/common/ucnv_bld.cpp	(working copy)
189@@ -79,16 +79,25 @@
190     &_HZData,
191 #endif
192 
193+#if UCONFIG_NO_NON_HTML5_CONVERSION
194+    NULL,
195+#else
196     &_SCSUData,
197+#endif
198 
199-#if UCONFIG_NO_LEGACY_CONVERSION
200+
201+#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_NO_NON_HTML5_CONVERSION
202     NULL,
203 #else
204     &_ISCIIData,
205 #endif
206 
207     &_ASCIIData,
208+#if UCONFIG_NO_NON_HTML5_CONVERSION
209+    NULL, NULL, &_UTF16Data, &_UTF32Data, NULL, NULL,
210+#else
211     &_UTF7Data, &_Bocu1Data, &_UTF16Data, &_UTF32Data, &_CESU8Data, &_IMAPData,
212+#endif
213 
214 #if UCONFIG_NO_LEGACY_CONVERSION
215     NULL,
216Index: source/common/ucnv_u8.c
217===================================================================
218--- source/common/ucnv_u8.c	(revision 259715)
219+++ source/common/ucnv_u8.c	(working copy)
220@@ -87,6 +87,15 @@
221 static const uint32_t
222 utf8_minChar32[7]={ 0, 0, 0x80, 0x800, 0x10000, 0xffffffff, 0xffffffff };
223 
224+static UBool hasCESU8Data(const UConverter *cnv)
225+{
226+#if UCONFIG_NO_NON_HTML5_CONVERSION
227+    return FALSE;
228+#else
229+    return (UBool)(cnv->sharedData == &_CESU8Data);
230+#endif
231+}
232+
233 static void ucnv_toUnicode_UTF8 (UConverterToUnicodeArgs * args,
234                                   UErrorCode * err)
235 {
236@@ -96,10 +105,10 @@
237     const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit;
238     const UChar *targetLimit = args->targetLimit;
239     unsigned char *toUBytes = cnv->toUBytes;
240-    UBool isCESU8 = (UBool)(cnv->sharedData == &_CESU8Data);
241+    UBool isCESU8 = hasCESU8Data(cnv);
242     uint32_t ch, ch2 = 0;
243     int32_t i, inBytes;
244-  
245+
246     /* Restore size of current sequence */
247     if (cnv->toUnicodeStatus && myTarget < targetLimit)
248     {
249@@ -226,7 +235,7 @@
250     const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit;
251     const UChar *targetLimit = args->targetLimit;
252     unsigned char *toUBytes = cnv->toUBytes;
253-    UBool isCESU8 = (UBool)(cnv->sharedData == &_CESU8Data);
254+    UBool isCESU8 = hasCESU8Data(cnv);
255     uint32_t ch, ch2 = 0;
256     int32_t i, inBytes;
257 
258@@ -357,7 +366,7 @@
259     UChar32 ch;
260     uint8_t tempBuf[4];
261     int32_t indexToWrite;
262-    UBool isNotCESU8 = (UBool)(cnv->sharedData != &_CESU8Data);
263+    UBool isNotCESU8 = !hasCESU8Data(cnv);
264 
265     if (cnv->fromUChar32 && myTarget < targetLimit)
266     {
267@@ -473,7 +482,7 @@
268     int32_t offsetNum, nextSourceIndex;
269     int32_t indexToWrite;
270     uint8_t tempBuf[4];
271-    UBool isNotCESU8 = (UBool)(cnv->sharedData != &_CESU8Data);
272+    UBool isNotCESU8 = !hasCESU8Data(cnv);
273 
274     if (cnv->fromUChar32 && myTarget < targetLimit)
275     {
276Index: source/common/unicode/urename.h
277===================================================================
278--- source/common/unicode/urename.h	(revision 259715)
279+++ source/common/unicode/urename.h	(working copy)
280@@ -73,12 +73,16 @@
281 #define UDataMemory_setData U_ICU_ENTRY_POINT_RENAME(UDataMemory_setData)
282 #define UDatamemory_assign U_ICU_ENTRY_POINT_RENAME(UDatamemory_assign)
283 #define _ASCIIData U_ICU_ENTRY_POINT_RENAME(_ASCIIData)
284+#if !UCONFIG_NO_NON_HTML5_CONVERSION
285 #define _Bocu1Data U_ICU_ENTRY_POINT_RENAME(_Bocu1Data)
286 #define _CESU8Data U_ICU_ENTRY_POINT_RENAME(_CESU8Data)
287+#endif
288 #define _CompoundTextData U_ICU_ENTRY_POINT_RENAME(_CompoundTextData)
289 #define _HZData U_ICU_ENTRY_POINT_RENAME(_HZData)
290+#if !UCONFIG_NO_NON_HTML5_CONVERSION
291 #define _IMAPData U_ICU_ENTRY_POINT_RENAME(_IMAPData)
292 #define _ISCIIData U_ICU_ENTRY_POINT_RENAME(_ISCIIData)
293+#endif
294 #define _ISO2022Data U_ICU_ENTRY_POINT_RENAME(_ISO2022Data)
295 #define _LMBCSData1 U_ICU_ENTRY_POINT_RENAME(_LMBCSData1)
296 #define _LMBCSData11 U_ICU_ENTRY_POINT_RENAME(_LMBCSData11)
297@@ -94,14 +98,18 @@
298 #define _LMBCSData8 U_ICU_ENTRY_POINT_RENAME(_LMBCSData8)
299 #define _Latin1Data U_ICU_ENTRY_POINT_RENAME(_Latin1Data)
300 #define _MBCSData U_ICU_ENTRY_POINT_RENAME(_MBCSData)
301+#if !UCONFIG_NO_NON_HTML5_CONVERSION
302 #define _SCSUData U_ICU_ENTRY_POINT_RENAME(_SCSUData)
303+#endif
304 #define _UTF16BEData U_ICU_ENTRY_POINT_RENAME(_UTF16BEData)
305 #define _UTF16Data U_ICU_ENTRY_POINT_RENAME(_UTF16Data)
306 #define _UTF16LEData U_ICU_ENTRY_POINT_RENAME(_UTF16LEData)
307 #define _UTF32BEData U_ICU_ENTRY_POINT_RENAME(_UTF32BEData)
308 #define _UTF32Data U_ICU_ENTRY_POINT_RENAME(_UTF32Data)
309 #define _UTF32LEData U_ICU_ENTRY_POINT_RENAME(_UTF32LEData)
310+#if !UCONFIG_NO_NON_HTML5_CONVERSION
311 #define _UTF7Data U_ICU_ENTRY_POINT_RENAME(_UTF7Data)
312+#endif
313 #define _UTF8Data U_ICU_ENTRY_POINT_RENAME(_UTF8Data)
314 #define cmemory_cleanup U_ICU_ENTRY_POINT_RENAME(cmemory_cleanup)
315 #define cmemory_inUse U_ICU_ENTRY_POINT_RENAME(cmemory_inUse)
316Index: source/common/ucnv_cnv.h
317===================================================================
318--- source/common/ucnv_cnv.h	(revision 259715)
319+++ source/common/ucnv_cnv.h	(working copy)
320@@ -259,8 +259,13 @@
321     _ISO2022Data, 
322     _LMBCSData1,_LMBCSData2, _LMBCSData3, _LMBCSData4, _LMBCSData5, _LMBCSData6,
323     _LMBCSData8,_LMBCSData11,_LMBCSData16,_LMBCSData17,_LMBCSData18,_LMBCSData19,
324+#if !UCONFIG_NO_NON_HTML5_CONVERSION
325     _HZData,_ISCIIData, _SCSUData, _ASCIIData,
326     _UTF7Data, _Bocu1Data, _UTF16Data, _UTF32Data, _CESU8Data, _IMAPData, _CompoundTextData;
327+#else
328+    _HZData, _ASCIIData,
329+    _UTF16Data, _UTF32Data, _CompoundTextData;
330+#endif
331 
332 U_CDECL_END
333 
334