// Tencent is pleased to support the open source community by making RapidJSON available.
//
// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
//
// Licensed under the MIT License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// http://opensource.org/licenses/MIT
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

#include "unittest.h"
#include "rapidjson/filereadstream.h"
#include "rapidjson/filewritestream.h"
#include "rapidjson/encodedstream.h"
#include "rapidjson/stringbuffer.h"

#include <iostream>

using namespace rapidjson;

// Verification of encoders/decoders with Hoehrmann's UTF8 decoder

// Unicode block ranges walked by the encoding tests in this file.
// Source: http://www.unicode.org/Public/UNIDATA/Blocks.txt
//
// Layout: a flat list of inclusive [first, last] code point pairs in ascending
// order, terminated by a single 0xFFFFFFFF sentinel. Readers step through it
// two entries at a time and stop when the first entry of a pair is the
// sentinel. The three surrogate blocks (U+D800..U+DFFF) are commented out, so
// they are never iterated.
static const unsigned kCodepointRanges[] = {
    0x0000,     0x007F,     // Basic Latin
    0x0080,     0x00FF,     // Latin-1 Supplement
    0x0100,     0x017F,     // Latin Extended-A
    0x0180,     0x024F,     // Latin Extended-B
    0x0250,     0x02AF,     // IPA Extensions
    0x02B0,     0x02FF,     // Spacing Modifier Letters
    0x0300,     0x036F,     // Combining Diacritical Marks
    0x0370,     0x03FF,     // Greek and Coptic
    0x0400,     0x04FF,     // Cyrillic
    0x0500,     0x052F,     // Cyrillic Supplement
    0x0530,     0x058F,     // Armenian
    0x0590,     0x05FF,     // Hebrew
    0x0600,     0x06FF,     // Arabic
    0x0700,     0x074F,     // Syriac
    0x0750,     0x077F,     // Arabic Supplement
    0x0780,     0x07BF,     // Thaana
    0x07C0,     0x07FF,     // NKo
    0x0800,     0x083F,     // Samaritan
    0x0840,     0x085F,     // Mandaic
    0x0900,     0x097F,     // Devanagari
    0x0980,     0x09FF,     // Bengali
    0x0A00,     0x0A7F,     // Gurmukhi
    0x0A80,     0x0AFF,     // Gujarati
    0x0B00,     0x0B7F,     // Oriya
    0x0B80,     0x0BFF,     // Tamil
    0x0C00,     0x0C7F,     // Telugu
    0x0C80,     0x0CFF,     // Kannada
    0x0D00,     0x0D7F,     // Malayalam
    0x0D80,     0x0DFF,     // Sinhala
    0x0E00,     0x0E7F,     // Thai
    0x0E80,     0x0EFF,     // Lao
    0x0F00,     0x0FFF,     // Tibetan
    0x1000,     0x109F,     // Myanmar
    0x10A0,     0x10FF,     // Georgian
    0x1100,     0x11FF,     // Hangul Jamo
    0x1200,     0x137F,     // Ethiopic
    0x1380,     0x139F,     // Ethiopic Supplement
    0x13A0,     0x13FF,     // Cherokee
    0x1400,     0x167F,     // Unified Canadian Aboriginal Syllabics
    0x1680,     0x169F,     // Ogham
    0x16A0,     0x16FF,     // Runic
    0x1700,     0x171F,     // Tagalog
    0x1720,     0x173F,     // Hanunoo
    0x1740,     0x175F,     // Buhid
    0x1760,     0x177F,     // Tagbanwa
    0x1780,     0x17FF,     // Khmer
    0x1800,     0x18AF,     // Mongolian
    0x18B0,     0x18FF,     // Unified Canadian Aboriginal Syllabics Extended
    0x1900,     0x194F,     // Limbu
    0x1950,     0x197F,     // Tai Le
    0x1980,     0x19DF,     // New Tai Lue
    0x19E0,     0x19FF,     // Khmer Symbols
    0x1A00,     0x1A1F,     // Buginese
    0x1A20,     0x1AAF,     // Tai Tham
    0x1B00,     0x1B7F,     // Balinese
    0x1B80,     0x1BBF,     // Sundanese
    0x1BC0,     0x1BFF,     // Batak
    0x1C00,     0x1C4F,     // Lepcha
    0x1C50,     0x1C7F,     // Ol Chiki
    0x1CD0,     0x1CFF,     // Vedic Extensions
    0x1D00,     0x1D7F,     // Phonetic Extensions
    0x1D80,     0x1DBF,     // Phonetic Extensions Supplement
    0x1DC0,     0x1DFF,     // Combining Diacritical Marks Supplement
    0x1E00,     0x1EFF,     // Latin Extended Additional
    0x1F00,     0x1FFF,     // Greek Extended
    0x2000,     0x206F,     // General Punctuation
    0x2070,     0x209F,     // Superscripts and Subscripts
    0x20A0,     0x20CF,     // Currency Symbols
    0x20D0,     0x20FF,     // Combining Diacritical Marks for Symbols
    0x2100,     0x214F,     // Letterlike Symbols
    0x2150,     0x218F,     // Number Forms
    0x2190,     0x21FF,     // Arrows
    0x2200,     0x22FF,     // Mathematical Operators
    0x2300,     0x23FF,     // Miscellaneous Technical
    0x2400,     0x243F,     // Control Pictures
    0x2440,     0x245F,     // Optical Character Recognition
    0x2460,     0x24FF,     // Enclosed Alphanumerics
    0x2500,     0x257F,     // Box Drawing
    0x2580,     0x259F,     // Block Elements
    0x25A0,     0x25FF,     // Geometric Shapes
    0x2600,     0x26FF,     // Miscellaneous Symbols
    0x2700,     0x27BF,     // Dingbats
    0x27C0,     0x27EF,     // Miscellaneous Mathematical Symbols-A
    0x27F0,     0x27FF,     // Supplemental Arrows-A
    0x2800,     0x28FF,     // Braille Patterns
    0x2900,     0x297F,     // Supplemental Arrows-B
    0x2980,     0x29FF,     // Miscellaneous Mathematical Symbols-B
    0x2A00,     0x2AFF,     // Supplemental Mathematical Operators
    0x2B00,     0x2BFF,     // Miscellaneous Symbols and Arrows
    0x2C00,     0x2C5F,     // Glagolitic
    0x2C60,     0x2C7F,     // Latin Extended-C
    0x2C80,     0x2CFF,     // Coptic
    0x2D00,     0x2D2F,     // Georgian Supplement
    0x2D30,     0x2D7F,     // Tifinagh
    0x2D80,     0x2DDF,     // Ethiopic Extended
    0x2DE0,     0x2DFF,     // Cyrillic Extended-A
    0x2E00,     0x2E7F,     // Supplemental Punctuation
    0x2E80,     0x2EFF,     // CJK Radicals Supplement
    0x2F00,     0x2FDF,     // Kangxi Radicals
    0x2FF0,     0x2FFF,     // Ideographic Description Characters
    0x3000,     0x303F,     // CJK Symbols and Punctuation
    0x3040,     0x309F,     // Hiragana
    0x30A0,     0x30FF,     // Katakana
    0x3100,     0x312F,     // Bopomofo
    0x3130,     0x318F,     // Hangul Compatibility Jamo
    0x3190,     0x319F,     // Kanbun
    0x31A0,     0x31BF,     // Bopomofo Extended
    0x31C0,     0x31EF,     // CJK Strokes
    0x31F0,     0x31FF,     // Katakana Phonetic Extensions
    0x3200,     0x32FF,     // Enclosed CJK Letters and Months
    0x3300,     0x33FF,     // CJK Compatibility
    0x3400,     0x4DBF,     // CJK Unified Ideographs Extension A
    0x4DC0,     0x4DFF,     // Yijing Hexagram Symbols
    0x4E00,     0x9FFF,     // CJK Unified Ideographs
    0xA000,     0xA48F,     // Yi Syllables
    0xA490,     0xA4CF,     // Yi Radicals
    0xA4D0,     0xA4FF,     // Lisu
    0xA500,     0xA63F,     // Vai
    0xA640,     0xA69F,     // Cyrillic Extended-B
    0xA6A0,     0xA6FF,     // Bamum
    0xA700,     0xA71F,     // Modifier Tone Letters
    0xA720,     0xA7FF,     // Latin Extended-D
    0xA800,     0xA82F,     // Syloti Nagri
    0xA830,     0xA83F,     // Common Indic Number Forms
    0xA840,     0xA87F,     // Phags-pa
    0xA880,     0xA8DF,     // Saurashtra
    0xA8E0,     0xA8FF,     // Devanagari Extended
    0xA900,     0xA92F,     // Kayah Li
    0xA930,     0xA95F,     // Rejang
    0xA960,     0xA97F,     // Hangul Jamo Extended-A
    0xA980,     0xA9DF,     // Javanese
    0xAA00,     0xAA5F,     // Cham
    0xAA60,     0xAA7F,     // Myanmar Extended-A
    0xAA80,     0xAADF,     // Tai Viet
    0xAB00,     0xAB2F,     // Ethiopic Extended-A
    0xABC0,     0xABFF,     // Meetei Mayek
    0xAC00,     0xD7AF,     // Hangul Syllables
    0xD7B0,     0xD7FF,     // Hangul Jamo Extended-B
    //0xD800,       0xDB7F,     // High Surrogates
    //0xDB80,       0xDBFF,     // High Private Use Surrogates
    //0xDC00,       0xDFFF,     // Low Surrogates
    0xE000,     0xF8FF,     // Private Use Area
    0xF900,     0xFAFF,     // CJK Compatibility Ideographs
    0xFB00,     0xFB4F,     // Alphabetic Presentation Forms
    0xFB50,     0xFDFF,     // Arabic Presentation Forms-A
    0xFE00,     0xFE0F,     // Variation Selectors
    0xFE10,     0xFE1F,     // Vertical Forms
    0xFE20,     0xFE2F,     // Combining Half Marks
    0xFE30,     0xFE4F,     // CJK Compatibility Forms
    0xFE50,     0xFE6F,     // Small Form Variants
    0xFE70,     0xFEFF,     // Arabic Presentation Forms-B
    0xFF00,     0xFFEF,     // Halfwidth and Fullwidth Forms
    0xFFF0,     0xFFFF,     // Specials
    0x10000,    0x1007F,    // Linear B Syllabary
    0x10080,    0x100FF,    // Linear B Ideograms
    0x10100,    0x1013F,    // Aegean Numbers
    0x10140,    0x1018F,    // Ancient Greek Numbers
    0x10190,    0x101CF,    // Ancient Symbols
    0x101D0,    0x101FF,    // Phaistos Disc
    0x10280,    0x1029F,    // Lycian
    0x102A0,    0x102DF,    // Carian
    0x10300,    0x1032F,    // Old Italic
    0x10330,    0x1034F,    // Gothic
    0x10380,    0x1039F,    // Ugaritic
    0x103A0,    0x103DF,    // Old Persian
    0x10400,    0x1044F,    // Deseret
    0x10450,    0x1047F,    // Shavian
    0x10480,    0x104AF,    // Osmanya
    0x10800,    0x1083F,    // Cypriot Syllabary
    0x10840,    0x1085F,    // Imperial Aramaic
    0x10900,    0x1091F,    // Phoenician
    0x10920,    0x1093F,    // Lydian
    0x10A00,    0x10A5F,    // Kharoshthi
    0x10A60,    0x10A7F,    // Old South Arabian
    0x10B00,    0x10B3F,    // Avestan
    0x10B40,    0x10B5F,    // Inscriptional Parthian
    0x10B60,    0x10B7F,    // Inscriptional Pahlavi
    0x10C00,    0x10C4F,    // Old Turkic
    0x10E60,    0x10E7F,    // Rumi Numeral Symbols
    0x11000,    0x1107F,    // Brahmi
    0x11080,    0x110CF,    // Kaithi
    0x12000,    0x123FF,    // Cuneiform
    0x12400,    0x1247F,    // Cuneiform Numbers and Punctuation
    0x13000,    0x1342F,    // Egyptian Hieroglyphs
    0x16800,    0x16A3F,    // Bamum Supplement
    0x1B000,    0x1B0FF,    // Kana Supplement
    0x1D000,    0x1D0FF,    // Byzantine Musical Symbols
    0x1D100,    0x1D1FF,    // Musical Symbols
    0x1D200,    0x1D24F,    // Ancient Greek Musical Notation
    0x1D300,    0x1D35F,    // Tai Xuan Jing Symbols
    0x1D360,    0x1D37F,    // Counting Rod Numerals
    0x1D400,    0x1D7FF,    // Mathematical Alphanumeric Symbols
    0x1F000,    0x1F02F,    // Mahjong Tiles
    0x1F030,    0x1F09F,    // Domino Tiles
    0x1F0A0,    0x1F0FF,    // Playing Cards
    0x1F100,    0x1F1FF,    // Enclosed Alphanumeric Supplement
    0x1F200,    0x1F2FF,    // Enclosed Ideographic Supplement
    0x1F300,    0x1F5FF,    // Miscellaneous Symbols And Pictographs
    0x1F600,    0x1F64F,    // Emoticons
    0x1F680,    0x1F6FF,    // Transport And Map Symbols
    0x1F700,    0x1F77F,    // Alchemical Symbols
    0x20000,    0x2A6DF,    // CJK Unified Ideographs Extension B
    0x2A700,    0x2B73F,    // CJK Unified Ideographs Extension C
    0x2B740,    0x2B81F,    // CJK Unified Ideographs Extension D
    0x2F800,    0x2FA1F,    // CJK Compatibility Ideographs Supplement
    0xE0000,    0xE007F,    // Tags
    0xE0100,    0xE01EF,    // Variation Selectors Supplement
    0xF0000,    0xFFFFF,    // Supplementary Private Use Area-A
    0x100000,   0x10FFFF,   // Supplementary Private Use Area-B
    0xFFFFFFFF
};

// Copyright (c) 2008-2010 Bjoern Hoehrmann <bjoern@hoehrmann.de>
// See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details.

// Automaton states: UTF8_ACCEPT is the start state and the state reached after
// a complete code point; UTF8_REJECT is absorbing (its transition row is all 12).
#define UTF8_ACCEPT 0u
#define UTF8_REJECT 12u

static const unsigned char utf8d[] = {
    // The first part of the table maps bytes to character classes,
    // to reduce the size of the transition table and create bitmasks.
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,  9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
    7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
    8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
    10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,

    // The second part is a transition table that maps a combination
    // of a state of the automaton and a character class to a state.
    // States are stored pre-multiplied by 12 (the number of classes), so
    // `state + class` indexes the table directly.
    0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12,
    12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12,
    12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12,
    12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12,
    12,36,12,12,12,12,12,12,12,12,12,12,
};

// Feeds one byte into the UTF-8 decoding automaton.
//
//   state  in/out: current automaton state; start with UTF8_ACCEPT.
//   codep  in/out: code point accumulated so far; it is complete only when
//                  the returned state is UTF8_ACCEPT.
//   byte   the next input byte; must be in [0, 255] because it indexes utf8d.
//
// Returns the new state (also stored in *state): UTF8_ACCEPT after a complete
// code point, UTF8_REJECT once the input is invalid, any other value while a
// multi-byte sequence is still in progress.
static inline unsigned decode(unsigned* state, unsigned* codep, unsigned byte) {
    const unsigned type = utf8d[byte];

    if (*state != UTF8_ACCEPT) {
        // Continuation byte: append its low six payload bits.
        *codep = (byte & 0x3fu) | (*codep << 6);
    }
    else {
        // Lead byte: the class number doubles as the shift that masks off the
        // length-marker bits and keeps only the payload bits.
        *codep = (0xff >> type) & byte;
    }

    *state = utf8d[256 + *state + type];
    return *state;
}

//static bool IsUTF8(unsigned char* s) {
//  unsigned codepoint, state = 0;
//
//  while (*s)
//      decode(&state, &codepoint, *s++);
//
//  return state == UTF8_ACCEPT;
//}

286e462795ff5d4c7359f9e8637c10544bb2de70107tturneyTEST(EncodingsTest, UTF8) {
287e462795ff5d4c7359f9e8637c10544bb2de70107tturney    StringBuffer os, os2;
288e462795ff5d4c7359f9e8637c10544bb2de70107tturney    for (const unsigned* range = kCodepointRanges; *range != 0xFFFFFFFF; range += 2) {
289e462795ff5d4c7359f9e8637c10544bb2de70107tturney        for (unsigned codepoint = range[0]; codepoint <= range[1]; ++codepoint) {
290e462795ff5d4c7359f9e8637c10544bb2de70107tturney            os.Clear();
291e462795ff5d4c7359f9e8637c10544bb2de70107tturney            UTF8<>::Encode(os, codepoint);
292e462795ff5d4c7359f9e8637c10544bb2de70107tturney            const char* encodedStr = os.GetString();
293e462795ff5d4c7359f9e8637c10544bb2de70107tturney
294e462795ff5d4c7359f9e8637c10544bb2de70107tturney            // Decode with Hoehrmann
295e462795ff5d4c7359f9e8637c10544bb2de70107tturney            {
296e462795ff5d4c7359f9e8637c10544bb2de70107tturney                unsigned decodedCodepoint = 0;
297e462795ff5d4c7359f9e8637c10544bb2de70107tturney                unsigned state = 0;
298e462795ff5d4c7359f9e8637c10544bb2de70107tturney
299e462795ff5d4c7359f9e8637c10544bb2de70107tturney                unsigned decodedCount = 0;
300e462795ff5d4c7359f9e8637c10544bb2de70107tturney                for (const char* s = encodedStr; *s; ++s)
301e462795ff5d4c7359f9e8637c10544bb2de70107tturney                    if (!decode(&state, &decodedCodepoint, (unsigned char)*s)) {
302e462795ff5d4c7359f9e8637c10544bb2de70107tturney                        EXPECT_EQ(codepoint, decodedCodepoint);
303e462795ff5d4c7359f9e8637c10544bb2de70107tturney                        decodedCount++;
304e462795ff5d4c7359f9e8637c10544bb2de70107tturney                    }
305e462795ff5d4c7359f9e8637c10544bb2de70107tturney
306e462795ff5d4c7359f9e8637c10544bb2de70107tturney                if (*encodedStr)                // This decoder cannot handle U+0000
307e462795ff5d4c7359f9e8637c10544bb2de70107tturney                    EXPECT_EQ(1u, decodedCount);    // Should only contain one code point
308e462795ff5d4c7359f9e8637c10544bb2de70107tturney
309e462795ff5d4c7359f9e8637c10544bb2de70107tturney                EXPECT_EQ(UTF8_ACCEPT, state);
310e462795ff5d4c7359f9e8637c10544bb2de70107tturney                if (UTF8_ACCEPT != state)
311e462795ff5d4c7359f9e8637c10544bb2de70107tturney                    std::cout << std::hex << codepoint << " " << decodedCodepoint << std::endl;
312e462795ff5d4c7359f9e8637c10544bb2de70107tturney            }
313e462795ff5d4c7359f9e8637c10544bb2de70107tturney
314e462795ff5d4c7359f9e8637c10544bb2de70107tturney            // Decode
315e462795ff5d4c7359f9e8637c10544bb2de70107tturney            {
316e462795ff5d4c7359f9e8637c10544bb2de70107tturney                StringStream is(encodedStr);
317e462795ff5d4c7359f9e8637c10544bb2de70107tturney                unsigned decodedCodepoint;
318e462795ff5d4c7359f9e8637c10544bb2de70107tturney                bool result = UTF8<>::Decode(is, &decodedCodepoint);
319e462795ff5d4c7359f9e8637c10544bb2de70107tturney                EXPECT_TRUE(result);
320e462795ff5d4c7359f9e8637c10544bb2de70107tturney                EXPECT_EQ(codepoint, decodedCodepoint);
321e462795ff5d4c7359f9e8637c10544bb2de70107tturney                if (!result || codepoint != decodedCodepoint)
322e462795ff5d4c7359f9e8637c10544bb2de70107tturney                    std::cout << std::hex << codepoint << " " << decodedCodepoint << std::endl;
323e462795ff5d4c7359f9e8637c10544bb2de70107tturney            }
324e462795ff5d4c7359f9e8637c10544bb2de70107tturney
325e462795ff5d4c7359f9e8637c10544bb2de70107tturney            // Validate
326e462795ff5d4c7359f9e8637c10544bb2de70107tturney            {
327e462795ff5d4c7359f9e8637c10544bb2de70107tturney                StringStream is(encodedStr);
328e462795ff5d4c7359f9e8637c10544bb2de70107tturney                os2.Clear();
329e462795ff5d4c7359f9e8637c10544bb2de70107tturney                bool result = UTF8<>::Validate(is, os2);
330e462795ff5d4c7359f9e8637c10544bb2de70107tturney                EXPECT_TRUE(result);
331e462795ff5d4c7359f9e8637c10544bb2de70107tturney                EXPECT_EQ(0, StrCmp(encodedStr, os2.GetString()));
332e462795ff5d4c7359f9e8637c10544bb2de70107tturney            }
333e462795ff5d4c7359f9e8637c10544bb2de70107tturney        }
334e462795ff5d4c7359f9e8637c10544bb2de70107tturney    }
335e462795ff5d4c7359f9e8637c10544bb2de70107tturney}
336e462795ff5d4c7359f9e8637c10544bb2de70107tturney
337e462795ff5d4c7359f9e8637c10544bb2de70107tturneyTEST(EncodingsTest, UTF16) {
338e462795ff5d4c7359f9e8637c10544bb2de70107tturney    GenericStringBuffer<UTF16<> > os, os2;
339e462795ff5d4c7359f9e8637c10544bb2de70107tturney    GenericStringBuffer<UTF8<> > utf8os;
340e462795ff5d4c7359f9e8637c10544bb2de70107tturney    for (const unsigned* range = kCodepointRanges; *range != 0xFFFFFFFF; range += 2) {
341e462795ff5d4c7359f9e8637c10544bb2de70107tturney        for (unsigned codepoint = range[0]; codepoint <= range[1]; ++codepoint) {
342e462795ff5d4c7359f9e8637c10544bb2de70107tturney            os.Clear();
343e462795ff5d4c7359f9e8637c10544bb2de70107tturney            UTF16<>::Encode(os, codepoint);
344e462795ff5d4c7359f9e8637c10544bb2de70107tturney            const UTF16<>::Ch* encodedStr = os.GetString();
345e462795ff5d4c7359f9e8637c10544bb2de70107tturney
346e462795ff5d4c7359f9e8637c10544bb2de70107tturney            // Encode with Hoehrmann's code
347e462795ff5d4c7359f9e8637c10544bb2de70107tturney            if (codepoint != 0) // cannot handle U+0000
348e462795ff5d4c7359f9e8637c10544bb2de70107tturney            {
349e462795ff5d4c7359f9e8637c10544bb2de70107tturney                // encode with UTF8<> first
350e462795ff5d4c7359f9e8637c10544bb2de70107tturney                utf8os.Clear();
351e462795ff5d4c7359f9e8637c10544bb2de70107tturney                UTF8<>::Encode(utf8os, codepoint);
352e462795ff5d4c7359f9e8637c10544bb2de70107tturney
353e462795ff5d4c7359f9e8637c10544bb2de70107tturney                // transcode from UTF8 to UTF16 with Hoehrmann's code
354e462795ff5d4c7359f9e8637c10544bb2de70107tturney                unsigned decodedCodepoint = 0;
355e462795ff5d4c7359f9e8637c10544bb2de70107tturney                unsigned state = 0;
356e462795ff5d4c7359f9e8637c10544bb2de70107tturney                UTF16<>::Ch buffer[3], *p = &buffer[0];
357e462795ff5d4c7359f9e8637c10544bb2de70107tturney                for (const char* s = utf8os.GetString(); *s; ++s) {
358e462795ff5d4c7359f9e8637c10544bb2de70107tturney                    if (!decode(&state, &decodedCodepoint, (unsigned char)*s))
359e462795ff5d4c7359f9e8637c10544bb2de70107tturney                        break;
360e462795ff5d4c7359f9e8637c10544bb2de70107tturney                }
361e462795ff5d4c7359f9e8637c10544bb2de70107tturney
362e462795ff5d4c7359f9e8637c10544bb2de70107tturney                if (codepoint <= 0xFFFF)
363e462795ff5d4c7359f9e8637c10544bb2de70107tturney                    *p++ = static_cast<UTF16<>::Ch>(decodedCodepoint);
364e462795ff5d4c7359f9e8637c10544bb2de70107tturney                else {
365e462795ff5d4c7359f9e8637c10544bb2de70107tturney                    // Encode code points above U+FFFF as surrogate pair.
366e462795ff5d4c7359f9e8637c10544bb2de70107tturney                    *p++ = static_cast<UTF16<>::Ch>(0xD7C0 + (decodedCodepoint >> 10));
367e462795ff5d4c7359f9e8637c10544bb2de70107tturney                    *p++ = static_cast<UTF16<>::Ch>(0xDC00 + (decodedCodepoint & 0x3FF));
368e462795ff5d4c7359f9e8637c10544bb2de70107tturney                }
369e462795ff5d4c7359f9e8637c10544bb2de70107tturney                *p++ = '\0';
370e462795ff5d4c7359f9e8637c10544bb2de70107tturney
371e462795ff5d4c7359f9e8637c10544bb2de70107tturney                EXPECT_EQ(0, StrCmp(buffer, encodedStr));
372e462795ff5d4c7359f9e8637c10544bb2de70107tturney            }
373e462795ff5d4c7359f9e8637c10544bb2de70107tturney
374e462795ff5d4c7359f9e8637c10544bb2de70107tturney            // Decode
375e462795ff5d4c7359f9e8637c10544bb2de70107tturney            {
376e462795ff5d4c7359f9e8637c10544bb2de70107tturney                GenericStringStream<UTF16<> > is(encodedStr);
377e462795ff5d4c7359f9e8637c10544bb2de70107tturney                unsigned decodedCodepoint;
378e462795ff5d4c7359f9e8637c10544bb2de70107tturney                bool result = UTF16<>::Decode(is, &decodedCodepoint);
379e462795ff5d4c7359f9e8637c10544bb2de70107tturney                EXPECT_TRUE(result);
380e462795ff5d4c7359f9e8637c10544bb2de70107tturney                EXPECT_EQ(codepoint, decodedCodepoint);
381e462795ff5d4c7359f9e8637c10544bb2de70107tturney                if (!result || codepoint != decodedCodepoint)
382e462795ff5d4c7359f9e8637c10544bb2de70107tturney                    std::cout << std::hex << codepoint << " " << decodedCodepoint << std::endl;
383e462795ff5d4c7359f9e8637c10544bb2de70107tturney            }
384e462795ff5d4c7359f9e8637c10544bb2de70107tturney
385e462795ff5d4c7359f9e8637c10544bb2de70107tturney            // Validate
386e462795ff5d4c7359f9e8637c10544bb2de70107tturney            {
387e462795ff5d4c7359f9e8637c10544bb2de70107tturney                GenericStringStream<UTF16<> > is(encodedStr);
388e462795ff5d4c7359f9e8637c10544bb2de70107tturney                os2.Clear();
389e462795ff5d4c7359f9e8637c10544bb2de70107tturney                bool result = UTF16<>::Validate(is, os2);
390e462795ff5d4c7359f9e8637c10544bb2de70107tturney                EXPECT_TRUE(result);
391e462795ff5d4c7359f9e8637c10544bb2de70107tturney                EXPECT_EQ(0, StrCmp(encodedStr, os2.GetString()));
392e462795ff5d4c7359f9e8637c10544bb2de70107tturney            }
393e462795ff5d4c7359f9e8637c10544bb2de70107tturney        }
394e462795ff5d4c7359f9e8637c10544bb2de70107tturney    }
395e462795ff5d4c7359f9e8637c10544bb2de70107tturney}
396e462795ff5d4c7359f9e8637c10544bb2de70107tturney
397e462795ff5d4c7359f9e8637c10544bb2de70107tturneyTEST(EncodingsTest, UTF32) {
398e462795ff5d4c7359f9e8637c10544bb2de70107tturney    GenericStringBuffer<UTF32<> > os, os2;
399e462795ff5d4c7359f9e8637c10544bb2de70107tturney    for (const unsigned* range = kCodepointRanges; *range != 0xFFFFFFFF; range += 2) {
400e462795ff5d4c7359f9e8637c10544bb2de70107tturney        for (unsigned codepoint = range[0]; codepoint <= range[1]; ++codepoint) {
401e462795ff5d4c7359f9e8637c10544bb2de70107tturney            os.Clear();
402e462795ff5d4c7359f9e8637c10544bb2de70107tturney            UTF32<>::Encode(os, codepoint);
403e462795ff5d4c7359f9e8637c10544bb2de70107tturney            const UTF32<>::Ch* encodedStr = os.GetString();
404e462795ff5d4c7359f9e8637c10544bb2de70107tturney
405e462795ff5d4c7359f9e8637c10544bb2de70107tturney            // Decode
406e462795ff5d4c7359f9e8637c10544bb2de70107tturney            {
407e462795ff5d4c7359f9e8637c10544bb2de70107tturney                GenericStringStream<UTF32<> > is(encodedStr);
408e462795ff5d4c7359f9e8637c10544bb2de70107tturney                unsigned decodedCodepoint;
409e462795ff5d4c7359f9e8637c10544bb2de70107tturney                bool result = UTF32<>::Decode(is, &decodedCodepoint);
410e462795ff5d4c7359f9e8637c10544bb2de70107tturney                EXPECT_TRUE(result);
411e462795ff5d4c7359f9e8637c10544bb2de70107tturney                EXPECT_EQ(codepoint, decodedCodepoint);
412e462795ff5d4c7359f9e8637c10544bb2de70107tturney                if (!result || codepoint != decodedCodepoint)
413e462795ff5d4c7359f9e8637c10544bb2de70107tturney                    std::cout << std::hex << codepoint << " " << decodedCodepoint << std::endl;
414e462795ff5d4c7359f9e8637c10544bb2de70107tturney            }
415e462795ff5d4c7359f9e8637c10544bb2de70107tturney
416e462795ff5d4c7359f9e8637c10544bb2de70107tturney            // Validate
417e462795ff5d4c7359f9e8637c10544bb2de70107tturney            {
418e462795ff5d4c7359f9e8637c10544bb2de70107tturney                GenericStringStream<UTF32<> > is(encodedStr);
419e462795ff5d4c7359f9e8637c10544bb2de70107tturney                os2.Clear();
420e462795ff5d4c7359f9e8637c10544bb2de70107tturney                bool result = UTF32<>::Validate(is, os2);
421e462795ff5d4c7359f9e8637c10544bb2de70107tturney                EXPECT_TRUE(result);
422e462795ff5d4c7359f9e8637c10544bb2de70107tturney                EXPECT_EQ(0, StrCmp(encodedStr, os2.GetString()));
423e462795ff5d4c7359f9e8637c10544bb2de70107tturney            }
424e462795ff5d4c7359f9e8637c10544bb2de70107tturney        }
425e462795ff5d4c7359f9e8637c10544bb2de70107tturney    }
426e462795ff5d4c7359f9e8637c10544bb2de70107tturney}
427