// Tencent is pleased to support the open source community by making RapidJSON available.
//
// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
//
// Licensed under the MIT License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// http://opensource.org/licenses/MIT
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
14e462795ff5d4c7359f9e8637c10544bb2de70107tturney 15e462795ff5d4c7359f9e8637c10544bb2de70107tturney#include "unittest.h" 16e462795ff5d4c7359f9e8637c10544bb2de70107tturney#include "rapidjson/filereadstream.h" 17e462795ff5d4c7359f9e8637c10544bb2de70107tturney#include "rapidjson/filewritestream.h" 18e462795ff5d4c7359f9e8637c10544bb2de70107tturney#include "rapidjson/encodedstream.h" 19e462795ff5d4c7359f9e8637c10544bb2de70107tturney#include "rapidjson/stringbuffer.h" 20e462795ff5d4c7359f9e8637c10544bb2de70107tturney 21e462795ff5d4c7359f9e8637c10544bb2de70107tturneyusing namespace rapidjson; 22e462795ff5d4c7359f9e8637c10544bb2de70107tturney 23e462795ff5d4c7359f9e8637c10544bb2de70107tturney// Verification of encoders/decoders with Hoehrmann's UTF8 decoder 24e462795ff5d4c7359f9e8637c10544bb2de70107tturney 25e462795ff5d4c7359f9e8637c10544bb2de70107tturney// http://www.unicode.org/Public/UNIDATA/Blocks.txt 26e462795ff5d4c7359f9e8637c10544bb2de70107tturneystatic const unsigned kCodepointRanges[] = { 27e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x0000, 0x007F, // Basic Latin 28e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x0080, 0x00FF, // Latin-1 Supplement 29e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x0100, 0x017F, // Latin Extended-A 30e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x0180, 0x024F, // Latin Extended-B 31e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x0250, 0x02AF, // IPA Extensions 32e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x02B0, 0x02FF, // Spacing Modifier Letters 33e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x0300, 0x036F, // Combining Diacritical Marks 34e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x0370, 0x03FF, // Greek and Coptic 35e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x0400, 0x04FF, // Cyrillic 36e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x0500, 0x052F, // Cyrillic Supplement 37e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x0530, 0x058F, // Armenian 
38e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x0590, 0x05FF, // Hebrew 39e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x0600, 0x06FF, // Arabic 40e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x0700, 0x074F, // Syriac 41e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x0750, 0x077F, // Arabic Supplement 42e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x0780, 0x07BF, // Thaana 43e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x07C0, 0x07FF, // NKo 44e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x0800, 0x083F, // Samaritan 45e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x0840, 0x085F, // Mandaic 46e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x0900, 0x097F, // Devanagari 47e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x0980, 0x09FF, // Bengali 48e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x0A00, 0x0A7F, // Gurmukhi 49e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x0A80, 0x0AFF, // Gujarati 50e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x0B00, 0x0B7F, // Oriya 51e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x0B80, 0x0BFF, // Tamil 52e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x0C00, 0x0C7F, // Telugu 53e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x0C80, 0x0CFF, // Kannada 54e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x0D00, 0x0D7F, // Malayalam 55e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x0D80, 0x0DFF, // Sinhala 56e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x0E00, 0x0E7F, // Thai 57e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x0E80, 0x0EFF, // Lao 58e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x0F00, 0x0FFF, // Tibetan 59e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x1000, 0x109F, // Myanmar 60e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x10A0, 0x10FF, // Georgian 61e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x1100, 0x11FF, // Hangul Jamo 62e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x1200, 0x137F, // Ethiopic 63e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x1380, 0x139F, // 
Ethiopic Supplement 64e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x13A0, 0x13FF, // Cherokee 65e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x1400, 0x167F, // Unified Canadian Aboriginal Syllabics 66e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x1680, 0x169F, // Ogham 67e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x16A0, 0x16FF, // Runic 68e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x1700, 0x171F, // Tagalog 69e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x1720, 0x173F, // Hanunoo 70e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x1740, 0x175F, // Buhid 71e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x1760, 0x177F, // Tagbanwa 72e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x1780, 0x17FF, // Khmer 73e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x1800, 0x18AF, // Mongolian 74e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x18B0, 0x18FF, // Unified Canadian Aboriginal Syllabics Extended 75e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x1900, 0x194F, // Limbu 76e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x1950, 0x197F, // Tai Le 77e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x1980, 0x19DF, // New Tai Lue 78e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x19E0, 0x19FF, // Khmer Symbols 79e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x1A00, 0x1A1F, // Buginese 80e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x1A20, 0x1AAF, // Tai Tham 81e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x1B00, 0x1B7F, // Balinese 82e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x1B80, 0x1BBF, // Sundanese 83e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x1BC0, 0x1BFF, // Batak 84e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x1C00, 0x1C4F, // Lepcha 85e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x1C50, 0x1C7F, // Ol Chiki 86e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x1CD0, 0x1CFF, // Vedic Extensions 87e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x1D00, 0x1D7F, // Phonetic Extensions 
88e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x1D80, 0x1DBF, // Phonetic Extensions Supplement 89e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x1DC0, 0x1DFF, // Combining Diacritical Marks Supplement 90e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x1E00, 0x1EFF, // Latin Extended Additional 91e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x1F00, 0x1FFF, // Greek Extended 92e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x2000, 0x206F, // General Punctuation 93e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x2070, 0x209F, // Superscripts and Subscripts 94e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x20A0, 0x20CF, // Currency Symbols 95e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x20D0, 0x20FF, // Combining Diacritical Marks for Symbols 96e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x2100, 0x214F, // Letterlike Symbols 97e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x2150, 0x218F, // Number Forms 98e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x2190, 0x21FF, // Arrows 99e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x2200, 0x22FF, // Mathematical Operators 100e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x2300, 0x23FF, // Miscellaneous Technical 101e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x2400, 0x243F, // Control Pictures 102e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x2440, 0x245F, // Optical Character Recognition 103e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x2460, 0x24FF, // Enclosed Alphanumerics 104e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x2500, 0x257F, // Box Drawing 105e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x2580, 0x259F, // Block Elements 106e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x25A0, 0x25FF, // Geometric Shapes 107e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x2600, 0x26FF, // Miscellaneous Symbols 108e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x2700, 0x27BF, // Dingbats 109e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x27C0, 0x27EF, // Miscellaneous 
Mathematical Symbols-A 110e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x27F0, 0x27FF, // Supplemental Arrows-A 111e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x2800, 0x28FF, // Braille Patterns 112e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x2900, 0x297F, // Supplemental Arrows-B 113e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x2980, 0x29FF, // Miscellaneous Mathematical Symbols-B 114e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x2A00, 0x2AFF, // Supplemental Mathematical Operators 115e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x2B00, 0x2BFF, // Miscellaneous Symbols and Arrows 116e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x2C00, 0x2C5F, // Glagolitic 117e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x2C60, 0x2C7F, // Latin Extended-C 118e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x2C80, 0x2CFF, // Coptic 119e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x2D00, 0x2D2F, // Georgian Supplement 120e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x2D30, 0x2D7F, // Tifinagh 121e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x2D80, 0x2DDF, // Ethiopic Extended 122e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x2DE0, 0x2DFF, // Cyrillic Extended-A 123e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x2E00, 0x2E7F, // Supplemental Punctuation 124e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x2E80, 0x2EFF, // CJK Radicals Supplement 125e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x2F00, 0x2FDF, // Kangxi Radicals 126e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x2FF0, 0x2FFF, // Ideographic Description Characters 127e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x3000, 0x303F, // CJK Symbols and Punctuation 128e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x3040, 0x309F, // Hiragana 129e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x30A0, 0x30FF, // Katakana 130e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x3100, 0x312F, // Bopomofo 131e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x3130, 0x318F, // Hangul 
Compatibility Jamo 132e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x3190, 0x319F, // Kanbun 133e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x31A0, 0x31BF, // Bopomofo Extended 134e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x31C0, 0x31EF, // CJK Strokes 135e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x31F0, 0x31FF, // Katakana Phonetic Extensions 136e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x3200, 0x32FF, // Enclosed CJK Letters and Months 137e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x3300, 0x33FF, // CJK Compatibility 138e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x3400, 0x4DBF, // CJK Unified Ideographs Extension A 139e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x4DC0, 0x4DFF, // Yijing Hexagram Symbols 140e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x4E00, 0x9FFF, // CJK Unified Ideographs 141e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0xA000, 0xA48F, // Yi Syllables 142e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0xA490, 0xA4CF, // Yi Radicals 143e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0xA4D0, 0xA4FF, // Lisu 144e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0xA500, 0xA63F, // Vai 145e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0xA640, 0xA69F, // Cyrillic Extended-B 146e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0xA6A0, 0xA6FF, // Bamum 147e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0xA700, 0xA71F, // Modifier Tone Letters 148e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0xA720, 0xA7FF, // Latin Extended-D 149e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0xA800, 0xA82F, // Syloti Nagri 150e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0xA830, 0xA83F, // Common Indic Number Forms 151e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0xA840, 0xA87F, // Phags-pa 152e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0xA880, 0xA8DF, // Saurashtra 153e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0xA8E0, 0xA8FF, // Devanagari Extended 154e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0xA900, 
0xA92F, // Kayah Li 155e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0xA930, 0xA95F, // Rejang 156e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0xA960, 0xA97F, // Hangul Jamo Extended-A 157e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0xA980, 0xA9DF, // Javanese 158e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0xAA00, 0xAA5F, // Cham 159e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0xAA60, 0xAA7F, // Myanmar Extended-A 160e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0xAA80, 0xAADF, // Tai Viet 161e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0xAB00, 0xAB2F, // Ethiopic Extended-A 162e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0xABC0, 0xABFF, // Meetei Mayek 163e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0xAC00, 0xD7AF, // Hangul Syllables 164e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0xD7B0, 0xD7FF, // Hangul Jamo Extended-B 165e462795ff5d4c7359f9e8637c10544bb2de70107tturney //0xD800, 0xDB7F, // High Surrogates 166e462795ff5d4c7359f9e8637c10544bb2de70107tturney //0xDB80, 0xDBFF, // High Private Use Surrogates 167e462795ff5d4c7359f9e8637c10544bb2de70107tturney //0xDC00, 0xDFFF, // Low Surrogates 168e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0xE000, 0xF8FF, // Private Use Area 169e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0xF900, 0xFAFF, // CJK Compatibility Ideographs 170e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0xFB00, 0xFB4F, // Alphabetic Presentation Forms 171e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0xFB50, 0xFDFF, // Arabic Presentation Forms-A 172e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0xFE00, 0xFE0F, // Variation Selectors 173e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0xFE10, 0xFE1F, // Vertical Forms 174e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0xFE20, 0xFE2F, // Combining Half Marks 175e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0xFE30, 0xFE4F, // CJK Compatibility Forms 176e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0xFE50, 0xFE6F, // Small Form Variants 
177e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0xFE70, 0xFEFF, // Arabic Presentation Forms-B 178e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0xFF00, 0xFFEF, // Halfwidth and Fullwidth Forms 179e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0xFFF0, 0xFFFF, // Specials 180e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x10000, 0x1007F, // Linear B Syllabary 181e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x10080, 0x100FF, // Linear B Ideograms 182e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x10100, 0x1013F, // Aegean Numbers 183e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x10140, 0x1018F, // Ancient Greek Numbers 184e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x10190, 0x101CF, // Ancient Symbols 185e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x101D0, 0x101FF, // Phaistos Disc 186e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x10280, 0x1029F, // Lycian 187e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x102A0, 0x102DF, // Carian 188e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x10300, 0x1032F, // Old Italic 189e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x10330, 0x1034F, // Gothic 190e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x10380, 0x1039F, // Ugaritic 191e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x103A0, 0x103DF, // Old Persian 192e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x10400, 0x1044F, // Deseret 193e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x10450, 0x1047F, // Shavian 194e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x10480, 0x104AF, // Osmanya 195e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x10800, 0x1083F, // Cypriot Syllabary 196e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x10840, 0x1085F, // Imperial Aramaic 197e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x10900, 0x1091F, // Phoenician 198e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x10920, 0x1093F, // Lydian 199e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x10A00, 0x10A5F, // Kharoshthi 
200e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x10A60, 0x10A7F, // Old South Arabian 201e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x10B00, 0x10B3F, // Avestan 202e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x10B40, 0x10B5F, // Inscriptional Parthian 203e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x10B60, 0x10B7F, // Inscriptional Pahlavi 204e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x10C00, 0x10C4F, // Old Turkic 205e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x10E60, 0x10E7F, // Rumi Numeral Symbols 206e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x11000, 0x1107F, // Brahmi 207e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x11080, 0x110CF, // Kaithi 208e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x12000, 0x123FF, // Cuneiform 209e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x12400, 0x1247F, // Cuneiform Numbers and Punctuation 210e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x13000, 0x1342F, // Egyptian Hieroglyphs 211e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x16800, 0x16A3F, // Bamum Supplement 212e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x1B000, 0x1B0FF, // Kana Supplement 213e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x1D000, 0x1D0FF, // Byzantine Musical Symbols 214e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x1D100, 0x1D1FF, // Musical Symbols 215e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x1D200, 0x1D24F, // Ancient Greek Musical Notation 216e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x1D300, 0x1D35F, // Tai Xuan Jing Symbols 217e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x1D360, 0x1D37F, // Counting Rod Numerals 218e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x1D400, 0x1D7FF, // Mathematical Alphanumeric Symbols 219e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x1F000, 0x1F02F, // Mahjong Tiles 220e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x1F030, 0x1F09F, // Domino Tiles 221e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x1F0A0, 0x1F0FF, // Playing Cards 
222e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x1F100, 0x1F1FF, // Enclosed Alphanumeric Supplement 223e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x1F200, 0x1F2FF, // Enclosed Ideographic Supplement 224e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x1F300, 0x1F5FF, // Miscellaneous Symbols And Pictographs 225e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x1F600, 0x1F64F, // Emoticons 226e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x1F680, 0x1F6FF, // Transport And Map Symbols 227e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x1F700, 0x1F77F, // Alchemical Symbols 228e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x20000, 0x2A6DF, // CJK Unified Ideographs Extension B 229e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x2A700, 0x2B73F, // CJK Unified Ideographs Extension C 230e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x2B740, 0x2B81F, // CJK Unified Ideographs Extension D 231e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x2F800, 0x2FA1F, // CJK Compatibility Ideographs Supplement 232e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0xE0000, 0xE007F, // Tags 233e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0xE0100, 0xE01EF, // Variation Selectors Supplement 234e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0xF0000, 0xFFFFF, // Supplementary Private Use Area-A 235e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0x100000, 0x10FFFF, // Supplementary Private Use Area-B 236e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0xFFFFFFFF 237e462795ff5d4c7359f9e8637c10544bb2de70107tturney}; 238e462795ff5d4c7359f9e8637c10544bb2de70107tturney 239e462795ff5d4c7359f9e8637c10544bb2de70107tturney// Copyright (c) 2008-2010 Bjoern Hoehrmann <bjoern@hoehrmann.de> 240e462795ff5d4c7359f9e8637c10544bb2de70107tturney// See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details. 
241e462795ff5d4c7359f9e8637c10544bb2de70107tturney 242e462795ff5d4c7359f9e8637c10544bb2de70107tturney#define UTF8_ACCEPT 0u 243e462795ff5d4c7359f9e8637c10544bb2de70107tturney#define UTF8_REJECT 12u 244e462795ff5d4c7359f9e8637c10544bb2de70107tturney 245e462795ff5d4c7359f9e8637c10544bb2de70107tturneystatic const unsigned char utf8d[] = { 246e462795ff5d4c7359f9e8637c10544bb2de70107tturney // The first part of the table maps bytes to character classes that 247e462795ff5d4c7359f9e8637c10544bb2de70107tturney // to reduce the size of the transition table and create bitmasks. 248e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 249e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 250e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 251e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 252e462795ff5d4c7359f9e8637c10544bb2de70107tturney 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 253e462795ff5d4c7359f9e8637c10544bb2de70107tturney 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 254e462795ff5d4c7359f9e8637c10544bb2de70107tturney 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 255e462795ff5d4c7359f9e8637c10544bb2de70107tturney 10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8, 256e462795ff5d4c7359f9e8637c10544bb2de70107tturney 257e462795ff5d4c7359f9e8637c10544bb2de70107tturney // The second part is a transition table that maps a combination 258e462795ff5d4c7359f9e8637c10544bb2de70107tturney // of a state of the automaton and a character class to a state. 
259e462795ff5d4c7359f9e8637c10544bb2de70107tturney 0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12, 260e462795ff5d4c7359f9e8637c10544bb2de70107tturney 12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12, 261e462795ff5d4c7359f9e8637c10544bb2de70107tturney 12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12, 262e462795ff5d4c7359f9e8637c10544bb2de70107tturney 12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12, 263e462795ff5d4c7359f9e8637c10544bb2de70107tturney 12,36,12,12,12,12,12,12,12,12,12,12, 264e462795ff5d4c7359f9e8637c10544bb2de70107tturney}; 265e462795ff5d4c7359f9e8637c10544bb2de70107tturney 266e462795ff5d4c7359f9e8637c10544bb2de70107tturneystatic unsigned inline decode(unsigned* state, unsigned* codep, unsigned byte) { 267e462795ff5d4c7359f9e8637c10544bb2de70107tturney unsigned type = utf8d[byte]; 268e462795ff5d4c7359f9e8637c10544bb2de70107tturney 269e462795ff5d4c7359f9e8637c10544bb2de70107tturney *codep = (*state != UTF8_ACCEPT) ? 
270e462795ff5d4c7359f9e8637c10544bb2de70107tturney (byte & 0x3fu) | (*codep << 6) : 271e462795ff5d4c7359f9e8637c10544bb2de70107tturney (0xff >> type) & (byte); 272e462795ff5d4c7359f9e8637c10544bb2de70107tturney 273e462795ff5d4c7359f9e8637c10544bb2de70107tturney *state = utf8d[256 + *state + type]; 274e462795ff5d4c7359f9e8637c10544bb2de70107tturney return *state; 275e462795ff5d4c7359f9e8637c10544bb2de70107tturney} 276e462795ff5d4c7359f9e8637c10544bb2de70107tturney 277e462795ff5d4c7359f9e8637c10544bb2de70107tturney//static bool IsUTF8(unsigned char* s) { 278e462795ff5d4c7359f9e8637c10544bb2de70107tturney// unsigned codepoint, state = 0; 279e462795ff5d4c7359f9e8637c10544bb2de70107tturney// 280e462795ff5d4c7359f9e8637c10544bb2de70107tturney// while (*s) 281e462795ff5d4c7359f9e8637c10544bb2de70107tturney// decode(&state, &codepoint, *s++); 282e462795ff5d4c7359f9e8637c10544bb2de70107tturney// 283e462795ff5d4c7359f9e8637c10544bb2de70107tturney// return state == UTF8_ACCEPT; 284e462795ff5d4c7359f9e8637c10544bb2de70107tturney//} 285e462795ff5d4c7359f9e8637c10544bb2de70107tturney 286e462795ff5d4c7359f9e8637c10544bb2de70107tturneyTEST(EncodingsTest, UTF8) { 287e462795ff5d4c7359f9e8637c10544bb2de70107tturney StringBuffer os, os2; 288e462795ff5d4c7359f9e8637c10544bb2de70107tturney for (const unsigned* range = kCodepointRanges; *range != 0xFFFFFFFF; range += 2) { 289e462795ff5d4c7359f9e8637c10544bb2de70107tturney for (unsigned codepoint = range[0]; codepoint <= range[1]; ++codepoint) { 290e462795ff5d4c7359f9e8637c10544bb2de70107tturney os.Clear(); 291e462795ff5d4c7359f9e8637c10544bb2de70107tturney UTF8<>::Encode(os, codepoint); 292e462795ff5d4c7359f9e8637c10544bb2de70107tturney const char* encodedStr = os.GetString(); 293e462795ff5d4c7359f9e8637c10544bb2de70107tturney 294e462795ff5d4c7359f9e8637c10544bb2de70107tturney // Decode with Hoehrmann 295e462795ff5d4c7359f9e8637c10544bb2de70107tturney { 296e462795ff5d4c7359f9e8637c10544bb2de70107tturney unsigned decodedCodepoint = 0; 
297e462795ff5d4c7359f9e8637c10544bb2de70107tturney unsigned state = 0; 298e462795ff5d4c7359f9e8637c10544bb2de70107tturney 299e462795ff5d4c7359f9e8637c10544bb2de70107tturney unsigned decodedCount = 0; 300e462795ff5d4c7359f9e8637c10544bb2de70107tturney for (const char* s = encodedStr; *s; ++s) 301e462795ff5d4c7359f9e8637c10544bb2de70107tturney if (!decode(&state, &decodedCodepoint, (unsigned char)*s)) { 302e462795ff5d4c7359f9e8637c10544bb2de70107tturney EXPECT_EQ(codepoint, decodedCodepoint); 303e462795ff5d4c7359f9e8637c10544bb2de70107tturney decodedCount++; 304e462795ff5d4c7359f9e8637c10544bb2de70107tturney } 305e462795ff5d4c7359f9e8637c10544bb2de70107tturney 306e462795ff5d4c7359f9e8637c10544bb2de70107tturney if (*encodedStr) // This decoder cannot handle U+0000 307e462795ff5d4c7359f9e8637c10544bb2de70107tturney EXPECT_EQ(1u, decodedCount); // Should only contain one code point 308e462795ff5d4c7359f9e8637c10544bb2de70107tturney 309e462795ff5d4c7359f9e8637c10544bb2de70107tturney EXPECT_EQ(UTF8_ACCEPT, state); 310e462795ff5d4c7359f9e8637c10544bb2de70107tturney if (UTF8_ACCEPT != state) 311e462795ff5d4c7359f9e8637c10544bb2de70107tturney std::cout << std::hex << codepoint << " " << decodedCodepoint << std::endl; 312e462795ff5d4c7359f9e8637c10544bb2de70107tturney } 313e462795ff5d4c7359f9e8637c10544bb2de70107tturney 314e462795ff5d4c7359f9e8637c10544bb2de70107tturney // Decode 315e462795ff5d4c7359f9e8637c10544bb2de70107tturney { 316e462795ff5d4c7359f9e8637c10544bb2de70107tturney StringStream is(encodedStr); 317e462795ff5d4c7359f9e8637c10544bb2de70107tturney unsigned decodedCodepoint; 318e462795ff5d4c7359f9e8637c10544bb2de70107tturney bool result = UTF8<>::Decode(is, &decodedCodepoint); 319e462795ff5d4c7359f9e8637c10544bb2de70107tturney EXPECT_TRUE(result); 320e462795ff5d4c7359f9e8637c10544bb2de70107tturney EXPECT_EQ(codepoint, decodedCodepoint); 321e462795ff5d4c7359f9e8637c10544bb2de70107tturney if (!result || codepoint != decodedCodepoint) 
322e462795ff5d4c7359f9e8637c10544bb2de70107tturney std::cout << std::hex << codepoint << " " << decodedCodepoint << std::endl; 323e462795ff5d4c7359f9e8637c10544bb2de70107tturney } 324e462795ff5d4c7359f9e8637c10544bb2de70107tturney 325e462795ff5d4c7359f9e8637c10544bb2de70107tturney // Validate 326e462795ff5d4c7359f9e8637c10544bb2de70107tturney { 327e462795ff5d4c7359f9e8637c10544bb2de70107tturney StringStream is(encodedStr); 328e462795ff5d4c7359f9e8637c10544bb2de70107tturney os2.Clear(); 329e462795ff5d4c7359f9e8637c10544bb2de70107tturney bool result = UTF8<>::Validate(is, os2); 330e462795ff5d4c7359f9e8637c10544bb2de70107tturney EXPECT_TRUE(result); 331e462795ff5d4c7359f9e8637c10544bb2de70107tturney EXPECT_EQ(0, StrCmp(encodedStr, os2.GetString())); 332e462795ff5d4c7359f9e8637c10544bb2de70107tturney } 333e462795ff5d4c7359f9e8637c10544bb2de70107tturney } 334e462795ff5d4c7359f9e8637c10544bb2de70107tturney } 335e462795ff5d4c7359f9e8637c10544bb2de70107tturney} 336e462795ff5d4c7359f9e8637c10544bb2de70107tturney 337e462795ff5d4c7359f9e8637c10544bb2de70107tturneyTEST(EncodingsTest, UTF16) { 338e462795ff5d4c7359f9e8637c10544bb2de70107tturney GenericStringBuffer<UTF16<> > os, os2; 339e462795ff5d4c7359f9e8637c10544bb2de70107tturney GenericStringBuffer<UTF8<> > utf8os; 340e462795ff5d4c7359f9e8637c10544bb2de70107tturney for (const unsigned* range = kCodepointRanges; *range != 0xFFFFFFFF; range += 2) { 341e462795ff5d4c7359f9e8637c10544bb2de70107tturney for (unsigned codepoint = range[0]; codepoint <= range[1]; ++codepoint) { 342e462795ff5d4c7359f9e8637c10544bb2de70107tturney os.Clear(); 343e462795ff5d4c7359f9e8637c10544bb2de70107tturney UTF16<>::Encode(os, codepoint); 344e462795ff5d4c7359f9e8637c10544bb2de70107tturney const UTF16<>::Ch* encodedStr = os.GetString(); 345e462795ff5d4c7359f9e8637c10544bb2de70107tturney 346e462795ff5d4c7359f9e8637c10544bb2de70107tturney // Encode with Hoehrmann's code 347e462795ff5d4c7359f9e8637c10544bb2de70107tturney if (codepoint != 0) // cannot 
handle U+0000 348e462795ff5d4c7359f9e8637c10544bb2de70107tturney { 349e462795ff5d4c7359f9e8637c10544bb2de70107tturney // encode with UTF8<> first 350e462795ff5d4c7359f9e8637c10544bb2de70107tturney utf8os.Clear(); 351e462795ff5d4c7359f9e8637c10544bb2de70107tturney UTF8<>::Encode(utf8os, codepoint); 352e462795ff5d4c7359f9e8637c10544bb2de70107tturney 353e462795ff5d4c7359f9e8637c10544bb2de70107tturney // transcode from UTF8 to UTF16 with Hoehrmann's code 354e462795ff5d4c7359f9e8637c10544bb2de70107tturney unsigned decodedCodepoint = 0; 355e462795ff5d4c7359f9e8637c10544bb2de70107tturney unsigned state = 0; 356e462795ff5d4c7359f9e8637c10544bb2de70107tturney UTF16<>::Ch buffer[3], *p = &buffer[0]; 357e462795ff5d4c7359f9e8637c10544bb2de70107tturney for (const char* s = utf8os.GetString(); *s; ++s) { 358e462795ff5d4c7359f9e8637c10544bb2de70107tturney if (!decode(&state, &decodedCodepoint, (unsigned char)*s)) 359e462795ff5d4c7359f9e8637c10544bb2de70107tturney break; 360e462795ff5d4c7359f9e8637c10544bb2de70107tturney } 361e462795ff5d4c7359f9e8637c10544bb2de70107tturney 362e462795ff5d4c7359f9e8637c10544bb2de70107tturney if (codepoint <= 0xFFFF) 363e462795ff5d4c7359f9e8637c10544bb2de70107tturney *p++ = static_cast<UTF16<>::Ch>(decodedCodepoint); 364e462795ff5d4c7359f9e8637c10544bb2de70107tturney else { 365e462795ff5d4c7359f9e8637c10544bb2de70107tturney // Encode code points above U+FFFF as surrogate pair. 
366e462795ff5d4c7359f9e8637c10544bb2de70107tturney *p++ = static_cast<UTF16<>::Ch>(0xD7C0 + (decodedCodepoint >> 10)); 367e462795ff5d4c7359f9e8637c10544bb2de70107tturney *p++ = static_cast<UTF16<>::Ch>(0xDC00 + (decodedCodepoint & 0x3FF)); 368e462795ff5d4c7359f9e8637c10544bb2de70107tturney } 369e462795ff5d4c7359f9e8637c10544bb2de70107tturney *p++ = '\0'; 370e462795ff5d4c7359f9e8637c10544bb2de70107tturney 371e462795ff5d4c7359f9e8637c10544bb2de70107tturney EXPECT_EQ(0, StrCmp(buffer, encodedStr)); 372e462795ff5d4c7359f9e8637c10544bb2de70107tturney } 373e462795ff5d4c7359f9e8637c10544bb2de70107tturney 374e462795ff5d4c7359f9e8637c10544bb2de70107tturney // Decode 375e462795ff5d4c7359f9e8637c10544bb2de70107tturney { 376e462795ff5d4c7359f9e8637c10544bb2de70107tturney GenericStringStream<UTF16<> > is(encodedStr); 377e462795ff5d4c7359f9e8637c10544bb2de70107tturney unsigned decodedCodepoint; 378e462795ff5d4c7359f9e8637c10544bb2de70107tturney bool result = UTF16<>::Decode(is, &decodedCodepoint); 379e462795ff5d4c7359f9e8637c10544bb2de70107tturney EXPECT_TRUE(result); 380e462795ff5d4c7359f9e8637c10544bb2de70107tturney EXPECT_EQ(codepoint, decodedCodepoint); 381e462795ff5d4c7359f9e8637c10544bb2de70107tturney if (!result || codepoint != decodedCodepoint) 382e462795ff5d4c7359f9e8637c10544bb2de70107tturney std::cout << std::hex << codepoint << " " << decodedCodepoint << std::endl; 383e462795ff5d4c7359f9e8637c10544bb2de70107tturney } 384e462795ff5d4c7359f9e8637c10544bb2de70107tturney 385e462795ff5d4c7359f9e8637c10544bb2de70107tturney // Validate 386e462795ff5d4c7359f9e8637c10544bb2de70107tturney { 387e462795ff5d4c7359f9e8637c10544bb2de70107tturney GenericStringStream<UTF16<> > is(encodedStr); 388e462795ff5d4c7359f9e8637c10544bb2de70107tturney os2.Clear(); 389e462795ff5d4c7359f9e8637c10544bb2de70107tturney bool result = UTF16<>::Validate(is, os2); 390e462795ff5d4c7359f9e8637c10544bb2de70107tturney EXPECT_TRUE(result); 391e462795ff5d4c7359f9e8637c10544bb2de70107tturney EXPECT_EQ(0, 
StrCmp(encodedStr, os2.GetString())); 392e462795ff5d4c7359f9e8637c10544bb2de70107tturney } 393e462795ff5d4c7359f9e8637c10544bb2de70107tturney } 394e462795ff5d4c7359f9e8637c10544bb2de70107tturney } 395e462795ff5d4c7359f9e8637c10544bb2de70107tturney} 396e462795ff5d4c7359f9e8637c10544bb2de70107tturney 397e462795ff5d4c7359f9e8637c10544bb2de70107tturneyTEST(EncodingsTest, UTF32) { 398e462795ff5d4c7359f9e8637c10544bb2de70107tturney GenericStringBuffer<UTF32<> > os, os2; 399e462795ff5d4c7359f9e8637c10544bb2de70107tturney for (const unsigned* range = kCodepointRanges; *range != 0xFFFFFFFF; range += 2) { 400e462795ff5d4c7359f9e8637c10544bb2de70107tturney for (unsigned codepoint = range[0]; codepoint <= range[1]; ++codepoint) { 401e462795ff5d4c7359f9e8637c10544bb2de70107tturney os.Clear(); 402e462795ff5d4c7359f9e8637c10544bb2de70107tturney UTF32<>::Encode(os, codepoint); 403e462795ff5d4c7359f9e8637c10544bb2de70107tturney const UTF32<>::Ch* encodedStr = os.GetString(); 404e462795ff5d4c7359f9e8637c10544bb2de70107tturney 405e462795ff5d4c7359f9e8637c10544bb2de70107tturney // Decode 406e462795ff5d4c7359f9e8637c10544bb2de70107tturney { 407e462795ff5d4c7359f9e8637c10544bb2de70107tturney GenericStringStream<UTF32<> > is(encodedStr); 408e462795ff5d4c7359f9e8637c10544bb2de70107tturney unsigned decodedCodepoint; 409e462795ff5d4c7359f9e8637c10544bb2de70107tturney bool result = UTF32<>::Decode(is, &decodedCodepoint); 410e462795ff5d4c7359f9e8637c10544bb2de70107tturney EXPECT_TRUE(result); 411e462795ff5d4c7359f9e8637c10544bb2de70107tturney EXPECT_EQ(codepoint, decodedCodepoint); 412e462795ff5d4c7359f9e8637c10544bb2de70107tturney if (!result || codepoint != decodedCodepoint) 413e462795ff5d4c7359f9e8637c10544bb2de70107tturney std::cout << std::hex << codepoint << " " << decodedCodepoint << std::endl; 414e462795ff5d4c7359f9e8637c10544bb2de70107tturney } 415e462795ff5d4c7359f9e8637c10544bb2de70107tturney 416e462795ff5d4c7359f9e8637c10544bb2de70107tturney // Validate 
417e462795ff5d4c7359f9e8637c10544bb2de70107tturney { 418e462795ff5d4c7359f9e8637c10544bb2de70107tturney GenericStringStream<UTF32<> > is(encodedStr); 419e462795ff5d4c7359f9e8637c10544bb2de70107tturney os2.Clear(); 420e462795ff5d4c7359f9e8637c10544bb2de70107tturney bool result = UTF32<>::Validate(is, os2); 421e462795ff5d4c7359f9e8637c10544bb2de70107tturney EXPECT_TRUE(result); 422e462795ff5d4c7359f9e8637c10544bb2de70107tturney EXPECT_EQ(0, StrCmp(encodedStr, os2.GetString())); 423e462795ff5d4c7359f9e8637c10544bb2de70107tturney } 424e462795ff5d4c7359f9e8637c10544bb2de70107tturney } 425e462795ff5d4c7359f9e8637c10544bb2de70107tturney } 426e462795ff5d4c7359f9e8637c10544bb2de70107tturney} 427