1b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// Copyright (c) 2013 The Chromium Authors. All rights reserved. 2b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// Use of this source code is governed by a BSD-style license that can be 3b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat// found in the LICENSE file. 4b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 5b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat#include "base/json/string_escape.h" 6b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 7cce46a0c214b37e8da48c522c83037e8ffa4f9fdAlex Vakulenko#include <stddef.h> 8cce46a0c214b37e8da48c522c83037e8ffa4f9fdAlex Vakulenko 9cce46a0c214b37e8da48c522c83037e8ffa4f9fdAlex Vakulenko#include "base/macros.h" 10b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat#include "base/strings/string_util.h" 11b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat#include "base/strings/utf_string_conversions.h" 12b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat#include "testing/gtest/include/gtest/gtest.h" 13b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 14b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Eratnamespace base { 15b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 16b8cf94937c52feb53b55c39e3f82094d27de464cDaniel EratTEST(JSONStringEscapeTest, EscapeUTF8) { 17b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat const struct { 18b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat const char* to_escape; 19b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat const char* escaped; 20b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat } cases[] = { 21b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat {"\b\001aZ\"\\wee", "\\b\\u0001aZ\\\"\\\\wee"}, 22b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat {"a\b\f\n\r\t\v\1\\.\"z", 23b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat "a\\b\\f\\n\\r\\t\\u000B\\u0001\\\\.\\\"z"}, 24b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat {"b\x0f\x7f\xf0\xff!", // \xf0\xff is not a valid UTF-8 unit. 25b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat "b\\u000F\x7F\xEF\xBF\xBD\xEF\xBF\xBD!"}, 26b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat {"c<>d", "c\\u003C>d"}, 27cce46a0c214b37e8da48c522c83037e8ffa4f9fdAlex Vakulenko {"Hello\xe2\x80\xa8world", "Hello\\u2028world"}, 28cce46a0c214b37e8da48c522c83037e8ffa4f9fdAlex Vakulenko {"\xe2\x80\xa9purple", "\\u2029purple"}, 29b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat }; 30b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 31b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat for (size_t i = 0; i < arraysize(cases); ++i) { 32b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat const char* in_ptr = cases[i].to_escape; 33b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat std::string in_str = in_ptr; 34b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 35b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat std::string out; 36b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat EscapeJSONString(in_ptr, false, &out); 37b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat EXPECT_EQ(std::string(cases[i].escaped), out); 38b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat EXPECT_TRUE(IsStringUTF8(out)); 39b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 40b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat out.erase(); 41b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat bool convert_ok = EscapeJSONString(in_str, false, &out); 42b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat EXPECT_EQ(std::string(cases[i].escaped), out); 43b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat EXPECT_TRUE(IsStringUTF8(out)); 44b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 45b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat if (convert_ok) { 46b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat std::string fooout = GetQuotedJSONString(in_str); 47b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat EXPECT_EQ("\"" + std::string(cases[i].escaped) + "\"", fooout); 48b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat EXPECT_TRUE(IsStringUTF8(out)); 49b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat } 50b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat } 51b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 52b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat std::string in = cases[0].to_escape; 53b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat std::string out; 54b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat EscapeJSONString(in, false, &out); 55b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat EXPECT_TRUE(IsStringUTF8(out)); 56b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 57b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat // test quoting 58b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat std::string out_quoted; 59b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat EscapeJSONString(in, true, &out_quoted); 60b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat EXPECT_EQ(out.length() + 2, out_quoted.length()); 61b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat EXPECT_EQ(out_quoted.find(out), 1U); 62b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat EXPECT_TRUE(IsStringUTF8(out_quoted)); 63b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 64b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat // now try with a NULL in the string 65b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat std::string null_prepend = "test"; 66b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat null_prepend.push_back(0); 67b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat in = null_prepend + in; 68b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat std::string expected = "test\\u0000"; 69b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat expected += cases[0].escaped; 70b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat out.clear(); 71b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat EscapeJSONString(in, false, &out); 72b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat EXPECT_EQ(expected, out); 73b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat EXPECT_TRUE(IsStringUTF8(out)); 74b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat} 75b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 76b8cf94937c52feb53b55c39e3f82094d27de464cDaniel EratTEST(JSONStringEscapeTest, EscapeUTF16) { 77b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat const struct { 78b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat const wchar_t* to_escape; 79b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat const char* escaped; 80b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat } cases[] = { 81b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat {L"b\uffb1\u00ff", "b\xEF\xBE\xB1\xC3\xBF"}, 82b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat {L"\b\001aZ\"\\wee", "\\b\\u0001aZ\\\"\\\\wee"}, 83b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat {L"a\b\f\n\r\t\v\1\\.\"z", 84b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat "a\\b\\f\\n\\r\\t\\u000B\\u0001\\\\.\\\"z"}, 85b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat {L"b\x0f\x7f\xf0\xff!", "b\\u000F\x7F\xC3\xB0\xC3\xBF!"}, 86b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat {L"c<>d", "c\\u003C>d"}, 87cce46a0c214b37e8da48c522c83037e8ffa4f9fdAlex Vakulenko {L"Hello\u2028world", "Hello\\u2028world"}, 88cce46a0c214b37e8da48c522c83037e8ffa4f9fdAlex Vakulenko {L"\u2029purple", "\\u2029purple"}, 89b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat }; 90b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 91b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat for (size_t i = 0; i < arraysize(cases); ++i) { 92b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat string16 in = WideToUTF16(cases[i].to_escape); 93b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 94b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat std::string out; 95b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat EscapeJSONString(in, false, &out); 96b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat EXPECT_EQ(std::string(cases[i].escaped), out); 97b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat EXPECT_TRUE(IsStringUTF8(out)); 98b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 99b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat out = GetQuotedJSONString(in); 100b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat EXPECT_EQ("\"" + std::string(cases[i].escaped) + "\"", out); 101b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat EXPECT_TRUE(IsStringUTF8(out)); 102b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat } 103b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 104b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat string16 in = WideToUTF16(cases[0].to_escape); 105b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat std::string out; 106b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat EscapeJSONString(in, false, &out); 107b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat EXPECT_TRUE(IsStringUTF8(out)); 108b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 109b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat // test quoting 110b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat std::string out_quoted; 111b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat EscapeJSONString(in, true, &out_quoted); 112b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat EXPECT_EQ(out.length() + 2, out_quoted.length()); 113b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat EXPECT_EQ(out_quoted.find(out), 1U); 114b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat EXPECT_TRUE(IsStringUTF8(out)); 115b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 116b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat // now try with a NULL in the string 117b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat string16 null_prepend = WideToUTF16(L"test"); 118b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat null_prepend.push_back(0); 119b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat in = null_prepend + in; 120b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat std::string expected = "test\\u0000"; 121b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat expected += cases[0].escaped; 122b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat out.clear(); 123b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat EscapeJSONString(in, false, &out); 124b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat EXPECT_EQ(expected, out); 125b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat EXPECT_TRUE(IsStringUTF8(out)); 126b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat} 127b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 128b8cf94937c52feb53b55c39e3f82094d27de464cDaniel EratTEST(JSONStringEscapeTest, EscapeUTF16OutsideBMP) { 129b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat { 130b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat // {a, U+10300, !}, SMP. 131b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat string16 test; 132b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat test.push_back('a'); 133b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat test.push_back(0xD800); 134b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat test.push_back(0xDF00); 135b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat test.push_back('!'); 136b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat std::string actual; 137b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat EXPECT_TRUE(EscapeJSONString(test, false, &actual)); 138b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat EXPECT_EQ("a\xF0\x90\x8C\x80!", actual); 139b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat } 140b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat { 141b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat // {U+20021, U+2002B}, SIP. 142b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat string16 test; 143b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat test.push_back(0xD840); 144b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat test.push_back(0xDC21); 145b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat test.push_back(0xD840); 146b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat test.push_back(0xDC2B); 147b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat std::string actual; 148b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat EXPECT_TRUE(EscapeJSONString(test, false, &actual)); 149b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat EXPECT_EQ("\xF0\xA0\x80\xA1\xF0\xA0\x80\xAB", actual); 150b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat } 151b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat { 152b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat // {?, U+D800, @}, lone surrogate. 153b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat string16 test; 154b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat test.push_back('?'); 155b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat test.push_back(0xD800); 156b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat test.push_back('@'); 157b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat std::string actual; 158b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat EXPECT_FALSE(EscapeJSONString(test, false, &actual)); 159b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat EXPECT_EQ("?\xEF\xBF\xBD@", actual); 160b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat } 161b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat} 162b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 163b8cf94937c52feb53b55c39e3f82094d27de464cDaniel EratTEST(JSONStringEscapeTest, EscapeBytes) { 164b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat const struct { 165b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat const char* to_escape; 166b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat const char* escaped; 167b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat } cases[] = { 168b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat {"b\x0f\x7f\xf0\xff!", "b\\u000F\\u007F\\u00F0\\u00FF!"}, 169b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat {"\xe5\xc4\x4f\x05\xb6\xfd", "\\u00E5\\u00C4O\\u0005\\u00B6\\u00FD"}, 170b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat }; 171b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 172b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat for (size_t i = 0; i < arraysize(cases); ++i) { 173b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat std::string in = std::string(cases[i].to_escape); 174b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat EXPECT_FALSE(IsStringUTF8(in)); 175b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 176b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat EXPECT_EQ(std::string(cases[i].escaped), 177b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat EscapeBytesAsInvalidJSONString(in, false)); 178b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat EXPECT_EQ("\"" + std::string(cases[i].escaped) + "\"", 179b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat EscapeBytesAsInvalidJSONString(in, true)); 180b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat } 181b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 182b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat const char kEmbedNull[] = { '\xab', '\x39', '\0', '\x9f', '\xab' }; 183b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat std::string in(kEmbedNull, arraysize(kEmbedNull)); 184b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat EXPECT_FALSE(IsStringUTF8(in)); 185b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat EXPECT_EQ(std::string("\\u00AB9\\u0000\\u009F\\u00AB"), 186b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat EscapeBytesAsInvalidJSONString(in, false)); 187b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat} 188b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat 189b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat} // namespace base 190