// Copyright (c) 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat
5b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat#include "base/json/string_escape.h"
6b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat
7cce46a0c214b37e8da48c522c83037e8ffa4f9fdAlex Vakulenko#include <stddef.h>
8cce46a0c214b37e8da48c522c83037e8ffa4f9fdAlex Vakulenko
9cce46a0c214b37e8da48c522c83037e8ffa4f9fdAlex Vakulenko#include "base/macros.h"
10b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat#include "base/strings/string_util.h"
11b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat#include "base/strings/utf_string_conversions.h"
12b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat#include "testing/gtest/include/gtest/gtest.h"
13b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat
14b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Eratnamespace base {
15b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat
16b8cf94937c52feb53b55c39e3f82094d27de464cDaniel EratTEST(JSONStringEscapeTest, EscapeUTF8) {
17b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat  const struct {
18b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat    const char* to_escape;
19b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat    const char* escaped;
20b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat  } cases[] = {
21b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat    {"\b\001aZ\"\\wee", "\\b\\u0001aZ\\\"\\\\wee"},
22b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat    {"a\b\f\n\r\t\v\1\\.\"z",
23b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat        "a\\b\\f\\n\\r\\t\\u000B\\u0001\\\\.\\\"z"},
24b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat    {"b\x0f\x7f\xf0\xff!",  // \xf0\xff is not a valid UTF-8 unit.
25b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat        "b\\u000F\x7F\xEF\xBF\xBD\xEF\xBF\xBD!"},
26b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat    {"c<>d", "c\\u003C>d"},
27cce46a0c214b37e8da48c522c83037e8ffa4f9fdAlex Vakulenko    {"Hello\xe2\x80\xa8world", "Hello\\u2028world"},
28cce46a0c214b37e8da48c522c83037e8ffa4f9fdAlex Vakulenko    {"\xe2\x80\xa9purple", "\\u2029purple"},
29b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat  };
30b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat
31b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat  for (size_t i = 0; i < arraysize(cases); ++i) {
32b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat    const char* in_ptr = cases[i].to_escape;
33b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat    std::string in_str = in_ptr;
34b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat
35b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat    std::string out;
36b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat    EscapeJSONString(in_ptr, false, &out);
37b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat    EXPECT_EQ(std::string(cases[i].escaped), out);
38b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat    EXPECT_TRUE(IsStringUTF8(out));
39b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat
40b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat    out.erase();
41b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat    bool convert_ok = EscapeJSONString(in_str, false, &out);
42b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat    EXPECT_EQ(std::string(cases[i].escaped), out);
43b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat    EXPECT_TRUE(IsStringUTF8(out));
44b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat
45b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat    if (convert_ok) {
46b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat      std::string fooout = GetQuotedJSONString(in_str);
47b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat      EXPECT_EQ("\"" + std::string(cases[i].escaped) + "\"", fooout);
48b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat      EXPECT_TRUE(IsStringUTF8(out));
49b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat    }
50b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat  }
51b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat
52b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat  std::string in = cases[0].to_escape;
53b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat  std::string out;
54b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat  EscapeJSONString(in, false, &out);
55b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat  EXPECT_TRUE(IsStringUTF8(out));
56b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat
57b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat  // test quoting
58b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat  std::string out_quoted;
59b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat  EscapeJSONString(in, true, &out_quoted);
60b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat  EXPECT_EQ(out.length() + 2, out_quoted.length());
61b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat  EXPECT_EQ(out_quoted.find(out), 1U);
62b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat  EXPECT_TRUE(IsStringUTF8(out_quoted));
63b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat
64b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat  // now try with a NULL in the string
65b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat  std::string null_prepend = "test";
66b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat  null_prepend.push_back(0);
67b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat  in = null_prepend + in;
68b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat  std::string expected = "test\\u0000";
69b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat  expected += cases[0].escaped;
70b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat  out.clear();
71b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat  EscapeJSONString(in, false, &out);
72b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat  EXPECT_EQ(expected, out);
73b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat  EXPECT_TRUE(IsStringUTF8(out));
74b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat}
75b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat
76b8cf94937c52feb53b55c39e3f82094d27de464cDaniel EratTEST(JSONStringEscapeTest, EscapeUTF16) {
77b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat  const struct {
78b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat    const wchar_t* to_escape;
79b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat    const char* escaped;
80b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat  } cases[] = {
81b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat    {L"b\uffb1\u00ff", "b\xEF\xBE\xB1\xC3\xBF"},
82b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat    {L"\b\001aZ\"\\wee", "\\b\\u0001aZ\\\"\\\\wee"},
83b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat    {L"a\b\f\n\r\t\v\1\\.\"z",
84b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat        "a\\b\\f\\n\\r\\t\\u000B\\u0001\\\\.\\\"z"},
85b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat    {L"b\x0f\x7f\xf0\xff!", "b\\u000F\x7F\xC3\xB0\xC3\xBF!"},
86b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat    {L"c<>d", "c\\u003C>d"},
87cce46a0c214b37e8da48c522c83037e8ffa4f9fdAlex Vakulenko    {L"Hello\u2028world", "Hello\\u2028world"},
88cce46a0c214b37e8da48c522c83037e8ffa4f9fdAlex Vakulenko    {L"\u2029purple", "\\u2029purple"},
89b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat  };
90b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat
91b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat  for (size_t i = 0; i < arraysize(cases); ++i) {
92b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat    string16 in = WideToUTF16(cases[i].to_escape);
93b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat
94b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat    std::string out;
95b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat    EscapeJSONString(in, false, &out);
96b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat    EXPECT_EQ(std::string(cases[i].escaped), out);
97b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat    EXPECT_TRUE(IsStringUTF8(out));
98b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat
99b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat    out = GetQuotedJSONString(in);
100b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat    EXPECT_EQ("\"" + std::string(cases[i].escaped) + "\"", out);
101b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat    EXPECT_TRUE(IsStringUTF8(out));
102b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat  }
103b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat
104b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat  string16 in = WideToUTF16(cases[0].to_escape);
105b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat  std::string out;
106b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat  EscapeJSONString(in, false, &out);
107b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat  EXPECT_TRUE(IsStringUTF8(out));
108b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat
109b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat  // test quoting
110b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat  std::string out_quoted;
111b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat  EscapeJSONString(in, true, &out_quoted);
112b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat  EXPECT_EQ(out.length() + 2, out_quoted.length());
113b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat  EXPECT_EQ(out_quoted.find(out), 1U);
114b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat  EXPECT_TRUE(IsStringUTF8(out));
115b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat
116b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat  // now try with a NULL in the string
117b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat  string16 null_prepend = WideToUTF16(L"test");
118b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat  null_prepend.push_back(0);
119b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat  in = null_prepend + in;
120b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat  std::string expected = "test\\u0000";
121b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat  expected += cases[0].escaped;
122b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat  out.clear();
123b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat  EscapeJSONString(in, false, &out);
124b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat  EXPECT_EQ(expected, out);
125b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat  EXPECT_TRUE(IsStringUTF8(out));
126b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat}
127b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat
128b8cf94937c52feb53b55c39e3f82094d27de464cDaniel EratTEST(JSONStringEscapeTest, EscapeUTF16OutsideBMP) {
129b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat  {
130b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat    // {a, U+10300, !}, SMP.
131b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat    string16 test;
132b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat    test.push_back('a');
133b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat    test.push_back(0xD800);
134b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat    test.push_back(0xDF00);
135b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat    test.push_back('!');
136b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat    std::string actual;
137b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat    EXPECT_TRUE(EscapeJSONString(test, false, &actual));
138b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat    EXPECT_EQ("a\xF0\x90\x8C\x80!", actual);
139b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat  }
140b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat  {
141b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat    // {U+20021, U+2002B}, SIP.
142b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat    string16 test;
143b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat    test.push_back(0xD840);
144b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat    test.push_back(0xDC21);
145b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat    test.push_back(0xD840);
146b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat    test.push_back(0xDC2B);
147b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat    std::string actual;
148b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat    EXPECT_TRUE(EscapeJSONString(test, false, &actual));
149b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat    EXPECT_EQ("\xF0\xA0\x80\xA1\xF0\xA0\x80\xAB", actual);
150b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat  }
151b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat  {
152b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat    // {?, U+D800, @}, lone surrogate.
153b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat    string16 test;
154b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat    test.push_back('?');
155b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat    test.push_back(0xD800);
156b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat    test.push_back('@');
157b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat    std::string actual;
158b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat    EXPECT_FALSE(EscapeJSONString(test, false, &actual));
159b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat    EXPECT_EQ("?\xEF\xBF\xBD@", actual);
160b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat  }
161b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat}
162b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat
163b8cf94937c52feb53b55c39e3f82094d27de464cDaniel EratTEST(JSONStringEscapeTest, EscapeBytes) {
164b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat  const struct {
165b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat    const char* to_escape;
166b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat    const char* escaped;
167b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat  } cases[] = {
168b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat    {"b\x0f\x7f\xf0\xff!", "b\\u000F\\u007F\\u00F0\\u00FF!"},
169b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat    {"\xe5\xc4\x4f\x05\xb6\xfd", "\\u00E5\\u00C4O\\u0005\\u00B6\\u00FD"},
170b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat  };
171b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat
172b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat  for (size_t i = 0; i < arraysize(cases); ++i) {
173b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat    std::string in = std::string(cases[i].to_escape);
174b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat    EXPECT_FALSE(IsStringUTF8(in));
175b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat
176b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat    EXPECT_EQ(std::string(cases[i].escaped),
177b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat        EscapeBytesAsInvalidJSONString(in, false));
178b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat    EXPECT_EQ("\"" + std::string(cases[i].escaped) + "\"",
179b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat        EscapeBytesAsInvalidJSONString(in, true));
180b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat  }
181b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat
182b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat  const char kEmbedNull[] = { '\xab', '\x39', '\0', '\x9f', '\xab' };
183b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat  std::string in(kEmbedNull, arraysize(kEmbedNull));
184b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat  EXPECT_FALSE(IsStringUTF8(in));
185b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat  EXPECT_EQ(std::string("\\u00AB9\\u0000\\u009F\\u00AB"),
186b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat            EscapeBytesAsInvalidJSONString(in, false));
187b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat}
188b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat
189b8cf94937c52feb53b55c39e3f82094d27de464cDaniel Erat}  // namespace base
190