1// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "base/json/string_escape.h"
6
7#include <stddef.h>
8#include <stdint.h>
9
10#include <limits>
11#include <string>
12
13#include "base/strings/string_util.h"
14#include "base/strings/stringprintf.h"
15#include "base/strings/utf_string_conversion_utils.h"
16#include "base/strings/utf_string_conversions.h"
17#include "base/third_party/icu/icu_utf.h"
18
19namespace base {
20
21namespace {
22
// printf-style template that renders a value as a JSON \uXXXX escape: a
// backslash, 'u', and at least four zero-padded upper-case hex digits.
constexpr char kU16EscapeFormat[] = "\\u%04X";

// Code point substituted for input that could not be decoded (U+FFFD).
constexpr uint32_t kReplacementCodePoint = 0xFFFD;

// EscapeSpecialCodePoint() hard-codes the escape for '<' as \u003C, so verify
// at compile time that the character literal really has that value.
static_assert('<' == 0x3C, "less than sign must be 0x3c");
31
// Appends a dedicated escape sequence for |code_point| to |dest| when the
// code point is one of the characters handled specially below. Returns true
// if an escape was appended; returns false and leaves |dest| untouched
// otherwise. The short escapes are not required by the spec, but they make
// the output easier for humans to read.
bool EscapeSpecialCodePoint(uint32_t code_point, std::string* dest) {
  // WARNING: if you add a new case here, you need to update the reader as well.
  // Note: \v is in the reader, but not here since the JSON spec doesn't
  // allow it.
  const char* escape_sequence = nullptr;
  switch (code_point) {
    case '\b':
      escape_sequence = "\\b";
      break;
    case '\f':
      escape_sequence = "\\f";
      break;
    case '\n':
      escape_sequence = "\\n";
      break;
    case '\r':
      escape_sequence = "\\r";
      break;
    case '\t':
      escape_sequence = "\\t";
      break;
    case '\\':
      escape_sequence = "\\\\";
      break;
    case '"':
      escape_sequence = "\\\"";
      break;
    // Escape < to prevent script execution; escaping > is not necessary and
    // leaving it alone saves a few bytes.
    case '<':
      escape_sequence = "\\u003C";
      break;
    // Escape the "Line Separator" and "Paragraph Separator" characters, since
    // they should be treated like a new line (\r or \n).
    case 0x2028:
      escape_sequence = "\\u2028";
      break;
    case 0x2029:
      escape_sequence = "\\u2029";
      break;
    default:
      // Not a special character; the caller decides how to emit it.
      return false;
  }

  dest->append(escape_sequence);
  return true;
}
79
80template <typename S>
81bool EscapeJSONStringImpl(const S& str, bool put_in_quotes, std::string* dest) {
82  bool did_replacement = false;
83
84  if (put_in_quotes)
85    dest->push_back('"');
86
87  // Casting is necessary because ICU uses int32_t. Try and do so safely.
88  CHECK_LE(str.length(),
89           static_cast<size_t>(std::numeric_limits<int32_t>::max()));
90  const int32_t length = static_cast<int32_t>(str.length());
91
92  for (int32_t i = 0; i < length; ++i) {
93    uint32_t code_point;
94    if (!ReadUnicodeCharacter(str.data(), length, &i, &code_point)) {
95      code_point = kReplacementCodePoint;
96      did_replacement = true;
97    }
98
99    if (EscapeSpecialCodePoint(code_point, dest))
100      continue;
101
102    // Escape non-printing characters.
103    if (code_point < 32)
104      base::StringAppendF(dest, kU16EscapeFormat, code_point);
105    else
106      WriteUnicodeCharacter(code_point, dest);
107  }
108
109  if (put_in_quotes)
110    dest->push_back('"');
111
112  return !did_replacement;
113}
114
115}  // namespace
116
117bool EscapeJSONString(const StringPiece& str,
118                      bool put_in_quotes,
119                      std::string* dest) {
120  return EscapeJSONStringImpl(str, put_in_quotes, dest);
121}
122
123bool EscapeJSONString(const StringPiece16& str,
124                      bool put_in_quotes,
125                      std::string* dest) {
126  return EscapeJSONStringImpl(str, put_in_quotes, dest);
127}
128
129std::string GetQuotedJSONString(const StringPiece& str) {
130  std::string dest;
131  bool ok = EscapeJSONStringImpl(str, true, &dest);
132  DCHECK(ok);
133  return dest;
134}
135
136std::string GetQuotedJSONString(const StringPiece16& str) {
137  std::string dest;
138  bool ok = EscapeJSONStringImpl(str, true, &dest);
139  DCHECK(ok);
140  return dest;
141}
142
143std::string EscapeBytesAsInvalidJSONString(const StringPiece& str,
144                                           bool put_in_quotes) {
145  std::string dest;
146
147  if (put_in_quotes)
148    dest.push_back('"');
149
150  for (StringPiece::const_iterator it = str.begin(); it != str.end(); ++it) {
151    unsigned char c = *it;
152    if (EscapeSpecialCodePoint(c, &dest))
153      continue;
154
155    if (c < 32 || c > 126)
156      base::StringAppendF(&dest, kU16EscapeFormat, c);
157    else
158      dest.push_back(*it);
159  }
160
161  if (put_in_quotes)
162    dest.push_back('"');
163
164  return dest;
165}
166
167}  // namespace base
168