string_escape.cc revision d57369da7c6519fef57db42085f7b42d4c8845c1
1// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "base/json/string_escape.h"
6
7#include <string>
8
9#include "base/strings/string_util.h"
10#include "base/strings/stringprintf.h"
11#include "base/strings/utf_string_conversion_utils.h"
12#include "base/strings/utf_string_conversions.h"
13#include "base/third_party/icu/icu_utf.h"
14
15namespace base {
16
17namespace {
18
19// Format string for printing a \uXXXX escape sequence.
20const char kU16EscapeFormat[] = "\\u%04X";
21
22// The code point to output for an invalid input code unit.
23const uint32 kReplacementCodePoint = 0xFFFD;
24
25// Used below in EscapeSpecialCodePoint().
26COMPILE_ASSERT('<' == 0x3C, less_than_sign_is_0x3c);
27
28// Try to escape the |code_point| if it is a known special character. If
29// successful, returns true and appends the escape sequence to |dest|. This
30// isn't required by the spec, but it's more readable by humans.
31bool EscapeSpecialCodePoint(uint32 code_point, std::string* dest) {
32  // WARNING: if you add a new case here, you need to update the reader as well.
33  // Note: \v is in the reader, but not here since the JSON spec doesn't
34  // allow it.
35  switch (code_point) {
36    case '\b':
37      dest->append("\\b");
38      break;
39    case '\f':
40      dest->append("\\f");
41      break;
42    case '\n':
43      dest->append("\\n");
44      break;
45    case '\r':
46      dest->append("\\r");
47      break;
48    case '\t':
49      dest->append("\\t");
50      break;
51    case '\\':
52      dest->append("\\\\");
53      break;
54    case '"':
55      dest->append("\\\"");
56      break;
57    // Escape < to prevent script execution; escaping > is not necessary and
58    // not doing so save a few bytes.
59    case '<':
60      dest->append("\\u003C");
61      break;
62    default:
63      return false;
64  }
65  return true;
66}
67
68template <typename S>
69bool EscapeJSONStringImpl(const S& str, bool put_in_quotes, std::string* dest) {
70  bool did_replacement = false;
71
72  if (put_in_quotes)
73    dest->push_back('"');
74
75  // Casting is necessary because ICU uses int32. Try and do so safely.
76  CHECK_LE(str.length(), static_cast<size_t>(kint32max));
77  const int32 length = static_cast<int32>(str.length());
78
79  for (int32 i = 0; i < length; ++i) {
80    uint32 code_point;
81    if (!ReadUnicodeCharacter(str.data(), length, &i, &code_point)) {
82      code_point = kReplacementCodePoint;
83      did_replacement = true;
84    }
85
86    if (EscapeSpecialCodePoint(code_point, dest))
87      continue;
88
89    // Escape non-printing characters.
90    if (code_point < 32)
91      base::StringAppendF(dest, kU16EscapeFormat, code_point);
92    else
93      WriteUnicodeCharacter(code_point, dest);
94  }
95
96  if (put_in_quotes)
97    dest->push_back('"');
98
99  return !did_replacement;
100}
101
102}  // namespace
103
104bool EscapeJSONString(const StringPiece& str,
105                      bool put_in_quotes,
106                      std::string* dest) {
107  return EscapeJSONStringImpl(str, put_in_quotes, dest);
108}
109
110bool EscapeJSONString(const StringPiece16& str,
111                      bool put_in_quotes,
112                      std::string* dest) {
113  return EscapeJSONStringImpl(str, put_in_quotes, dest);
114}
115
116std::string GetQuotedJSONString(const StringPiece& str) {
117  std::string dest;
118  bool ok = EscapeJSONStringImpl(str, true, &dest);
119  DCHECK(ok);
120  return dest;
121}
122
123std::string GetQuotedJSONString(const StringPiece16& str) {
124  std::string dest;
125  bool ok = EscapeJSONStringImpl(str, true, &dest);
126  DCHECK(ok);
127  return dest;
128}
129
130std::string EscapeBytesAsInvalidJSONString(const StringPiece& str,
131                                           bool put_in_quotes) {
132  std::string dest;
133
134  if (put_in_quotes)
135    dest.push_back('"');
136
137  for (StringPiece::const_iterator it = str.begin(); it != str.end(); ++it) {
138    ToUnsigned<StringPiece::value_type>::Unsigned c = *it;
139    if (EscapeSpecialCodePoint(c, &dest))
140      continue;
141
142    if (c < 32 || c > 126)
143      base::StringAppendF(&dest, kU16EscapeFormat, c);
144    else
145      dest.push_back(*it);
146  }
147
148  if (put_in_quotes)
149    dest.push_back('"');
150
151  return dest;
152}
153
154}  // namespace base
155