15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. 25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Use of this source code is governed by a BSD-style license that can be 35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// found in the LICENSE file. 45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/json/string_escape.h" 65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <string> 85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 9868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)#include "base/strings/string_util.h" 10868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)#include "base/strings/stringprintf.h" 115d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)#include "base/strings/utf_string_conversion_utils.h" 125d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)#include "base/strings/utf_string_conversions.h" 135d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)#include "base/third_party/icu/icu_utf.h" 145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace base { 165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace { 185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 195d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)// Format string for printing a \uXXXX escape sequence. 205d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)const char kU16EscapeFormat[] = "\\u%04X"; 215d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 225d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)// The code point to output for an invalid input code unit. 235d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)const uint32 kReplacementCodePoint = 0xFFFD; 245d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 255d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)// Used below in EscapeSpecialCodePoint(). 265d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)COMPILE_ASSERT('<' == 0x3C, less_than_sign_is_0x3c); 275d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 285d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)// Try to escape the |code_point| if it is a known special character. If 295d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)// successful, returns true and appends the escape sequence to |dest|. This 305d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)// isn't required by the spec, but it's more readable by humans. 315d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)bool EscapeSpecialCodePoint(uint32 code_point, std::string* dest) { 325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // WARNING: if you add a new case here, you need to update the reader as well. 335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Note: \v is in the reader, but not here since the JSON spec doesn't 345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // allow it. 355d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) switch (code_point) { 365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case '\b': 375d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) dest->append("\\b"); 385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case '\f': 405d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) dest->append("\\f"); 415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case '\n': 435d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) dest->append("\\n"); 445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case '\r': 465d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) dest->append("\\r"); 475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case '\t': 495d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) dest->append("\\t"); 505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case '\\': 525d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) dest->append("\\\\"); 535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case '"': 555d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) dest->append("\\\""); 565d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) break; 575d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) // Escape < to prevent script execution; escaping > is not necessary and 585d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) // not doing so save a few bytes. 595d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) case '<': 605d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) dest->append("\\u003C"); 615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) default: 635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return false; 645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return true; 665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 685d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)template <typename S> 695d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)bool EscapeJSONStringImpl(const S& str, bool put_in_quotes, std::string* dest) { 705d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) bool did_replacement = false; 715d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (put_in_quotes) 735d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) dest->push_back('"'); 745d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 755d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) // Casting is necessary because ICU uses int32. Try and do so safely. 765d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) CHECK_LE(str.length(), static_cast<size_t>(kint32max)); 775d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) const int32 length = static_cast<int32>(str.length()); 785d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 795d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) for (int32 i = 0; i < length; ++i) { 805d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) uint32 code_point; 815d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) if (!ReadUnicodeCharacter(str.data(), length, &i, &code_point)) { 825d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) code_point = kReplacementCodePoint; 835d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) did_replacement = true; 845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 855d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 865d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) if (EscapeSpecialCodePoint(code_point, dest)) 875d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) continue; 885d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 895d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) // Escape non-printing characters. 905d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) if (code_point < 32) 915d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) base::StringAppendF(dest, kU16EscapeFormat, code_point); 925d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) else 935d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) WriteUnicodeCharacter(code_point, dest); 945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (put_in_quotes) 975d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) dest->push_back('"'); 985d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 995d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) return !did_replacement; 1005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} // namespace 1035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1045d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)bool EscapeJSONString(const StringPiece& str, 1055d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) bool put_in_quotes, 1065d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) std::string* dest) { 1075d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) return EscapeJSONStringImpl(str, put_in_quotes, dest); 1085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 1095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1105d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)bool EscapeJSONString(const StringPiece16& str, 1115d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) bool put_in_quotes, 1125d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) std::string* dest) { 1135d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) return EscapeJSONStringImpl(str, put_in_quotes, dest); 1145d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)} 1155d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 1165d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)std::string GetQuotedJSONString(const StringPiece& str) { 1175d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) std::string dest; 1185d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) bool ok = EscapeJSONStringImpl(str, true, &dest); 1195d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) DCHECK(ok); 1205d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) return dest; 1215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 1225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1235d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)std::string GetQuotedJSONString(const StringPiece16& str) { 1245d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) std::string dest; 1255d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) bool ok = EscapeJSONStringImpl(str, true, &dest); 1265d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) DCHECK(ok); 1275d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) return dest; 1285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 1295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1305d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)std::string EscapeBytesAsInvalidJSONString(const StringPiece& str, 1315d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) bool put_in_quotes) { 1325d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) std::string dest; 1335d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 1345d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) if (put_in_quotes) 1355d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) dest.push_back('"'); 1365d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 1375d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) for (StringPiece::const_iterator it = str.begin(); it != str.end(); ++it) { 1385d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) ToUnsigned<StringPiece::value_type>::Unsigned c = *it; 1395d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) if (EscapeSpecialCodePoint(c, &dest)) 1405d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) continue; 1415d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 1425d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) if (c < 32 || c > 126) 1435d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) base::StringAppendF(&dest, kU16EscapeFormat, c); 1445d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) else 1455d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) dest.push_back(*it); 1465d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) } 1475d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 1485d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) if (put_in_quotes) 1495d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) dest.push_back('"'); 1505d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 1515d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) return dest; 1525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 1535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} // namespace base 155