172a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen// Copyright (c) 2011 The Chromium Authors. All rights reserved. 2c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// Use of this source code is governed by a BSD-style license that can be 3c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// found in the LICENSE file. 4c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 5c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include "net/base/escape.h" 6c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 7ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen#include <algorithm> 8ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 9c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include "base/logging.h" 10ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen#include "base/scoped_ptr.h" 11c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include "base/string_piece.h" 123345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick#include "base/string_util.h" 13c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include "base/utf_string_conversions.h" 14c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include "base/utf_offset_string_conversions.h" 15c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 16c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottnamespace { 17c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 18c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottstatic const char* const kHexString = "0123456789ABCDEF"; 19c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottinline char IntToHex(int i) { 20c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott DCHECK(i >= 0 && i <= 15) << i << " not a hex value"; 21c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return kHexString[i]; 22c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 23c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 24c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// A fast bit-vector map for ascii characters. 25c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// 26c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// Internally stores 256 bits in an array of 8 ints. 27c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// Does quick bit-flicking to lookup needed characters. 28c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottclass Charmap { 29c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott public: 30c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott Charmap(uint32 b0, uint32 b1, uint32 b2, uint32 b3, 31c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott uint32 b4, uint32 b5, uint32 b6, uint32 b7) { 32c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott map_[0] = b0; map_[1] = b1; map_[2] = b2; map_[3] = b3; 33c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott map_[4] = b4; map_[5] = b5; map_[6] = b6; map_[7] = b7; 34c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 35c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 36c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott bool Contains(unsigned char c) const { 37c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return (map_[c >> 5] & (1 << (c & 31))) ? true : false; 38c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 39c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 40c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott private: 41c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott uint32 map_[8]; 42c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}; 43c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 44c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// Given text to escape and a Charmap defining which values to escape, 45c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// return an escaped string. If use_plus is true, spaces are converted 46c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// to +, otherwise, if spaces are in the charmap, they are converted to 47c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// %20. 48ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsenstd::string Escape(const std::string& text, const Charmap& charmap, 49ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen bool use_plus) { 50c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott std::string escaped; 51c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott escaped.reserve(text.length() * 3); 52c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott for (unsigned int i = 0; i < text.length(); ++i) { 53c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott unsigned char c = static_cast<unsigned char>(text[i]); 54c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (use_plus && ' ' == c) { 55c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott escaped.push_back('+'); 56c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } else if (charmap.Contains(c)) { 57c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott escaped.push_back('%'); 58c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott escaped.push_back(IntToHex(c >> 4)); 59c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott escaped.push_back(IntToHex(c & 0xf)); 60c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } else { 61c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott escaped.push_back(c); 62c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 63c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 64c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return escaped; 65c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 66c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 67c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// Contains nonzero when the corresponding character is unescapable for normal 68c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// URLs. These characters are the ones that may change the parsing of a URL, so 69c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// we don't want to unescape them sometimes. In many case we won't want to 70c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// unescape spaces, but that is controlled by parameters to Unescape*. 71c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// 72c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// The basic rule is that we can't unescape anything that would changing parsing 73c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// like # or ?. We also can't unescape &, =, or + since that could be part of a 7421d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen// query and that could change the server's parsing of the query. Nor can we 7521d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen// unescape \ since googleurl will convert it to a /. 7621d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen// 7721d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen// Lastly, we can't unescape anything that doesn't have a canonical 7821d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen// representation in a URL. This means that unescaping will change the URL, and 7921d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen// you could get different behavior if you copy and paste the URL, or press 8021d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen// enter in the URL bar. The list of characters that fall into this category 8121d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen// are the ones labeled PASS (allow either escaped or unescaped) in the big 8221d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen// lookup table at the top of googleurl/src/url_canon_path.cc 83c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottconst char kUrlUnescape[128] = { 84c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// NULL, control chars... 85c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 86c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 87c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// ' ' ! " # $ % & ' ( ) * + , - . / 8821d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 89c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// 0 1 2 3 4 5 6 7 8 9 : ; < = > ? 9021d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 91c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// @ A B C D E F G H I J K L M N O 92c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 93c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// P Q R S T U V W X Y Z [ \ ] ^ _ 9421d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 95c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// ` a b c d e f g h i j k l m n o 96c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 97c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// p q r s t u v w x y z { | } ~ <NBSP> 98c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0 99c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}; 100c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 101c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scotttemplate<typename STR> 102ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian MonsenSTR UnescapeURLWithOffsetsImpl(const STR& escaped_text, 103ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen UnescapeRule::Type rules, 104ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen std::vector<size_t>* offsets_for_adjustment) { 105ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen if (offsets_for_adjustment) { 106ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen std::for_each(offsets_for_adjustment->begin(), 107ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen offsets_for_adjustment->end(), 108ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen LimitOffset<std::wstring>(escaped_text.length())); 109ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen } 110c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Do not unescape anything, return the |escaped_text| text. 111c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (rules == UnescapeRule::NONE) 112c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return escaped_text; 113c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 114c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // The output of the unescaping is always smaller than the input, so we can 115c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // reserve the input size to make sure we have enough buffer and don't have 116c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // to allocate in the loop below. 117c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott STR result; 118c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott result.reserve(escaped_text.length()); 119c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 120ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen AdjustEncodingOffset::Adjustments adjustments; // Locations of adjusted text. 121c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott for (size_t i = 0, max = escaped_text.size(); i < max; ++i) { 122c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (static_cast<unsigned char>(escaped_text[i]) >= 128) { 123c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Non ASCII character, append as is. 124c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott result.push_back(escaped_text[i]); 125c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott continue; 126c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 127c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 128c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott char current_char = static_cast<char>(escaped_text[i]); 129c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (current_char == '%' && i + 2 < max) { 130c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott const typename STR::value_type most_sig_digit( 131c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott static_cast<typename STR::value_type>(escaped_text[i + 1])); 132c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott const typename STR::value_type least_sig_digit( 133c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott static_cast<typename STR::value_type>(escaped_text[i + 2])); 1343345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick if (IsHexDigit(most_sig_digit) && IsHexDigit(least_sig_digit)) { 1353345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick unsigned char value = HexDigitToInt(most_sig_digit) * 16 + 1363345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick HexDigitToInt(least_sig_digit); 137c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (value >= 0x80 || // Unescape all high-bit characters. 138c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // For 7-bit characters, the lookup table tells us all valid chars. 139c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott (kUrlUnescape[value] || 140c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // ...and we allow some additional unescaping when flags are set. 141c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott (value == ' ' && (rules & UnescapeRule::SPACES)) || 142c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Allow any of the prohibited but non-control characters when 143c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // we're doing "special" chars. 144c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott (value > ' ' && (rules & UnescapeRule::URL_SPECIAL_CHARS)) || 145c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Additionally allow control characters if requested. 146c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott (value < ' ' && (rules & UnescapeRule::CONTROL_CHARS)))) { 147c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Use the unescaped version of the character. 148ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen adjustments.push_back(i); 149c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott result.push_back(value); 150c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott i += 2; 151c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } else { 152c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Keep escaped. Append a percent and we'll get the following two 153c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // digits on the next loops through. 154c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott result.push_back('%'); 155c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 156c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } else { 157c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Invalid escape sequence, just pass the percent through and continue 158c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // right after it. 159c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott result.push_back('%'); 160c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 161c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } else if ((rules & UnescapeRule::REPLACE_PLUS_WITH_SPACE) && 162c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott escaped_text[i] == '+') { 163c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott result.push_back(' '); 164c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } else { 165c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Normal case for unescaped characters. 166c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott result.push_back(escaped_text[i]); 167c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 168c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 169c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 170ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // Make offset adjustment. 171ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen if (offsets_for_adjustment && !adjustments.empty()) { 172ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen std::for_each(offsets_for_adjustment->begin(), 173ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen offsets_for_adjustment->end(), 174ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen AdjustEncodingOffset(adjustments)); 175ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen } 176ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 177ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen return result; 178ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen} 179ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 180ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsentemplate<typename STR> 181ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian MonsenSTR UnescapeURLImpl(const STR& escaped_text, 182ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen UnescapeRule::Type rules, 183ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen size_t* offset_for_adjustment) { 184ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen std::vector<size_t> offsets; 185ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen if (offset_for_adjustment) 186ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen offsets.push_back(*offset_for_adjustment); 187ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen STR result = UnescapeURLWithOffsetsImpl(escaped_text, rules, &offsets); 188ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen if (offset_for_adjustment) 189ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen *offset_for_adjustment = offsets[0]; 190c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return result; 191c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 192c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 193c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} // namespace 194c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 195c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// Everything except alphanumerics and !'()*-._~ 196c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// See RFC 2396 for the list of reserved characters. 197c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottstatic const Charmap kQueryCharmap( 198c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 0xffffffffL, 0xfc00987dL, 0x78000001L, 0xb8000001L, 199c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 0xffffffffL, 0xffffffffL, 0xffffffffL, 0xffffffffL); 200c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 201c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottstd::string EscapeQueryParamValue(const std::string& text, bool use_plus) { 202c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return Escape(text, kQueryCharmap, use_plus); 203c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 204c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 205c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// Convert the string to a sequence of bytes and then % escape anything 206c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// except alphanumerics and !'()*-._~ 20772a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsenstring16 EscapeQueryParamValueUTF8(const string16& text, 20872a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen bool use_plus) { 20972a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen return UTF8ToUTF16(Escape(UTF16ToUTF8(text), kQueryCharmap, use_plus)); 210c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 211c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 212c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// non-printable, non-7bit, and (including space) "#%:<>?[\]^`{|} 213c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottstatic const Charmap kPathCharmap( 214c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 0xffffffffL, 0xd400002dL, 0x78000000L, 0xb8000001L, 215c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 0xffffffffL, 0xffffffffL, 0xffffffffL, 0xffffffffL); 216c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 217c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottstd::string EscapePath(const std::string& path) { 218c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return Escape(path, kPathCharmap, false); 219c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 220c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 221c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// non-printable, non-7bit, and (including space) ?>=<;+'&%$#"![\]^`{|} 222c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottstatic const Charmap kUrlEscape( 223c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 0xffffffffL, 0xf80008fdL, 0x78000001L, 0xb8000001L, 224c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 0xffffffffL, 0xffffffffL, 0xffffffffL, 0xffffffffL 225c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott); 226c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 227c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottstd::string EscapeUrlEncodedData(const std::string& path) { 228c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return Escape(path, kUrlEscape, true); 229c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 230c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 231c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// non-7bit 232c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottstatic const Charmap kNonASCIICharmap( 233c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 0x00000000L, 0x00000000L, 0x00000000L, 0x00000000L, 234c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 0xffffffffL, 0xffffffffL, 0xffffffffL, 0xffffffffL); 235c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 236c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottstd::string EscapeNonASCII(const std::string& input) { 237c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return Escape(input, kNonASCIICharmap, false); 238c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 239c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 240c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// Everything except alphanumerics, the reserved characters(;/?:@&=+$,) and 241c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// !'()*-._~% 242c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottstatic const Charmap kExternalHandlerCharmap( 243c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 0xffffffffL, 0x5000080dL, 0x68000000L, 0xb8000001L, 244c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 0xffffffffL, 0xffffffffL, 0xffffffffL, 0xffffffffL); 245c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 246c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottstd::string EscapeExternalHandlerValue(const std::string& text) { 247c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return Escape(text, kExternalHandlerCharmap, false); 248c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 249c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 250ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsenstring16 UnescapeAndDecodeUTF8URLComponentWithOffsets( 251ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen const std::string& text, 252ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen UnescapeRule::Type rules, 253ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen std::vector<size_t>* offsets_for_adjustment) { 254c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott std::wstring result; 255ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen std::vector<size_t> original_offsets; 256ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen if (offsets_for_adjustment) 257ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen original_offsets = *offsets_for_adjustment; 258c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott std::string unescaped_url( 259ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen UnescapeURLWithOffsetsImpl(text, rules, offsets_for_adjustment)); 260ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen if (UTF8ToWideAndAdjustOffsets(unescaped_url.data(), unescaped_url.length(), 261ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen &result, offsets_for_adjustment)) 262c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return WideToUTF16Hack(result); // Character set looks like it's valid. 263c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 264c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Not valid. Return the escaped version. Undo our changes to 265c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // |offset_for_adjustment| since we haven't changed the string after all. 266ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen if (offsets_for_adjustment) 267ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen *offsets_for_adjustment = original_offsets; 268ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen return WideToUTF16Hack(UTF8ToWideAndAdjustOffsets( 269ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen text, offsets_for_adjustment)); 270ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen} 271ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 272ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsenstring16 UnescapeAndDecodeUTF8URLComponent(const std::string& text, 273ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen UnescapeRule::Type rules, 274ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen size_t* offset_for_adjustment) { 275ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen std::vector<size_t> offsets; 276ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen if (offset_for_adjustment) 277ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen offsets.push_back(*offset_for_adjustment); 278ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen string16 result = 279ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen UnescapeAndDecodeUTF8URLComponentWithOffsets(text, rules, &offsets); 280c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (offset_for_adjustment) 281ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen *offset_for_adjustment = offsets[0]; 282ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen return result; 283c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 284c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 285c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottstd::string UnescapeURLComponent(const std::string& escaped_text, 286c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott UnescapeRule::Type rules) { 287ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen return UnescapeURLWithOffsetsImpl<std::string>(escaped_text, rules, NULL); 288c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 289c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 290c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottstring16 UnescapeURLComponent(const string16& escaped_text, 291c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott UnescapeRule::Type rules) { 292ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen return UnescapeURLWithOffsetsImpl<string16>(escaped_text, rules, NULL); 293c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 294c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 295c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 296c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scotttemplate <class str> 297c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottvoid AppendEscapedCharForHTMLImpl(typename str::value_type c, str* output) { 298c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott static const struct { 299c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott char key; 300c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott const char* replacement; 301c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } kCharsToEscape[] = { 302c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott { '<', "<" }, 303c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott { '>', ">" }, 304c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott { '&', "&" }, 305c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott { '"', """ }, 306c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott { '\'', "'" }, 307c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott }; 308c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott size_t k; 309c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott for (k = 0; k < ARRAYSIZE_UNSAFE(kCharsToEscape); ++k) { 310c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (c == kCharsToEscape[k].key) { 311c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott const char* p = kCharsToEscape[k].replacement; 312c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott while (*p) 313c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott output->push_back(*p++); 314c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott break; 315c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 316c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 317c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (k == ARRAYSIZE_UNSAFE(kCharsToEscape)) 318c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott output->push_back(c); 319c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 320c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 321c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottvoid AppendEscapedCharForHTML(char c, std::string* output) { 322c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott AppendEscapedCharForHTMLImpl(c, output); 323c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 324c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 325c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottvoid AppendEscapedCharForHTML(wchar_t c, string16* output) { 326c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott AppendEscapedCharForHTMLImpl(c, output); 327c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 328c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 329c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scotttemplate <class str> 330c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottstr EscapeForHTMLImpl(const str& input) { 331c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott str result; 332c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott result.reserve(input.size()); // optimize for no escaping 333c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 334c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott for (typename str::const_iterator it = input.begin(); it != input.end(); ++it) 335c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott AppendEscapedCharForHTMLImpl(*it, &result); 336c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 337c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return result; 338c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 339c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 340c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottstd::string EscapeForHTML(const std::string& input) { 341c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return EscapeForHTMLImpl(input); 342c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 343c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 344c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottstring16 EscapeForHTML(const string16& input) { 345c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return EscapeForHTMLImpl(input); 346c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 347c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 348c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottstring16 UnescapeForHTML(const string16& input) { 349c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott static const struct { 350c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott const wchar_t* ampersand_code; 351c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott const char replacement; 352c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } kEscapeToChars[] = { 353c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott { L"<", '<' }, 354c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott { L">", '>' }, 355c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott { L"&", '&' }, 356c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott { L""", '"' }, 357c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott { L"'", '\''}, 358c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott }; 359c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 360c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (input.find(WideToUTF16(L"&")) == std::string::npos) 361c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return input; 362c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 363c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott string16 ampersand_chars[ARRAYSIZE_UNSAFE(kEscapeToChars)]; 364c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott string16 text(input); 365c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott for (string16::iterator iter = text.begin(); iter != text.end(); ++iter) { 366c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (*iter == '&') { 367c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott // Potential ampersand encode char. 368c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott size_t index = iter - text.begin(); 369c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kEscapeToChars); i++) { 370c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (ampersand_chars[i].empty()) 371c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott ampersand_chars[i] = WideToUTF16(kEscapeToChars[i].ampersand_code); 372c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott if (text.find(ampersand_chars[i], index) == index) { 373c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott text.replace(iter, iter + ampersand_chars[i].length(), 374c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott 1, kEscapeToChars[i].replacement); 375c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott break; 376c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 377c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 378c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 379c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott } 380c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott return text; 381c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott} 382ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 383ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian MonsenAdjustEncodingOffset::AdjustEncodingOffset(const Adjustments& adjustments) 384ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen : adjustments(adjustments) {} 385ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 386ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsenvoid AdjustEncodingOffset::operator()(size_t& offset) { 387ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // For each encoded character occurring before an offset subtract 2. 388ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen if (offset == string16::npos) 389ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen return; 390ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen size_t adjusted_offset = offset; 391ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen for (Adjustments::const_iterator i = adjustments.begin(); 392ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen i != adjustments.end(); ++i) { 393ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen size_t location = *i; 394ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen if (offset <= location) { 395ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen offset = adjusted_offset; 396ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen return; 397ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen } 398ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen if (offset <= (location + 2)) { 399ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen offset = string16::npos; 400ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen return; 401ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen } 402ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen adjusted_offset -= 2; 403ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen } 404ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen offset = adjusted_offset; 405ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen} 406