172a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen// Copyright (c) 2011 The Chromium Authors. All rights reserved.
2c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// Use of this source code is governed by a BSD-style license that can be
3c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// found in the LICENSE file.
4c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
5c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include "net/base/escape.h"
6c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
7ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen#include <algorithm>
8ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen
9c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include "base/logging.h"
10ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen#include "base/scoped_ptr.h"
11c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include "base/string_piece.h"
123345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick#include "base/string_util.h"
13c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include "base/utf_string_conversions.h"
14c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott#include "base/utf_offset_string_conversions.h"
15c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
16c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottnamespace {
17c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
18c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottstatic const char* const kHexString = "0123456789ABCDEF";
19c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottinline char IntToHex(int i) {
20c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  DCHECK(i >= 0 && i <= 15) << i << " not a hex value";
21c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  return kHexString[i];
22c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
23c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
24c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// A fast bit-vector map for ascii characters.
25c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott//
26c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// Internally stores 256 bits in an array of 8 ints.
27c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// Does quick bit-flicking to lookup needed characters.
28c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottclass Charmap {
29c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott public:
30c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  Charmap(uint32 b0, uint32 b1, uint32 b2, uint32 b3,
31c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott          uint32 b4, uint32 b5, uint32 b6, uint32 b7) {
32c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    map_[0] = b0; map_[1] = b1; map_[2] = b2; map_[3] = b3;
33c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    map_[4] = b4; map_[5] = b5; map_[6] = b6; map_[7] = b7;
34c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
35c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
36c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  bool Contains(unsigned char c) const {
37c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    return (map_[c >> 5] & (1 << (c & 31))) ? true : false;
38c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
39c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
40c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott private:
41c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  uint32 map_[8];
42c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott};
43c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
44c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// Given text to escape and a Charmap defining which values to escape,
45c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// return an escaped string.  If use_plus is true, spaces are converted
46c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// to +, otherwise, if spaces are in the charmap, they are converted to
47c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// %20.
48ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsenstd::string Escape(const std::string& text, const Charmap& charmap,
49ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen                   bool use_plus) {
50c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  std::string escaped;
51c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  escaped.reserve(text.length() * 3);
52c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  for (unsigned int i = 0; i < text.length(); ++i) {
53c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    unsigned char c = static_cast<unsigned char>(text[i]);
54c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    if (use_plus && ' ' == c) {
55c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      escaped.push_back('+');
56c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    } else if (charmap.Contains(c)) {
57c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      escaped.push_back('%');
58c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      escaped.push_back(IntToHex(c >> 4));
59c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      escaped.push_back(IntToHex(c & 0xf));
60c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    } else {
61c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      escaped.push_back(c);
62c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    }
63c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
64c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  return escaped;
65c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
66c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
// Lookup table indexed by 7-bit character value. An entry is nonzero when the
// corresponding character may be unescaped in a normal URL, and zero when its
// escaped form has to be kept. The zero entries are characters that may change
// the parsing of a URL. In many cases we also won't want to unescape spaces,
// but that is controlled by the parameters to Unescape*.
//
// The basic rule is that we can't unescape anything that would change parsing,
// like # or ?. We also can't unescape &, =, or + since those could be part of
// a query and unescaping them could change the server's parsing of the query.
// Nor can we unescape \ since googleurl will convert it to a /.
//
// Lastly, we can't unescape anything that doesn't have a canonical
// representation in a URL. Unescaping such a character changes the URL, and
// you could get different behavior if you copy and paste the URL, or press
// enter in the URL bar. The characters in this category are the ones labeled
// PASS (allow either escaped or unescaped) in the big lookup table at the top
// of googleurl/src/url_canon_path.cc
const char kUrlUnescape[128] = {
  // 0x00 - 0x0F: NUL and control characters.
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  // 0x10 - 0x1F: remaining control characters.
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  // ' ' !  "  #  $  %  &  '  (  )  *  +  ,  -  .  /
  0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
  // 0  1  2  3  4  5  6  7  8  9  :  ;  <  =  >  ?
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0,
  // @  A  B  C  D  E  F  G  H  I  J  K  L  M  N  O
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  // P  Q  R  S  T  U  V  W  X  Y  Z  [  \  ]  ^  _
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1,
  // `  a  b  c  d  e  f  g  h  i  j  k  l  m  n  o
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  // p  q  r  s  t  u  v  w  x  y  z  {  |  }  ~  DEL
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0
};
100c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
101c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scotttemplate<typename STR>
102ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian MonsenSTR UnescapeURLWithOffsetsImpl(const STR& escaped_text,
103ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen                               UnescapeRule::Type rules,
104ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen                               std::vector<size_t>* offsets_for_adjustment) {
105ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  if (offsets_for_adjustment) {
106ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    std::for_each(offsets_for_adjustment->begin(),
107ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen                  offsets_for_adjustment->end(),
108ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen                  LimitOffset<std::wstring>(escaped_text.length()));
109ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  }
110c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Do not unescape anything, return the |escaped_text| text.
111c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  if (rules == UnescapeRule::NONE)
112c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    return escaped_text;
113c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
114c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // The output of the unescaping is always smaller than the input, so we can
115c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // reserve the input size to make sure we have enough buffer and don't have
116c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // to allocate in the loop below.
117c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  STR result;
118c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  result.reserve(escaped_text.length());
119c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
120ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  AdjustEncodingOffset::Adjustments adjustments;  // Locations of adjusted text.
121c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  for (size_t i = 0, max = escaped_text.size(); i < max; ++i) {
122c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    if (static_cast<unsigned char>(escaped_text[i]) >= 128) {
123c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      // Non ASCII character, append as is.
124c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      result.push_back(escaped_text[i]);
125c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      continue;
126c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    }
127c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
128c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    char current_char = static_cast<char>(escaped_text[i]);
129c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    if (current_char == '%' && i + 2 < max) {
130c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      const typename STR::value_type most_sig_digit(
131c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott          static_cast<typename STR::value_type>(escaped_text[i + 1]));
132c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      const typename STR::value_type least_sig_digit(
133c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott          static_cast<typename STR::value_type>(escaped_text[i + 2]));
1343345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick      if (IsHexDigit(most_sig_digit) && IsHexDigit(least_sig_digit)) {
1353345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick        unsigned char value = HexDigitToInt(most_sig_digit) * 16 +
1363345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick            HexDigitToInt(least_sig_digit);
137c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        if (value >= 0x80 ||  // Unescape all high-bit characters.
138c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott            // For 7-bit characters, the lookup table tells us all valid chars.
139c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott            (kUrlUnescape[value] ||
140c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott             // ...and we allow some additional unescaping when flags are set.
141c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott             (value == ' ' && (rules & UnescapeRule::SPACES)) ||
142c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott             // Allow any of the prohibited but non-control characters when
143c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott             // we're doing "special" chars.
144c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott             (value > ' ' && (rules & UnescapeRule::URL_SPECIAL_CHARS)) ||
145c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott             // Additionally allow control characters if requested.
146c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott             (value < ' ' && (rules & UnescapeRule::CONTROL_CHARS)))) {
147c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott          // Use the unescaped version of the character.
148ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen          adjustments.push_back(i);
149c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott          result.push_back(value);
150c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott          i += 2;
151c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        } else {
152c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott          // Keep escaped. Append a percent and we'll get the following two
153c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott          // digits on the next loops through.
154c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott          result.push_back('%');
155c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        }
156c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      } else {
157c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        // Invalid escape sequence, just pass the percent through and continue
158c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        // right after it.
159c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        result.push_back('%');
160c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      }
161c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    } else if ((rules & UnescapeRule::REPLACE_PLUS_WITH_SPACE) &&
162c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott               escaped_text[i] == '+') {
163c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      result.push_back(' ');
164c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    } else {
165c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      // Normal case for unescaped characters.
166c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      result.push_back(escaped_text[i]);
167c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    }
168c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
169c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
170ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  // Make offset adjustment.
171ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  if (offsets_for_adjustment && !adjustments.empty()) {
172ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    std::for_each(offsets_for_adjustment->begin(),
173ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen                   offsets_for_adjustment->end(),
174ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen                   AdjustEncodingOffset(adjustments));
175ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  }
176ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen
177ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  return result;
178ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen}
179ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen
180ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsentemplate<typename STR>
181ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian MonsenSTR UnescapeURLImpl(const STR& escaped_text,
182ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen                    UnescapeRule::Type rules,
183ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen                    size_t* offset_for_adjustment) {
184ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  std::vector<size_t> offsets;
185ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  if (offset_for_adjustment)
186ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    offsets.push_back(*offset_for_adjustment);
187ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  STR result = UnescapeURLWithOffsetsImpl(escaped_text, rules, &offsets);
188ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  if (offset_for_adjustment)
189ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    *offset_for_adjustment = offsets[0];
190c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  return result;
191c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
192c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
193c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}  // namespace
194c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
195c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// Everything except alphanumerics and !'()*-._~
196c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// See RFC 2396 for the list of reserved characters.
197c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottstatic const Charmap kQueryCharmap(
198c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  0xffffffffL, 0xfc00987dL, 0x78000001L, 0xb8000001L,
199c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  0xffffffffL, 0xffffffffL, 0xffffffffL, 0xffffffffL);
200c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
201c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottstd::string EscapeQueryParamValue(const std::string& text, bool use_plus) {
202c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  return Escape(text, kQueryCharmap, use_plus);
203c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
204c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
205c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// Convert the string to a sequence of bytes and then % escape anything
206c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// except alphanumerics and !'()*-._~
20772a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsenstring16 EscapeQueryParamValueUTF8(const string16& text,
20872a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen                                   bool use_plus) {
20972a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen  return UTF8ToUTF16(Escape(UTF16ToUTF8(text), kQueryCharmap, use_plus));
210c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
211c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
212c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// non-printable, non-7bit, and (including space)  "#%:<>?[\]^`{|}
213c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottstatic const Charmap kPathCharmap(
214c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  0xffffffffL, 0xd400002dL, 0x78000000L, 0xb8000001L,
215c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  0xffffffffL, 0xffffffffL, 0xffffffffL, 0xffffffffL);
216c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
217c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottstd::string EscapePath(const std::string& path) {
218c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  return Escape(path, kPathCharmap, false);
219c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
220c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
221c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// non-printable, non-7bit, and (including space) ?>=<;+'&%$#"![\]^`{|}
222c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottstatic const Charmap kUrlEscape(
223c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  0xffffffffL, 0xf80008fdL, 0x78000001L, 0xb8000001L,
224c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  0xffffffffL, 0xffffffffL, 0xffffffffL, 0xffffffffL
225c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott);
226c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
227c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottstd::string EscapeUrlEncodedData(const std::string& path) {
228c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  return Escape(path, kUrlEscape, true);
229c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
230c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
231c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// non-7bit
232c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottstatic const Charmap kNonASCIICharmap(
233c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  0x00000000L, 0x00000000L, 0x00000000L, 0x00000000L,
234c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  0xffffffffL, 0xffffffffL, 0xffffffffL, 0xffffffffL);
235c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
236c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottstd::string EscapeNonASCII(const std::string& input) {
237c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  return Escape(input, kNonASCIICharmap, false);
238c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
239c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
240c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// Everything except alphanumerics, the reserved characters(;/?:@&=+$,) and
241c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott// !'()*-._~%
242c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottstatic const Charmap kExternalHandlerCharmap(
243c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  0xffffffffL, 0x5000080dL, 0x68000000L, 0xb8000001L,
244c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  0xffffffffL, 0xffffffffL, 0xffffffffL, 0xffffffffL);
245c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
246c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottstd::string EscapeExternalHandlerValue(const std::string& text) {
247c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  return Escape(text, kExternalHandlerCharmap, false);
248c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
249c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
250ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsenstring16 UnescapeAndDecodeUTF8URLComponentWithOffsets(
251ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    const std::string& text,
252ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    UnescapeRule::Type rules,
253ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    std::vector<size_t>* offsets_for_adjustment) {
254c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  std::wstring result;
255ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  std::vector<size_t> original_offsets;
256ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  if (offsets_for_adjustment)
257ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    original_offsets = *offsets_for_adjustment;
258c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  std::string unescaped_url(
259ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen      UnescapeURLWithOffsetsImpl(text, rules, offsets_for_adjustment));
260ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  if (UTF8ToWideAndAdjustOffsets(unescaped_url.data(), unescaped_url.length(),
261ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen                                &result, offsets_for_adjustment))
262c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    return WideToUTF16Hack(result);      // Character set looks like it's valid.
263c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
264c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // Not valid.  Return the escaped version.  Undo our changes to
265c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  // |offset_for_adjustment| since we haven't changed the string after all.
266ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  if (offsets_for_adjustment)
267ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    *offsets_for_adjustment = original_offsets;
268ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  return WideToUTF16Hack(UTF8ToWideAndAdjustOffsets(
269ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen      text, offsets_for_adjustment));
270ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen}
271ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen
272ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsenstring16 UnescapeAndDecodeUTF8URLComponent(const std::string& text,
273ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen                                           UnescapeRule::Type rules,
274ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen                                           size_t* offset_for_adjustment) {
275ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  std::vector<size_t> offsets;
276ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  if (offset_for_adjustment)
277ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    offsets.push_back(*offset_for_adjustment);
278ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  string16 result =
279ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen      UnescapeAndDecodeUTF8URLComponentWithOffsets(text, rules, &offsets);
280c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  if (offset_for_adjustment)
281ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    *offset_for_adjustment = offsets[0];
282ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  return result;
283c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
284c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
285c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottstd::string UnescapeURLComponent(const std::string& escaped_text,
286c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                                 UnescapeRule::Type rules) {
287ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  return UnescapeURLWithOffsetsImpl<std::string>(escaped_text, rules, NULL);
288c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
289c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
290c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottstring16 UnescapeURLComponent(const string16& escaped_text,
291c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                              UnescapeRule::Type rules) {
292ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  return UnescapeURLWithOffsetsImpl<string16>(escaped_text, rules, NULL);
293c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
294c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
295c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
// Appends |c| to |output|, substituting the matching HTML entity when |c| is
// one of < > & " '. Every other character is appended unchanged.
template <class str>
void AppendEscapedCharForHTMLImpl(typename str::value_type c, str* output) {
  const char* entity;
  switch (c) {
    case '<':
      entity = "&lt;";
      break;
    case '>':
      entity = "&gt;";
      break;
    case '&':
      entity = "&amp;";
      break;
    case '"':
      entity = "&quot;";
      break;
    case '\'':
      entity = "&#39;";
      break;
    default:
      // Not a character that needs escaping.
      output->push_back(c);
      return;
  }
  // Copy the entity one character at a time so the same code serves both
  // narrow and wide output strings.
  for (; *entity; ++entity)
    output->push_back(*entity);
}
320c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
321c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottvoid AppendEscapedCharForHTML(char c, std::string* output) {
322c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  AppendEscapedCharForHTMLImpl(c, output);
323c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
324c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
325c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottvoid AppendEscapedCharForHTML(wchar_t c, string16* output) {
326c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  AppendEscapedCharForHTMLImpl(c, output);
327c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
328c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
// Returns a copy of |input| in which each character has been passed through
// AppendEscapedCharForHTMLImpl().
template <class str>
str EscapeForHTMLImpl(const str& input) {
  str escaped;
  // Sized for the common case in which nothing needs escaping.
  escaped.reserve(input.size());

  for (typename str::size_type pos = 0; pos < input.size(); ++pos)
    AppendEscapedCharForHTMLImpl(input[pos], &escaped);

  return escaped;
}
339c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
340c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottstd::string EscapeForHTML(const std::string& input) {
341c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  return EscapeForHTMLImpl(input);
342c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
343c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
344c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottstring16 EscapeForHTML(const string16& input) {
345c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  return EscapeForHTMLImpl(input);
346c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
347c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
348c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scottstring16 UnescapeForHTML(const string16& input) {
349c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  static const struct {
350c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    const wchar_t* ampersand_code;
351c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    const char replacement;
352c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  } kEscapeToChars[] = {
353c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    { L"&lt;", '<' },
354c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    { L"&gt;", '>' },
355c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    { L"&amp;", '&' },
356c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    { L"&quot;", '"' },
357c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    { L"&#39;", '\''},
358c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  };
359c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
360c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  if (input.find(WideToUTF16(L"&")) == std::string::npos)
361c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    return input;
362c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott
363c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  string16 ampersand_chars[ARRAYSIZE_UNSAFE(kEscapeToChars)];
364c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  string16 text(input);
365c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  for (string16::iterator iter = text.begin(); iter != text.end(); ++iter) {
366c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    if (*iter == '&') {
367c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      // Potential ampersand encode char.
368c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      size_t index = iter - text.begin();
369c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kEscapeToChars); i++) {
370c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        if (ampersand_chars[i].empty())
371c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott          ampersand_chars[i] = WideToUTF16(kEscapeToChars[i].ampersand_code);
372c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        if (text.find(ampersand_chars[i], index) == index) {
373c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott          text.replace(iter, iter + ampersand_chars[i].length(),
374c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott                       1, kEscapeToChars[i].replacement);
375c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott          break;
376c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott        }
377c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott      }
378c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott    }
379c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  }
380c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott  return text;
381c7f5f8508d98d5952d42ed7648c2a8f30a4da156Patrick Scott}
382ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen
383ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian MonsenAdjustEncodingOffset::AdjustEncodingOffset(const Adjustments& adjustments)
384ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  : adjustments(adjustments) {}
385ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen
386ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsenvoid AdjustEncodingOffset::operator()(size_t& offset) {
387ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  // For each encoded character occurring before an offset subtract 2.
388ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  if (offset == string16::npos)
389ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    return;
390ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  size_t adjusted_offset = offset;
391ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  for (Adjustments::const_iterator i = adjustments.begin();
392ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen       i != adjustments.end(); ++i) {
393ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    size_t location = *i;
394ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    if (offset <= location) {
395ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen      offset = adjusted_offset;
396ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen      return;
397ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    }
398ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    if (offset <= (location + 2)) {
399ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen      offset = string16::npos;
400ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen      return;
401ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    }
402ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    adjusted_offset -= 2;
403ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  }
404ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  offset = adjusted_offset;
405ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen}
406