// Copyright (c) 2010 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
572a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen#include <stdlib.h>
672a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen
7c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "base/logging.h"
8c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "base/string_util.h"
9c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "net/base/net_util.h"
10c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "net/tools/dump_cache/url_to_filename_encoder.h"
11c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
12c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochusing std::string;
13c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
14c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochnamespace {
15c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
16c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// Returns 1 if buf is prefixed by "num_digits" of hex digits
17c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// Teturns 0 otherwise.
18c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// The function checks for '\0' for string termination.
19c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochint HexDigitsPrefix(const char* buf, int num_digits) {
203345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  for (int i = 0; i < num_digits; i++) {
21c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    if (!IsHexDigit(buf[i]))
22c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      return 0;  // This also detects end of string as '\0' is not xdigit.
233345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  }
24c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  return 1;
25c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
26c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
27c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#ifdef WIN32
28c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#define strtoull _strtoui64
29c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#endif
30c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
31c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// A simple parser for long long values. Returns the parsed value if a
32c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// valid integer is found; else returns deflt
33c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// UInt64 and Int64 cannot handle decimal numbers with leading 0s.
34c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochuint64 ParseLeadingHex64Value(const char *str, uint64 deflt) {
35c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  char *error = NULL;
36c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  const uint64 value = strtoull(str, &error, 16);
37c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  return (error == str) ? deflt : value;
38c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
39c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
40c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
41c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
42c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochnamespace net {
43c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
44c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// The escape character choice is made here -- all code and tests in this
453345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick// directory are based off of this constant.  However, our testdata
46c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// has tons of dependencies on this, so it cannot be changed without
47c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// re-running those tests and fixing them.
483345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrickconst char UrlToFilenameEncoder::kEscapeChar = ',';
493345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrickconst char UrlToFilenameEncoder::kTruncationChar = '-';
503345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrickconst size_t UrlToFilenameEncoder::kMaximumSubdirectoryLength = 128;
51c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
523345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrickvoid UrlToFilenameEncoder::AppendSegment(string* segment, string* dest) {
533345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  CHECK(!segment->empty());
543345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  if ((*segment == ".") || (*segment == "..")) {
55c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    dest->append(1, kEscapeChar);
56c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    dest->append(*segment);
57c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    segment->clear();
58c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  } else {
59c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    size_t segment_size = segment->size();
60c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    if (segment_size > kMaximumSubdirectoryLength) {
61c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      // We need to inject ",-" at the end of the segment to signify that
62c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      // we are inserting an artificial '/'.  This means we have to chop
63c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      // off at least two characters to make room.
64c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      segment_size = kMaximumSubdirectoryLength - 2;
65c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
66c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      // But we don't want to break up an escape sequence that happens to lie at
67c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      // the end.  Escape sequences are at most 2 characters.
68c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      if ((*segment)[segment_size - 1] == kEscapeChar) {
69c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch        segment_size -= 1;
70c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      } else if ((*segment)[segment_size - 2] == kEscapeChar) {
71c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch        segment_size -= 2;
72c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      }
73c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      dest->append(segment->data(), segment_size);
74c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      dest->append(1, kEscapeChar);
75c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      dest->append(1, kTruncationChar);
76c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      segment->erase(0, segment_size);
77c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
78c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      // At this point, if we had segment_size=3, and segment="abcd",
79c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      // then after this erase, we will have written "abc,-" and set segment="d"
80c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    } else {
81c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      dest->append(*segment);
82c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      segment->clear();
83c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    }
84c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  }
85c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
86c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
87c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochvoid UrlToFilenameEncoder::EncodeSegment(const string& filename_prefix,
883345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick                                         const string& escaped_ending,
89c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch                                         char dir_separator,
90c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch                                         string* encoded_filename) {
913345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  string filename_ending = UrlUtilities::Unescape(escaped_ending);
923345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick
93c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  char encoded[3];
94c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  int encoded_len;
95c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  string segment;
96c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
97c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // TODO(jmarantz): This code would be a bit simpler if we disallowed
98c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // Instaweb allowing filename_prefix to not end in "/".  We could
99c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // then change the is routine to just take one input string.
100c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  size_t start_of_segment = filename_prefix.find_last_of(dir_separator);
101c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  if (start_of_segment == string::npos) {
102c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    segment = filename_prefix;
103c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  } else {
104c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    segment = filename_prefix.substr(start_of_segment + 1);
105c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    *encoded_filename = filename_prefix.substr(0, start_of_segment + 1);
106c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  }
107c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
108c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  size_t index = 0;
109c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // Special case the first / to avoid adding a leading kEscapeChar.
110c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  if (!filename_ending.empty() && (filename_ending[0] == dir_separator)) {
111c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    encoded_filename->append(segment);
112c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    segment.clear();
113c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    encoded_filename->append(1, dir_separator);
114c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    ++index;
115c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  }
116c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
117c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  for (; index < filename_ending.length(); ++index) {
118c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    unsigned char ch = static_cast<unsigned char>(filename_ending[index]);
119c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
1203345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    // Note: instead of outputing an empty segment, we let the second slash
1213345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    // be escaped below.
1223345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    if ((ch == dir_separator) && !segment.empty()) {
1233345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick      AppendSegment(&segment, encoded_filename);
124c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      encoded_filename->append(1, dir_separator);
125c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      segment.clear();
126c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    } else {
1273345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick      // After removing unsafe chars the only safe ones are _.=+- and alphanums.
1283345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick      if ((ch == '_') || (ch == '.') || (ch == '=') || (ch == '+') ||
1293345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick          (ch == '-') || (('0' <= ch) && (ch <= '9')) ||
1303345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick          (('A' <= ch) && (ch <= 'Z')) || (('a' <= ch) && (ch <= 'z'))) {
131c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch        encoded[0] = ch;
132c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch        encoded_len = 1;
133c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      } else {
134c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch        encoded[0] = kEscapeChar;
135c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch        encoded[1] = ch / 16;
136c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch        encoded[1] += (encoded[1] >= 10) ? 'A' - 10 : '0';
137c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch        encoded[2] = ch % 16;
138c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch        encoded[2] += (encoded[2] >= 10) ? 'A' - 10 : '0';
139c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch        encoded_len = 3;
140c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      }
141c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      segment.append(encoded, encoded_len);
142c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
1433345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick      // If segment is too big, we must chop it into chunks.
144c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      if (segment.size() > kMaximumSubdirectoryLength) {
1453345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick        AppendSegment(&segment, encoded_filename);
146c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch        encoded_filename->append(1, dir_separator);
147c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      }
148c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    }
149c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  }
150c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
151c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // Append "," to the leaf filename so the leaf can also be a branch., e.g.
152c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // allow http://a/b/c and http://a/b/c/d to co-exist as files "/a/b/c," and
153c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // /a/b/c/d".  So we will rename the "d" here to "d,".  If doing that pushed
154c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // us over the 128 char limit, then we will need to append "/" and the
155c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // remaining chars.
156c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  segment += kEscapeChar;
1573345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  AppendSegment(&segment, encoded_filename);
158c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  if (!segment.empty()) {
159c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    // The last overflow segment is special, because we appended in
160c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    // kEscapeChar above.  We won't need to check it again for size
161c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    // or further escaping.
162c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    encoded_filename->append(1, dir_separator);
163c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    encoded_filename->append(segment);
164c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  }
165c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
166c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
167c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// Note: this decoder is not the exact inverse of the EncodeSegment above,
168c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// because it does not take into account a prefix.
169c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochbool UrlToFilenameEncoder::Decode(const string& encoded_filename,
170c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch                                  char dir_separator,
171c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch                                  string* decoded_url) {
172c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  enum State {
173c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    kStart,
174c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    kEscape,
175c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    kFirstDigit,
176c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    kTruncate,
177c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    kEscapeDot
178c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  };
179c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  State state = kStart;
180c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  int char_code = 0;
181c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  char hex_buffer[3];
182c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  hex_buffer[2] = '\0';
183c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  for (size_t i = 0; i < encoded_filename.size(); ++i) {
184c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    char ch = encoded_filename[i];
185c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    switch (state) {
186c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      case kStart:
187c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch        if (ch == kEscapeChar) {
188c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch          state = kEscape;
1893345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick        } else if (ch == dir_separator) {
1903345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick          decoded_url->append(1, '/');  // URLs only use '/' not '\\'
191c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch        } else {
192c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch          decoded_url->append(1, ch);
193c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch        }
194c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch        break;
195c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      case kEscape:
196c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch        if (HexDigitsPrefix(&ch, 1) == 1) {
197c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch          hex_buffer[0] = ch;
198c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch          state = kFirstDigit;
199c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch        } else if (ch == kTruncationChar) {
200c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch          state = kTruncate;
201c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch        } else if (ch == '.') {
202c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch          decoded_url->append(1, '.');
203c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch          state = kEscapeDot;  // Look for at most one more dot.
204c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch        } else if (ch == dir_separator) {
2053345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick          // Consider url "//x".  This was once encoded to "/,/x,".
206c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch          // This code is what skips the first Escape.
2073345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick          decoded_url->append(1, '/');  // URLs only use '/' not '\\'
208c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch          state = kStart;
209c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch        } else {
210c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch          return false;
211c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch        }
212c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch        break;
213c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      case kFirstDigit:
214c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch        if (HexDigitsPrefix(&ch, 1) == 1) {
215c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch          hex_buffer[1] = ch;
216c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch          uint64 hex_value = ParseLeadingHex64Value(hex_buffer, 0);
217c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch          decoded_url->append(1, static_cast<char>(hex_value));
218c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch          char_code = 0;
219c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch          state = kStart;
220c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch        } else {
221c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch          return false;
222c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch        }
223c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch        break;
224c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      case kTruncate:
225c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch        if (ch == dir_separator) {
226c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch          // Skip this separator, it was only put in to break up long
227c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch          // path segments, but is not part of the URL.
228c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch          state = kStart;
229c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch        } else {
230c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch          return false;
231c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch        }
232c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch        break;
233c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      case kEscapeDot:
234c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch        decoded_url->append(1, ch);
235c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch        state = kStart;
236c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch        break;
237c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    }
238c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  }
239c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
240c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // All legal encoded filenames end in kEscapeChar.
241c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  return (state == kEscape);
242c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
243c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
2443345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick// Escape the given input |path| and chop any individual components
245c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// of the path which are greater than kMaximumSubdirectoryLength characters
246c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// into two chunks.
247c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch//
248c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// This legacy version has several issues with aliasing of different URLs,
249c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// inability to represent both /a/b/c and /a/b/c/d, and inability to decode
250c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// the filenames back into URLs.
251c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch//
252c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// But there is a large body of slurped data which depends on this format,
253c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// so leave it as the default for spdy_in_mem_edsm_server.
254c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochstring UrlToFilenameEncoder::LegacyEscape(const string& path) {
255c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  string output;
256c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
257c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // Note:  We also chop paths into medium sized 'chunks'.
258c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  //        This is due to the incompetence of the windows
259c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  //        filesystem, which still hasn't figured out how
260c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  //        to deal with long filenames.
261c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  int last_slash = 0;
262c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  for (size_t index = 0; index < path.length(); index++) {
263c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    char ch = path[index];
264c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    if (ch == 0x5C)
265c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      last_slash = index;
266c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    if ((ch == 0x2D) ||                    // hyphen
267c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch        (ch == 0x5C) || (ch == 0x5F) ||    // backslash, underscore
268c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch        ((0x30 <= ch) && (ch <= 0x39)) ||  // Digits [0-9]
269c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch        ((0x41 <= ch) && (ch <= 0x5A)) ||  // Uppercase [A-Z]
270c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch        ((0x61 <= ch) && (ch <= 0x7A))) {  // Lowercase [a-z]
271c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      output.append(&path[index], 1);
272c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    } else {
273c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      char encoded[3];
274c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      encoded[0] = 'x';
275c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      encoded[1] = ch / 16;
276c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      encoded[1] += (encoded[1] >= 10) ? 'A' - 10 : '0';
277c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      encoded[2] = ch % 16;
278c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      encoded[2] += (encoded[2] >= 10) ? 'A' - 10 : '0';
279c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      output.append(encoded, 3);
280c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    }
281c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    if (index - last_slash > kMaximumSubdirectoryLength) {
282c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#ifdef WIN32
283c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      char slash = '\\';
284c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#else
285c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      char slash = '/';
286c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#endif
287c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      output.append(&slash, 1);
288c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      last_slash = index;
289c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    }
290c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  }
291c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  return output;
292c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
293c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
294c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}  // namespace net
295