// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4cb2c86fdf03241fee32fd2e2caff43af1022403cJens Axboe
5cb2c86fdf03241fee32fd2e2caff43af1022403cJens Axboe#include <stdlib.h>
6cb2c86fdf03241fee32fd2e2caff43af1022403cJens Axboe
7cb2c86fdf03241fee32fd2e2caff43af1022403cJens Axboe#include "base/logging.h"
8cb2c86fdf03241fee32fd2e2caff43af1022403cJens Axboe#include "base/strings/string_util.h"
9cb2c86fdf03241fee32fd2e2caff43af1022403cJens Axboe#include "net/base/net_util.h"
10cb2c86fdf03241fee32fd2e2caff43af1022403cJens Axboe#include "net/tools/dump_cache/url_to_filename_encoder.h"
1188b5a391bc2f9eb85066219e453682a746730eadAaron Carroll
12833491908a1afd67d27ce79257de3a4d80143d9fYu-ju Hongusing std::string;
13ab9461eaea21e861cc777aae2420db8f486ed1e2Bruce Cran
14cb2c86fdf03241fee32fd2e2caff43af1022403cJens Axboenamespace {
15cb2c86fdf03241fee32fd2e2caff43af1022403cJens Axboe
16a3d741fa3bc3120d5b62a56826a97524daa32803Jens Axboe// Returns 1 if buf is prefixed by "num_digits" of hex digits
179f988e2ebb3bff7087cc9681a54bd7f0d0e42140Jens Axboe// Teturns 0 otherwise.
18e25839d4cb5fefcb5ffce76128a4faedb177e7afJens Axboe// The function checks for '\0' for string termination.
19fd112d34a2cfdc2d9efcd394e38b6d87b357c23dVincent Kang Fuint HexDigitsPrefix(const char* buf, int num_digits) {
20cb2c86fdf03241fee32fd2e2caff43af1022403cJens Axboe  for (int i = 0; i < num_digits; i++) {
219af4a24408ea7d4cea084a4fe214b81145cc36acJens Axboe    if (!IsHexDigit(buf[i]))
223b8b7135602a4d3a7132fee10da9c1203ab643adJens Axboe      return 0;  // This also detects end of string as '\0' is not xdigit.
23f085737f15364ba194e5582b19a71eb35d4b8d11Jens Axboe  }
24f085737f15364ba194e5582b19a71eb35d4b8d11Jens Axboe  return 1;
25f085737f15364ba194e5582b19a71eb35d4b8d11Jens Axboe}
26f085737f15364ba194e5582b19a71eb35d4b8d11Jens Axboe
27f085737f15364ba194e5582b19a71eb35d4b8d11Jens Axboe#ifdef WIN32
28f085737f15364ba194e5582b19a71eb35d4b8d11Jens Axboe#define strtoull _strtoui64
29f085737f15364ba194e5582b19a71eb35d4b8d11Jens Axboe#endif
30f085737f15364ba194e5582b19a71eb35d4b8d11Jens Axboe
31ce952ab65a46f728ffada9613bb50ace7aeaa7c8Jens Axboe// A simple parser for long long values. Returns the parsed value if a
32f085737f15364ba194e5582b19a71eb35d4b8d11Jens Axboe// valid integer is found; else returns deflt
33f085737f15364ba194e5582b19a71eb35d4b8d11Jens Axboe// UInt64 and Int64 cannot handle decimal numbers with leading 0s.
34f085737f15364ba194e5582b19a71eb35d4b8d11Jens Axboeuint64 ParseLeadingHex64Value(const char *str, uint64 deflt) {
35f085737f15364ba194e5582b19a71eb35d4b8d11Jens Axboe  char *error = NULL;
36f085737f15364ba194e5582b19a71eb35d4b8d11Jens Axboe  const uint64 value = strtoull(str, &error, 16);
37f085737f15364ba194e5582b19a71eb35d4b8d11Jens Axboe  return (error == str) ? deflt : value;
38f085737f15364ba194e5582b19a71eb35d4b8d11Jens Axboe}
39f085737f15364ba194e5582b19a71eb35d4b8d11Jens Axboe
40f085737f15364ba194e5582b19a71eb35d4b8d11Jens Axboe}
41f085737f15364ba194e5582b19a71eb35d4b8d11Jens Axboe
42f085737f15364ba194e5582b19a71eb35d4b8d11Jens Axboenamespace net {
43f085737f15364ba194e5582b19a71eb35d4b8d11Jens Axboe
// The escape character choice is made here -- all code and tests in this
// directory are based off of this constant.  However, our testdata
// has tons of dependencies on this, so it cannot be changed without
// re-running those tests and fixing them.
const char UrlToFilenameEncoder::kEscapeChar = ',';
// Written right after kEscapeChar by AppendSegment() to mark the point where
// an over-long segment was split.
const char UrlToFilenameEncoder::kTruncationChar = '-';
// Segments longer than this many characters are split by AppendSegment().
const size_t UrlToFilenameEncoder::kMaximumSubdirectoryLength = 128;
51b1ec1da670aab645e32303ea5ffaa1e5ca336936Jens Axboe
523c037bcf2334ca75b23103a954232a48e4ce6dc4Jens Axboevoid UrlToFilenameEncoder::AppendSegment(string* segment, string* dest) {
53ab9461eaea21e861cc777aae2420db8f486ed1e2Bruce Cran  CHECK(!segment->empty());
54833491908a1afd67d27ce79257de3a4d80143d9fYu-ju Hong  if ((*segment == ".") || (*segment == "..")) {
55833491908a1afd67d27ce79257de3a4d80143d9fYu-ju Hong    dest->append(1, kEscapeChar);
5606b0be6efb3da5131bc8386251d017f0abafbdacJens Axboe    dest->append(*segment);
57ab9461eaea21e861cc777aae2420db8f486ed1e2Bruce Cran    segment->clear();
5806b0be6efb3da5131bc8386251d017f0abafbdacJens Axboe  } else {
5906b0be6efb3da5131bc8386251d017f0abafbdacJens Axboe    size_t segment_size = segment->size();
603c037bcf2334ca75b23103a954232a48e4ce6dc4Jens Axboe    if (segment_size > kMaximumSubdirectoryLength) {
61833491908a1afd67d27ce79257de3a4d80143d9fYu-ju Hong      // We need to inject ",-" at the end of the segment to signify that
62833491908a1afd67d27ce79257de3a4d80143d9fYu-ju Hong      // we are inserting an artificial '/'.  This means we have to chop
63b1ec1da670aab645e32303ea5ffaa1e5ca336936Jens Axboe      // off at least two characters to make room.
6406b0be6efb3da5131bc8386251d017f0abafbdacJens Axboe      segment_size = kMaximumSubdirectoryLength - 2;
65833491908a1afd67d27ce79257de3a4d80143d9fYu-ju Hong
6606b0be6efb3da5131bc8386251d017f0abafbdacJens Axboe      // But we don't want to break up an escape sequence that happens to lie at
6706b0be6efb3da5131bc8386251d017f0abafbdacJens Axboe      // the end.  Escape sequences are at most 2 characters.
68833491908a1afd67d27ce79257de3a4d80143d9fYu-ju Hong      if ((*segment)[segment_size - 1] == kEscapeChar) {
69b1ec1da670aab645e32303ea5ffaa1e5ca336936Jens Axboe        segment_size -= 1;
70b1ec1da670aab645e32303ea5ffaa1e5ca336936Jens Axboe      } else if ((*segment)[segment_size - 2] == kEscapeChar) {
71b1ec1da670aab645e32303ea5ffaa1e5ca336936Jens Axboe        segment_size -= 2;
72b1ec1da670aab645e32303ea5ffaa1e5ca336936Jens Axboe      }
73a3073f4a296bba2cbd026603fe284341c370bfb0Jens Axboe      dest->append(segment->data(), segment_size);
74b1ec1da670aab645e32303ea5ffaa1e5ca336936Jens Axboe      dest->append(1, kEscapeChar);
75a3073f4a296bba2cbd026603fe284341c370bfb0Jens Axboe      dest->append(1, kTruncationChar);
767837213b66e20a8d91e7069f5823852f42c41440Jens Axboe      segment->erase(0, segment_size);
77b1ec1da670aab645e32303ea5ffaa1e5ca336936Jens Axboe
787837213b66e20a8d91e7069f5823852f42c41440Jens Axboe      // At this point, if we had segment_size=3, and segment="abcd",
79a3073f4a296bba2cbd026603fe284341c370bfb0Jens Axboe      // then after this erase, we will have written "abc,-" and set segment="d"
80b1ec1da670aab645e32303ea5ffaa1e5ca336936Jens Axboe    } else {
8106b0be6efb3da5131bc8386251d017f0abafbdacJens Axboe      dest->append(*segment);
827837213b66e20a8d91e7069f5823852f42c41440Jens Axboe      segment->clear();
8306b0be6efb3da5131bc8386251d017f0abafbdacJens Axboe    }
8406b0be6efb3da5131bc8386251d017f0abafbdacJens Axboe  }
85a3073f4a296bba2cbd026603fe284341c370bfb0Jens Axboe}
86b1ec1da670aab645e32303ea5ffaa1e5ca336936Jens Axboe
87b1ec1da670aab645e32303ea5ffaa1e5ca336936Jens Axboevoid UrlToFilenameEncoder::EncodeSegment(const string& filename_prefix,
8806b0be6efb3da5131bc8386251d017f0abafbdacJens Axboe                                         const string& escaped_ending,
89b1ec1da670aab645e32303ea5ffaa1e5ca336936Jens Axboe                                         char dir_separator,
90b1ec1da670aab645e32303ea5ffaa1e5ca336936Jens Axboe                                         string* encoded_filename) {
9106b0be6efb3da5131bc8386251d017f0abafbdacJens Axboe  string filename_ending = UrlUtilities::Unescape(escaped_ending);
92d447a8c25a735519089626b3047da646597a7c6fJens Axboe
93d447a8c25a735519089626b3047da646597a7c6fJens Axboe  char encoded[3];
9407b3232de97ac32a873f0b5d17c8f49c18ed3ae7Jens Axboe  int encoded_len;
95d447a8c25a735519089626b3047da646597a7c6fJens Axboe  string segment;
96ae3fb6fbaf7dd68291f5de8e8aaac0d28e24c9eeJens Axboe
97d447a8c25a735519089626b3047da646597a7c6fJens Axboe  // TODO(jmarantz): This code would be a bit simpler if we disallowed
98d447a8c25a735519089626b3047da646597a7c6fJens Axboe  // Instaweb allowing filename_prefix to not end in "/".  We could
99d447a8c25a735519089626b3047da646597a7c6fJens Axboe  // then change the is routine to just take one input string.
100d447a8c25a735519089626b3047da646597a7c6fJens Axboe  size_t start_of_segment = filename_prefix.find_last_of(dir_separator);
101d447a8c25a735519089626b3047da646597a7c6fJens Axboe  if (start_of_segment == string::npos) {
102d447a8c25a735519089626b3047da646597a7c6fJens Axboe    segment = filename_prefix;
103833491908a1afd67d27ce79257de3a4d80143d9fYu-ju Hong  } else {
104d447a8c25a735519089626b3047da646597a7c6fJens Axboe    segment = filename_prefix.substr(start_of_segment + 1);
10507b3232de97ac32a873f0b5d17c8f49c18ed3ae7Jens Axboe    *encoded_filename = filename_prefix.substr(0, start_of_segment + 1);
106d447a8c25a735519089626b3047da646597a7c6fJens Axboe  }
10706b0be6efb3da5131bc8386251d017f0abafbdacJens Axboe
10806b0be6efb3da5131bc8386251d017f0abafbdacJens Axboe  size_t index = 0;
10906b0be6efb3da5131bc8386251d017f0abafbdacJens Axboe  // Special case the first / to avoid adding a leading kEscapeChar.
11006b0be6efb3da5131bc8386251d017f0abafbdacJens Axboe  if (!filename_ending.empty() && (filename_ending[0] == dir_separator)) {
11106b0be6efb3da5131bc8386251d017f0abafbdacJens Axboe    encoded_filename->append(segment);
11206b0be6efb3da5131bc8386251d017f0abafbdacJens Axboe    segment.clear();
113d447a8c25a735519089626b3047da646597a7c6fJens Axboe    encoded_filename->append(1, dir_separator);
114d447a8c25a735519089626b3047da646597a7c6fJens Axboe    ++index;
11506b0be6efb3da5131bc8386251d017f0abafbdacJens Axboe  }
116d447a8c25a735519089626b3047da646597a7c6fJens Axboe
11706b0be6efb3da5131bc8386251d017f0abafbdacJens Axboe  for (; index < filename_ending.length(); ++index) {
11806b0be6efb3da5131bc8386251d017f0abafbdacJens Axboe    unsigned char ch = static_cast<unsigned char>(filename_ending[index]);
11907b3232de97ac32a873f0b5d17c8f49c18ed3ae7Jens Axboe
12006b0be6efb3da5131bc8386251d017f0abafbdacJens Axboe    // Note: instead of outputing an empty segment, we let the second slash
12106b0be6efb3da5131bc8386251d017f0abafbdacJens Axboe    // be escaped below.
122d447a8c25a735519089626b3047da646597a7c6fJens Axboe    if ((ch == dir_separator) && !segment.empty()) {
123d447a8c25a735519089626b3047da646597a7c6fJens Axboe      AppendSegment(&segment, encoded_filename);
124d447a8c25a735519089626b3047da646597a7c6fJens Axboe      encoded_filename->append(1, dir_separator);
1250de5b26f6e177aacac0683306c47e0cbaf58b0b6Jens Axboe      segment.clear();
1260de5b26f6e177aacac0683306c47e0cbaf58b0b6Jens Axboe    } else {
127cb2c86fdf03241fee32fd2e2caff43af1022403cJens Axboe      // After removing unsafe chars the only safe ones are _.=+- and alphanums.
12874454ce40f1a5e1e682da0a8acb824a7f6910270Christian Ehrhardt      if ((ch == '_') || (ch == '.') || (ch == '=') || (ch == '+') ||
12974454ce40f1a5e1e682da0a8acb824a7f6910270Christian Ehrhardt          (ch == '-') || (('0' <= ch) && (ch <= '9')) ||
130e4668264df255e8d01680920a4e78fd4186aeff1Jens Axboe          (('A' <= ch) && (ch <= 'Z')) || (('a' <= ch) && (ch <= 'z'))) {
13174454ce40f1a5e1e682da0a8acb824a7f6910270Christian Ehrhardt        encoded[0] = ch;
13274454ce40f1a5e1e682da0a8acb824a7f6910270Christian Ehrhardt        encoded_len = 1;
13374454ce40f1a5e1e682da0a8acb824a7f6910270Christian Ehrhardt      } else {
13474454ce40f1a5e1e682da0a8acb824a7f6910270Christian Ehrhardt        encoded[0] = kEscapeChar;
13574454ce40f1a5e1e682da0a8acb824a7f6910270Christian Ehrhardt        encoded[1] = ch / 16;
13674454ce40f1a5e1e682da0a8acb824a7f6910270Christian Ehrhardt        encoded[1] += (encoded[1] >= 10) ? 'A' - 10 : '0';
13774454ce40f1a5e1e682da0a8acb824a7f6910270Christian Ehrhardt        encoded[2] = ch % 16;
13874454ce40f1a5e1e682da0a8acb824a7f6910270Christian Ehrhardt        encoded[2] += (encoded[2] >= 10) ? 'A' - 10 : '0';
139cb2c86fdf03241fee32fd2e2caff43af1022403cJens Axboe        encoded_len = 3;
14074454ce40f1a5e1e682da0a8acb824a7f6910270Christian Ehrhardt      }
1410de5b26f6e177aacac0683306c47e0cbaf58b0b6Jens Axboe      segment.append(encoded, encoded_len);
1420de5b26f6e177aacac0683306c47e0cbaf58b0b6Jens Axboe
1430de5b26f6e177aacac0683306c47e0cbaf58b0b6Jens Axboe      // If segment is too big, we must chop it into chunks.
1440de5b26f6e177aacac0683306c47e0cbaf58b0b6Jens Axboe      if (segment.size() > kMaximumSubdirectoryLength) {
1450de5b26f6e177aacac0683306c47e0cbaf58b0b6Jens Axboe        AppendSegment(&segment, encoded_filename);
1460de5b26f6e177aacac0683306c47e0cbaf58b0b6Jens Axboe        encoded_filename->append(1, dir_separator);
14774454ce40f1a5e1e682da0a8acb824a7f6910270Christian Ehrhardt      }
14874454ce40f1a5e1e682da0a8acb824a7f6910270Christian Ehrhardt    }
14974454ce40f1a5e1e682da0a8acb824a7f6910270Christian Ehrhardt  }
15074454ce40f1a5e1e682da0a8acb824a7f6910270Christian Ehrhardt
15174454ce40f1a5e1e682da0a8acb824a7f6910270Christian Ehrhardt  // Append "," to the leaf filename so the leaf can also be a branch., e.g.
152e4668264df255e8d01680920a4e78fd4186aeff1Jens Axboe  // allow http://a/b/c and http://a/b/c/d to co-exist as files "/a/b/c," and
15374454ce40f1a5e1e682da0a8acb824a7f6910270Christian Ehrhardt  // /a/b/c/d".  So we will rename the "d" here to "d,".  If doing that pushed
154e4668264df255e8d01680920a4e78fd4186aeff1Jens Axboe  // us over the 128 char limit, then we will need to append "/" and the
15574454ce40f1a5e1e682da0a8acb824a7f6910270Christian Ehrhardt  // remaining chars.
156e4668264df255e8d01680920a4e78fd4186aeff1Jens Axboe  segment += kEscapeChar;
157e4668264df255e8d01680920a4e78fd4186aeff1Jens Axboe  AppendSegment(&segment, encoded_filename);
15874454ce40f1a5e1e682da0a8acb824a7f6910270Christian Ehrhardt  if (!segment.empty()) {
159e4668264df255e8d01680920a4e78fd4186aeff1Jens Axboe    // The last overflow segment is special, because we appended in
16074454ce40f1a5e1e682da0a8acb824a7f6910270Christian Ehrhardt    // kEscapeChar above.  We won't need to check it again for size
161e4668264df255e8d01680920a4e78fd4186aeff1Jens Axboe    // or further escaping.
16274454ce40f1a5e1e682da0a8acb824a7f6910270Christian Ehrhardt    encoded_filename->append(1, dir_separator);
163e4668264df255e8d01680920a4e78fd4186aeff1Jens Axboe    encoded_filename->append(segment);
16474454ce40f1a5e1e682da0a8acb824a7f6910270Christian Ehrhardt  }
16574454ce40f1a5e1e682da0a8acb824a7f6910270Christian Ehrhardt}
16674454ce40f1a5e1e682da0a8acb824a7f6910270Christian Ehrhardt
167cb2c86fdf03241fee32fd2e2caff43af1022403cJens Axboe// Note: this decoder is not the exact inverse of the EncodeSegment above,
168cb2c86fdf03241fee32fd2e2caff43af1022403cJens Axboe// because it does not take into account a prefix.
169a03fb65f4e5d657ee3bb68309cfa70ae2d5bc44bJens Axboebool UrlToFilenameEncoder::Decode(const string& encoded_filename,
170a03fb65f4e5d657ee3bb68309cfa70ae2d5bc44bJens Axboe                                  char dir_separator,
171a03fb65f4e5d657ee3bb68309cfa70ae2d5bc44bJens Axboe                                  string* decoded_url) {
172a03fb65f4e5d657ee3bb68309cfa70ae2d5bc44bJens Axboe  enum State {
173a03fb65f4e5d657ee3bb68309cfa70ae2d5bc44bJens Axboe    kStart,
174a03fb65f4e5d657ee3bb68309cfa70ae2d5bc44bJens Axboe    kEscape,
175a03fb65f4e5d657ee3bb68309cfa70ae2d5bc44bJens Axboe    kFirstDigit,
176a03fb65f4e5d657ee3bb68309cfa70ae2d5bc44bJens Axboe    kTruncate,
177a03fb65f4e5d657ee3bb68309cfa70ae2d5bc44bJens Axboe    kEscapeDot
178a03fb65f4e5d657ee3bb68309cfa70ae2d5bc44bJens Axboe  };
179a03fb65f4e5d657ee3bb68309cfa70ae2d5bc44bJens Axboe  State state = kStart;
180a03fb65f4e5d657ee3bb68309cfa70ae2d5bc44bJens Axboe  char hex_buffer[3];
181a03fb65f4e5d657ee3bb68309cfa70ae2d5bc44bJens Axboe  hex_buffer[2] = '\0';
1827bb591020669b7266c57108f2a68b48a03ae72eeJens Axboe  for (size_t i = 0; i < encoded_filename.size(); ++i) {
1837bb591020669b7266c57108f2a68b48a03ae72eeJens Axboe    char ch = encoded_filename[i];
184cb2c86fdf03241fee32fd2e2caff43af1022403cJens Axboe    switch (state) {
185d6978a3242daad9cb7b0710b724f19225d1ed7e2Jens Axboe      case kStart:
186a639f0bbd278365a2fa15031afd29a24dc917437Jens Axboe        if (ch == kEscapeChar) {
18757fc29faae372cb474b5f2ef921638ab28bb9dc0Jens Axboe          state = kEscape;
18857fc29faae372cb474b5f2ef921638ab28bb9dc0Jens Axboe        } else if (ch == dir_separator) {
189a639f0bbd278365a2fa15031afd29a24dc917437Jens Axboe          decoded_url->append(1, '/');  // URLs only use '/' not '\\'
19057fc29faae372cb474b5f2ef921638ab28bb9dc0Jens Axboe        } else {
19157fc29faae372cb474b5f2ef921638ab28bb9dc0Jens Axboe          decoded_url->append(1, ch);
192a639f0bbd278365a2fa15031afd29a24dc917437Jens Axboe        }
19357fc29faae372cb474b5f2ef921638ab28bb9dc0Jens Axboe        break;
19457fc29faae372cb474b5f2ef921638ab28bb9dc0Jens Axboe      case kEscape:
195a03fb65f4e5d657ee3bb68309cfa70ae2d5bc44bJens Axboe        if (HexDigitsPrefix(&ch, 1) == 1) {
19657fc29faae372cb474b5f2ef921638ab28bb9dc0Jens Axboe          hex_buffer[0] = ch;
197a03fb65f4e5d657ee3bb68309cfa70ae2d5bc44bJens Axboe          state = kFirstDigit;
198a03fb65f4e5d657ee3bb68309cfa70ae2d5bc44bJens Axboe        } else if (ch == kTruncationChar) {
199a03fb65f4e5d657ee3bb68309cfa70ae2d5bc44bJens Axboe          state = kTruncate;
200a03fb65f4e5d657ee3bb68309cfa70ae2d5bc44bJens Axboe        } else if (ch == '.') {
201a03fb65f4e5d657ee3bb68309cfa70ae2d5bc44bJens Axboe          decoded_url->append(1, '.');
20257fc29faae372cb474b5f2ef921638ab28bb9dc0Jens Axboe          state = kEscapeDot;  // Look for at most one more dot.
203a04f158df11fce840dbd7b5e426245929e6276a3Jens Axboe        } else if (ch == dir_separator) {
20457fc29faae372cb474b5f2ef921638ab28bb9dc0Jens Axboe          // Consider url "//x".  This was once encoded to "/,/x,".
20557fc29faae372cb474b5f2ef921638ab28bb9dc0Jens Axboe          // This code is what skips the first Escape.
206a04f158df11fce840dbd7b5e426245929e6276a3Jens Axboe          decoded_url->append(1, '/');  // URLs only use '/' not '\\'
20757fc29faae372cb474b5f2ef921638ab28bb9dc0Jens Axboe          state = kStart;
20857fc29faae372cb474b5f2ef921638ab28bb9dc0Jens Axboe        } else {
209a04f158df11fce840dbd7b5e426245929e6276a3Jens Axboe          return false;
21057fc29faae372cb474b5f2ef921638ab28bb9dc0Jens Axboe        }
21157fc29faae372cb474b5f2ef921638ab28bb9dc0Jens Axboe        break;
212a04f158df11fce840dbd7b5e426245929e6276a3Jens Axboe      case kFirstDigit:
21357fc29faae372cb474b5f2ef921638ab28bb9dc0Jens Axboe        if (HexDigitsPrefix(&ch, 1) == 1) {
21457fc29faae372cb474b5f2ef921638ab28bb9dc0Jens Axboe          hex_buffer[1] = ch;
215a04f158df11fce840dbd7b5e426245929e6276a3Jens Axboe          uint64 hex_value = ParseLeadingHex64Value(hex_buffer, 0);
21657fc29faae372cb474b5f2ef921638ab28bb9dc0Jens Axboe          decoded_url->append(1, static_cast<char>(hex_value));
21757fc29faae372cb474b5f2ef921638ab28bb9dc0Jens Axboe          state = kStart;
218a04f158df11fce840dbd7b5e426245929e6276a3Jens Axboe        } else {
21957fc29faae372cb474b5f2ef921638ab28bb9dc0Jens Axboe          return false;
220a04f158df11fce840dbd7b5e426245929e6276a3Jens Axboe        }
22157fc29faae372cb474b5f2ef921638ab28bb9dc0Jens Axboe        break;
222a04f158df11fce840dbd7b5e426245929e6276a3Jens Axboe      case kTruncate:
22357fc29faae372cb474b5f2ef921638ab28bb9dc0Jens Axboe        if (ch == dir_separator) {
224a04f158df11fce840dbd7b5e426245929e6276a3Jens Axboe          // Skip this separator, it was only put in to break up long
22557fc29faae372cb474b5f2ef921638ab28bb9dc0Jens Axboe          // path segments, but is not part of the URL.
226a04f158df11fce840dbd7b5e426245929e6276a3Jens Axboe          state = kStart;
22757fc29faae372cb474b5f2ef921638ab28bb9dc0Jens Axboe        } else {
228a04f158df11fce840dbd7b5e426245929e6276a3Jens Axboe          return false;
2297bb591020669b7266c57108f2a68b48a03ae72eeJens Axboe        }
230e721c57fc77e0155bb73a2c266dba0c6ce0bd3b5Jens Axboe        break;
2317bb591020669b7266c57108f2a68b48a03ae72eeJens Axboe      case kEscapeDot:
2327bb591020669b7266c57108f2a68b48a03ae72eeJens Axboe        decoded_url->append(1, ch);
23357fc29faae372cb474b5f2ef921638ab28bb9dc0Jens Axboe        state = kStart;
23457fc29faae372cb474b5f2ef921638ab28bb9dc0Jens Axboe        break;
23557fc29faae372cb474b5f2ef921638ab28bb9dc0Jens Axboe    }
23657fc29faae372cb474b5f2ef921638ab28bb9dc0Jens Axboe  }
23757fc29faae372cb474b5f2ef921638ab28bb9dc0Jens Axboe
238a639f0bbd278365a2fa15031afd29a24dc917437Jens Axboe  // All legal encoded filenames end in kEscapeChar.
239cb2c86fdf03241fee32fd2e2caff43af1022403cJens Axboe  return (state == kEscape);
240cb2c86fdf03241fee32fd2e2caff43af1022403cJens Axboe}
2417bb591020669b7266c57108f2a68b48a03ae72eeJens Axboe
2426925dd356191bc40e8a1ebc8fd92a40b476658c3Jens Axboe// Escape the given input |path| and chop any individual components
24357fc29faae372cb474b5f2ef921638ab28bb9dc0Jens Axboe// of the path which are greater than kMaximumSubdirectoryLength characters
24455ed9636e82b8dee419b5a76c07098bff4d980b6Jens Axboe// into two chunks.
245ba4ddd690a04f39abada885f1b4ea3b228e790a8Jens Axboe//
24657fc29faae372cb474b5f2ef921638ab28bb9dc0Jens Axboe// This legacy version has several issues with aliasing of different URLs,
2471d1c187b36fd4ee28d72d04660b37a7c3edd64e6Jens Axboe// inability to represent both /a/b/c and /a/b/c/d, and inability to decode
2487bb591020669b7266c57108f2a68b48a03ae72eeJens Axboe// the filenames back into URLs.
2491d1c187b36fd4ee28d72d04660b37a7c3edd64e6Jens Axboe//
250d0c814ececb7410e97d1a437e80fc2dfd5c6de38Steven Lang// But there is a large body of slurped data which depends on this format,
251d0c814ececb7410e97d1a437e80fc2dfd5c6de38Steven Lang// so leave it as the default for spdy_in_mem_edsm_server.
252d0c814ececb7410e97d1a437e80fc2dfd5c6de38Steven Langstring UrlToFilenameEncoder::LegacyEscape(const string& path) {
25355ed9636e82b8dee419b5a76c07098bff4d980b6Jens Axboe  string output;
254ba4ddd690a04f39abada885f1b4ea3b228e790a8Jens Axboe
255ba4ddd690a04f39abada885f1b4ea3b228e790a8Jens Axboe  // Note:  We also chop paths into medium sized 'chunks'.
25655ed9636e82b8dee419b5a76c07098bff4d980b6Jens Axboe  //        This is due to the incompetence of the windows
2573c703d13b1323869de7e51b9c5c7feb9d9c8211dJens Axboe  //        filesystem, which still hasn't figured out how
25855ed9636e82b8dee419b5a76c07098bff4d980b6Jens Axboe  //        to deal with long filenames.
25955ed9636e82b8dee419b5a76c07098bff4d980b6Jens Axboe  int last_slash = 0;
26055ed9636e82b8dee419b5a76c07098bff4d980b6Jens Axboe  for (size_t index = 0; index < path.length(); index++) {
2617bb591020669b7266c57108f2a68b48a03ae72eeJens Axboe    char ch = path[index];
26257fc29faae372cb474b5f2ef921638ab28bb9dc0Jens Axboe    if (ch == 0x5C)
26357fc29faae372cb474b5f2ef921638ab28bb9dc0Jens Axboe      last_slash = index;
2647bb591020669b7266c57108f2a68b48a03ae72eeJens Axboe    if ((ch == 0x2D) ||                    // hyphen
26557fc29faae372cb474b5f2ef921638ab28bb9dc0Jens Axboe        (ch == 0x5C) || (ch == 0x5F) ||    // backslash, underscore
26657fc29faae372cb474b5f2ef921638ab28bb9dc0Jens Axboe        ((0x30 <= ch) && (ch <= 0x39)) ||  // Digits [0-9]
267cb2c86fdf03241fee32fd2e2caff43af1022403cJens Axboe        ((0x41 <= ch) && (ch <= 0x5A)) ||  // Uppercase [A-Z]
268833491908a1afd67d27ce79257de3a4d80143d9fYu-ju Hong        ((0x61 <= ch) && (ch <= 0x7A))) {  // Lowercase [a-z]
269833491908a1afd67d27ce79257de3a4d80143d9fYu-ju Hong      output.append(&path[index], 1);
270e25839d4cb5fefcb5ffce76128a4faedb177e7afJens Axboe    } else {
271833491908a1afd67d27ce79257de3a4d80143d9fYu-ju Hong      char encoded[3];
272833491908a1afd67d27ce79257de3a4d80143d9fYu-ju Hong      encoded[0] = 'x';
273833491908a1afd67d27ce79257de3a4d80143d9fYu-ju Hong      encoded[1] = ch / 16;
274833491908a1afd67d27ce79257de3a4d80143d9fYu-ju Hong      encoded[1] += (encoded[1] >= 10) ? 'A' - 10 : '0';
275833491908a1afd67d27ce79257de3a4d80143d9fYu-ju Hong      encoded[2] = ch % 16;
276e1f365035a952233463d85d659bd960ba78f012eJens Axboe      encoded[2] += (encoded[2] >= 10) ? 'A' - 10 : '0';
277cb2c86fdf03241fee32fd2e2caff43af1022403cJens Axboe      output.append(encoded, 3);
2780de5b26f6e177aacac0683306c47e0cbaf58b0b6Jens Axboe    }
2790de5b26f6e177aacac0683306c47e0cbaf58b0b6Jens Axboe    if (index - last_slash > kMaximumSubdirectoryLength) {
280cb2c86fdf03241fee32fd2e2caff43af1022403cJens Axboe#ifdef WIN32
281b347f9daece7d65a6e596cd3bd0ef3602e40b059Jens Axboe      char slash = '\\';
282cb2c86fdf03241fee32fd2e2caff43af1022403cJens Axboe#else
283cb2c86fdf03241fee32fd2e2caff43af1022403cJens Axboe      char slash = '/';
284f90eff5a414f6e8c16a51c3a7d9b5e077ab49aacJens Axboe#endif
285f90eff5a414f6e8c16a51c3a7d9b5e077ab49aacJens Axboe      output.append(&slash, 1);
286cb2c86fdf03241fee32fd2e2caff43af1022403cJens Axboe      last_slash = index;
287b347f9daece7d65a6e596cd3bd0ef3602e40b059Jens Axboe    }
288b347f9daece7d65a6e596cd3bd0ef3602e40b059Jens Axboe  }
289b347f9daece7d65a6e596cd3bd0ef3602e40b059Jens Axboe  return output;
290b347f9daece7d65a6e596cd3bd0ef3602e40b059Jens Axboe}
291b347f9daece7d65a6e596cd3bd0ef3602e40b059Jens Axboe
292b347f9daece7d65a6e596cd3bd0ef3602e40b059Jens Axboe}  // namespace net
293cda866caa9b9f6598e264d24a8997d24f3c2b1c7Jens Axboe