// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <stdlib.h>
65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/logging.h"
8868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)#include "base/strings/string_util.h"
95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "net/base/net_util.h"
105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "net/tools/dump_cache/url_to_filename_encoder.h"
115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)using std::string;
135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace {
155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Returns 1 if buf is prefixed by "num_digits" of hex digits
175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Teturns 0 otherwise.
185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// The function checks for '\0' for string termination.
195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)int HexDigitsPrefix(const char* buf, int num_digits) {
205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (int i = 0; i < num_digits; i++) {
215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (!IsHexDigit(buf[i]))
225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return 0;  // This also detects end of string as '\0' is not xdigit.
235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return 1;
255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef WIN32
285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define strtoull _strtoui64
295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// A simple parser for long long values. Returns the parsed value if a
325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// valid integer is found; else returns deflt
335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// UInt64 and Int64 cannot handle decimal numbers with leading 0s.
345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)uint64 ParseLeadingHex64Value(const char *str, uint64 deflt) {
355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  char *error = NULL;
365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const uint64 value = strtoull(str, &error, 16);
375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return (error == str) ? deflt : value;
385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace net {
435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// The escape character choice is made here -- all code and tests in this
455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// directory are based off of this constant.  However, our testdata
465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// has tons of dependencies on this, so it cannot be changed without
475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// re-running those tests and fixing them.
485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)const char UrlToFilenameEncoder::kEscapeChar = ',';
495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)const char UrlToFilenameEncoder::kTruncationChar = '-';
505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)const size_t UrlToFilenameEncoder::kMaximumSubdirectoryLength = 128;
515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void UrlToFilenameEncoder::AppendSegment(string* segment, string* dest) {
535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  CHECK(!segment->empty());
545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if ((*segment == ".") || (*segment == "..")) {
555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    dest->append(1, kEscapeChar);
565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    dest->append(*segment);
575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    segment->clear();
585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  } else {
595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    size_t segment_size = segment->size();
605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (segment_size > kMaximumSubdirectoryLength) {
615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // We need to inject ",-" at the end of the segment to signify that
625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // we are inserting an artificial '/'.  This means we have to chop
635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // off at least two characters to make room.
645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      segment_size = kMaximumSubdirectoryLength - 2;
655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // But we don't want to break up an escape sequence that happens to lie at
675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // the end.  Escape sequences are at most 2 characters.
685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if ((*segment)[segment_size - 1] == kEscapeChar) {
695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        segment_size -= 1;
705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      } else if ((*segment)[segment_size - 2] == kEscapeChar) {
715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        segment_size -= 2;
725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      dest->append(segment->data(), segment_size);
745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      dest->append(1, kEscapeChar);
755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      dest->append(1, kTruncationChar);
765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      segment->erase(0, segment_size);
775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // At this point, if we had segment_size=3, and segment="abcd",
795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // then after this erase, we will have written "abc,-" and set segment="d"
805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else {
815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      dest->append(*segment);
825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      segment->clear();
835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void UrlToFilenameEncoder::EncodeSegment(const string& filename_prefix,
885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                         const string& escaped_ending,
895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                         char dir_separator,
905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                         string* encoded_filename) {
915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  string filename_ending = UrlUtilities::Unescape(escaped_ending);
925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  char encoded[3];
945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int encoded_len;
955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  string segment;
965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // TODO(jmarantz): This code would be a bit simpler if we disallowed
985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Instaweb allowing filename_prefix to not end in "/".  We could
995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // then change the is routine to just take one input string.
1005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  size_t start_of_segment = filename_prefix.find_last_of(dir_separator);
1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (start_of_segment == string::npos) {
1025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    segment = filename_prefix;
1035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  } else {
1045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    segment = filename_prefix.substr(start_of_segment + 1);
1055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    *encoded_filename = filename_prefix.substr(0, start_of_segment + 1);
1065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  size_t index = 0;
1095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Special case the first / to avoid adding a leading kEscapeChar.
1105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!filename_ending.empty() && (filename_ending[0] == dir_separator)) {
1115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    encoded_filename->append(segment);
1125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    segment.clear();
1135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    encoded_filename->append(1, dir_separator);
1145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ++index;
1155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (; index < filename_ending.length(); ++index) {
1185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    unsigned char ch = static_cast<unsigned char>(filename_ending[index]);
1195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // Note: instead of outputing an empty segment, we let the second slash
1215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // be escaped below.
1225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if ((ch == dir_separator) && !segment.empty()) {
1235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      AppendSegment(&segment, encoded_filename);
1245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      encoded_filename->append(1, dir_separator);
1255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      segment.clear();
1265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else {
1275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // After removing unsafe chars the only safe ones are _.=+- and alphanums.
1285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if ((ch == '_') || (ch == '.') || (ch == '=') || (ch == '+') ||
1295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          (ch == '-') || (('0' <= ch) && (ch <= '9')) ||
1305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          (('A' <= ch) && (ch <= 'Z')) || (('a' <= ch) && (ch <= 'z'))) {
1315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        encoded[0] = ch;
1325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        encoded_len = 1;
1335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      } else {
1345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        encoded[0] = kEscapeChar;
1355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        encoded[1] = ch / 16;
1365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        encoded[1] += (encoded[1] >= 10) ? 'A' - 10 : '0';
1375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        encoded[2] = ch % 16;
1385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        encoded[2] += (encoded[2] >= 10) ? 'A' - 10 : '0';
1395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        encoded_len = 3;
1405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
1415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      segment.append(encoded, encoded_len);
1425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // If segment is too big, we must chop it into chunks.
1445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (segment.size() > kMaximumSubdirectoryLength) {
1455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        AppendSegment(&segment, encoded_filename);
1465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        encoded_filename->append(1, dir_separator);
1475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
1485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
1495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Append "," to the leaf filename so the leaf can also be a branch., e.g.
1525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // allow http://a/b/c and http://a/b/c/d to co-exist as files "/a/b/c," and
1535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // /a/b/c/d".  So we will rename the "d" here to "d,".  If doing that pushed
1545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // us over the 128 char limit, then we will need to append "/" and the
1555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // remaining chars.
1565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  segment += kEscapeChar;
1575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  AppendSegment(&segment, encoded_filename);
1585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!segment.empty()) {
1595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // The last overflow segment is special, because we appended in
1605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // kEscapeChar above.  We won't need to check it again for size
1615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // or further escaping.
1625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    encoded_filename->append(1, dir_separator);
1635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    encoded_filename->append(segment);
1645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Note: this decoder is not the exact inverse of the EncodeSegment above,
1685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// because it does not take into account a prefix.
1695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool UrlToFilenameEncoder::Decode(const string& encoded_filename,
1705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                  char dir_separator,
1715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                  string* decoded_url) {
1725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  enum State {
1735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    kStart,
1745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    kEscape,
1755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    kFirstDigit,
1765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    kTruncate,
1775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    kEscapeDot
1785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  };
1795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  State state = kStart;
1805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  char hex_buffer[3];
1815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  hex_buffer[2] = '\0';
1825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (size_t i = 0; i < encoded_filename.size(); ++i) {
1835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    char ch = encoded_filename[i];
1845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    switch (state) {
1855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      case kStart:
1865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if (ch == kEscapeChar) {
1875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          state = kEscape;
1885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        } else if (ch == dir_separator) {
1895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          decoded_url->append(1, '/');  // URLs only use '/' not '\\'
1905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        } else {
1915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          decoded_url->append(1, ch);
1925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        }
1935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        break;
1945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      case kEscape:
1955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if (HexDigitsPrefix(&ch, 1) == 1) {
1965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          hex_buffer[0] = ch;
1975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          state = kFirstDigit;
1985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        } else if (ch == kTruncationChar) {
1995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          state = kTruncate;
2005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        } else if (ch == '.') {
2015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          decoded_url->append(1, '.');
2025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          state = kEscapeDot;  // Look for at most one more dot.
2035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        } else if (ch == dir_separator) {
2045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          // Consider url "//x".  This was once encoded to "/,/x,".
2055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          // This code is what skips the first Escape.
2065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          decoded_url->append(1, '/');  // URLs only use '/' not '\\'
2075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          state = kStart;
2085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        } else {
2095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          return false;
2105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        }
2115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        break;
2125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      case kFirstDigit:
2135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if (HexDigitsPrefix(&ch, 1) == 1) {
2145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          hex_buffer[1] = ch;
2155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          uint64 hex_value = ParseLeadingHex64Value(hex_buffer, 0);
2165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          decoded_url->append(1, static_cast<char>(hex_value));
2175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          state = kStart;
2185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        } else {
2195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          return false;
2205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        }
2215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        break;
2225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      case kTruncate:
2235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if (ch == dir_separator) {
2245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          // Skip this separator, it was only put in to break up long
2255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          // path segments, but is not part of the URL.
2265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          state = kStart;
2275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        } else {
2285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          return false;
2295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        }
2305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        break;
2315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      case kEscapeDot:
2325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        decoded_url->append(1, ch);
2335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        state = kStart;
2345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        break;
2355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
2365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
2375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // All legal encoded filenames end in kEscapeChar.
2395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return (state == kEscape);
2405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
2415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Escape the given input |path| and chop any individual components
2435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// of the path which are greater than kMaximumSubdirectoryLength characters
2445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// into two chunks.
2455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
2465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// This legacy version has several issues with aliasing of different URLs,
2475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// inability to represent both /a/b/c and /a/b/c/d, and inability to decode
2485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// the filenames back into URLs.
2495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//
2505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// But there is a large body of slurped data which depends on this format,
2515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// so leave it as the default for spdy_in_mem_edsm_server.
2525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)string UrlToFilenameEncoder::LegacyEscape(const string& path) {
2535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  string output;
2545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Note:  We also chop paths into medium sized 'chunks'.
2565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  //        This is due to the incompetence of the windows
2575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  //        filesystem, which still hasn't figured out how
2585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  //        to deal with long filenames.
2595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int last_slash = 0;
2605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (size_t index = 0; index < path.length(); index++) {
2615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    char ch = path[index];
2625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (ch == 0x5C)
2635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      last_slash = index;
2645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if ((ch == 0x2D) ||                    // hyphen
2655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        (ch == 0x5C) || (ch == 0x5F) ||    // backslash, underscore
2665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ((0x30 <= ch) && (ch <= 0x39)) ||  // Digits [0-9]
2675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ((0x41 <= ch) && (ch <= 0x5A)) ||  // Uppercase [A-Z]
2685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ((0x61 <= ch) && (ch <= 0x7A))) {  // Lowercase [a-z]
2695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      output.append(&path[index], 1);
2705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else {
2715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      char encoded[3];
2725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      encoded[0] = 'x';
2735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      encoded[1] = ch / 16;
2745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      encoded[1] += (encoded[1] >= 10) ? 'A' - 10 : '0';
2755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      encoded[2] = ch % 16;
2765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      encoded[2] += (encoded[2] >= 10) ? 'A' - 10 : '0';
2775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      output.append(encoded, 3);
2785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
2795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (index - last_slash > kMaximumSubdirectoryLength) {
2805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifdef WIN32
2815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      char slash = '\\';
2825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#else
2835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      char slash = '/';
2845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
2855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      output.append(&slash, 1);
2865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      last_slash = index;
2875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
2885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
2895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return output;
2905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
2915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}  // namespace net
293