1c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// Copyright (c) 2010 The Chromium Authors. All rights reserved. 2c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// Use of this source code is governed by a BSD-style license that can be 3c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// found in the LICENSE file. 4c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 572a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen#include <stdlib.h> 672a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen 7c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "base/logging.h" 8c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "base/string_util.h" 9c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "net/base/net_util.h" 10c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "net/tools/dump_cache/url_to_filename_encoder.h" 11c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 12c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochusing std::string; 13c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 14c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochnamespace { 15c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 16c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// Returns 1 if buf is prefixed by "num_digits" of hex digits 17c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// Teturns 0 otherwise. 18c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// The function checks for '\0' for string termination. 19c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochint HexDigitsPrefix(const char* buf, int num_digits) { 203345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick for (int i = 0; i < num_digits; i++) { 21c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (!IsHexDigit(buf[i])) 22c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return 0; // This also detects end of string as '\0' is not xdigit. 233345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick } 24c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return 1; 25c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 26c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 27c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#ifdef WIN32 28c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#define strtoull _strtoui64 29c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#endif 30c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 31c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// A simple parser for long long values. Returns the parsed value if a 32c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// valid integer is found; else returns deflt 33c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// UInt64 and Int64 cannot handle decimal numbers with leading 0s. 34c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochuint64 ParseLeadingHex64Value(const char *str, uint64 deflt) { 35c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch char *error = NULL; 36c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch const uint64 value = strtoull(str, &error, 16); 37c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return (error == str) ? deflt : value; 38c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 39c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 40c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 41c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 42c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochnamespace net { 43c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 44c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// The escape character choice is made here -- all code and tests in this 453345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick// directory are based off of this constant. However, our testdata 46c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// has tons of dependencies on this, so it cannot be changed without 47c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// re-running those tests and fixing them. 483345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrickconst char UrlToFilenameEncoder::kEscapeChar = ','; 493345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrickconst char UrlToFilenameEncoder::kTruncationChar = '-'; 503345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrickconst size_t UrlToFilenameEncoder::kMaximumSubdirectoryLength = 128; 51c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 523345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrickvoid UrlToFilenameEncoder::AppendSegment(string* segment, string* dest) { 533345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick CHECK(!segment->empty()); 543345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick if ((*segment == ".") || (*segment == "..")) { 55c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch dest->append(1, kEscapeChar); 56c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch dest->append(*segment); 57c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch segment->clear(); 58c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } else { 59c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch size_t segment_size = segment->size(); 60c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (segment_size > kMaximumSubdirectoryLength) { 61c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // We need to inject ",-" at the end of the segment to signify that 62c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // we are inserting an artificial '/'. This means we have to chop 63c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // off at least two characters to make room. 64c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch segment_size = kMaximumSubdirectoryLength - 2; 65c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 66c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // But we don't want to break up an escape sequence that happens to lie at 67c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // the end. Escape sequences are at most 2 characters. 68c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if ((*segment)[segment_size - 1] == kEscapeChar) { 69c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch segment_size -= 1; 70c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } else if ((*segment)[segment_size - 2] == kEscapeChar) { 71c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch segment_size -= 2; 72c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 73c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch dest->append(segment->data(), segment_size); 74c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch dest->append(1, kEscapeChar); 75c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch dest->append(1, kTruncationChar); 76c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch segment->erase(0, segment_size); 77c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 78c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // At this point, if we had segment_size=3, and segment="abcd", 79c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // then after this erase, we will have written "abc,-" and set segment="d" 80c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } else { 81c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch dest->append(*segment); 82c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch segment->clear(); 83c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 84c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 85c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 86c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 87c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochvoid UrlToFilenameEncoder::EncodeSegment(const string& filename_prefix, 883345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick const string& escaped_ending, 89c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch char dir_separator, 90c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch string* encoded_filename) { 913345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick string filename_ending = UrlUtilities::Unescape(escaped_ending); 923345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick 93c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch char encoded[3]; 94c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch int encoded_len; 95c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch string segment; 96c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 97c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // TODO(jmarantz): This code would be a bit simpler if we disallowed 98c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Instaweb allowing filename_prefix to not end in "/". We could 99c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // then change the is routine to just take one input string. 100c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch size_t start_of_segment = filename_prefix.find_last_of(dir_separator); 101c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (start_of_segment == string::npos) { 102c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch segment = filename_prefix; 103c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } else { 104c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch segment = filename_prefix.substr(start_of_segment + 1); 105c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch *encoded_filename = filename_prefix.substr(0, start_of_segment + 1); 106c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 107c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 108c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch size_t index = 0; 109c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Special case the first / to avoid adding a leading kEscapeChar. 110c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (!filename_ending.empty() && (filename_ending[0] == dir_separator)) { 111c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch encoded_filename->append(segment); 112c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch segment.clear(); 113c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch encoded_filename->append(1, dir_separator); 114c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch ++index; 115c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 116c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 117c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch for (; index < filename_ending.length(); ++index) { 118c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch unsigned char ch = static_cast<unsigned char>(filename_ending[index]); 119c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 1203345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick // Note: instead of outputing an empty segment, we let the second slash 1213345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick // be escaped below. 1223345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick if ((ch == dir_separator) && !segment.empty()) { 1233345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick AppendSegment(&segment, encoded_filename); 124c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch encoded_filename->append(1, dir_separator); 125c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch segment.clear(); 126c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } else { 1273345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick // After removing unsafe chars the only safe ones are _.=+- and alphanums. 1283345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick if ((ch == '_') || (ch == '.') || (ch == '=') || (ch == '+') || 1293345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick (ch == '-') || (('0' <= ch) && (ch <= '9')) || 1303345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick (('A' <= ch) && (ch <= 'Z')) || (('a' <= ch) && (ch <= 'z'))) { 131c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch encoded[0] = ch; 132c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch encoded_len = 1; 133c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } else { 134c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch encoded[0] = kEscapeChar; 135c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch encoded[1] = ch / 16; 136c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch encoded[1] += (encoded[1] >= 10) ? 'A' - 10 : '0'; 137c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch encoded[2] = ch % 16; 138c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch encoded[2] += (encoded[2] >= 10) ? 'A' - 10 : '0'; 139c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch encoded_len = 3; 140c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 141c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch segment.append(encoded, encoded_len); 142c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 1433345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick // If segment is too big, we must chop it into chunks. 144c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (segment.size() > kMaximumSubdirectoryLength) { 1453345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick AppendSegment(&segment, encoded_filename); 146c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch encoded_filename->append(1, dir_separator); 147c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 148c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 149c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 150c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 151c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Append "," to the leaf filename so the leaf can also be a branch., e.g. 152c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // allow http://a/b/c and http://a/b/c/d to co-exist as files "/a/b/c," and 153c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // /a/b/c/d". So we will rename the "d" here to "d,". If doing that pushed 154c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // us over the 128 char limit, then we will need to append "/" and the 155c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // remaining chars. 156c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch segment += kEscapeChar; 1573345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick AppendSegment(&segment, encoded_filename); 158c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (!segment.empty()) { 159c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // The last overflow segment is special, because we appended in 160c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // kEscapeChar above. We won't need to check it again for size 161c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // or further escaping. 162c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch encoded_filename->append(1, dir_separator); 163c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch encoded_filename->append(segment); 164c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 165c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 166c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 167c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// Note: this decoder is not the exact inverse of the EncodeSegment above, 168c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// because it does not take into account a prefix. 169c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochbool UrlToFilenameEncoder::Decode(const string& encoded_filename, 170c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch char dir_separator, 171c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch string* decoded_url) { 172c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch enum State { 173c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch kStart, 174c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch kEscape, 175c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch kFirstDigit, 176c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch kTruncate, 177c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch kEscapeDot 178c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch }; 179c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch State state = kStart; 180c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch int char_code = 0; 181c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch char hex_buffer[3]; 182c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch hex_buffer[2] = '\0'; 183c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch for (size_t i = 0; i < encoded_filename.size(); ++i) { 184c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch char ch = encoded_filename[i]; 185c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch switch (state) { 186c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch case kStart: 187c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (ch == kEscapeChar) { 188c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch state = kEscape; 1893345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick } else if (ch == dir_separator) { 1903345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick decoded_url->append(1, '/'); // URLs only use '/' not '\\' 191c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } else { 192c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch decoded_url->append(1, ch); 193c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 194c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch break; 195c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch case kEscape: 196c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (HexDigitsPrefix(&ch, 1) == 1) { 197c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch hex_buffer[0] = ch; 198c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch state = kFirstDigit; 199c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } else if (ch == kTruncationChar) { 200c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch state = kTruncate; 201c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } else if (ch == '.') { 202c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch decoded_url->append(1, '.'); 203c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch state = kEscapeDot; // Look for at most one more dot. 204c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } else if (ch == dir_separator) { 2053345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick // Consider url "//x". This was once encoded to "/,/x,". 206c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // This code is what skips the first Escape. 2073345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick decoded_url->append(1, '/'); // URLs only use '/' not '\\' 208c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch state = kStart; 209c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } else { 210c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return false; 211c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 212c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch break; 213c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch case kFirstDigit: 214c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (HexDigitsPrefix(&ch, 1) == 1) { 215c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch hex_buffer[1] = ch; 216c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch uint64 hex_value = ParseLeadingHex64Value(hex_buffer, 0); 217c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch decoded_url->append(1, static_cast<char>(hex_value)); 218c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch char_code = 0; 219c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch state = kStart; 220c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } else { 221c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return false; 222c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 223c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch break; 224c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch case kTruncate: 225c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (ch == dir_separator) { 226c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Skip this separator, it was only put in to break up long 227c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // path segments, but is not part of the URL. 228c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch state = kStart; 229c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } else { 230c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return false; 231c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 232c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch break; 233c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch case kEscapeDot: 234c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch decoded_url->append(1, ch); 235c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch state = kStart; 236c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch break; 237c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 238c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 239c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 240c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // All legal encoded filenames end in kEscapeChar. 241c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return (state == kEscape); 242c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 243c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 2443345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick// Escape the given input |path| and chop any individual components 245c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// of the path which are greater than kMaximumSubdirectoryLength characters 246c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// into two chunks. 247c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// 248c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// This legacy version has several issues with aliasing of different URLs, 249c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// inability to represent both /a/b/c and /a/b/c/d, and inability to decode 250c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// the filenames back into URLs. 251c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// 252c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// But there is a large body of slurped data which depends on this format, 253c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// so leave it as the default for spdy_in_mem_edsm_server. 254c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochstring UrlToFilenameEncoder::LegacyEscape(const string& path) { 255c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch string output; 256c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 257c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Note: We also chop paths into medium sized 'chunks'. 258c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // This is due to the incompetence of the windows 259c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // filesystem, which still hasn't figured out how 260c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // to deal with long filenames. 261c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch int last_slash = 0; 262c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch for (size_t index = 0; index < path.length(); index++) { 263c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch char ch = path[index]; 264c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (ch == 0x5C) 265c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch last_slash = index; 266c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if ((ch == 0x2D) || // hyphen 267c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch (ch == 0x5C) || (ch == 0x5F) || // backslash, underscore 268c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch ((0x30 <= ch) && (ch <= 0x39)) || // Digits [0-9] 269c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch ((0x41 <= ch) && (ch <= 0x5A)) || // Uppercase [A-Z] 270c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch ((0x61 <= ch) && (ch <= 0x7A))) { // Lowercase [a-z] 271c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch output.append(&path[index], 1); 272c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } else { 273c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch char encoded[3]; 274c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch encoded[0] = 'x'; 275c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch encoded[1] = ch / 16; 276c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch encoded[1] += (encoded[1] >= 10) ? 'A' - 10 : '0'; 277c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch encoded[2] = ch % 16; 278c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch encoded[2] += (encoded[2] >= 10) ? 'A' - 10 : '0'; 279c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch output.append(encoded, 3); 280c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 281c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (index - last_slash > kMaximumSubdirectoryLength) { 282c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#ifdef WIN32 283c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch char slash = '\\'; 284c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#else 285c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch char slash = '/'; 286c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#endif 287c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch output.append(&slash, 1); 288c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch last_slash = index; 289c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 290c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 291c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return output; 292c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 293c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 294c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} // namespace net 295