1c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// Copyright (c) 2010 The Chromium Authors. All rights reserved. 2c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// Use of this source code is governed by a BSD-style license that can be 3c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// found in the LICENSE file. 4c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 5c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "net/tools/dump_cache/url_to_filename_encoder.h" 6c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 7c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include <string> 8c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include <vector> 93345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick 10c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "base/string_piece.h" 11c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "base/string_util.h" 123345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick#include "base/stringprintf.h" 13c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "testing/gtest/include/gtest/gtest.h" 14c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 15c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochusing base::StringPiece; 16c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochusing std::string; 17c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 18c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochnamespace net { 19c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 203345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick#ifdef WIN32 213345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrickchar kDirSeparator = '\\'; 223345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrickchar kOtherDirSeparator = '/'; 233345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick#else 243345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrickchar kDirSeparator = '/'; 253345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrickchar kOtherDirSeparator = '\\'; 263345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick#endif 27c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 28c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochclass UrlToFilenameEncoderTest : public ::testing::Test { 29c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch protected: 303345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick UrlToFilenameEncoderTest() : escape_(1, UrlToFilenameEncoder::kEscapeChar), 313345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick dir_sep_(1, kDirSeparator) { 323345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick } 33c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 34c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch void CheckSegmentLength(const StringPiece& escaped_word) { 35c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch std::vector<StringPiece> components; 36c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch Tokenize(escaped_word, StringPiece("/"), &components); 37c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch for (size_t i = 0; i < components.size(); ++i) { 383345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick EXPECT_GE(UrlToFilenameEncoder::kMaximumSubdirectoryLength, 39c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch components[i].size()); 40c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 41c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 42c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 433345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick void CheckValidChars(const StringPiece& escaped_word, char invalid_slash) { 443345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick // These characters are invalid in Windows. We add in ', as that's pretty 45c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // inconvenient in a Unix filename. 46c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // 47c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // See http://msdn.microsoft.com/en-us/library/aa365247(VS.85).aspx 483345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick const string kInvalidChars = "<>:\"|?*'"; 49c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch for (size_t i = 0; i < escaped_word.size(); ++i) { 50c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch char c = escaped_word[i]; 513345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick EXPECT_EQ(string::npos, kInvalidChars.find(c)); 523345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick EXPECT_NE(invalid_slash, c); 53c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch EXPECT_NE('\0', c); // only invalid character in Posix 54c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch EXPECT_GT(0x7E, c); // only English printable characters 55c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 56c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 57c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 58c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch void Validate(const string& in_word, const string& gold_word) { 59c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch string escaped_word, url; 60c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch UrlToFilenameEncoder::EncodeSegment("", in_word, '/', &escaped_word); 61c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch EXPECT_EQ(gold_word, escaped_word); 62c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch CheckSegmentLength(escaped_word); 633345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick CheckValidChars(escaped_word, '\\'); 64c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch UrlToFilenameEncoder::Decode(escaped_word, '/', &url); 65c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch EXPECT_EQ(in_word, url); 66c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 67c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 68c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch void ValidateAllSegmentsSmall(const string& in_word) { 69c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch string escaped_word, url; 70c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch UrlToFilenameEncoder::EncodeSegment("", in_word, '/', &escaped_word); 71c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch CheckSegmentLength(escaped_word); 723345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick CheckValidChars(escaped_word, '\\'); 73c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch UrlToFilenameEncoder::Decode(escaped_word, '/', &url); 74c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch EXPECT_EQ(in_word, url); 75c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 76c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 77c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch void ValidateNoChange(const string& word) { 78c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // We always suffix the leaf with kEscapeChar, unless the leaf is empty. 79c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch Validate(word, word + escape_); 80c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 81c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 82c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch void ValidateEscaped(unsigned char ch) { 83c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // We always suffix the leaf with kEscapeChar, unless the leaf is empty. 84c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch char escaped[100]; 853345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick const char escape = UrlToFilenameEncoder::kEscapeChar; 86c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch base::snprintf(escaped, sizeof(escaped), "%c%02X%c", escape, ch, escape); 87c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch Validate(string(1, ch), escaped); 88c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 89c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 903345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick void ValidateUrl(const string& url, const string& base_path, 913345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick bool legacy_escape, const string& gold_filename) { 923345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick string encoded_filename = UrlToFilenameEncoder::Encode( 933345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick url, base_path, legacy_escape); 943345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick EXPECT_EQ(gold_filename, encoded_filename); 953345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick if (!legacy_escape) { 963345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick CheckSegmentLength(encoded_filename); 973345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick CheckValidChars(encoded_filename, kOtherDirSeparator); 983345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick string decoded_url; 993345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick UrlToFilenameEncoder::Decode(encoded_filename, kDirSeparator, 1003345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick &decoded_url); 1013345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick if (url != decoded_url) { 1023345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick EXPECT_EQ(url, "http://" + decoded_url); 1033345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick } 1043345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick } 1053345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick } 1063345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick 1073345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick void ValidateUrlOldNew(const string& url, const string& gold_old_filename, 1083345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick const string& gold_new_filename) { 1093345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick ValidateUrl(url, "", true, gold_old_filename); 1103345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick ValidateUrl(url, "", false, gold_new_filename); 1113345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick } 1123345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick 1133345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick void ValidateEncodeSame(const string& url1, const string& url2) { 1143345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick string filename1 = UrlToFilenameEncoder::Encode(url1, "", false); 1153345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick string filename2 = UrlToFilenameEncoder::Encode(url2, "", false); 1163345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick EXPECT_EQ(filename1, filename2); 1173345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick } 1183345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick 119c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch string escape_; 1203345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick string dir_sep_; 121c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}; 122c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 123c407dc5cd9bdc5668497f21b26b09d988ab439deBen MurdochTEST_F(UrlToFilenameEncoderTest, DoesNotEscape) { 124c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch ValidateNoChange(""); 125c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch ValidateNoChange("abcdefg"); 126c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch ValidateNoChange("abcdefghijklmnopqrstuvwxyz"); 127c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch ValidateNoChange("ZYXWVUT"); 128c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch ValidateNoChange("ZYXWVUTSRQPONMLKJIHGFEDCBA"); 129c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch ValidateNoChange("01234567689"); 1303345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick ValidateNoChange("_.=+-"); 131c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch ValidateNoChange("abcdefghijklmnopqrstuvwxyzZYXWVUTSRQPONMLKJIHGFEDCBA" 1323345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick "01234567689_.=+-"); 133c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch ValidateNoChange("index.html"); 134c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch ValidateNoChange("/"); 135c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch ValidateNoChange("/."); 136c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch ValidateNoChange("."); 137c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch ValidateNoChange(".."); 138c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 139c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 140c407dc5cd9bdc5668497f21b26b09d988ab439deBen MurdochTEST_F(UrlToFilenameEncoderTest, Escapes) { 1413345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick const string bad_chars = 1423345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick "<>:\"\\|?*" // Illegal on Windows 1433345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick "~`!$^&(){}[]';" // Bad for Unix shells 1443345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick "^@" // Build tool doesn't like 1453345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick "#%" // Tool doesn't like 1463345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick ","; // The escape char has to be escaped 1473345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick 1483345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick for (size_t i = 0; i < bad_chars.size(); ++i) { 1493345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick ValidateEscaped(bad_chars[i]); 1503345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick } 1513345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick 1523345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick // Check non-printable characters. 153c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch ValidateEscaped('\0'); 1543345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick for (size_t i = 127; i < 256; ++i) { 155c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch ValidateEscaped(static_cast<char>(i)); 156c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 157c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 158c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 159c407dc5cd9bdc5668497f21b26b09d988ab439deBen MurdochTEST_F(UrlToFilenameEncoderTest, DoesEscapeCorrectly) { 160c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch Validate("mysite.com&x", "mysite.com" + escape_ + "26x" + escape_); 161c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch Validate("/./", "/" + escape_ + "./" + escape_); 162c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch Validate("/../", "/" + escape_ + "../" + escape_); 1633345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick Validate("//", "/" + escape_ + "2F" + escape_); 164c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch Validate("/./leaf", "/" + escape_ + "./leaf" + escape_); 165c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch Validate("/../leaf", "/" + escape_ + "../leaf" + escape_); 1663345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick Validate("//leaf", "/" + escape_ + "2Fleaf" + escape_); 167c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch Validate("mysite/u?param1=x¶m2=y", 168c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch "mysite/u" + escape_ + "3Fparam1=x" + escape_ + "26param2=y" + 169c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch escape_); 170c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch Validate("search?q=dogs&go=&form=QBLH&qs=n", // from Latency Labs bing test. 171c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch "search" + escape_ + "3Fq=dogs" + escape_ + "26go=" + escape_ + 172c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch "26form=QBLH" + escape_ + "26qs=n" + escape_); 173c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch Validate("~joebob/my_neeto-website+with_stuff.asp?id=138&content=true", 174c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch "" + escape_ + "7Ejoebob/my_neeto-website+with_stuff.asp" + escape_ + 175c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch "3Fid=138" + escape_ + "26content=true" + escape_); 176c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 177c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 1783345a6884c488ff3a535c2c9acdd33d74b37e311Iain MerrickTEST_F(UrlToFilenameEncoderTest, EncodeUrlCorrectly) { 1793345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick ValidateUrlOldNew("http://www.google.com/index.html", 1803345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick "www.google.com" + dir_sep_ + "indexx2Ehtml", 1813345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick "www.google.com" + dir_sep_ + "index.html" + escape_); 1823345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick ValidateUrlOldNew("http://www.google.com/x/search?hl=en&q=dogs&oq=", 1833345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick "www.google.com" + dir_sep_ + "x" + dir_sep_ + 1843345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick "searchx3Fhlx3Denx26qx3Ddogsx26oqx3D", 1853345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick 1863345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick "www.google.com" + dir_sep_ + "x" + dir_sep_ + "search" + 1873345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick escape_ + "3Fhl=en" + escape_ + "26q=dogs" + escape_ + 1883345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick "26oq=" + escape_); 1893345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick ValidateUrlOldNew("http://www.foo.com/a//", 1903345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick "www.foo.com" + dir_sep_ + "ax255Cx255Cindexx2Ehtml", 1913345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick "www.foo.com" + dir_sep_ + "a" + dir_sep_ + escape_ + "2F" + 1923345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick escape_); 1933345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick 1943345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick // From bug: Double slash preserved. 1953345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick ValidateUrl("http://www.foo.com/u?site=http://www.google.com/index.html", 1963345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick "", false, 1973345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick "www.foo.com" + dir_sep_ + "u" + escape_ + "3Fsite=http" + 1983345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick escape_ + "3A" + dir_sep_ + escape_ + "2Fwww.google.com" + 1993345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick dir_sep_ + "index.html" + escape_); 2003345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick ValidateUrlOldNew( 2013345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick "http://blogutils.net/olct/online.php?" 2023345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick "site=http://thelwordfanfics.blogspot.&interval=600", 2033345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick 2043345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick "blogutils.net" + dir_sep_ + "olct" + dir_sep_ + "onlinex2Ephpx3F" 2053345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick "sitex3Dhttpx3Ax255Cx255Cthelwordfanficsx2Eblogspotx2Ex26intervalx3D600", 2063345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick 2073345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick "blogutils.net" + dir_sep_ + "olct" + dir_sep_ + "online.php" + escape_ + 2083345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick "3Fsite=http" + escape_ + "3A" + dir_sep_ + escape_ + 2093345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick "2Fthelwordfanfics.blogspot." + escape_ + "26interval=600" + escape_); 2103345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick} 2113345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick 2123345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick// From bug: Escapes treated the same as normal char. 2133345a6884c488ff3a535c2c9acdd33d74b37e311Iain MerrickTEST_F(UrlToFilenameEncoderTest, UnescapeUrlsBeforeEncode) { 2143345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick for (int i = 0; i < 128; ++i) { 2153345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick string unescaped(1, static_cast<char>(i)); 2163345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick string escaped = base::StringPrintf("%%%02X", i); 2173345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick ValidateEncodeSame(unescaped, escaped); 2183345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick } 2193345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick 2203345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick ValidateEncodeSame( 2213345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick "http://www.blogger.com/navbar.g?bName=God!&Mode=FOO&searchRoot" 2223345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick "=http%3A%2F%2Fsurvivorscanthrive.blogspot.com%2Fsearch", 2233345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick 2243345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick "http://www.blogger.com/navbar.g?bName=God%21&Mode=FOO&searchRoot" 2253345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick "=http%3A%2F%2Fsurvivorscanthrive.blogspot.com%2Fsearch"); 2263345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick} 2273345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick 2283345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick// From bug: Filename encoding is not prefix-free. 2293345a6884c488ff3a535c2c9acdd33d74b37e311Iain MerrickTEST_F(UrlToFilenameEncoderTest, EscapeSecondSlash) { 2303345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick Validate("/", "/" + escape_); 2313345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick Validate("//", "/" + escape_ + "2F" + escape_); 2323345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick Validate("///", "/" + escape_ + "2F" + "/" + escape_); 2333345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick} 2343345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick 235c407dc5cd9bdc5668497f21b26b09d988ab439deBen MurdochTEST_F(UrlToFilenameEncoderTest, LongTail) { 236c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch static char long_word[] = 237c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch "~joebob/briggs/12345678901234567890123456789012345678901234567890" 238c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch "1234567890123456789012345678901234567890123456789012345678901234567890" 239c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch "1234567890123456789012345678901234567890123456789012345678901234567890" 240c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch "1234567890123456789012345678901234567890123456789012345678901234567890" 241c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch "1234567890123456789012345678901234567890123456789012345678901234567890" 242c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch "1234567890123456789012345678901234567890123456789012345678901234567890"; 243c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 244c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // the long lines in the string below are 64 characters, so we can see 245c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // the slashes every 128. 246c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch string gold_long_word = 247c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch escape_ + "7Ejoebob/briggs/" 248c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch "1234567890123456789012345678901234567890123456789012345678901234" 249c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch "56789012345678901234567890123456789012345678901234567890123456" + 250c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch escape_ + "-/" 251c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch "7890123456789012345678901234567890123456789012345678901234567890" 252c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch "12345678901234567890123456789012345678901234567890123456789012" + 253c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch escape_ + "-/" 254c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch "3456789012345678901234567890123456789012345678901234567890123456" 255c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch "78901234567890123456789012345678901234567890123456789012345678" + 256c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch escape_ + "-/" 257c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch "9012345678901234567890" + escape_; 2583345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick EXPECT_LT(UrlToFilenameEncoder::kMaximumSubdirectoryLength, 259c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch sizeof(long_word)); 260c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch Validate(long_word, gold_long_word); 261c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 262c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 263c407dc5cd9bdc5668497f21b26b09d988ab439deBen MurdochTEST_F(UrlToFilenameEncoderTest, LongTailQuestion) { 264c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Here the '?' in the last path segment expands to @3F, making 265c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // it hit 128 chars before the input segment gets that big. 266c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch static char long_word[] = 267c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch "~joebob/briggs/1234567?1234567?1234567?1234567?1234567?" 268c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch "1234567?1234567?1234567?1234567?1234567?1234567?1234567?" 269c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch "1234567?1234567?1234567?1234567?1234567?1234567?1234567?" 270c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch "1234567?1234567?1234567?1234567?1234567?1234567?1234567?" 271c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch "1234567?1234567?1234567?1234567?1234567?1234567?1234567?" 272c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch "1234567?1234567?1234567?1234567?1234567?1234567?1234567?"; 273c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 274c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Notice that at the end of the third segment, we avoid splitting 275c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // the (escape_ + "3F") that was generated from the "?", so that segment is 276c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // only 127 characters. 277c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch string pattern = "1234567" + escape_ + "3F"; // 10 characters 278c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch string gold_long_word = 279c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch escape_ + "7Ejoebob/briggs/" + 280c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch pattern + pattern + pattern + pattern + pattern + pattern + "1234" 281c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch "567" + escape_ + "3F" + pattern + pattern + pattern + pattern + pattern + 282c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch "123456" + escape_ + "-/" 283c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch "7" + escape_ + "3F" + pattern + pattern + pattern + pattern + pattern + 284c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch pattern + pattern + pattern + pattern + pattern + pattern + pattern + 285c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch "12" + 286c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch escape_ + "-/" 287c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch "34567" + escape_ + "3F" + pattern + pattern + pattern + pattern + pattern 288c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch + "1234567" + escape_ + "3F" + pattern + pattern + pattern + pattern 289c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch + pattern + "1234567" + 290c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch escape_ + "-/" + 291c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch escape_ + "3F" + pattern + pattern + escape_; 2923345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick EXPECT_LT(UrlToFilenameEncoder::kMaximumSubdirectoryLength, 293c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch sizeof(long_word)); 294c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch Validate(long_word, gold_long_word); 295c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 296c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 297c407dc5cd9bdc5668497f21b26b09d988ab439deBen MurdochTEST_F(UrlToFilenameEncoderTest, CornerCasesNearMaxLenNoEscape) { 298c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // hit corner cases, +/- 4 characters from kMaxLen 299c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch for (int i = -4; i <= 4; ++i) { 300c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch string input; 3013345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick input.append(i + UrlToFilenameEncoder::kMaximumSubdirectoryLength, 'x'); 302c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch ValidateAllSegmentsSmall(input); 303c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 304c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 305c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 306c407dc5cd9bdc5668497f21b26b09d988ab439deBen MurdochTEST_F(UrlToFilenameEncoderTest, CornerCasesNearMaxLenWithEscape) { 307c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // hit corner cases, +/- 4 characters from kMaxLen. This time we 308c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // leave off the last 'x' and put in a '.', which ensures that we 309c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // are truncating with '/' *after* the expansion. 310c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch for (int i = -4; i <= 4; ++i) { 311c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch string input; 3123345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick input.append(i + UrlToFilenameEncoder::kMaximumSubdirectoryLength - 1, 'x'); 313c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch input.append(1, '.'); // this will expand to 3 characters. 314c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch ValidateAllSegmentsSmall(input); 315c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 316c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 317c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 318c407dc5cd9bdc5668497f21b26b09d988ab439deBen MurdochTEST_F(UrlToFilenameEncoderTest, LeafBranchAlias) { 319c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch Validate("/a/b/c", "/a/b/c" + escape_); // c is leaf file "c," 320c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch Validate("/a/b/c/d", "/a/b/c/d" + escape_); // c is directory "c" 321c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch Validate("/a/b/c/d/", "/a/b/c/d/" + escape_); 322c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 323c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 324c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 325c407dc5cd9bdc5668497f21b26b09d988ab439deBen MurdochTEST_F(UrlToFilenameEncoderTest, BackslashSeparator) { 326c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch string long_word; 327c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch string escaped_word; 3283345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick long_word.append(UrlToFilenameEncoder::kMaximumSubdirectoryLength + 1, 'x'); 329c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch UrlToFilenameEncoder::EncodeSegment("", long_word, '\\', &escaped_word); 330c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 331c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // check that one backslash, plus the escape ",-", and the ending , got added. 332c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch EXPECT_EQ(long_word.size() + 4, escaped_word.size()); 3333345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick ASSERT_LT(UrlToFilenameEncoder::kMaximumSubdirectoryLength, 334c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch escaped_word.size()); 335c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Check that the backslash got inserted at the correct spot. 336c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch EXPECT_EQ('\\', escaped_word[ 3373345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick UrlToFilenameEncoder::kMaximumSubdirectoryLength]); 338c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 339c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 3403345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick} // namespace net 341c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 342