1c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// Copyright (c) 2010 The Chromium Authors. All rights reserved.
2c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// Use of this source code is governed by a BSD-style license that can be
3c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// found in the LICENSE file.
4c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
5c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "net/tools/dump_cache/url_to_filename_encoder.h"
6c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
7c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include <string>
8c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include <vector>
93345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick
10c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "base/string_piece.h"
11c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "base/string_util.h"
123345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick#include "base/stringprintf.h"
13c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "testing/gtest/include/gtest/gtest.h"
14c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
15c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochusing base::StringPiece;
16c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochusing std::string;
17c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
18c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochnamespace net {
19c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
// Directory separator used for encoded filenames on this platform, and the
// "other" slash, which the tests below treat as an invalid character in an
// encoded filename.  Both are const: they are never modified, and const
// namespace-scope objects get internal linkage, so they cannot clash with
// same-named symbols in other translation units.
#ifdef WIN32
const char kDirSeparator = '\\';
const char kOtherDirSeparator = '/';
#else
const char kDirSeparator = '/';
const char kOtherDirSeparator = '\\';
#endif
27c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
28c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochclass UrlToFilenameEncoderTest : public ::testing::Test {
29c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch protected:
303345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  UrlToFilenameEncoderTest() : escape_(1, UrlToFilenameEncoder::kEscapeChar),
313345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick                               dir_sep_(1, kDirSeparator) {
323345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  }
33c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
34c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  void CheckSegmentLength(const StringPiece& escaped_word) {
35c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    std::vector<StringPiece> components;
36c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    Tokenize(escaped_word, StringPiece("/"), &components);
37c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    for (size_t i = 0; i < components.size(); ++i) {
383345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick      EXPECT_GE(UrlToFilenameEncoder::kMaximumSubdirectoryLength,
39c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch                components[i].size());
40c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    }
41c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  }
42c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
433345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  void CheckValidChars(const StringPiece& escaped_word, char invalid_slash) {
443345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    // These characters are invalid in Windows.  We add in ', as that's pretty
45c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    // inconvenient in a Unix filename.
46c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    //
47c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    // See http://msdn.microsoft.com/en-us/library/aa365247(VS.85).aspx
483345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    const string kInvalidChars = "<>:\"|?*'";
49c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    for (size_t i = 0; i < escaped_word.size(); ++i) {
50c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      char c = escaped_word[i];
513345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick      EXPECT_EQ(string::npos, kInvalidChars.find(c));
523345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick      EXPECT_NE(invalid_slash, c);
53c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      EXPECT_NE('\0', c);  // only invalid character in Posix
54c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      EXPECT_GT(0x7E, c);  // only English printable characters
55c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    }
56c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  }
57c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
58c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  void Validate(const string& in_word, const string& gold_word) {
59c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    string escaped_word, url;
60c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    UrlToFilenameEncoder::EncodeSegment("", in_word, '/', &escaped_word);
61c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    EXPECT_EQ(gold_word, escaped_word);
62c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    CheckSegmentLength(escaped_word);
633345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    CheckValidChars(escaped_word, '\\');
64c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    UrlToFilenameEncoder::Decode(escaped_word, '/', &url);
65c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    EXPECT_EQ(in_word, url);
66c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  }
67c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
68c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  void ValidateAllSegmentsSmall(const string& in_word) {
69c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    string escaped_word, url;
70c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    UrlToFilenameEncoder::EncodeSegment("", in_word, '/', &escaped_word);
71c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    CheckSegmentLength(escaped_word);
723345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    CheckValidChars(escaped_word, '\\');
73c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    UrlToFilenameEncoder::Decode(escaped_word, '/', &url);
74c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    EXPECT_EQ(in_word, url);
75c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  }
76c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
77c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  void ValidateNoChange(const string& word) {
78c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    // We always suffix the leaf with kEscapeChar, unless the leaf is empty.
79c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    Validate(word, word + escape_);
80c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  }
81c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
82c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  void ValidateEscaped(unsigned char ch) {
83c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    // We always suffix the leaf with kEscapeChar, unless the leaf is empty.
84c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    char escaped[100];
853345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    const char escape = UrlToFilenameEncoder::kEscapeChar;
86c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    base::snprintf(escaped, sizeof(escaped), "%c%02X%c", escape, ch, escape);
87c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    Validate(string(1, ch), escaped);
88c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  }
89c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
903345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  void ValidateUrl(const string& url, const string& base_path,
913345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick                   bool legacy_escape, const string& gold_filename) {
923345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    string encoded_filename = UrlToFilenameEncoder::Encode(
933345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick        url, base_path, legacy_escape);
943345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    EXPECT_EQ(gold_filename, encoded_filename);
953345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    if (!legacy_escape) {
963345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick      CheckSegmentLength(encoded_filename);
973345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick      CheckValidChars(encoded_filename, kOtherDirSeparator);
983345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick      string decoded_url;
993345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick      UrlToFilenameEncoder::Decode(encoded_filename, kDirSeparator,
1003345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick                                   &decoded_url);
1013345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick      if (url != decoded_url) {
1023345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick        EXPECT_EQ(url, "http://" + decoded_url);
1033345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick      }
1043345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    }
1053345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  }
1063345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick
1073345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  void ValidateUrlOldNew(const string& url, const string& gold_old_filename,
1083345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick                         const string& gold_new_filename) {
1093345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    ValidateUrl(url, "", true, gold_old_filename);
1103345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    ValidateUrl(url, "", false, gold_new_filename);
1113345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  }
1123345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick
1133345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  void ValidateEncodeSame(const string& url1, const string& url2) {
1143345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    string filename1 = UrlToFilenameEncoder::Encode(url1, "", false);
1153345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    string filename2 = UrlToFilenameEncoder::Encode(url2, "", false);
1163345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    EXPECT_EQ(filename1, filename2);
1173345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  }
1183345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick
119c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  string escape_;
1203345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  string dir_sep_;
121c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch};
122c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
123c407dc5cd9bdc5668497f21b26b09d988ab439deBen MurdochTEST_F(UrlToFilenameEncoderTest, DoesNotEscape) {
124c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  ValidateNoChange("");
125c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  ValidateNoChange("abcdefg");
126c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  ValidateNoChange("abcdefghijklmnopqrstuvwxyz");
127c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  ValidateNoChange("ZYXWVUT");
128c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  ValidateNoChange("ZYXWVUTSRQPONMLKJIHGFEDCBA");
129c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  ValidateNoChange("01234567689");
1303345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  ValidateNoChange("_.=+-");
131c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  ValidateNoChange("abcdefghijklmnopqrstuvwxyzZYXWVUTSRQPONMLKJIHGFEDCBA"
1323345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick                   "01234567689_.=+-");
133c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  ValidateNoChange("index.html");
134c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  ValidateNoChange("/");
135c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  ValidateNoChange("/.");
136c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  ValidateNoChange(".");
137c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  ValidateNoChange("..");
138c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
139c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
140c407dc5cd9bdc5668497f21b26b09d988ab439deBen MurdochTEST_F(UrlToFilenameEncoderTest, Escapes) {
1413345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  const string bad_chars =
1423345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick      "<>:\"\\|?*"      // Illegal on Windows
1433345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick      "~`!$^&(){}[]';"  // Bad for Unix shells
1443345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick      "^@"              // Build tool doesn't like
1453345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick      "#%"              // Tool doesn't like
1463345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick      ",";              // The escape char has to be escaped
1473345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick
1483345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  for (size_t i = 0; i < bad_chars.size(); ++i) {
1493345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    ValidateEscaped(bad_chars[i]);
1503345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  }
1513345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick
1523345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  // Check non-printable characters.
153c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  ValidateEscaped('\0');
1543345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  for (size_t i = 127; i < 256; ++i) {
155c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    ValidateEscaped(static_cast<char>(i));
156c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  }
157c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
158c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
159c407dc5cd9bdc5668497f21b26b09d988ab439deBen MurdochTEST_F(UrlToFilenameEncoderTest, DoesEscapeCorrectly) {
160c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  Validate("mysite.com&x", "mysite.com" + escape_ + "26x" + escape_);
161c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  Validate("/./", "/" + escape_ + "./" + escape_);
162c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  Validate("/../", "/" + escape_ + "../" + escape_);
1633345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  Validate("//", "/" + escape_ + "2F" + escape_);
164c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  Validate("/./leaf", "/" + escape_ + "./leaf" + escape_);
165c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  Validate("/../leaf", "/" + escape_ + "../leaf" + escape_);
1663345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  Validate("//leaf", "/" + escape_ + "2Fleaf" + escape_);
167c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  Validate("mysite/u?param1=x&param2=y",
168c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch           "mysite/u" + escape_ + "3Fparam1=x" + escape_ + "26param2=y" +
169c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch           escape_);
170c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  Validate("search?q=dogs&go=&form=QBLH&qs=n",  // from Latency Labs bing test.
171c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch           "search" + escape_ + "3Fq=dogs" + escape_ + "26go=" + escape_ +
172c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch           "26form=QBLH" + escape_ + "26qs=n" + escape_);
173c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  Validate("~joebob/my_neeto-website+with_stuff.asp?id=138&content=true",
174c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch           "" + escape_ + "7Ejoebob/my_neeto-website+with_stuff.asp" + escape_ +
175c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch           "3Fid=138" + escape_ + "26content=true" + escape_);
176c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
177c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
1783345a6884c488ff3a535c2c9acdd33d74b37e311Iain MerrickTEST_F(UrlToFilenameEncoderTest, EncodeUrlCorrectly) {
1793345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  ValidateUrlOldNew("http://www.google.com/index.html",
1803345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick                    "www.google.com" + dir_sep_ + "indexx2Ehtml",
1813345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick                    "www.google.com" + dir_sep_ + "index.html" + escape_);
1823345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  ValidateUrlOldNew("http://www.google.com/x/search?hl=en&q=dogs&oq=",
1833345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick                    "www.google.com" + dir_sep_ + "x" + dir_sep_ +
1843345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick                    "searchx3Fhlx3Denx26qx3Ddogsx26oqx3D",
1853345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick
1863345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick                    "www.google.com" + dir_sep_ + "x" + dir_sep_ + "search" +
1873345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick                    escape_ + "3Fhl=en" + escape_ + "26q=dogs" + escape_ +
1883345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick                    "26oq=" + escape_);
1893345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  ValidateUrlOldNew("http://www.foo.com/a//",
1903345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick                    "www.foo.com" + dir_sep_ + "ax255Cx255Cindexx2Ehtml",
1913345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick                    "www.foo.com" + dir_sep_ + "a" + dir_sep_ + escape_ + "2F" +
1923345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick                    escape_);
1933345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick
1943345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  // From bug: Double slash preserved.
1953345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  ValidateUrl("http://www.foo.com/u?site=http://www.google.com/index.html",
1963345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick              "", false,
1973345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick              "www.foo.com" + dir_sep_ + "u" + escape_ + "3Fsite=http" +
1983345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick              escape_ + "3A" + dir_sep_ + escape_ + "2Fwww.google.com" +
1993345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick              dir_sep_ + "index.html" + escape_);
2003345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  ValidateUrlOldNew(
2013345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick      "http://blogutils.net/olct/online.php?"
2023345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick      "site=http://thelwordfanfics.blogspot.&interval=600",
2033345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick
2043345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick      "blogutils.net" + dir_sep_ + "olct" + dir_sep_ + "onlinex2Ephpx3F"
2053345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick      "sitex3Dhttpx3Ax255Cx255Cthelwordfanficsx2Eblogspotx2Ex26intervalx3D600",
2063345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick
2073345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick      "blogutils.net" + dir_sep_ + "olct" + dir_sep_ + "online.php" + escape_ +
2083345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick      "3Fsite=http" + escape_ + "3A" + dir_sep_ + escape_ +
2093345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick      "2Fthelwordfanfics.blogspot." + escape_ + "26interval=600" + escape_);
2103345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick}
2113345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick
2123345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick// From bug: Escapes treated the same as normal char.
2133345a6884c488ff3a535c2c9acdd33d74b37e311Iain MerrickTEST_F(UrlToFilenameEncoderTest, UnescapeUrlsBeforeEncode) {
2143345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  for (int i = 0; i < 128; ++i) {
2153345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    string unescaped(1, static_cast<char>(i));
2163345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    string escaped = base::StringPrintf("%%%02X", i);
2173345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    ValidateEncodeSame(unescaped, escaped);
2183345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  }
2193345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick
2203345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  ValidateEncodeSame(
2213345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick      "http://www.blogger.com/navbar.g?bName=God!&Mode=FOO&searchRoot"
2223345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick      "=http%3A%2F%2Fsurvivorscanthrive.blogspot.com%2Fsearch",
2233345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick
2243345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick      "http://www.blogger.com/navbar.g?bName=God%21&Mode=FOO&searchRoot"
2253345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick      "=http%3A%2F%2Fsurvivorscanthrive.blogspot.com%2Fsearch");
2263345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick}
2273345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick
2283345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick// From bug: Filename encoding is not prefix-free.
2293345a6884c488ff3a535c2c9acdd33d74b37e311Iain MerrickTEST_F(UrlToFilenameEncoderTest, EscapeSecondSlash) {
2303345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  Validate("/", "/" + escape_);
2313345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  Validate("//", "/" + escape_ + "2F" + escape_);
2323345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  Validate("///", "/" + escape_ + "2F" + "/" + escape_);
2333345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick}
2343345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick
235c407dc5cd9bdc5668497f21b26b09d988ab439deBen MurdochTEST_F(UrlToFilenameEncoderTest, LongTail) {
236c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  static char long_word[] =
237c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "~joebob/briggs/12345678901234567890123456789012345678901234567890"
238c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "1234567890123456789012345678901234567890123456789012345678901234567890"
239c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "1234567890123456789012345678901234567890123456789012345678901234567890"
240c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "1234567890123456789012345678901234567890123456789012345678901234567890"
241c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "1234567890123456789012345678901234567890123456789012345678901234567890"
242c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "1234567890123456789012345678901234567890123456789012345678901234567890";
243c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
244c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // the long lines in the string below are 64 characters, so we can see
245c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // the slashes every 128.
246c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  string gold_long_word =
247c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      escape_ + "7Ejoebob/briggs/"
248c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "1234567890123456789012345678901234567890123456789012345678901234"
249c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "56789012345678901234567890123456789012345678901234567890123456" +
250c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      escape_ + "-/"
251c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "7890123456789012345678901234567890123456789012345678901234567890"
252c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "12345678901234567890123456789012345678901234567890123456789012" +
253c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      escape_ + "-/"
254c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "3456789012345678901234567890123456789012345678901234567890123456"
255c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "78901234567890123456789012345678901234567890123456789012345678" +
256c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      escape_ + "-/"
257c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "9012345678901234567890" + escape_;
2583345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  EXPECT_LT(UrlToFilenameEncoder::kMaximumSubdirectoryLength,
259c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch            sizeof(long_word));
260c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  Validate(long_word, gold_long_word);
261c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
262c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
263c407dc5cd9bdc5668497f21b26b09d988ab439deBen MurdochTEST_F(UrlToFilenameEncoderTest, LongTailQuestion) {
264c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // Here the '?' in the last path segment expands to @3F, making
265c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // it hit 128 chars before the input segment gets that big.
266c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  static char long_word[] =
267c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "~joebob/briggs/1234567?1234567?1234567?1234567?1234567?"
268c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "1234567?1234567?1234567?1234567?1234567?1234567?1234567?"
269c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "1234567?1234567?1234567?1234567?1234567?1234567?1234567?"
270c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "1234567?1234567?1234567?1234567?1234567?1234567?1234567?"
271c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "1234567?1234567?1234567?1234567?1234567?1234567?1234567?"
272c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "1234567?1234567?1234567?1234567?1234567?1234567?1234567?";
273c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
274c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // Notice that at the end of the third segment, we avoid splitting
275c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // the (escape_ + "3F") that was generated from the "?", so that segment is
276c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // only 127 characters.
277c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  string pattern = "1234567" + escape_ + "3F";  // 10 characters
278c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  string gold_long_word =
279c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      escape_ + "7Ejoebob/briggs/" +
280c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      pattern + pattern + pattern + pattern + pattern + pattern + "1234"
281c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "567" + escape_ + "3F" + pattern + pattern + pattern + pattern + pattern +
282c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch       "123456" + escape_ + "-/"
283c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "7" + escape_ + "3F" + pattern + pattern + pattern + pattern + pattern +
284c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      pattern + pattern + pattern + pattern + pattern + pattern + pattern +
285c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "12" +
286c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      escape_ + "-/"
287c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "34567" + escape_ + "3F" + pattern + pattern + pattern + pattern + pattern
288c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      + "1234567" + escape_ + "3F" + pattern + pattern + pattern + pattern
289c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      + pattern + "1234567" +
290c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      escape_ + "-/" +
291c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      escape_ + "3F" + pattern + pattern + escape_;
2923345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  EXPECT_LT(UrlToFilenameEncoder::kMaximumSubdirectoryLength,
293c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch            sizeof(long_word));
294c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  Validate(long_word, gold_long_word);
295c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
296c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
297c407dc5cd9bdc5668497f21b26b09d988ab439deBen MurdochTEST_F(UrlToFilenameEncoderTest, CornerCasesNearMaxLenNoEscape) {
298c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // hit corner cases, +/- 4 characters from kMaxLen
299c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  for (int i = -4; i <= 4; ++i) {
300c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    string input;
3013345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    input.append(i + UrlToFilenameEncoder::kMaximumSubdirectoryLength, 'x');
302c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    ValidateAllSegmentsSmall(input);
303c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  }
304c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
305c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
306c407dc5cd9bdc5668497f21b26b09d988ab439deBen MurdochTEST_F(UrlToFilenameEncoderTest, CornerCasesNearMaxLenWithEscape) {
307c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // hit corner cases, +/- 4 characters from kMaxLen.  This time we
308c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // leave off the last 'x' and put in a '.', which ensures that we
309c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // are truncating with '/' *after* the expansion.
310c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  for (int i = -4; i <= 4; ++i) {
311c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    string input;
3123345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    input.append(i + UrlToFilenameEncoder::kMaximumSubdirectoryLength - 1, 'x');
313c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    input.append(1, '.');  // this will expand to 3 characters.
314c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    ValidateAllSegmentsSmall(input);
315c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  }
316c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
317c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
318c407dc5cd9bdc5668497f21b26b09d988ab439deBen MurdochTEST_F(UrlToFilenameEncoderTest, LeafBranchAlias) {
319c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  Validate("/a/b/c", "/a/b/c" + escape_);        // c is leaf file "c,"
320c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  Validate("/a/b/c/d", "/a/b/c/d" + escape_);    // c is directory "c"
321c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  Validate("/a/b/c/d/", "/a/b/c/d/" + escape_);
322c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
323c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
324c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
325c407dc5cd9bdc5668497f21b26b09d988ab439deBen MurdochTEST_F(UrlToFilenameEncoderTest, BackslashSeparator) {
326c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  string long_word;
327c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  string escaped_word;
3283345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  long_word.append(UrlToFilenameEncoder::kMaximumSubdirectoryLength + 1, 'x');
329c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  UrlToFilenameEncoder::EncodeSegment("", long_word, '\\', &escaped_word);
330c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
331c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // check that one backslash, plus the escape ",-", and the ending , got added.
332c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ(long_word.size() + 4, escaped_word.size());
3333345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  ASSERT_LT(UrlToFilenameEncoder::kMaximumSubdirectoryLength,
334c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch            escaped_word.size());
335c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // Check that the backslash got inserted at the correct spot.
336c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  EXPECT_EQ('\\', escaped_word[
3373345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick      UrlToFilenameEncoder::kMaximumSubdirectoryLength]);
338c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
339c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
3403345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick}  // namespace net
341c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
342