url_to_filename_encoder.h revision c7f5f8508d98d5952d42ed7648c2a8f30a4da156
1// Copyright (c) 2009 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef NET_TOOLS_DUMP_CACHE_URL_TO_FILE_ENCODER_H_
6#define NET_TOOLS_DUMP_CACHE_URL_TO_FILE_ENCODER_H_
7
8#include <string>
9
10#include "base/file_path.h"
11#include "base/file_util.h"
12#include "base/string_util.h"
13#include "googleurl/src/gurl.h"
14
15namespace net {
16
17// Helper class for converting a URL into a filename.
18class UrlToFilenameEncoder {
19 public:
20  // Given a |url| and a |base_path|, returns a FilePath which represents this
21  // |url|.
22  static FilePath Encode(const std::string& url, FilePath base_path) {
23    std::string clean_url(url);
24    if (clean_url.length() && clean_url[clean_url.length()-1] == '/')
25      clean_url.append("index.html");
26
27    GURL gurl(clean_url);
28    FilePath filename(base_path);
29    filename = filename.AppendASCII(gurl.host());
30
31    std::string url_filename = gurl.PathForRequest();
32    // Strip the leading '/'
33    if (url_filename[0] == '/')
34      url_filename = url_filename.substr(1);
35
36    // replace '/' with '\'
37    ConvertToSlashes(&url_filename);
38
39    // strip double slashes ("\\")
40    StripDoubleSlashes(&url_filename);
41
42    // Save path as filesystem-safe characters
43    url_filename = Escape(url_filename);
44    filename = filename.AppendASCII(url_filename);
45
46    return filename;
47  }
48
49 private:
50  // This is the length at which we chop individual subdirectories.
51  // Technically, we shouldn't need to do this, but I found that
52  // even with long-filename support, windows had trouble creating
53  // long subdirectories, and making them shorter helps.
54  static const size_t kMaximumSubdirectoryLength = 128;
55
56  // Escape the given input |path| and chop any individual components
57  // of the path which are greater than kMaximumSubdirectoryLength characters
58  // into two chunks.
59  static std::string Escape(const std::string& path) {
60    std::string output;
61    int last_slash = 0;
62    for (size_t index = 0; index < path.length(); index++) {
63      char ch = path[index];
64      if (ch == 0x5C)
65        last_slash = index;
66      if ((ch == 0x2D) ||                   // hyphen
67          (ch == 0x5C) || (ch == 0x5F) ||   // backslash, underscore
68          ((0x30 <= ch) && (ch <= 0x39)) || // Digits [0-9]
69          ((0x41 <= ch) && (ch <= 0x5A)) || // Uppercase [A-Z]
70          ((0x61 <= ch) && (ch <= 0x7A))) { // Lowercase [a-z]
71        output.append(&path[index],1);
72      } else {
73        char encoded[3];
74        encoded[0] = 'x';
75        encoded[1] = ch / 16;
76        encoded[1] += (encoded[1] >= 10) ? 'A' - 10 : '0';
77        encoded[2] = ch % 16;
78        encoded[2] += (encoded[2] >= 10) ? 'A' - 10 : '0';
79        output.append(encoded, 3);
80      }
81      if (index - last_slash > kMaximumSubdirectoryLength) {
82        char backslash = '\\';
83        output.append(&backslash, 1);
84        last_slash = index;
85      }
86    }
87    return output;
88  }
89
90  // Replace all instances of |from| within |str| as |to|.
91  static void ReplaceAll(const std::string& from,
92                         const std::string& to,
93                         std::string* str) {
94    std::string::size_type pos(0);
95    while((pos = str->find(from, pos)) != std::string::npos) {
96      str->replace(pos, from.size(), to);
97      pos += from.size();
98    }
99  }
100
101  // Replace all instances of "/" with "\" in |path|.
102  static void ConvertToSlashes(std::string* path) {
103    static const char slash[] = { '/', '\0' };
104    static const char backslash[] = { '\\', '\0' };
105    ReplaceAll(slash, backslash, path);
106  }
107
108  // Replace all instances of "\\" with "%5C%5C" in |path|.
109  static void StripDoubleSlashes(std::string* path) {
110    static const char doubleslash[] = { '\\', '\\', '\0' };
111    static const char escaped_doubleslash[] =
112      { '%', '5', 'C', '%', '5', 'C','\0' };
113    ReplaceAll(doubleslash, escaped_doubleslash, path);
114  }
115};
116
117} // namespace net
118
#endif  // NET_TOOLS_DUMP_CACHE_URL_TO_FILE_ENCODER_H_
120
121