url_utilities.cc revision 868fa2fe829687343ffae624259930155e16dbd8
1// Copyright (c) 2010 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "net/tools/dump_cache/url_utilities.h"
6
7#include "base/logging.h"
8#include "base/strings/string_number_conversions.h"
9#include "base/strings/string_util.h"
10
11namespace net {
12
13std::string UrlUtilities::GetUrlHost(const std::string& url) {
14  size_t b = url.find("//");
15  if (b == std::string::npos)
16    b = 0;
17  else
18    b += 2;
19  size_t next_slash = url.find_first_of('/', b);
20  size_t next_colon = url.find_first_of(':', b);
21  if (next_slash != std::string::npos
22      && next_colon != std::string::npos
23      && next_colon < next_slash) {
24    return std::string(url, b, next_colon - b);
25  }
26  if (next_slash == std::string::npos) {
27    if (next_colon != std::string::npos) {
28      return std::string(url, b, next_colon - b);
29    } else {
30      next_slash = url.size();
31    }
32  }
33  return std::string(url, b, next_slash - b);
34}
35
36std::string UrlUtilities::GetUrlHostPath(const std::string& url) {
37  size_t b = url.find("//");
38  if (b == std::string::npos)
39    b = 0;
40  else
41    b += 2;
42  return std::string(url, b);
43}
44
45std::string UrlUtilities::GetUrlPath(const std::string& url) {
46  size_t b = url.find("//");
47  if (b == std::string::npos)
48    b = 0;
49  else
50    b += 2;
51  b = url.find("/", b);
52  if (b == std::string::npos)
53    return "/";
54
55  size_t e = url.find("#", b+1);
56  if (e != std::string::npos)
57    return std::string(url, b, (e - b));
58  return std::string(url, b);
59}
60
namespace {

// States of the small state machine that drives UrlUtilities::Unescape.
enum UnescapeState {
  // Not inside an escape sequence.
  NORMAL,
  // A '%' has just been consumed.
  ESCAPE1,
  // A '%' followed by one hex digit has been consumed.
  ESCAPE2
};

}  // namespace
71
72std::string UrlUtilities::Unescape(const std::string& escaped_url) {
73  std::string unescaped_url, escape_text;
74  int escape_value;
75  UnescapeState state = NORMAL;
76  std::string::const_iterator iter = escaped_url.begin();
77  while (iter < escaped_url.end()) {
78    char c = *iter;
79    switch (state) {
80      case NORMAL:
81        if (c == '%') {
82          escape_text.clear();
83          state = ESCAPE1;
84        } else {
85          unescaped_url.push_back(c);
86        }
87        ++iter;
88        break;
89      case ESCAPE1:
90        if (IsHexDigit(c)) {
91          escape_text.push_back(c);
92          state = ESCAPE2;
93          ++iter;
94        } else {
95          // Unexpected, % followed by non-hex chars, pass it through.
96          unescaped_url.push_back('%');
97          state = NORMAL;
98        }
99        break;
100      case ESCAPE2:
101        if (IsHexDigit(c)) {
102          escape_text.push_back(c);
103          bool ok = base::HexStringToInt(escape_text, &escape_value);
104          DCHECK(ok);
105          unescaped_url.push_back(static_cast<unsigned char>(escape_value));
106          state = NORMAL;
107          ++iter;
108        } else {
109          // Unexpected, % followed by non-hex chars, pass it through.
110          unescaped_url.push_back('%');
111          unescaped_url.append(escape_text);
112          state = NORMAL;
113        }
114        break;
115    }
116  }
117  // Unexpected, % followed by end of string, pass it through.
118  if (state == ESCAPE1 || state == ESCAPE2) {
119    unescaped_url.push_back('%');
120    unescaped_url.append(escape_text);
121  }
122  return unescaped_url;
123}
124
125}  // namespace net
126
127