15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Copyright (c) 2010 The Chromium Authors. All rights reserved.
25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Use of this source code is governed by a BSD-style license that can be
35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// found in the LICENSE file.
45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "net/tools/dump_cache/url_utilities.h"
65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/logging.h"
8868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)#include "base/strings/string_number_conversions.h"
9868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)#include "base/strings/string_util.h"
105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace net {
125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)std::string UrlUtilities::GetUrlHost(const std::string& url) {
145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  size_t b = url.find("//");
155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (b == std::string::npos)
165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    b = 0;
175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  else
185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    b += 2;
195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  size_t next_slash = url.find_first_of('/', b);
205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  size_t next_colon = url.find_first_of(':', b);
215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (next_slash != std::string::npos
225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      && next_colon != std::string::npos
235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      && next_colon < next_slash) {
245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return std::string(url, b, next_colon - b);
255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (next_slash == std::string::npos) {
275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (next_colon != std::string::npos) {
285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return std::string(url, b, next_colon - b);
295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else {
305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      next_slash = url.size();
315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return std::string(url, b, next_slash - b);
345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)std::string UrlUtilities::GetUrlHostPath(const std::string& url) {
375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  size_t b = url.find("//");
385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (b == std::string::npos)
395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    b = 0;
405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  else
415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    b += 2;
425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return std::string(url, b);
435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)std::string UrlUtilities::GetUrlPath(const std::string& url) {
465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  size_t b = url.find("//");
475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (b == std::string::npos)
485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    b = 0;
495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  else
505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    b += 2;
515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  b = url.find("/", b);
525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (b == std::string::npos)
535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return "/";
545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  size_t e = url.find("#", b+1);
565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (e != std::string::npos)
575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return std::string(url, b, (e - b));
585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return std::string(url, b);
595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
namespace {

// States of the small parser that drives UrlUtilities::Unescape.
enum UnescapeState {
  // Not inside an escape sequence.
  NORMAL,
  // A '%' has just been consumed.
  ESCAPE1,
  // A '%' followed by exactly one hex digit has been consumed.
  ESCAPE2
};

}  // namespace
715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)std::string UrlUtilities::Unescape(const std::string& escaped_url) {
735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  std::string unescaped_url, escape_text;
745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int escape_value;
755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  UnescapeState state = NORMAL;
765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  std::string::const_iterator iter = escaped_url.begin();
775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  while (iter < escaped_url.end()) {
785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    char c = *iter;
795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    switch (state) {
805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      case NORMAL:
815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if (c == '%') {
825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          escape_text.clear();
835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          state = ESCAPE1;
845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        } else {
855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          unescaped_url.push_back(c);
865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        }
875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ++iter;
885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        break;
895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      case ESCAPE1:
905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if (IsHexDigit(c)) {
915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          escape_text.push_back(c);
925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          state = ESCAPE2;
935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          ++iter;
945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        } else {
955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          // Unexpected, % followed by non-hex chars, pass it through.
965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          unescaped_url.push_back('%');
975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          state = NORMAL;
985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        }
995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        break;
1005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      case ESCAPE2:
1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        if (IsHexDigit(c)) {
1025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          escape_text.push_back(c);
1035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          bool ok = base::HexStringToInt(escape_text, &escape_value);
1045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          DCHECK(ok);
1055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          unescaped_url.push_back(static_cast<unsigned char>(escape_value));
1065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          state = NORMAL;
1075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          ++iter;
1085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        } else {
1095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          // Unexpected, % followed by non-hex chars, pass it through.
1105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          unescaped_url.push_back('%');
1115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          unescaped_url.append(escape_text);
1125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          state = NORMAL;
1135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        }
1145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        break;
1155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
1165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Unexpected, % followed by end of string, pass it through.
1185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (state == ESCAPE1 || state == ESCAPE2) {
1195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    unescaped_url.push_back('%');
1205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    unescaped_url.append(escape_text);
1215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return unescaped_url;
1235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}  // namespace net
1265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
127