15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Copyright (c) 2010 The Chromium Authors. All rights reserved. 25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Use of this source code is governed by a BSD-style license that can be 35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// found in the LICENSE file. 45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "net/tools/dump_cache/url_utilities.h" 65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/logging.h" 8868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)#include "base/strings/string_number_conversions.h" 9868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)#include "base/strings/string_util.h" 105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace net { 125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)std::string UrlUtilities::GetUrlHost(const std::string& url) { 145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) size_t b = url.find("//"); 155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (b == std::string::npos) 165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) b = 0; 175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) else 185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) b += 2; 195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) size_t next_slash = url.find_first_of('/', b); 205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) size_t next_colon = url.find_first_of(':', b); 215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (next_slash != std::string::npos 225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) && next_colon != std::string::npos 235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) && next_colon < next_slash) { 245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return std::string(url, b, next_colon - b); 255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (next_slash == std::string::npos) { 275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (next_colon != std::string::npos) { 285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return std::string(url, b, next_colon - b); 295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) next_slash = url.size(); 315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return std::string(url, b, next_slash - b); 345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)std::string UrlUtilities::GetUrlHostPath(const std::string& url) { 375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) size_t b = url.find("//"); 385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (b == std::string::npos) 395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) b = 0; 405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) else 415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) b += 2; 425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return std::string(url, b); 435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)std::string UrlUtilities::GetUrlPath(const std::string& url) { 465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) size_t b = url.find("//"); 475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (b == std::string::npos) 485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) b = 0; 495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) else 505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) b += 2; 515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) b = url.find("/", b); 525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (b == std::string::npos) 535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return "/"; 545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) size_t e = url.find("#", b+1); 565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (e != std::string::npos) 575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return std::string(url, b, (e - b)); 585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return std::string(url, b); 595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace { 625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Parsing states for UrlUtilities::Unescape 645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)enum UnescapeState { 655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NORMAL, // We are not in the middle of parsing an escape. 665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ESCAPE1, // We just parsed % . 675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ESCAPE2 // We just parsed %X for some hex digit X. 685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}; 695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} // namespace 715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)std::string UrlUtilities::Unescape(const std::string& escaped_url) { 735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) std::string unescaped_url, escape_text; 745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int escape_value; 755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) UnescapeState state = NORMAL; 765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) std::string::const_iterator iter = escaped_url.begin(); 775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) while (iter < escaped_url.end()) { 785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) char c = *iter; 795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) switch (state) { 805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case NORMAL: 815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (c == '%') { 825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) escape_text.clear(); 835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) state = ESCAPE1; 845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) unescaped_url.push_back(c); 865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ++iter; 885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case ESCAPE1: 905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (IsHexDigit(c)) { 915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) escape_text.push_back(c); 925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) state = ESCAPE2; 935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ++iter; 945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Unexpected, % followed by non-hex chars, pass it through. 965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) unescaped_url.push_back('%'); 975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) state = NORMAL; 985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 1005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) case ESCAPE2: 1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (IsHexDigit(c)) { 1025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) escape_text.push_back(c); 1035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool ok = base::HexStringToInt(escape_text, &escape_value); 1045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DCHECK(ok); 1055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) unescaped_url.push_back(static_cast<unsigned char>(escape_value)); 1065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) state = NORMAL; 1075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ++iter; 1085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 1095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Unexpected, % followed by non-hex chars, pass it through. 1105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) unescaped_url.push_back('%'); 1115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) unescaped_url.append(escape_text); 1125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) state = NORMAL; 1135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 1155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Unexpected, % followed by end of string, pass it through. 1185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (state == ESCAPE1 || state == ESCAPE2) { 1195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) unescaped_url.push_back('%'); 1205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) unescaped_url.append(escape_text); 1215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 1225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return unescaped_url; 1235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 1245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} // namespace net 1265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 127