1// Copyright 2014 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "chrome/browser/ui/elide_url.h" 6 7#include "base/logging.h" 8#include "base/strings/string_split.h" 9#include "base/strings/utf_string_conversions.h" 10#include "net/base/escape.h" 11#include "net/base/net_util.h" 12#include "net/base/registry_controlled_domains/registry_controlled_domain.h" 13#include "ui/gfx/text_elider.h" 14#include "ui/gfx/text_utils.h" 15#include "url/gurl.h" 16 17using base::UTF8ToUTF16; 18using gfx::ElideText; 19using gfx::GetStringWidthF; 20using gfx::kEllipsisUTF16; 21using gfx::kForwardSlash; 22 23namespace { 24 25const base::char16 kDot = '.'; 26 27// Build a path from the first |num_components| elements in |path_elements|. 28// Prepends |path_prefix|, appends |filename|, inserts ellipsis if appropriate. 29base::string16 BuildPathFromComponents( 30 const base::string16& path_prefix, 31 const std::vector<base::string16>& path_elements, 32 const base::string16& filename, 33 size_t num_components) { 34 // Add the initial elements of the path. 35 base::string16 path = path_prefix; 36 37 // Build path from first |num_components| elements. 38 for (size_t j = 0; j < num_components; ++j) 39 path += path_elements[j] + kForwardSlash; 40 41 // Add |filename|, ellipsis if necessary. 42 if (num_components != (path_elements.size() - 1)) 43 path += base::string16(kEllipsisUTF16) + kForwardSlash; 44 path += filename; 45 46 return path; 47} 48 49// Takes a prefix (Domain, or Domain+subdomain) and a collection of path 50// components and elides if possible. Returns a string containing the longest 51// possible elided path, or an empty string if elision is not possible. 52base::string16 ElideComponentizedPath( 53 const base::string16& url_path_prefix, 54 const std::vector<base::string16>& url_path_elements, 55 const base::string16& url_filename, 56 const base::string16& url_query, 57 const gfx::FontList& font_list, 58 float available_pixel_width) { 59 const size_t url_path_number_of_elements = url_path_elements.size(); 60 61 CHECK(url_path_number_of_elements); 62 for (size_t i = url_path_number_of_elements - 1; i > 0; --i) { 63 base::string16 elided_path = BuildPathFromComponents(url_path_prefix, 64 url_path_elements, url_filename, i); 65 if (available_pixel_width >= GetStringWidthF(elided_path, font_list)) 66 return ElideText(elided_path + url_query, font_list, 67 available_pixel_width, gfx::ELIDE_TAIL); 68 } 69 70 return base::string16(); 71} 72 73// Splits the hostname in the |url| into sub-strings for the full hostname, 74// the domain (TLD+1), and the subdomain (everything leading the domain). 75void SplitHost(const GURL& url, 76 base::string16* url_host, 77 base::string16* url_domain, 78 base::string16* url_subdomain) { 79 // Get Host. 80 *url_host = UTF8ToUTF16(url.host()); 81 82 // Get domain and registry information from the URL. 83 *url_domain = UTF8ToUTF16( 84 net::registry_controlled_domains::GetDomainAndRegistry( 85 url, net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES)); 86 if (url_domain->empty()) 87 *url_domain = *url_host; 88 89 // Add port if required. 90 if (!url.port().empty()) { 91 *url_host += UTF8ToUTF16(":" + url.port()); 92 *url_domain += UTF8ToUTF16(":" + url.port()); 93 } 94 95 // Get sub domain. 96 const size_t domain_start_index = url_host->find(*url_domain); 97 base::string16 kWwwPrefix = UTF8ToUTF16("www."); 98 if (domain_start_index != base::string16::npos) 99 *url_subdomain = url_host->substr(0, domain_start_index); 100 if ((*url_subdomain == kWwwPrefix || url_subdomain->empty() || 101 url.SchemeIsFile())) { 102 url_subdomain->clear(); 103 } 104} 105 106} // namespace 107 108// TODO(pkasting): http://crbug.com/77883 This whole function gets 109// kerning/ligatures/etc. issues potentially wrong by assuming that the width of 110// a rendered string is always the sum of the widths of its substrings. Also I 111// suspect it could be made simpler. 112base::string16 ElideUrl(const GURL& url, 113 const gfx::FontList& font_list, 114 float available_pixel_width, 115 const std::string& languages) { 116 // Get a formatted string and corresponding parsing of the url. 117 url::Parsed parsed; 118 const base::string16 url_string = 119 net::FormatUrl(url, languages, net::kFormatUrlOmitAll, 120 net::UnescapeRule::SPACES, &parsed, NULL, NULL); 121 if (available_pixel_width <= 0) 122 return url_string; 123 124 // If non-standard, return plain eliding. 125 if (!url.IsStandard()) 126 return ElideText(url_string, font_list, available_pixel_width, 127 gfx::ELIDE_TAIL); 128 129 // Now start eliding url_string to fit within available pixel width. 130 // Fist pass - check to see whether entire url_string fits. 131 const float pixel_width_url_string = GetStringWidthF(url_string, font_list); 132 if (available_pixel_width >= pixel_width_url_string) 133 return url_string; 134 135 // Get the path substring, including query and reference. 136 const size_t path_start_index = parsed.path.begin; 137 const size_t path_len = parsed.path.len; 138 base::string16 url_path_query_etc = url_string.substr(path_start_index); 139 base::string16 url_path = url_string.substr(path_start_index, path_len); 140 141 // Return general elided text if url minus the query fits. 142 const base::string16 url_minus_query = 143 url_string.substr(0, path_start_index + path_len); 144 if (available_pixel_width >= GetStringWidthF(url_minus_query, font_list)) 145 return ElideText(url_string, font_list, available_pixel_width, 146 gfx::ELIDE_TAIL); 147 148 base::string16 url_host; 149 base::string16 url_domain; 150 base::string16 url_subdomain; 151 SplitHost(url, &url_host, &url_domain, &url_subdomain); 152 153 // If this is a file type, the path is now defined as everything after ":". 154 // For example, "C:/aa/aa/bb", the path is "/aa/bb/cc". Interesting, the 155 // domain is now C: - this is a nice hack for eliding to work pleasantly. 156 if (url.SchemeIsFile()) { 157 // Split the path string using ":" 158 std::vector<base::string16> file_path_split; 159 base::SplitString(url_path, ':', &file_path_split); 160 if (file_path_split.size() > 1) { // File is of type "file:///C:/.." 161 url_host.clear(); 162 url_domain.clear(); 163 url_subdomain.clear(); 164 165 const base::string16 kColon = UTF8ToUTF16(":"); 166 url_host = url_domain = file_path_split.at(0).substr(1) + kColon; 167 url_path_query_etc = url_path = file_path_split.at(1); 168 } 169 } 170 171 // Second Pass - remove scheme - the rest fits. 172 const float pixel_width_url_host = GetStringWidthF(url_host, font_list); 173 const float pixel_width_url_path = GetStringWidthF(url_path_query_etc, 174 font_list); 175 if (available_pixel_width >= 176 pixel_width_url_host + pixel_width_url_path) 177 return url_host + url_path_query_etc; 178 179 // Third Pass: Subdomain, domain and entire path fits. 180 const float pixel_width_url_domain = GetStringWidthF(url_domain, font_list); 181 const float pixel_width_url_subdomain = 182 GetStringWidthF(url_subdomain, font_list); 183 if (available_pixel_width >= 184 pixel_width_url_subdomain + pixel_width_url_domain + 185 pixel_width_url_path) 186 return url_subdomain + url_domain + url_path_query_etc; 187 188 // Query element. 189 base::string16 url_query; 190 const float kPixelWidthDotsTrailer = GetStringWidthF( 191 base::string16(kEllipsisUTF16), font_list); 192 if (parsed.query.is_nonempty()) { 193 url_query = UTF8ToUTF16("?") + url_string.substr(parsed.query.begin); 194 if (available_pixel_width >= 195 (pixel_width_url_subdomain + pixel_width_url_domain + 196 pixel_width_url_path - GetStringWidthF(url_query, font_list))) { 197 return ElideText(url_subdomain + url_domain + url_path_query_etc, 198 font_list, available_pixel_width, gfx::ELIDE_TAIL); 199 } 200 } 201 202 // Parse url_path using '/'. 203 std::vector<base::string16> url_path_elements; 204 base::SplitString(url_path, kForwardSlash, &url_path_elements); 205 206 // Get filename - note that for a path ending with / 207 // such as www.google.com/intl/ads/, the file name is ads/. 208 size_t url_path_number_of_elements = url_path_elements.size(); 209 DCHECK(url_path_number_of_elements != 0); 210 base::string16 url_filename; 211 if ((url_path_elements.at(url_path_number_of_elements - 1)).length() > 0) { 212 url_filename = *(url_path_elements.end() - 1); 213 } else if (url_path_number_of_elements > 1) { // Path ends with a '/'. 214 url_filename = url_path_elements.at(url_path_number_of_elements - 2) + 215 kForwardSlash; 216 url_path_number_of_elements--; 217 } 218 DCHECK(url_path_number_of_elements != 0); 219 220 const size_t kMaxNumberOfUrlPathElementsAllowed = 1024; 221 if (url_path_number_of_elements <= 1 || 222 url_path_number_of_elements > kMaxNumberOfUrlPathElementsAllowed) { 223 // No path to elide, or too long of a path (could overflow in loop below) 224 // Just elide this as a text string. 225 return ElideText(url_subdomain + url_domain + url_path_query_etc, font_list, 226 available_pixel_width, gfx::ELIDE_TAIL); 227 } 228 229 // Start eliding the path and replacing elements by ".../". 230 const base::string16 kEllipsisAndSlash = 231 base::string16(kEllipsisUTF16) + kForwardSlash; 232 const float pixel_width_ellipsis_slash = 233 GetStringWidthF(kEllipsisAndSlash, font_list); 234 235 // Check with both subdomain and domain. 236 base::string16 elided_path = 237 ElideComponentizedPath(url_subdomain + url_domain, url_path_elements, 238 url_filename, url_query, font_list, 239 available_pixel_width); 240 if (!elided_path.empty()) 241 return elided_path; 242 243 // Check with only domain. 244 // If a subdomain is present, add an ellipsis before domain. 245 // This is added only if the subdomain pixel width is larger than 246 // the pixel width of kEllipsis. Otherwise, subdomain remains, 247 // which means that this case has been resolved earlier. 248 base::string16 url_elided_domain = url_subdomain + url_domain; 249 if (pixel_width_url_subdomain > kPixelWidthDotsTrailer) { 250 if (!url_subdomain.empty()) 251 url_elided_domain = kEllipsisAndSlash[0] + url_domain; 252 else 253 url_elided_domain = url_domain; 254 255 elided_path = ElideComponentizedPath(url_elided_domain, url_path_elements, 256 url_filename, url_query, font_list, 257 available_pixel_width); 258 259 if (!elided_path.empty()) 260 return elided_path; 261 } 262 263 // Return elided domain/.../filename anyway. 264 base::string16 final_elided_url_string(url_elided_domain); 265 const float url_elided_domain_width = GetStringWidthF(url_elided_domain, 266 font_list); 267 268 // A hack to prevent trailing ".../...". 269 if ((available_pixel_width - url_elided_domain_width) > 270 pixel_width_ellipsis_slash + kPixelWidthDotsTrailer + 271 GetStringWidthF(base::ASCIIToUTF16("UV"), font_list)) { 272 final_elided_url_string += BuildPathFromComponents(base::string16(), 273 url_path_elements, url_filename, 1); 274 } else { 275 final_elided_url_string += url_path; 276 } 277 278 return ElideText(final_elided_url_string, font_list, available_pixel_width, 279 gfx::ELIDE_TAIL); 280} 281 282base::string16 ElideHost(const GURL& url, 283 const gfx::FontList& font_list, 284 float available_pixel_width) { 285 base::string16 url_host; 286 base::string16 url_domain; 287 base::string16 url_subdomain; 288 SplitHost(url, &url_host, &url_domain, &url_subdomain); 289 290 const float pixel_width_url_host = GetStringWidthF(url_host, font_list); 291 if (available_pixel_width >= pixel_width_url_host) 292 return url_host; 293 294 if (url_subdomain.empty()) 295 return url_domain; 296 297 const float pixel_width_url_domain = GetStringWidthF(url_domain, font_list); 298 float subdomain_width = available_pixel_width - pixel_width_url_domain; 299 if (subdomain_width <= 0) 300 return base::string16(kEllipsisUTF16) + kDot + url_domain; 301 302 const base::string16 elided_subdomain = ElideText( 303 url_subdomain, font_list, subdomain_width, gfx::ELIDE_HEAD); 304 return elided_subdomain + url_domain; 305} 306