1// Copyright 2014 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "chrome/browser/ui/elide_url.h"
6
7#include "base/logging.h"
8#include "base/strings/string_split.h"
9#include "base/strings/utf_string_conversions.h"
10#include "net/base/escape.h"
11#include "net/base/net_util.h"
12#include "net/base/registry_controlled_domains/registry_controlled_domain.h"
13#include "ui/gfx/text_elider.h"
14#include "ui/gfx/text_utils.h"
15#include "url/gurl.h"
16
17using base::UTF8ToUTF16;
18using gfx::ElideText;
19using gfx::GetStringWidthF;
20using gfx::kEllipsisUTF16;
21using gfx::kForwardSlash;
22
23namespace {
24
// Separator placed between the leading ellipsis and the domain when
// ElideHost() has no room left for any part of the subdomain.
const base::char16 kDot = '.';
26
27// Build a path from the first |num_components| elements in |path_elements|.
28// Prepends |path_prefix|, appends |filename|, inserts ellipsis if appropriate.
29base::string16 BuildPathFromComponents(
30    const base::string16& path_prefix,
31    const std::vector<base::string16>& path_elements,
32    const base::string16& filename,
33    size_t num_components) {
34  // Add the initial elements of the path.
35  base::string16 path = path_prefix;
36
37  // Build path from first |num_components| elements.
38  for (size_t j = 0; j < num_components; ++j)
39    path += path_elements[j] + kForwardSlash;
40
41  // Add |filename|, ellipsis if necessary.
42  if (num_components != (path_elements.size() - 1))
43    path += base::string16(kEllipsisUTF16) + kForwardSlash;
44  path += filename;
45
46  return path;
47}
48
49// Takes a prefix (Domain, or Domain+subdomain) and a collection of path
50// components and elides if possible. Returns a string containing the longest
51// possible elided path, or an empty string if elision is not possible.
52base::string16 ElideComponentizedPath(
53    const base::string16& url_path_prefix,
54    const std::vector<base::string16>& url_path_elements,
55    const base::string16& url_filename,
56    const base::string16& url_query,
57    const gfx::FontList& font_list,
58    float available_pixel_width) {
59  const size_t url_path_number_of_elements = url_path_elements.size();
60
61  CHECK(url_path_number_of_elements);
62  for (size_t i = url_path_number_of_elements - 1; i > 0; --i) {
63    base::string16 elided_path = BuildPathFromComponents(url_path_prefix,
64        url_path_elements, url_filename, i);
65    if (available_pixel_width >= GetStringWidthF(elided_path, font_list))
66      return ElideText(elided_path + url_query, font_list,
67                       available_pixel_width, gfx::ELIDE_TAIL);
68  }
69
70  return base::string16();
71}
72
73// Splits the hostname in the |url| into sub-strings for the full hostname,
74// the domain (TLD+1), and the subdomain (everything leading the domain).
75void SplitHost(const GURL& url,
76               base::string16* url_host,
77               base::string16* url_domain,
78               base::string16* url_subdomain) {
79  // Get Host.
80  *url_host = UTF8ToUTF16(url.host());
81
82  // Get domain and registry information from the URL.
83  *url_domain = UTF8ToUTF16(
84      net::registry_controlled_domains::GetDomainAndRegistry(
85          url, net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES));
86  if (url_domain->empty())
87    *url_domain = *url_host;
88
89  // Add port if required.
90  if (!url.port().empty()) {
91    *url_host += UTF8ToUTF16(":" + url.port());
92    *url_domain += UTF8ToUTF16(":" + url.port());
93  }
94
95  // Get sub domain.
96  const size_t domain_start_index = url_host->find(*url_domain);
97  base::string16 kWwwPrefix = UTF8ToUTF16("www.");
98  if (domain_start_index != base::string16::npos)
99    *url_subdomain = url_host->substr(0, domain_start_index);
100  if ((*url_subdomain == kWwwPrefix || url_subdomain->empty() ||
101      url.SchemeIsFile())) {
102    url_subdomain->clear();
103  }
104}
105
106}  // namespace
107
108// TODO(pkasting): http://crbug.com/77883 This whole function gets
109// kerning/ligatures/etc. issues potentially wrong by assuming that the width of
110// a rendered string is always the sum of the widths of its substrings.  Also I
111// suspect it could be made simpler.
112base::string16 ElideUrl(const GURL& url,
113                        const gfx::FontList& font_list,
114                        float available_pixel_width,
115                        const std::string& languages) {
116  // Get a formatted string and corresponding parsing of the url.
117  url::Parsed parsed;
118  const base::string16 url_string =
119      net::FormatUrl(url, languages, net::kFormatUrlOmitAll,
120                     net::UnescapeRule::SPACES, &parsed, NULL, NULL);
121  if (available_pixel_width <= 0)
122    return url_string;
123
124  // If non-standard, return plain eliding.
125  if (!url.IsStandard())
126    return ElideText(url_string, font_list, available_pixel_width,
127                     gfx::ELIDE_TAIL);
128
129  // Now start eliding url_string to fit within available pixel width.
130  // Fist pass - check to see whether entire url_string fits.
131  const float pixel_width_url_string = GetStringWidthF(url_string, font_list);
132  if (available_pixel_width >= pixel_width_url_string)
133    return url_string;
134
135  // Get the path substring, including query and reference.
136  const size_t path_start_index = parsed.path.begin;
137  const size_t path_len = parsed.path.len;
138  base::string16 url_path_query_etc = url_string.substr(path_start_index);
139  base::string16 url_path = url_string.substr(path_start_index, path_len);
140
141  // Return general elided text if url minus the query fits.
142  const base::string16 url_minus_query =
143      url_string.substr(0, path_start_index + path_len);
144  if (available_pixel_width >= GetStringWidthF(url_minus_query, font_list))
145    return ElideText(url_string, font_list, available_pixel_width,
146                     gfx::ELIDE_TAIL);
147
148  base::string16 url_host;
149  base::string16 url_domain;
150  base::string16 url_subdomain;
151  SplitHost(url, &url_host, &url_domain, &url_subdomain);
152
153  // If this is a file type, the path is now defined as everything after ":".
154  // For example, "C:/aa/aa/bb", the path is "/aa/bb/cc". Interesting, the
155  // domain is now C: - this is a nice hack for eliding to work pleasantly.
156  if (url.SchemeIsFile()) {
157    // Split the path string using ":"
158    std::vector<base::string16> file_path_split;
159    base::SplitString(url_path, ':', &file_path_split);
160    if (file_path_split.size() > 1) {  // File is of type "file:///C:/.."
161      url_host.clear();
162      url_domain.clear();
163      url_subdomain.clear();
164
165      const base::string16 kColon = UTF8ToUTF16(":");
166      url_host = url_domain = file_path_split.at(0).substr(1) + kColon;
167      url_path_query_etc = url_path = file_path_split.at(1);
168    }
169  }
170
171  // Second Pass - remove scheme - the rest fits.
172  const float pixel_width_url_host = GetStringWidthF(url_host, font_list);
173  const float pixel_width_url_path = GetStringWidthF(url_path_query_etc,
174                                                     font_list);
175  if (available_pixel_width >=
176      pixel_width_url_host + pixel_width_url_path)
177    return url_host + url_path_query_etc;
178
179  // Third Pass: Subdomain, domain and entire path fits.
180  const float pixel_width_url_domain = GetStringWidthF(url_domain, font_list);
181  const float pixel_width_url_subdomain =
182      GetStringWidthF(url_subdomain, font_list);
183  if (available_pixel_width >=
184      pixel_width_url_subdomain + pixel_width_url_domain +
185      pixel_width_url_path)
186    return url_subdomain + url_domain + url_path_query_etc;
187
188  // Query element.
189  base::string16 url_query;
190  const float kPixelWidthDotsTrailer = GetStringWidthF(
191      base::string16(kEllipsisUTF16), font_list);
192  if (parsed.query.is_nonempty()) {
193    url_query = UTF8ToUTF16("?") + url_string.substr(parsed.query.begin);
194    if (available_pixel_width >=
195        (pixel_width_url_subdomain + pixel_width_url_domain +
196         pixel_width_url_path - GetStringWidthF(url_query, font_list))) {
197      return ElideText(url_subdomain + url_domain + url_path_query_etc,
198                       font_list, available_pixel_width, gfx::ELIDE_TAIL);
199    }
200  }
201
202  // Parse url_path using '/'.
203  std::vector<base::string16> url_path_elements;
204  base::SplitString(url_path, kForwardSlash, &url_path_elements);
205
206  // Get filename - note that for a path ending with /
207  // such as www.google.com/intl/ads/, the file name is ads/.
208  size_t url_path_number_of_elements = url_path_elements.size();
209  DCHECK(url_path_number_of_elements != 0);
210  base::string16 url_filename;
211  if ((url_path_elements.at(url_path_number_of_elements - 1)).length() > 0) {
212    url_filename = *(url_path_elements.end() - 1);
213  } else if (url_path_number_of_elements > 1) {  // Path ends with a '/'.
214    url_filename = url_path_elements.at(url_path_number_of_elements - 2) +
215        kForwardSlash;
216    url_path_number_of_elements--;
217  }
218  DCHECK(url_path_number_of_elements != 0);
219
220  const size_t kMaxNumberOfUrlPathElementsAllowed = 1024;
221  if (url_path_number_of_elements <= 1 ||
222      url_path_number_of_elements > kMaxNumberOfUrlPathElementsAllowed) {
223    // No path to elide, or too long of a path (could overflow in loop below)
224    // Just elide this as a text string.
225    return ElideText(url_subdomain + url_domain + url_path_query_etc, font_list,
226                     available_pixel_width, gfx::ELIDE_TAIL);
227  }
228
229  // Start eliding the path and replacing elements by ".../".
230  const base::string16 kEllipsisAndSlash =
231      base::string16(kEllipsisUTF16) + kForwardSlash;
232  const float pixel_width_ellipsis_slash =
233      GetStringWidthF(kEllipsisAndSlash, font_list);
234
235  // Check with both subdomain and domain.
236  base::string16 elided_path =
237      ElideComponentizedPath(url_subdomain + url_domain, url_path_elements,
238                             url_filename, url_query, font_list,
239                             available_pixel_width);
240  if (!elided_path.empty())
241    return elided_path;
242
243  // Check with only domain.
244  // If a subdomain is present, add an ellipsis before domain.
245  // This is added only if the subdomain pixel width is larger than
246  // the pixel width of kEllipsis. Otherwise, subdomain remains,
247  // which means that this case has been resolved earlier.
248  base::string16 url_elided_domain = url_subdomain + url_domain;
249  if (pixel_width_url_subdomain > kPixelWidthDotsTrailer) {
250    if (!url_subdomain.empty())
251      url_elided_domain = kEllipsisAndSlash[0] + url_domain;
252    else
253      url_elided_domain = url_domain;
254
255    elided_path = ElideComponentizedPath(url_elided_domain, url_path_elements,
256                                         url_filename, url_query, font_list,
257                                         available_pixel_width);
258
259    if (!elided_path.empty())
260      return elided_path;
261  }
262
263  // Return elided domain/.../filename anyway.
264  base::string16 final_elided_url_string(url_elided_domain);
265  const float url_elided_domain_width = GetStringWidthF(url_elided_domain,
266                                                        font_list);
267
268  // A hack to prevent trailing ".../...".
269  if ((available_pixel_width - url_elided_domain_width) >
270      pixel_width_ellipsis_slash + kPixelWidthDotsTrailer +
271      GetStringWidthF(base::ASCIIToUTF16("UV"), font_list)) {
272    final_elided_url_string += BuildPathFromComponents(base::string16(),
273        url_path_elements, url_filename, 1);
274  } else {
275    final_elided_url_string += url_path;
276  }
277
278  return ElideText(final_elided_url_string, font_list, available_pixel_width,
279                   gfx::ELIDE_TAIL);
280}
281
282base::string16 ElideHost(const GURL& url,
283                         const gfx::FontList& font_list,
284                         float available_pixel_width) {
285  base::string16 url_host;
286  base::string16 url_domain;
287  base::string16 url_subdomain;
288  SplitHost(url, &url_host, &url_domain, &url_subdomain);
289
290  const float pixel_width_url_host = GetStringWidthF(url_host, font_list);
291  if (available_pixel_width >= pixel_width_url_host)
292    return url_host;
293
294  if (url_subdomain.empty())
295    return url_domain;
296
297  const float pixel_width_url_domain = GetStringWidthF(url_domain, font_list);
298  float subdomain_width = available_pixel_width - pixel_width_url_domain;
299  if (subdomain_width <= 0)
300    return base::string16(kEllipsisUTF16) + kDot + url_domain;
301
302  const base::string16 elided_subdomain = ElideText(
303      url_subdomain, font_list, subdomain_width, gfx::ELIDE_HEAD);
304  return elided_subdomain + url_domain;
305}
306