text_elider.cc revision a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7
1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4//
5// This file implements utility functions for eliding and formatting UI text.
6//
7// Note that several of the functions declared in text_elider.h are implemented
8// in this file using helper classes in an unnamed namespace.
9
10#include "ui/gfx/text_elider.h"
11
12#include <string>
13#include <vector>
14
15#include "base/files/file_path.h"
16#include "base/i18n/break_iterator.h"
17#include "base/i18n/char_iterator.h"
18#include "base/i18n/rtl.h"
19#include "base/memory/scoped_ptr.h"
20#include "base/strings/string_split.h"
21#include "base/strings/string_util.h"
22#include "base/strings/sys_string_conversions.h"
23#include "base/strings/utf_string_conversions.h"
24#include "net/base/escape.h"
25#include "net/base/net_util.h"
26#include "net/base/registry_controlled_domains/registry_controlled_domain.h"
27#include "third_party/icu/source/common/unicode/rbbi.h"
28#include "third_party/icu/source/common/unicode/uloc.h"
29#include "ui/gfx/font_list.h"
30#include "ui/gfx/text_utils.h"
31#include "url/gurl.h"
32
33namespace gfx {
34
// U+2026 (the single-character ellipsis) encoded as UTF-8.
const char kEllipsis[] = "\xE2\x80\xA6";
// The same U+2026 character as a NUL-terminated UTF-16 string.
const base::char16 kEllipsisUTF16[] = { 0x2026, 0 };
// Separator used in this file when splitting and rebuilding URL paths.
const base::char16 kForwardSlash = '/';
39
40namespace {
41
42// Helper class to split + elide text, while respecting UTF16 surrogate pairs.
43class StringSlicer {
44 public:
45  StringSlicer(const base::string16& text,
46               const base::string16& ellipsis,
47               bool elide_in_middle)
48      : text_(text),
49        ellipsis_(ellipsis),
50        elide_in_middle_(elide_in_middle) {
51  }
52
53  // Cuts |text_| to be |length| characters long. If |elide_in_middle_| is true,
54  // the middle of the string is removed to leave equal-length pieces from the
55  // beginning and end of the string; otherwise, the end of the string is
56  // removed and only the beginning remains. If |insert_ellipsis| is true,
57  // then an ellipsis character will be inserted at the cut point.
58  base::string16 CutString(size_t length, bool insert_ellipsis) {
59    const base::string16 ellipsis_text = insert_ellipsis ? ellipsis_
60                                                         : base::string16();
61
62    if (!elide_in_middle_)
63      return text_.substr(0, FindValidBoundaryBefore(length)) + ellipsis_text;
64
65    // We put the extra character, if any, before the cut.
66    const size_t half_length = length / 2;
67    const size_t prefix_length = FindValidBoundaryBefore(length - half_length);
68    const size_t suffix_start_guess = text_.length() - half_length;
69    const size_t suffix_start = FindValidBoundaryAfter(suffix_start_guess);
70    const size_t suffix_length =
71        half_length - (suffix_start_guess - suffix_start);
72    return text_.substr(0, prefix_length) + ellipsis_text +
73           text_.substr(suffix_start, suffix_length);
74  }
75
76 private:
77  // Returns a valid cut boundary at or before |index|.
78  size_t FindValidBoundaryBefore(size_t index) const {
79    DCHECK_LE(index, text_.length());
80    if (index != text_.length())
81      U16_SET_CP_START(text_.data(), 0, index);
82    return index;
83  }
84
85  // Returns a valid cut boundary at or after |index|.
86  size_t FindValidBoundaryAfter(size_t index) const {
87    DCHECK_LE(index, text_.length());
88    if (index != text_.length())
89      U16_SET_CP_LIMIT(text_.data(), 0, index, text_.length());
90    return index;
91  }
92
93  // The text to be sliced.
94  const base::string16& text_;
95
96  // Ellipsis string to use.
97  const base::string16& ellipsis_;
98
99  // If true, the middle of the string will be elided.
100  bool elide_in_middle_;
101
102  DISALLOW_COPY_AND_ASSIGN(StringSlicer);
103};
104
105// Build a path from the first |num_components| elements in |path_elements|.
106// Prepends |path_prefix|, appends |filename|, inserts ellipsis if appropriate.
107base::string16 BuildPathFromComponents(
108    const base::string16& path_prefix,
109    const std::vector<base::string16>& path_elements,
110    const base::string16& filename,
111    size_t num_components) {
112  // Add the initial elements of the path.
113  base::string16 path = path_prefix;
114
115  // Build path from first |num_components| elements.
116  for (size_t j = 0; j < num_components; ++j)
117    path += path_elements[j] + kForwardSlash;
118
119  // Add |filename|, ellipsis if necessary.
120  if (num_components != (path_elements.size() - 1))
121    path += base::string16(kEllipsisUTF16) + kForwardSlash;
122  path += filename;
123
124  return path;
125}
126
127// Takes a prefix (Domain, or Domain+subdomain) and a collection of path
128// components and elides if possible. Returns a string containing the longest
129// possible elided path, or an empty string if elision is not possible.
130base::string16 ElideComponentizedPath(
131    const base::string16& url_path_prefix,
132    const std::vector<base::string16>& url_path_elements,
133    const base::string16& url_filename,
134    const base::string16& url_query,
135    const FontList& font_list,
136    float available_pixel_width) {
137  const size_t url_path_number_of_elements = url_path_elements.size();
138
139  CHECK(url_path_number_of_elements);
140  for (size_t i = url_path_number_of_elements - 1; i > 0; --i) {
141    base::string16 elided_path = BuildPathFromComponents(url_path_prefix,
142        url_path_elements, url_filename, i);
143    if (available_pixel_width >= GetStringWidthF(elided_path, font_list))
144      return ElideText(elided_path + url_query, font_list,
145                       available_pixel_width, ELIDE_AT_END);
146  }
147
148  return base::string16();
149}
150
151}  // namespace
152
153base::string16 ElideEmail(const base::string16& email,
154                          const FontList& font_list,
155                          float available_pixel_width) {
156  if (GetStringWidthF(email, font_list) <= available_pixel_width)
157    return email;
158
159  // Split the email into its local-part (username) and domain-part. The email
160  // spec technically allows for @ symbols in the local-part (username) of the
161  // email under some special requirements. It is guaranteed that there is no @
162  // symbol in the domain part of the email however so splitting at the last @
163  // symbol is safe.
164  const size_t split_index = email.find_last_of('@');
165  DCHECK_NE(split_index, base::string16::npos);
166  base::string16 username = email.substr(0, split_index);
167  base::string16 domain = email.substr(split_index + 1);
168  DCHECK(!username.empty());
169  DCHECK(!domain.empty());
170
171  // Subtract the @ symbol from the available width as it is mandatory.
172  const base::string16 kAtSignUTF16 = ASCIIToUTF16("@");
173  available_pixel_width -= GetStringWidthF(kAtSignUTF16, font_list);
174
175  // Check whether eliding the domain is necessary: if eliding the username
176  // is sufficient, the domain will not be elided.
177  const float full_username_width = GetStringWidthF(username, font_list);
178  const float available_domain_width =
179      available_pixel_width -
180      std::min(full_username_width,
181               GetStringWidthF(username.substr(0, 1) + kEllipsisUTF16,
182                               font_list));
183  if (GetStringWidthF(domain, font_list) > available_domain_width) {
184    // Elide the domain so that it only takes half of the available width.
185    // Should the username not need all the width available in its half, the
186    // domain will occupy the leftover width.
187    // If |desired_domain_width| is greater than |available_domain_width|: the
188    // minimal username elision allowed by the specifications will not fit; thus
189    // |desired_domain_width| must be <= |available_domain_width| at all cost.
190    const float desired_domain_width =
191        std::min(available_domain_width,
192                 std::max(available_pixel_width - full_username_width,
193                          available_pixel_width / 2));
194    domain = ElideText(domain, font_list, desired_domain_width,
195                       ELIDE_IN_MIDDLE);
196    // Failing to elide the domain such that at least one character remains
197    // (other than the ellipsis itself) remains: return a single ellipsis.
198    if (domain.length() <= 1U)
199      return base::string16(kEllipsisUTF16);
200  }
201
202  // Fit the username in the remaining width (at this point the elided username
203  // is guaranteed to fit with at least one character remaining given all the
204  // precautions taken earlier).
205  available_pixel_width -= GetStringWidthF(domain, font_list);
206  username = ElideText(username, font_list, available_pixel_width,
207                       ELIDE_AT_END);
208
209  return username + kAtSignUTF16 + domain;
210}
211
// Formats |url| for display and shortens it to fit |available_pixel_width|
// when rendered with |font_list|. |languages| is forwarded to net::FormatUrl().
//
// The function tries a sequence of increasingly aggressive reductions and
// returns as soon as one fits:
//   1. the whole formatted URL;
//   2. host + path (scheme dropped);
//   3. subdomain + domain + path (a "www." subdomain is dropped);
//   4. the same with the query end-elided;
//   5. path components replaced by an ellipsis, first keeping the subdomain
//      and then replacing the subdomain by an ellipsis as well;
//   6. as a last resort, an end-elided domain + path string.
// A non-positive |available_pixel_width| returns the formatted URL unelided,
// and non-standard URLs are simply elided at the end.
//
// TODO(pkasting): http://crbug.com/77883 This whole function gets
// kerning/ligatures/etc. issues potentially wrong by assuming that the width of
// a rendered string is always the sum of the widths of its substrings.  Also I
// suspect it could be made simpler.
base::string16 ElideUrl(const GURL& url,
                        const FontList& font_list,
                        float available_pixel_width,
                        const std::string& languages) {
  // Get a formatted string and corresponding parsing of the url.
  url_parse::Parsed parsed;
  const base::string16 url_string =
      net::FormatUrl(url, languages, net::kFormatUrlOmitAll,
                     net::UnescapeRule::SPACES, &parsed, NULL, NULL);
  // With no width to work with, hand back the formatted URL as-is.
  if (available_pixel_width <= 0)
    return url_string;

  // If non-standard, return plain eliding.
  if (!url.IsStandard())
    return ElideText(url_string, font_list, available_pixel_width,
                     ELIDE_AT_END);

  // Now start eliding url_string to fit within available pixel width.
  // First pass - check to see whether entire url_string fits.
  const float pixel_width_url_string = GetStringWidthF(url_string, font_list);
  if (available_pixel_width >= pixel_width_url_string)
    return url_string;

  // Get the path substring, including query and reference.
  // |url_path_query_etc| runs from the start of the path to the end of the
  // string; |url_path| is the path component alone.
  const size_t path_start_index = parsed.path.begin;
  const size_t path_len = parsed.path.len;
  base::string16 url_path_query_etc = url_string.substr(path_start_index);
  base::string16 url_path = url_string.substr(path_start_index, path_len);

  // Return general elided text if url minus the query fits.
  const base::string16 url_minus_query =
      url_string.substr(0, path_start_index + path_len);
  if (available_pixel_width >= GetStringWidthF(url_minus_query, font_list))
    return ElideText(url_string, font_list, available_pixel_width,
                     ELIDE_AT_END);

  // Get Host.
  base::string16 url_host = UTF8ToUTF16(url.host());

  // Get domain and registry information from the URL.
  // Fall back to the full host when no domain could be determined.
  base::string16 url_domain = UTF8ToUTF16(
      net::registry_controlled_domains::GetDomainAndRegistry(
          url, net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES));
  if (url_domain.empty())
    url_domain = url_host;

  // Add port if required.
  if (!url.port().empty()) {
    url_host += UTF8ToUTF16(":" + url.port());
    url_domain += UTF8ToUTF16(":" + url.port());
  }

  // Get sub domain: whatever precedes the domain inside the host. A plain
  // "www." subdomain is discarded, and file URLs never keep a subdomain.
  base::string16 url_subdomain;
  const size_t domain_start_index = url_host.find(url_domain);
  if (domain_start_index != base::string16::npos)
    url_subdomain = url_host.substr(0, domain_start_index);
  const base::string16 kWwwPrefix = UTF8ToUTF16("www.");
  if ((url_subdomain == kWwwPrefix || url_subdomain.empty() ||
      url.SchemeIsFile())) {
    url_subdomain.clear();
  }

  // If this is a file type, the path is now defined as everything after ":".
  // For example, for "file:///C:/aa/bb/cc" the path becomes "/aa/bb/cc" and
  // the host and domain both become "C:" - this is a nice hack for eliding to
  // work pleasantly.
  if (url.SchemeIsFile()) {
    // Split the path string using ":"
    std::vector<base::string16> file_path_split;
    base::SplitString(url_path, ':', &file_path_split);
    if (file_path_split.size() > 1) {  // File is of type "file:///C:/.."
      url_host.clear();
      url_domain.clear();
      url_subdomain.clear();

      // The first piece minus its first character, plus ":", becomes the
      // host/domain; only the second piece is kept as the path.
      const base::string16 kColon = UTF8ToUTF16(":");
      url_host = url_domain = file_path_split.at(0).substr(1) + kColon;
      url_path_query_etc = url_path = file_path_split.at(1);
    }
  }

  // Second Pass - remove scheme - the rest fits.
  const float pixel_width_url_host = GetStringWidthF(url_host, font_list);
  const float pixel_width_url_path = GetStringWidthF(url_path_query_etc,
                                                     font_list);
  if (available_pixel_width >=
      pixel_width_url_host + pixel_width_url_path)
    return url_host + url_path_query_etc;

  // Third Pass: Subdomain, domain and entire path fits.
  const float pixel_width_url_domain = GetStringWidthF(url_domain, font_list);
  const float pixel_width_url_subdomain =
      GetStringWidthF(url_subdomain, font_list);
  if (available_pixel_width >=
      pixel_width_url_subdomain + pixel_width_url_domain +
      pixel_width_url_path)
    return url_subdomain + url_domain + url_path_query_etc;

  // Query element: if everything except the query fits, end-elide the whole
  // subdomain + domain + path string (which cuts into the query).
  base::string16 url_query;
  const float kPixelWidthDotsTrailer = GetStringWidthF(
      base::string16(kEllipsisUTF16), font_list);
  if (parsed.query.is_nonempty()) {
    url_query = UTF8ToUTF16("?") + url_string.substr(parsed.query.begin);
    if (available_pixel_width >=
        (pixel_width_url_subdomain + pixel_width_url_domain +
         pixel_width_url_path - GetStringWidthF(url_query, font_list))) {
      return ElideText(url_subdomain + url_domain + url_path_query_etc,
                       font_list, available_pixel_width, ELIDE_AT_END);
    }
  }

  // Parse url_path using '/'.
  std::vector<base::string16> url_path_elements;
  base::SplitString(url_path, kForwardSlash, &url_path_elements);

  // Get filename - note that for a path ending with /
  // such as www.google.com/intl/ads/, the file name is ads/.
  size_t url_path_number_of_elements = url_path_elements.size();
  DCHECK(url_path_number_of_elements != 0);
  base::string16 url_filename;
  if ((url_path_elements.at(url_path_number_of_elements - 1)).length() > 0) {
    url_filename = *(url_path_elements.end() - 1);
  } else if (url_path_number_of_elements > 1) {  // Path ends with a '/'.
    url_filename = url_path_elements.at(url_path_number_of_elements - 2) +
        kForwardSlash;
    // Only the local count is reduced here; |url_path_elements| itself keeps
    // its trailing empty element.
    url_path_number_of_elements--;
  }
  DCHECK(url_path_number_of_elements != 0);

  const size_t kMaxNumberOfUrlPathElementsAllowed = 1024;
  if (url_path_number_of_elements <= 1 ||
      url_path_number_of_elements > kMaxNumberOfUrlPathElementsAllowed) {
    // No path to elide, or too long of a path (could overflow in loop below)
    // Just elide this as a text string.
    return ElideText(url_subdomain + url_domain + url_path_query_etc, font_list,
                     available_pixel_width, ELIDE_AT_END);
  }

  // Start eliding the path and replacing elements by ".../".
  const base::string16 kEllipsisAndSlash =
      base::string16(kEllipsisUTF16) + kForwardSlash;
  const float pixel_width_ellipsis_slash =
      GetStringWidthF(kEllipsisAndSlash, font_list);

  // Check with both subdomain and domain.
  base::string16 elided_path =
      ElideComponentizedPath(url_subdomain + url_domain, url_path_elements,
                             url_filename, url_query, font_list,
                             available_pixel_width);
  if (!elided_path.empty())
    return elided_path;

  // Check with only domain.
  // If a subdomain is present, add an ellipsis before domain.
  // This is added only if the subdomain pixel width is larger than
  // the pixel width of kEllipsis. Otherwise, subdomain remains,
  // which means that this case has been resolved earlier.
  base::string16 url_elided_domain = url_subdomain + url_domain;
  if (pixel_width_url_subdomain > kPixelWidthDotsTrailer) {
    // kEllipsisAndSlash[0] is the ellipsis character on its own.
    if (!url_subdomain.empty())
      url_elided_domain = kEllipsisAndSlash[0] + url_domain;
    else
      url_elided_domain = url_domain;

    elided_path = ElideComponentizedPath(url_elided_domain, url_path_elements,
                                         url_filename, url_query, font_list,
                                         available_pixel_width);

    if (!elided_path.empty())
      return elided_path;
  }

  // Return elided domain/.../filename anyway.
  base::string16 final_elided_url_string(url_elided_domain);
  const float url_elided_domain_width = GetStringWidthF(url_elided_domain,
                                                        font_list);

  // A hack to prevent trailing ".../...": only use the componentized form
  // when the space left after the domain exceeds the width of ".../" plus an
  // ellipsis plus the two sample characters "UV"; otherwise append the plain
  // path and let the final end-elision trim it.
  if ((available_pixel_width - url_elided_domain_width) >
      pixel_width_ellipsis_slash + kPixelWidthDotsTrailer +
      GetStringWidthF(ASCIIToUTF16("UV"), font_list)) {
    final_elided_url_string += BuildPathFromComponents(base::string16(),
        url_path_elements, url_filename, 1);
  } else {
    final_elided_url_string += url_path;
  }

  return ElideText(final_elided_url_string, font_list, available_pixel_width,
                   ELIDE_AT_END);
}
407
408base::string16 ElideFilename(const base::FilePath& filename,
409                             const FontList& font_list,
410                             float available_pixel_width) {
411#if defined(OS_WIN)
412  base::string16 filename_utf16 = filename.value();
413  base::string16 extension = filename.Extension();
414  base::string16 rootname = filename.BaseName().RemoveExtension().value();
415#elif defined(OS_POSIX)
416  base::string16 filename_utf16 = WideToUTF16(base::SysNativeMBToWide(
417      filename.value()));
418  base::string16 extension = WideToUTF16(base::SysNativeMBToWide(
419      filename.Extension()));
420  base::string16 rootname = WideToUTF16(base::SysNativeMBToWide(
421      filename.BaseName().RemoveExtension().value()));
422#endif
423
424  const float full_width = GetStringWidthF(filename_utf16, font_list);
425  if (full_width <= available_pixel_width)
426    return base::i18n::GetDisplayStringInLTRDirectionality(filename_utf16);
427
428  if (rootname.empty() || extension.empty()) {
429    const base::string16 elided_name = ElideText(filename_utf16, font_list,
430                                           available_pixel_width, ELIDE_AT_END);
431    return base::i18n::GetDisplayStringInLTRDirectionality(elided_name);
432  }
433
434  const float ext_width = GetStringWidthF(extension, font_list);
435  const float root_width = GetStringWidthF(rootname, font_list);
436
437  // We may have trimmed the path.
438  if (root_width + ext_width <= available_pixel_width) {
439    const base::string16 elided_name = rootname + extension;
440    return base::i18n::GetDisplayStringInLTRDirectionality(elided_name);
441  }
442
443  if (ext_width >= available_pixel_width) {
444    const base::string16 elided_name = ElideText(
445        rootname + extension, font_list, available_pixel_width,
446        ELIDE_IN_MIDDLE);
447    return base::i18n::GetDisplayStringInLTRDirectionality(elided_name);
448  }
449
450  float available_root_width = available_pixel_width - ext_width;
451  base::string16 elided_name =
452      ElideText(rootname, font_list, available_root_width, ELIDE_AT_END);
453  elided_name += extension;
454  return base::i18n::GetDisplayStringInLTRDirectionality(elided_name);
455}
456
457base::string16 ElideText(const base::string16& text,
458                         const FontList& font_list,
459                         float available_pixel_width,
460                         ElideBehavior elide_behavior) {
461  if (text.empty())
462    return text;
463
464  const float current_text_pixel_width = GetStringWidthF(text, font_list);
465  const bool elide_in_middle = (elide_behavior == ELIDE_IN_MIDDLE);
466  const bool insert_ellipsis = (elide_behavior != TRUNCATE_AT_END);
467
468  const base::string16 ellipsis = base::string16(kEllipsisUTF16);
469  StringSlicer slicer(text, ellipsis, elide_in_middle);
470
471  // Pango will return 0 width for absurdly long strings. Cut the string in
472  // half and try again.
473  // This is caused by an int overflow in Pango (specifically, in
474  // pango_glyph_string_extents_range). It's actually more subtle than just
475  // returning 0, since on super absurdly long strings, the int can wrap and
476  // return positive numbers again. Detecting that is probably not worth it
477  // (eliding way too much from a ridiculous string is probably still
478  // ridiculous), but we should check other widths for bogus values as well.
479  if (current_text_pixel_width <= 0 && !text.empty()) {
480    const base::string16 cut = slicer.CutString(text.length() / 2, false);
481    return ElideText(cut, font_list, available_pixel_width, elide_behavior);
482  }
483
484  if (current_text_pixel_width <= available_pixel_width)
485    return text;
486
487  if (insert_ellipsis &&
488      GetStringWidthF(ellipsis, font_list) > available_pixel_width)
489    return base::string16();
490
491  // Use binary search to compute the elided text.
492  size_t lo = 0;
493  size_t hi = text.length() - 1;
494  size_t guess;
495  for (guess = (lo + hi) / 2; lo <= hi; guess = (lo + hi) / 2) {
496    // We check the length of the whole desired string at once to ensure we
497    // handle kerning/ligatures/etc. correctly.
498    const base::string16 cut = slicer.CutString(guess, insert_ellipsis);
499    const float guess_length = GetStringWidthF(cut, font_list);
500    // Check again that we didn't hit a Pango width overflow. If so, cut the
501    // current string in half and start over.
502    if (guess_length <= 0) {
503      return ElideText(slicer.CutString(guess / 2, false),
504                       font_list, available_pixel_width, elide_behavior);
505    }
506    if (guess_length > available_pixel_width)
507      hi = guess - 1;
508    else
509      lo = guess + 1;
510  }
511
512  return slicer.CutString(guess, insert_ellipsis);
513}
514
515base::string16 ElideText(const base::string16& text,
516                         const Font& font,
517                         float available_pixel_width,
518                         ElideBehavior elide_behavior) {
519  return ElideText(text, FontList(font), available_pixel_width, elide_behavior);
520}
521
522SortedDisplayURL::SortedDisplayURL(const GURL& url,
523                                   const std::string& languages) {
524  net::AppendFormattedHost(url, languages, &sort_host_);
525  base::string16 host_minus_www = net::StripWWW(sort_host_);
526  url_parse::Parsed parsed;
527  display_url_ =
528      net::FormatUrl(url, languages, net::kFormatUrlOmitAll,
529                     net::UnescapeRule::SPACES, &parsed, &prefix_end_, NULL);
530  if (sort_host_.length() > host_minus_www.length()) {
531    prefix_end_ += sort_host_.length() - host_minus_www.length();
532    sort_host_.swap(host_minus_www);
533  }
534}
535
536SortedDisplayURL::SortedDisplayURL() : prefix_end_(0) {
537}
538
539SortedDisplayURL::~SortedDisplayURL() {
540}
541
542int SortedDisplayURL::Compare(const SortedDisplayURL& other,
543                              icu::Collator* collator) const {
544  // Compare on hosts first. The host won't contain 'www.'.
545  UErrorCode compare_status = U_ZERO_ERROR;
546  UCollationResult host_compare_result = collator->compare(
547      static_cast<const UChar*>(sort_host_.c_str()),
548      static_cast<int>(sort_host_.length()),
549      static_cast<const UChar*>(other.sort_host_.c_str()),
550      static_cast<int>(other.sort_host_.length()),
551      compare_status);
552  DCHECK(U_SUCCESS(compare_status));
553  if (host_compare_result != 0)
554    return host_compare_result;
555
556  // Hosts match, compare on the portion of the url after the host.
557  base::string16 path = this->AfterHost();
558  base::string16 o_path = other.AfterHost();
559  compare_status = U_ZERO_ERROR;
560  UCollationResult path_compare_result = collator->compare(
561      static_cast<const UChar*>(path.c_str()),
562      static_cast<int>(path.length()),
563      static_cast<const UChar*>(o_path.c_str()),
564      static_cast<int>(o_path.length()),
565      compare_status);
566  DCHECK(U_SUCCESS(compare_status));
567  if (path_compare_result != 0)
568    return path_compare_result;
569
570  // Hosts and paths match, compare on the complete url. This'll push the www.
571  // ones to the end.
572  compare_status = U_ZERO_ERROR;
573  UCollationResult display_url_compare_result = collator->compare(
574      static_cast<const UChar*>(display_url_.c_str()),
575      static_cast<int>(display_url_.length()),
576      static_cast<const UChar*>(other.display_url_.c_str()),
577      static_cast<int>(other.display_url_.length()),
578      compare_status);
579  DCHECK(U_SUCCESS(compare_status));
580  return display_url_compare_result;
581}
582
583base::string16 SortedDisplayURL::AfterHost() const {
584  const size_t slash_index = display_url_.find(sort_host_, prefix_end_);
585  if (slash_index == base::string16::npos) {
586    NOTREACHED();
587    return base::string16();
588  }
589  return display_url_.substr(slash_index + sort_host_.length());
590}
591
592bool ElideString(const base::string16& input, int max_len,
593                 base::string16* output) {
594  DCHECK_GE(max_len, 0);
595  if (static_cast<int>(input.length()) <= max_len) {
596    output->assign(input);
597    return false;
598  }
599
600  switch (max_len) {
601    case 0:
602      output->clear();
603      break;
604    case 1:
605      output->assign(input.substr(0, 1));
606      break;
607    case 2:
608      output->assign(input.substr(0, 2));
609      break;
610    case 3:
611      output->assign(input.substr(0, 1) + ASCIIToUTF16(".") +
612                     input.substr(input.length() - 1));
613      break;
614    case 4:
615      output->assign(input.substr(0, 1) + ASCIIToUTF16("..") +
616                     input.substr(input.length() - 1));
617      break;
618    default: {
619      int rstr_len = (max_len - 3) / 2;
620      int lstr_len = rstr_len + ((max_len - 3) % 2);
621      output->assign(input.substr(0, lstr_len) + ASCIIToUTF16("...") +
622                     input.substr(input.length() - rstr_len));
623      break;
624    }
625  }
626
627  return true;
628}
629
630namespace {
631
632// Internal class used to track progress of a rectangular string elide
633// operation.  Exists so the top-level ElideRectangleString() function
634// can be broken into smaller methods sharing this state.
635class RectangleString {
636 public:
637  RectangleString(size_t max_rows, size_t max_cols,
638                  bool strict, base::string16 *output)
639      : max_rows_(max_rows),
640        max_cols_(max_cols),
641        current_row_(0),
642        current_col_(0),
643        strict_(strict),
644        suppressed_(false),
645        output_(output) {}
646
647  // Perform deferred initializations following creation.  Must be called
648  // before any input can be added via AddString().
649  void Init() { output_->clear(); }
650
651  // Add an input string, reformatting to fit the desired dimensions.
652  // AddString() may be called multiple times to concatenate together
653  // multiple strings into the region (the current caller doesn't do
654  // this, however).
655  void AddString(const base::string16& input);
656
657  // Perform any deferred output processing.  Must be called after the
658  // last AddString() call has occurred.
659  bool Finalize();
660
661 private:
662  // Add a line to the rectangular region at the current position,
663  // either by itself or by breaking it into words.
664  void AddLine(const base::string16& line);
665
666  // Add a word to the rectangular region at the current position,
667  // either by itself or by breaking it into characters.
668  void AddWord(const base::string16& word);
669
670  // Add text to the output string if the rectangular boundaries
671  // have not been exceeded, advancing the current position.
672  void Append(const base::string16& string);
673
674  // Set the current position to the beginning of the next line.  If
675  // |output| is true, add a newline to the output string if the rectangular
676  // boundaries have not been exceeded.  If |output| is false, we assume
677  // some other mechanism will (likely) do similar breaking after the fact.
678  void NewLine(bool output);
679
680  // Maximum number of rows allowed in the output string.
681  size_t max_rows_;
682
683  // Maximum number of characters allowed in the output string.
684  size_t max_cols_;
685
686  // Current row position, always incremented and may exceed max_rows_
687  // when the input can not fit in the region.  We stop appending to
688  // the output string, however, when this condition occurs.  In the
689  // future, we may want to expose this value to allow the caller to
690  // determine how many rows would actually be required to hold the
691  // formatted string.
692  size_t current_row_;
693
694  // Current character position, should never exceed max_cols_.
695  size_t current_col_;
696
697  // True when we do whitespace to newline conversions ourselves.
698  bool strict_;
699
700  // True when some of the input has been truncated.
701  bool suppressed_;
702
703  // String onto which the output is accumulated.
704  base::string16* output_;
705
706  DISALLOW_COPY_AND_ASSIGN(RectangleString);
707};
708
709void RectangleString::AddString(const base::string16& input) {
710  base::i18n::BreakIterator lines(input,
711                                  base::i18n::BreakIterator::BREAK_NEWLINE);
712  if (lines.Init()) {
713    while (lines.Advance())
714      AddLine(lines.GetString());
715  } else {
716    NOTREACHED() << "BreakIterator (lines) init failed";
717  }
718}
719
720bool RectangleString::Finalize() {
721  if (suppressed_) {
722    output_->append(ASCIIToUTF16("..."));
723    return true;
724  }
725  return false;
726}
727
728void RectangleString::AddLine(const base::string16& line) {
729  if (line.length() < max_cols_) {
730    Append(line);
731  } else {
732    base::i18n::BreakIterator words(line,
733                                    base::i18n::BreakIterator::BREAK_SPACE);
734    if (words.Init()) {
735      while (words.Advance())
736        AddWord(words.GetString());
737    } else {
738      NOTREACHED() << "BreakIterator (words) init failed";
739    }
740  }
741  // Account for naturally-occuring newlines.
742  ++current_row_;
743  current_col_ = 0;
744}
745
746void RectangleString::AddWord(const base::string16& word) {
747  if (word.length() < max_cols_) {
748    // Word can be made to fit, no need to fragment it.
749    if (current_col_ + word.length() >= max_cols_)
750      NewLine(strict_);
751    Append(word);
752  } else {
753    // Word is so big that it must be fragmented.
754    int array_start = 0;
755    int char_start = 0;
756    base::i18n::UTF16CharIterator chars(&word);
757    while (!chars.end()) {
758      // When boundary is hit, add as much as will fit on this line.
759      if (current_col_ + (chars.char_pos() - char_start) >= max_cols_) {
760        Append(word.substr(array_start, chars.array_pos() - array_start));
761        NewLine(true);
762        array_start = chars.array_pos();
763        char_start = chars.char_pos();
764      }
765      chars.Advance();
766    }
767    // Add the last remaining fragment, if any.
768    if (array_start != chars.array_pos())
769      Append(word.substr(array_start, chars.array_pos() - array_start));
770  }
771}
772
773void RectangleString::Append(const base::string16& string) {
774  if (current_row_ < max_rows_)
775    output_->append(string);
776  else
777    suppressed_ = true;
778  current_col_ += string.length();
779}
780
781void RectangleString::NewLine(bool output) {
782  if (current_row_ < max_rows_) {
783    if (output)
784      output_->append(ASCIIToUTF16("\n"));
785  } else {
786    suppressed_ = true;
787  }
788  ++current_row_;
789  current_col_ = 0;
790}
791
792// Internal class used to track progress of a rectangular text elide
793// operation.  Exists so the top-level ElideRectangleText() function
794// can be broken into smaller methods sharing this state.
795class RectangleText {
796 public:
797  RectangleText(const FontList& font_list,
798                float available_pixel_width,
799                int available_pixel_height,
800                WordWrapBehavior wrap_behavior,
801                std::vector<base::string16>* lines)
802      : font_list_(font_list),
803        line_height_(font_list.GetHeight()),
804        available_pixel_width_(available_pixel_width),
805        available_pixel_height_(available_pixel_height),
806        wrap_behavior_(wrap_behavior),
807        current_width_(0),
808        current_height_(0),
809        last_line_ended_in_lf_(false),
810        lines_(lines),
811        insufficient_width_(false),
812        insufficient_height_(false) {}
813
814  // Perform deferred initializions following creation.  Must be called
815  // before any input can be added via AddString().
816  void Init() { lines_->clear(); }
817
818  // Add an input string, reformatting to fit the desired dimensions.
819  // AddString() may be called multiple times to concatenate together
820  // multiple strings into the region (the current caller doesn't do
821  // this, however).
822  void AddString(const base::string16& input);
823
824  // Perform any deferred output processing.  Must be called after the last
825  // AddString() call has occured. Returns a combination of
826  // |ReformattingResultFlags| indicating whether the given width or height was
827  // insufficient, leading to elision or truncation.
828  int Finalize();
829
830 private:
831  // Add a line to the rectangular region at the current position,
832  // either by itself or by breaking it into words.
833  void AddLine(const base::string16& line);
834
835  // Wrap the specified word across multiple lines.
836  int WrapWord(const base::string16& word);
837
838  // Add a long word - wrapping, eliding or truncating per the wrap behavior.
839  int AddWordOverflow(const base::string16& word);
840
841  // Add a word to the rectangluar region at the current position.
842  int AddWord(const base::string16& word);
843
844  // Append the specified |text| to the current output line, incrementing the
845  // running width by the specified amount. This is an optimization over
846  // |AddToCurrentLine()| when |text_width| is already known.
847  void AddToCurrentLineWithWidth(const base::string16& text, float text_width);
848
849  // Append the specified |text| to the current output line.
850  void AddToCurrentLine(const base::string16& text);
851
852  // Set the current position to the beginning of the next line.
853  bool NewLine();
854
855  // The font list used for measuring text width.
856  const FontList& font_list_;
857
858  // The height of each line of text.
859  const int line_height_;
860
861  // The number of pixels of available width in the rectangle.
862  const float available_pixel_width_;
863
864  // The number of pixels of available height in the rectangle.
865  const int available_pixel_height_;
866
867  // The wrap behavior for words that are too long to fit on a single line.
868  const WordWrapBehavior wrap_behavior_;
869
870  // The current running width.
871  float current_width_;
872
873  // The current running height.
874  int current_height_;
875
876  // The current line of text.
877  base::string16 current_line_;
878
879  // Indicates whether the last line ended with \n.
880  bool last_line_ended_in_lf_;
881
882  // The output vector of lines.
883  std::vector<base::string16>* lines_;
884
885  // Indicates whether a word was so long that it had to be truncated or elided
886  // to fit the available width.
887  bool insufficient_width_;
888
889  // Indicates whether there were too many lines for the available height.
890  bool insufficient_height_;
891
892  DISALLOW_COPY_AND_ASSIGN(RectangleText);
893};
894
895void RectangleText::AddString(const base::string16& input) {
896  base::i18n::BreakIterator lines(input,
897                                  base::i18n::BreakIterator::BREAK_NEWLINE);
898  if (lines.Init()) {
899    while (!insufficient_height_ && lines.Advance()) {
900      base::string16 line = lines.GetString();
901      // The BREAK_NEWLINE iterator will keep the trailing newline character,
902      // except in the case of the last line, which may not have one.  Remove
903      // the newline character, if it exists.
904      last_line_ended_in_lf_ = !line.empty() && line[line.length() - 1] == '\n';
905      if (last_line_ended_in_lf_)
906        line.resize(line.length() - 1);
907      AddLine(line);
908    }
909  } else {
910    NOTREACHED() << "BreakIterator (lines) init failed";
911  }
912}
913
914int RectangleText::Finalize() {
915  // Remove trailing whitespace from the last line or remove the last line
916  // completely, if it's just whitespace.
917  if (!insufficient_height_ && !lines_->empty()) {
918    TrimWhitespace(lines_->back(), TRIM_TRAILING, &lines_->back());
919    if (lines_->back().empty() && !last_line_ended_in_lf_)
920      lines_->pop_back();
921  }
922  if (last_line_ended_in_lf_)
923    lines_->push_back(base::string16());
924  return (insufficient_width_ ? INSUFFICIENT_SPACE_HORIZONTAL : 0) |
925         (insufficient_height_ ? INSUFFICIENT_SPACE_VERTICAL : 0);
926}
927
928void RectangleText::AddLine(const base::string16& line) {
929  const float line_width = GetStringWidthF(line, font_list_);
930  if (line_width <= available_pixel_width_) {
931    AddToCurrentLineWithWidth(line, line_width);
932  } else {
933    // Iterate over positions that are valid to break the line at. In general,
934    // these are word boundaries but after any punctuation following the word.
935    base::i18n::BreakIterator words(line,
936                                    base::i18n::BreakIterator::BREAK_LINE);
937    if (words.Init()) {
938      while (words.Advance()) {
939        const bool truncate = !current_line_.empty();
940        const base::string16& word = words.GetString();
941        const int lines_added = AddWord(word);
942        if (lines_added) {
943          if (truncate) {
944            // Trim trailing whitespace from the line that was added.
945            const int line = lines_->size() - lines_added;
946            TrimWhitespace(lines_->at(line), TRIM_TRAILING, &lines_->at(line));
947          }
948          if (ContainsOnlyWhitespace(word)) {
949            // Skip the first space if the previous line was carried over.
950            current_width_ = 0;
951            current_line_.clear();
952          }
953        }
954      }
955    } else {
956      NOTREACHED() << "BreakIterator (words) init failed";
957    }
958  }
959  // Account for naturally-occuring newlines.
960  NewLine();
961}
962
963int RectangleText::WrapWord(const base::string16& word) {
964  // Word is so wide that it must be fragmented.
965  base::string16 text = word;
966  int lines_added = 0;
967  bool first_fragment = true;
968  while (!insufficient_height_ && !text.empty()) {
969    base::string16 fragment =
970        ElideText(text, font_list_, available_pixel_width_,
971                  TRUNCATE_AT_END);
972    // At least one character has to be added at every line, even if the
973    // available space is too small.
974    if(fragment.empty())
975      fragment = text.substr(0, 1);
976    if (!first_fragment && NewLine())
977      lines_added++;
978    AddToCurrentLine(fragment);
979    text = text.substr(fragment.length());
980    first_fragment = false;
981  }
982  return lines_added;
983}
984
985int RectangleText::AddWordOverflow(const base::string16& word) {
986  int lines_added = 0;
987
988  // Unless this is the very first word, put it on a new line.
989  if (!current_line_.empty()) {
990    if (!NewLine())
991      return 0;
992    lines_added++;
993  }
994
995  if (wrap_behavior_ == IGNORE_LONG_WORDS) {
996    current_line_ = word;
997    current_width_ = available_pixel_width_;
998  } else if (wrap_behavior_ == WRAP_LONG_WORDS) {
999    lines_added += WrapWord(word);
1000  } else {
1001    const ElideBehavior elide_behavior =
1002        (wrap_behavior_ == ELIDE_LONG_WORDS ? ELIDE_AT_END : TRUNCATE_AT_END);
1003    const base::string16 elided_word =
1004        ElideText(word, font_list_, available_pixel_width_, elide_behavior);
1005    AddToCurrentLine(elided_word);
1006    insufficient_width_ = true;
1007  }
1008
1009  return lines_added;
1010}
1011
1012int RectangleText::AddWord(const base::string16& word) {
1013  int lines_added = 0;
1014  base::string16 trimmed;
1015  TrimWhitespace(word, TRIM_TRAILING, &trimmed);
1016  const float trimmed_width = GetStringWidthF(trimmed, font_list_);
1017  if (trimmed_width <= available_pixel_width_) {
1018    // Word can be made to fit, no need to fragment it.
1019    if ((current_width_ + trimmed_width > available_pixel_width_) && NewLine())
1020      lines_added++;
1021    // Append the non-trimmed word, in case more words are added after.
1022    AddToCurrentLine(word);
1023  } else {
1024    lines_added = AddWordOverflow(wrap_behavior_ == IGNORE_LONG_WORDS ?
1025                                  trimmed : word);
1026  }
1027  return lines_added;
1028}
1029
1030void RectangleText::AddToCurrentLine(const base::string16& text) {
1031  AddToCurrentLineWithWidth(text, GetStringWidthF(text, font_list_));
1032}
1033
1034void RectangleText::AddToCurrentLineWithWidth(const base::string16& text,
1035                                              float text_width) {
1036  if (current_height_ >= available_pixel_height_) {
1037    insufficient_height_ = true;
1038    return;
1039  }
1040  current_line_.append(text);
1041  current_width_ += text_width;
1042}
1043
1044bool RectangleText::NewLine() {
1045  bool line_added = false;
1046  if (current_height_ < available_pixel_height_) {
1047    lines_->push_back(current_line_);
1048    current_line_.clear();
1049    line_added = true;
1050  } else {
1051    insufficient_height_ = true;
1052  }
1053  current_height_ += line_height_;
1054  current_width_ = 0;
1055  return line_added;
1056}
1057
1058}  // namespace
1059
1060bool ElideRectangleString(const base::string16& input, size_t max_rows,
1061                          size_t max_cols, bool strict,
1062                          base::string16* output) {
1063  RectangleString rect(max_rows, max_cols, strict, output);
1064  rect.Init();
1065  rect.AddString(input);
1066  return rect.Finalize();
1067}
1068
1069int ElideRectangleText(const base::string16& input,
1070                       const FontList& font_list,
1071                       float available_pixel_width,
1072                       int available_pixel_height,
1073                       WordWrapBehavior wrap_behavior,
1074                       std::vector<base::string16>* lines) {
1075  RectangleText rect(font_list,
1076                     available_pixel_width,
1077                     available_pixel_height,
1078                     wrap_behavior,
1079                     lines);
1080  rect.Init();
1081  rect.AddString(input);
1082  return rect.Finalize();
1083}
1084
1085base::string16 TruncateString(const base::string16& string, size_t length) {
1086  if (string.size() <= length)
1087    // String fits, return it.
1088    return string;
1089
1090  if (length == 0)
1091    // No room for the elide string, return an empty string.
1092    return base::string16();
1093
1094  size_t max = length - 1;
1095
1096  // Added to the end of strings that are too big.
1097  static const base::char16 kElideString[] = { 0x2026, 0 };
1098
1099  if (max == 0)
1100    // Just enough room for the elide string.
1101    return kElideString;
1102
1103  // Use a line iterator to find the first boundary.
1104  UErrorCode status = U_ZERO_ERROR;
1105  scoped_ptr<icu::RuleBasedBreakIterator> bi(
1106      static_cast<icu::RuleBasedBreakIterator*>(
1107          icu::RuleBasedBreakIterator::createLineInstance(
1108              icu::Locale::getDefault(), status)));
1109  if (U_FAILURE(status))
1110    return string.substr(0, max) + kElideString;
1111  bi->setText(string.c_str());
1112  int32_t index = bi->preceding(static_cast<int32_t>(max));
1113  if (index == icu::BreakIterator::DONE) {
1114    index = static_cast<int32_t>(max);
1115  } else {
1116    // Found a valid break (may be the beginning of the string). Now use
1117    // a character iterator to find the previous non-whitespace character.
1118    icu::StringCharacterIterator char_iterator(string.c_str());
1119    if (index == 0) {
1120      // No valid line breaks. Start at the end again. This ensures we break
1121      // on a valid character boundary.
1122      index = static_cast<int32_t>(max);
1123    }
1124    char_iterator.setIndex(index);
1125    while (char_iterator.hasPrevious()) {
1126      char_iterator.previous();
1127      if (!(u_isspace(char_iterator.current()) ||
1128            u_charType(char_iterator.current()) == U_CONTROL_CHAR ||
1129            u_charType(char_iterator.current()) == U_NON_SPACING_MARK)) {
1130        // Not a whitespace character. Advance the iterator so that we
1131        // include the current character in the truncated string.
1132        char_iterator.next();
1133        break;
1134      }
1135    }
1136    if (char_iterator.hasPrevious()) {
1137      // Found a valid break point.
1138      index = char_iterator.getIndex();
1139    } else {
1140      // String has leading whitespace, return the elide string.
1141      return kElideString;
1142    }
1143  }
1144  return string.substr(0, index) + kElideString;
1145}
1146
1147}  // namespace gfx
1148