1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "ui/base/l10n/l10n_util.h"
6
7#include <algorithm>
8#include <cstdlib>
9#include <iterator>
10#include <string>
11
12#include "base/command_line.h"
13#include "base/compiler_specific.h"
14#include "base/file_util.h"
15#include "base/i18n/file_util_icu.h"
16#include "base/i18n/rtl.h"
17#include "base/i18n/string_compare.h"
18#include "base/lazy_instance.h"
19#include "base/memory/scoped_ptr.h"
20#include "base/path_service.h"
21#include "base/strings/string_number_conversions.h"
22#include "base/strings/string_split.h"
23#include "base/strings/string_util.h"
24#include "base/strings/stringprintf.h"
25#include "base/strings/sys_string_conversions.h"
26#include "base/strings/utf_string_conversions.h"
27#include "build/build_config.h"
28#include "third_party/icu/source/common/unicode/rbbi.h"
29#include "third_party/icu/source/common/unicode/uloc.h"
30#include "ui/base/l10n/l10n_util_collator.h"
31#include "ui/base/l10n/l10n_util_plurals.h"
32#include "ui/base/resource/resource_bundle.h"
33#include "ui/base/ui_base_paths.h"
34
35#if defined(OS_ANDROID)
36#include "ui/base/l10n/l10n_util_android.h"
37#endif
38
39#if defined(USE_GLIB)
40#include <glib.h>
41#endif
42
43#if defined(OS_WIN)
44#include "ui/base/l10n/l10n_util_win.h"
45#endif  // OS_WIN
46
47namespace {
48
// Language codes that GetAcceptLanguagesForLocale() may return. Each entry is
// a language tag (hyphen-separated subtags) followed by its English name; the
// table is kept in alphabetical order of the codes.
static const char* const kAcceptLanguageList[] = {
  "af",     // Afrikaans
  "am",     // Amharic
  "ar",     // Arabic
  "az",     // Azerbaijani
  "be",     // Belarusian
  "bg",     // Bulgarian
  "bh",     // Bihari
  "bn",     // Bengali
  "br",     // Breton
  "bs",     // Bosnian
  "ca",     // Catalan
  "co",     // Corsican
  "cs",     // Czech
  "cy",     // Welsh
  "da",     // Danish
  "de",     // German
  "de-AT",  // German (Austria)
  "de-CH",  // German (Switzerland)
  "de-DE",  // German (Germany)
  "el",     // Greek
  "en",     // English
  "en-AU",  // English (Australia)
  "en-CA",  // English (Canada)
  "en-GB",  // English (UK)
  "en-NZ",  // English (New Zealand)
  "en-US",  // English (US)
  "en-ZA",  // English (South Africa)
  "eo",     // Esperanto
  // TODO(jungshik): Do we want to list all es-Foo for Latin-American
  // Spanish-speaking countries?
  "es",     // Spanish
  "es-419", // Spanish (Latin America)
  "et",     // Estonian
  "eu",     // Basque
  "fa",     // Persian
  "fi",     // Finnish
  "fil",    // Filipino
  "fo",     // Faroese
  "fr",     // French
  "fr-CA",  // French (Canada)
  "fr-CH",  // French (Switzerland)
  "fr-FR",  // French (France)
  "fy",     // Frisian
  "ga",     // Irish
  "gd",     // Scots Gaelic
  "gl",     // Galician
  "gn",     // Guarani
  "gu",     // Gujarati
  "ha",     // Hausa
  "haw",    // Hawaiian
  "he",     // Hebrew
  "hi",     // Hindi
  "hr",     // Croatian
  "hu",     // Hungarian
  "hy",     // Armenian
  "ia",     // Interlingua
  "id",     // Indonesian
  "is",     // Icelandic
  "it",     // Italian
  "it-CH",  // Italian (Switzerland)
  "it-IT",  // Italian (Italy)
  "ja",     // Japanese
  "jw",     // Javanese
  "ka",     // Georgian
  "kk",     // Kazakh
  "km",     // Cambodian
  "kn",     // Kannada
  "ko",     // Korean
  "ku",     // Kurdish
  "ky",     // Kyrgyz
  "la",     // Latin
  "ln",     // Lingala
  "lo",     // Laothian
  "lt",     // Lithuanian
  "lv",     // Latvian
  "mk",     // Macedonian
  "ml",     // Malayalam
  "mn",     // Mongolian
  "mo",     // Moldavian
  "mr",     // Marathi
  "ms",     // Malay
  "mt",     // Maltese
  "nb",     // Norwegian (Bokmal)
  "ne",     // Nepali
  "nl",     // Dutch
  "nn",     // Norwegian (Nynorsk)
  "no",     // Norwegian
  "oc",     // Occitan
  "om",     // Oromo
  "or",     // Oriya
  "pa",     // Punjabi
  "pl",     // Polish
  "ps",     // Pashto
  "pt",     // Portuguese
  "pt-BR",  // Portuguese (Brazil)
  "pt-PT",  // Portuguese (Portugal)
  "qu",     // Quechua
  "rm",     // Romansh
  "ro",     // Romanian
  "ru",     // Russian
  "sd",     // Sindhi
  "sh",     // Serbo-Croatian
  "si",     // Sinhalese
  "sk",     // Slovak
  "sl",     // Slovenian
  "sn",     // Shona
  "so",     // Somali
  "sq",     // Albanian
  "sr",     // Serbian
  "st",     // Sesotho
  "su",     // Sundanese
  "sv",     // Swedish
  "sw",     // Swahili
  "ta",     // Tamil
  "te",     // Telugu
  "tg",     // Tajik
  "th",     // Thai
  "ti",     // Tigrinya
  "tk",     // Turkmen
  "to",     // Tonga
  "tr",     // Turkish
  "tt",     // Tatar
  "tw",     // Twi
  "ug",     // Uighur
  "uk",     // Ukrainian
  "ur",     // Urdu
  "uz",     // Uzbek
  "vi",     // Vietnamese
  "xh",     // Xhosa
  "yi",     // Yiddish
  "yo",     // Yoruba
  "zh",     // Chinese
  "zh-CN",  // Chinese (Simplified)
  "zh-TW",  // Chinese (Traditional)
  "zu",     // Zulu
};
186
187// Returns true if |locale_name| has an alias in the ICU data file.
188bool IsDuplicateName(const std::string& locale_name) {
189  static const char* const kDuplicateNames[] = {
190    "en",
191    "pt",
192    "zh",
193    "zh_hans_cn",
194    "zh_hant_hk",
195    "zh_hant_mo",
196    "zh_hans_sg",
197    "zh_hant_tw"
198  };
199
200  // Skip all 'es_RR'. Currently, we use 'es' for es-ES (Spanish in Spain).
201  // 'es-419' (Spanish in Latin America) is not available in ICU so that it
202  // has to be added manually in GetAvailableLocales().
203  if (LowerCaseEqualsASCII(locale_name.substr(0, 3),  "es_"))
204    return true;
205  for (size_t i = 0; i < arraysize(kDuplicateNames); ++i) {
206    if (base::strcasecmp(kDuplicateNames[i], locale_name.c_str()) == 0)
207      return true;
208  }
209  return false;
210}
211
212// We added 30+ minimally populated locales with only a few entries
213// (exemplar character set, script, writing direction and its own
214// lanaguage name). These locales have to be distinguished from the
215// fully populated locales to which Chrome is localized.
216bool IsLocalePartiallyPopulated(const std::string& locale_name) {
217  // For partially populated locales, even the translation for "English"
218  // is not available. A more robust/elegant way to check is to add a special
219  // field (say, 'isPartial' to our version of ICU locale files) and
220  // check its value, but this hack seems to work well.
221  return !l10n_util::IsLocaleNameTranslated("en", locale_name);
222}
223
224#if !defined(OS_MACOSX)
225bool IsLocaleAvailable(const std::string& locale) {
226  // If locale has any illegal characters in it, we don't want to try to
227  // load it because it may be pointing outside the locale data file directory.
228  if (!file_util::IsFilenameLegal(base::ASCIIToUTF16(locale)))
229    return false;
230
231  // IsLocalePartiallyPopulated() can be called here for an early return w/o
232  // checking the resource availability below. It'd help when Chrome is run
233  // under a system locale Chrome is not localized to (e.g.Farsi on Linux),
234  // but it'd slow down the start up time a little bit for locales Chrome is
235  // localized to. So, we don't call it here.
236  if (!l10n_util::IsLocaleSupportedByOS(locale))
237    return false;
238
239  // If the ResourceBundle is not yet initialized, return false to avoid the
240  // CHECK failure in ResourceBundle::GetSharedInstance().
241  if (!ResourceBundle::HasSharedInstance())
242    return false;
243
244  // TODO(hshi): make ResourceBundle::LocaleDataPakExists() a static function
245  // so that this can be invoked without initializing the global instance.
246  // See crbug.com/230432: CHECK failure in GetUserDataDir().
247  return ResourceBundle::GetSharedInstance().LocaleDataPakExists(locale);
248}
249#endif
250
251// On Linux, the text layout engine Pango determines paragraph directionality
252// by looking at the first strongly-directional character in the text. This
253// means text such as "Google Chrome foo bar..." will be layed out LTR even
254// if "foo bar" is RTL. So this function prepends the necessary RLM in such
255// cases.
256void AdjustParagraphDirectionality(base::string16* paragraph) {
257#if defined(OS_POSIX) && !defined(OS_MACOSX) && !defined(OS_ANDROID)
258  if (base::i18n::IsRTL() &&
259      base::i18n::StringContainsStrongRTLChars(*paragraph)) {
260    paragraph->insert(0, 1,
261                      static_cast<base::char16>(base::i18n::kRightToLeftMark));
262  }
263#endif
264}
265
266struct AvailableLocalesTraits
267    : base::DefaultLazyInstanceTraits<std::vector<std::string> > {
268  static std::vector<std::string>* New(void* instance) {
269    std::vector<std::string>* locales =
270        base::DefaultLazyInstanceTraits<std::vector<std::string> >::New(
271            instance);
272    int num_locales = uloc_countAvailable();
273    for (int i = 0; i < num_locales; ++i) {
274      std::string locale_name = uloc_getAvailable(i);
275      // Filter out the names that have aliases.
276      if (IsDuplicateName(locale_name))
277        continue;
278      // Filter out locales for which we have only partially populated data
279      // and to which Chrome is not localized.
280      if (IsLocalePartiallyPopulated(locale_name))
281        continue;
282      if (!l10n_util::IsLocaleSupportedByOS(locale_name))
283        continue;
284      // Normalize underscores to hyphens because that's what our locale files
285      // use.
286      std::replace(locale_name.begin(), locale_name.end(), '_', '-');
287
288      // Map the Chinese locale names over to zh-CN and zh-TW.
289      if (LowerCaseEqualsASCII(locale_name, "zh-hans")) {
290        locale_name = "zh-CN";
291      } else if (LowerCaseEqualsASCII(locale_name, "zh-hant")) {
292        locale_name = "zh-TW";
293      }
294      locales->push_back(locale_name);
295    }
296
297    // Manually add 'es-419' to the list. See the comment in IsDuplicateName().
298    locales->push_back("es-419");
299    return locales;
300  }
301};
302
// Process-wide list of available locales, held in a base::LazyInstance whose
// contents are produced by AvailableLocalesTraits::New(). It is read through
// l10n_util::GetAvailableLocales().
base::LazyInstance<std::vector<std::string>, AvailableLocalesTraits>
    g_available_locales = LAZY_INSTANCE_INITIALIZER;
305
306}  // namespace
307
308namespace l10n_util {
309
310std::string GetCanonicalLocale(const std::string& locale) {
311  return base::i18n::GetCanonicalLocale(locale.c_str());
312}
313
// Tries to map |locale| onto a locale for which IsLocaleAvailable() is true.
// On success the result is stored in |resolved_locale| and true is returned;
// on failure |resolved_locale| is left untouched and false is returned.
// The attempts are, in order:
//   1. |locale| itself;
//   2. for "lang-region" names, a language-specific mapping (es, zh, en) or
//      the bare language;
//   3. the updater aliases no/tl/iw/en -> nb/fil/he/en-US.
// Not implemented on Mac, where it always returns false.
bool CheckAndResolveLocale(const std::string& locale,
                           std::string* resolved_locale) {
#if defined(OS_MACOSX)
  NOTIMPLEMENTED();
  return false;
#else
  if (IsLocaleAvailable(locale)) {
    *resolved_locale = locale;
    return true;
  }

  // A locale that carries an '@' section (variant/keywords) and is not
  // available as-is is not resolved any further.
  // NOTE(review): an earlier comment here described retrying without the
  // region code (e.g. ca_ES@valencia -> ca@valencia -> ca), but no such retry
  // is implemented; the code simply reports failure.
  std::string::size_type variant_pos = locale.find('@');
  if (variant_pos != std::string::npos)
    return false;

  // If the locale matches language but not country, use that instead.
  // TODO(jungshik) : Nothing is done about languages that Chrome
  // does not support but available on Windows. We fall
  // back to en-US in GetApplicationLocale so that it's a not critical,
  // but we can do better.
  std::string::size_type hyphen_pos = locale.find('-');
  // When there is no hyphen, hyphen_pos is npos and |lang| is all of |locale|.
  std::string lang(locale, 0, hyphen_pos);
  if (hyphen_pos != std::string::npos && hyphen_pos > 0) {
    std::string region(locale, hyphen_pos + 1);
    // Starts out as the bare language; a region suffix is appended below for
    // the languages that need one.
    std::string tmp_locale(lang);
    // Map es-RR other than es-ES to es-419 (Chrome's Latin American
    // Spanish locale).
    if (LowerCaseEqualsASCII(lang, "es") &&
        !LowerCaseEqualsASCII(region, "es")) {
      tmp_locale.append("-419");
    } else if (LowerCaseEqualsASCII(lang, "zh")) {
      // Map zh-HK and zh-MO to zh-TW. Otherwise, zh-FOO is mapped to zh-CN.
      if (LowerCaseEqualsASCII(region, "hk") ||
          LowerCaseEqualsASCII(region, "mo")) { // Macao
        tmp_locale.append("-TW");
      } else {
        tmp_locale.append("-CN");
      }
    } else if (LowerCaseEqualsASCII(lang, "en")) {
      // Map Australian, Canadian, New Zealand and South African English
      // to British English for now.
      // TODO(jungshik): en-CA may have to change sides once
      // we have OS locale separate from app locale (Chrome's UI language).
      if (LowerCaseEqualsASCII(region, "au") ||
          LowerCaseEqualsASCII(region, "ca") ||
          LowerCaseEqualsASCII(region, "nz") ||
          LowerCaseEqualsASCII(region, "za")) {
        tmp_locale.append("-GB");
      } else {
        tmp_locale.append("-US");
      }
    }
    if (IsLocaleAvailable(tmp_locale)) {
      resolved_locale->swap(tmp_locale);
      return true;
    }
  }

  // Google updater uses no, tl, iw and en for our nb, fil, he, and en-US.
  struct {
    const char* source;
    const char* dest;
  } alias_map[] = {
      {"no", "nb"},
      {"tl", "fil"},
      {"iw", "he"},
      {"en", "en-US"},
  };

  // The alias lookup is keyed on |lang|, so it also applies to names that had
  // a region part which could not be resolved above.
  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(alias_map); ++i) {
    if (LowerCaseEqualsASCII(lang, alias_map[i].source)) {
      std::string tmp_locale(alias_map[i].dest);
      if (IsLocaleAvailable(tmp_locale)) {
        resolved_locale->swap(tmp_locale);
        return true;
      }
    }
  }

  return false;
#endif
}
399
// Picks the locale the application should run in, installs it as the ICU
// default locale and returns it.
//   * Mac: the locale override if set, else |pref_locale|, else "en-US".
//   * Elsewhere: a platform-specific list of candidates is built and the
//     first one accepted by CheckAndResolveLocale() wins. If none is
//     accepted, "en-US" is used when available; otherwise an empty string is
//     returned and the ICU default locale is left unchanged.
std::string GetApplicationLocale(const std::string& pref_locale) {
#if defined(OS_MACOSX)

  // Use any override (Cocoa for the browser), otherwise use the preference
  // passed to the function.
  std::string app_locale = l10n_util::GetLocaleOverride();
  if (app_locale.empty())
    app_locale = pref_locale;

  // The above should handle all of the cases Chrome normally hits, but for some
  // unit tests, we need something to fall back to.
  if (app_locale.empty())
    app_locale = "en-US";

  // Windows/Linux call SetICUDefaultLocale after determining the actual locale
  // with CheckAndResolveLocale to make ICU APIs work in that locale.
  // Mac doesn't use a locale directory tree of resources (it uses Mac style
  // resources), so mirror the Windows/Linux behavior of calling
  // SetICUDefaultLocale.
  base::i18n::SetICUDefaultLocale(app_locale);
  return app_locale;

#else

  std::string resolved_locale;
  // Locales to try, in priority order; filled in per platform below.
  std::vector<std::string> candidates;

  // We only use --lang and the app pref on Windows.  On Linux, we only
  // look at the LC_*/LANG environment variables.  We do, however, pass --lang
  // to renderer and plugin processes so they know what language the parent
  // process decided to use.

#if defined(OS_WIN)

  // First, try the preference value.
  if (!pref_locale.empty())
    candidates.push_back(GetCanonicalLocale(pref_locale));

  // Next, try the overridden locale.
  const std::vector<std::string>& languages = l10n_util::GetLocaleOverrides();
  if (!languages.empty()) {
    candidates.reserve(candidates.size() + languages.size());
    std::transform(languages.begin(), languages.end(),
                   std::back_inserter(candidates), &GetCanonicalLocale);
  } else {
    // If no override was set, defer to ICU
    candidates.push_back(base::i18n::GetConfiguredLocale());
  }

#elif defined(OS_ANDROID)

  // On Android, query java.util.Locale for the default locale.
  candidates.push_back(GetDefaultLocale());

#elif defined(USE_GLIB) && !defined(OS_CHROMEOS)

  // GLib implements correct environment variable parsing with
  // the precedence order: LANGUAGE, LC_ALL, LC_MESSAGES and LANG.
  // We used to use our custom parsing code along with ICU for this purpose.
  // If we have a port that does not depend on GTK, we have to
  // restore our custom code for that port.
  const char* const* languages = g_get_language_names();
  DCHECK(languages);  // A valid pointer is guaranteed.
  DCHECK(*languages);  // At least one entry, "C", is guaranteed.

  // The array returned by GLib is NULL-terminated.
  for (; *languages != NULL; ++languages) {
    candidates.push_back(base::i18n::GetCanonicalLocale(*languages));
  }

#else

  // By default, use the application locale preference. This applies to ChromeOS
  // and linux systems without glib.
  if (!pref_locale.empty())
    candidates.push_back(pref_locale);

#endif

  // The first candidate that resolves to an available locale wins.
  std::vector<std::string>::const_iterator i = candidates.begin();
  for (; i != candidates.end(); ++i) {
    if (CheckAndResolveLocale(*i, &resolved_locale)) {
      base::i18n::SetICUDefaultLocale(resolved_locale);
      return resolved_locale;
    }
  }

  // Fallback on en-US.
  const std::string fallback_locale("en-US");
  if (IsLocaleAvailable(fallback_locale)) {
    base::i18n::SetICUDefaultLocale(fallback_locale);
    return fallback_locale;
  }

  // Nothing usable was found, not even en-US.
  return std::string();

#endif
}
497
498bool IsLocaleNameTranslated(const char* locale,
499                            const std::string& display_locale) {
500  base::string16 display_name =
501      l10n_util::GetDisplayNameForLocale(locale, display_locale, false);
502  // Because ICU sets the error code to U_USING_DEFAULT_WARNING whether or not
503  // uloc_getDisplayName returns the actual translation or the default
504  // value (locale code), we have to rely on this hack to tell whether
505  // the translation is available or not.  If ICU doesn't have a translated
506  // name for this locale, GetDisplayNameForLocale will just return the
507  // locale code.
508  return !base::IsStringASCII(display_name) ||
509      base::UTF16ToASCII(display_name) != locale;
510}
511
512base::string16 GetDisplayNameForLocale(const std::string& locale,
513                                       const std::string& display_locale,
514                                       bool is_for_ui) {
515  std::string locale_code = locale;
516  // Internally, we use the language code of zh-CN and zh-TW, but we want the
517  // display names to be Chinese (Simplified) and Chinese (Traditional) instead
518  // of Chinese (China) and Chinese (Taiwan).  To do that, we pass zh-Hans
519  // and zh-Hant to ICU. Even with this mapping, we'd get
520  // 'Chinese (Simplified Han)' and 'Chinese (Traditional Han)' in English and
521  // even longer results in other languages. Arguably, they're better than
522  // the current results : Chinese (China) / Chinese (Taiwan).
523  // TODO(jungshik): Do one of the following:
524  // 1. Special-case Chinese by getting the custom-translation for them
525  // 2. Recycle IDS_ENCODING_{SIMP,TRAD}_CHINESE.
526  // 3. Get translations for two directly from the ICU resouce bundle
527  // because they're not accessible with other any API.
528  // 4. Patch ICU to special-case zh-Hans/zh-Hant for us.
529  // #1 and #2 wouldn't work if display_locale != current UI locale although
530  // we can think of additional hack to work around the problem.
531  // #3 can be potentially expensive.
532  if (locale_code == "zh-CN")
533    locale_code = "zh-Hans";
534  else if (locale_code == "zh-TW")
535    locale_code = "zh-Hant";
536
537  base::string16 display_name;
538#if defined(OS_ANDROID)
539  // Use Java API to get locale display name so that we can remove most of
540  // the lang data from icu data to reduce binary size, except for zh-Hans and
541  // zh-Hant because the current Android Java API doesn't support scripts.
542  // TODO(wangxianzhu): remove the special handling of zh-Hans and zh-Hant once
543  // Android Java API supports scripts.
544  if (!StartsWithASCII(locale_code, "zh-Han", true)) {
545    display_name = GetDisplayNameForLocale(locale_code, display_locale);
546  } else
547#endif
548  {
549    UErrorCode error = U_ZERO_ERROR;
550    const int kBufferSize = 1024;
551
552    int actual_size = uloc_getDisplayName(
553        locale_code.c_str(), display_locale.c_str(),
554        WriteInto(&display_name, kBufferSize), kBufferSize - 1, &error);
555    DCHECK(U_SUCCESS(error));
556    display_name.resize(actual_size);
557  }
558
559  // Add directional markup so parentheses are properly placed.
560  if (is_for_ui && base::i18n::IsRTL())
561    base::i18n::AdjustStringForLocaleDirection(&display_name);
562  return display_name;
563}
564
565base::string16 GetDisplayNameForCountry(const std::string& country_code,
566                                        const std::string& display_locale) {
567  return GetDisplayNameForLocale("_" + country_code, display_locale, false);
568}
569
// Returns a copy of |locale| in which every hyphen is replaced by an
// underscore; all other characters are kept as they are.
std::string NormalizeLocale(const std::string& locale) {
  std::string result;
  result.reserve(locale.size());
  for (std::string::const_iterator it = locale.begin(); it != locale.end();
       ++it) {
    result.push_back(*it == '-' ? '_' : *it);
  }
  return result;
}
576
577void GetParentLocales(const std::string& current_locale,
578                      std::vector<std::string>* parent_locales) {
579  std::string locale(NormalizeLocale(current_locale));
580
581  const int kNameCapacity = 256;
582  char parent[kNameCapacity];
583  base::strlcpy(parent, locale.c_str(), kNameCapacity);
584  parent_locales->push_back(parent);
585  UErrorCode err = U_ZERO_ERROR;
586  while (uloc_getParent(parent, parent, kNameCapacity, &err) > 0) {
587    if (U_FAILURE(err))
588      break;
589    parent_locales->push_back(parent);
590  }
591}
592
593bool IsValidLocaleSyntax(const std::string& locale) {
594  // Check that the length is plausible.
595  if (locale.size() < 2 || locale.size() >= ULOC_FULLNAME_CAPACITY)
596    return false;
597
598  // Strip off the part after an '@' sign, which might contain keywords,
599  // as in en_IE@currency=IEP or fr@collation=phonebook;calendar=islamic-civil.
600  // We don't validate that part much, just check that there's at least one
601  // equals sign in a plausible place. Normalize the prefix so that hyphens
602  // are changed to underscores.
603  std::string prefix = NormalizeLocale(locale);
604  size_t split_point = locale.find("@");
605  if (split_point != std::string::npos) {
606    std::string keywords = locale.substr(split_point + 1);
607    prefix = locale.substr(0, split_point);
608
609    size_t equals_loc = keywords.find("=");
610    if (equals_loc == std::string::npos ||
611        equals_loc < 1 || equals_loc > keywords.size() - 2)
612      return false;
613  }
614
615  // Check that all characters before the at-sign are alphanumeric or
616  // underscore.
617  for (size_t i = 0; i < prefix.size(); i++) {
618    char ch = prefix[i];
619    if (!IsAsciiAlpha(ch) && !IsAsciiDigit(ch) && ch != '_')
620      return false;
621  }
622
623  // Check that the initial token (before the first hyphen/underscore)
624  // is 1 - 3 alphabetical characters (a language tag).
625  for (size_t i = 0; i < prefix.size(); i++) {
626    char ch = prefix[i];
627    if (ch == '_') {
628      if (i < 1 || i > 3)
629        return false;
630      break;
631    }
632    if (!IsAsciiAlpha(ch))
633      return false;
634  }
635
636  // Check that the all tokens after the initial token are 1 - 8 characters.
637  // (Tokenize/StringTokenizer don't work here, they collapse multiple
638  // delimiters into one.)
639  int token_len = 0;
640  int token_index = 0;
641  for (size_t i = 0; i < prefix.size(); i++) {
642    if (prefix[i] != '_') {
643      token_len++;
644      continue;
645    }
646
647    if (token_index > 0 && (token_len < 1 || token_len > 8)) {
648      return false;
649    }
650    token_index++;
651    token_len = 0;
652  }
653  if (token_index == 0 && (token_len < 1 || token_len > 3)) {
654    return false;
655  } else if (token_len < 1 || token_len > 8) {
656    return false;
657  }
658
659  return true;
660}
661
662std::string GetStringUTF8(int message_id) {
663  return base::UTF16ToUTF8(GetStringUTF16(message_id));
664}
665
666base::string16 GetStringUTF16(int message_id) {
667  ResourceBundle& rb = ResourceBundle::GetSharedInstance();
668  base::string16 str = rb.GetLocalizedString(message_id);
669  AdjustParagraphDirectionality(&str);
670
671  return str;
672}
673
674base::string16 GetStringFUTF16(int message_id,
675                               const std::vector<base::string16>& replacements,
676                               std::vector<size_t>* offsets) {
677  // TODO(tc): We could save a string copy if we got the raw string as
678  // a StringPiece and were able to call ReplaceStringPlaceholders with
679  // a StringPiece format string and base::string16 substitution strings.  In
680  // practice, the strings should be relatively short.
681  ResourceBundle& rb = ResourceBundle::GetSharedInstance();
682  const base::string16& format_string = rb.GetLocalizedString(message_id);
683
684#ifndef NDEBUG
685  // Make sure every replacement string is being used, so we don't just
686  // silently fail to insert one. If |offsets| is non-NULL, then don't do this
687  // check as the code may simply want to find the placeholders rather than
688  // actually replacing them.
689  if (!offsets) {
690    std::string utf8_string = base::UTF16ToUTF8(format_string);
691
692    // $9 is the highest allowed placeholder.
693    for (size_t i = 0; i < 9; ++i) {
694      bool placeholder_should_exist = replacements.size() > i;
695
696      std::string placeholder =
697          base::StringPrintf("$%d", static_cast<int>(i + 1));
698      size_t pos = utf8_string.find(placeholder.c_str());
699      if (placeholder_should_exist) {
700        DCHECK_NE(std::string::npos, pos) <<
701            " Didn't find a " << placeholder << " placeholder in " <<
702            utf8_string;
703      } else {
704        DCHECK_EQ(std::string::npos, pos) <<
705            " Unexpectedly found a " << placeholder << " placeholder in " <<
706            utf8_string;
707      }
708    }
709  }
710#endif
711
712  base::string16 formatted = ReplaceStringPlaceholders(
713      format_string, replacements, offsets);
714  AdjustParagraphDirectionality(&formatted);
715
716  return formatted;
717}
718
719std::string GetStringFUTF8(int message_id,
720                           const base::string16& a) {
721  return base::UTF16ToUTF8(GetStringFUTF16(message_id, a));
722}
723
724std::string GetStringFUTF8(int message_id,
725                           const base::string16& a,
726                           const base::string16& b) {
727  return base::UTF16ToUTF8(GetStringFUTF16(message_id, a, b));
728}
729
730std::string GetStringFUTF8(int message_id,
731                           const base::string16& a,
732                           const base::string16& b,
733                           const base::string16& c) {
734  return base::UTF16ToUTF8(GetStringFUTF16(message_id, a, b, c));
735}
736
737std::string GetStringFUTF8(int message_id,
738                           const base::string16& a,
739                           const base::string16& b,
740                           const base::string16& c,
741                           const base::string16& d) {
742  return base::UTF16ToUTF8(GetStringFUTF16(message_id, a, b, c, d));
743}
744
745base::string16 GetStringFUTF16(int message_id,
746                               const base::string16& a) {
747  std::vector<base::string16> replacements;
748  replacements.push_back(a);
749  return GetStringFUTF16(message_id, replacements, NULL);
750}
751
752base::string16 GetStringFUTF16(int message_id,
753                               const base::string16& a,
754                               const base::string16& b) {
755  return GetStringFUTF16(message_id, a, b, NULL);
756}
757
758base::string16 GetStringFUTF16(int message_id,
759                               const base::string16& a,
760                               const base::string16& b,
761                               const base::string16& c) {
762  std::vector<base::string16> replacements;
763  replacements.push_back(a);
764  replacements.push_back(b);
765  replacements.push_back(c);
766  return GetStringFUTF16(message_id, replacements, NULL);
767}
768
769base::string16 GetStringFUTF16(int message_id,
770                               const base::string16& a,
771                               const base::string16& b,
772                               const base::string16& c,
773                               const base::string16& d) {
774  std::vector<base::string16> replacements;
775  replacements.push_back(a);
776  replacements.push_back(b);
777  replacements.push_back(c);
778  replacements.push_back(d);
779  return GetStringFUTF16(message_id, replacements, NULL);
780}
781
782base::string16 GetStringFUTF16(int message_id,
783                               const base::string16& a,
784                               const base::string16& b,
785                               const base::string16& c,
786                               const base::string16& d,
787                               const base::string16& e) {
788  std::vector<base::string16> replacements;
789  replacements.push_back(a);
790  replacements.push_back(b);
791  replacements.push_back(c);
792  replacements.push_back(d);
793  replacements.push_back(e);
794  return GetStringFUTF16(message_id, replacements, NULL);
795}
796
797base::string16 GetStringFUTF16(int message_id,
798                               const base::string16& a,
799                               size_t* offset) {
800  DCHECK(offset);
801  std::vector<size_t> offsets;
802  std::vector<base::string16> replacements;
803  replacements.push_back(a);
804  base::string16 result = GetStringFUTF16(message_id, replacements, &offsets);
805  DCHECK(offsets.size() == 1);
806  *offset = offsets[0];
807  return result;
808}
809
810base::string16 GetStringFUTF16(int message_id,
811                               const base::string16& a,
812                               const base::string16& b,
813                               std::vector<size_t>* offsets) {
814  std::vector<base::string16> replacements;
815  replacements.push_back(a);
816  replacements.push_back(b);
817  return GetStringFUTF16(message_id, replacements, offsets);
818}
819
820base::string16 GetStringFUTF16Int(int message_id, int a) {
821  return GetStringFUTF16(message_id, base::UTF8ToUTF16(base::IntToString(a)));
822}
823
824base::string16 GetStringFUTF16Int(int message_id, int64 a) {
825  return GetStringFUTF16(message_id, base::UTF8ToUTF16(base::Int64ToString(a)));
826}
827
828// Specialization of operator() method for base::string16 version.
829template <>
830bool StringComparator<base::string16>::operator()(const base::string16& lhs,
831                                                  const base::string16& rhs) {
832  // If we can not get collator instance for specified locale, just do simple
833  // string compare.
834  if (!collator_)
835    return lhs < rhs;
836  return base::i18n::CompareString16WithCollator(collator_, lhs, rhs) ==
837      UCOL_LESS;
838};
839
840base::string16 GetPluralStringFUTF16(const std::vector<int>& message_ids,
841                               int number) {
842  scoped_ptr<icu::PluralFormat> format = BuildPluralFormat(message_ids);
843  DCHECK(format);
844
845  UErrorCode err = U_ZERO_ERROR;
846  icu::UnicodeString result_files_string = format->format(number, err);
847  int capacity = result_files_string.length() + 1;
848  DCHECK_GT(capacity, 1);
849  base::string16 result;
850  result_files_string.extract(
851      static_cast<UChar*>(WriteInto(&result, capacity)), capacity, err);
852  DCHECK(U_SUCCESS(err));
853  return result;
854}
855
856std::string GetPluralStringFUTF8(const std::vector<int>& message_ids,
857                                 int number) {
858  return base::UTF16ToUTF8(GetPluralStringFUTF16(message_ids, number));
859}
860
// Sorts |strings| in place for |locale| by forwarding to
// SortVectorWithStringKey().
void SortStrings16(const std::string& locale,
                   std::vector<base::string16>* strings) {
  // NOTE(review): the meaning of the trailing |false| is defined by
  // SortVectorWithStringKey(), which is declared outside this file — confirm
  // there before changing it.
  SortVectorWithStringKey(locale, strings, false);
}
865
866const std::vector<std::string>& GetAvailableLocales() {
867  return g_available_locales.Get();
868}
869
870void GetAcceptLanguagesForLocale(const std::string& display_locale,
871                                 std::vector<std::string>* locale_codes) {
872  for (size_t i = 0; i < arraysize(kAcceptLanguageList); ++i) {
873    if (!l10n_util::IsLocaleNameTranslated(kAcceptLanguageList[i],
874                                           display_locale))
875      // TODO(jungshik) : Put them at the of the list with language codes
876      // enclosed by brackets instead of skipping.
877        continue;
878    locale_codes->push_back(kAcceptLanguageList[i]);
879  }
880}
881
882int GetLocalizedContentsWidthInPixels(int pixel_resource_id) {
883  int width = 0;
884  base::StringToInt(l10n_util::GetStringUTF8(pixel_resource_id), &width);
885  DCHECK_GT(width, 0);
886  return width;
887}
888
889}  // namespace l10n_util
890