language_selector.cc revision 5821806d5e7f356e8fa4b058a389a808ea183019
1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4//
5// This file defines a helper class for selecting a supported language from a
6// set of candidates.
7
8#include "chrome/installer/util/language_selector.h"
9
10#include <algorithm>
11#include <functional>
12
13#include "base/logging.h"
14#include "base/string_util.h"
15#include "base/win/i18n.h"
16#include "chrome/installer/util/google_update_settings.h"
17
18#include "installer_util_strings.h"
19
20namespace {
21
// Associates a language identifier with the offset of a translation.  The
// lookup tables in this file are arrays of this type.
struct LangToOffset {
  // The language identifier.  The alias and wildcard tables in this file spell
  // these in lowercase (e.g. L"en-au").
  const wchar_t* language;
  // The offset of the translation to use for |language|; the tables in this
  // file populate it from IDS_L10N_OFFSET_* values.
  int offset;
};
26
// The language we fall back upon when all else fails.  DoSelect() and
// GetLanguageName() both build their fallback result from this array using
// arraysize(kFallbackLanguage) - 1 so that the terminating null is excluded.
const wchar_t kFallbackLanguage[] = L"en-us";
// The translation offset that accompanies kFallbackLanguage.
const int kFallbackLanguageOffset = IDS_L10N_OFFSET_EN_US;
30
// The length, in characters, of a script subtag (e.g. the "hans" in
// "zh-hans"); see http://tools.ietf.org/html/rfc5646 Section 2.3.3.
// NOTE(review): nothing in this file as shown references this constant --
// confirm whether it is still needed.
const std::wstring::size_type kScriptSubtagLength = 4;
33
// A sorted array of language identifiers (and their offsets) for which
// translations are available. The contents of the array are generated by
// create_string_rc.py.  The array is binary-searched by name (TryFindOffset)
// and by offset (LanguageSelector::GetLanguageName), so ValidateMappings()
// checks in debug builds that the entries ascend by both name and offset.
const LangToOffset kLanguageOffsetPairs[] = {
// Each HANDLE_LANGUAGE(l_, o_) produced by DO_LANGUAGES becomes one
// { L"<l_>", o_ } entry: #l_ stringizes the language name and L ## makes the
// resulting literal wide.  DO_LANGUAGES is presumably supplied by
// installer_util_strings.h -- TODO confirm.
#define HANDLE_LANGUAGE(l_, o_) { L ## #l_, o_ },
  DO_LANGUAGES
#undef HANDLE_LANGUAGE
};
42
// A sorted array of language identifiers that are aliases to other languages
// for which translations are available.  GetLanguageOffset() consults this
// table only after an exact match in kLanguageOffsetPairs has failed.
//
// Entries must remain in ascending order by language because TryFindOffset()
// binary-searches the table; ValidateMappings() checks the ordering in debug
// builds.  Identifiers are lowercase because SelectIf() downcases each
// candidate before looking it up.
const LangToOffset kLanguageToOffsetExceptions[] = {
  // Alias some English variants to British English (all others wildcard to US).
  { L"en-au", IDS_L10N_OFFSET_EN_GB },
  { L"en-ca", IDS_L10N_OFFSET_EN_GB },
  { L"en-nz", IDS_L10N_OFFSET_EN_GB },
  { L"en-za", IDS_L10N_OFFSET_EN_GB },
  // Alias es-es to es (all others wildcard to es-419).
  { L"es-es", IDS_L10N_OFFSET_ES },
  // Google web properties use iw for he. Handle both just to be safe.
  { L"he", IDS_L10N_OFFSET_IW },
  // Google web properties use no for nb. Handle both just to be safe.
  { L"nb", IDS_L10N_OFFSET_NO },
  // Some Google web properties use tl for fil. Handle both just to be safe.
  // They're not completely identical, but alias it here.
  { L"tl", IDS_L10N_OFFSET_FIL },
  // Pre-Vista aliases for Chinese w/ script subtag.
  { L"zh-chs", IDS_L10N_OFFSET_ZH_CN },
  { L"zh-cht", IDS_L10N_OFFSET_ZH_TW },
  // Vista+ aliases for Chinese w/ script subtag.
  { L"zh-hans", IDS_L10N_OFFSET_ZH_CN },
  { L"zh-hant", IDS_L10N_OFFSET_ZH_TW },
  // Alias Hong Kong and Macau to Taiwan.
  { L"zh-hk", IDS_L10N_OFFSET_ZH_TW },
  { L"zh-mo", IDS_L10N_OFFSET_ZH_TW },
  // Although the wildcard entry for zh would result in this, alias zh-sg so
  // that it will win if it precedes another valid tag in a list of candidates.
  { L"zh-sg", IDS_L10N_OFFSET_ZH_CN }
};
73
// A sorted array of neutral language identifiers that are wildcard aliases to
// other languages for which translations are available.  MatchLanguageOffset()
// looks up a candidate's primary language subtag (the text before the first
// '-') in this table.  Entries must remain in ascending order by language
// because the lookup is a binary search; ValidateMappings() checks the
// ordering in debug builds.
const LangToOffset kLanguageToOffsetWildcards[] = {
  // Use the U.S. region for anything English.
  { L"en", IDS_L10N_OFFSET_EN_US },
  // Use the Latin American region for anything Spanish.
  { L"es", IDS_L10N_OFFSET_ES_419 },
  // Use the Brazil region for anything Portuguese.
  { L"pt", IDS_L10N_OFFSET_PT_BR },
  // Use the P.R.C. region for anything Chinese.
  { L"zh", IDS_L10N_OFFSET_ZH_CN }
};
86
87#if !defined(NDEBUG)
88// Returns true if the items in the given range are sorted.  If
89// |byNameAndOffset| is true, the items must be sorted by both name and offset.
90bool IsArraySorted(const LangToOffset* first, const LangToOffset* last,
91                   bool byNameAndOffset) {
92  if (last - first > 1) {
93    for (--last; first != last; ++first) {
94       if (!(std::wstring(first->language) < (first + 1)->language) ||
95           byNameAndOffset && !(first->offset < (first + 1)->offset)) {
96         return false;
97       }
98    }
99  }
100  return true;
101}
102
103// Validates that the static read-only mappings are properly sorted.
104void ValidateMappings() {
105  // Ensure that kLanguageOffsetPairs is sorted.
106  DCHECK(IsArraySorted(&kLanguageOffsetPairs[0],
107                       &kLanguageOffsetPairs[arraysize(kLanguageOffsetPairs)],
108                       true)) << "kOffsetToLanguageId is not sorted";
109
110  // Ensure that kLanguageToOffsetExceptions is sorted.
111  DCHECK(IsArraySorted(
112           &kLanguageToOffsetExceptions[0],
113           &kLanguageToOffsetExceptions[arraysize(kLanguageToOffsetExceptions)],
114           false)) << "kLanguageToOffsetExceptions is not sorted";
115
116  // Ensure that kLanguageToOffsetWildcards is sorted.
117  DCHECK(IsArraySorted(
118            &kLanguageToOffsetWildcards[0],
119            &kLanguageToOffsetWildcards[arraysize(kLanguageToOffsetWildcards)],
120            false)) << "kLanguageToOffsetWildcards is not sorted";
121}
122#endif  // !defined(NDEBUG)
123
124// A less-than overload to do slightly more efficient searches in the
125// sorted arrays.
126bool operator<(const LangToOffset& left, const std::wstring& right) {
127  return left.language < right;
128}
129
130// A less-than overload to do slightly more efficient searches in the
131// sorted arrays.
132bool operator<(const std::wstring& left, const LangToOffset& right) {
133  return left < right.language;
134}
135
136// A not-so-efficient less-than overload for the same uses as above.
137bool operator<(const LangToOffset& left, const LangToOffset& right) {
138  return std::wstring(left.language) < right.language;
139}
140
141// A compare function for searching in a sorted array by offset.
142bool IsOffsetLessThan(const LangToOffset& left, const LangToOffset& right) {
143  return left.offset < right.offset;
144}
145
146// Binary search in one of the sorted arrays to find the offset corresponding to
147// a given language |name|.
148bool TryFindOffset(const LangToOffset* first, const LangToOffset* last,
149                   const std::wstring& name, int* offset) {
150  const LangToOffset* search_result = std::lower_bound(first, last, name);
151  if (last != search_result && search_result->language == name) {
152    *offset = search_result->offset;
153    return true;
154  }
155  return false;
156}
157
158// A predicate function for LanguageSelector::SelectIf that searches for the
159// offset of a translated language.  The search first tries to find an exact
160// match.  Failing that, an exact match with an alias is attempted.
161bool GetLanguageOffset(const std::wstring& language, int* offset) {
162  // Note: always perform the exact match first so that an alias is never
163  // selected in place of a future translation.
164  return
165      TryFindOffset(
166          &kLanguageOffsetPairs[0],
167          &kLanguageOffsetPairs[arraysize(kLanguageOffsetPairs)],
168          language, offset) ||
169      TryFindOffset(
170          &kLanguageToOffsetExceptions[0],
171          &kLanguageToOffsetExceptions[arraysize(kLanguageToOffsetExceptions)],
172          language, offset);
173}
174
175// A predicate function for LanguageSelector::SelectIf that searches for a
176// wildcard match with |language|'s primary language subtag.
177bool MatchLanguageOffset(const std::wstring& language, int* offset) {
178  std::wstring primary_language = language.substr(0, language.find(L'-'));
179
180  // Now check for wildcards.
181  return
182      TryFindOffset(
183          &kLanguageToOffsetWildcards[0],
184          &kLanguageToOffsetWildcards[arraysize(kLanguageToOffsetWildcards)],
185          primary_language, offset);
186}
187
188// Adds to |candidates| the eligible languages on the system.  Any language
189// setting specified by Omaha takes precedence over the operating system's
190// configured languages.
191void GetCandidatesFromSystem(std::vector<std::wstring>* candidates) {
192  DCHECK(candidates);
193  std::wstring language;
194
195  // Omaha gets first pick.
196  GoogleUpdateSettings::GetLanguage(&language);
197  if (!language.empty()) {
198    candidates->push_back(language);
199  }
200
201  // Now try the Windows UI languages.  Use the thread preferred since that will
202  // kindly return us a list of all kinds of fallbacks.
203  base::win::i18n::GetThreadPreferredUILanguageList(candidates);
204}
205
206}  // namespace
207
208namespace installer {
209
210LanguageSelector::LanguageSelector()
211    : offset_(arraysize(kLanguageOffsetPairs)) {
212#if !defined(NDEBUG)
213  ValidateMappings();
214#endif  // !defined(NDEBUG)
215  std::vector<std::wstring> candidates;
216
217  GetCandidatesFromSystem(&candidates);
218  DoSelect(candidates);
219}
220
221LanguageSelector::LanguageSelector(const std::vector<std::wstring>& candidates)
222    : offset_(arraysize(kLanguageOffsetPairs)) {
223#if !defined(NDEBUG)
224  ValidateMappings();
225#endif  // !defined(NDEBUG)
226  DoSelect(candidates);
227}
228
229LanguageSelector::~LanguageSelector() {
230}
231
232// static
233std::wstring LanguageSelector::GetLanguageName(int offset) {
234  DCHECK_GE(offset, 0);
235  DCHECK_LT(static_cast<size_t>(offset), arraysize(kLanguageOffsetPairs));
236
237  LangToOffset value = { NULL, offset };
238  const LangToOffset* search_result =
239    std::lower_bound(&kLanguageOffsetPairs[0],
240                     &kLanguageOffsetPairs[arraysize(kLanguageOffsetPairs)],
241                     value, IsOffsetLessThan);
242  if (&kLanguageOffsetPairs[arraysize(kLanguageOffsetPairs)] != search_result &&
243      search_result->offset == offset) {
244    return search_result->language;
245  }
246  NOTREACHED() << "Unknown language offset.";
247  return std::wstring(&kFallbackLanguage[0], arraysize(kFallbackLanguage) - 1);
248}
249
250// Runs through the set of candidates, sending their downcased representation
251// through |select_predicate|.  Returns true if the predicate selects a
252// candidate, in which case |matched_name| is assigned the value of the
253// candidate and |matched_offset| is assigned the language offset of the
254// selected translation.
255// static
256bool LanguageSelector::SelectIf(const std::vector<std::wstring>& candidates,
257                                SelectPred_Fn select_predicate,
258                                std::wstring* matched_name,
259                                int* matched_offset) {
260  std::wstring candidate;
261  for (std::vector<std::wstring>::const_iterator scan = candidates.begin(),
262          end = candidates.end(); scan != end; ++scan) {
263    candidate.assign(*scan);
264    StringToLowerASCII(&candidate);
265    if (select_predicate(candidate, matched_offset)) {
266      matched_name->assign(*scan);
267      return true;
268    }
269  }
270
271  return false;
272}
273
274// Select the best-fit translation from the ordered list |candidates|.
275// At the conclusion, this instance's |matched_candidate_| and |offset_| members
276// are set to the name of the selected candidate and the offset of the matched
277// translation.  If no translation is selected, the fallback's name and offset
278// are selected.
279void LanguageSelector::DoSelect(const std::vector<std::wstring>& candidates) {
280  // Make a pass through the candidates looking for an exact or alias match.
281  // Failing that, make another pass looking for a wildcard match.
282  if (!SelectIf(candidates, &GetLanguageOffset, &matched_candidate_,
283                &offset_) &&
284      !SelectIf(candidates, &MatchLanguageOffset, &matched_candidate_,
285                &offset_)) {
286    VLOG(1) << "No suitable language found for any candidates.";
287
288    // Our fallback is "en-us"
289    matched_candidate_.assign(&kFallbackLanguage[0],
290                              arraysize(kFallbackLanguage) - 1);
291    offset_ = kFallbackLanguageOffset;
292  }
293}
294
295}  // namespace installer
296