language_selector.cc revision 116680a4aac90f2aa7413d9095a592090648e557
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// This file defines a helper class for selecting a supported language from a
// set of candidates.
7
8#include "chrome/installer/util/language_selector.h"
9
10#include <algorithm>
11#include <functional>
12
13#include "base/logging.h"
14#include "base/strings/string16.h"
15#include "base/strings/string_util.h"
16#include "base/win/i18n.h"
17#include "chrome/installer/util/google_update_settings.h"
18
19#include "installer_util_strings.h"
20
21namespace {
22
// One row of the lookup tables in this file: a language identifier together
// with the offset of the translation it maps to.
struct LangToOffset {
  // Language identifier for this row (for example L"en-us").
  const wchar_t* language;

  // Offset paired with |language|; the hand-written tables in this file fill
  // it from the IDS_L10N_OFFSET_* constants.
  int offset;
};
27
28// The language we fall back upon when all else fails.
29const wchar_t kFallbackLanguage[] = L"en-us";
30const int kFallbackLanguageOffset = IDS_L10N_OFFSET_EN_US;
31
// Number of characters in a script subtag, per
// http://tools.ietf.org/html/rfc5646 Section 2.3.3.
// NOTE(review): nothing in the visible part of this file references this
// constant; confirm whether it is still needed.
const std::wstring::size_type kScriptSubtagLength = 4;
34
35// A sorted array of language identifiers (and their offsets) for which
36// translations are available. The contents of the array are generated by
37// create_string_rc.py.
38const LangToOffset kLanguageOffsetPairs[] = {
39#define HANDLE_LANGUAGE(l_, o_) { L ## #l_, o_ },
40  DO_LANGUAGES
41#undef HANDLE_LANGUAGE
42};
43
44// A sorted array of language identifiers that are aliases to other languages
45// for which translations are available.
46const LangToOffset kLanguageToOffsetExceptions[] = {
47  // Alias some English variants to British English (all others wildcard to US).
48  { L"en-au", IDS_L10N_OFFSET_EN_GB },
49  { L"en-ca", IDS_L10N_OFFSET_EN_GB },
50  { L"en-nz", IDS_L10N_OFFSET_EN_GB },
51  { L"en-za", IDS_L10N_OFFSET_EN_GB },
52  // Alias es-es to es (all others wildcard to es-419).
53  { L"es-es", IDS_L10N_OFFSET_ES },
54  // Google web properties use iw for he. Handle both just to be safe.
55  { L"he", IDS_L10N_OFFSET_IW },
56  // Google web properties use no for nb. Handle both just to be safe.
57  { L"nb", IDS_L10N_OFFSET_NO },
58  // Some Google web properties use tl for fil. Handle both just to be safe.
59  // They're not completely identical, but alias it here.
60  { L"tl", IDS_L10N_OFFSET_FIL },
61  // Pre-Vista aliases for Chinese w/ script subtag.
62  { L"zh-chs", IDS_L10N_OFFSET_ZH_CN },
63  { L"zh-cht", IDS_L10N_OFFSET_ZH_TW },
64  // Vista+ aliases for Chinese w/ script subtag.
65  { L"zh-hans", IDS_L10N_OFFSET_ZH_CN },
66  { L"zh-hant", IDS_L10N_OFFSET_ZH_TW },
67  // Alias Hong Kong and Macau to Taiwan.
68  { L"zh-hk", IDS_L10N_OFFSET_ZH_TW },
69  { L"zh-mo", IDS_L10N_OFFSET_ZH_TW },
70  // Although the wildcard entry for zh would result in this, alias zh-sg so
71  // that it will win if it precedes another valid tag in a list of candidates.
72  { L"zh-sg", IDS_L10N_OFFSET_ZH_CN }
73};
74
75// A sorted array of neutral language identifiers that are wildcard aliases to
76// other languages for which translations are available.
77const LangToOffset kLanguageToOffsetWildcards[] = {
78  // Use the U.S. region for anything English.
79  { L"en", IDS_L10N_OFFSET_EN_US },
80  // Use the Latin American region for anything Spanish.
81  { L"es", IDS_L10N_OFFSET_ES_419 },
82  // Use the Brazil region for anything Portugese.
83  { L"pt", IDS_L10N_OFFSET_PT_BR },
84  // Use the P.R.C. region for anything Chinese.
85  { L"zh", IDS_L10N_OFFSET_ZH_CN }
86};
87
88#if !defined(NDEBUG)
89// Returns true if the items in the given range are sorted.  If
90// |byNameAndOffset| is true, the items must be sorted by both name and offset.
91bool IsArraySorted(const LangToOffset* first, const LangToOffset* last,
92                   bool byNameAndOffset) {
93  if (last - first > 1) {
94    for (--last; first != last; ++first) {
95       if (!(std::wstring(first->language) < (first + 1)->language) ||
96           byNameAndOffset && !(first->offset < (first + 1)->offset)) {
97         return false;
98       }
99    }
100  }
101  return true;
102}
103
104// Validates that the static read-only mappings are properly sorted.
105void ValidateMappings() {
106  // Ensure that kLanguageOffsetPairs is sorted.
107  DCHECK(IsArraySorted(&kLanguageOffsetPairs[0],
108                       &kLanguageOffsetPairs[arraysize(kLanguageOffsetPairs)],
109                       true)) << "kOffsetToLanguageId is not sorted";
110
111  // Ensure that kLanguageToOffsetExceptions is sorted.
112  DCHECK(IsArraySorted(
113           &kLanguageToOffsetExceptions[0],
114           &kLanguageToOffsetExceptions[arraysize(kLanguageToOffsetExceptions)],
115           false)) << "kLanguageToOffsetExceptions is not sorted";
116
117  // Ensure that kLanguageToOffsetWildcards is sorted.
118  DCHECK(IsArraySorted(
119            &kLanguageToOffsetWildcards[0],
120            &kLanguageToOffsetWildcards[arraysize(kLanguageToOffsetWildcards)],
121            false)) << "kLanguageToOffsetWildcards is not sorted";
122}
123#endif  // !defined(NDEBUG)
124
125// A less-than overload to do slightly more efficient searches in the
126// sorted arrays.
127bool operator<(const LangToOffset& left, const std::wstring& right) {
128  return left.language < right;
129}
130
131// A less-than overload to do slightly more efficient searches in the
132// sorted arrays.
133bool operator<(const std::wstring& left, const LangToOffset& right) {
134  return left < right.language;
135}
136
137// A not-so-efficient less-than overload for the same uses as above.
138bool operator<(const LangToOffset& left, const LangToOffset& right) {
139  return std::wstring(left.language) < right.language;
140}
141
142// A compare function for searching in a sorted array by offset.
143bool IsOffsetLessThan(const LangToOffset& left, const LangToOffset& right) {
144  return left.offset < right.offset;
145}
146
147// Binary search in one of the sorted arrays to find the offset corresponding to
148// a given language |name|.
149bool TryFindOffset(const LangToOffset* first, const LangToOffset* last,
150                   const std::wstring& name, int* offset) {
151  const LangToOffset* search_result = std::lower_bound(first, last, name);
152  if (last != search_result && search_result->language == name) {
153    *offset = search_result->offset;
154    return true;
155  }
156  return false;
157}
158
159// A predicate function for LanguageSelector::SelectIf that searches for the
160// offset of a translated language.  The search first tries to find an exact
161// match.  Failing that, an exact match with an alias is attempted.
162bool GetLanguageOffset(const std::wstring& language, int* offset) {
163  // Note: always perform the exact match first so that an alias is never
164  // selected in place of a future translation.
165  return
166      TryFindOffset(
167          &kLanguageOffsetPairs[0],
168          &kLanguageOffsetPairs[arraysize(kLanguageOffsetPairs)],
169          language, offset) ||
170      TryFindOffset(
171          &kLanguageToOffsetExceptions[0],
172          &kLanguageToOffsetExceptions[arraysize(kLanguageToOffsetExceptions)],
173          language, offset);
174}
175
176// A predicate function for LanguageSelector::SelectIf that searches for a
177// wildcard match with |language|'s primary language subtag.
178bool MatchLanguageOffset(const std::wstring& language, int* offset) {
179  std::wstring primary_language = language.substr(0, language.find(L'-'));
180
181  // Now check for wildcards.
182  return
183      TryFindOffset(
184          &kLanguageToOffsetWildcards[0],
185          &kLanguageToOffsetWildcards[arraysize(kLanguageToOffsetWildcards)],
186          primary_language, offset);
187}
188
189// Adds to |candidates| the eligible languages on the system.  Any language
190// setting specified by Omaha takes precedence over the operating system's
191// configured languages.
192void GetCandidatesFromSystem(std::vector<std::wstring>* candidates) {
193  DCHECK(candidates);
194  base::string16 language;
195
196  // Omaha gets first pick.
197  GoogleUpdateSettings::GetLanguage(&language);
198  if (!language.empty()) {
199    candidates->push_back(language);
200  }
201
202  // Now try the Windows UI languages.  Use the thread preferred since that will
203  // kindly return us a list of all kinds of fallbacks.
204  base::win::i18n::GetThreadPreferredUILanguageList(candidates);
205}
206
207}  // namespace
208
209namespace installer {
210
211LanguageSelector::LanguageSelector()
212    : offset_(arraysize(kLanguageOffsetPairs)) {
213#if !defined(NDEBUG)
214  ValidateMappings();
215#endif  // !defined(NDEBUG)
216  std::vector<std::wstring> candidates;
217
218  GetCandidatesFromSystem(&candidates);
219  DoSelect(candidates);
220}
221
222LanguageSelector::LanguageSelector(const std::vector<std::wstring>& candidates)
223    : offset_(arraysize(kLanguageOffsetPairs)) {
224#if !defined(NDEBUG)
225  ValidateMappings();
226#endif  // !defined(NDEBUG)
227  DoSelect(candidates);
228}
229
230LanguageSelector::~LanguageSelector() {
231}
232
233// static
234std::wstring LanguageSelector::GetLanguageName(int offset) {
235  DCHECK_GE(offset, 0);
236  DCHECK_LT(static_cast<size_t>(offset), arraysize(kLanguageOffsetPairs));
237
238  LangToOffset value = { NULL, offset };
239  const LangToOffset* search_result =
240    std::lower_bound(&kLanguageOffsetPairs[0],
241                     &kLanguageOffsetPairs[arraysize(kLanguageOffsetPairs)],
242                     value, IsOffsetLessThan);
243  if (&kLanguageOffsetPairs[arraysize(kLanguageOffsetPairs)] != search_result &&
244      search_result->offset == offset) {
245    return search_result->language;
246  }
247  NOTREACHED() << "Unknown language offset.";
248  return std::wstring(&kFallbackLanguage[0], arraysize(kFallbackLanguage) - 1);
249}
250
251// Runs through the set of candidates, sending their downcased representation
252// through |select_predicate|.  Returns true if the predicate selects a
253// candidate, in which case |matched_name| is assigned the value of the
254// candidate and |matched_offset| is assigned the language offset of the
255// selected translation.
256// static
257bool LanguageSelector::SelectIf(const std::vector<std::wstring>& candidates,
258                                SelectPred_Fn select_predicate,
259                                std::wstring* matched_name,
260                                int* matched_offset) {
261  std::wstring candidate;
262  for (std::vector<std::wstring>::const_iterator scan = candidates.begin(),
263          end = candidates.end(); scan != end; ++scan) {
264    candidate.assign(*scan);
265    StringToLowerASCII(&candidate);
266    if (select_predicate(candidate, matched_offset)) {
267      matched_name->assign(*scan);
268      return true;
269    }
270  }
271
272  return false;
273}
274
275// Select the best-fit translation from the ordered list |candidates|.
276// At the conclusion, this instance's |matched_candidate_| and |offset_| members
277// are set to the name of the selected candidate and the offset of the matched
278// translation.  If no translation is selected, the fallback's name and offset
279// are selected.
280void LanguageSelector::DoSelect(const std::vector<std::wstring>& candidates) {
281  // Make a pass through the candidates looking for an exact or alias match.
282  // Failing that, make another pass looking for a wildcard match.
283  if (!SelectIf(candidates, &GetLanguageOffset, &matched_candidate_,
284                &offset_) &&
285      !SelectIf(candidates, &MatchLanguageOffset, &matched_candidate_,
286                &offset_)) {
287    VLOG(1) << "No suitable language found for any candidates.";
288
289    // Our fallback is "en-us"
290    matched_candidate_.assign(&kFallbackLanguage[0],
291                              arraysize(kFallbackLanguage) - 1);
292    offset_ = kFallbackLanguageOffset;
293  }
294}
295
296}  // namespace installer
297