1// Copyright (c) 2012 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4// 5// This file defines a helper class for selecting a supported language from a 6// set of candidates. 7 8#include "chrome/installer/util/language_selector.h" 9 10#include <algorithm> 11#include <functional> 12 13#include "base/logging.h" 14#include "base/strings/string_util.h" 15#include "base/win/i18n.h" 16#include "chrome/installer/util/google_update_settings.h" 17 18#include "installer_util_strings.h" 19 20namespace { 21 22struct LangToOffset { 23 const wchar_t* language; 24 int offset; 25}; 26 27// The language we fall back upon when all else fails. 28const wchar_t kFallbackLanguage[] = L"en-us"; 29const int kFallbackLanguageOffset = IDS_L10N_OFFSET_EN_US; 30 31// http://tools.ietf.org/html/rfc5646 Section 2.3.3 32const std::wstring::size_type kScriptSubtagLength = 4; 33 34// A sorted array of language identifiers (and their offsets) for which 35// translations are available. The contents of the array are generated by 36// create_string_rc.py. 37const LangToOffset kLanguageOffsetPairs[] = { 38#define HANDLE_LANGUAGE(l_, o_) { L ## #l_, o_ }, 39 DO_LANGUAGES 40#undef HANDLE_LANGUAGE 41}; 42 43// A sorted array of language identifiers that are aliases to other languages 44// for which translations are available. 45const LangToOffset kLanguageToOffsetExceptions[] = { 46 // Alias some English variants to British English (all others wildcard to US). 47 { L"en-au", IDS_L10N_OFFSET_EN_GB }, 48 { L"en-ca", IDS_L10N_OFFSET_EN_GB }, 49 { L"en-nz", IDS_L10N_OFFSET_EN_GB }, 50 { L"en-za", IDS_L10N_OFFSET_EN_GB }, 51 // Alias es-es to es (all others wildcard to es-419). 52 { L"es-es", IDS_L10N_OFFSET_ES }, 53 // Google web properties use iw for he. Handle both just to be safe. 54 { L"he", IDS_L10N_OFFSET_IW }, 55 // Google web properties use no for nb. Handle both just to be safe. 56 { L"nb", IDS_L10N_OFFSET_NO }, 57 // Some Google web properties use tl for fil. Handle both just to be safe. 58 // They're not completely identical, but alias it here. 59 { L"tl", IDS_L10N_OFFSET_FIL }, 60 // Pre-Vista aliases for Chinese w/ script subtag. 61 { L"zh-chs", IDS_L10N_OFFSET_ZH_CN }, 62 { L"zh-cht", IDS_L10N_OFFSET_ZH_TW }, 63 // Vista+ aliases for Chinese w/ script subtag. 64 { L"zh-hans", IDS_L10N_OFFSET_ZH_CN }, 65 { L"zh-hant", IDS_L10N_OFFSET_ZH_TW }, 66 // Alias Hong Kong and Macau to Taiwan. 67 { L"zh-hk", IDS_L10N_OFFSET_ZH_TW }, 68 { L"zh-mo", IDS_L10N_OFFSET_ZH_TW }, 69 // Although the wildcard entry for zh would result in this, alias zh-sg so 70 // that it will win if it precedes another valid tag in a list of candidates. 71 { L"zh-sg", IDS_L10N_OFFSET_ZH_CN } 72}; 73 74// A sorted array of neutral language identifiers that are wildcard aliases to 75// other languages for which translations are available. 76const LangToOffset kLanguageToOffsetWildcards[] = { 77 // Use the U.S. region for anything English. 78 { L"en", IDS_L10N_OFFSET_EN_US }, 79 // Use the Latin American region for anything Spanish. 80 { L"es", IDS_L10N_OFFSET_ES_419 }, 81 // Use the Brazil region for anything Portugese. 82 { L"pt", IDS_L10N_OFFSET_PT_BR }, 83 // Use the P.R.C. region for anything Chinese. 84 { L"zh", IDS_L10N_OFFSET_ZH_CN } 85}; 86 87#if !defined(NDEBUG) 88// Returns true if the items in the given range are sorted. If 89// |byNameAndOffset| is true, the items must be sorted by both name and offset. 90bool IsArraySorted(const LangToOffset* first, const LangToOffset* last, 91 bool byNameAndOffset) { 92 if (last - first > 1) { 93 for (--last; first != last; ++first) { 94 if (!(std::wstring(first->language) < (first + 1)->language) || 95 byNameAndOffset && !(first->offset < (first + 1)->offset)) { 96 return false; 97 } 98 } 99 } 100 return true; 101} 102 103// Validates that the static read-only mappings are properly sorted. 104void ValidateMappings() { 105 // Ensure that kLanguageOffsetPairs is sorted. 106 DCHECK(IsArraySorted(&kLanguageOffsetPairs[0], 107 &kLanguageOffsetPairs[arraysize(kLanguageOffsetPairs)], 108 true)) << "kOffsetToLanguageId is not sorted"; 109 110 // Ensure that kLanguageToOffsetExceptions is sorted. 111 DCHECK(IsArraySorted( 112 &kLanguageToOffsetExceptions[0], 113 &kLanguageToOffsetExceptions[arraysize(kLanguageToOffsetExceptions)], 114 false)) << "kLanguageToOffsetExceptions is not sorted"; 115 116 // Ensure that kLanguageToOffsetWildcards is sorted. 117 DCHECK(IsArraySorted( 118 &kLanguageToOffsetWildcards[0], 119 &kLanguageToOffsetWildcards[arraysize(kLanguageToOffsetWildcards)], 120 false)) << "kLanguageToOffsetWildcards is not sorted"; 121} 122#endif // !defined(NDEBUG) 123 124// A less-than overload to do slightly more efficient searches in the 125// sorted arrays. 126bool operator<(const LangToOffset& left, const std::wstring& right) { 127 return left.language < right; 128} 129 130// A less-than overload to do slightly more efficient searches in the 131// sorted arrays. 132bool operator<(const std::wstring& left, const LangToOffset& right) { 133 return left < right.language; 134} 135 136// A not-so-efficient less-than overload for the same uses as above. 137bool operator<(const LangToOffset& left, const LangToOffset& right) { 138 return std::wstring(left.language) < right.language; 139} 140 141// A compare function for searching in a sorted array by offset. 142bool IsOffsetLessThan(const LangToOffset& left, const LangToOffset& right) { 143 return left.offset < right.offset; 144} 145 146// Binary search in one of the sorted arrays to find the offset corresponding to 147// a given language |name|. 148bool TryFindOffset(const LangToOffset* first, const LangToOffset* last, 149 const std::wstring& name, int* offset) { 150 const LangToOffset* search_result = std::lower_bound(first, last, name); 151 if (last != search_result && search_result->language == name) { 152 *offset = search_result->offset; 153 return true; 154 } 155 return false; 156} 157 158// A predicate function for LanguageSelector::SelectIf that searches for the 159// offset of a translated language. The search first tries to find an exact 160// match. Failing that, an exact match with an alias is attempted. 161bool GetLanguageOffset(const std::wstring& language, int* offset) { 162 // Note: always perform the exact match first so that an alias is never 163 // selected in place of a future translation. 164 return 165 TryFindOffset( 166 &kLanguageOffsetPairs[0], 167 &kLanguageOffsetPairs[arraysize(kLanguageOffsetPairs)], 168 language, offset) || 169 TryFindOffset( 170 &kLanguageToOffsetExceptions[0], 171 &kLanguageToOffsetExceptions[arraysize(kLanguageToOffsetExceptions)], 172 language, offset); 173} 174 175// A predicate function for LanguageSelector::SelectIf that searches for a 176// wildcard match with |language|'s primary language subtag. 177bool MatchLanguageOffset(const std::wstring& language, int* offset) { 178 std::wstring primary_language = language.substr(0, language.find(L'-')); 179 180 // Now check for wildcards. 181 return 182 TryFindOffset( 183 &kLanguageToOffsetWildcards[0], 184 &kLanguageToOffsetWildcards[arraysize(kLanguageToOffsetWildcards)], 185 primary_language, offset); 186} 187 188// Adds to |candidates| the eligible languages on the system. Any language 189// setting specified by Omaha takes precedence over the operating system's 190// configured languages. 191void GetCandidatesFromSystem(std::vector<std::wstring>* candidates) { 192 DCHECK(candidates); 193 std::wstring language; 194 195 // Omaha gets first pick. 196 GoogleUpdateSettings::GetLanguage(&language); 197 if (!language.empty()) { 198 candidates->push_back(language); 199 } 200 201 // Now try the Windows UI languages. Use the thread preferred since that will 202 // kindly return us a list of all kinds of fallbacks. 203 base::win::i18n::GetThreadPreferredUILanguageList(candidates); 204} 205 206} // namespace 207 208namespace installer { 209 210LanguageSelector::LanguageSelector() 211 : offset_(arraysize(kLanguageOffsetPairs)) { 212#if !defined(NDEBUG) 213 ValidateMappings(); 214#endif // !defined(NDEBUG) 215 std::vector<std::wstring> candidates; 216 217 GetCandidatesFromSystem(&candidates); 218 DoSelect(candidates); 219} 220 221LanguageSelector::LanguageSelector(const std::vector<std::wstring>& candidates) 222 : offset_(arraysize(kLanguageOffsetPairs)) { 223#if !defined(NDEBUG) 224 ValidateMappings(); 225#endif // !defined(NDEBUG) 226 DoSelect(candidates); 227} 228 229LanguageSelector::~LanguageSelector() { 230} 231 232// static 233std::wstring LanguageSelector::GetLanguageName(int offset) { 234 DCHECK_GE(offset, 0); 235 DCHECK_LT(static_cast<size_t>(offset), arraysize(kLanguageOffsetPairs)); 236 237 LangToOffset value = { NULL, offset }; 238 const LangToOffset* search_result = 239 std::lower_bound(&kLanguageOffsetPairs[0], 240 &kLanguageOffsetPairs[arraysize(kLanguageOffsetPairs)], 241 value, IsOffsetLessThan); 242 if (&kLanguageOffsetPairs[arraysize(kLanguageOffsetPairs)] != search_result && 243 search_result->offset == offset) { 244 return search_result->language; 245 } 246 NOTREACHED() << "Unknown language offset."; 247 return std::wstring(&kFallbackLanguage[0], arraysize(kFallbackLanguage) - 1); 248} 249 250// Runs through the set of candidates, sending their downcased representation 251// through |select_predicate|. Returns true if the predicate selects a 252// candidate, in which case |matched_name| is assigned the value of the 253// candidate and |matched_offset| is assigned the language offset of the 254// selected translation. 255// static 256bool LanguageSelector::SelectIf(const std::vector<std::wstring>& candidates, 257 SelectPred_Fn select_predicate, 258 std::wstring* matched_name, 259 int* matched_offset) { 260 std::wstring candidate; 261 for (std::vector<std::wstring>::const_iterator scan = candidates.begin(), 262 end = candidates.end(); scan != end; ++scan) { 263 candidate.assign(*scan); 264 StringToLowerASCII(&candidate); 265 if (select_predicate(candidate, matched_offset)) { 266 matched_name->assign(*scan); 267 return true; 268 } 269 } 270 271 return false; 272} 273 274// Select the best-fit translation from the ordered list |candidates|. 275// At the conclusion, this instance's |matched_candidate_| and |offset_| members 276// are set to the name of the selected candidate and the offset of the matched 277// translation. If no translation is selected, the fallback's name and offset 278// are selected. 279void LanguageSelector::DoSelect(const std::vector<std::wstring>& candidates) { 280 // Make a pass through the candidates looking for an exact or alias match. 281 // Failing that, make another pass looking for a wildcard match. 282 if (!SelectIf(candidates, &GetLanguageOffset, &matched_candidate_, 283 &offset_) && 284 !SelectIf(candidates, &MatchLanguageOffset, &matched_candidate_, 285 &offset_)) { 286 VLOG(1) << "No suitable language found for any candidates."; 287 288 // Our fallback is "en-us" 289 matched_candidate_.assign(&kFallbackLanguage[0], 290 arraysize(kFallbackLanguage) - 1); 291 offset_ = kFallbackLanguageOffset; 292 } 293} 294 295} // namespace installer 296