// Copyright (C) 2014 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.org 15947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.org#include "language.h" 16947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.org 17947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.org#include <algorithm> 18947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.org#include <cctype> 19947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.org#include <string> 20947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.org#include <vector> 21947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.org 22f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org#include "rule.h" 23947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.org#include "util/string_split.h" 24947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.org 25947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.orgnamespace i18n { 26947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.orgnamespace addressinput { 27947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.org 28947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.orgLanguage::Language(const std::string& language_tag) : tag(language_tag), 29947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.org base(), 30947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.org has_latin_script(false) { 31947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.org // Character '-' is the separator for subtags in the BCP 47. However, some 32947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.org // legacy code generates tags with '_' instead of '-'. 
33947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.org static const char kSubtagsSeparator = '-'; 34947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.org static const char kAlternativeSubtagsSeparator = '_'; 35000aa6dbb70273ccefa77a5d4cd1a400939a2666roubert@google.com std::replace( 36000aa6dbb70273ccefa77a5d4cd1a400939a2666roubert@google.com tag.begin(), tag.end(), kAlternativeSubtagsSeparator, kSubtagsSeparator); 37947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.org 38947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.org // OK to use 'tolower' because BCP 47 tags are always in ASCII. 39947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.org std::string lowercase = tag; 40000aa6dbb70273ccefa77a5d4cd1a400939a2666roubert@google.com std::transform( 41000aa6dbb70273ccefa77a5d4cd1a400939a2666roubert@google.com lowercase.begin(), lowercase.end(), lowercase.begin(), tolower); 42947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.org 43947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.org base = lowercase.substr(0, lowercase.find(kSubtagsSeparator)); 44947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.org 45947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.org // The lowercase BCP 47 subtag for Latin script. 46947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.org static const char kLowercaseLatinScript[] = "latn"; 47947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.org std::vector<std::string> subtags; 48947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.org SplitString(lowercase, kSubtagsSeparator, &subtags); 49947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.org 50947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.org // Support only the second and third position for the script. 
51947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.org has_latin_script = 52947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.org (subtags.size() > 1 && subtags[1] == kLowercaseLatinScript) || 53947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.org (subtags.size() > 2 && subtags[2] == kLowercaseLatinScript); 54947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.org} 55947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.org 56947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.orgLanguage::~Language() {} 57947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.org 58f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.orgLanguage ChooseBestAddressLanguage(const Rule& address_region_rule, 59f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org const Language& ui_language) { 60f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org if (address_region_rule.GetLanguages().empty()) { 61f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org return ui_language; 62f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org } 63f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org 64f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org std::vector<Language> available_languages; 65f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org for (std::vector<std::string>::const_iterator 66f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org language_tag_it = address_region_rule.GetLanguages().begin(); 67f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org language_tag_it != address_region_rule.GetLanguages().end(); 68f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org ++language_tag_it) { 69f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org available_languages.push_back(Language(*language_tag_it)); 70f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org } 71f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org 
72f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org if (ui_language.tag.empty()) { 73f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org return available_languages.front(); 74f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org } 75f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org 76f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org bool has_latin_format = !address_region_rule.GetLatinFormat().empty(); 77f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org 78f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org // The conventionally formatted BCP 47 Latin script with a preceding subtag 79f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org // separator. 80f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org static const char kLatinScriptSuffix[] = "-Latn"; 81f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org Language latin_script_language( 82f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org available_languages.front().base + kLatinScriptSuffix); 83f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org if (has_latin_format && ui_language.has_latin_script) { 84f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org return latin_script_language; 85f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org } 86f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org 87f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org for (std::vector<Language>::const_iterator 88f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org available_lang_it = available_languages.begin(); 89f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org available_lang_it != available_languages.end(); ++available_lang_it) { 90f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org // Base language comparison works because no region supports the same base 91f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org // language with different scripts, for now. 
For example, no region supports 92f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org // "zh-Hant" and "zh-Hans" at the same time. 93f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org if (ui_language.base == available_lang_it->base) { 94f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org return *available_lang_it; 95f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org } 96f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org } 97f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org 98f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org return has_latin_format ? latin_script_language : available_languages.front(); 99f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org} 100f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org 101947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.org} // namespace addressinput 102947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.org} // namespace i18n 103