1// Copyright (C) 2014 Google Inc.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#include "language.h"
16
17#include <algorithm>
18#include <cctype>
19#include <string>
20#include <vector>
21
22#include "rule.h"
23#include "util/string_split.h"
24
25namespace i18n {
26namespace addressinput {
27
28Language::Language(const std::string& language_tag) : tag(language_tag),
29                                                      base(),
30                                                      has_latin_script(false) {
31  // Character '-' is the separator for subtags in the BCP 47. However, some
32  // legacy code generates tags with '_' instead of '-'.
33  static const char kSubtagsSeparator = '-';
34  static const char kAlternativeSubtagsSeparator = '_';
35  std::replace(
36      tag.begin(), tag.end(), kAlternativeSubtagsSeparator, kSubtagsSeparator);
37
38  // OK to use 'tolower' because BCP 47 tags are always in ASCII.
39  std::string lowercase = tag;
40  std::transform(
41      lowercase.begin(), lowercase.end(), lowercase.begin(), tolower);
42
43  base = lowercase.substr(0, lowercase.find(kSubtagsSeparator));
44
45  // The lowercase BCP 47 subtag for Latin script.
46  static const char kLowercaseLatinScript[] = "latn";
47  std::vector<std::string> subtags;
48  SplitString(lowercase, kSubtagsSeparator, &subtags);
49
50  // Support only the second and third position for the script.
51  has_latin_script =
52      (subtags.size() > 1 && subtags[1] == kLowercaseLatinScript) ||
53      (subtags.size() > 2 && subtags[2] == kLowercaseLatinScript);
54}
55
56Language::~Language() {}
57
58Language ChooseBestAddressLanguage(const Rule& address_region_rule,
59                                   const Language& ui_language) {
60  if (address_region_rule.GetLanguages().empty()) {
61    return ui_language;
62  }
63
64  std::vector<Language> available_languages;
65  for (std::vector<std::string>::const_iterator
66       language_tag_it = address_region_rule.GetLanguages().begin();
67       language_tag_it != address_region_rule.GetLanguages().end();
68       ++language_tag_it) {
69    available_languages.push_back(Language(*language_tag_it));
70  }
71
72  if (ui_language.tag.empty()) {
73    return available_languages.front();
74  }
75
76  bool has_latin_format = !address_region_rule.GetLatinFormat().empty();
77
78  // The conventionally formatted BCP 47 Latin script with a preceding subtag
79  // separator.
80  static const char kLatinScriptSuffix[] = "-Latn";
81  Language latin_script_language(
82      available_languages.front().base + kLatinScriptSuffix);
83  if (has_latin_format && ui_language.has_latin_script) {
84    return latin_script_language;
85  }
86
87  for (std::vector<Language>::const_iterator
88       available_lang_it = available_languages.begin();
89       available_lang_it != available_languages.end(); ++available_lang_it) {
90    // Base language comparison works because no region supports the same base
91    // language with different scripts, for now. For example, no region supports
92    // "zh-Hant" and "zh-Hans" at the same time.
93    if (ui_language.base == available_lang_it->base) {
94      return *available_lang_it;
95    }
96  }
97
98  return has_latin_format ? latin_script_language : available_languages.front();
99}
100
101}  // namespace addressinput
102}  // namespace i18n
103