// Copyright (C) 2014 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.org
15947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.org#include "language.h"
16947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.org
17947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.org#include <algorithm>
18947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.org#include <cctype>
19947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.org#include <string>
20947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.org#include <vector>
21947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.org
22f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org#include "rule.h"
23947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.org#include "util/string_split.h"
24947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.org
25947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.orgnamespace i18n {
26947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.orgnamespace addressinput {
27947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.org
28947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.orgLanguage::Language(const std::string& language_tag) : tag(language_tag),
29947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.org                                                      base(),
30947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.org                                                      has_latin_script(false) {
31947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.org  // Character '-' is the separator for subtags in the BCP 47. However, some
32947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.org  // legacy code generates tags with '_' instead of '-'.
33947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.org  static const char kSubtagsSeparator = '-';
34947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.org  static const char kAlternativeSubtagsSeparator = '_';
35000aa6dbb70273ccefa77a5d4cd1a400939a2666roubert@google.com  std::replace(
36000aa6dbb70273ccefa77a5d4cd1a400939a2666roubert@google.com      tag.begin(), tag.end(), kAlternativeSubtagsSeparator, kSubtagsSeparator);
37947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.org
38947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.org  // OK to use 'tolower' because BCP 47 tags are always in ASCII.
39947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.org  std::string lowercase = tag;
40000aa6dbb70273ccefa77a5d4cd1a400939a2666roubert@google.com  std::transform(
41000aa6dbb70273ccefa77a5d4cd1a400939a2666roubert@google.com      lowercase.begin(), lowercase.end(), lowercase.begin(), tolower);
42947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.org
43947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.org  base = lowercase.substr(0, lowercase.find(kSubtagsSeparator));
44947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.org
45947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.org  // The lowercase BCP 47 subtag for Latin script.
46947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.org  static const char kLowercaseLatinScript[] = "latn";
47947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.org  std::vector<std::string> subtags;
48947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.org  SplitString(lowercase, kSubtagsSeparator, &subtags);
49947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.org
50947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.org  // Support only the second and third position for the script.
51947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.org  has_latin_script =
52947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.org      (subtags.size() > 1 && subtags[1] == kLowercaseLatinScript) ||
53947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.org      (subtags.size() > 2 && subtags[2] == kLowercaseLatinScript);
54947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.org}
55947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.org
56947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.orgLanguage::~Language() {}
57947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.org
58f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.orgLanguage ChooseBestAddressLanguage(const Rule& address_region_rule,
59f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org                                   const Language& ui_language) {
60f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org  if (address_region_rule.GetLanguages().empty()) {
61f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org    return ui_language;
62f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org  }
63f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org
64f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org  std::vector<Language> available_languages;
65f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org  for (std::vector<std::string>::const_iterator
66f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org       language_tag_it = address_region_rule.GetLanguages().begin();
67f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org       language_tag_it != address_region_rule.GetLanguages().end();
68f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org       ++language_tag_it) {
69f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org    available_languages.push_back(Language(*language_tag_it));
70f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org  }
71f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org
72f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org  if (ui_language.tag.empty()) {
73f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org    return available_languages.front();
74f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org  }
75f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org
76f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org  bool has_latin_format = !address_region_rule.GetLatinFormat().empty();
77f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org
78f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org  // The conventionally formatted BCP 47 Latin script with a preceding subtag
79f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org  // separator.
80f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org  static const char kLatinScriptSuffix[] = "-Latn";
81f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org  Language latin_script_language(
82f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org      available_languages.front().base + kLatinScriptSuffix);
83f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org  if (has_latin_format && ui_language.has_latin_script) {
84f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org    return latin_script_language;
85f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org  }
86f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org
87f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org  for (std::vector<Language>::const_iterator
88f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org       available_lang_it = available_languages.begin();
89f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org       available_lang_it != available_languages.end(); ++available_lang_it) {
90f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org    // Base language comparison works because no region supports the same base
91f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org    // language with different scripts, for now. For example, no region supports
92f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org    // "zh-Hant" and "zh-Hans" at the same time.
93f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org    if (ui_language.base == available_lang_it->base) {
94f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org      return *available_lang_it;
95f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org    }
96f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org  }
97f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org
98f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org  return has_latin_format ? latin_script_language : available_languages.front();
99f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org}
100f1be124367e8e74e1a235774914e3d4f970e9444rouslan@chromium.org
101947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.org}  // namespace addressinput
102947ce24396bbb66ec5fcd14a73f85f4e32c3dcc0rouslan@chromium.org}  // namespace i18n
103