1// Copyright (C) 2014 Google Inc.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#include <libaddressinput/address_formatter.h>
16
17#include <libaddressinput/address_data.h>
18#include <libaddressinput/address_field.h>
19#include <libaddressinput/util/basictypes.h>
20
21#include <algorithm>
22#include <cassert>
23#include <cstddef>
24#include <functional>
25#include <string>
26#include <vector>
27
28#include "format_element.h"
29#include "language.h"
30#include "region_data_constants.h"
31#include "rule.h"
32#include "util/cctype_tolower_equal.h"
33
34namespace i18n {
35namespace addressinput {
36
37namespace {
38
// Separators placed between address lines when they are joined into a single
// line; see GetLineSeparatorForLanguage() for how one is selected.

// Comma followed by a space.
const char kCommaSeparator[] = ", ";

// A single space.
const char kSpaceSeparator[] = " ";

// UTF-8 bytes 0xD8 0x8C (the Arabic comma, U+060C) followed by a space.
const char kArabicCommaSeparator[] = "\xD8\x8C ";
42
// Base languages for which GetLineSeparatorForLanguage() selects a single
// space as the separator. The pointers themselves are const so that the table
// cannot be modified at run time.
const char* const kLanguagesThatUseSpace[] = {
  "th",
  "ko"
};
47
// Base languages for which GetLineSeparatorForLanguage() selects an empty
// separator. The pointers themselves are const so that the table cannot be
// modified at run time.
const char* const kLanguagesThatHaveNoSeparator[] = {
  "ja",
  "zh"  // All Chinese variants.
};
52
// Base languages for which GetLineSeparatorForLanguage() selects the Arabic
// comma separator.
// This data is based on CLDR, for languages that are in official use in some
// country, where Arabic is the most likely script tag.
// TODO: Consider supporting variants such as tr-Arab by detecting the script
// code.
// The pointers themselves are const so that the table cannot be modified at
// run time.
const char* const kLanguagesThatUseAnArabicComma[] = {
  "ar",
  "az",
  "fa",
  "kk",
  "ku",
  "ky",
  "ps",
  "tg",
  "tk",
  "ur",
  "uz"
};
70
71std::string GetLineSeparatorForLanguage(const std::string& language_tag) {
72  Language address_language(language_tag);
73
74  // First deal with explicit script tags.
75  if (address_language.has_latin_script) {
76    return kCommaSeparator;
77  }
78
79  // Now guess something appropriate based on the base language.
80  const std::string& base_language = address_language.base;
81  if (std::find_if(kLanguagesThatUseSpace,
82                   kLanguagesThatUseSpace + arraysize(kLanguagesThatUseSpace),
83                   std::bind2nd(EqualToTolowerString(), base_language)) !=
84      kLanguagesThatUseSpace + arraysize(kLanguagesThatUseSpace)) {
85    return kSpaceSeparator;
86  } else if (std::find_if(
87                 kLanguagesThatHaveNoSeparator,
88                 kLanguagesThatHaveNoSeparator +
89                     arraysize(kLanguagesThatHaveNoSeparator),
90                 std::bind2nd(EqualToTolowerString(), base_language)) !=
91             kLanguagesThatHaveNoSeparator +
92                 arraysize(kLanguagesThatHaveNoSeparator)) {
93    return "";
94  } else if (std::find_if(
95                 kLanguagesThatUseAnArabicComma,
96                 kLanguagesThatUseAnArabicComma +
97                     arraysize(kLanguagesThatUseAnArabicComma),
98                 std::bind2nd(EqualToTolowerString(), base_language)) !=
99             kLanguagesThatUseAnArabicComma +
100                 arraysize(kLanguagesThatUseAnArabicComma)) {
101    return kArabicCommaSeparator;
102  }
103  // Either the language is a Latin-script language, or no language was
104  // specified. In the latter case we still return ", " as the most common
105  // separator in use. In countries that don't use this, e.g. Thailand,
106  // addresses are often written in Latin script where this would still be
107  // appropriate, so this is a reasonable default in the absence of information.
108  return kCommaSeparator;
109}
110
111void CombineLinesForLanguage(const std::vector<std::string>& lines,
112                             const std::string& language_tag,
113                             std::string* line) {
114  line->clear();
115  std::string separator = GetLineSeparatorForLanguage(language_tag);
116  for (std::vector<std::string>::const_iterator it = lines.begin();
117       it != lines.end();
118       ++it) {
119    if (it != lines.begin()) {
120      line->append(separator);
121    }
122    line->append(*it);
123  }
124}
125
126}  // namespace
127
128void GetFormattedNationalAddress(
129    const AddressData& address_data, std::vector<std::string>* lines) {
130  assert(lines != NULL);
131  lines->clear();
132
133  Rule rule;
134  rule.CopyFrom(Rule::GetDefault());
135  // TODO: Eventually, we should get the best rule for this country and
136  // language, rather than just for the country.
137  rule.ParseSerializedRule(RegionDataConstants::GetRegionData(
138      address_data.region_code));
139
140  Language language(address_data.language_code);
141
142  // If Latin-script rules are available and the |language_code| of this address
143  // is explicitly tagged as being Latin, then use the Latin-script formatting
144  // rules.
145  const std::vector<FormatElement>& format =
146      language.has_latin_script && !rule.GetLatinFormat().empty()
147          ? rule.GetLatinFormat()
148          : rule.GetFormat();
149
150  // Address format without the unnecessary elements (based on which address
151  // fields are empty). We assume all literal strings that are not at the start
152  // or end of a line are separators, and therefore only relevant if the
153  // surrounding fields are filled in. This works with the data we have
154  // currently.
155  std::vector<FormatElement> pruned_format;
156  for (std::vector<FormatElement>::const_iterator
157       element_it = format.begin();
158       element_it != format.end();
159       ++element_it) {
160    // Always keep the newlines.
161    if (element_it->IsNewline() ||
162        // Always keep the non-empty address fields.
163        (element_it->IsField() &&
164         !address_data.IsFieldEmpty(element_it->GetField())) ||
165        // Only keep literals that satisfy these 2 conditions:
166        (!element_it->IsField() &&
167         // (1) Not preceding an empty field.
168         (element_it + 1 == format.end() ||
169          !(element_it + 1)->IsField() ||
170          !address_data.IsFieldEmpty((element_it + 1)->GetField())) &&
171         // (2) Not following a removed field.
172         (element_it == format.begin() ||
173          !(element_it - 1)->IsField() ||
174          (!pruned_format.empty() && pruned_format.back().IsField())))) {
175      pruned_format.push_back(*element_it);
176    }
177  }
178
179  std::string line;
180  for (std::vector<FormatElement>::const_iterator
181       element_it = pruned_format.begin();
182       element_it != pruned_format.end();
183       ++element_it) {
184    if (element_it->IsNewline()) {
185      if (!line.empty()) {
186        lines->push_back(line);
187        line.clear();
188      }
189    } else if (element_it->IsField()) {
190      AddressField field = element_it->GetField();
191      if (field == STREET_ADDRESS) {
192        // The field "street address" represents the street address lines of an
193        // address, so there can be multiple values.
194        if (!address_data.IsFieldEmpty(field)) {
195          line.append(address_data.address_line.front());
196          if (address_data.address_line.size() > 1U) {
197            lines->push_back(line);
198            line.clear();
199            lines->insert(lines->end(),
200                          address_data.address_line.begin() + 1,
201                          address_data.address_line.end());
202          }
203        }
204      } else {
205        line.append(address_data.GetFieldValue(field));
206      }
207    } else {
208      line.append(element_it->GetLiteral());
209    }
210  }
211  if (!line.empty()) {
212    lines->push_back(line);
213  }
214}
215
216void GetFormattedNationalAddressLine(
217    const AddressData& address_data, std::string* line) {
218  std::vector<std::string> address_lines;
219  GetFormattedNationalAddress(address_data, &address_lines);
220  CombineLinesForLanguage(address_lines, address_data.language_code, line);
221}
222
223void GetStreetAddressLinesAsSingleLine(
224    const AddressData& address_data, std::string* line) {
225  CombineLinesForLanguage(
226      address_data.address_line, address_data.language_code, line);
227}
228
229}  // namespace addressinput
230}  // namespace i18n
231