1// Copyright (C) 2014 Google Inc.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#include <libaddressinput/preload_supplier.h>
16
17#include <libaddressinput/address_data.h>
18#include <libaddressinput/address_field.h>
19#include <libaddressinput/callback.h>
20#include <libaddressinput/supplier.h>
21#include <libaddressinput/util/basictypes.h>
22#include <libaddressinput/util/scoped_ptr.h>
23
24#include <algorithm>
25#include <cassert>
26#include <cstddef>
27#include <functional>
28#include <map>
29#include <set>
30#include <stack>
31#include <string>
32#include <utility>
33#include <vector>
34
35#include "language.h"
36#include "lookup_key.h"
37#include "region_data_constants.h"
38#include "retriever.h"
39#include "rule.h"
40#include "util/json.h"
41#include "util/string_compare.h"
42
43namespace i18n {
44namespace addressinput {
45
46namespace {
47
48// STL predicate less<> that uses StringCompare to match strings that a human
49// reader would consider to be "the same". The default implementation just does
50// case insensitive string comparison, but StringCompare can be overriden with
51// more sophisticated implementations.
52class IndexLess : public std::binary_function<std::string, std::string, bool> {
53 public:
54  result_type operator()(const first_argument_type& a,
55                         const second_argument_type& b) const {
56    return kStringCompare.NaturalLess(a, b);
57  }
58
59 private:
60  static const StringCompare kStringCompare;
61};
62
63const StringCompare IndexLess::kStringCompare;
64
65}  // namespace
66
67class IndexMap : public std::map<std::string, const Rule*, IndexLess> {};
68
69namespace {
70
71class Helper {
72 public:
73  // Does not take ownership of its parameters.
74  Helper(const std::string& region_code,
75         const std::string& key,
76         const PreloadSupplier::Callback& loaded,
77         const Retriever& retriever,
78         std::set<std::string>* pending,
79         IndexMap* rule_index,
80         std::vector<const Rule*>* rule_storage)
81      : region_code_(region_code),
82        loaded_(loaded),
83        pending_(pending),
84        rule_index_(rule_index),
85        rule_storage_(rule_storage),
86        retrieved_(BuildCallback(this, &Helper::OnRetrieved)) {
87    assert(pending_ != NULL);
88    assert(rule_index_ != NULL);
89    assert(rule_storage_ != NULL);
90    assert(retrieved_ != NULL);
91    pending_->insert(key);
92    retriever.Retrieve(key, *retrieved_);
93  }
94
95 private:
96  ~Helper() {}
97
98  void OnRetrieved(bool success,
99                   const std::string& key,
100                   const std::string& data) {
101    int rule_count = 0;
102
103    size_t status = pending_->erase(key);
104    assert(status == 1);  // There will always be one item erased from the set.
105    (void)status;  // Prevent unused variable if assert() is optimized away.
106
107    Json json;
108    std::vector<const Rule*> sub_rules;
109
110    if (!success) {
111      goto callback;
112    }
113
114    if (!json.ParseObject(data)) {
115      success = false;
116      goto callback;
117    }
118
119    for (std::vector<std::string>::const_iterator
120         it = json.GetKeys().begin(); it != json.GetKeys().end(); ++it) {
121      if (!json.HasDictionaryValueForKey(*it)) {
122        success = false;
123        goto callback;
124      }
125      const Json& value = json.GetDictionaryValueForKey(*it);
126
127      if (!value.HasStringValueForKey("id")) {
128        success = false;
129        goto callback;
130      }
131      const std::string& id = value.GetStringValueForKey("id");
132      assert(*it == id);  // Sanity check.
133
134      size_t depth = std::count(id.begin(), id.end(), '/') - 1;
135      assert(depth < arraysize(LookupKey::kHierarchy));
136      AddressField field = LookupKey::kHierarchy[depth];
137
138      Rule* rule = new Rule;
139      if (field == COUNTRY) {
140        // All rules on the COUNTRY level inherit from the default rule.
141        rule->CopyFrom(Rule::GetDefault());
142      }
143      rule->ParseJsonRule(value);
144      assert(id == rule->GetId());  // Sanity check.
145
146      rule_storage_->push_back(rule);
147      if (depth > 0) {
148        sub_rules.push_back(rule);
149      }
150
151      // Add the ID of this Rule object to the rule index.
152      std::pair<IndexMap::iterator, bool> result =
153          rule_index_->insert(std::make_pair(id, rule));
154      assert(result.second);
155      (void)result;  // Prevent unused variable if assert() is optimized away.
156
157      ++rule_count;
158    }
159
160    /*
161     * Normally the address metadata server takes care of mapping from natural
162     * language names to metadata IDs (eg. "São Paulo" -> "SP") and from Latin
163     * script names to local script names (eg. "Tokushima" -> "徳島県").
164     *
165     * As the PreloadSupplier doesn't contact the metadata server upon each
166     * Supply() request, it instead has an internal lookup table (rule_index_)
167     * that contains such mappings.
168     *
169     * This lookup table is populated by iterating over all sub rules and for
170     * each of them construct ID strings using human readable names (eg. "São
171     * Paulo") and using Latin script names (eg. "Tokushima").
172     */
173    for (std::vector<const Rule*>::const_iterator
174         it = sub_rules.begin(); it != sub_rules.end(); ++it) {
175      std::stack<const Rule*> hierarchy;
176      hierarchy.push(*it);
177
178      // Push pointers to all parent Rule objects onto the hierarchy stack.
179      for (std::string parent_id((*it)->GetId());;) {
180        // Strip the last part of parent_id. Break if COUNTRY level is reached.
181        std::string::size_type pos = parent_id.rfind('/');
182        if (pos == sizeof "data/ZZ" - 1) {
183          break;
184        }
185        parent_id.resize(pos);
186
187        IndexMap::const_iterator jt = rule_index_->find(parent_id);
188        assert(jt != rule_index_->end());
189        hierarchy.push(jt->second);
190      }
191
192      std::string human_id((*it)->GetId().substr(0, sizeof "data/ZZ" - 1));
193      std::string latin_id(human_id);
194
195      // Append the names from all Rule objects on the hierarchy stack.
196      for (; !hierarchy.empty(); hierarchy.pop()) {
197        const Rule* rule = hierarchy.top();
198
199        human_id.push_back('/');
200        if (!rule->GetName().empty()) {
201          human_id.append(rule->GetName());
202        } else {
203          // If the "name" field is empty, the name is the last part of the ID.
204          const std::string& id = rule->GetId();
205          std::string::size_type pos = id.rfind('/');
206          assert(pos != std::string::npos);
207          human_id.append(id.substr(pos + 1));
208        }
209
210        if (!rule->GetLatinName().empty()) {
211          latin_id.push_back('/');
212          latin_id.append(rule->GetLatinName());
213        }
214      }
215
216      // If the ID has a language tag, copy it.
217      {
218        const std::string& id = (*it)->GetId();
219        std::string::size_type pos = id.rfind("--");
220        if (pos != std::string::npos) {
221          human_id.append(id, pos, id.size() - pos);
222        }
223      }
224
225      rule_index_->insert(std::make_pair(human_id, *it));
226
227      // Add the Latin script ID, if a Latin script name could be found for
228      // every part of the ID.
229      if (std::count(human_id.begin(), human_id.end(), '/') ==
230          std::count(latin_id.begin(), latin_id.end(), '/')) {
231        rule_index_->insert(std::make_pair(latin_id, *it));
232      }
233    }
234
235  callback:
236    loaded_(success, region_code_, rule_count);
237    delete this;
238  }
239
240  const std::string region_code_;
241  const PreloadSupplier::Callback& loaded_;
242  std::set<std::string>* const pending_;
243  IndexMap* const rule_index_;
244  std::vector<const Rule*>* const rule_storage_;
245  const scoped_ptr<const Retriever::Callback> retrieved_;
246
247  DISALLOW_COPY_AND_ASSIGN(Helper);
248};
249
250std::string KeyFromRegionCode(const std::string& region_code) {
251  AddressData address;
252  address.region_code = region_code;
253  LookupKey lookup_key;
254  lookup_key.FromAddress(address);
255  return lookup_key.ToKeyString(0);  // Zero depth = COUNTRY level.
256}
257
258}  // namespace
259
260PreloadSupplier::PreloadSupplier(const std::string& validation_data_url,
261                                 const Downloader* downloader,
262                                 Storage* storage)
263    : retriever_(new Retriever(validation_data_url, downloader, storage)),
264      pending_(),
265      rule_index_(new IndexMap),
266      rule_storage_() {}
267
268PreloadSupplier::~PreloadSupplier() {
269  for (std::vector<const Rule*>::const_iterator
270       it = rule_storage_.begin(); it != rule_storage_.end(); ++it) {
271    delete *it;
272  }
273}
274
275void PreloadSupplier::Supply(const LookupKey& lookup_key,
276                             const Supplier::Callback& supplied) {
277  Supplier::RuleHierarchy hierarchy;
278  bool success = GetRuleHierarchy(lookup_key, &hierarchy);
279  supplied(success, lookup_key, hierarchy);
280}
281
282const Rule* PreloadSupplier::GetRule(const LookupKey& lookup_key) const {
283  assert(IsLoaded(lookup_key.GetRegionCode()));
284  Supplier::RuleHierarchy hierarchy;
285  if (!GetRuleHierarchy(lookup_key, &hierarchy)) {
286    return NULL;
287  }
288  return hierarchy.rule[lookup_key.GetDepth()];
289}
290
291void PreloadSupplier::LoadRules(const std::string& region_code,
292                                const Callback& loaded) {
293  const std::string& key = KeyFromRegionCode(region_code);
294
295  if (IsLoadedKey(key)) {
296    loaded(true, region_code, 0);
297    return;
298  }
299
300  if (IsPendingKey(key)) {
301    return;
302  }
303
304  new Helper(
305      region_code,
306      key,
307      loaded,
308      *retriever_,
309      &pending_,
310      rule_index_.get(),
311      &rule_storage_);
312}
313
314bool PreloadSupplier::IsLoaded(const std::string& region_code) const {
315  return IsLoadedKey(KeyFromRegionCode(region_code));
316}
317
318bool PreloadSupplier::IsPending(const std::string& region_code) const {
319  return IsPendingKey(KeyFromRegionCode(region_code));
320}
321
322bool PreloadSupplier::GetRuleHierarchy(const LookupKey& lookup_key,
323                                       RuleHierarchy* hierarchy) const {
324  assert(hierarchy != NULL);
325
326  if (RegionDataConstants::IsSupported(lookup_key.GetRegionCode())) {
327    size_t max_depth = std::min(
328        lookup_key.GetDepth(),
329        RegionDataConstants::GetMaxLookupKeyDepth(lookup_key.GetRegionCode()));
330
331    for (size_t depth = 0; depth <= max_depth; ++depth) {
332      const std::string& key = lookup_key.ToKeyString(depth);
333      IndexMap::const_iterator it = rule_index_->find(key);
334      if (it == rule_index_->end()) {
335        return depth > 0;  // No data on COUNTRY level is failure.
336      }
337      hierarchy->rule[depth] = it->second;
338    }
339  }
340
341  return true;
342}
343
344bool PreloadSupplier::IsLoadedKey(const std::string& key) const {
345  return rule_index_->find(key) != rule_index_->end();
346}
347
348bool PreloadSupplier::IsPendingKey(const std::string& key) const {
349  return pending_.find(key) != pending_.end();
350}
351
352}  // namespace addressinput
353}  // namespace i18n
354