1// Copyright 2014 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "third_party/libaddressinput/chromium/input_suggester.h"
6
7#include <cstddef>
8#include <set>
9#include <utility>
10
11#include "base/logging.h"
12#include "third_party/libaddressinput/chromium/trie.h"
13#include "third_party/libaddressinput/src/cpp/include/libaddressinput/address_data.h"
14#include "third_party/libaddressinput/src/cpp/include/libaddressinput/callback.h"
15#include "third_party/libaddressinput/src/cpp/include/libaddressinput/preload_supplier.h"
16#include "third_party/libaddressinput/src/cpp/include/libaddressinput/region_data.h"
17
18namespace autofill {
19
20using ::i18n::addressinput::AddressData;
21using ::i18n::addressinput::AddressField;
22using ::i18n::addressinput::BuildCallback;
23using ::i18n::addressinput::FieldProblemMap;
24using ::i18n::addressinput::PreloadSupplier;
25using ::i18n::addressinput::RegionData;
26using ::i18n::addressinput::RegionDataBuilder;
27
28using ::i18n::addressinput::ADMIN_AREA;
29using ::i18n::addressinput::COUNTRY;
30using ::i18n::addressinput::DEPENDENT_LOCALITY;
31using ::i18n::addressinput::LOCALITY;
32using ::i18n::addressinput::POSTAL_CODE;
33
34using ::i18n::addressinput::INVALID_FORMAT;
35using ::i18n::addressinput::MISMATCHING_VALUE;
36
37namespace {
38
39// Initial size for the buffer used in the canonicalizer.
40static const size_t kInitialBufferSize = 32;
41
42// A region and its metadata useful for constructing a suggestion.
43struct Suggestion {
44 public:
45  // Builds a suggestion of |region_to_suggest|. Does not take ownership of
46  // |region_to_suggest|, which should not be NULL.
47  Suggestion(const RegionData* region_to_suggest,
48             AddressField matching_address_field,
49             bool region_key_matches)
50      : region_to_suggest(region_to_suggest),
51        matching_address_field(matching_address_field),
52        region_key_matches(region_key_matches) {
53    DCHECK(region_to_suggest);
54  }
55
56  ~Suggestion() {}
57
58  // The region that should be suggested. For example, if the region is ("CA",
59  // "California"), then either "CA" or "California" should be suggested.
60  const RegionData* region_to_suggest;
61
62  // The field in the address for which the suggestion should be made. For
63  // example, ADMIN_AREA in US means the suggestion should be made for the field
64  // labeled "State".
65  AddressField matching_address_field;
66
67  // True if the key of the region matches user input (the name may or may not
68  // match). "CA" should be suggested for a ("CA", "California") region.
69  //
70  // False if only the name of the region matches user input (the key does not
71  // match). "California" should be suggested for a ("CA", "California") region.
72  bool region_key_matches;
73};
74
75// Suggestions for an address. Contains lists of suggestions for administrative
76// area, locality, and dependent locality fields of an address.
77class AddressSuggestions {
78 public:
79  AddressSuggestions() {}
80  ~AddressSuggestions() {}
81
82  // Marks all regions at |address_field| level as matching user input.
83  void AllRegionsMatchForField(AddressField address_field) {
84    all_regions_match_input_.insert(address_field);
85  }
86
87  // Marks given regions at |address_field| level as matching user input. The
88  // |regions_match_key| parameter contains the regions that match user input by
89  // their keys. The |regions_match_name| parameter contains the regions that
90  // match user input by their names.
91  //
92  // The |address_field| parameter should be either ADMIN_AREA, LOCALITY, or
93  // DEPENDENT_LOCALITY.
94  bool AddRegions(AddressField address_field,
95                  const std::set<const RegionData*>& regions_match_key,
96                  const std::set<const RegionData*>& regions_match_name) {
97    DCHECK(address_field >= ADMIN_AREA);
98    DCHECK(address_field <= DEPENDENT_LOCALITY);
99
100    AddressField parent_address_field =
101        static_cast<AddressField>(address_field - 1);
102
103    bool all_parents_match =
104        parent_address_field == COUNTRY ||
105        all_regions_match_input_.find(parent_address_field) !=
106            all_regions_match_input_.end();
107
108    // Cannot build |address_field| level suggestions if there are no matches in
109    // |parent_address_field| level regions.
110    const RegionsMatchInput* parents = NULL;
111    if (address_field > ADMIN_AREA && !all_parents_match) {
112      parents = &regions_match_input_[parent_address_field];
113      if (parents->keys.empty() && parents->names.empty())
114        return false;
115    }
116
117    RegionsMatchInput* regions = NULL;
118    if (address_field < DEPENDENT_LOCALITY)
119      regions = &regions_match_input_[address_field];
120
121    std::vector<Suggestion>* suggestions = &suggestions_[address_field];
122    bool added_suggestions = false;
123
124    // Iterate over both |regions_match_key| and |regions_match_name| and build
125    // Suggestion objects based on the given RegionData objects. Advance either
126    // one iterator at a time (if they point to different data) or both
127    // iterators at once (if they point to the same data).
128    for (std::set<const RegionData*>::const_iterator
129             key_it = regions_match_key.begin(),
130             name_it = regions_match_name.begin();
131         key_it != regions_match_key.end() ||
132             name_it != regions_match_name.end();) {
133      const RegionData* key_region =
134          key_it != regions_match_key.end() ? *key_it : NULL;
135      const RegionData* name_region =
136          name_it != regions_match_name.end() ? *name_it : NULL;
137
138      // Regions that do not have a parent that also matches input will not
139      // become suggestions.
140      bool key_region_has_parent =
141          all_parents_match ||
142          (parents && !parents->keys.empty() && key_region &&
143           parents->keys.find(&key_region->parent()) != parents->keys.end());
144      bool name_region_has_parent =
145          all_parents_match ||
146          (parents && !parents->names.empty() && name_region &&
147           parents->names.find(&name_region->parent()) != parents->names.end());
148
149      if (name_region && (!key_region || name_region < key_region)) {
150        if (name_region_has_parent) {
151          suggestions->push_back(Suggestion(name_region, address_field, false));
152          added_suggestions = true;
153          if (regions)
154            regions->names.insert(name_region);
155        }
156
157        ++name_it;
158      } else if (key_region && (!name_region || key_region < name_region)) {
159        if (key_region_has_parent) {
160          suggestions->push_back(Suggestion(key_region, address_field, true));
161          added_suggestions = true;
162          if (regions)
163            regions->keys.insert(key_region);
164        }
165
166        ++key_it;
167      } else {
168        if (key_region_has_parent) {
169          suggestions->push_back(Suggestion(key_region, address_field, true));
170          added_suggestions = true;
171          if (regions) {
172            regions->keys.insert(key_region);
173            regions->names.insert(name_region);
174          }
175        }
176
177        ++key_it;
178        ++name_it;
179      }
180    }
181
182    return added_suggestions;
183  }
184
185  // Swaps the suggestions for the smallest sub-region into |suggestions|.
186  // |this| is not usable after this call due to using the swap() operation.
187  //
188  // The |suggestions| parameter should not be NULL.
189  void SwapSmallestSubRegionSuggestions(std::vector<Suggestion>* suggestions) {
190    DCHECK(suggestions);
191    for (int i = DEPENDENT_LOCALITY; i >= ADMIN_AREA; --i) {
192      std::vector<Suggestion>* result =
193          &suggestions_[static_cast<AddressField>(i)];
194      if (!result->empty()) {
195        suggestions->swap(*result);
196        return;
197      }
198    }
199  }
200
201 private:
202  // The sets of non-owned regions used for looking up regions that match user
203  // input by keys and names.
204  struct RegionsMatchInput {
205    std::set<const RegionData*> keys;
206    std::set<const RegionData*> names;
207  };
208
209  // The regions that match user input at ADMIN_AREA and LOCALITY levels.
210  std::map<AddressField, RegionsMatchInput> regions_match_input_;
211
212  // The set of fields for which all regions match user input. Used to avoid
213  // storing a long list in |regions_match_input_| and later looking it up
214  // there.
215  std::set<AddressField> all_regions_match_input_;
216
217  // Suggestions at ADMIN_AREA, LOCALITY, and DEPENDENT_LOCALITY levels.
218  std::map<AddressField, std::vector<Suggestion> > suggestions_;
219
220  DISALLOW_COPY_AND_ASSIGN(AddressSuggestions);
221};
222
223}  // namespace
224
225InputSuggester::StringCanonicalizer::StringCanonicalizer()
226    : buffer_(kInitialBufferSize, 0) {
227  UErrorCode error_code = U_ZERO_ERROR;
228  collator_.reset(
229      icu::Collator::createInstance(icu::Locale::getRoot(), error_code));
230  DCHECK(U_SUCCESS(error_code));
231  collator_->setStrength(icu::Collator::PRIMARY);
232}
233
234InputSuggester::StringCanonicalizer::~StringCanonicalizer() {}
235
236const std::vector<uint8_t>& InputSuggester::StringCanonicalizer::Canonicalize(
237    const std::string& original) const {
238  DCHECK(!original.empty());
239
240  icu::UnicodeString icu_str(original.c_str(),
241                             static_cast<int32_t>(original.length()));
242  int32_t sort_key_size =
243      collator_->getSortKey(icu_str, &buffer_[0], buffer_size());
244  DCHECK_LT(0, sort_key_size);
245
246  if (sort_key_size > buffer_size()) {
247    buffer_.resize(sort_key_size * 2, 0);
248    sort_key_size = collator_->getSortKey(icu_str, &buffer_[0], buffer_size());
249    DCHECK_LT(0, sort_key_size);
250    DCHECK_GT(buffer_size(), sort_key_size);
251  }
252
253  return buffer_;
254}
255
256int32_t InputSuggester::StringCanonicalizer::buffer_size() const {
257  return static_cast<int32_t>(buffer_.size());
258}
259
260// All sub-regions of a COUNTRY level region, organized into tries for lookup by
261// region name or key.
262class InputSuggester::SubRegionData {
263 public:
264  SubRegionData()
265      : initialized_(false),
266        smallest_region_size_(COUNTRY),
267        canonicalizer_(NULL) {}
268
269  ~SubRegionData() {}
270
271  bool is_initialized() const { return initialized_; }
272
273  // Adds the sub-regions of |country_region| into tries. Uses
274  // |shared_canonicalizer| for case and diacritic insensitive lookup of the
275  // sub-regions. Should be called at most once.
276  void Initialize(const RegionData& country_region,
277                  const StringCanonicalizer& shared_canonicalizer) {
278    DCHECK(!initialized_);
279    DCHECK(!country_region.has_parent());
280
281    initialized_ = true;
282    canonicalizer_ = &shared_canonicalizer;
283
284    if (!country_region.sub_regions().empty())
285      AddSubRegionsOf(country_region, COUNTRY);
286  }
287
288  // Adds the suggestions for |user_input| into |suggestions| when user is
289  // typing in |focused_field|.
290  void BuildSuggestions(const AddressData& user_input,
291                        AddressField focused_field,
292                        std::vector<Suggestion>* suggestions) {
293    DCHECK(initialized_);
294
295    // Do not suggest anything if there's no suggestion data for the focused
296    // field.
297    if (focused_field != POSTAL_CODE && smallest_region_size_ < focused_field)
298      return;
299
300    // Non-owned regions that match a field value by region key.
301    std::set<const RegionData*> regions_match_key;
302
303    // Non-owned regions that match a field value by region name.
304    std::set<const RegionData*> regions_match_name;
305
306    AddressSuggestions address_suggestions;
307    for (int i = ADMIN_AREA; i <= focused_field && i <= DEPENDENT_LOCALITY;
308         ++i) {
309      AddressField address_field = static_cast<AddressField>(i);
310      AddressField parent_address_field = static_cast<AddressField>(i - 1);
311
312      const std::string& field_value = user_input.GetFieldValue(address_field);
313      const std::string& parent_field_value =
314          user_input.GetFieldValue(parent_address_field);
315
316      if (field_value.empty() &&
317          (address_field == ADMIN_AREA || parent_field_value.empty())) {
318        address_suggestions.AllRegionsMatchForField(address_field);
319        continue;
320      }
321
322      if (field_value.empty()) {
323        DCHECK_NE(address_field, focused_field);
324        continue;
325      }
326
327      regions_match_key.clear();
328      regions_match_name.clear();
329
330      const FieldTries& field_tries = field_tries_[address_field];
331
332      const std::vector<uint8_t>& canonicalized_value =
333          canonicalizer_->Canonicalize(field_value);
334
335      field_tries.keys.FindDataForKeyPrefix(canonicalized_value,
336                                            &regions_match_key);
337      field_tries.names.FindDataForKeyPrefix(canonicalized_value,
338                                             &regions_match_name);
339
340      bool added_suggestions = address_suggestions.AddRegions(
341          address_field, regions_match_key, regions_match_name);
342
343      // Do not suggest anything if the focused field does not have suggestions.
344      if (address_field == focused_field && !added_suggestions)
345        return;
346    }
347
348    address_suggestions.SwapSmallestSubRegionSuggestions(suggestions);
349  }
350
351 private:
352  // The tries to lookup regions for a specific field by keys and names. For
353  // example, the FieldTries for ADMIN_AREA in US will have keys for "AL", "AK",
354  // "AS", etc and names for "Alabama", "Alaska", "American Samoa", etc. The
355  // struct is uncopyable due to Trie objects being uncopyable.
356  struct FieldTries {
357    Trie<const RegionData*> keys;
358    Trie<const RegionData*> names;
359  };
360
361  // Adds the sub-regions of |parent_region| into tries.
362  void AddSubRegionsOf(const RegionData& parent_region,
363                       AddressField parent_field) {
364    DCHECK(!parent_region.sub_regions().empty());
365
366    AddressField address_field = static_cast<AddressField>(parent_field + 1);
367    DCHECK(address_field >= ADMIN_AREA);
368    DCHECK(address_field <= DEPENDENT_LOCALITY);
369
370    FieldTries* field_tries = &field_tries_[address_field];
371    for (std::vector<const RegionData*>::const_iterator it =
372             parent_region.sub_regions().begin();
373         it != parent_region.sub_regions().end();
374         ++it) {
375      const RegionData* region = *it;
376      DCHECK(region);
377      DCHECK(!region->key().empty());
378      DCHECK(!region->name().empty());
379
380      field_tries->keys.AddDataForKey(
381          canonicalizer_->Canonicalize(region->key()), region);
382
383      field_tries->names.AddDataForKey(
384          canonicalizer_->Canonicalize(region->name()), region);
385
386      if (smallest_region_size_ < address_field)
387        smallest_region_size_ = address_field;
388
389      if (!region->sub_regions().empty())
390        AddSubRegionsOf(*region, address_field);
391    }
392  }
393
394  // True after Initialize() has been called.
395  bool initialized_;
396
397  // The tries to lookup regions for ADMIN_AREA, LOCALITY, and
398  // DEPENDENT_LOCALITY.
399  std::map<AddressField, FieldTries> field_tries_;
400
401  // The smallest size of a sub-region that has data. For example, this is
402  // ADMIN_AREA in US, but DEPENDENT_LOCALITY in CN.
403  AddressField smallest_region_size_;
404
405  // A shared instance of string canonicalizer for case and diacritic comparison
406  // of region keys and names.
407  const StringCanonicalizer* canonicalizer_;
408};
409
410InputSuggester::InputSuggester(PreloadSupplier* supplier)
411    : region_data_builder_(supplier),
412      input_helper_(supplier),
413      validator_(supplier),
414      validated_(BuildCallback(this, &InputSuggester::Validated)) {}
415
416InputSuggester::~InputSuggester() {}
417
418void InputSuggester::GetSuggestions(const AddressData& user_input,
419                                    AddressField focused_field,
420                                    size_t suggestions_limit,
421                                    std::vector<AddressData>* suggestions) {
422  DCHECK(suggestions);
423  DCHECK(focused_field == POSTAL_CODE ||
424         (focused_field >= ADMIN_AREA && focused_field <= DEPENDENT_LOCALITY));
425
426  AddressData address_copy = user_input;
427
428  // Do not suggest anything if the user input is empty.
429  if (address_copy.IsFieldEmpty(focused_field))
430    return;
431
432  if (focused_field == POSTAL_CODE) {
433    // Do not suggest anything if the user is typing an invalid postal code.
434    FieldProblemMap problems;
435    FieldProblemMap filter;
436    filter.insert(std::make_pair(POSTAL_CODE, INVALID_FORMAT));
437    validator_.Validate(address_copy,
438                        true,   // Allow postal office boxes.
439                        false,  // Do not require recipient name.
440                        &filter,
441                        &problems,
442                        *validated_);
443    if (!problems.empty())
444      return;
445
446    // Fill in the sub-regions based on the postal code.
447    input_helper_.FillAddress(&address_copy);
448  }
449
450  // Lazily initialize the mapping from COUNTRY level regions to all of their
451  // sub-regions with metadata for generating suggestions.
452  std::string unused_best_language;
453  const RegionData& region_data =
454      region_data_builder_.Build(address_copy.region_code,
455                                 address_copy.language_code,
456                                 &unused_best_language);
457  SubRegionData* sub_region_data = &sub_regions_[&region_data];
458  if (!sub_region_data->is_initialized())
459    sub_region_data->Initialize(region_data, canonicalizer_);
460
461  // Build the list of regions that match |address_copy| when the user is typing
462  // in the |focused_field|.
463  std::vector<Suggestion> suggested_regions;
464  sub_region_data->BuildSuggestions(
465      address_copy, focused_field, &suggested_regions);
466
467  FieldProblemMap problems;
468  FieldProblemMap filter;
469  filter.insert(std::make_pair(POSTAL_CODE, MISMATCHING_VALUE));
470
471  // Generate suggestions based on the regions.
472  for (std::vector<Suggestion>::const_iterator suggested_region_it =
473           suggested_regions.begin();
474       suggested_region_it != suggested_regions.end();
475       ++suggested_region_it) {
476    AddressData address;
477    address.region_code = address_copy.region_code;
478    address.postal_code = address_copy.postal_code;
479
480    // Traverse the tree of regions from the smallest |region_to_suggest| to the
481    // country-wide "root" of the tree. Use the region names or keys found at
482    // each of the levels of the tree to build the |address| to suggest.
483    AddressField address_field = suggested_region_it->matching_address_field;
484    for (const RegionData* region = suggested_region_it->region_to_suggest;
485         region->has_parent();
486         region = &region->parent()) {
487      address.SetFieldValue(address_field,
488                            suggested_region_it->region_key_matches
489                                ? region->key()
490                                : region->name());
491      address_field = static_cast<AddressField>(address_field - 1);
492    }
493
494    // Do not suggest an address with a mismatching postal code.
495    problems.clear();
496    validator_.Validate(address,
497                        true,   // Allow postal office boxes.
498                        false,  // Do not require recipient name.
499                        &filter,
500                        &problems,
501                        *validated_);
502    if (!problems.empty())
503      continue;
504
505    // Do not add more suggestions than |suggestions_limit|.
506    if (suggestions->size() >= suggestions_limit) {
507      suggestions->clear();
508      return;
509    }
510
511    suggestions->push_back(address);
512  }
513}
514
515void InputSuggester::Validated(bool success,
516                               const AddressData&,
517                               const FieldProblemMap&) {
518  DCHECK(success);
519}
520
521}  // namespace autofill
522