1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "content/common/android/address_parser_internal.h"
6
7#include <bitset>
8
9#include "base/logging.h"
10#include "base/strings/string_util.h"
11
12namespace {
13
14// Number of digits for a valid zip code.
15const size_t kZipDigits = 5;
16
17// Number of digits for a valid zip code in the Zip Plus 4 format.
18const size_t kZipPlus4Digits = 9;
19
20// Maximum number of digits of a house number, including possible hyphens.
21const size_t kMaxHouseDigits = 5;
22
23base::char16 SafePreviousChar(const base::string16::const_iterator& it,
24    const base::string16::const_iterator& begin) {
25  if (it == begin)
26    return ' ';
27  return *(it - 1);
28}
29
30base::char16 SafeNextChar(const base::string16::const_iterator& it,
31    const base::string16::const_iterator& end) {
32  if (it == end)
33    return ' ';
34  return *(it + 1);
35}
36
37bool WordLowerCaseEqualsASCII(base::string16::const_iterator word_begin,
38    base::string16::const_iterator word_end, const char* ascii_to_match) {
39  for (base::string16::const_iterator it = word_begin; it != word_end;
40      ++it, ++ascii_to_match) {
41    if (!*ascii_to_match || base::ToLowerASCII(*it) != *ascii_to_match)
42      return false;
43  }
44  return *ascii_to_match == 0 || *ascii_to_match == ' ';
45}
46
47bool LowerCaseEqualsASCIIWithPlural(base::string16::const_iterator word_begin,
48    base::string16::const_iterator word_end, const char* ascii_to_match,
49    bool allow_plural) {
50  for (base::string16::const_iterator it = word_begin; it != word_end;
51      ++it, ++ascii_to_match) {
52    if (!*ascii_to_match && allow_plural && *it == 's' && it + 1 == word_end)
53      return true;
54
55    if (!*ascii_to_match || base::ToLowerASCII(*it) != *ascii_to_match)
56      return false;
57  }
58  return *ascii_to_match == 0;
59}
60
61}  // anonymous namespace
62
63namespace content {
64
65namespace address_parser {
66
67namespace internal {
68
69Word::Word(const base::string16::const_iterator& begin,
70           const base::string16::const_iterator& end)
71    : begin(begin),
72      end(end) {
73  DCHECK(begin <= end);
74}
75
76bool HouseNumberParser::IsPreDelimiter(base::char16 character) {
77  return character == ':' || IsPostDelimiter(character);
78}
79
80bool HouseNumberParser::IsPostDelimiter(base::char16 character) {
81  return IsWhitespace(character) || strchr(",\"'", character);
82}
83
84void HouseNumberParser::RestartOnNextDelimiter() {
85  ResetState();
86  for (; it_ != end_ && !IsPreDelimiter(*it_); ++it_) {}
87}
88
89void HouseNumberParser::AcceptChars(size_t num_chars) {
90  size_t offset = std::min(static_cast<size_t>(std::distance(it_, end_)),
91                           num_chars);
92  it_ += offset;
93  result_chars_ += offset;
94}
95
96void HouseNumberParser::SkipChars(size_t num_chars) {
97  it_ += std::min(static_cast<size_t>(std::distance(it_, end_)), num_chars);
98}
99
100void HouseNumberParser::ResetState() {
101  num_digits_ = 0;
102  result_chars_ = 0;
103}
104
105bool HouseNumberParser::CheckFinished(Word* word) const {
106  // There should always be a number after a hyphen.
107  if (result_chars_ == 0 || SafePreviousChar(it_, begin_) == '-')
108    return false;
109
110  if (word) {
111    word->begin = it_ - result_chars_;
112    word->end = it_;
113  }
114  return true;
115}
116
117bool HouseNumberParser::Parse(
118    const base::string16::const_iterator& begin,
119    const base::string16::const_iterator& end, Word* word) {
120  it_ = begin_ = begin;
121  end_ = end;
122  ResetState();
123
124  // Iterations only used as a fail-safe against any buggy infinite loops.
125  size_t iterations = 0;
126  size_t max_iterations = end - begin + 1;
127  for (; it_ != end_ && iterations < max_iterations; ++iterations) {
128
129    // Word finished case.
130    if (IsPostDelimiter(*it_)) {
131      if (CheckFinished(word))
132        return true;
133      else if (result_chars_)
134        ResetState();
135
136      SkipChars(1);
137      continue;
138    }
139
140    // More digits. There should be no more after a letter was found.
141    if (IsAsciiDigit(*it_)) {
142      if (num_digits_ >= kMaxHouseDigits) {
143        RestartOnNextDelimiter();
144      } else {
145        AcceptChars(1);
146        ++num_digits_;
147      }
148      continue;
149    }
150
151    if (IsAsciiAlpha(*it_)) {
152      // Handle special case 'one'.
153      if (result_chars_ == 0) {
154        if (it_ + 3 <= end_ && LowerCaseEqualsASCII(it_, it_ + 3, "one"))
155          AcceptChars(3);
156        else
157          RestartOnNextDelimiter();
158        continue;
159      }
160
161      // There should be more than 1 character because of result_chars.
162      DCHECK_GT(result_chars_, 0U);
163      DCHECK(it_ != begin_);
164      base::char16 previous = SafePreviousChar(it_, begin_);
165      if (IsAsciiDigit(previous)) {
166        // Check cases like '12A'.
167        base::char16 next = SafeNextChar(it_, end_);
168        if (IsPostDelimiter(next)) {
169          AcceptChars(1);
170          continue;
171        }
172
173        // Handle cases like 12a, 1st, 2nd, 3rd, 7th.
174        if (IsAsciiAlpha(next)) {
175          base::char16 last_digit = previous;
176          base::char16 first_letter = base::ToLowerASCII(*it_);
177          base::char16 second_letter = base::ToLowerASCII(next);
178          bool is_teen = SafePreviousChar(it_ - 1, begin_) == '1' &&
179              num_digits_ == 2;
180
181          switch (last_digit - '0') {
182          case 1:
183            if ((first_letter == 's' && second_letter == 't') ||
184                (first_letter == 't' && second_letter == 'h' && is_teen)) {
185              AcceptChars(2);
186              continue;
187            }
188            break;
189
190          case 2:
191            if ((first_letter == 'n' && second_letter == 'd') ||
192                (first_letter == 't' && second_letter == 'h' && is_teen)) {
193              AcceptChars(2);
194              continue;
195            }
196            break;
197
198          case 3:
199            if ((first_letter == 'r' && second_letter == 'd') ||
200                (first_letter == 't' && second_letter == 'h' && is_teen)) {
201              AcceptChars(2);
202              continue;
203            }
204            break;
205
206          case 0:
207            // Explicitly exclude '0th'.
208            if (num_digits_ == 1)
209              break;
210
211          case 4:
212          case 5:
213          case 6:
214          case 7:
215          case 8:
216          case 9:
217            if (first_letter == 't' && second_letter == 'h') {
218              AcceptChars(2);
219              continue;
220            }
221            break;
222
223          default:
224            NOTREACHED();
225          }
226        }
227      }
228
229      RestartOnNextDelimiter();
230      continue;
231    }
232
233    if (*it_ == '-' && num_digits_ > 0) {
234      AcceptChars(1);
235      ++num_digits_;
236      continue;
237    }
238
239    RestartOnNextDelimiter();
240    SkipChars(1);
241  }
242
243  if (iterations >= max_iterations)
244    return false;
245
246  return CheckFinished(word);
247}
248
249bool FindStateStartingInWord(WordList* words,
250                             size_t state_first_word,
251                             size_t* state_last_word,
252                             String16Tokenizer* tokenizer,
253                             size_t* state_index) {
254
255  // Bitmasks containing the allowed suffixes for 2-letter state codes.
256  static const int state_two_letter_suffix[23] = {
257    0x02060c00,  // A followed by: [KLRSZ].
258    0x00000000,  // B.
259    0x00084001,  // C followed by: [AOT].
260    0x00000014,  // D followed by: [CE].
261    0x00000000,  // E.
262    0x00001800,  // F followed by: [LM].
263    0x00100001,  // G followed by: [AU].
264    0x00000100,  // H followed by: [I].
265    0x00002809,  // I followed by: [ADLN].
266    0x00000000,  // J.
267    0x01040000,  // K followed by: [SY].
268    0x00000001,  // L followed by: [A].
269    0x000ce199,  // M followed by: [ADEHINOPST].
270    0x0120129c,  // N followed by: [CDEHJMVY].
271    0x00020480,  // O followed by: [HKR].
272    0x00420001,  // P followed by: [ARW].
273    0x00000000,  // Q.
274    0x00000100,  // R followed by: [I].
275    0x0000000c,  // S followed by: [CD].
276    0x00802000,  // T followed by: [NX].
277    0x00080000,  // U followed by: [T].
278    0x00080101,  // V followed by: [AIT].
279    0x01200101   // W followed by: [AIVY].
280  };
281
282  // Accumulative number of states for the 2-letter code indexed by the first.
283  static const int state_two_letter_accumulative[24] = {
284     0,  5,  5,  8, 10, 10, 12, 14,
285    15, 19, 19, 21, 22, 32, 40, 43,
286    46, 46, 47, 49, 51, 52, 55, 59
287  };
288
289  // State names sorted alphabetically with their lengths.
290  // There can be more than one possible name for a same state if desired.
291  static const struct StateNameInfo {
292    const char* string;
293    char first_word_length;
294    char length;
295    char state_index; // Relative to two-character code alphabetical order.
296  } state_names[59] = {
297    { "alabama", 7, 7, 1 }, { "alaska", 6, 6, 0 },
298    { "american samoa", 8, 14, 3 }, { "arizona", 7, 7, 4 },
299    { "arkansas", 8, 8, 2 },
300    { "california", 10, 10, 5 }, { "colorado", 8, 8, 6 },
301    { "connecticut", 11, 11, 7 }, { "delaware", 8, 8, 9 },
302    { "district of columbia", 8, 20, 8 },
303    { "federated states of micronesia", 9, 30, 11 }, { "florida", 7, 7, 10 },
304    { "guam", 4, 4, 13 }, { "georgia", 7, 7, 12 },
305    { "hawaii", 6, 6, 14 },
306    { "idaho", 5, 5, 16 }, { "illinois", 8, 8, 17 }, { "indiana", 7, 7, 18 },
307    { "iowa", 4, 4, 15 },
308    { "kansas", 6, 6, 19 }, { "kentucky", 8, 8, 20 },
309    { "louisiana", 9, 9, 21 },
310    { "maine", 5, 5, 24 }, { "marshall islands", 8, 16, 25 },
311    { "maryland", 8, 8, 23 }, { "massachusetts", 13, 13, 22 },
312    { "michigan", 8, 8, 26 }, { "minnesota", 9, 9, 27 },
313    { "mississippi", 11, 11, 30 }, { "missouri", 8, 8, 28 },
314    { "montana", 7, 7, 31 },
315    { "nebraska", 8, 8, 34 }, { "nevada", 6, 6, 38 },
316    { "new hampshire", 3, 13, 35 }, { "new jersey", 3, 10, 36 },
317    { "new mexico", 3, 10, 37 }, { "new york", 3, 8, 39 },
318    { "north carolina", 5, 14, 32 }, { "north dakota", 5, 12, 33 },
319    { "northern mariana islands", 8, 24, 29 },
320    { "ohio", 4, 4, 40 }, { "oklahoma", 8, 8, 41 }, { "oregon", 6, 6, 42 },
321    { "palau", 5, 5, 45 }, { "pennsylvania", 12, 12, 43 },
322    { "puerto rico", 6, 11, 44 },
323    { "rhode island", 5, 5, 46 },
324    { "south carolina", 5, 14, 47 }, { "south dakota", 5, 12, 48 },
325    { "tennessee", 9, 9, 49 }, { "texas", 5, 5, 50 },
326    { "utah", 4, 4, 51 },
327    { "vermont", 7, 7, 54 }, { "virgin islands", 6, 14, 53 },
328    { "virginia", 8, 8, 52 },
329    { "washington", 10, 10, 55 }, { "west virginia", 4, 13, 57 },
330    { "wisconsin", 9, 9, 56 }, { "wyoming", 7, 7, 58 }
331  };
332
333  // Accumulative number of states for sorted names indexed by the first letter.
334  // Required a different one since there are codes that don't share their
335  // first letter with the name of their state (MP = Northern Mariana Islands).
336  static const int state_names_accumulative[24] = {
337     0,  5,  5,  8, 10, 10, 12, 14,
338    15, 19, 19, 21, 22, 31, 40, 43,
339    46, 46, 47, 49, 51, 52, 55, 59
340  };
341
342  DCHECK_EQ(state_names_accumulative[arraysize(state_names_accumulative) - 1],
343      static_cast<int>(ARRAYSIZE_UNSAFE(state_names)));
344
345  const Word& first_word = words->at(state_first_word);
346  int length = first_word.end - first_word.begin;
347  if (length < 2 || !IsAsciiAlpha(*first_word.begin))
348    return false;
349
350  // No state names start with x, y, z.
351  base::char16 first_letter = base::ToLowerASCII(*first_word.begin);
352  if (first_letter > 'w')
353    return false;
354
355  DCHECK(first_letter >= 'a');
356  int first_index = first_letter - 'a';
357
358  // Look for two-letter state names.
359  if (length == 2 && IsAsciiAlpha(*(first_word.begin + 1))) {
360    base::char16 second_letter = base::ToLowerASCII(*(first_word.begin + 1));
361    DCHECK(second_letter >= 'a');
362
363    int second_index = second_letter - 'a';
364    if (!(state_two_letter_suffix[first_index] & (1 << second_index)))
365      return false;
366
367    std::bitset<32> previous_suffixes = state_two_letter_suffix[first_index] &
368        ((1 << second_index) - 1);
369    *state_last_word = state_first_word;
370    *state_index = state_two_letter_accumulative[first_index] +
371        previous_suffixes.count();
372    return true;
373  }
374
375  // Look for full state names by their first letter. Discard by length.
376  for (int state = state_names_accumulative[first_index];
377      state < state_names_accumulative[first_index + 1]; ++state) {
378    if (state_names[state].first_word_length != length)
379      continue;
380
381    bool state_match = false;
382    size_t state_word = state_first_word;
383    for (int pos = 0; true; ) {
384      if (!WordLowerCaseEqualsASCII(words->at(state_word).begin,
385          words->at(state_word).end, &state_names[state].string[pos]))
386        break;
387
388      pos += words->at(state_word).end - words->at(state_word).begin + 1;
389      if (pos >= state_names[state].length) {
390        state_match = true;
391        break;
392      }
393
394      // Ran out of words, extract more from the tokenizer.
395      if (++state_word == words->size()) {
396        do {
397          if (!tokenizer->GetNext())
398            break;
399        } while (tokenizer->token_is_delim());
400        words->push_back(
401            Word(tokenizer->token_begin(), tokenizer->token_end()));
402      }
403    }
404
405    if (state_match) {
406      *state_last_word = state_word;
407      *state_index = state_names[state].state_index;
408      return true;
409    }
410  }
411
412  return false;
413}
414
415bool IsZipValid(const Word& word, size_t state_index) {
416  size_t length = word.end - word.begin;
417  if (length != kZipDigits && length != kZipPlus4Digits + 1)
418    return false;
419
420  for (base::string16::const_iterator it = word.begin; it != word.end; ++it) {
421    size_t pos = it - word.begin;
422    if (IsAsciiDigit(*it) || (*it == '-' && pos == kZipDigits))
423      continue;
424    return false;
425  }
426  return IsZipValidForState(word, state_index);
427}
428
429bool IsZipValidForState(const Word& word, size_t state_index) {
430  // List of valid zip code ranges.
431  static const struct {
432    signed char low;
433    signed char high;
434    signed char exception1;
435    signed char exception2;
436  } zip_range[] = {
437    { 99, 99, -1, -1 }, // AK Alaska.
438    { 35, 36, -1, -1 }, // AL Alabama.
439    { 71, 72, -1, -1 }, // AR Arkansas.
440    { 96, 96, -1, -1 }, // AS American Samoa.
441    { 85, 86, -1, -1 }, // AZ Arizona.
442    { 90, 96, -1, -1 }, // CA California.
443    { 80, 81, -1, -1 }, // CO Colorado.
444    {  6,  6, -1, -1 }, // CT Connecticut.
445    { 20, 20, -1, -1 }, // DC District of Columbia.
446    { 19, 19, -1, -1 }, // DE Delaware.
447    { 32, 34, -1, -1 }, // FL Florida.
448    { 96, 96, -1, -1 }, // FM Federated States of Micronesia.
449    { 30, 31, -1, -1 }, // GA Georgia.
450    { 96, 96, -1, -1 }, // GU Guam.
451    { 96, 96, -1, -1 }, // HI Hawaii.
452    { 50, 52, -1, -1 }, // IA Iowa.
453    { 83, 83, -1, -1 }, // ID Idaho.
454    { 60, 62, -1, -1 }, // IL Illinois.
455    { 46, 47, -1, -1 }, // IN Indiana.
456    { 66, 67, 73, -1 }, // KS Kansas.
457    { 40, 42, -1, -1 }, // KY Kentucky.
458    { 70, 71, -1, -1 }, // LA Louisiana.
459    {  1,  2, -1, -1 }, // MA Massachusetts.
460    { 20, 21, -1, -1 }, // MD Maryland.
461    {  3,  4, -1, -1 }, // ME Maine.
462    { 96, 96, -1, -1 }, // MH Marshall Islands.
463    { 48, 49, -1, -1 }, // MI Michigan.
464    { 55, 56, -1, -1 }, // MN Minnesota.
465    { 63, 65, -1, -1 }, // MO Missouri.
466    { 96, 96, -1, -1 }, // MP Northern Mariana Islands.
467    { 38, 39, -1, -1 }, // MS Mississippi.
468    { 55, 56, -1, -1 }, // MT Montana.
469    { 27, 28, -1, -1 }, // NC North Carolina.
470    { 58, 58, -1, -1 }, // ND North Dakota.
471    { 68, 69, -1, -1 }, // NE Nebraska.
472    {  3,  4, -1, -1 }, // NH New Hampshire.
473    {  7,  8, -1, -1 }, // NJ New Jersey.
474    { 87, 88, 86, -1 }, // NM New Mexico.
475    { 88, 89, 96, -1 }, // NV Nevada.
476    { 10, 14,  0,  6 }, // NY New York.
477    { 43, 45, -1, -1 }, // OH Ohio.
478    { 73, 74, -1, -1 }, // OK Oklahoma.
479    { 97, 97, -1, -1 }, // OR Oregon.
480    { 15, 19, -1, -1 }, // PA Pennsylvania.
481    {  6,  6,  0,  9 }, // PR Puerto Rico.
482    { 96, 96, -1, -1 }, // PW Palau.
483    {  2,  2, -1, -1 }, // RI Rhode Island.
484    { 29, 29, -1, -1 }, // SC South Carolina.
485    { 57, 57, -1, -1 }, // SD South Dakota.
486    { 37, 38, -1, -1 }, // TN Tennessee.
487    { 75, 79, 87, 88 }, // TX Texas.
488    { 84, 84, -1, -1 }, // UT Utah.
489    { 22, 24, 20, -1 }, // VA Virginia.
490    {  6,  9, -1, -1 }, // VI Virgin Islands.
491    {  5,  5, -1, -1 }, // VT Vermont.
492    { 98, 99, -1, -1 }, // WA Washington.
493    { 53, 54, -1, -1 }, // WI Wisconsin.
494    { 24, 26, -1, -1 }, // WV West Virginia.
495    { 82, 83, -1, -1 }  // WY Wyoming.
496  };
497
498  // Zip numeric value for the first two characters.
499  DCHECK(word.begin != word.end);
500  DCHECK(IsAsciiDigit(*word.begin));
501  DCHECK(IsAsciiDigit(*(word.begin + 1)));
502  int zip_prefix = (*word.begin - '0') * 10 + (*(word.begin + 1) - '0');
503
504  if ((zip_prefix >= zip_range[state_index].low &&
505       zip_prefix <= zip_range[state_index].high) ||
506      zip_prefix == zip_range[state_index].exception1 ||
507      zip_prefix == zip_range[state_index].exception2) {
508    return true;
509  }
510  return false;
511}
512
513bool IsValidLocationName(const Word& word) {
514  // Supported location names sorted alphabetically and grouped by first letter.
515  static const struct LocationNameInfo {
516    const char* string;
517    char length;
518    bool allow_plural;
519  } location_names[157] = {
520    { "alley", 5, false }, { "annex", 5, false }, { "arcade", 6, false },
521    { "ave", 3, false }, { "ave.", 4, false }, { "avenue", 6, false },
522    { "alameda", 7, false },
523    { "bayou", 5, false }, { "beach", 5, false }, { "bend", 4, false },
524    { "bluff", 5, true }, { "bottom", 6, false }, { "boulevard", 9, false },
525    { "branch", 6, false }, { "bridge", 6, false }, { "brook", 5, true },
526    { "burg", 4, true }, { "bypass", 6, false }, { "broadway", 8, false },
527    { "camino", 6, false }, { "camp", 4, false }, { "canyon", 6, false },
528    { "cape", 4, false }, { "causeway", 8, false }, { "center", 6, true },
529    { "circle", 6, true }, { "cliff", 5, true }, { "club", 4, false },
530    { "common", 6, false }, { "corner", 6, true }, { "course", 6, false },
531    { "court", 5, true }, { "cove", 4, true }, { "creek", 5, false },
532    { "crescent", 8, false }, { "crest", 5, false }, { "crossing", 8, false },
533    { "crossroad", 9, false }, { "curve", 5, false }, { "circulo", 7, false },
534    { "dale", 4, false }, { "dam", 3, false }, { "divide", 6, false },
535    { "drive", 5, true },
536    { "estate", 6, true }, { "expressway", 10, false },
537    { "extension", 9, true },
538    { "fall", 4, true }, { "ferry", 5, false }, { "field", 5, true },
539    { "flat", 4, true }, { "ford", 4, true }, { "forest", 6, false },
540    { "forge", 5, true }, { "fork", 4, true }, { "fort", 4, false },
541    { "freeway", 7, false },
542    { "garden", 6, true }, { "gateway", 7, false }, { "glen", 4, true },
543    { "green", 5, true }, { "grove", 5, true },
544    { "harbor", 6, true }, { "haven", 5, false }, { "heights", 7, false },
545    { "highway", 7, false }, { "hill", 4, true }, { "hollow", 6, false },
546    { "inlet", 5, false }, { "island", 6, true }, { "isle", 4, false },
547    { "junction", 8, true },
548    { "key", 3, true }, { "knoll", 5, true },
549    { "lake", 4, true }, { "land", 4, false }, { "landing", 7, false },
550    { "lane", 4, false }, { "light", 5, true }, { "loaf", 4, false },
551    { "lock", 4, true }, { "lodge", 5, false }, { "loop", 4, false },
552    { "mall", 4, false }, { "manor", 5, true }, { "meadow", 6, true },
553    { "mews", 4, false }, { "mill", 4, true }, { "mission", 7, false },
554    { "motorway", 8, false }, { "mount", 5, false }, { "mountain", 8, true },
555    { "neck", 4, false },
556    { "orchard", 7, false }, { "oval", 4, false }, { "overpass", 8, false },
557    { "park", 4, true }, { "parkway", 7, true }, { "pass", 4, false },
558    { "passage", 7, false }, { "path", 4, false }, { "pike", 4, false },
559    { "pine", 4, true }, { "plain", 5, true }, { "plaza", 5, false },
560    { "point", 5, true }, { "port", 4, true }, { "prairie", 7, false },
561    { "privada", 7, false },
562    { "radial", 6, false }, { "ramp", 4, false }, { "ranch", 5, false },
563    { "rapid", 5, true }, { "rest", 4, false }, { "ridge", 5, true },
564    { "river", 5, false }, { "road", 4, true }, { "route", 5, false },
565    { "row", 3, false }, { "rue", 3, false }, { "run", 3, false },
566    { "shoal", 5, true }, { "shore", 5, true }, { "skyway", 6, false },
567    { "spring", 6, true }, { "spur", 4, true }, { "square", 6, true },
568    { "station", 7, false }, { "stravenue", 9, false }, { "stream", 6, false },
569    { "st", 2, false }, { "st.", 3, false }, { "street", 6, true },
570    { "summit", 6, false }, { "speedway", 8, false },
571    { "terrace", 7, false }, { "throughway", 10, false }, { "trace", 5, false },
572    { "track", 5, false }, { "trafficway", 10, false }, { "trail", 5, false },
573    { "tunnel", 6, false }, { "turnpike", 8, false },
574    { "underpass", 9, false }, { "union", 5, true },
575    { "valley", 6, true }, { "viaduct", 7, false }, { "view", 4, true },
576    { "village", 7, true }, { "ville", 5, false }, { "vista", 5, false },
577    { "walk", 4, true }, { "wall", 4, false }, { "way", 3, true },
578    { "well", 4, true },
579    { "xing", 4, false }, { "xrd", 3, false }
580  };
581
582  // Accumulative number of location names for each starting letter.
583  static const int location_names_accumulative[25] = {
584      0,   7,  19,  40,  44,
585     47,  57,  62,  68,  71,
586     72,  74,  83,  92,  93,
587     96, 109, 109, 121, 135,
588    143, 145, 151, 155, 157
589  };
590
591  DCHECK_EQ(
592      location_names_accumulative[arraysize(location_names_accumulative) - 1],
593      static_cast<int>(ARRAYSIZE_UNSAFE(location_names)));
594
595  if (!IsAsciiAlpha(*word.begin))
596    return false;
597
598  // No location names start with y, z.
599  base::char16 first_letter = base::ToLowerASCII(*word.begin);
600  if (first_letter > 'x')
601    return false;
602
603  DCHECK(first_letter >= 'a');
604  int index = first_letter - 'a';
605  int length = std::distance(word.begin, word.end);
606  for (int i = location_names_accumulative[index];
607      i < location_names_accumulative[index + 1]; ++i) {
608    if (location_names[i].length != length &&
609        (location_names[i].allow_plural &&
610         location_names[i].length + 1 != length)) {
611      continue;
612    }
613
614    if (LowerCaseEqualsASCIIWithPlural(word.begin, word.end,
615                                       location_names[i].string,
616                                       location_names[i].allow_plural)) {
617      return true;
618    }
619  }
620
621  return false;
622}
623
624} // namespace internal
625
626} // namespace address_parser
627
628}  // namespace content
629