address_parser_internal.h revision 5821806d5e7f356e8fa4b058a389a808ea183019
15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Copyright (c) 2012 The Chromium Authors. All rights reserved.
25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Use of this source code is governed by a BSD-style license that can be
35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// found in the LICENSE file.
45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifndef CONTENT_COMMON_ADDRESS_PARSER_INTERNAL_H_
65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define CONTENT_COMMON_ADDRESS_PARSER_INTERNAL_H_
75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <vector>
95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/string_tokenizer.h"
115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace content {
135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace address_parser {
155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Internal classes and functions for address parsing.
175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace internal {
185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)struct Word {
205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  string16::const_iterator begin;
215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  string16::const_iterator end;
225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Word() {}
245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Word(const string16::const_iterator& begin,
255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       const string16::const_iterator& end);
265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class HouseNumberParser {
295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public:
305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  HouseNumberParser() {}
315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool Parse(const string16::const_iterator& begin,
335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)             const string16::const_iterator& end,
345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)             Word* word);
355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) private:
375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  static inline bool IsPreDelimiter(char16 character);
385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  static inline bool IsPostDelimiter(char16 character);
395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  inline void RestartOnNextDelimiter();
405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  inline bool CheckFinished(Word* word) const;
425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  inline void AcceptChars(size_t num_chars);
435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  inline void SkipChars(size_t num_chars);
445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  inline void ResetState();
455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Iterators to the beginning, current position and ending of the string
475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // being currently parsed.
485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  string16::const_iterator begin_;
495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  string16::const_iterator it_;
505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  string16::const_iterator end_;
515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Number of digits found in the current result candidate.
535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  size_t num_digits_;
545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Number of characters previous to the current iterator that belong
565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // to the current result candidate.
575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  size_t result_chars_;
585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DISALLOW_COPY_AND_ASSIGN(HouseNumberParser);
605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)typedef std::vector<Word> WordList;
635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)typedef StringTokenizerT<string16, string16::const_iterator>
645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    String16Tokenizer;
655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool FindStateStartingInWord(WordList* words,
675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                             size_t state_first_word,
685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                             size_t* state_last_word,
695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                             String16Tokenizer* tokenizer,
705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                             size_t* state_index);
715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool IsValidLocationName(const Word& word);
735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool IsZipValid(const Word& word, size_t state_index);
745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool IsZipValidForState(const Word& word, size_t state_index);
755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}  // namespace internal
775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}  // namespace address_parser
795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}  // namespace content
815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif  // CONTENT_COMMON_ADDRESS_PARSER_INTERNAL_H_
83