// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifndef CONTENT_COMMON_ADDRESS_PARSER_INTERNAL_H_
65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define CONTENT_COMMON_ADDRESS_PARSER_INTERNAL_H_
75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <vector>
95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
102a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "base/strings/string_tokenizer.h"
112a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "content/common/content_export.h"
125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace content {
145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace address_parser {
165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
// Internal classes and functions for address parsing.
185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace internal {
195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
202a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)// Exposed for tests.
212a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)struct CONTENT_EXPORT Word {
22a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)  base::string16::const_iterator begin;
23a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)  base::string16::const_iterator end;
245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Word() {}
26a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)  Word(const base::string16::const_iterator& begin,
27a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)       const base::string16::const_iterator& end);
285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
302a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)// Exposed for tests.
312a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)class CONTENT_EXPORT HouseNumberParser {
325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public:
335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  HouseNumberParser() {}
345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
35a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)  bool Parse(const base::string16::const_iterator& begin,
36a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)             const base::string16::const_iterator& end,
375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)             Word* word);
385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) private:
405d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  static inline bool IsPreDelimiter(base::char16 character);
415d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  static inline bool IsPostDelimiter(base::char16 character);
425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  inline void RestartOnNextDelimiter();
435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  inline bool CheckFinished(Word* word) const;
455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  inline void AcceptChars(size_t num_chars);
465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  inline void SkipChars(size_t num_chars);
475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  inline void ResetState();
485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Iterators to the beginning, current position and ending of the string
505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // being currently parsed.
51a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)  base::string16::const_iterator begin_;
52a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)  base::string16::const_iterator it_;
53a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)  base::string16::const_iterator end_;
545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Number of digits found in the current result candidate.
565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  size_t num_digits_;
575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Number of characters previous to the current iterator that belong
595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // to the current result candidate.
605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  size_t result_chars_;
615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DISALLOW_COPY_AND_ASSIGN(HouseNumberParser);
635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)typedef std::vector<Word> WordList;
66a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)typedef base::StringTokenizerT<base::string16, base::string16::const_iterator>
675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    String16Tokenizer;
685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
692a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)// These are exposed for tests.
702a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)CONTENT_EXPORT bool FindStateStartingInWord(WordList* words,
712a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                                            size_t state_first_word,
722a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                                            size_t* state_last_word,
732a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                                            String16Tokenizer* tokenizer,
742a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                                            size_t* state_index);
755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
762a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)CONTENT_EXPORT bool IsValidLocationName(const Word& word);
772a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)CONTENT_EXPORT bool IsZipValid(const Word& word, size_t state_index);
782a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)CONTENT_EXPORT bool IsZipValidForState(const Word& word, size_t state_index);
795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}  // namespace internal
815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}  // namespace address_parser
835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}  // namespace content
855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif  // CONTENT_COMMON_ADDRESS_PARSER_INTERNAL_H_
87