15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Copyright (c) 2012 The Chromium Authors. All rights reserved. 25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Use of this source code is governed by a BSD-style license that can be 35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// found in the LICENSE file. 45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifndef CONTENT_COMMON_ADDRESS_PARSER_INTERNAL_H_ 65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define CONTENT_COMMON_ADDRESS_PARSER_INTERNAL_H_ 75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <vector> 95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 102a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "base/strings/string_tokenizer.h" 112a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "content/common/content_export.h" 125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace content { 145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace address_parser { 165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Internal classes and functions for address parsing. 185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace internal { 195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 202a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)// Exposed for tests. 212a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)struct CONTENT_EXPORT Word { 22a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) base::string16::const_iterator begin; 23a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) base::string16::const_iterator end; 245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Word() {} 26a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) Word(const base::string16::const_iterator& begin, 27a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) const base::string16::const_iterator& end); 285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}; 295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 302a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)// Exposed for tests. 312a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)class CONTENT_EXPORT HouseNumberParser { 325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public: 335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) HouseNumberParser() {} 345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 35a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) bool Parse(const base::string16::const_iterator& begin, 36a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) const base::string16::const_iterator& end, 375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Word* word); 385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) private: 405d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) static inline bool IsPreDelimiter(base::char16 character); 415d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) static inline bool IsPostDelimiter(base::char16 character); 425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) inline void RestartOnNextDelimiter(); 435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) inline bool CheckFinished(Word* word) const; 455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) inline void AcceptChars(size_t num_chars); 465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) inline void SkipChars(size_t num_chars); 475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) inline void ResetState(); 485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Iterators to the beginning, current position and ending of the string 505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // being currently parsed. 51a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) base::string16::const_iterator begin_; 52a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) base::string16::const_iterator it_; 53a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) base::string16::const_iterator end_; 545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Number of digits found in the current result candidate. 565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) size_t num_digits_; 575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Number of characters previous to the current iterator that belong 595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // to the current result candidate. 605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) size_t result_chars_; 615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DISALLOW_COPY_AND_ASSIGN(HouseNumberParser); 635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}; 645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)typedef std::vector<Word> WordList; 66a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)typedef base::StringTokenizerT<base::string16, base::string16::const_iterator> 675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) String16Tokenizer; 685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 692a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)// These are exposed for tests. 702a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)CONTENT_EXPORT bool FindStateStartingInWord(WordList* words, 712a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) size_t state_first_word, 722a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) size_t* state_last_word, 732a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) String16Tokenizer* tokenizer, 742a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) size_t* state_index); 755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 762a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)CONTENT_EXPORT bool IsValidLocationName(const Word& word); 772a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)CONTENT_EXPORT bool IsZipValid(const Word& word, size_t state_index); 782a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)CONTENT_EXPORT bool IsZipValidForState(const Word& word, size_t state_index); 795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} // namespace internal 815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} // namespace address_parser 835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} // namespace content 855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif // CONTENT_COMMON_ADDRESS_PARSER_INTERNAL_H_ 87