// address_parser_internal.h (revision 2a99a7e74a7f215066514fe81d2bfa6639d9eddd)
15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Copyright (c) 2012 The Chromium Authors. All rights reserved. 25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Use of this source code is governed by a BSD-style license that can be 35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// found in the LICENSE file. 45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifndef CONTENT_COMMON_ADDRESS_PARSER_INTERNAL_H_ 65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define CONTENT_COMMON_ADDRESS_PARSER_INTERNAL_H_ 75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <vector> 95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 102a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "base/strings/string_tokenizer.h" 112a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "content/common/content_export.h" 125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace content { 145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace address_parser { 165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Internal classes and functions for address parsing. 185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace internal { 195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 202a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)// Exposed for tests. 
212a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)struct CONTENT_EXPORT Word { 225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) string16::const_iterator begin; 235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) string16::const_iterator end; 245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Word() {} 265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Word(const string16::const_iterator& begin, 275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const string16::const_iterator& end); 285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}; 295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 302a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)// Exposed for tests. 312a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)class CONTENT_EXPORT HouseNumberParser { 325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public: 335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) HouseNumberParser() {} 345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool Parse(const string16::const_iterator& begin, 365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const string16::const_iterator& end, 375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Word* word); 385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) private: 405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static inline bool IsPreDelimiter(char16 character); 415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static inline bool IsPostDelimiter(char16 character); 425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) inline void RestartOnNextDelimiter(); 435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard 
Coles) 445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) inline bool CheckFinished(Word* word) const; 455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) inline void AcceptChars(size_t num_chars); 465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) inline void SkipChars(size_t num_chars); 475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) inline void ResetState(); 485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Iterators to the beginning, current position and ending of the string 505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // being currently parsed. 515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) string16::const_iterator begin_; 525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) string16::const_iterator it_; 535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) string16::const_iterator end_; 545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Number of digits found in the current result candidate. 565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) size_t num_digits_; 575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Number of characters previous to the current iterator that belong 595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // to the current result candidate. 
605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) size_t result_chars_; 615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DISALLOW_COPY_AND_ASSIGN(HouseNumberParser); 635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}; 645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)typedef std::vector<Word> WordList; 662a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)typedef base::StringTokenizerT<string16, string16::const_iterator> 675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) String16Tokenizer; 685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 692a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)// These are exposed for tests. 702a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)CONTENT_EXPORT bool FindStateStartingInWord(WordList* words, 712a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) size_t state_first_word, 722a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) size_t* state_last_word, 732a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) String16Tokenizer* tokenizer, 742a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) size_t* state_index); 755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 762a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)CONTENT_EXPORT bool IsValidLocationName(const Word& word); 772a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)CONTENT_EXPORT bool IsZipValid(const Word& word, size_t state_index); 782a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)CONTENT_EXPORT bool IsZipValidForState(const Word& word, size_t state_index); 795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} // namespace internal 
815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} // namespace address_parser 835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} // namespace content 855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif // CONTENT_COMMON_ADDRESS_PARSER_INTERNAL_H_ 87