// address_parser_internal.h revision 5821806d5e7f356e8fa4b058a389a808ea183019
15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Copyright (c) 2012 The Chromium Authors. All rights reserved. 25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Use of this source code is governed by a BSD-style license that can be 35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// found in the LICENSE file. 45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifndef CONTENT_COMMON_ADDRESS_PARSER_INTERNAL_H_ 65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define CONTENT_COMMON_ADDRESS_PARSER_INTERNAL_H_ 75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <vector> 95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/string_tokenizer.h" 115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace content { 135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace address_parser { 155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Internal classes and functions for address parsing. 
175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace internal { 185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)struct Word { 205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) string16::const_iterator begin; 215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) string16::const_iterator end; 225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Word() {} 245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Word(const string16::const_iterator& begin, 255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const string16::const_iterator& end); 265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}; 275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class HouseNumberParser { 295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public: 305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) HouseNumberParser() {} 315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool Parse(const string16::const_iterator& begin, 335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const string16::const_iterator& end, 345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Word* word); 355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) private: 375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static inline bool IsPreDelimiter(char16 character); 385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static inline bool IsPostDelimiter(char16 character); 395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) inline void RestartOnNextDelimiter(); 
405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) inline bool CheckFinished(Word* word) const; 425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) inline void AcceptChars(size_t num_chars); 435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) inline void SkipChars(size_t num_chars); 445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) inline void ResetState(); 455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Iterators to the beginning, current position and ending of the string 475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // being currently parsed. 485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) string16::const_iterator begin_; 495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) string16::const_iterator it_; 505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) string16::const_iterator end_; 515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Number of digits found in the current result candidate. 535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) size_t num_digits_; 545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Number of characters previous to the current iterator that belong 565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // to the current result candidate. 
575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) size_t result_chars_; 585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DISALLOW_COPY_AND_ASSIGN(HouseNumberParser); 605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}; 615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)typedef std::vector<Word> WordList; 635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)typedef StringTokenizerT<string16, string16::const_iterator> 645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) String16Tokenizer; 655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool FindStateStartingInWord(WordList* words, 675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) size_t state_first_word, 685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) size_t* state_last_word, 695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) String16Tokenizer* tokenizer, 705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) size_t* state_index); 715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool IsValidLocationName(const Word& word); 735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool IsZipValid(const Word& word, size_t state_index); 745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool IsZipValidForState(const Word& word, size_t state_index); 755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} // namespace internal 775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} // namespace address_parser 795821806d5e7f356e8fa4b058a389a808ea183019Torne 
(Richard Coles) 805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} // namespace content 815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif // CONTENT_COMMON_ADDRESS_PARSER_INTERNAL_H_ 83