1/*
2 * Copyright (C) 2012 Google Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are
6 * met:
7 *
8 *    * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 *    * Redistributions in binary form must reproduce the above
11 * copyright notice, this list of conditions and the following disclaimer
12 * in the documentation and/or other materials provided with the
13 * distribution.
14 *    * Neither the name of Google Inc. nor the names of its
15 * contributors may be used to endorse or promote products derived from
16 * this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31#ifndef CONTENT_RENDERER_ANDROID_ADDRESS_DETECTOR_H_
32#define CONTENT_RENDERER_ANDROID_ADDRESS_DETECTOR_H_
33#pragma once
34
35#include "build/build_config.h"  // Needed for OS_ANDROID
36
37#if defined(OS_ANDROID)
38
39#include <vector>
40
41#include "base/string_tokenizer.h"
42#include "base/string_util.h"
43#include "content/content_detector.h"
44
45// Finds a geographical address (currently US only) in the given text string.
46class AddressDetector : public ContentDetector {
47 public:
48  AddressDetector();
49  virtual ~AddressDetector();
50
51  // Implementation of ContentDetector.
52  virtual bool FindContent(const string16::const_iterator& begin,
53                           const string16::const_iterator& end,
54                           size_t* start_pos,
55                           size_t* end_pos) OVERRIDE;
56
57 private:
58  friend class AddressDetectorTest;
59
60  virtual std::string GetContentText(const WebKit::WebRange& range) OVERRIDE;
61  virtual GURL GetIntentURL(const std::string& content_text) OVERRIDE;
62  virtual size_t GetMaximumContentLength() OVERRIDE;
63  virtual bool IsEnabled(const WebKit::WebHitTestInfo& hit_test) OVERRIDE;
64
65  // Internal structs and classes. Required to be visible by the unit tests.
66  struct Word {
67    string16::const_iterator begin;
68    string16::const_iterator end;
69
70    Word() {}
71    Word(const string16::const_iterator& begin_it,
72         const string16::const_iterator& end_it)
73        : begin(begin_it),
74          end(end_it) {
75      DCHECK(begin_it <= end_it);
76    }
77  };
78
79  class HouseNumberParser {
80   public:
81    HouseNumberParser() {}
82
83    bool Parse(const string16::const_iterator& begin,
84               const string16::const_iterator& end,
85               Word* word);
86
87   private:
88    static inline bool IsPreDelimiter(char16 character);
89    static inline bool IsPostDelimiter(char16 character);
90    inline void RestartOnNextDelimiter();
91
92    inline bool CheckFinished(Word* word) const;
93    inline void AcceptChars(size_t num_chars);
94    inline void SkipChars(size_t num_chars);
95    inline void ResetState();
96
97    // Iterators to the beginning, current position and ending of the string
98    // being currently parsed.
99    string16::const_iterator begin_;
100    string16::const_iterator it_;
101    string16::const_iterator end_;
102
103    // Number of digits found in the current result candidate.
104    size_t num_digits_;
105
106    // Number of characters previous to the current iterator that belong
107    // to the current result candidate.
108    size_t result_chars_;
109
110    DISALLOW_COPY_AND_ASSIGN(HouseNumberParser);
111  };
112
113  typedef std::vector<Word> WordList;
114  typedef StringTokenizerT<string16, string16::const_iterator>
115      String16Tokenizer;
116
117  static bool FindStateStartingInWord(WordList* words,
118                                      size_t state_first_word,
119                                      size_t* state_last_word,
120                                      String16Tokenizer* tokenizer,
121                                      size_t* state_index);
122
123  static bool IsValidLocationName(const Word& word);
124  static bool IsZipValid(const Word& word, size_t state_index);
125  static bool IsZipValidForState(const Word& word, size_t state_index);
126
127  DISALLOW_COPY_AND_ASSIGN(AddressDetector);
128};
129
130#endif  // defined(OS_ANDROID)
131
132#endif  // CONTENT_RENDERER_ANDROID_ADDRESS_DETECTOR_H_
133