1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "chrome/browser/history/in_memory_url_index_types.h"
6
7#include <algorithm>
8#include <functional>
9#include <iterator>
10#include <numeric>
11#include <set>
12
13#include "base/i18n/break_iterator.h"
14#include "base/i18n/case_conversion.h"
15#include "base/strings/string_util.h"
16#include "net/base/escape.h"
17#include "net/base/net_util.h"
18
19namespace history {
20
21// Matches within URL and Title Strings ----------------------------------------
22
23TermMatches MatchTermInString(const base::string16& term,
24                              const base::string16& cleaned_string,
25                              int term_num) {
26  const size_t kMaxCompareLength = 2048;
27  const base::string16& short_string =
28      (cleaned_string.length() > kMaxCompareLength) ?
29      cleaned_string.substr(0, kMaxCompareLength) : cleaned_string;
30  TermMatches matches;
31  for (size_t location = short_string.find(term);
32       location != base::string16::npos;
33       location = short_string.find(term, location + 1))
34    matches.push_back(TermMatch(term_num, location, term.length()));
35  return matches;
36}
37
38// Comparison function for sorting TermMatches by their offsets.
39bool MatchOffsetLess(const TermMatch& m1, const TermMatch& m2) {
40  return m1.offset < m2.offset;
41}
42
43TermMatches SortAndDeoverlapMatches(const TermMatches& matches) {
44  if (matches.empty())
45    return matches;
46  TermMatches sorted_matches = matches;
47  std::sort(sorted_matches.begin(), sorted_matches.end(), MatchOffsetLess);
48  TermMatches clean_matches;
49  TermMatch last_match;
50  for (TermMatches::const_iterator iter = sorted_matches.begin();
51       iter != sorted_matches.end(); ++iter) {
52    if (iter->offset >= last_match.offset + last_match.length) {
53      last_match = *iter;
54      clean_matches.push_back(last_match);
55    }
56  }
57  return clean_matches;
58}
59
60std::vector<size_t> OffsetsFromTermMatches(const TermMatches& matches) {
61  std::vector<size_t> offsets;
62  for (TermMatches::const_iterator i = matches.begin(); i != matches.end();
63       ++i) {
64    offsets.push_back(i->offset);
65    offsets.push_back(i->offset + i->length);
66  }
67  return offsets;
68}
69
70TermMatches ReplaceOffsetsInTermMatches(const TermMatches& matches,
71                                        const std::vector<size_t>& offsets) {
72  DCHECK_EQ(2 * matches.size(), offsets.size());
73  TermMatches new_matches;
74  std::vector<size_t>::const_iterator offset_iter = offsets.begin();
75  for (TermMatches::const_iterator term_iter = matches.begin();
76       term_iter != matches.end(); ++term_iter, ++offset_iter) {
77    const size_t starting_offset = *offset_iter;
78    ++offset_iter;
79    const size_t ending_offset = *offset_iter;
80    if ((starting_offset != base::string16::npos) &&
81        (ending_offset != base::string16::npos) &&
82        (starting_offset != ending_offset)) {
83      TermMatch new_match(*term_iter);
84      new_match.offset = starting_offset;
85      new_match.length = ending_offset - starting_offset;
86      new_matches.push_back(new_match);
87    }
88  }
89  return new_matches;
90}
91
92// Utility Functions -----------------------------------------------------------
93
94String16Set String16SetFromString16(const base::string16& cleaned_uni_string,
95                                    WordStarts* word_starts) {
96  String16Vector words =
97      String16VectorFromString16(cleaned_uni_string, false, word_starts);
98  String16Set word_set;
99  for (String16Vector::const_iterator iter = words.begin(); iter != words.end();
100       ++iter)
101    word_set.insert(base::i18n::ToLower(*iter).substr(0, kMaxSignificantChars));
102  return word_set;
103}
104
105String16Vector String16VectorFromString16(
106    const base::string16& cleaned_uni_string,
107    bool break_on_space,
108    WordStarts* word_starts) {
109  if (word_starts)
110    word_starts->clear();
111  base::i18n::BreakIterator iter(cleaned_uni_string,
112      break_on_space ? base::i18n::BreakIterator::BREAK_SPACE :
113          base::i18n::BreakIterator::BREAK_WORD);
114  String16Vector words;
115  if (!iter.Init())
116    return words;
117  while (iter.Advance()) {
118    if (break_on_space || iter.IsWord()) {
119      base::string16 word(iter.GetString());
120      size_t initial_whitespace = 0;
121      if (break_on_space) {
122        base::string16 trimmed_word;
123        base::TrimWhitespace(word, base::TRIM_LEADING, &trimmed_word);
124        initial_whitespace = word.length() - trimmed_word.length();
125        base::TrimWhitespace(trimmed_word, base::TRIM_TRAILING, &word);
126      }
127      if (word.empty())
128        continue;
129      words.push_back(word);
130      if (!word_starts)
131        continue;
132      size_t word_start = iter.prev() + initial_whitespace;
133      if (word_start < kMaxSignificantChars)
134        word_starts->push_back(word_start);
135    }
136  }
137  return words;
138}
139
140Char16Set Char16SetFromString16(const base::string16& term) {
141  Char16Set characters;
142  for (base::string16::const_iterator iter = term.begin(); iter != term.end();
143       ++iter)
144    characters.insert(*iter);
145  return characters;
146}
147
148// HistoryInfoMapValue ---------------------------------------------------------
149
150HistoryInfoMapValue::HistoryInfoMapValue() {}
151HistoryInfoMapValue::~HistoryInfoMapValue() {}
152
153// RowWordStarts ---------------------------------------------------------------
154
155RowWordStarts::RowWordStarts() {}
156RowWordStarts::~RowWordStarts() {}
157
158void RowWordStarts::Clear() {
159  url_word_starts_.clear();
160  title_word_starts_.clear();
161}
162
163}  // namespace history
164