13ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch// Copyright 2011 the V8 project authors. All rights reserved.
2b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch// Use of this source code is governed by a BSD-style license that can be
3b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch// found in the LICENSE file.
459151504615d929945dc59db37bf1166937748c6Steve Block
559151504615d929945dc59db37bf1166937748c6Steve Block#ifndef V8_STRING_SEARCH_H_
659151504615d929945dc59db37bf1166937748c6Steve Block#define V8_STRING_SEARCH_H_
759151504615d929945dc59db37bf1166937748c6Steve Block
8014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch#include "src/isolate.h"
9014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch#include "src/vector.h"
10014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch
1159151504615d929945dc59db37bf1166937748c6Steve Blocknamespace v8 {
1259151504615d929945dc59db37bf1166937748c6Steve Blocknamespace internal {
1359151504615d929945dc59db37bf1166937748c6Steve Block
1459151504615d929945dc59db37bf1166937748c6Steve Block
15f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch//---------------------------------------------------------------------
16f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch// String Search object.
17f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch//---------------------------------------------------------------------
18f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch
19f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch// Class holding constants and methods that apply to all string search variants,
20f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch// independently of subject and pattern char size.
21f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdochclass StringSearchBase {
22f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch protected:
23f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  // Cap on the maximal shift in the Boyer-Moore implementation. By setting a
24f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  // limit, we can fix the size of tables. For a needle longer than this limit,
25f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  // search will not be optimal, since we only build tables for a suffix
26f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  // of the string, but it is a safe approximation.
2744f0eee88ff00398ff7f715fab053374d808c90dSteve Block  static const int kBMMaxShift = Isolate::kBMMaxShift;
28f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch
29f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  // Reduce alphabet to this size.
30f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  // One of the tables used by Boyer-Moore and Boyer-Moore-Horspool has size
31f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  // proportional to the input alphabet. We reduce the alphabet size by
32f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  // equating input characters modulo a smaller alphabet size. This gives
33f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  // a potentially less efficient searching, but is a safe approximation.
34f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  // For needles using only characters in the same Unicode 256-code point page,
35f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  // there is no search speed degradation.
36b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch  static const int kLatin1AlphabetSize = 256;
3744f0eee88ff00398ff7f715fab053374d808c90dSteve Block  static const int kUC16AlphabetSize = Isolate::kUC16AlphabetSize;
38f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch
39f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  // Bad-char shift table stored in the state. It's length is the alphabet size.
40f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  // For patterns below this length, the skip length of Boyer-Moore is too short
41f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  // to compensate for the algorithmic overhead compared to simple brute force.
42f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  static const int kBMMinPatternLength = 7;
43f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch
44b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch  static inline bool IsOneByteString(Vector<const uint8_t> string) {
45f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch    return true;
46f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  }
47f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch
48b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch  static inline bool IsOneByteString(Vector<const uc16> string) {
49b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch    return String::IsOneByte(string.start(), string.length());
50f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  }
51f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch
5244f0eee88ff00398ff7f715fab053374d808c90dSteve Block  friend class Isolate;
53f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch};
54f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch
55f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch
56f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdochtemplate <typename PatternChar, typename SubjectChar>
57f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdochclass StringSearch : private StringSearchBase {
5859151504615d929945dc59db37bf1166937748c6Steve Block public:
5944f0eee88ff00398ff7f715fab053374d808c90dSteve Block  StringSearch(Isolate* isolate, Vector<const PatternChar> pattern)
6044f0eee88ff00398ff7f715fab053374d808c90dSteve Block      : isolate_(isolate),
6144f0eee88ff00398ff7f715fab053374d808c90dSteve Block        pattern_(pattern),
62f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch        start_(Max(0, pattern.length() - kBMMaxShift)) {
63f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch    if (sizeof(PatternChar) > sizeof(SubjectChar)) {
64b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch      if (!IsOneByteString(pattern_)) {
65f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch        strategy_ = &FailSearch;
66f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch        return;
67f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch      }
68f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch    }
69f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch    int pattern_length = pattern_.length();
70f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch    if (pattern_length < kBMMinPatternLength) {
71f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch      if (pattern_length == 1) {
72f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch        strategy_ = &SingleCharSearch;
73f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch        return;
74f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch      }
75f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch      strategy_ = &LinearSearch;
76f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch      return;
7759151504615d929945dc59db37bf1166937748c6Steve Block    }
78f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch    strategy_ = &InitialSearch;
7959151504615d929945dc59db37bf1166937748c6Steve Block  }
80f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch
81f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  int Search(Vector<const SubjectChar> subject, int index) {
82f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch    return strategy_(this, subject, index);
8359151504615d929945dc59db37bf1166937748c6Steve Block  }
84f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch
85f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  static inline int AlphabetSize() {
86f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch    if (sizeof(PatternChar) == 1) {
87b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch      // Latin1 needle.
88b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch      return kLatin1AlphabetSize;
89f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch    } else {
90b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch      DCHECK(sizeof(PatternChar) == 2);
91f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch      // UC16 needle.
92f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch      return kUC16AlphabetSize;
93f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch    }
9459151504615d929945dc59db37bf1166937748c6Steve Block  }
95f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch
9659151504615d929945dc59db37bf1166937748c6Steve Block private:
97f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  typedef int (*SearchFunction)(  // NOLINT - it's not a cast!
98f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch      StringSearch<PatternChar, SubjectChar>*,
99f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch      Vector<const SubjectChar>,
100f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch      int);
101f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch
102f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  static int FailSearch(StringSearch<PatternChar, SubjectChar>*,
103f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch                        Vector<const SubjectChar>,
104f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch                        int) {
105f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch    return -1;
106f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  }
10759151504615d929945dc59db37bf1166937748c6Steve Block
108f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  static int SingleCharSearch(StringSearch<PatternChar, SubjectChar>* search,
109f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch                              Vector<const SubjectChar> subject,
110f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch                              int start_index);
111f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch
112f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  static int LinearSearch(StringSearch<PatternChar, SubjectChar>* search,
113f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch                          Vector<const SubjectChar> subject,
114f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch                          int start_index);
115f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch
116f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  static int InitialSearch(StringSearch<PatternChar, SubjectChar>* search,
117f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch                           Vector<const SubjectChar> subject,
118f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch                           int start_index);
119f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch
120f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  static int BoyerMooreHorspoolSearch(
121f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch      StringSearch<PatternChar, SubjectChar>* search,
122f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch      Vector<const SubjectChar> subject,
123f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch      int start_index);
124f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch
125f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  static int BoyerMooreSearch(StringSearch<PatternChar, SubjectChar>* search,
126f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch                              Vector<const SubjectChar> subject,
127f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch                              int start_index);
128f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch
129f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  void PopulateBoyerMooreHorspoolTable();
130f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch
131f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  void PopulateBoyerMooreTable();
132f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch
133b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch  static inline bool exceedsOneByte(uint8_t c) {
134b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch    return false;
135b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch  }
136b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch
137b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch  static inline bool exceedsOneByte(uint16_t c) {
138b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch    return c > String::kMaxOneByteCharCodeU;
139b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch  }
140b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch
141f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  static inline int CharOccurrence(int* bad_char_occurrence,
142f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch                                   SubjectChar char_code) {
143f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch    if (sizeof(SubjectChar) == 1) {
144f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch      return bad_char_occurrence[static_cast<int>(char_code)];
145f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch    }
146f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch    if (sizeof(PatternChar) == 1) {
147b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch      if (exceedsOneByte(char_code)) {
148f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch        return -1;
149f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch      }
150f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch      return bad_char_occurrence[static_cast<unsigned int>(char_code)];
151f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch    }
152f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch    // Both pattern and subject are UC16. Reduce character to equivalence class.
153f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch    int equiv_class = char_code % kUC16AlphabetSize;
154f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch    return bad_char_occurrence[equiv_class];
155f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  }
156f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch
15744f0eee88ff00398ff7f715fab053374d808c90dSteve Block  // The following tables are shared by all searches.
15844f0eee88ff00398ff7f715fab053374d808c90dSteve Block  // TODO(lrn): Introduce a way for a pattern to keep its tables
15944f0eee88ff00398ff7f715fab053374d808c90dSteve Block  // between searches (e.g., for an Atom RegExp).
16044f0eee88ff00398ff7f715fab053374d808c90dSteve Block
16144f0eee88ff00398ff7f715fab053374d808c90dSteve Block  // Store for the BoyerMoore(Horspool) bad char shift table.
162f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  // Return a table covering the last kBMMaxShift+1 positions of
163f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  // pattern.
164f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  int* bad_char_table() {
16544f0eee88ff00398ff7f715fab053374d808c90dSteve Block    return isolate_->bad_char_shift_table();
166f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  }
167f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch
16844f0eee88ff00398ff7f715fab053374d808c90dSteve Block  // Store for the BoyerMoore good suffix shift table.
169f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  int* good_suffix_shift_table() {
170f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch    // Return biased pointer that maps the range  [start_..pattern_.length()
171f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch    // to the kGoodSuffixShiftTable array.
17244f0eee88ff00398ff7f715fab053374d808c90dSteve Block    return isolate_->good_suffix_shift_table() - start_;
173f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  }
174f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch
17544f0eee88ff00398ff7f715fab053374d808c90dSteve Block  // Table used temporarily while building the BoyerMoore good suffix
17644f0eee88ff00398ff7f715fab053374d808c90dSteve Block  // shift table.
177f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  int* suffix_table() {
178f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch    // Return biased pointer that maps the range  [start_..pattern_.length()
179f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch    // to the kSuffixTable array.
18044f0eee88ff00398ff7f715fab053374d808c90dSteve Block    return isolate_->suffix_table() - start_;
181f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  }
182f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch
18344f0eee88ff00398ff7f715fab053374d808c90dSteve Block  Isolate* isolate_;
184f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  // The pattern to search for.
185f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  Vector<const PatternChar> pattern_;
186f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  // Pointer to implementation of the search.
187f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  SearchFunction strategy_;
188f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  // Cache value of Max(0, pattern_length() - kBMMaxShift)
189f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  int start_;
19059151504615d929945dc59db37bf1166937748c6Steve Block};
19159151504615d929945dc59db37bf1166937748c6Steve Block
19259151504615d929945dc59db37bf1166937748c6Steve Block
193014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdochtemplate <typename T, typename U>
194014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdochinline T AlignDown(T value, U alignment) {
195014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch  return reinterpret_cast<T>(
196014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch      (reinterpret_cast<uintptr_t>(value) & ~(alignment - 1)));
197014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch}
198014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch
199014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch
200014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdochinline uint8_t GetHighestValueByte(uc16 character) {
201014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch  return Max(static_cast<uint8_t>(character & 0xFF),
202014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch             static_cast<uint8_t>(character >> 8));
203014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch}
204014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch
205014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch
206014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdochinline uint8_t GetHighestValueByte(uint8_t character) { return character; }
207014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch
208014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch
209014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdochtemplate <typename PatternChar, typename SubjectChar>
210014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdochinline int FindFirstCharacter(Vector<const PatternChar> pattern,
211014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch                              Vector<const SubjectChar> subject, int index) {
212014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch  const PatternChar pattern_first_char = pattern[0];
213014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch  const int max_n = (subject.length() - pattern.length() + 1);
214014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch
215014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch  const uint8_t search_byte = GetHighestValueByte(pattern_first_char);
216014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch  const SubjectChar search_char = static_cast<SubjectChar>(pattern_first_char);
217014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch  int pos = index;
218014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch  do {
219014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch    DCHECK_GE(max_n - pos, 0);
220014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch    const SubjectChar* char_pos = reinterpret_cast<const SubjectChar*>(
221014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch        memchr(subject.start() + pos, search_byte,
222014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch               (max_n - pos) * sizeof(SubjectChar)));
223014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch    if (char_pos == NULL) return -1;
224014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch    char_pos = AlignDown(char_pos, sizeof(SubjectChar));
225014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch    pos = static_cast<int>(char_pos - subject.start());
226014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch    if (subject[pos] == search_char) return pos;
227014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch  } while (++pos < max_n);
228014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch
229014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch  return -1;
230014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch}
231014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch
232014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch
233f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch//---------------------------------------------------------------------
234f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch// Single Character Pattern Search Strategy
235f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch//---------------------------------------------------------------------
23659151504615d929945dc59db37bf1166937748c6Steve Block
237f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdochtemplate <typename PatternChar, typename SubjectChar>
238f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdochint StringSearch<PatternChar, SubjectChar>::SingleCharSearch(
239f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch    StringSearch<PatternChar, SubjectChar>* search,
240f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch    Vector<const SubjectChar> subject,
241f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch    int index) {
242b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch  DCHECK_EQ(1, search->pattern_.length());
243f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  PatternChar pattern_first_char = search->pattern_[0];
244014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch  if (sizeof(PatternChar) > sizeof(SubjectChar)) {
245014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch    if (exceedsOneByte(pattern_first_char)) {
246014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch      return -1;
24759151504615d929945dc59db37bf1166937748c6Steve Block    }
24859151504615d929945dc59db37bf1166937748c6Steve Block  }
249014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch  return FindFirstCharacter(search->pattern_, subject, index);
250f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch}
251f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch
252f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch//---------------------------------------------------------------------
253f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch// Linear Search Strategy
254f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch//---------------------------------------------------------------------
255f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch
256f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch
257f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdochtemplate <typename PatternChar, typename SubjectChar>
2583ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdochinline bool CharCompare(const PatternChar* pattern,
2593ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch                        const SubjectChar* subject,
2603ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch                        int length) {
261b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch  DCHECK(length > 0);
262f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  int pos = 0;
263f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  do {
264f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch    if (pattern[pos] != subject[pos]) {
265f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch      return false;
266f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch    }
267f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch    pos++;
268f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  } while (pos < length);
269f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  return true;
270f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch}
271f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch
272f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch
273f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch// Simple linear search for short patterns. Never bails out.
274f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdochtemplate <typename PatternChar, typename SubjectChar>
275f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdochint StringSearch<PatternChar, SubjectChar>::LinearSearch(
276f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch    StringSearch<PatternChar, SubjectChar>* search,
277f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch    Vector<const SubjectChar> subject,
278f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch    int index) {
279f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  Vector<const PatternChar> pattern = search->pattern_;
280b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch  DCHECK(pattern.length() > 1);
281f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  int pattern_length = pattern.length();
282f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  int i = index;
283f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  int n = subject.length() - pattern_length;
284f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  while (i <= n) {
285014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch    i = FindFirstCharacter(pattern, subject, i);
286014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch    if (i == -1) return -1;
287014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch    DCHECK_LE(i, n);
288014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch    i++;
289f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch    // Loop extracted to separate function to allow using return to do
290f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch    // a deeper break.
291f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch    if (CharCompare(pattern.start() + 1,
292f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch                    subject.start() + i,
293f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch                    pattern_length - 1)) {
294f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch      return i - 1;
295f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch    }
296f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  }
297f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  return -1;
298f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch}
299f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch
300f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch//---------------------------------------------------------------------
301f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch// Boyer-Moore string search
302f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch//---------------------------------------------------------------------
303f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch
304f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdochtemplate <typename PatternChar, typename SubjectChar>
305f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdochint StringSearch<PatternChar, SubjectChar>::BoyerMooreSearch(
306f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch    StringSearch<PatternChar, SubjectChar>* search,
307f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch    Vector<const SubjectChar> subject,
308f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch    int start_index) {
309f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  Vector<const PatternChar> pattern = search->pattern_;
310f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  int subject_length = subject.length();
311f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  int pattern_length = pattern.length();
312f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  // Only preprocess at most kBMMaxShift last characters of pattern.
313f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  int start = search->start_;
314f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch
315f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  int* bad_char_occurence = search->bad_char_table();
316f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  int* good_suffix_shift = search->good_suffix_shift_table();
317f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch
318f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  PatternChar last_char = pattern[pattern_length - 1];
319f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  int index = start_index;
320f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  // Continue search from i.
321f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  while (index <= subject_length - pattern_length) {
322f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch    int j = pattern_length - 1;
323f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch    int c;
324f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch    while (last_char != (c = subject[index + j])) {
325f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch      int shift =
326f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch          j - CharOccurrence(bad_char_occurence, c);
327f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch      index += shift;
328f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch      if (index > subject_length - pattern_length) {
329f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch        return -1;
330f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch      }
331f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch    }
332f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch    while (j >= 0 && pattern[j] == (c = subject[index + j])) j--;
333f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch    if (j < 0) {
334f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch      return index;
335f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch    } else if (j < start) {
336f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch      // we have matched more than our tables allow us to be smart about.
337f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch      // Fall back on BMH shift.
338f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch      index += pattern_length - 1
339f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch          - CharOccurrence(bad_char_occurence,
340f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch                           static_cast<SubjectChar>(last_char));
341f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch    } else {
342f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch      int gs_shift = good_suffix_shift[j + 1];
343f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch      int bc_occ =
344f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch          CharOccurrence(bad_char_occurence, c);
345f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch      int shift = j - bc_occ;
346f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch      if (gs_shift > shift) {
347f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch        shift = gs_shift;
348f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch      }
349f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch      index += shift;
350f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch    }
35159151504615d929945dc59db37bf1166937748c6Steve Block  }
352f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch
353f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  return -1;
35459151504615d929945dc59db37bf1166937748c6Steve Block}
35559151504615d929945dc59db37bf1166937748c6Steve Block
35659151504615d929945dc59db37bf1166937748c6Steve Block
357f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdochtemplate <typename PatternChar, typename SubjectChar>
358f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdochvoid StringSearch<PatternChar, SubjectChar>::PopulateBoyerMooreTable() {
359f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  int pattern_length = pattern_.length();
360f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  const PatternChar* pattern = pattern_.start();
361f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  // Only look at the last kBMMaxShift characters of pattern (from start_
362f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  // to pattern_length).
363f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  int start = start_;
364f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  int length = pattern_length - start;
365f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch
366f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  // Biased tables so that we can use pattern indices as table indices,
367f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  // even if we only cover the part of the pattern from offset start.
368f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  int* shift_table = good_suffix_shift_table();
369f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  int* suffix_table = this->suffix_table();
370f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch
371f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  // Initialize table.
372f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  for (int i = start; i < pattern_length; i++) {
373f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch    shift_table[i] = length;
374f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  }
375f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  shift_table[pattern_length] = 1;
376f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  suffix_table[pattern_length] = pattern_length + 1;
37759151504615d929945dc59db37bf1166937748c6Steve Block
3783ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch  if (pattern_length <= start) {
3793ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch    return;
3803ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch  }
3813ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch
382f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  // Find suffixes.
383f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  PatternChar last_char = pattern[pattern_length - 1];
384f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  int suffix = pattern_length + 1;
38559151504615d929945dc59db37bf1166937748c6Steve Block  {
386f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch    int i = pattern_length;
38759151504615d929945dc59db37bf1166937748c6Steve Block    while (i > start) {
38859151504615d929945dc59db37bf1166937748c6Steve Block      PatternChar c = pattern[i - 1];
389f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch      while (suffix <= pattern_length && c != pattern[suffix - 1]) {
390f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch        if (shift_table[suffix] == length) {
391f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch          shift_table[suffix] = suffix - i;
39259151504615d929945dc59db37bf1166937748c6Steve Block        }
393f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch        suffix = suffix_table[suffix];
39459151504615d929945dc59db37bf1166937748c6Steve Block      }
395f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch      suffix_table[--i] = --suffix;
396f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch      if (suffix == pattern_length) {
39759151504615d929945dc59db37bf1166937748c6Steve Block        // No suffix to extend, so we check against last_char only.
39859151504615d929945dc59db37bf1166937748c6Steve Block        while ((i > start) && (pattern[i - 1] != last_char)) {
399f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch          if (shift_table[pattern_length] == length) {
400f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch            shift_table[pattern_length] = pattern_length - i;
40159151504615d929945dc59db37bf1166937748c6Steve Block          }
402f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch          suffix_table[--i] = pattern_length;
40359151504615d929945dc59db37bf1166937748c6Steve Block        }
40459151504615d929945dc59db37bf1166937748c6Steve Block        if (i > start) {
405f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch          suffix_table[--i] = --suffix;
40659151504615d929945dc59db37bf1166937748c6Steve Block        }
40759151504615d929945dc59db37bf1166937748c6Steve Block      }
40859151504615d929945dc59db37bf1166937748c6Steve Block    }
40959151504615d929945dc59db37bf1166937748c6Steve Block  }
410f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  // Build shift table using suffixes.
411f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  if (suffix < pattern_length) {
412f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch    for (int i = start; i <= pattern_length; i++) {
413f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch      if (shift_table[i] == length) {
414f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch        shift_table[i] = suffix - start;
41559151504615d929945dc59db37bf1166937748c6Steve Block      }
41659151504615d929945dc59db37bf1166937748c6Steve Block      if (i == suffix) {
417f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch        suffix = suffix_table[suffix];
41859151504615d929945dc59db37bf1166937748c6Steve Block      }
41959151504615d929945dc59db37bf1166937748c6Steve Block    }
42059151504615d929945dc59db37bf1166937748c6Steve Block  }
42159151504615d929945dc59db37bf1166937748c6Steve Block}
42259151504615d929945dc59db37bf1166937748c6Steve Block
423f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch//---------------------------------------------------------------------
424f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch// Boyer-Moore-Horspool string search.
425f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch//---------------------------------------------------------------------
42659151504615d929945dc59db37bf1166937748c6Steve Block
427f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdochtemplate <typename PatternChar, typename SubjectChar>
428f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdochint StringSearch<PatternChar, SubjectChar>::BoyerMooreHorspoolSearch(
429f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch    StringSearch<PatternChar, SubjectChar>* search,
430f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch    Vector<const SubjectChar> subject,
431f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch    int start_index) {
432f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  Vector<const PatternChar> pattern = search->pattern_;
433f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  int subject_length = subject.length();
434f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  int pattern_length = pattern.length();
435f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  int* char_occurrences = search->bad_char_table();
436f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  int badness = -pattern_length;
43759151504615d929945dc59db37bf1166937748c6Steve Block
43859151504615d929945dc59db37bf1166937748c6Steve Block  // How bad we are doing without a good-suffix table.
439f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  PatternChar last_char = pattern[pattern_length - 1];
440f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  int last_char_shift = pattern_length - 1 -
441f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch      CharOccurrence(char_occurrences, static_cast<SubjectChar>(last_char));
44259151504615d929945dc59db37bf1166937748c6Steve Block  // Perform search
443f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  int index = start_index;  // No matches found prior to this index.
444f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  while (index <= subject_length - pattern_length) {
445f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch    int j = pattern_length - 1;
446f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch    int subject_char;
447f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch    while (last_char != (subject_char = subject[index + j])) {
448f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch      int bc_occ = CharOccurrence(char_occurrences, subject_char);
44959151504615d929945dc59db37bf1166937748c6Steve Block      int shift = j - bc_occ;
450f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch      index += shift;
45159151504615d929945dc59db37bf1166937748c6Steve Block      badness += 1 - shift;  // at most zero, so badness cannot increase.
452f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch      if (index > subject_length - pattern_length) {
45359151504615d929945dc59db37bf1166937748c6Steve Block        return -1;
45459151504615d929945dc59db37bf1166937748c6Steve Block      }
45559151504615d929945dc59db37bf1166937748c6Steve Block    }
45659151504615d929945dc59db37bf1166937748c6Steve Block    j--;
457f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch    while (j >= 0 && pattern[j] == (subject[index + j])) j--;
45859151504615d929945dc59db37bf1166937748c6Steve Block    if (j < 0) {
459f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch      return index;
46059151504615d929945dc59db37bf1166937748c6Steve Block    } else {
461f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch      index += last_char_shift;
46259151504615d929945dc59db37bf1166937748c6Steve Block      // Badness increases by the number of characters we have
46359151504615d929945dc59db37bf1166937748c6Steve Block      // checked, and decreases by the number of characters we
46459151504615d929945dc59db37bf1166937748c6Steve Block      // can skip by shifting. It's a measure of how we are doing
46559151504615d929945dc59db37bf1166937748c6Steve Block      // compared to reading each character exactly once.
466f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch      badness += (pattern_length - j) - last_char_shift;
46759151504615d929945dc59db37bf1166937748c6Steve Block      if (badness > 0) {
468f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch        search->PopulateBoyerMooreTable();
469f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch        search->strategy_ = &BoyerMooreSearch;
470f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch        return BoyerMooreSearch(search, subject, index);
47159151504615d929945dc59db37bf1166937748c6Steve Block      }
47259151504615d929945dc59db37bf1166937748c6Steve Block    }
47359151504615d929945dc59db37bf1166937748c6Steve Block  }
47459151504615d929945dc59db37bf1166937748c6Steve Block  return -1;
47559151504615d929945dc59db37bf1166937748c6Steve Block}
47659151504615d929945dc59db37bf1166937748c6Steve Block
47759151504615d929945dc59db37bf1166937748c6Steve Block
478f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdochtemplate <typename PatternChar, typename SubjectChar>
479f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdochvoid StringSearch<PatternChar, SubjectChar>::PopulateBoyerMooreHorspoolTable() {
480f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  int pattern_length = pattern_.length();
48159151504615d929945dc59db37bf1166937748c6Steve Block
482f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  int* bad_char_occurrence = bad_char_table();
483f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch
484f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  // Only preprocess at most kBMMaxShift last characters of pattern.
485f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  int start = start_;
486f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  // Run forwards to populate bad_char_table, so that *last* instance
487f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  // of character equivalence class is the one registered.
488f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  // Notice: Doesn't include the last character.
489f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  int table_size = AlphabetSize();
490f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  if (start == 0) {  // All patterns less than kBMMaxShift in length.
491f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch    memset(bad_char_occurrence,
492f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch           -1,
493f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch           table_size * sizeof(*bad_char_occurrence));
494f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  } else {
495f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch    for (int i = 0; i < table_size; i++) {
496f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch      bad_char_occurrence[i] = start - 1;
49759151504615d929945dc59db37bf1166937748c6Steve Block    }
49859151504615d929945dc59db37bf1166937748c6Steve Block  }
499f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  for (int i = start; i < pattern_length - 1; i++) {
500f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch    PatternChar c = pattern_[i];
501f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch    int bucket = (sizeof(PatternChar) == 1) ? c : c % AlphabetSize();
502f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch    bad_char_occurrence[bucket] = i;
503f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  }
50459151504615d929945dc59db37bf1166937748c6Steve Block}
50559151504615d929945dc59db37bf1166937748c6Steve Block
506f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch//---------------------------------------------------------------------
507f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch// Linear string search with bailout to BMH.
508f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch//---------------------------------------------------------------------
50959151504615d929945dc59db37bf1166937748c6Steve Block
510f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch// Simple linear search for short patterns, which bails out if the string
511f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch// isn't found very early in the subject. Upgrades to BoyerMooreHorspool.
51259151504615d929945dc59db37bf1166937748c6Steve Blocktemplate <typename PatternChar, typename SubjectChar>
513f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdochint StringSearch<PatternChar, SubjectChar>::InitialSearch(
514f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch    StringSearch<PatternChar, SubjectChar>* search,
515f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch    Vector<const SubjectChar> subject,
516f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch    int index) {
517f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  Vector<const PatternChar> pattern = search->pattern_;
51859151504615d929945dc59db37bf1166937748c6Steve Block  int pattern_length = pattern.length();
51959151504615d929945dc59db37bf1166937748c6Steve Block  // Badness is a count of how much work we have done.  When we have
52059151504615d929945dc59db37bf1166937748c6Steve Block  // done enough work we decide it's probably worth switching to a better
52159151504615d929945dc59db37bf1166937748c6Steve Block  // algorithm.
52259151504615d929945dc59db37bf1166937748c6Steve Block  int badness = -10 - (pattern_length << 2);
52359151504615d929945dc59db37bf1166937748c6Steve Block
52459151504615d929945dc59db37bf1166937748c6Steve Block  // We know our pattern is at least 2 characters, we cache the first so
52559151504615d929945dc59db37bf1166937748c6Steve Block  // the common case of the first character not matching is faster.
526f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  for (int i = index, n = subject.length() - pattern_length; i <= n; i++) {
52759151504615d929945dc59db37bf1166937748c6Steve Block    badness++;
528f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch    if (badness <= 0) {
529014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch      i = FindFirstCharacter(pattern, subject, i);
530014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch      if (i == -1) return -1;
531014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch      DCHECK_LE(i, n);
532f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch      int j = 1;
533f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch      do {
534f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch        if (pattern[j] != subject[i + j]) {
535f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch          break;
536f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch        }
537f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch        j++;
538f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch      } while (j < pattern_length);
539f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch      if (j == pattern_length) {
540f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch        return i;
54159151504615d929945dc59db37bf1166937748c6Steve Block      }
542f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch      badness += j;
54359151504615d929945dc59db37bf1166937748c6Steve Block    } else {
544f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch      search->PopulateBoyerMooreHorspoolTable();
545f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch      search->strategy_ = &BoyerMooreHorspoolSearch;
546f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch      return BoyerMooreHorspoolSearch(search, subject, i);
54759151504615d929945dc59db37bf1166937748c6Steve Block    }
54859151504615d929945dc59db37bf1166937748c6Steve Block  }
54959151504615d929945dc59db37bf1166937748c6Steve Block  return -1;
55059151504615d929945dc59db37bf1166937748c6Steve Block}
55159151504615d929945dc59db37bf1166937748c6Steve Block
55259151504615d929945dc59db37bf1166937748c6Steve Block
553f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch// Perform a a single stand-alone search.
554f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch// If searching multiple times for the same pattern, a search
555f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch// object should be constructed once and the Search function then called
556f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch// for each search.
55759151504615d929945dc59db37bf1166937748c6Steve Blocktemplate <typename SubjectChar, typename PatternChar>
5583ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdochint SearchString(Isolate* isolate,
5593ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch                 Vector<const SubjectChar> subject,
5603ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch                 Vector<const PatternChar> pattern,
5613ef787dbeca8a5fb1086949cda830dccee07bfbdBen Murdoch                 int start_index) {
56244f0eee88ff00398ff7f715fab053374d808c90dSteve Block  StringSearch<PatternChar, SubjectChar> search(isolate, pattern);
563f87a203d89e1bbb6708282e0b64dbd13d59b723dBen Murdoch  return search.Search(subject, start_index);
56459151504615d929945dc59db37bf1166937748c6Steve Block}
56559151504615d929945dc59db37bf1166937748c6Steve Block
566014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch}  // namespace internal
567014dc512cdd3e367bee49a713fdc5ed92584a3e5Ben Murdoch}  // namespace v8
56859151504615d929945dc59db37bf1166937748c6Steve Block
56959151504615d929945dc59db37bf1166937748c6Steve Block#endif  // V8_STRING_SEARCH_H_
570