11b3afd1cab9087ca3c4e585d3da77d374d65c082mstarzinger@chromium.org// Copyright 2011 the V8 project authors. All rights reserved.
22ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org// Redistribution and use in source and binary forms, with or without
32ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org// modification, are permitted provided that the following conditions are
42ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org// met:
52ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org//
62ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org//     * Redistributions of source code must retain the above copyright
72ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org//       notice, this list of conditions and the following disclaimer.
82ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org//     * Redistributions in binary form must reproduce the above
92ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org//       copyright notice, this list of conditions and the following
102ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org//       disclaimer in the documentation and/or other materials provided
112ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org//       with the distribution.
122ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org//     * Neither the name of Google Inc. nor the names of its
132ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org//       contributors may be used to endorse or promote products derived
142ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org//       from this software without specific prior written permission.
152ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org//
162ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
172ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
182ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
192ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
202ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
212ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
222ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
232ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
242ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
252ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
262ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
272ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org
282ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org#ifndef V8_STRING_SEARCH_H_
292ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org#define V8_STRING_SEARCH_H_
302ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org
312ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.orgnamespace v8 {
322ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.orgnamespace internal {
332ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org
342ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org
35f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org//---------------------------------------------------------------------
36f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org// String Search object.
37f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org//---------------------------------------------------------------------
38f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org
39f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org// Class holding constants and methods that apply to all string search variants,
40f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org// independently of subject and pattern char size.
41f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.orgclass StringSearchBase {
42f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org protected:
43f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  // Cap on the maximal shift in the Boyer-Moore implementation. By setting a
44f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  // limit, we can fix the size of tables. For a needle longer than this limit,
45f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  // search will not be optimal, since we only build tables for a suffix
46f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  // of the string, but it is a safe approximation.
47ea88ce93dcb41a9200ec8747ae7642a5db1f4ce7sgjesse@chromium.org  static const int kBMMaxShift = Isolate::kBMMaxShift;
48f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org
49f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  // Reduce alphabet to this size.
50f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  // One of the tables used by Boyer-Moore and Boyer-Moore-Horspool has size
51f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  // proportional to the input alphabet. We reduce the alphabet size by
52f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  // equating input characters modulo a smaller alphabet size. This gives
53f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  // a potentially less efficient searching, but is a safe approximation.
54f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  // For needles using only characters in the same Unicode 256-code point page,
55f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  // there is no search speed degradation.
5659297c735ad2a41156ae9c723a39ff259ad061e0jkummerow@chromium.org  static const int kAsciiAlphabetSize = 256;
57ea88ce93dcb41a9200ec8747ae7642a5db1f4ce7sgjesse@chromium.org  static const int kUC16AlphabetSize = Isolate::kUC16AlphabetSize;
58f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org
59f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  // Bad-char shift table stored in the state. It's length is the alphabet size.
60f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  // For patterns below this length, the skip length of Boyer-Moore is too short
61f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  // to compensate for the algorithmic overhead compared to simple brute force.
62f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  static const int kBMMinPatternLength = 7;
63f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org
6459297c735ad2a41156ae9c723a39ff259ad061e0jkummerow@chromium.org  static inline bool IsOneByteString(Vector<const uint8_t> string) {
65f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org    return true;
66f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  }
67f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org
6859297c735ad2a41156ae9c723a39ff259ad061e0jkummerow@chromium.org  static inline bool IsOneByteString(Vector<const uc16> string) {
6959297c735ad2a41156ae9c723a39ff259ad061e0jkummerow@chromium.org    return String::IsOneByte(string.start(), string.length());
70f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  }
71f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org
72ea88ce93dcb41a9200ec8747ae7642a5db1f4ce7sgjesse@chromium.org  friend class Isolate;
73f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org};
74f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org
75f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org
76f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.orgtemplate <typename PatternChar, typename SubjectChar>
77f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.orgclass StringSearch : private StringSearchBase {
782ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org public:
79ea88ce93dcb41a9200ec8747ae7642a5db1f4ce7sgjesse@chromium.org  StringSearch(Isolate* isolate, Vector<const PatternChar> pattern)
80ea88ce93dcb41a9200ec8747ae7642a5db1f4ce7sgjesse@chromium.org      : isolate_(isolate),
81ea88ce93dcb41a9200ec8747ae7642a5db1f4ce7sgjesse@chromium.org        pattern_(pattern),
82f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org        start_(Max(0, pattern.length() - kBMMaxShift)) {
83f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org    if (sizeof(PatternChar) > sizeof(SubjectChar)) {
8459297c735ad2a41156ae9c723a39ff259ad061e0jkummerow@chromium.org      if (!IsOneByteString(pattern_)) {
85f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org        strategy_ = &FailSearch;
86f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org        return;
87f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org      }
88f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org    }
89f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org    int pattern_length = pattern_.length();
90f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org    if (pattern_length < kBMMinPatternLength) {
91f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org      if (pattern_length == 1) {
92f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org        strategy_ = &SingleCharSearch;
93f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org        return;
94f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org      }
95f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org      strategy_ = &LinearSearch;
96f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org      return;
972ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org    }
98f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org    strategy_ = &InitialSearch;
992ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org  }
100f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org
101f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  int Search(Vector<const SubjectChar> subject, int index) {
102f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org    return strategy_(this, subject, index);
1032ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org  }
104f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org
105f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  static inline int AlphabetSize() {
106f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org    if (sizeof(PatternChar) == 1) {
107f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org      // ASCII needle.
108f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org      return kAsciiAlphabetSize;
109f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org    } else {
110f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org      ASSERT(sizeof(PatternChar) == 2);
111f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org      // UC16 needle.
112f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org      return kUC16AlphabetSize;
113f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org    }
1142ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org  }
115f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org
1162ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org private:
117f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  typedef int (*SearchFunction)(  // NOLINT - it's not a cast!
118f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org      StringSearch<PatternChar, SubjectChar>*,
119f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org      Vector<const SubjectChar>,
120f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org      int);
121f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org
122f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  static int FailSearch(StringSearch<PatternChar, SubjectChar>*,
123f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org                        Vector<const SubjectChar>,
124f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org                        int) {
125f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org    return -1;
126f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  }
1272ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org
128f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  static int SingleCharSearch(StringSearch<PatternChar, SubjectChar>* search,
129f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org                              Vector<const SubjectChar> subject,
130f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org                              int start_index);
131f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org
132f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  static int LinearSearch(StringSearch<PatternChar, SubjectChar>* search,
133f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org                          Vector<const SubjectChar> subject,
134f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org                          int start_index);
135f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org
136f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  static int InitialSearch(StringSearch<PatternChar, SubjectChar>* search,
137f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org                           Vector<const SubjectChar> subject,
138f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org                           int start_index);
139f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org
140f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  static int BoyerMooreHorspoolSearch(
141f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org      StringSearch<PatternChar, SubjectChar>* search,
142f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org      Vector<const SubjectChar> subject,
143f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org      int start_index);
144f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org
145f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  static int BoyerMooreSearch(StringSearch<PatternChar, SubjectChar>* search,
146f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org                              Vector<const SubjectChar> subject,
147f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org                              int start_index);
148f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org
149f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  void PopulateBoyerMooreHorspoolTable();
150f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org
151f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  void PopulateBoyerMooreTable();
152f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org
15359297c735ad2a41156ae9c723a39ff259ad061e0jkummerow@chromium.org  static inline bool exceedsOneByte(uint8_t c) {
15459297c735ad2a41156ae9c723a39ff259ad061e0jkummerow@chromium.org    return false;
15559297c735ad2a41156ae9c723a39ff259ad061e0jkummerow@chromium.org  }
15659297c735ad2a41156ae9c723a39ff259ad061e0jkummerow@chromium.org
15759297c735ad2a41156ae9c723a39ff259ad061e0jkummerow@chromium.org  static inline bool exceedsOneByte(uint16_t c) {
15859297c735ad2a41156ae9c723a39ff259ad061e0jkummerow@chromium.org    return c > String::kMaxOneByteCharCodeU;
15959297c735ad2a41156ae9c723a39ff259ad061e0jkummerow@chromium.org  }
16059297c735ad2a41156ae9c723a39ff259ad061e0jkummerow@chromium.org
161f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  static inline int CharOccurrence(int* bad_char_occurrence,
162f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org                                   SubjectChar char_code) {
163f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org    if (sizeof(SubjectChar) == 1) {
164f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org      return bad_char_occurrence[static_cast<int>(char_code)];
165f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org    }
166f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org    if (sizeof(PatternChar) == 1) {
16759297c735ad2a41156ae9c723a39ff259ad061e0jkummerow@chromium.org      if (exceedsOneByte(char_code)) {
168f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org        return -1;
169f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org      }
170ec7067e20f87c4d63531d5cb80cb942eb960a610kmillikin@chromium.org      return bad_char_occurrence[static_cast<unsigned int>(char_code)];
171f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org    }
172ec7067e20f87c4d63531d5cb80cb942eb960a610kmillikin@chromium.org    // Both pattern and subject are UC16. Reduce character to equivalence class.
173f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org    int equiv_class = char_code % kUC16AlphabetSize;
174f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org    return bad_char_occurrence[equiv_class];
175f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  }
176f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org
177ea88ce93dcb41a9200ec8747ae7642a5db1f4ce7sgjesse@chromium.org  // The following tables are shared by all searches.
178ea88ce93dcb41a9200ec8747ae7642a5db1f4ce7sgjesse@chromium.org  // TODO(lrn): Introduce a way for a pattern to keep its tables
179ea88ce93dcb41a9200ec8747ae7642a5db1f4ce7sgjesse@chromium.org  // between searches (e.g., for an Atom RegExp).
180ea88ce93dcb41a9200ec8747ae7642a5db1f4ce7sgjesse@chromium.org
181ea88ce93dcb41a9200ec8747ae7642a5db1f4ce7sgjesse@chromium.org  // Store for the BoyerMoore(Horspool) bad char shift table.
182f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  // Return a table covering the last kBMMaxShift+1 positions of
183f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  // pattern.
184f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  int* bad_char_table() {
185ea88ce93dcb41a9200ec8747ae7642a5db1f4ce7sgjesse@chromium.org    return isolate_->bad_char_shift_table();
186f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  }
187f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org
188ea88ce93dcb41a9200ec8747ae7642a5db1f4ce7sgjesse@chromium.org  // Store for the BoyerMoore good suffix shift table.
189f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  int* good_suffix_shift_table() {
190f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org    // Return biased pointer that maps the range  [start_..pattern_.length()
191f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org    // to the kGoodSuffixShiftTable array.
192ea88ce93dcb41a9200ec8747ae7642a5db1f4ce7sgjesse@chromium.org    return isolate_->good_suffix_shift_table() - start_;
193f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  }
194f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org
195ea88ce93dcb41a9200ec8747ae7642a5db1f4ce7sgjesse@chromium.org  // Table used temporarily while building the BoyerMoore good suffix
196ea88ce93dcb41a9200ec8747ae7642a5db1f4ce7sgjesse@chromium.org  // shift table.
197f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  int* suffix_table() {
198f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org    // Return biased pointer that maps the range  [start_..pattern_.length()
199f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org    // to the kSuffixTable array.
200ea88ce93dcb41a9200ec8747ae7642a5db1f4ce7sgjesse@chromium.org    return isolate_->suffix_table() - start_;
201f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  }
202f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org
203ea88ce93dcb41a9200ec8747ae7642a5db1f4ce7sgjesse@chromium.org  Isolate* isolate_;
204f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  // The pattern to search for.
205f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  Vector<const PatternChar> pattern_;
206f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  // Pointer to implementation of the search.
207f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  SearchFunction strategy_;
208f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  // Cache value of Max(0, pattern_length() - kBMMaxShift)
209f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  int start_;
2102ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org};
2112ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org
2122ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org
213f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org//---------------------------------------------------------------------
214f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org// Single Character Pattern Search Strategy
215f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org//---------------------------------------------------------------------
2162ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org
217f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.orgtemplate <typename PatternChar, typename SubjectChar>
218f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.orgint StringSearch<PatternChar, SubjectChar>::SingleCharSearch(
219f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org    StringSearch<PatternChar, SubjectChar>* search,
220f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org    Vector<const SubjectChar> subject,
221f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org    int index) {
222f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  ASSERT_EQ(1, search->pattern_.length());
223f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  PatternChar pattern_first_char = search->pattern_[0];
224f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  int i = index;
225f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  if (sizeof(SubjectChar) == 1 && sizeof(PatternChar) == 1) {
226f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org    const SubjectChar* pos = reinterpret_cast<const SubjectChar*>(
227f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org        memchr(subject.start() + i,
228f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org               pattern_first_char,
229f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org               subject.length() - i));
230f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org    if (pos == NULL) return -1;
231f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org    return static_cast<int>(pos - subject.start());
2322ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org  } else {
233f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org    if (sizeof(PatternChar) > sizeof(SubjectChar)) {
23459297c735ad2a41156ae9c723a39ff259ad061e0jkummerow@chromium.org      if (exceedsOneByte(pattern_first_char)) {
235f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org        return -1;
236f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org      }
2372ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org    }
238f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org    SubjectChar search_char = static_cast<SubjectChar>(pattern_first_char);
239f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org    int n = subject.length();
240f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org    while (i < n) {
241f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org      if (subject[i++] == search_char) return i - 1;
242f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org    }
243f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org    return -1;
2442ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org  }
245f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org}
246f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org
247f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org//---------------------------------------------------------------------
248f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org// Linear Search Strategy
249f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org//---------------------------------------------------------------------
250f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org
251f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org
252f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.orgtemplate <typename PatternChar, typename SubjectChar>
2531b3afd1cab9087ca3c4e585d3da77d374d65c082mstarzinger@chromium.orginline bool CharCompare(const PatternChar* pattern,
2541b3afd1cab9087ca3c4e585d3da77d374d65c082mstarzinger@chromium.org                        const SubjectChar* subject,
2551b3afd1cab9087ca3c4e585d3da77d374d65c082mstarzinger@chromium.org                        int length) {
256f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  ASSERT(length > 0);
257f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  int pos = 0;
258f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  do {
259f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org    if (pattern[pos] != subject[pos]) {
260f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org      return false;
261f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org    }
262f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org    pos++;
263f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  } while (pos < length);
264f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  return true;
265f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org}
266f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org
267f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org
268f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org// Simple linear search for short patterns. Never bails out.
269f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.orgtemplate <typename PatternChar, typename SubjectChar>
270f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.orgint StringSearch<PatternChar, SubjectChar>::LinearSearch(
271f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org    StringSearch<PatternChar, SubjectChar>* search,
272f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org    Vector<const SubjectChar> subject,
273f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org    int index) {
274f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  Vector<const PatternChar> pattern = search->pattern_;
275f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  ASSERT(pattern.length() > 1);
276f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  int pattern_length = pattern.length();
277f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  PatternChar pattern_first_char = pattern[0];
278f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  int i = index;
279f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  int n = subject.length() - pattern_length;
280f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  while (i <= n) {
281f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org    if (sizeof(SubjectChar) == 1 && sizeof(PatternChar) == 1) {
282f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org      const SubjectChar* pos = reinterpret_cast<const SubjectChar*>(
283f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org          memchr(subject.start() + i,
284f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org                 pattern_first_char,
285f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org                 n - i + 1));
286f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org      if (pos == NULL) return -1;
287f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org      i = static_cast<int>(pos - subject.start()) + 1;
288f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org    } else {
289f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org      if (subject[i++] != pattern_first_char) continue;
290f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org    }
291f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org    // Loop extracted to separate function to allow using return to do
292f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org    // a deeper break.
293f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org    if (CharCompare(pattern.start() + 1,
294f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org                    subject.start() + i,
295f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org                    pattern_length - 1)) {
296f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org      return i - 1;
297f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org    }
298f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  }
299f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  return -1;
300f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org}
301f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org
302f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org//---------------------------------------------------------------------
303f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org// Boyer-Moore string search
304f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org//---------------------------------------------------------------------
305f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org
306f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.orgtemplate <typename PatternChar, typename SubjectChar>
307f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.orgint StringSearch<PatternChar, SubjectChar>::BoyerMooreSearch(
308f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org    StringSearch<PatternChar, SubjectChar>* search,
309f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org    Vector<const SubjectChar> subject,
310f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org    int start_index) {
311f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  Vector<const PatternChar> pattern = search->pattern_;
312f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  int subject_length = subject.length();
313f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  int pattern_length = pattern.length();
314f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  // Only preprocess at most kBMMaxShift last characters of pattern.
315f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  int start = search->start_;
316f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org
317f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  int* bad_char_occurence = search->bad_char_table();
318f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  int* good_suffix_shift = search->good_suffix_shift_table();
319f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org
320f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  PatternChar last_char = pattern[pattern_length - 1];
321f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  int index = start_index;
322f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  // Continue search from i.
323f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  while (index <= subject_length - pattern_length) {
324f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org    int j = pattern_length - 1;
325f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org    int c;
326f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org    while (last_char != (c = subject[index + j])) {
327f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org      int shift =
328f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org          j - CharOccurrence(bad_char_occurence, c);
329f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org      index += shift;
330f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org      if (index > subject_length - pattern_length) {
331f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org        return -1;
332f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org      }
333f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org    }
334f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org    while (j >= 0 && pattern[j] == (c = subject[index + j])) j--;
335f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org    if (j < 0) {
336f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org      return index;
337f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org    } else if (j < start) {
338f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org      // we have matched more than our tables allow us to be smart about.
339f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org      // Fall back on BMH shift.
340f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org      index += pattern_length - 1
341f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org          - CharOccurrence(bad_char_occurence,
342f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org                           static_cast<SubjectChar>(last_char));
343f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org    } else {
344f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org      int gs_shift = good_suffix_shift[j + 1];
345f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org      int bc_occ =
346f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org          CharOccurrence(bad_char_occurence, c);
347f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org      int shift = j - bc_occ;
348f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org      if (gs_shift > shift) {
349f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org        shift = gs_shift;
350f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org      }
351f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org      index += shift;
352f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org    }
3532ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org  }
354f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org
355f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  return -1;
3562ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org}
3572ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org
3582ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org
359f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.orgtemplate <typename PatternChar, typename SubjectChar>
360f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.orgvoid StringSearch<PatternChar, SubjectChar>::PopulateBoyerMooreTable() {
361f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  int pattern_length = pattern_.length();
362f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  const PatternChar* pattern = pattern_.start();
363f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  // Only look at the last kBMMaxShift characters of pattern (from start_
364f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  // to pattern_length).
365f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  int start = start_;
366f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  int length = pattern_length - start;
367f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org
368f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  // Biased tables so that we can use pattern indices as table indices,
369f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  // even if we only cover the part of the pattern from offset start.
370f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  int* shift_table = good_suffix_shift_table();
371f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  int* suffix_table = this->suffix_table();
372f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org
373f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  // Initialize table.
374f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  for (int i = start; i < pattern_length; i++) {
375f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org    shift_table[i] = length;
376f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  }
377f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  shift_table[pattern_length] = 1;
378f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  suffix_table[pattern_length] = pattern_length + 1;
3792ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org
380bf0c820d028452571c8c744ddd212c32c6d6a996danno@chromium.org  if (pattern_length <= start) {
381bf0c820d028452571c8c744ddd212c32c6d6a996danno@chromium.org    return;
382bf0c820d028452571c8c744ddd212c32c6d6a996danno@chromium.org  }
383bf0c820d028452571c8c744ddd212c32c6d6a996danno@chromium.org
384f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  // Find suffixes.
385f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  PatternChar last_char = pattern[pattern_length - 1];
386f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  int suffix = pattern_length + 1;
3872ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org  {
388f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org    int i = pattern_length;
3892ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org    while (i > start) {
3902ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org      PatternChar c = pattern[i - 1];
391f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org      while (suffix <= pattern_length && c != pattern[suffix - 1]) {
392f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org        if (shift_table[suffix] == length) {
393f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org          shift_table[suffix] = suffix - i;
3942ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org        }
395f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org        suffix = suffix_table[suffix];
3962ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org      }
397f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org      suffix_table[--i] = --suffix;
398f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org      if (suffix == pattern_length) {
3992ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org        // No suffix to extend, so we check against last_char only.
4002ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org        while ((i > start) && (pattern[i - 1] != last_char)) {
401f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org          if (shift_table[pattern_length] == length) {
402f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org            shift_table[pattern_length] = pattern_length - i;
4032ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org          }
404f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org          suffix_table[--i] = pattern_length;
4052ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org        }
4062ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org        if (i > start) {
407f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org          suffix_table[--i] = --suffix;
4082ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org        }
4092ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org      }
4102ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org    }
4112ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org  }
412f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  // Build shift table using suffixes.
413f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  if (suffix < pattern_length) {
414f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org    for (int i = start; i <= pattern_length; i++) {
415f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org      if (shift_table[i] == length) {
416f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org        shift_table[i] = suffix - start;
4172ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org      }
4182ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org      if (i == suffix) {
419f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org        suffix = suffix_table[suffix];
4202ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org      }
4212ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org    }
4222ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org  }
4232ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org}
4242ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org
425f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org//---------------------------------------------------------------------
426f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org// Boyer-Moore-Horspool string search.
427f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org//---------------------------------------------------------------------
4282ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org
429f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.orgtemplate <typename PatternChar, typename SubjectChar>
430f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.orgint StringSearch<PatternChar, SubjectChar>::BoyerMooreHorspoolSearch(
431f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org    StringSearch<PatternChar, SubjectChar>* search,
432f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org    Vector<const SubjectChar> subject,
433f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org    int start_index) {
434f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  Vector<const PatternChar> pattern = search->pattern_;
435f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  int subject_length = subject.length();
436f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  int pattern_length = pattern.length();
437f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  int* char_occurrences = search->bad_char_table();
438f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  int badness = -pattern_length;
4392ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org
4402ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org  // How bad we are doing without a good-suffix table.
441f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  PatternChar last_char = pattern[pattern_length - 1];
442f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  int last_char_shift = pattern_length - 1 -
443f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org      CharOccurrence(char_occurrences, static_cast<SubjectChar>(last_char));
4442ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org  // Perform search
445f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  int index = start_index;  // No matches found prior to this index.
446f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  while (index <= subject_length - pattern_length) {
447f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org    int j = pattern_length - 1;
448f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org    int subject_char;
449f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org    while (last_char != (subject_char = subject[index + j])) {
450f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org      int bc_occ = CharOccurrence(char_occurrences, subject_char);
4512ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org      int shift = j - bc_occ;
452f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org      index += shift;
4532ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org      badness += 1 - shift;  // at most zero, so badness cannot increase.
454f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org      if (index > subject_length - pattern_length) {
4552ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org        return -1;
4562ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org      }
4572ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org    }
4582ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org    j--;
459f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org    while (j >= 0 && pattern[j] == (subject[index + j])) j--;
4602ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org    if (j < 0) {
461f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org      return index;
4622ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org    } else {
463f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org      index += last_char_shift;
4642ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org      // Badness increases by the number of characters we have
4652ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org      // checked, and decreases by the number of characters we
4662ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org      // can skip by shifting. It's a measure of how we are doing
4672ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org      // compared to reading each character exactly once.
468f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org      badness += (pattern_length - j) - last_char_shift;
4692ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org      if (badness > 0) {
470f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org        search->PopulateBoyerMooreTable();
471f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org        search->strategy_ = &BoyerMooreSearch;
472f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org        return BoyerMooreSearch(search, subject, index);
4732ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org      }
4742ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org    }
4752ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org  }
4762ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org  return -1;
4772ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org}
4782ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org
4792ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org
480f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.orgtemplate <typename PatternChar, typename SubjectChar>
481f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.orgvoid StringSearch<PatternChar, SubjectChar>::PopulateBoyerMooreHorspoolTable() {
482f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  int pattern_length = pattern_.length();
4832ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org
484f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  int* bad_char_occurrence = bad_char_table();
485f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org
486f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  // Only preprocess at most kBMMaxShift last characters of pattern.
487f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  int start = start_;
488f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  // Run forwards to populate bad_char_table, so that *last* instance
489f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  // of character equivalence class is the one registered.
490f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  // Notice: Doesn't include the last character.
491f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  int table_size = AlphabetSize();
492f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  if (start == 0) {  // All patterns less than kBMMaxShift in length.
493f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org    memset(bad_char_occurrence,
494f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org           -1,
495f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org           table_size * sizeof(*bad_char_occurrence));
496f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  } else {
497f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org    for (int i = 0; i < table_size; i++) {
498f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org      bad_char_occurrence[i] = start - 1;
4992ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org    }
5002ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org  }
501f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  for (int i = start; i < pattern_length - 1; i++) {
502f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org    PatternChar c = pattern_[i];
503f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org    int bucket = (sizeof(PatternChar) == 1) ? c : c % AlphabetSize();
504f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org    bad_char_occurrence[bucket] = i;
505f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  }
5062ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org}
5072ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org
508f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org//---------------------------------------------------------------------
509f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org// Linear string search with bailout to BMH.
510f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org//---------------------------------------------------------------------
5112ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org
512f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org// Simple linear search for short patterns, which bails out if the string
513f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org// isn't found very early in the subject. Upgrades to BoyerMooreHorspool.
5142ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.orgtemplate <typename PatternChar, typename SubjectChar>
515f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.orgint StringSearch<PatternChar, SubjectChar>::InitialSearch(
516f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org    StringSearch<PatternChar, SubjectChar>* search,
517f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org    Vector<const SubjectChar> subject,
518f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org    int index) {
519f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  Vector<const PatternChar> pattern = search->pattern_;
5202ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org  int pattern_length = pattern.length();
5212ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org  // Badness is a count of how much work we have done.  When we have
5222ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org  // done enough work we decide it's probably worth switching to a better
5232ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org  // algorithm.
5242ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org  int badness = -10 - (pattern_length << 2);
5252ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org
5262ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org  // We know our pattern is at least 2 characters, we cache the first so
5272ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org  // the common case of the first character not matching is faster.
5282ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org  PatternChar pattern_first_char = pattern[0];
529f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  for (int i = index, n = subject.length() - pattern_length; i <= n; i++) {
5302ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org    badness++;
531f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org    if (badness <= 0) {
532f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org      if (sizeof(SubjectChar) == 1 && sizeof(PatternChar) == 1) {
533f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org        const SubjectChar* pos = reinterpret_cast<const SubjectChar*>(
534f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org            memchr(subject.start() + i,
535f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org                   pattern_first_char,
536f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org                   n - i + 1));
537f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org        if (pos == NULL) {
538f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org          return -1;
539f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org        }
540f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org        i = static_cast<int>(pos - subject.start());
541f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org      } else {
542f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org        if (subject[i] != pattern_first_char) continue;
5432ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org      }
544f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org      int j = 1;
545f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org      do {
546f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org        if (pattern[j] != subject[i + j]) {
547f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org          break;
548f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org        }
549f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org        j++;
550f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org      } while (j < pattern_length);
551f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org      if (j == pattern_length) {
552f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org        return i;
5532ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org      }
554f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org      badness += j;
5552ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org    } else {
556f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org      search->PopulateBoyerMooreHorspoolTable();
557f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org      search->strategy_ = &BoyerMooreHorspoolSearch;
558f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org      return BoyerMooreHorspoolSearch(search, subject, i);
5592ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org    }
5602ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org  }
5612ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org  return -1;
5622ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org}
5632ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org
5642ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org
565f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org// Perform a a single stand-alone search.
566f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org// If searching multiple times for the same pattern, a search
567f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org// object should be constructed once and the Search function then called
568f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org// for each search.
5692ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.orgtemplate <typename SubjectChar, typename PatternChar>
5701b3afd1cab9087ca3c4e585d3da77d374d65c082mstarzinger@chromium.orgint SearchString(Isolate* isolate,
5711b3afd1cab9087ca3c4e585d3da77d374d65c082mstarzinger@chromium.org                 Vector<const SubjectChar> subject,
5721b3afd1cab9087ca3c4e585d3da77d374d65c082mstarzinger@chromium.org                 Vector<const PatternChar> pattern,
5731b3afd1cab9087ca3c4e585d3da77d374d65c082mstarzinger@chromium.org                 int start_index) {
574ea88ce93dcb41a9200ec8747ae7642a5db1f4ce7sgjesse@chromium.org  StringSearch<PatternChar, SubjectChar> search(isolate, pattern);
575f05f2913e034b9332e55c02c9395e701725c02c1kmillikin@chromium.org  return search.Search(subject, start_index);
5762ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org}
5772ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org
5782ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org}}  // namespace v8::internal
5792ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org
5802ec107fe650fe56eed094ca017940f26af46644bsgjesse@chromium.org#endif  // V8_STRING_SEARCH_H_
581