1// Copyright 2014 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "src/runtime/runtime-utils.h"
6
7#include "src/arguments.h"
8#include "src/conversions-inl.h"
9#include "src/isolate-inl.h"
10#include "src/messages.h"
11#include "src/regexp/jsregexp-inl.h"
12#include "src/regexp/jsregexp.h"
13#include "src/string-builder.h"
14#include "src/string-search.h"
15
16namespace v8 {
17namespace internal {
18
19class CompiledReplacement {
20 public:
21  explicit CompiledReplacement(Zone* zone)
22      : parts_(1, zone), replacement_substrings_(0, zone), zone_(zone) {}
23
24  // Return whether the replacement is simple.
25  bool Compile(Handle<String> replacement, int capture_count,
26               int subject_length);
27
28  // Use Apply only if Compile returned false.
29  void Apply(ReplacementStringBuilder* builder, int match_from, int match_to,
30             int32_t* match);
31
32  // Number of distinct parts of the replacement pattern.
33  int parts() { return parts_.length(); }
34
35  Zone* zone() const { return zone_; }
36
37 private:
38  enum PartType {
39    SUBJECT_PREFIX = 1,
40    SUBJECT_SUFFIX,
41    SUBJECT_CAPTURE,
42    REPLACEMENT_SUBSTRING,
43    REPLACEMENT_STRING,
44    NUMBER_OF_PART_TYPES
45  };
46
47  struct ReplacementPart {
48    static inline ReplacementPart SubjectMatch() {
49      return ReplacementPart(SUBJECT_CAPTURE, 0);
50    }
51    static inline ReplacementPart SubjectCapture(int capture_index) {
52      return ReplacementPart(SUBJECT_CAPTURE, capture_index);
53    }
54    static inline ReplacementPart SubjectPrefix() {
55      return ReplacementPart(SUBJECT_PREFIX, 0);
56    }
57    static inline ReplacementPart SubjectSuffix(int subject_length) {
58      return ReplacementPart(SUBJECT_SUFFIX, subject_length);
59    }
60    static inline ReplacementPart ReplacementString() {
61      return ReplacementPart(REPLACEMENT_STRING, 0);
62    }
63    static inline ReplacementPart ReplacementSubString(int from, int to) {
64      DCHECK(from >= 0);
65      DCHECK(to > from);
66      return ReplacementPart(-from, to);
67    }
68
69    // If tag <= 0 then it is the negation of a start index of a substring of
70    // the replacement pattern, otherwise it's a value from PartType.
71    ReplacementPart(int tag, int data) : tag(tag), data(data) {
72      // Must be non-positive or a PartType value.
73      DCHECK(tag < NUMBER_OF_PART_TYPES);
74    }
75    // Either a value of PartType or a non-positive number that is
76    // the negation of an index into the replacement string.
77    int tag;
78    // The data value's interpretation depends on the value of tag:
79    // tag == SUBJECT_PREFIX ||
80    // tag == SUBJECT_SUFFIX:  data is unused.
81    // tag == SUBJECT_CAPTURE: data is the number of the capture.
82    // tag == REPLACEMENT_SUBSTRING ||
83    // tag == REPLACEMENT_STRING:    data is index into array of substrings
84    //                               of the replacement string.
85    // tag <= 0: Temporary representation of the substring of the replacement
86    //           string ranging over -tag .. data.
87    //           Is replaced by REPLACEMENT_{SUB,}STRING when we create the
88    //           substring objects.
89    int data;
90  };
91
92  template <typename Char>
93  bool ParseReplacementPattern(ZoneList<ReplacementPart>* parts,
94                               Vector<Char> characters, int capture_count,
95                               int subject_length, Zone* zone) {
96    int length = characters.length();
97    int last = 0;
98    for (int i = 0; i < length; i++) {
99      Char c = characters[i];
100      if (c == '$') {
101        int next_index = i + 1;
102        if (next_index == length) {  // No next character!
103          break;
104        }
105        Char c2 = characters[next_index];
106        switch (c2) {
107          case '$':
108            if (i > last) {
109              // There is a substring before. Include the first "$".
110              parts->Add(
111                  ReplacementPart::ReplacementSubString(last, next_index),
112                  zone);
113              last = next_index + 1;  // Continue after the second "$".
114            } else {
115              // Let the next substring start with the second "$".
116              last = next_index;
117            }
118            i = next_index;
119            break;
120          case '`':
121            if (i > last) {
122              parts->Add(ReplacementPart::ReplacementSubString(last, i), zone);
123            }
124            parts->Add(ReplacementPart::SubjectPrefix(), zone);
125            i = next_index;
126            last = i + 1;
127            break;
128          case '\'':
129            if (i > last) {
130              parts->Add(ReplacementPart::ReplacementSubString(last, i), zone);
131            }
132            parts->Add(ReplacementPart::SubjectSuffix(subject_length), zone);
133            i = next_index;
134            last = i + 1;
135            break;
136          case '&':
137            if (i > last) {
138              parts->Add(ReplacementPart::ReplacementSubString(last, i), zone);
139            }
140            parts->Add(ReplacementPart::SubjectMatch(), zone);
141            i = next_index;
142            last = i + 1;
143            break;
144          case '0':
145          case '1':
146          case '2':
147          case '3':
148          case '4':
149          case '5':
150          case '6':
151          case '7':
152          case '8':
153          case '9': {
154            int capture_ref = c2 - '0';
155            if (capture_ref > capture_count) {
156              i = next_index;
157              continue;
158            }
159            int second_digit_index = next_index + 1;
160            if (second_digit_index < length) {
161              // Peek ahead to see if we have two digits.
162              Char c3 = characters[second_digit_index];
163              if ('0' <= c3 && c3 <= '9') {  // Double digits.
164                int double_digit_ref = capture_ref * 10 + c3 - '0';
165                if (double_digit_ref <= capture_count) {
166                  next_index = second_digit_index;
167                  capture_ref = double_digit_ref;
168                }
169              }
170            }
171            if (capture_ref > 0) {
172              if (i > last) {
173                parts->Add(ReplacementPart::ReplacementSubString(last, i),
174                           zone);
175              }
176              DCHECK(capture_ref <= capture_count);
177              parts->Add(ReplacementPart::SubjectCapture(capture_ref), zone);
178              last = next_index + 1;
179            }
180            i = next_index;
181            break;
182          }
183          default:
184            i = next_index;
185            break;
186        }
187      }
188    }
189    if (length > last) {
190      if (last == 0) {
191        // Replacement is simple.  Do not use Apply to do the replacement.
192        return true;
193      } else {
194        parts->Add(ReplacementPart::ReplacementSubString(last, length), zone);
195      }
196    }
197    return false;
198  }
199
200  ZoneList<ReplacementPart> parts_;
201  ZoneList<Handle<String> > replacement_substrings_;
202  Zone* zone_;
203};
204
205
206bool CompiledReplacement::Compile(Handle<String> replacement, int capture_count,
207                                  int subject_length) {
208  {
209    DisallowHeapAllocation no_gc;
210    String::FlatContent content = replacement->GetFlatContent();
211    DCHECK(content.IsFlat());
212    bool simple = false;
213    if (content.IsOneByte()) {
214      simple = ParseReplacementPattern(&parts_, content.ToOneByteVector(),
215                                       capture_count, subject_length, zone());
216    } else {
217      DCHECK(content.IsTwoByte());
218      simple = ParseReplacementPattern(&parts_, content.ToUC16Vector(),
219                                       capture_count, subject_length, zone());
220    }
221    if (simple) return true;
222  }
223
224  Isolate* isolate = replacement->GetIsolate();
225  // Find substrings of replacement string and create them as String objects.
226  int substring_index = 0;
227  for (int i = 0, n = parts_.length(); i < n; i++) {
228    int tag = parts_[i].tag;
229    if (tag <= 0) {  // A replacement string slice.
230      int from = -tag;
231      int to = parts_[i].data;
232      replacement_substrings_.Add(
233          isolate->factory()->NewSubString(replacement, from, to), zone());
234      parts_[i].tag = REPLACEMENT_SUBSTRING;
235      parts_[i].data = substring_index;
236      substring_index++;
237    } else if (tag == REPLACEMENT_STRING) {
238      replacement_substrings_.Add(replacement, zone());
239      parts_[i].data = substring_index;
240      substring_index++;
241    }
242  }
243  return false;
244}
245
246
247void CompiledReplacement::Apply(ReplacementStringBuilder* builder,
248                                int match_from, int match_to, int32_t* match) {
249  DCHECK_LT(0, parts_.length());
250  for (int i = 0, n = parts_.length(); i < n; i++) {
251    ReplacementPart part = parts_[i];
252    switch (part.tag) {
253      case SUBJECT_PREFIX:
254        if (match_from > 0) builder->AddSubjectSlice(0, match_from);
255        break;
256      case SUBJECT_SUFFIX: {
257        int subject_length = part.data;
258        if (match_to < subject_length) {
259          builder->AddSubjectSlice(match_to, subject_length);
260        }
261        break;
262      }
263      case SUBJECT_CAPTURE: {
264        int capture = part.data;
265        int from = match[capture * 2];
266        int to = match[capture * 2 + 1];
267        if (from >= 0 && to > from) {
268          builder->AddSubjectSlice(from, to);
269        }
270        break;
271      }
272      case REPLACEMENT_SUBSTRING:
273      case REPLACEMENT_STRING:
274        builder->AddString(replacement_substrings_[part.data]);
275        break;
276      default:
277        UNREACHABLE();
278    }
279  }
280}
281
282
283void FindOneByteStringIndices(Vector<const uint8_t> subject, uint8_t pattern,
284                              ZoneList<int>* indices, unsigned int limit,
285                              Zone* zone) {
286  DCHECK(limit > 0);
287  // Collect indices of pattern in subject using memchr.
288  // Stop after finding at most limit values.
289  const uint8_t* subject_start = subject.start();
290  const uint8_t* subject_end = subject_start + subject.length();
291  const uint8_t* pos = subject_start;
292  while (limit > 0) {
293    pos = reinterpret_cast<const uint8_t*>(
294        memchr(pos, pattern, subject_end - pos));
295    if (pos == NULL) return;
296    indices->Add(static_cast<int>(pos - subject_start), zone);
297    pos++;
298    limit--;
299  }
300}
301
302
303void FindTwoByteStringIndices(const Vector<const uc16> subject, uc16 pattern,
304                              ZoneList<int>* indices, unsigned int limit,
305                              Zone* zone) {
306  DCHECK(limit > 0);
307  const uc16* subject_start = subject.start();
308  const uc16* subject_end = subject_start + subject.length();
309  for (const uc16* pos = subject_start; pos < subject_end && limit > 0; pos++) {
310    if (*pos == pattern) {
311      indices->Add(static_cast<int>(pos - subject_start), zone);
312      limit--;
313    }
314  }
315}
316
317
318template <typename SubjectChar, typename PatternChar>
319void FindStringIndices(Isolate* isolate, Vector<const SubjectChar> subject,
320                       Vector<const PatternChar> pattern,
321                       ZoneList<int>* indices, unsigned int limit, Zone* zone) {
322  DCHECK(limit > 0);
323  // Collect indices of pattern in subject.
324  // Stop after finding at most limit values.
325  int pattern_length = pattern.length();
326  int index = 0;
327  StringSearch<PatternChar, SubjectChar> search(isolate, pattern);
328  while (limit > 0) {
329    index = search.Search(subject, index);
330    if (index < 0) return;
331    indices->Add(index, zone);
332    index += pattern_length;
333    limit--;
334  }
335}
336
337
338void FindStringIndicesDispatch(Isolate* isolate, String* subject,
339                               String* pattern, ZoneList<int>* indices,
340                               unsigned int limit, Zone* zone) {
341  {
342    DisallowHeapAllocation no_gc;
343    String::FlatContent subject_content = subject->GetFlatContent();
344    String::FlatContent pattern_content = pattern->GetFlatContent();
345    DCHECK(subject_content.IsFlat());
346    DCHECK(pattern_content.IsFlat());
347    if (subject_content.IsOneByte()) {
348      Vector<const uint8_t> subject_vector = subject_content.ToOneByteVector();
349      if (pattern_content.IsOneByte()) {
350        Vector<const uint8_t> pattern_vector =
351            pattern_content.ToOneByteVector();
352        if (pattern_vector.length() == 1) {
353          FindOneByteStringIndices(subject_vector, pattern_vector[0], indices,
354                                   limit, zone);
355        } else {
356          FindStringIndices(isolate, subject_vector, pattern_vector, indices,
357                            limit, zone);
358        }
359      } else {
360        FindStringIndices(isolate, subject_vector,
361                          pattern_content.ToUC16Vector(), indices, limit, zone);
362      }
363    } else {
364      Vector<const uc16> subject_vector = subject_content.ToUC16Vector();
365      if (pattern_content.IsOneByte()) {
366        Vector<const uint8_t> pattern_vector =
367            pattern_content.ToOneByteVector();
368        if (pattern_vector.length() == 1) {
369          FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices,
370                                   limit, zone);
371        } else {
372          FindStringIndices(isolate, subject_vector, pattern_vector, indices,
373                            limit, zone);
374        }
375      } else {
376        Vector<const uc16> pattern_vector = pattern_content.ToUC16Vector();
377        if (pattern_vector.length() == 1) {
378          FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices,
379                                   limit, zone);
380        } else {
381          FindStringIndices(isolate, subject_vector, pattern_vector, indices,
382                            limit, zone);
383        }
384      }
385    }
386  }
387}
388
389
390template <typename ResultSeqString>
391MUST_USE_RESULT static Object* StringReplaceGlobalAtomRegExpWithString(
392    Isolate* isolate, Handle<String> subject, Handle<JSRegExp> pattern_regexp,
393    Handle<String> replacement, Handle<JSArray> last_match_info) {
394  DCHECK(subject->IsFlat());
395  DCHECK(replacement->IsFlat());
396
397  ZoneScope zone_scope(isolate->runtime_zone());
398  ZoneList<int> indices(8, zone_scope.zone());
399  DCHECK_EQ(JSRegExp::ATOM, pattern_regexp->TypeTag());
400  String* pattern =
401      String::cast(pattern_regexp->DataAt(JSRegExp::kAtomPatternIndex));
402  int subject_len = subject->length();
403  int pattern_len = pattern->length();
404  int replacement_len = replacement->length();
405
406  FindStringIndicesDispatch(isolate, *subject, pattern, &indices, 0xffffffff,
407                            zone_scope.zone());
408
409  int matches = indices.length();
410  if (matches == 0) return *subject;
411
412  // Detect integer overflow.
413  int64_t result_len_64 = (static_cast<int64_t>(replacement_len) -
414                           static_cast<int64_t>(pattern_len)) *
415                              static_cast<int64_t>(matches) +
416                          static_cast<int64_t>(subject_len);
417  int result_len;
418  if (result_len_64 > static_cast<int64_t>(String::kMaxLength)) {
419    STATIC_ASSERT(String::kMaxLength < kMaxInt);
420    result_len = kMaxInt;  // Provoke exception.
421  } else {
422    result_len = static_cast<int>(result_len_64);
423  }
424
425  int subject_pos = 0;
426  int result_pos = 0;
427
428  MaybeHandle<SeqString> maybe_res;
429  if (ResultSeqString::kHasOneByteEncoding) {
430    maybe_res = isolate->factory()->NewRawOneByteString(result_len);
431  } else {
432    maybe_res = isolate->factory()->NewRawTwoByteString(result_len);
433  }
434  Handle<SeqString> untyped_res;
435  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, untyped_res, maybe_res);
436  Handle<ResultSeqString> result = Handle<ResultSeqString>::cast(untyped_res);
437
438  for (int i = 0; i < matches; i++) {
439    // Copy non-matched subject content.
440    if (subject_pos < indices.at(i)) {
441      String::WriteToFlat(*subject, result->GetChars() + result_pos,
442                          subject_pos, indices.at(i));
443      result_pos += indices.at(i) - subject_pos;
444    }
445
446    // Replace match.
447    if (replacement_len > 0) {
448      String::WriteToFlat(*replacement, result->GetChars() + result_pos, 0,
449                          replacement_len);
450      result_pos += replacement_len;
451    }
452
453    subject_pos = indices.at(i) + pattern_len;
454  }
455  // Add remaining subject content at the end.
456  if (subject_pos < subject_len) {
457    String::WriteToFlat(*subject, result->GetChars() + result_pos, subject_pos,
458                        subject_len);
459  }
460
461  int32_t match_indices[] = {indices.at(matches - 1),
462                             indices.at(matches - 1) + pattern_len};
463  RegExpImpl::SetLastMatchInfo(last_match_info, subject, 0, match_indices);
464
465  return *result;
466}
467
468
469MUST_USE_RESULT static Object* StringReplaceGlobalRegExpWithString(
470    Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp,
471    Handle<String> replacement, Handle<JSArray> last_match_info) {
472  DCHECK(subject->IsFlat());
473  DCHECK(replacement->IsFlat());
474
475  int capture_count = regexp->CaptureCount();
476  int subject_length = subject->length();
477
478  // CompiledReplacement uses zone allocation.
479  ZoneScope zone_scope(isolate->runtime_zone());
480  CompiledReplacement compiled_replacement(zone_scope.zone());
481  bool simple_replace =
482      compiled_replacement.Compile(replacement, capture_count, subject_length);
483
484  // Shortcut for simple non-regexp global replacements
485  if (regexp->TypeTag() == JSRegExp::ATOM && simple_replace) {
486    if (subject->HasOnlyOneByteChars() && replacement->HasOnlyOneByteChars()) {
487      return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>(
488          isolate, subject, regexp, replacement, last_match_info);
489    } else {
490      return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>(
491          isolate, subject, regexp, replacement, last_match_info);
492    }
493  }
494
495  RegExpImpl::GlobalCache global_cache(regexp, subject, isolate);
496  if (global_cache.HasException()) return isolate->heap()->exception();
497
498  int32_t* current_match = global_cache.FetchNext();
499  if (current_match == NULL) {
500    if (global_cache.HasException()) return isolate->heap()->exception();
501    return *subject;
502  }
503
504  // Guessing the number of parts that the final result string is built
505  // from. Global regexps can match any number of times, so we guess
506  // conservatively.
507  int expected_parts = (compiled_replacement.parts() + 1) * 4 + 1;
508  ReplacementStringBuilder builder(isolate->heap(), subject, expected_parts);
509
510  // Number of parts added by compiled replacement plus preceeding
511  // string and possibly suffix after last match.  It is possible for
512  // all components to use two elements when encoded as two smis.
513  const int parts_added_per_loop = 2 * (compiled_replacement.parts() + 2);
514
515  int prev = 0;
516
517  do {
518    builder.EnsureCapacity(parts_added_per_loop);
519
520    int start = current_match[0];
521    int end = current_match[1];
522
523    if (prev < start) {
524      builder.AddSubjectSlice(prev, start);
525    }
526
527    if (simple_replace) {
528      builder.AddString(replacement);
529    } else {
530      compiled_replacement.Apply(&builder, start, end, current_match);
531    }
532    prev = end;
533
534    current_match = global_cache.FetchNext();
535  } while (current_match != NULL);
536
537  if (global_cache.HasException()) return isolate->heap()->exception();
538
539  if (prev < subject_length) {
540    builder.EnsureCapacity(2);
541    builder.AddSubjectSlice(prev, subject_length);
542  }
543
544  RegExpImpl::SetLastMatchInfo(last_match_info, subject, capture_count,
545                               global_cache.LastSuccessfulMatch());
546
547  RETURN_RESULT_OR_FAILURE(isolate, builder.ToString());
548}
549
550
551template <typename ResultSeqString>
552MUST_USE_RESULT static Object* StringReplaceGlobalRegExpWithEmptyString(
553    Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp,
554    Handle<JSArray> last_match_info) {
555  DCHECK(subject->IsFlat());
556
557  // Shortcut for simple non-regexp global replacements
558  if (regexp->TypeTag() == JSRegExp::ATOM) {
559    Handle<String> empty_string = isolate->factory()->empty_string();
560    if (subject->IsOneByteRepresentation()) {
561      return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>(
562          isolate, subject, regexp, empty_string, last_match_info);
563    } else {
564      return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>(
565          isolate, subject, regexp, empty_string, last_match_info);
566    }
567  }
568
569  RegExpImpl::GlobalCache global_cache(regexp, subject, isolate);
570  if (global_cache.HasException()) return isolate->heap()->exception();
571
572  int32_t* current_match = global_cache.FetchNext();
573  if (current_match == NULL) {
574    if (global_cache.HasException()) return isolate->heap()->exception();
575    return *subject;
576  }
577
578  int start = current_match[0];
579  int end = current_match[1];
580  int capture_count = regexp->CaptureCount();
581  int subject_length = subject->length();
582
583  int new_length = subject_length - (end - start);
584  if (new_length == 0) return isolate->heap()->empty_string();
585
586  Handle<ResultSeqString> answer;
587  if (ResultSeqString::kHasOneByteEncoding) {
588    answer = Handle<ResultSeqString>::cast(
589        isolate->factory()->NewRawOneByteString(new_length).ToHandleChecked());
590  } else {
591    answer = Handle<ResultSeqString>::cast(
592        isolate->factory()->NewRawTwoByteString(new_length).ToHandleChecked());
593  }
594
595  int prev = 0;
596  int position = 0;
597
598  do {
599    start = current_match[0];
600    end = current_match[1];
601    if (prev < start) {
602      // Add substring subject[prev;start] to answer string.
603      String::WriteToFlat(*subject, answer->GetChars() + position, prev, start);
604      position += start - prev;
605    }
606    prev = end;
607
608    current_match = global_cache.FetchNext();
609  } while (current_match != NULL);
610
611  if (global_cache.HasException()) return isolate->heap()->exception();
612
613  RegExpImpl::SetLastMatchInfo(last_match_info, subject, capture_count,
614                               global_cache.LastSuccessfulMatch());
615
616  if (prev < subject_length) {
617    // Add substring subject[prev;length] to answer string.
618    String::WriteToFlat(*subject, answer->GetChars() + position, prev,
619                        subject_length);
620    position += subject_length - prev;
621  }
622
623  if (position == 0) return isolate->heap()->empty_string();
624
625  // Shorten string and fill
626  int string_size = ResultSeqString::SizeFor(position);
627  int allocated_string_size = ResultSeqString::SizeFor(new_length);
628  int delta = allocated_string_size - string_size;
629
630  answer->set_length(position);
631  if (delta == 0) return *answer;
632
633  Address end_of_string = answer->address() + string_size;
634  Heap* heap = isolate->heap();
635
636  // The trimming is performed on a newly allocated object, which is on a
637  // fresly allocated page or on an already swept page. Hence, the sweeper
638  // thread can not get confused with the filler creation. No synchronization
639  // needed.
640  // TODO(hpayer): We should shrink the large object page if the size
641  // of the object changed significantly.
642  if (!heap->lo_space()->Contains(*answer)) {
643    heap->CreateFillerObjectAt(end_of_string, delta, ClearRecordedSlots::kNo);
644  }
645  heap->AdjustLiveBytes(*answer, -delta, Heap::CONCURRENT_TO_SWEEPER);
646  return *answer;
647}
648
649
650RUNTIME_FUNCTION(Runtime_StringReplaceGlobalRegExpWithString) {
651  HandleScope scope(isolate);
652  DCHECK(args.length() == 4);
653
654  CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);
655  CONVERT_ARG_HANDLE_CHECKED(String, replacement, 2);
656  CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 1);
657  CONVERT_ARG_HANDLE_CHECKED(JSArray, last_match_info, 3);
658
659  CHECK(regexp->GetFlags() & JSRegExp::kGlobal);
660  CHECK(last_match_info->HasFastObjectElements());
661
662  subject = String::Flatten(subject);
663
664  if (replacement->length() == 0) {
665    if (subject->HasOnlyOneByteChars()) {
666      return StringReplaceGlobalRegExpWithEmptyString<SeqOneByteString>(
667          isolate, subject, regexp, last_match_info);
668    } else {
669      return StringReplaceGlobalRegExpWithEmptyString<SeqTwoByteString>(
670          isolate, subject, regexp, last_match_info);
671    }
672  }
673
674  replacement = String::Flatten(replacement);
675
676  return StringReplaceGlobalRegExpWithString(isolate, subject, regexp,
677                                             replacement, last_match_info);
678}
679
680
681RUNTIME_FUNCTION(Runtime_StringSplit) {
682  HandleScope handle_scope(isolate);
683  DCHECK(args.length() == 3);
684  CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);
685  CONVERT_ARG_HANDLE_CHECKED(String, pattern, 1);
686  CONVERT_NUMBER_CHECKED(uint32_t, limit, Uint32, args[2]);
687  CHECK(limit > 0);
688
689  int subject_length = subject->length();
690  int pattern_length = pattern->length();
691  CHECK(pattern_length > 0);
692
693  if (limit == 0xffffffffu) {
694    FixedArray* last_match_cache_unused;
695    Handle<Object> cached_answer(
696        RegExpResultsCache::Lookup(isolate->heap(), *subject, *pattern,
697                                   &last_match_cache_unused,
698                                   RegExpResultsCache::STRING_SPLIT_SUBSTRINGS),
699        isolate);
700    if (*cached_answer != Smi::FromInt(0)) {
701      // The cache FixedArray is a COW-array and can therefore be reused.
702      Handle<JSArray> result = isolate->factory()->NewJSArrayWithElements(
703          Handle<FixedArray>::cast(cached_answer));
704      return *result;
705    }
706  }
707
708  // The limit can be very large (0xffffffffu), but since the pattern
709  // isn't empty, we can never create more parts than ~half the length
710  // of the subject.
711
712  subject = String::Flatten(subject);
713  pattern = String::Flatten(pattern);
714
715  static const int kMaxInitialListCapacity = 16;
716
717  ZoneScope zone_scope(isolate->runtime_zone());
718
719  // Find (up to limit) indices of separator and end-of-string in subject
720  int initial_capacity = Min<uint32_t>(kMaxInitialListCapacity, limit);
721  ZoneList<int> indices(initial_capacity, zone_scope.zone());
722
723  FindStringIndicesDispatch(isolate, *subject, *pattern, &indices, limit,
724                            zone_scope.zone());
725
726  if (static_cast<uint32_t>(indices.length()) < limit) {
727    indices.Add(subject_length, zone_scope.zone());
728  }
729
730  // The list indices now contains the end of each part to create.
731
732  // Create JSArray of substrings separated by separator.
733  int part_count = indices.length();
734
735  Handle<JSArray> result =
736      isolate->factory()->NewJSArray(FAST_ELEMENTS, part_count, part_count,
737                                     INITIALIZE_ARRAY_ELEMENTS_WITH_HOLE);
738
739  DCHECK(result->HasFastObjectElements());
740
741  Handle<FixedArray> elements(FixedArray::cast(result->elements()));
742
743  if (part_count == 1 && indices.at(0) == subject_length) {
744    elements->set(0, *subject);
745  } else {
746    int part_start = 0;
747    FOR_WITH_HANDLE_SCOPE(isolate, int, i = 0, i, i < part_count, i++, {
748      int part_end = indices.at(i);
749      Handle<String> substring =
750          isolate->factory()->NewProperSubString(subject, part_start, part_end);
751      elements->set(i, *substring);
752      part_start = part_end + pattern_length;
753    });
754  }
755
756  if (limit == 0xffffffffu) {
757    if (result->HasFastObjectElements()) {
758      RegExpResultsCache::Enter(isolate, subject, pattern, elements,
759                                isolate->factory()->empty_fixed_array(),
760                                RegExpResultsCache::STRING_SPLIT_SUBSTRINGS);
761    }
762  }
763
764  return *result;
765}
766
767
768RUNTIME_FUNCTION(Runtime_RegExpExec) {
769  HandleScope scope(isolate);
770  DCHECK(args.length() == 4);
771  CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
772  CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
773  CONVERT_INT32_ARG_CHECKED(index, 2);
774  CONVERT_ARG_HANDLE_CHECKED(JSArray, last_match_info, 3);
775  // Due to the way the JS calls are constructed this must be less than the
776  // length of a string, i.e. it is always a Smi.  We check anyway for security.
777  CHECK(index >= 0);
778  CHECK(index <= subject->length());
779  isolate->counters()->regexp_entry_runtime()->Increment();
780  RETURN_RESULT_OR_FAILURE(
781      isolate, RegExpImpl::Exec(regexp, subject, index, last_match_info));
782}
783
784
785RUNTIME_FUNCTION(Runtime_RegExpFlags) {
786  SealHandleScope shs(isolate);
787  DCHECK(args.length() == 1);
788  CONVERT_ARG_CHECKED(JSRegExp, regexp, 0);
789  return regexp->flags();
790}
791
792
793RUNTIME_FUNCTION(Runtime_RegExpSource) {
794  SealHandleScope shs(isolate);
795  DCHECK(args.length() == 1);
796  CONVERT_ARG_CHECKED(JSRegExp, regexp, 0);
797  return regexp->source();
798}
799
800
801RUNTIME_FUNCTION(Runtime_RegExpConstructResult) {
802  HandleScope handle_scope(isolate);
803  DCHECK(args.length() == 3);
804  CONVERT_SMI_ARG_CHECKED(size, 0);
805  CHECK(size >= 0 && size <= FixedArray::kMaxLength);
806  CONVERT_ARG_HANDLE_CHECKED(Object, index, 1);
807  CONVERT_ARG_HANDLE_CHECKED(Object, input, 2);
808  Handle<FixedArray> elements = isolate->factory()->NewFixedArray(size);
809  Handle<Map> regexp_map(isolate->native_context()->regexp_result_map());
810  Handle<JSObject> object =
811      isolate->factory()->NewJSObjectFromMap(regexp_map, NOT_TENURED);
812  Handle<JSArray> array = Handle<JSArray>::cast(object);
813  array->set_elements(*elements);
814  array->set_length(Smi::FromInt(size));
815  // Write in-object properties after the length of the array.
816  array->InObjectPropertyAtPut(JSRegExpResult::kIndexIndex, *index);
817  array->InObjectPropertyAtPut(JSRegExpResult::kInputIndex, *input);
818  return *array;
819}
820
821
822RUNTIME_FUNCTION(Runtime_RegExpInitializeAndCompile) {
823  HandleScope scope(isolate);
824  DCHECK(args.length() == 3);
825  CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
826  CONVERT_ARG_HANDLE_CHECKED(String, source, 1);
827  CONVERT_ARG_HANDLE_CHECKED(String, flags, 2);
828
829  RETURN_FAILURE_ON_EXCEPTION(isolate,
830                              JSRegExp::Initialize(regexp, source, flags));
831
832  return *regexp;
833}
834
835
836// Only called from Runtime_RegExpExecMultiple so it doesn't need to maintain
837// separate last match info.  See comment on that function.
838template <bool has_capture>
839static Object* SearchRegExpMultiple(Isolate* isolate, Handle<String> subject,
840                                    Handle<JSRegExp> regexp,
841                                    Handle<JSArray> last_match_array,
842                                    Handle<JSArray> result_array) {
843  DCHECK(subject->IsFlat());
844  DCHECK_NE(has_capture, regexp->CaptureCount() == 0);
845
846  int capture_count = regexp->CaptureCount();
847  int subject_length = subject->length();
848
849  static const int kMinLengthToCache = 0x1000;
850
851  if (subject_length > kMinLengthToCache) {
852    FixedArray* last_match_cache;
853    Object* cached_answer = RegExpResultsCache::Lookup(
854        isolate->heap(), *subject, regexp->data(), &last_match_cache,
855        RegExpResultsCache::REGEXP_MULTIPLE_INDICES);
856    if (cached_answer->IsFixedArray()) {
857      int capture_registers = (capture_count + 1) * 2;
858      int32_t* last_match = NewArray<int32_t>(capture_registers);
859      for (int i = 0; i < capture_registers; i++) {
860        last_match[i] = Smi::cast(last_match_cache->get(i))->value();
861      }
862      Handle<FixedArray> cached_fixed_array =
863          Handle<FixedArray>(FixedArray::cast(cached_answer));
864      // The cache FixedArray is a COW-array and can therefore be reused.
865      JSArray::SetContent(result_array, cached_fixed_array);
866      RegExpImpl::SetLastMatchInfo(last_match_array, subject, capture_count,
867                                   last_match);
868      DeleteArray(last_match);
869      return *result_array;
870    }
871  }
872
873  RegExpImpl::GlobalCache global_cache(regexp, subject, isolate);
874  if (global_cache.HasException()) return isolate->heap()->exception();
875
876  // Ensured in Runtime_RegExpExecMultiple.
877  DCHECK(result_array->HasFastObjectElements());
878  Handle<FixedArray> result_elements(
879      FixedArray::cast(result_array->elements()));
880  if (result_elements->length() < 16) {
881    result_elements = isolate->factory()->NewFixedArrayWithHoles(16);
882  }
883
884  FixedArrayBuilder builder(result_elements);
885
886  // Position to search from.
887  int match_start = -1;
888  int match_end = 0;
889  bool first = true;
890
891  // Two smis before and after the match, for very long strings.
892  static const int kMaxBuilderEntriesPerRegExpMatch = 5;
893
894  while (true) {
895    int32_t* current_match = global_cache.FetchNext();
896    if (current_match == NULL) break;
897    match_start = current_match[0];
898    builder.EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch);
899    if (match_end < match_start) {
900      ReplacementStringBuilder::AddSubjectSlice(&builder, match_end,
901                                                match_start);
902    }
903    match_end = current_match[1];
904    {
905      // Avoid accumulating new handles inside loop.
906      HandleScope temp_scope(isolate);
907      Handle<String> match;
908      if (!first) {
909        match = isolate->factory()->NewProperSubString(subject, match_start,
910                                                       match_end);
911      } else {
912        match =
913            isolate->factory()->NewSubString(subject, match_start, match_end);
914        first = false;
915      }
916
917      if (has_capture) {
918        // Arguments array to replace function is match, captures, index and
919        // subject, i.e., 3 + capture count in total.
920        Handle<FixedArray> elements =
921            isolate->factory()->NewFixedArray(3 + capture_count);
922
923        elements->set(0, *match);
924        for (int i = 1; i <= capture_count; i++) {
925          int start = current_match[i * 2];
926          if (start >= 0) {
927            int end = current_match[i * 2 + 1];
928            DCHECK(start <= end);
929            Handle<String> substring =
930                isolate->factory()->NewSubString(subject, start, end);
931            elements->set(i, *substring);
932          } else {
933            DCHECK(current_match[i * 2 + 1] < 0);
934            elements->set(i, isolate->heap()->undefined_value());
935          }
936        }
937        elements->set(capture_count + 1, Smi::FromInt(match_start));
938        elements->set(capture_count + 2, *subject);
939        builder.Add(*isolate->factory()->NewJSArrayWithElements(elements));
940      } else {
941        builder.Add(*match);
942      }
943    }
944  }
945
946  if (global_cache.HasException()) return isolate->heap()->exception();
947
948  if (match_start >= 0) {
949    // Finished matching, with at least one match.
950    if (match_end < subject_length) {
951      ReplacementStringBuilder::AddSubjectSlice(&builder, match_end,
952                                                subject_length);
953    }
954
955    RegExpImpl::SetLastMatchInfo(last_match_array, subject, capture_count,
956                                 global_cache.LastSuccessfulMatch());
957
958    if (subject_length > kMinLengthToCache) {
959      // Store the last successful match into the array for caching.
960      // TODO(yangguo): do not expose last match to JS and simplify caching.
961      int capture_registers = (capture_count + 1) * 2;
962      Handle<FixedArray> last_match_cache =
963          isolate->factory()->NewFixedArray(capture_registers);
964      int32_t* last_match = global_cache.LastSuccessfulMatch();
965      for (int i = 0; i < capture_registers; i++) {
966        last_match_cache->set(i, Smi::FromInt(last_match[i]));
967      }
968      Handle<FixedArray> result_fixed_array = builder.array();
969      result_fixed_array->Shrink(builder.length());
970      // Cache the result and turn the FixedArray into a COW array.
971      RegExpResultsCache::Enter(
972          isolate, subject, handle(regexp->data(), isolate), result_fixed_array,
973          last_match_cache, RegExpResultsCache::REGEXP_MULTIPLE_INDICES);
974    }
975    return *builder.ToJSArray(result_array);
976  } else {
977    return isolate->heap()->null_value();  // No matches at all.
978  }
979}
980
981
982// This is only called for StringReplaceGlobalRegExpWithFunction.  This sets
983// lastMatchInfoOverride to maintain the last match info, so we don't need to
984// set any other last match array info.
985RUNTIME_FUNCTION(Runtime_RegExpExecMultiple) {
986  HandleScope handles(isolate);
987  DCHECK(args.length() == 4);
988
989  CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
990  CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
991  CONVERT_ARG_HANDLE_CHECKED(JSArray, last_match_info, 2);
992  CONVERT_ARG_HANDLE_CHECKED(JSArray, result_array, 3);
993  CHECK(last_match_info->HasFastObjectElements());
994  CHECK(result_array->HasFastObjectElements());
995
996  subject = String::Flatten(subject);
997  CHECK(regexp->GetFlags() & JSRegExp::kGlobal);
998
999  if (regexp->CaptureCount() == 0) {
1000    return SearchRegExpMultiple<false>(isolate, subject, regexp,
1001                                       last_match_info, result_array);
1002  } else {
1003    return SearchRegExpMultiple<true>(isolate, subject, regexp, last_match_info,
1004                                      result_array);
1005  }
1006}
1007
1008
1009RUNTIME_FUNCTION(Runtime_RegExpExecReThrow) {
1010  SealHandleScope shs(isolate);
1011  DCHECK(args.length() == 4);
1012  Object* exception = isolate->pending_exception();
1013  isolate->clear_pending_exception();
1014  return isolate->ReThrow(exception);
1015}
1016
1017
1018RUNTIME_FUNCTION(Runtime_IsRegExp) {
1019  SealHandleScope shs(isolate);
1020  DCHECK(args.length() == 1);
1021  CONVERT_ARG_CHECKED(Object, obj, 0);
1022  return isolate->heap()->ToBoolean(obj->IsJSRegExp());
1023}
1024}  // namespace internal
1025}  // namespace v8
1026