1/*
2 * Copyright (C) 2009 Apple Inc. All rights reserved.
3 * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27#include "config.h"
28#include "YarrInterpreter.h"
29
30#include "Yarr.h"
31#include <wtf/BumpPointerAllocator.h>
32
33#ifndef NDEBUG
34#include <stdio.h>
35#endif
36
37using namespace WTF;
38
39namespace JSC { namespace Yarr {
40
41class Interpreter {
42public:
43    struct ParenthesesDisjunctionContext;
44
45    struct BackTrackInfoPatternCharacter {
46        uintptr_t matchAmount;
47    };
48    struct BackTrackInfoCharacterClass {
49        uintptr_t matchAmount;
50    };
51    struct BackTrackInfoBackReference {
52        uintptr_t begin; // Not really needed for greedy quantifiers.
53        uintptr_t matchAmount; // Not really needed for fixed quantifiers.
54    };
55    struct BackTrackInfoAlternative {
56        uintptr_t offset;
57    };
58    struct BackTrackInfoParentheticalAssertion {
59        uintptr_t begin;
60    };
61    struct BackTrackInfoParenthesesOnce {
62        uintptr_t begin;
63    };
64    struct BackTrackInfoParenthesesTerminal {
65        uintptr_t begin;
66    };
67    struct BackTrackInfoParentheses {
68        uintptr_t matchAmount;
69        ParenthesesDisjunctionContext* lastContext;
70    };
71
72    static inline void appendParenthesesDisjunctionContext(BackTrackInfoParentheses* backTrack, ParenthesesDisjunctionContext* context)
73    {
74        context->next = backTrack->lastContext;
75        backTrack->lastContext = context;
76        ++backTrack->matchAmount;
77    }
78
79    static inline void popParenthesesDisjunctionContext(BackTrackInfoParentheses* backTrack)
80    {
81        ASSERT(backTrack->matchAmount);
82        ASSERT(backTrack->lastContext);
83        backTrack->lastContext = backTrack->lastContext->next;
84        --backTrack->matchAmount;
85    }
86
87    struct DisjunctionContext
88    {
89        DisjunctionContext()
90            : term(0)
91        {
92        }
93
94        void* operator new(size_t, void* where)
95        {
96            return where;
97        }
98
99        int term;
100        unsigned matchBegin;
101        unsigned matchEnd;
102        uintptr_t frame[1];
103    };
104
105    DisjunctionContext* allocDisjunctionContext(ByteDisjunction* disjunction)
106    {
107        size_t size = sizeof(DisjunctionContext) - sizeof(uintptr_t) + disjunction->m_frameSize * sizeof(uintptr_t);
108        allocatorPool = allocatorPool->ensureCapacity(size);
109        if (!allocatorPool)
110            CRASH();
111        return new(allocatorPool->alloc(size)) DisjunctionContext();
112    }
113
114    void freeDisjunctionContext(DisjunctionContext* context)
115    {
116        allocatorPool = allocatorPool->dealloc(context);
117    }
118
119    struct ParenthesesDisjunctionContext
120    {
121        ParenthesesDisjunctionContext(int* output, ByteTerm& term)
122            : next(0)
123        {
124            unsigned firstSubpatternId = term.atom.subpatternId;
125            unsigned numNestedSubpatterns = term.atom.parenthesesDisjunction->m_numSubpatterns;
126
127            for (unsigned i = 0; i < (numNestedSubpatterns << 1); ++i) {
128                subpatternBackup[i] = output[(firstSubpatternId << 1) + i];
129                output[(firstSubpatternId << 1) + i] = -1;
130            }
131
132            new(getDisjunctionContext(term)) DisjunctionContext();
133        }
134
135        void* operator new(size_t, void* where)
136        {
137            return where;
138        }
139
140        void restoreOutput(int* output, unsigned firstSubpatternId, unsigned numNestedSubpatterns)
141        {
142            for (unsigned i = 0; i < (numNestedSubpatterns << 1); ++i)
143                output[(firstSubpatternId << 1) + i] = subpatternBackup[i];
144        }
145
146        DisjunctionContext* getDisjunctionContext(ByteTerm& term)
147        {
148            return reinterpret_cast<DisjunctionContext*>(&(subpatternBackup[term.atom.parenthesesDisjunction->m_numSubpatterns << 1]));
149        }
150
151        ParenthesesDisjunctionContext* next;
152        int subpatternBackup[1];
153    };
154
155    ParenthesesDisjunctionContext* allocParenthesesDisjunctionContext(ByteDisjunction* disjunction, int* output, ByteTerm& term)
156    {
157        size_t size = sizeof(ParenthesesDisjunctionContext) - sizeof(int) + (term.atom.parenthesesDisjunction->m_numSubpatterns << 1) * sizeof(int) + sizeof(DisjunctionContext) - sizeof(uintptr_t) + disjunction->m_frameSize * sizeof(uintptr_t);
158        allocatorPool = allocatorPool->ensureCapacity(size);
159        if (!allocatorPool)
160            CRASH();
161        return new(allocatorPool->alloc(size)) ParenthesesDisjunctionContext(output, term);
162    }
163
164    void freeParenthesesDisjunctionContext(ParenthesesDisjunctionContext* context)
165    {
166        allocatorPool = allocatorPool->dealloc(context);
167    }
168
169    class InputStream {
170    public:
171        InputStream(const UChar* input, unsigned start, unsigned length)
172            : input(input)
173            , pos(start)
174            , length(length)
175        {
176        }
177
178        void next()
179        {
180            ++pos;
181        }
182
183        void rewind(unsigned amount)
184        {
185            ASSERT(pos >= amount);
186            pos -= amount;
187        }
188
189        int read()
190        {
191            ASSERT(pos < length);
192            if (pos < length)
193                return input[pos];
194            return -1;
195        }
196
197        int readPair()
198        {
199            ASSERT(pos + 1 < length);
200            return input[pos] | input[pos + 1] << 16;
201        }
202
203        int readChecked(int position)
204        {
205            ASSERT(position < 0);
206            ASSERT(static_cast<unsigned>(-position) <= pos);
207            unsigned p = pos + position;
208            ASSERT(p < length);
209            return input[p];
210        }
211
212        int reread(unsigned from)
213        {
214            ASSERT(from < length);
215            return input[from];
216        }
217
218        int prev()
219        {
220            ASSERT(!(pos > length));
221            if (pos && length)
222                return input[pos - 1];
223            return -1;
224        }
225
226        unsigned getPos()
227        {
228            return pos;
229        }
230
231        void setPos(unsigned p)
232        {
233            pos = p;
234        }
235
236        bool atStart()
237        {
238            return pos == 0;
239        }
240
241        bool atEnd()
242        {
243            return pos == length;
244        }
245
246        bool checkInput(int count)
247        {
248            if ((pos + count) <= length) {
249                pos += count;
250                return true;
251            }
252            return false;
253        }
254
255        void uncheckInput(int count)
256        {
257            pos -= count;
258        }
259
260        bool atStart(int position)
261        {
262            return (pos + position) == 0;
263        }
264
265        bool atEnd(int position)
266        {
267            return (pos + position) == length;
268        }
269
270        bool isNotAvailableInput(int position)
271        {
272            return (pos + position) > length;
273        }
274
275    private:
276        const UChar* input;
277        unsigned pos;
278        unsigned length;
279    };
280
281    bool testCharacterClass(CharacterClass* characterClass, int ch)
282    {
283        if (ch & 0xFF80) {
284            for (unsigned i = 0; i < characterClass->m_matchesUnicode.size(); ++i)
285                if (ch == characterClass->m_matchesUnicode[i])
286                    return true;
287            for (unsigned i = 0; i < characterClass->m_rangesUnicode.size(); ++i)
288                if ((ch >= characterClass->m_rangesUnicode[i].begin) && (ch <= characterClass->m_rangesUnicode[i].end))
289                    return true;
290        } else {
291            for (unsigned i = 0; i < characterClass->m_matches.size(); ++i)
292                if (ch == characterClass->m_matches[i])
293                    return true;
294            for (unsigned i = 0; i < characterClass->m_ranges.size(); ++i)
295                if ((ch >= characterClass->m_ranges[i].begin) && (ch <= characterClass->m_ranges[i].end))
296                    return true;
297        }
298
299        return false;
300    }
301
302    bool checkCharacter(int testChar, int inputPosition)
303    {
304        return testChar == input.readChecked(inputPosition);
305    }
306
307    bool checkCasedCharacter(int loChar, int hiChar, int inputPosition)
308    {
309        int ch = input.readChecked(inputPosition);
310        return (loChar == ch) || (hiChar == ch);
311    }
312
313    bool checkCharacterClass(CharacterClass* characterClass, bool invert, int inputPosition)
314    {
315        bool match = testCharacterClass(characterClass, input.readChecked(inputPosition));
316        return invert ? !match : match;
317    }
318
319    bool tryConsumeBackReference(int matchBegin, int matchEnd, int inputOffset)
320    {
321        int matchSize = matchEnd - matchBegin;
322
323        if (!input.checkInput(matchSize))
324            return false;
325
326        if (pattern->m_ignoreCase) {
327            for (int i = 0; i < matchSize; ++i) {
328                int ch = input.reread(matchBegin + i);
329
330                int lo = Unicode::toLower(ch);
331                int hi = Unicode::toUpper(ch);
332
333                if ((lo != hi) ? (!checkCasedCharacter(lo, hi, inputOffset - matchSize + i)) : (!checkCharacter(ch, inputOffset - matchSize + i))) {
334                    input.uncheckInput(matchSize);
335                    return false;
336                }
337            }
338        } else {
339            for (int i = 0; i < matchSize; ++i) {
340                if (!checkCharacter(input.reread(matchBegin + i), inputOffset - matchSize + i)) {
341                    input.uncheckInput(matchSize);
342                    return false;
343                }
344            }
345        }
346
347        return true;
348    }
349
350    bool matchAssertionBOL(ByteTerm& term)
351    {
352        return (input.atStart(term.inputPosition)) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition - 1)));
353    }
354
355    bool matchAssertionEOL(ByteTerm& term)
356    {
357        if (term.inputPosition)
358            return (input.atEnd(term.inputPosition)) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition)));
359
360        return (input.atEnd()) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.read()));
361    }
362
363    bool matchAssertionWordBoundary(ByteTerm& term)
364    {
365        bool prevIsWordchar = !input.atStart(term.inputPosition) && testCharacterClass(pattern->wordcharCharacterClass, input.readChecked(term.inputPosition - 1));
366        bool readIsWordchar;
367        if (term.inputPosition)
368            readIsWordchar = !input.atEnd(term.inputPosition) && testCharacterClass(pattern->wordcharCharacterClass, input.readChecked(term.inputPosition));
369        else
370            readIsWordchar = !input.atEnd() && testCharacterClass(pattern->wordcharCharacterClass, input.read());
371
372        bool wordBoundary = prevIsWordchar != readIsWordchar;
373        return term.invert() ? !wordBoundary : wordBoundary;
374    }
375
376    bool backtrackPatternCharacter(ByteTerm& term, DisjunctionContext* context)
377    {
378        BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
379
380        switch (term.atom.quantityType) {
381        case QuantifierFixedCount:
382            break;
383
384        case QuantifierGreedy:
385            if (backTrack->matchAmount) {
386                --backTrack->matchAmount;
387                input.uncheckInput(1);
388                return true;
389            }
390            break;
391
392        case QuantifierNonGreedy:
393            if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
394                ++backTrack->matchAmount;
395                if (checkCharacter(term.atom.patternCharacter, term.inputPosition - 1))
396                    return true;
397            }
398            input.uncheckInput(backTrack->matchAmount);
399            break;
400        }
401
402        return false;
403    }
404
405    bool backtrackPatternCasedCharacter(ByteTerm& term, DisjunctionContext* context)
406    {
407        BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
408
409        switch (term.atom.quantityType) {
410        case QuantifierFixedCount:
411            break;
412
413        case QuantifierGreedy:
414            if (backTrack->matchAmount) {
415                --backTrack->matchAmount;
416                input.uncheckInput(1);
417                return true;
418            }
419            break;
420
421        case QuantifierNonGreedy:
422            if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
423                ++backTrack->matchAmount;
424                if (checkCasedCharacter(term.atom.casedCharacter.lo, term.atom.casedCharacter.hi, term.inputPosition - 1))
425                    return true;
426            }
427            input.uncheckInput(backTrack->matchAmount);
428            break;
429        }
430
431        return false;
432    }
433
434    bool matchCharacterClass(ByteTerm& term, DisjunctionContext* context)
435    {
436        ASSERT(term.type == ByteTerm::TypeCharacterClass);
437        BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
438
439        switch (term.atom.quantityType) {
440        case QuantifierFixedCount: {
441            for (unsigned matchAmount = 0; matchAmount < term.atom.quantityCount; ++matchAmount) {
442                if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition + matchAmount))
443                    return false;
444            }
445            return true;
446        }
447
448        case QuantifierGreedy: {
449            unsigned matchAmount = 0;
450            while ((matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
451                if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition - 1)) {
452                    input.uncheckInput(1);
453                    break;
454                }
455                ++matchAmount;
456            }
457            backTrack->matchAmount = matchAmount;
458
459            return true;
460        }
461
462        case QuantifierNonGreedy:
463            backTrack->matchAmount = 0;
464            return true;
465        }
466
467        ASSERT_NOT_REACHED();
468        return false;
469    }
470
471    bool backtrackCharacterClass(ByteTerm& term, DisjunctionContext* context)
472    {
473        ASSERT(term.type == ByteTerm::TypeCharacterClass);
474        BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
475
476        switch (term.atom.quantityType) {
477        case QuantifierFixedCount:
478            break;
479
480        case QuantifierGreedy:
481            if (backTrack->matchAmount) {
482                --backTrack->matchAmount;
483                input.uncheckInput(1);
484                return true;
485            }
486            break;
487
488        case QuantifierNonGreedy:
489            if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
490                ++backTrack->matchAmount;
491                if (checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition - 1))
492                    return true;
493            }
494            input.uncheckInput(backTrack->matchAmount);
495            break;
496        }
497
498        return false;
499    }
500
501    bool matchBackReference(ByteTerm& term, DisjunctionContext* context)
502    {
503        ASSERT(term.type == ByteTerm::TypeBackReference);
504        BackTrackInfoBackReference* backTrack = reinterpret_cast<BackTrackInfoBackReference*>(context->frame + term.frameLocation);
505
506        int matchBegin = output[(term.atom.subpatternId << 1)];
507        int matchEnd = output[(term.atom.subpatternId << 1) + 1];
508
509        // If the end position of the referenced match hasn't set yet then the backreference in the same parentheses where it references to that.
510        // In this case the result of match is empty string like when it references to a parentheses with zero-width match.
511        // Eg.: /(a\1)/
512        if (matchEnd == -1)
513            return true;
514
515        ASSERT((matchBegin == -1) || (matchBegin <= matchEnd));
516
517        if (matchBegin == matchEnd)
518            return true;
519
520        switch (term.atom.quantityType) {
521        case QuantifierFixedCount: {
522            backTrack->begin = input.getPos();
523            for (unsigned matchAmount = 0; matchAmount < term.atom.quantityCount; ++matchAmount) {
524                if (!tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) {
525                    input.setPos(backTrack->begin);
526                    return false;
527                }
528            }
529            return true;
530        }
531
532        case QuantifierGreedy: {
533            unsigned matchAmount = 0;
534            while ((matchAmount < term.atom.quantityCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition))
535                ++matchAmount;
536            backTrack->matchAmount = matchAmount;
537            return true;
538        }
539
540        case QuantifierNonGreedy:
541            backTrack->begin = input.getPos();
542            backTrack->matchAmount = 0;
543            return true;
544        }
545
546        ASSERT_NOT_REACHED();
547        return false;
548    }
549
550    bool backtrackBackReference(ByteTerm& term, DisjunctionContext* context)
551    {
552        ASSERT(term.type == ByteTerm::TypeBackReference);
553        BackTrackInfoBackReference* backTrack = reinterpret_cast<BackTrackInfoBackReference*>(context->frame + term.frameLocation);
554
555        int matchBegin = output[(term.atom.subpatternId << 1)];
556        int matchEnd = output[(term.atom.subpatternId << 1) + 1];
557        ASSERT((matchBegin == -1) || (matchBegin <= matchEnd));
558
559        if (matchBegin == matchEnd)
560            return false;
561
562        switch (term.atom.quantityType) {
563        case QuantifierFixedCount:
564            // for quantityCount == 1, could rewind.
565            input.setPos(backTrack->begin);
566            break;
567
568        case QuantifierGreedy:
569            if (backTrack->matchAmount) {
570                --backTrack->matchAmount;
571                input.rewind(matchEnd - matchBegin);
572                return true;
573            }
574            break;
575
576        case QuantifierNonGreedy:
577            if ((backTrack->matchAmount < term.atom.quantityCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) {
578                ++backTrack->matchAmount;
579                return true;
580            }
581            input.setPos(backTrack->begin);
582            break;
583        }
584
585        return false;
586    }
587
588    void recordParenthesesMatch(ByteTerm& term, ParenthesesDisjunctionContext* context)
589    {
590        if (term.capture()) {
591            unsigned subpatternId = term.atom.subpatternId;
592            output[(subpatternId << 1)] = context->getDisjunctionContext(term)->matchBegin + term.inputPosition;
593            output[(subpatternId << 1) + 1] = context->getDisjunctionContext(term)->matchEnd + term.inputPosition;
594        }
595    }
596    void resetMatches(ByteTerm& term, ParenthesesDisjunctionContext* context)
597    {
598        unsigned firstSubpatternId = term.atom.subpatternId;
599        unsigned count = term.atom.parenthesesDisjunction->m_numSubpatterns;
600        context->restoreOutput(output, firstSubpatternId, count);
601    }
602    JSRegExpResult parenthesesDoBacktrack(ByteTerm& term, BackTrackInfoParentheses* backTrack)
603    {
604        while (backTrack->matchAmount) {
605            ParenthesesDisjunctionContext* context = backTrack->lastContext;
606
607            JSRegExpResult result = matchDisjunction(term.atom.parenthesesDisjunction, context->getDisjunctionContext(term), true);
608            if (result == JSRegExpMatch)
609                return JSRegExpMatch;
610
611            resetMatches(term, context);
612            popParenthesesDisjunctionContext(backTrack);
613            freeParenthesesDisjunctionContext(context);
614
615            if (result != JSRegExpNoMatch)
616                return result;
617        }
618
619        return JSRegExpNoMatch;
620    }
621
622    bool matchParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context)
623    {
624        ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
625        ASSERT(term.atom.quantityCount == 1);
626
627        BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
628
629        switch (term.atom.quantityType) {
630        case QuantifierGreedy: {
631            // set this speculatively; if we get to the parens end this will be true.
632            backTrack->begin = input.getPos();
633            break;
634        }
635        case QuantifierNonGreedy: {
636            backTrack->begin = notFound;
637            context->term += term.atom.parenthesesWidth;
638            return true;
639        }
640        case QuantifierFixedCount:
641            break;
642        }
643
644        if (term.capture()) {
645            unsigned subpatternId = term.atom.subpatternId;
646            output[(subpatternId << 1)] = input.getPos() + term.inputPosition;
647        }
648
649        return true;
650    }
651
652    bool matchParenthesesOnceEnd(ByteTerm& term, DisjunctionContext* context)
653    {
654        ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd);
655        ASSERT(term.atom.quantityCount == 1);
656
657        if (term.capture()) {
658            unsigned subpatternId = term.atom.subpatternId;
659            output[(subpatternId << 1) + 1] = input.getPos() + term.inputPosition;
660        }
661
662        if (term.atom.quantityType == QuantifierFixedCount)
663            return true;
664
665        BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
666        return backTrack->begin != input.getPos();
667    }
668
669    bool backtrackParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context)
670    {
671        ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
672        ASSERT(term.atom.quantityCount == 1);
673
674        BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
675
676        if (term.capture()) {
677            unsigned subpatternId = term.atom.subpatternId;
678            output[(subpatternId << 1)] = -1;
679            output[(subpatternId << 1) + 1] = -1;
680        }
681
682        switch (term.atom.quantityType) {
683        case QuantifierGreedy:
684            // if we backtrack to this point, there is another chance - try matching nothing.
685            ASSERT(backTrack->begin != notFound);
686            backTrack->begin = notFound;
687            context->term += term.atom.parenthesesWidth;
688            return true;
689        case QuantifierNonGreedy:
690            ASSERT(backTrack->begin != notFound);
691        case QuantifierFixedCount:
692            break;
693        }
694
695        return false;
696    }
697
698    bool backtrackParenthesesOnceEnd(ByteTerm& term, DisjunctionContext* context)
699    {
700        ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd);
701        ASSERT(term.atom.quantityCount == 1);
702
703        BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
704
705        switch (term.atom.quantityType) {
706        case QuantifierGreedy:
707            if (backTrack->begin == notFound) {
708                context->term -= term.atom.parenthesesWidth;
709                return false;
710            }
711        case QuantifierNonGreedy:
712            if (backTrack->begin == notFound) {
713                backTrack->begin = input.getPos();
714                if (term.capture()) {
715                    // Technically this access to inputPosition should be accessing the begin term's
716                    // inputPosition, but for repeats other than fixed these values should be
717                    // the same anyway! (We don't pre-check for greedy or non-greedy matches.)
718                    ASSERT((&term - term.atom.parenthesesWidth)->type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
719                    ASSERT((&term - term.atom.parenthesesWidth)->inputPosition == term.inputPosition);
720                    unsigned subpatternId = term.atom.subpatternId;
721                    output[subpatternId << 1] = input.getPos() + term.inputPosition;
722                }
723                context->term -= term.atom.parenthesesWidth;
724                return true;
725            }
726        case QuantifierFixedCount:
727            break;
728        }
729
730        return false;
731    }
732
733    bool matchParenthesesTerminalBegin(ByteTerm& term, DisjunctionContext* context)
734    {
735        ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
736        ASSERT(term.atom.quantityType == QuantifierGreedy);
737        ASSERT(term.atom.quantityCount == quantifyInfinite);
738        ASSERT(!term.capture());
739
740        BackTrackInfoParenthesesTerminal* backTrack = reinterpret_cast<BackTrackInfoParenthesesTerminal*>(context->frame + term.frameLocation);
741        backTrack->begin = input.getPos();
742        return true;
743    }
744
745    bool matchParenthesesTerminalEnd(ByteTerm& term, DisjunctionContext* context)
746    {
747        ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalEnd);
748
749        BackTrackInfoParenthesesTerminal* backTrack = reinterpret_cast<BackTrackInfoParenthesesTerminal*>(context->frame + term.frameLocation);
750        // Empty match is a failed match.
751        if (backTrack->begin == input.getPos())
752            return false;
753
754        // Successful match! Okay, what's next? - loop around and try to match moar!
755        context->term -= (term.atom.parenthesesWidth + 1);
756        return true;
757    }
758
759    bool backtrackParenthesesTerminalBegin(ByteTerm& term, DisjunctionContext* context)
760    {
761        ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
762        ASSERT(term.atom.quantityType == QuantifierGreedy);
763        ASSERT(term.atom.quantityCount == quantifyInfinite);
764        ASSERT(!term.capture());
765
766        // If we backtrack to this point, we have failed to match this iteration of the parens.
767        // Since this is greedy / zero minimum a failed is also accepted as a match!
768        context->term += term.atom.parenthesesWidth;
769        return true;
770    }
771
772    bool backtrackParenthesesTerminalEnd(ByteTerm&, DisjunctionContext*)
773    {
774        // 'Terminal' parentheses are at the end of the regex, and as such a match past end
775        // should always be returned as a successful match - we should never backtrack to here.
776        ASSERT_NOT_REACHED();
777        return false;
778    }
779
780    bool matchParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context)
781    {
782        ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin);
783        ASSERT(term.atom.quantityCount == 1);
784
785        BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
786
787        backTrack->begin = input.getPos();
788        return true;
789    }
790
791    bool matchParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context)
792    {
793        ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd);
794        ASSERT(term.atom.quantityCount == 1);
795
796        BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
797
798        input.setPos(backTrack->begin);
799
800        // We've reached the end of the parens; if they are inverted, this is failure.
801        if (term.invert()) {
802            context->term -= term.atom.parenthesesWidth;
803            return false;
804        }
805
806        return true;
807    }
808
809    bool backtrackParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context)
810    {
811        ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin);
812        ASSERT(term.atom.quantityCount == 1);
813
814        // We've failed to match parens; if they are inverted, this is win!
815        if (term.invert()) {
816            context->term += term.atom.parenthesesWidth;
817            return true;
818        }
819
820        return false;
821    }
822
823    bool backtrackParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context)
824    {
825        ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd);
826        ASSERT(term.atom.quantityCount == 1);
827
828        BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
829
830        input.setPos(backTrack->begin);
831
832        context->term -= term.atom.parenthesesWidth;
833        return false;
834    }
835
836    JSRegExpResult matchParentheses(ByteTerm& term, DisjunctionContext* context)
837    {
838        ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern);
839
840        BackTrackInfoParentheses* backTrack = reinterpret_cast<BackTrackInfoParentheses*>(context->frame + term.frameLocation);
841        ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction;
842
843        backTrack->matchAmount = 0;
844        backTrack->lastContext = 0;
845
846        switch (term.atom.quantityType) {
847        case QuantifierFixedCount: {
848            // While we haven't yet reached our fixed limit,
849            while (backTrack->matchAmount < term.atom.quantityCount) {
850                // Try to do a match, and it it succeeds, add it to the list.
851                ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
852                JSRegExpResult result = matchDisjunction(disjunctionBody, context->getDisjunctionContext(term));
853                if (result == JSRegExpMatch)
854                    appendParenthesesDisjunctionContext(backTrack, context);
855                else {
856                    // The match failed; try to find an alternate point to carry on from.
857                    resetMatches(term, context);
858                    freeParenthesesDisjunctionContext(context);
859
860                    if (result == JSRegExpNoMatch) {
861                        JSRegExpResult backtrackResult = parenthesesDoBacktrack(term, backTrack);
862                        if (backtrackResult != JSRegExpMatch)
863                            return backtrackResult;
864                    } else
865                        return result;
866                }
867            }
868
869            ASSERT(backTrack->matchAmount == term.atom.quantityCount);
870            ParenthesesDisjunctionContext* context = backTrack->lastContext;
871            recordParenthesesMatch(term, context);
872            return JSRegExpMatch;
873        }
874
875        case QuantifierGreedy: {
876            while (backTrack->matchAmount < term.atom.quantityCount) {
877                ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
878                JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
879                if (result == JSRegExpMatch)
880                    appendParenthesesDisjunctionContext(backTrack, context);
881                else {
882                    resetMatches(term, context);
883                    freeParenthesesDisjunctionContext(context);
884
885                    if (result != JSRegExpNoMatch)
886                        return result;
887
888                    break;
889                }
890            }
891
892            if (backTrack->matchAmount) {
893                ParenthesesDisjunctionContext* context = backTrack->lastContext;
894                recordParenthesesMatch(term, context);
895            }
896            return JSRegExpMatch;
897        }
898
899        case QuantifierNonGreedy:
900            return JSRegExpMatch;
901        }
902
903        ASSERT_NOT_REACHED();
904        return JSRegExpErrorNoMatch;
905    }
906
907    // Rules for backtracking differ depending on whether this is greedy or non-greedy.
908    //
909    // Greedy matches never should try just adding more - you should already have done
910    // the 'more' cases.  Always backtrack, at least a leetle bit.  However cases where
911    // you backtrack an item off the list needs checking, since we'll never have matched
912    // the one less case.  Tracking forwards, still add as much as possible.
913    //
914    // Non-greedy, we've already done the one less case, so don't match on popping.
915    // We haven't done the one more case, so always try to add that.
916    //
917    JSRegExpResult backtrackParentheses(ByteTerm& term, DisjunctionContext* context)
918    {
919        ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern);
920
921        BackTrackInfoParentheses* backTrack = reinterpret_cast<BackTrackInfoParentheses*>(context->frame + term.frameLocation);
922        ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction;
923
924        switch (term.atom.quantityType) {
925        case QuantifierFixedCount: {
926            ASSERT(backTrack->matchAmount == term.atom.quantityCount);
927
928            ParenthesesDisjunctionContext* context = 0;
929            JSRegExpResult result = parenthesesDoBacktrack(term, backTrack);
930
931            if (result != JSRegExpMatch)
932                return result;
933
934            // While we haven't yet reached our fixed limit,
935            while (backTrack->matchAmount < term.atom.quantityCount) {
936                // Try to do a match, and it it succeeds, add it to the list.
937                context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
938                result = matchDisjunction(disjunctionBody, context->getDisjunctionContext(term));
939
940                if (result == JSRegExpMatch)
941                    appendParenthesesDisjunctionContext(backTrack, context);
942                else {
943                    // The match failed; try to find an alternate point to carry on from.
944                    resetMatches(term, context);
945                    freeParenthesesDisjunctionContext(context);
946
947                    if (result == JSRegExpNoMatch) {
948                        JSRegExpResult backtrackResult = parenthesesDoBacktrack(term, backTrack);
949                        if (backtrackResult != JSRegExpMatch)
950                            return backtrackResult;
951                    } else
952                        return result;
953                }
954            }
955
956            ASSERT(backTrack->matchAmount == term.atom.quantityCount);
957            context = backTrack->lastContext;
958            recordParenthesesMatch(term, context);
959            return JSRegExpMatch;
960        }
961
962        case QuantifierGreedy: {
963            if (!backTrack->matchAmount)
964                return JSRegExpNoMatch;
965
966            ParenthesesDisjunctionContext* context = backTrack->lastContext;
967            JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true);
968            if (result == JSRegExpMatch) {
969                while (backTrack->matchAmount < term.atom.quantityCount) {
970                    ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
971                    JSRegExpResult parenthesesResult = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
972                    if (parenthesesResult == JSRegExpMatch)
973                        appendParenthesesDisjunctionContext(backTrack, context);
974                    else {
975                        resetMatches(term, context);
976                        freeParenthesesDisjunctionContext(context);
977
978                        if (parenthesesResult != JSRegExpNoMatch)
979                            return parenthesesResult;
980
981                        break;
982                    }
983                }
984            } else {
985                resetMatches(term, context);
986                popParenthesesDisjunctionContext(backTrack);
987                freeParenthesesDisjunctionContext(context);
988
989                if (result != JSRegExpNoMatch)
990                    return result;
991            }
992
993            if (backTrack->matchAmount) {
994                ParenthesesDisjunctionContext* context = backTrack->lastContext;
995                recordParenthesesMatch(term, context);
996            }
997            return JSRegExpMatch;
998        }
999
1000        case QuantifierNonGreedy: {
1001            // If we've not reached the limit, try to add one more match.
1002            if (backTrack->matchAmount < term.atom.quantityCount) {
1003                ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
1004                JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term));
1005                if (result == JSRegExpMatch) {
1006                    appendParenthesesDisjunctionContext(backTrack, context);
1007                    recordParenthesesMatch(term, context);
1008                    return JSRegExpMatch;
1009                }
1010
1011                resetMatches(term, context);
1012                freeParenthesesDisjunctionContext(context);
1013
1014                if (result != JSRegExpNoMatch)
1015                    return result;
1016            }
1017
1018            // Nope - okay backtrack looking for an alternative.
1019            while (backTrack->matchAmount) {
1020                ParenthesesDisjunctionContext* context = backTrack->lastContext;
1021                JSRegExpResult result = matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true);
1022                if (result == JSRegExpMatch) {
1023                    // successful backtrack! we're back in the game!
1024                    if (backTrack->matchAmount) {
1025                        context = backTrack->lastContext;
1026                        recordParenthesesMatch(term, context);
1027                    }
1028                    return JSRegExpMatch;
1029                }
1030
1031                // pop a match off the stack
1032                resetMatches(term, context);
1033                popParenthesesDisjunctionContext(backTrack);
1034                freeParenthesesDisjunctionContext(context);
1035
1036                return result;
1037            }
1038
1039            return JSRegExpNoMatch;
1040        }
1041        }
1042
1043        ASSERT_NOT_REACHED();
1044        return JSRegExpErrorNoMatch;
1045    }
1046
1047    void lookupForBeginChars()
1048    {
1049        int character;
1050        bool firstSingleCharFound;
1051
1052        while (true) {
1053            if (input.isNotAvailableInput(2))
1054                return;
1055
1056            firstSingleCharFound = false;
1057
1058            character = input.readPair();
1059
1060            for (unsigned i = 0; i < pattern->m_beginChars.size(); ++i) {
1061                BeginChar bc = pattern->m_beginChars[i];
1062
1063                if (!firstSingleCharFound && bc.value <= 0xFFFF) {
1064                    firstSingleCharFound = true;
1065                    character &= 0xFFFF;
1066                }
1067
1068                if ((character | bc.mask) == bc.value)
1069                    return;
1070            }
1071
1072            input.next();
1073        }
1074    }
1075
1076#define MATCH_NEXT() { ++context->term; goto matchAgain; }
1077#define BACKTRACK() { --context->term; goto backtrack; }
1078#define currentTerm() (disjunction->terms[context->term])
1079    JSRegExpResult matchDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false, bool isBody = false)
1080    {
1081        if (!--remainingMatchCount)
1082            return JSRegExpErrorHitLimit;
1083
1084        if (btrack)
1085            BACKTRACK();
1086
1087        if (pattern->m_containsBeginChars && isBody)
1088            lookupForBeginChars();
1089
1090        context->matchBegin = input.getPos();
1091        context->term = 0;
1092
1093    matchAgain:
1094        ASSERT(context->term < static_cast<int>(disjunction->terms.size()));
1095
1096        switch (currentTerm().type) {
1097        case ByteTerm::TypeSubpatternBegin:
1098            MATCH_NEXT();
1099        case ByteTerm::TypeSubpatternEnd:
1100            context->matchEnd = input.getPos();
1101            return JSRegExpMatch;
1102
1103        case ByteTerm::TypeBodyAlternativeBegin:
1104            MATCH_NEXT();
1105        case ByteTerm::TypeBodyAlternativeDisjunction:
1106        case ByteTerm::TypeBodyAlternativeEnd:
1107            context->matchEnd = input.getPos();
1108            return JSRegExpMatch;
1109
1110        case ByteTerm::TypeAlternativeBegin:
1111            MATCH_NEXT();
1112        case ByteTerm::TypeAlternativeDisjunction:
1113        case ByteTerm::TypeAlternativeEnd: {
1114            int offset = currentTerm().alternative.end;
1115            BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + currentTerm().frameLocation);
1116            backTrack->offset = offset;
1117            context->term += offset;
1118            MATCH_NEXT();
1119        }
1120
1121        case ByteTerm::TypeAssertionBOL:
1122            if (matchAssertionBOL(currentTerm()))
1123                MATCH_NEXT();
1124            BACKTRACK();
1125        case ByteTerm::TypeAssertionEOL:
1126            if (matchAssertionEOL(currentTerm()))
1127                MATCH_NEXT();
1128            BACKTRACK();
1129        case ByteTerm::TypeAssertionWordBoundary:
1130            if (matchAssertionWordBoundary(currentTerm()))
1131                MATCH_NEXT();
1132            BACKTRACK();
1133
1134        case ByteTerm::TypePatternCharacterOnce:
1135        case ByteTerm::TypePatternCharacterFixed: {
1136            for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) {
1137                if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition + matchAmount))
1138                    BACKTRACK();
1139            }
1140            MATCH_NEXT();
1141        }
1142        case ByteTerm::TypePatternCharacterGreedy: {
1143            BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
1144            unsigned matchAmount = 0;
1145            while ((matchAmount < currentTerm().atom.quantityCount) && input.checkInput(1)) {
1146                if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition - 1)) {
1147                    input.uncheckInput(1);
1148                    break;
1149                }
1150                ++matchAmount;
1151            }
1152            backTrack->matchAmount = matchAmount;
1153
1154            MATCH_NEXT();
1155        }
1156        case ByteTerm::TypePatternCharacterNonGreedy: {
1157            BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
1158            backTrack->matchAmount = 0;
1159            MATCH_NEXT();
1160        }
1161
1162        case ByteTerm::TypePatternCasedCharacterOnce:
1163        case ByteTerm::TypePatternCasedCharacterFixed: {
1164            for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) {
1165                if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition + matchAmount))
1166                    BACKTRACK();
1167            }
1168            MATCH_NEXT();
1169        }
1170        case ByteTerm::TypePatternCasedCharacterGreedy: {
1171            BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
1172            unsigned matchAmount = 0;
1173            while ((matchAmount < currentTerm().atom.quantityCount) && input.checkInput(1)) {
1174                if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition - 1)) {
1175                    input.uncheckInput(1);
1176                    break;
1177                }
1178                ++matchAmount;
1179            }
1180            backTrack->matchAmount = matchAmount;
1181
1182            MATCH_NEXT();
1183        }
1184        case ByteTerm::TypePatternCasedCharacterNonGreedy: {
1185            BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
1186            backTrack->matchAmount = 0;
1187            MATCH_NEXT();
1188        }
1189
1190        case ByteTerm::TypeCharacterClass:
1191            if (matchCharacterClass(currentTerm(), context))
1192                MATCH_NEXT();
1193            BACKTRACK();
1194        case ByteTerm::TypeBackReference:
1195            if (matchBackReference(currentTerm(), context))
1196                MATCH_NEXT();
1197            BACKTRACK();
1198        case ByteTerm::TypeParenthesesSubpattern: {
1199            JSRegExpResult result = matchParentheses(currentTerm(), context);
1200
1201            if (result == JSRegExpMatch) {
1202                MATCH_NEXT();
1203            }  else if (result != JSRegExpNoMatch)
1204                return result;
1205
1206            BACKTRACK();
1207        }
1208        case ByteTerm::TypeParenthesesSubpatternOnceBegin:
1209            if (matchParenthesesOnceBegin(currentTerm(), context))
1210                MATCH_NEXT();
1211            BACKTRACK();
1212        case ByteTerm::TypeParenthesesSubpatternOnceEnd:
1213            if (matchParenthesesOnceEnd(currentTerm(), context))
1214                MATCH_NEXT();
1215            BACKTRACK();
1216        case ByteTerm::TypeParenthesesSubpatternTerminalBegin:
1217            if (matchParenthesesTerminalBegin(currentTerm(), context))
1218                MATCH_NEXT();
1219            BACKTRACK();
1220        case ByteTerm::TypeParenthesesSubpatternTerminalEnd:
1221            if (matchParenthesesTerminalEnd(currentTerm(), context))
1222                MATCH_NEXT();
1223            BACKTRACK();
1224        case ByteTerm::TypeParentheticalAssertionBegin:
1225            if (matchParentheticalAssertionBegin(currentTerm(), context))
1226                MATCH_NEXT();
1227            BACKTRACK();
1228        case ByteTerm::TypeParentheticalAssertionEnd:
1229            if (matchParentheticalAssertionEnd(currentTerm(), context))
1230                MATCH_NEXT();
1231            BACKTRACK();
1232
1233        case ByteTerm::TypeCheckInput:
1234            if (input.checkInput(currentTerm().checkInputCount))
1235                MATCH_NEXT();
1236            BACKTRACK();
1237
1238            case ByteTerm::TypeUncheckInput:
1239                input.uncheckInput(currentTerm().checkInputCount);
1240                MATCH_NEXT();
1241        }
1242
1243        // We should never fall-through to here.
1244        ASSERT_NOT_REACHED();
1245
1246    backtrack:
1247        ASSERT(context->term < static_cast<int>(disjunction->terms.size()));
1248
1249        switch (currentTerm().type) {
1250        case ByteTerm::TypeSubpatternBegin:
1251            return JSRegExpNoMatch;
1252        case ByteTerm::TypeSubpatternEnd:
1253            ASSERT_NOT_REACHED();
1254
1255        case ByteTerm::TypeBodyAlternativeBegin:
1256        case ByteTerm::TypeBodyAlternativeDisjunction: {
1257            int offset = currentTerm().alternative.next;
1258            context->term += offset;
1259            if (offset > 0)
1260                MATCH_NEXT();
1261
1262            if (input.atEnd())
1263                return JSRegExpNoMatch;
1264
1265            input.next();
1266
1267            if (pattern->m_containsBeginChars && isBody)
1268                lookupForBeginChars();
1269
1270            context->matchBegin = input.getPos();
1271
1272            if (currentTerm().alternative.onceThrough)
1273                context->term += currentTerm().alternative.next;
1274
1275            MATCH_NEXT();
1276        }
1277        case ByteTerm::TypeBodyAlternativeEnd:
1278            ASSERT_NOT_REACHED();
1279
1280            case ByteTerm::TypeAlternativeBegin:
1281            case ByteTerm::TypeAlternativeDisjunction: {
1282                int offset = currentTerm().alternative.next;
1283                context->term += offset;
1284                if (offset > 0)
1285                    MATCH_NEXT();
1286                BACKTRACK();
1287            }
1288            case ByteTerm::TypeAlternativeEnd: {
1289                // We should never backtrack back into an alternative of the main body of the regex.
1290                BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + currentTerm().frameLocation);
1291                unsigned offset = backTrack->offset;
1292                context->term -= offset;
1293                BACKTRACK();
1294            }
1295
1296            case ByteTerm::TypeAssertionBOL:
1297            case ByteTerm::TypeAssertionEOL:
1298            case ByteTerm::TypeAssertionWordBoundary:
1299                BACKTRACK();
1300
1301            case ByteTerm::TypePatternCharacterOnce:
1302            case ByteTerm::TypePatternCharacterFixed:
1303            case ByteTerm::TypePatternCharacterGreedy:
1304            case ByteTerm::TypePatternCharacterNonGreedy:
1305                if (backtrackPatternCharacter(currentTerm(), context))
1306                    MATCH_NEXT();
1307                BACKTRACK();
1308            case ByteTerm::TypePatternCasedCharacterOnce:
1309            case ByteTerm::TypePatternCasedCharacterFixed:
1310            case ByteTerm::TypePatternCasedCharacterGreedy:
1311            case ByteTerm::TypePatternCasedCharacterNonGreedy:
1312                if (backtrackPatternCasedCharacter(currentTerm(), context))
1313                    MATCH_NEXT();
1314                BACKTRACK();
1315            case ByteTerm::TypeCharacterClass:
1316                if (backtrackCharacterClass(currentTerm(), context))
1317                    MATCH_NEXT();
1318                BACKTRACK();
1319            case ByteTerm::TypeBackReference:
1320                if (backtrackBackReference(currentTerm(), context))
1321                    MATCH_NEXT();
1322                BACKTRACK();
1323            case ByteTerm::TypeParenthesesSubpattern: {
1324                JSRegExpResult result = backtrackParentheses(currentTerm(), context);
1325
1326                if (result == JSRegExpMatch) {
1327                    MATCH_NEXT();
1328                } else if (result != JSRegExpNoMatch)
1329                    return result;
1330
1331                BACKTRACK();
1332            }
1333            case ByteTerm::TypeParenthesesSubpatternOnceBegin:
1334                if (backtrackParenthesesOnceBegin(currentTerm(), context))
1335                    MATCH_NEXT();
1336                BACKTRACK();
1337            case ByteTerm::TypeParenthesesSubpatternOnceEnd:
1338                if (backtrackParenthesesOnceEnd(currentTerm(), context))
1339                    MATCH_NEXT();
1340                BACKTRACK();
1341            case ByteTerm::TypeParenthesesSubpatternTerminalBegin:
1342                if (backtrackParenthesesTerminalBegin(currentTerm(), context))
1343                    MATCH_NEXT();
1344                BACKTRACK();
1345            case ByteTerm::TypeParenthesesSubpatternTerminalEnd:
1346                if (backtrackParenthesesTerminalEnd(currentTerm(), context))
1347                    MATCH_NEXT();
1348                BACKTRACK();
1349            case ByteTerm::TypeParentheticalAssertionBegin:
1350                if (backtrackParentheticalAssertionBegin(currentTerm(), context))
1351                    MATCH_NEXT();
1352                BACKTRACK();
1353            case ByteTerm::TypeParentheticalAssertionEnd:
1354                if (backtrackParentheticalAssertionEnd(currentTerm(), context))
1355                    MATCH_NEXT();
1356                BACKTRACK();
1357
1358            case ByteTerm::TypeCheckInput:
1359                input.uncheckInput(currentTerm().checkInputCount);
1360                BACKTRACK();
1361
1362            case ByteTerm::TypeUncheckInput:
1363                input.checkInput(currentTerm().checkInputCount);
1364                BACKTRACK();
1365        }
1366
1367        ASSERT_NOT_REACHED();
1368        return JSRegExpErrorNoMatch;
1369    }
1370
1371    JSRegExpResult matchNonZeroDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)
1372    {
1373        JSRegExpResult result = matchDisjunction(disjunction, context, btrack);
1374
1375        if (result == JSRegExpMatch) {
1376            while (context->matchBegin == context->matchEnd) {
1377                result = matchDisjunction(disjunction, context, true);
1378                if (result != JSRegExpMatch)
1379                    return result;
1380            }
1381            return JSRegExpMatch;
1382        }
1383
1384        return result;
1385    }
1386
1387    int interpret()
1388    {
1389        allocatorPool = pattern->m_allocator->startAllocator();
1390        if (!allocatorPool)
1391            CRASH();
1392
1393        for (unsigned i = 0; i < ((pattern->m_body->m_numSubpatterns + 1) << 1); ++i)
1394            output[i] = -1;
1395
1396        DisjunctionContext* context = allocDisjunctionContext(pattern->m_body.get());
1397
1398        JSRegExpResult result = matchDisjunction(pattern->m_body.get(), context, false, true);
1399        if (result == JSRegExpMatch) {
1400            output[0] = context->matchBegin;
1401            output[1] = context->matchEnd;
1402        }
1403
1404        freeDisjunctionContext(context);
1405
1406        pattern->m_allocator->stopAllocator();
1407
1408        // RegExp.cpp currently expects all error to be converted to -1.
1409        ASSERT((result == JSRegExpMatch) == (output[0] != -1));
1410        return output[0];
1411    }
1412
1413    Interpreter(BytecodePattern* pattern, int* output, const UChar* inputChar, unsigned start, unsigned length)
1414        : pattern(pattern)
1415        , output(output)
1416        , input(inputChar, start, length)
1417        , allocatorPool(0)
1418        , remainingMatchCount(matchLimit)
1419    {
1420    }
1421
1422private:
1423    BytecodePattern* pattern;
1424    int* output;
1425    InputStream input;
1426    BumpPointerPool* allocatorPool;
1427    unsigned remainingMatchCount;
1428};
1429
1430
1431
1432class ByteCompiler {
1433    struct ParenthesesStackEntry {
1434        unsigned beginTerm;
1435        unsigned savedAlternativeIndex;
1436        ParenthesesStackEntry(unsigned beginTerm, unsigned savedAlternativeIndex/*, unsigned subpatternId, bool capture = false*/)
1437            : beginTerm(beginTerm)
1438            , savedAlternativeIndex(savedAlternativeIndex)
1439        {
1440        }
1441    };
1442
1443public:
1444    ByteCompiler(YarrPattern& pattern)
1445        : m_pattern(pattern)
1446    {
1447        m_currentAlternativeIndex = 0;
1448    }
1449
1450    PassOwnPtr<BytecodePattern> compile(BumpPointerAllocator* allocator)
1451    {
1452        regexBegin(m_pattern.m_numSubpatterns, m_pattern.m_body->m_callFrameSize, m_pattern.m_body->m_alternatives[0]->onceThrough());
1453        emitDisjunction(m_pattern.m_body);
1454        regexEnd();
1455
1456        return adoptPtr(new BytecodePattern(m_bodyDisjunction.release(), m_allParenthesesInfo, m_pattern, allocator));
1457    }
1458
1459    void checkInput(unsigned count)
1460    {
1461        m_bodyDisjunction->terms.append(ByteTerm::CheckInput(count));
1462    }
1463
1464    void uncheckInput(unsigned count)
1465    {
1466        m_bodyDisjunction->terms.append(ByteTerm::UncheckInput(count));
1467    }
1468
1469    void assertionBOL(int inputPosition)
1470    {
1471        m_bodyDisjunction->terms.append(ByteTerm::BOL(inputPosition));
1472    }
1473
1474    void assertionEOL(int inputPosition)
1475    {
1476        m_bodyDisjunction->terms.append(ByteTerm::EOL(inputPosition));
1477    }
1478
1479    void assertionWordBoundary(bool invert, int inputPosition)
1480    {
1481        m_bodyDisjunction->terms.append(ByteTerm::WordBoundary(invert, inputPosition));
1482    }
1483
1484    void atomPatternCharacter(UChar ch, int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
1485    {
1486        if (m_pattern.m_ignoreCase) {
1487            UChar lo = Unicode::toLower(ch);
1488            UChar hi = Unicode::toUpper(ch);
1489
1490            if (lo != hi) {
1491                m_bodyDisjunction->terms.append(ByteTerm(lo, hi, inputPosition, frameLocation, quantityCount, quantityType));
1492                return;
1493            }
1494        }
1495
1496        m_bodyDisjunction->terms.append(ByteTerm(ch, inputPosition, frameLocation, quantityCount, quantityType));
1497    }
1498
1499    void atomCharacterClass(CharacterClass* characterClass, bool invert, int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
1500    {
1501        m_bodyDisjunction->terms.append(ByteTerm(characterClass, invert, inputPosition));
1502
1503        m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityCount = quantityCount;
1504        m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityType = quantityType;
1505        m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1506    }
1507
1508    void atomBackReference(unsigned subpatternId, int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
1509    {
1510        ASSERT(subpatternId);
1511
1512        m_bodyDisjunction->terms.append(ByteTerm::BackReference(subpatternId, inputPosition));
1513
1514        m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityCount = quantityCount;
1515        m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityType = quantityType;
1516        m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1517    }
1518
1519    void atomParenthesesOnceBegin(unsigned subpatternId, bool capture, int inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
1520    {
1521        int beginTerm = m_bodyDisjunction->terms.size();
1522
1523        m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, false, inputPosition));
1524        m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1525        m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
1526        m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
1527
1528        m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
1529        m_currentAlternativeIndex = beginTerm + 1;
1530    }
1531
1532    void atomParenthesesTerminalBegin(unsigned subpatternId, bool capture, int inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
1533    {
1534        int beginTerm = m_bodyDisjunction->terms.size();
1535
1536        m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternTerminalBegin, subpatternId, capture, false, inputPosition));
1537        m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1538        m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
1539        m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
1540
1541        m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
1542        m_currentAlternativeIndex = beginTerm + 1;
1543    }
1544
1545    void atomParenthesesSubpatternBegin(unsigned subpatternId, bool capture, int inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
1546    {
1547        // Errrk! - this is a little crazy, we initially generate as a TypeParenthesesSubpatternOnceBegin,
1548        // then fix this up at the end! - simplifying this should make it much clearer.
1549        // https://bugs.webkit.org/show_bug.cgi?id=50136
1550
1551        int beginTerm = m_bodyDisjunction->terms.size();
1552
1553        m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, false, inputPosition));
1554        m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1555        m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
1556        m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
1557
1558        m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
1559        m_currentAlternativeIndex = beginTerm + 1;
1560    }
1561
1562    void atomParentheticalAssertionBegin(unsigned subpatternId, bool invert, unsigned frameLocation, unsigned alternativeFrameLocation)
1563    {
1564        int beginTerm = m_bodyDisjunction->terms.size();
1565
1566        m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParentheticalAssertionBegin, subpatternId, false, invert, 0));
1567        m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
1568        m_bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
1569        m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
1570
1571        m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex));
1572        m_currentAlternativeIndex = beginTerm + 1;
1573    }
1574
1575    void atomParentheticalAssertionEnd(int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
1576    {
1577        unsigned beginTerm = popParenthesesStack();
1578        closeAlternative(beginTerm + 1);
1579        unsigned endTerm = m_bodyDisjunction->terms.size();
1580
1581        ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParentheticalAssertionBegin);
1582
1583        bool invert = m_bodyDisjunction->terms[beginTerm].invert();
1584        unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId;
1585
1586        m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParentheticalAssertionEnd, subpatternId, false, invert, inputPosition));
1587        m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
1588        m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
1589        m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
1590
1591        m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount;
1592        m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
1593        m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount;
1594        m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
1595    }
1596
1597    unsigned popParenthesesStack()
1598    {
1599        ASSERT(m_parenthesesStack.size());
1600        int stackEnd = m_parenthesesStack.size() - 1;
1601        unsigned beginTerm = m_parenthesesStack[stackEnd].beginTerm;
1602        m_currentAlternativeIndex = m_parenthesesStack[stackEnd].savedAlternativeIndex;
1603        m_parenthesesStack.shrink(stackEnd);
1604
1605        ASSERT(beginTerm < m_bodyDisjunction->terms.size());
1606        ASSERT(m_currentAlternativeIndex < m_bodyDisjunction->terms.size());
1607
1608        return beginTerm;
1609    }
1610
1611#ifndef NDEBUG
1612    void dumpDisjunction(ByteDisjunction* disjunction)
1613    {
1614        printf("ByteDisjunction(%p):\n\t", disjunction);
1615        for (unsigned i = 0; i < disjunction->terms.size(); ++i)
1616            printf("{ %d } ", disjunction->terms[i].type);
1617        printf("\n");
1618    }
1619#endif
1620
1621    void closeAlternative(int beginTerm)
1622    {
1623        int origBeginTerm = beginTerm;
1624        ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeAlternativeBegin);
1625        int endIndex = m_bodyDisjunction->terms.size();
1626
1627        unsigned frameLocation = m_bodyDisjunction->terms[beginTerm].frameLocation;
1628
1629        if (!m_bodyDisjunction->terms[beginTerm].alternative.next)
1630            m_bodyDisjunction->terms.remove(beginTerm);
1631        else {
1632            while (m_bodyDisjunction->terms[beginTerm].alternative.next) {
1633                beginTerm += m_bodyDisjunction->terms[beginTerm].alternative.next;
1634                ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeAlternativeDisjunction);
1635                m_bodyDisjunction->terms[beginTerm].alternative.end = endIndex - beginTerm;
1636                m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
1637            }
1638
1639            m_bodyDisjunction->terms[beginTerm].alternative.next = origBeginTerm - beginTerm;
1640
1641            m_bodyDisjunction->terms.append(ByteTerm::AlternativeEnd());
1642            m_bodyDisjunction->terms[endIndex].frameLocation = frameLocation;
1643        }
1644    }
1645
1646    void closeBodyAlternative()
1647    {
1648        int beginTerm = 0;
1649        int origBeginTerm = 0;
1650        ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeBegin);
1651        int endIndex = m_bodyDisjunction->terms.size();
1652
1653        unsigned frameLocation = m_bodyDisjunction->terms[beginTerm].frameLocation;
1654
1655        while (m_bodyDisjunction->terms[beginTerm].alternative.next) {
1656            beginTerm += m_bodyDisjunction->terms[beginTerm].alternative.next;
1657            ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeDisjunction);
1658            m_bodyDisjunction->terms[beginTerm].alternative.end = endIndex - beginTerm;
1659            m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
1660        }
1661
1662        m_bodyDisjunction->terms[beginTerm].alternative.next = origBeginTerm - beginTerm;
1663
1664        m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeEnd());
1665        m_bodyDisjunction->terms[endIndex].frameLocation = frameLocation;
1666    }
1667
1668    void atomParenthesesSubpatternEnd(unsigned lastSubpatternId, int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType, unsigned callFrameSize = 0)
1669    {
1670        unsigned beginTerm = popParenthesesStack();
1671        closeAlternative(beginTerm + 1);
1672        unsigned endTerm = m_bodyDisjunction->terms.size();
1673
1674        ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
1675
1676        ByteTerm& parenthesesBegin = m_bodyDisjunction->terms[beginTerm];
1677
1678        bool capture = parenthesesBegin.capture();
1679        unsigned subpatternId = parenthesesBegin.atom.subpatternId;
1680
1681        unsigned numSubpatterns = lastSubpatternId - subpatternId + 1;
1682        ByteDisjunction* parenthesesDisjunction = new ByteDisjunction(numSubpatterns, callFrameSize);
1683
1684        parenthesesDisjunction->terms.append(ByteTerm::SubpatternBegin());
1685        for (unsigned termInParentheses = beginTerm + 1; termInParentheses < endTerm; ++termInParentheses)
1686            parenthesesDisjunction->terms.append(m_bodyDisjunction->terms[termInParentheses]);
1687        parenthesesDisjunction->terms.append(ByteTerm::SubpatternEnd());
1688
1689        m_bodyDisjunction->terms.shrink(beginTerm);
1690
1691        m_allParenthesesInfo.append(parenthesesDisjunction);
1692        m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpattern, subpatternId, parenthesesDisjunction, capture, inputPosition));
1693
1694        m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount;
1695        m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
1696        m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
1697    }
1698
1699    void atomParenthesesOnceEnd(int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
1700    {
1701        unsigned beginTerm = popParenthesesStack();
1702        closeAlternative(beginTerm + 1);
1703        unsigned endTerm = m_bodyDisjunction->terms.size();
1704
1705        ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
1706
1707        bool capture = m_bodyDisjunction->terms[beginTerm].capture();
1708        unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId;
1709
1710        m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceEnd, subpatternId, capture, false, inputPosition));
1711        m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
1712        m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
1713        m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
1714
1715        m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount;
1716        m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
1717        m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount;
1718        m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
1719    }
1720
1721    void atomParenthesesTerminalEnd(int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
1722    {
1723        unsigned beginTerm = popParenthesesStack();
1724        closeAlternative(beginTerm + 1);
1725        unsigned endTerm = m_bodyDisjunction->terms.size();
1726
1727        ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
1728
1729        bool capture = m_bodyDisjunction->terms[beginTerm].capture();
1730        unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId;
1731
1732        m_bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternTerminalEnd, subpatternId, capture, false, inputPosition));
1733        m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
1734        m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
1735        m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
1736
1737        m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount;
1738        m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
1739        m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount;
1740        m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
1741    }
1742
1743    void regexBegin(unsigned numSubpatterns, unsigned callFrameSize, bool onceThrough)
1744    {
1745        m_bodyDisjunction = adoptPtr(new ByteDisjunction(numSubpatterns, callFrameSize));
1746        m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeBegin(onceThrough));
1747        m_bodyDisjunction->terms[0].frameLocation = 0;
1748        m_currentAlternativeIndex = 0;
1749    }
1750
1751    void regexEnd()
1752    {
1753        closeBodyAlternative();
1754    }
1755
1756    void alternativeBodyDisjunction(bool onceThrough)
1757    {
1758        int newAlternativeIndex = m_bodyDisjunction->terms.size();
1759        m_bodyDisjunction->terms[m_currentAlternativeIndex].alternative.next = newAlternativeIndex - m_currentAlternativeIndex;
1760        m_bodyDisjunction->terms.append(ByteTerm::BodyAlternativeDisjunction(onceThrough));
1761
1762        m_currentAlternativeIndex = newAlternativeIndex;
1763    }
1764
1765    void alternativeDisjunction()
1766    {
1767        int newAlternativeIndex = m_bodyDisjunction->terms.size();
1768        m_bodyDisjunction->terms[m_currentAlternativeIndex].alternative.next = newAlternativeIndex - m_currentAlternativeIndex;
1769        m_bodyDisjunction->terms.append(ByteTerm::AlternativeDisjunction());
1770
1771        m_currentAlternativeIndex = newAlternativeIndex;
1772    }
1773
1774    void emitDisjunction(PatternDisjunction* disjunction, unsigned inputCountAlreadyChecked = 0, unsigned parenthesesInputCountAlreadyChecked = 0, bool isParentheticalAssertion = false)
1775    {
1776        for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt) {
1777            unsigned currentCountAlreadyChecked = inputCountAlreadyChecked;
1778
1779            PatternAlternative* alternative = disjunction->m_alternatives[alt];
1780
1781            if (alt) {
1782                if (disjunction == m_pattern.m_body)
1783                    alternativeBodyDisjunction(alternative->onceThrough());
1784                else
1785                    alternativeDisjunction();
1786            }
1787
1788            unsigned minimumSize = alternative->m_minimumSize;
1789            int countToCheck;
1790
1791            if (isParentheticalAssertion && parenthesesInputCountAlreadyChecked > minimumSize)
1792                countToCheck = 0;
1793            else
1794                countToCheck = minimumSize - parenthesesInputCountAlreadyChecked;
1795
1796            ASSERT(countToCheck >= 0);
1797            if (countToCheck) {
1798                checkInput(countToCheck);
1799                currentCountAlreadyChecked += countToCheck;
1800            }
1801
1802            for (unsigned i = 0; i < alternative->m_terms.size(); ++i) {
1803                PatternTerm& term = alternative->m_terms[i];
1804
1805                switch (term.type) {
1806                case PatternTerm::TypeAssertionBOL:
1807                    assertionBOL(term.inputPosition - currentCountAlreadyChecked);
1808                    break;
1809
1810                case PatternTerm::TypeAssertionEOL:
1811                    assertionEOL(term.inputPosition - currentCountAlreadyChecked);
1812                    break;
1813
1814                case PatternTerm::TypeAssertionWordBoundary:
1815                    assertionWordBoundary(term.invert(), term.inputPosition - currentCountAlreadyChecked);
1816                    break;
1817
1818                case PatternTerm::TypePatternCharacter:
1819                    atomPatternCharacter(term.patternCharacter, term.inputPosition - currentCountAlreadyChecked, term.frameLocation, term.quantityCount, term.quantityType);
1820                    break;
1821
1822                case PatternTerm::TypeCharacterClass:
1823                    atomCharacterClass(term.characterClass, term.invert(), term.inputPosition - currentCountAlreadyChecked, term.frameLocation, term.quantityCount, term.quantityType);
1824                    break;
1825
1826                case PatternTerm::TypeBackReference:
1827                    atomBackReference(term.backReferenceSubpatternId, term.inputPosition - currentCountAlreadyChecked, term.frameLocation, term.quantityCount, term.quantityType);
1828                        break;
1829
1830                case PatternTerm::TypeForwardReference:
1831                    break;
1832
1833                case PatternTerm::TypeParenthesesSubpattern: {
1834                    unsigned disjunctionAlreadyCheckedCount = 0;
1835                    if (term.quantityCount == 1 && !term.parentheses.isCopy) {
1836                        unsigned alternativeFrameLocation = term.frameLocation;
1837                        // For QuantifierFixedCount we pre-check the minimum size; for greedy/non-greedy we reserve a slot in the frame.
1838                        if (term.quantityType == QuantifierFixedCount)
1839                            disjunctionAlreadyCheckedCount = term.parentheses.disjunction->m_minimumSize;
1840                        else
1841                            alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;
1842                        unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
1843                        atomParenthesesOnceBegin(term.parentheses.subpatternId, term.capture(), delegateEndInputOffset - disjunctionAlreadyCheckedCount, term.frameLocation, alternativeFrameLocation);
1844                        emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount);
1845                        atomParenthesesOnceEnd(delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType);
1846                    } else if (term.parentheses.isTerminal) {
1847                        unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
1848                        atomParenthesesTerminalBegin(term.parentheses.subpatternId, term.capture(), delegateEndInputOffset - disjunctionAlreadyCheckedCount, term.frameLocation, term.frameLocation + YarrStackSpaceForBackTrackInfoParenthesesOnce);
1849                        emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, disjunctionAlreadyCheckedCount);
1850                        atomParenthesesTerminalEnd(delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType);
1851                    } else {
1852                        unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
1853                        atomParenthesesSubpatternBegin(term.parentheses.subpatternId, term.capture(), delegateEndInputOffset - disjunctionAlreadyCheckedCount, term.frameLocation, 0);
1854                        emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, 0);
1855                        atomParenthesesSubpatternEnd(term.parentheses.lastSubpatternId, delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType, term.parentheses.disjunction->m_callFrameSize);
1856                    }
1857                    break;
1858                }
1859
1860                case PatternTerm::TypeParentheticalAssertion: {
1861                    unsigned alternativeFrameLocation = term.frameLocation + YarrStackSpaceForBackTrackInfoParentheticalAssertion;
1862
1863                    ASSERT(currentCountAlreadyChecked >= static_cast<unsigned>(term.inputPosition));
1864                    int positiveInputOffset = currentCountAlreadyChecked - term.inputPosition;
1865                    int uncheckAmount = positiveInputOffset - term.parentheses.disjunction->m_minimumSize;
1866
1867                    if (uncheckAmount > 0) {
1868                        uncheckInput(uncheckAmount);
1869                        currentCountAlreadyChecked -= uncheckAmount;
1870                    } else
1871                        uncheckAmount = 0;
1872
1873                    atomParentheticalAssertionBegin(term.parentheses.subpatternId, term.invert(), term.frameLocation, alternativeFrameLocation);
1874                    emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, positiveInputOffset, true);
1875                    atomParentheticalAssertionEnd(0, term.frameLocation, term.quantityCount, term.quantityType);
1876                    if (uncheckAmount) {
1877                        checkInput(uncheckAmount);
1878                        currentCountAlreadyChecked += uncheckAmount;
1879                    }
1880                    break;
1881                }
1882                }
1883            }
1884        }
1885    }
1886
1887private:
1888    YarrPattern& m_pattern;
1889    OwnPtr<ByteDisjunction> m_bodyDisjunction;
1890    unsigned m_currentAlternativeIndex;
1891    Vector<ParenthesesStackEntry> m_parenthesesStack;
1892    Vector<ByteDisjunction*> m_allParenthesesInfo;
1893};
1894
1895PassOwnPtr<BytecodePattern> byteCompile(YarrPattern& pattern, BumpPointerAllocator* allocator)
1896{
1897    return ByteCompiler(pattern).compile(allocator);
1898}
1899
1900int interpret(BytecodePattern* bytecode, const UChar* input, unsigned start, unsigned length, int* output)
1901{
1902    return Interpreter(bytecode, output, input, start, length).interpret();
1903}
1904
1905COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoPatternCharacter) == (YarrStackSpaceForBackTrackInfoPatternCharacter * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoPatternCharacter);
1906COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoCharacterClass) == (YarrStackSpaceForBackTrackInfoCharacterClass * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoCharacterClass);
1907COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoBackReference) == (YarrStackSpaceForBackTrackInfoBackReference * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoBackReference);
1908COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoAlternative) == (YarrStackSpaceForBackTrackInfoAlternative * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoAlternative);
1909COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoParentheticalAssertion) == (YarrStackSpaceForBackTrackInfoParentheticalAssertion * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheticalAssertion);
1910COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoParenthesesOnce) == (YarrStackSpaceForBackTrackInfoParenthesesOnce * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParenthesesOnce);
1911COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoParentheses) == (YarrStackSpaceForBackTrackInfoParentheses * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheses);
1912
1913
1914} }
1915