1/*
2    Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
3
4    This library is free software; you can redistribute it and/or
5    modify it under the terms of the GNU Library General Public
6    License as published by the Free Software Foundation; either
7    version 2 of the License, or (at your option) any later version.
8
9    This library is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12    Library General Public License for more details.
13
14    You should have received a copy of the GNU Library General Public License
15    along with this library; see the file COPYING.LIB.  If not, write to
16    the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
17    Boston, MA 02110-1301, USA.
18*/
19
20#ifndef SegmentedString_h
21#define SegmentedString_h
22
23#include "platform/PlatformExport.h"
24#include "wtf/Deque.h"
25#include "wtf/text/StringBuilder.h"
26#include "wtf/text/TextPosition.h"
27#include "wtf/text/WTFString.h"
28
29namespace blink {
30
31class SegmentedString;
32
33class PLATFORM_EXPORT SegmentedSubstring {
34public:
35    SegmentedSubstring()
36        : m_length(0)
37        , m_doNotExcludeLineNumbers(true)
38        , m_is8Bit(false)
39    {
40        m_data.string16Ptr = 0;
41    }
42
43    SegmentedSubstring(const String& str)
44        : m_length(str.length())
45        , m_doNotExcludeLineNumbers(true)
46        , m_string(str)
47    {
48        if (m_length) {
49            if (m_string.is8Bit()) {
50                m_is8Bit = true;
51                m_data.string8Ptr = m_string.characters8();
52            } else {
53                m_is8Bit = false;
54                m_data.string16Ptr = m_string.characters16();
55            }
56        } else {
57            m_is8Bit = false;
58        }
59    }
60
61    void clear() { m_length = 0; m_data.string16Ptr = 0; m_is8Bit = false;}
62
63    bool is8Bit() { return m_is8Bit; }
64
65    bool excludeLineNumbers() const { return !m_doNotExcludeLineNumbers; }
66    bool doNotExcludeLineNumbers() const { return m_doNotExcludeLineNumbers; }
67
68    void setExcludeLineNumbers() { m_doNotExcludeLineNumbers = false; }
69
70    int numberOfCharactersConsumed() const { return m_string.length() - m_length; }
71
72    void appendTo(StringBuilder& builder) const
73    {
74        int offset = m_string.length() - m_length;
75
76        if (!offset) {
77            if (m_length)
78                builder.append(m_string);
79        } else {
80            builder.append(m_string.substring(offset, m_length));
81        }
82    }
83
84    UChar getCurrentChar8()
85    {
86        return *m_data.string8Ptr;
87    }
88
89    UChar getCurrentChar16()
90    {
91        return m_data.string16Ptr ? *m_data.string16Ptr : 0;
92    }
93
94    UChar incrementAndGetCurrentChar8()
95    {
96        ASSERT(m_data.string8Ptr);
97        return *++m_data.string8Ptr;
98    }
99
100    UChar incrementAndGetCurrentChar16()
101    {
102        ASSERT(m_data.string16Ptr);
103        return *++m_data.string16Ptr;
104    }
105
106    String currentSubString(unsigned length)
107    {
108        int offset = m_string.length() - m_length;
109        return m_string.substring(offset, length);
110    }
111
112    ALWAYS_INLINE UChar getCurrentChar()
113    {
114        ASSERT(m_length);
115        if (is8Bit())
116            return getCurrentChar8();
117        return getCurrentChar16();
118    }
119
120    ALWAYS_INLINE UChar incrementAndGetCurrentChar()
121    {
122        ASSERT(m_length);
123        if (is8Bit())
124            return incrementAndGetCurrentChar8();
125        return incrementAndGetCurrentChar16();
126    }
127
128public:
129    union {
130        const LChar* string8Ptr;
131        const UChar* string16Ptr;
132    } m_data;
133    int m_length;
134
135private:
136    bool m_doNotExcludeLineNumbers;
137    bool m_is8Bit;
138    String m_string;
139};
140
141class PLATFORM_EXPORT SegmentedString {
142public:
143    SegmentedString()
144        : m_pushedChar1(0)
145        , m_pushedChar2(0)
146        , m_currentChar(0)
147        , m_numberOfCharactersConsumedPriorToCurrentString(0)
148        , m_numberOfCharactersConsumedPriorToCurrentLine(0)
149        , m_currentLine(0)
150        , m_closed(false)
151        , m_empty(true)
152        , m_fastPathFlags(NoFastPath)
153        , m_advanceFunc(&SegmentedString::advanceEmpty)
154        , m_advanceAndUpdateLineNumberFunc(&SegmentedString::advanceEmpty)
155    {
156    }
157
158    SegmentedString(const String& str)
159        : m_pushedChar1(0)
160        , m_pushedChar2(0)
161        , m_currentString(str)
162        , m_currentChar(0)
163        , m_numberOfCharactersConsumedPriorToCurrentString(0)
164        , m_numberOfCharactersConsumedPriorToCurrentLine(0)
165        , m_currentLine(0)
166        , m_closed(false)
167        , m_empty(!str.length())
168        , m_fastPathFlags(NoFastPath)
169    {
170        if (m_currentString.m_length)
171            m_currentChar = m_currentString.getCurrentChar();
172        updateAdvanceFunctionPointers();
173    }
174
175    void clear();
176    void close();
177
178    void append(const SegmentedString&);
179    void prepend(const SegmentedString&);
180
181    bool excludeLineNumbers() const { return m_currentString.excludeLineNumbers(); }
182    void setExcludeLineNumbers();
183
184    void push(UChar c)
185    {
186        if (!m_pushedChar1) {
187            m_pushedChar1 = c;
188            m_currentChar = m_pushedChar1 ? m_pushedChar1 : m_currentString.getCurrentChar();
189            updateSlowCaseFunctionPointers();
190        } else {
191            ASSERT(!m_pushedChar2);
192            m_pushedChar2 = c;
193        }
194    }
195
196    bool isEmpty() const { return m_empty; }
197    unsigned length() const;
198
199    bool isClosed() const { return m_closed; }
200
201    enum LookAheadResult {
202        DidNotMatch,
203        DidMatch,
204        NotEnoughCharacters,
205    };
206
207    LookAheadResult lookAhead(const String& string) { return lookAheadInline(string, true); }
208    LookAheadResult lookAheadIgnoringCase(const String& string) { return lookAheadInline(string, false); }
209
210    void advance()
211    {
212        if (m_fastPathFlags & Use8BitAdvance) {
213            ASSERT(!m_pushedChar1);
214            bool haveOneCharacterLeft = (--m_currentString.m_length == 1);
215            m_currentChar = m_currentString.incrementAndGetCurrentChar8();
216
217            if (!haveOneCharacterLeft)
218                return;
219
220            updateSlowCaseFunctionPointers();
221
222            return;
223        }
224
225        (this->*m_advanceFunc)();
226    }
227
228    inline void advanceAndUpdateLineNumber()
229    {
230        if (m_fastPathFlags & Use8BitAdvance) {
231            ASSERT(!m_pushedChar1);
232
233            bool haveNewLine = (m_currentChar == '\n') & !!(m_fastPathFlags & Use8BitAdvanceAndUpdateLineNumbers);
234            bool haveOneCharacterLeft = (--m_currentString.m_length == 1);
235
236            m_currentChar = m_currentString.incrementAndGetCurrentChar8();
237
238            if (!(haveNewLine | haveOneCharacterLeft))
239                return;
240
241            if (haveNewLine) {
242                ++m_currentLine;
243                m_numberOfCharactersConsumedPriorToCurrentLine =  m_numberOfCharactersConsumedPriorToCurrentString + m_currentString.numberOfCharactersConsumed();
244            }
245
246            if (haveOneCharacterLeft)
247                updateSlowCaseFunctionPointers();
248
249            return;
250        }
251
252        (this->*m_advanceAndUpdateLineNumberFunc)();
253    }
254
255    void advanceAndASSERT(UChar expectedCharacter)
256    {
257        ASSERT_UNUSED(expectedCharacter, currentChar() == expectedCharacter);
258        advance();
259    }
260
261    void advanceAndASSERTIgnoringCase(UChar expectedCharacter)
262    {
263        ASSERT_UNUSED(expectedCharacter, WTF::Unicode::foldCase(currentChar()) == WTF::Unicode::foldCase(expectedCharacter));
264        advance();
265    }
266
267    void advancePastNonNewline()
268    {
269        ASSERT(currentChar() != '\n');
270        advance();
271    }
272
273    void advancePastNewlineAndUpdateLineNumber()
274    {
275        ASSERT(currentChar() == '\n');
276        if (!m_pushedChar1 && m_currentString.m_length > 1) {
277            int newLineFlag = m_currentString.doNotExcludeLineNumbers();
278            m_currentLine += newLineFlag;
279            if (newLineFlag)
280                m_numberOfCharactersConsumedPriorToCurrentLine = numberOfCharactersConsumed() + 1;
281            decrementAndCheckLength();
282            m_currentChar = m_currentString.incrementAndGetCurrentChar();
283            return;
284        }
285        advanceAndUpdateLineNumberSlowCase();
286    }
287
288    // Writes the consumed characters into consumedCharacters, which must
289    // have space for at least |count| characters.
290    void advance(unsigned count, UChar* consumedCharacters);
291
292    bool escaped() const { return m_pushedChar1; }
293
294    int numberOfCharactersConsumed() const
295    {
296        int numberOfPushedCharacters = 0;
297        if (m_pushedChar1) {
298            ++numberOfPushedCharacters;
299            if (m_pushedChar2)
300                ++numberOfPushedCharacters;
301        }
302        return m_numberOfCharactersConsumedPriorToCurrentString + m_currentString.numberOfCharactersConsumed() - numberOfPushedCharacters;
303    }
304
305    String toString() const;
306
307    UChar currentChar() const { return m_currentChar; }
308
309    // The method is moderately slow, comparing to currentLine method.
310    OrdinalNumber currentColumn() const;
311    OrdinalNumber currentLine() const;
312    // Sets value of line/column variables. Column is specified indirectly by a parameter columnAftreProlog
313    // which is a value of column that we should get after a prolog (first prologLength characters) has been consumed.
314    void setCurrentPosition(OrdinalNumber line, OrdinalNumber columnAftreProlog, int prologLength);
315
316private:
317    enum FastPathFlags {
318        NoFastPath = 0,
319        Use8BitAdvanceAndUpdateLineNumbers = 1 << 0,
320        Use8BitAdvance = 1 << 1,
321    };
322
323    void append(const SegmentedSubstring&);
324    void prepend(const SegmentedSubstring&);
325
326    void advance8();
327    void advance16();
328    void advanceAndUpdateLineNumber8();
329    void advanceAndUpdateLineNumber16();
330    void advanceSlowCase();
331    void advanceAndUpdateLineNumberSlowCase();
332    void advanceEmpty();
333    void advanceSubstring();
334
335    void updateSlowCaseFunctionPointers();
336
337    void decrementAndCheckLength()
338    {
339        ASSERT(m_currentString.m_length > 1);
340        if (--m_currentString.m_length == 1)
341            updateSlowCaseFunctionPointers();
342    }
343
344    void updateAdvanceFunctionPointers()
345    {
346        if ((m_currentString.m_length > 1) && !m_pushedChar1) {
347            if (m_currentString.is8Bit()) {
348                m_advanceFunc = &SegmentedString::advance8;
349                m_fastPathFlags = Use8BitAdvance;
350                if (m_currentString.doNotExcludeLineNumbers()) {
351                    m_advanceAndUpdateLineNumberFunc = &SegmentedString::advanceAndUpdateLineNumber8;
352                    m_fastPathFlags |= Use8BitAdvanceAndUpdateLineNumbers;
353                } else {
354                    m_advanceAndUpdateLineNumberFunc = &SegmentedString::advance8;
355                }
356                return;
357            }
358
359            m_advanceFunc = &SegmentedString::advance16;
360            m_fastPathFlags = NoFastPath;
361            if (m_currentString.doNotExcludeLineNumbers())
362                m_advanceAndUpdateLineNumberFunc = &SegmentedString::advanceAndUpdateLineNumber16;
363            else
364                m_advanceAndUpdateLineNumberFunc = &SegmentedString::advance16;
365            return;
366        }
367
368        if (!m_currentString.m_length && !isComposite()) {
369            m_advanceFunc = &SegmentedString::advanceEmpty;
370            m_fastPathFlags = NoFastPath;
371            m_advanceAndUpdateLineNumberFunc = &SegmentedString::advanceEmpty;
372        }
373
374        updateSlowCaseFunctionPointers();
375    }
376
377    inline LookAheadResult lookAheadInline(const String& string, bool caseSensitive)
378    {
379        if (!m_pushedChar1 && string.length() <= static_cast<unsigned>(m_currentString.m_length)) {
380            String currentSubstring = m_currentString.currentSubString(string.length());
381            if (currentSubstring.startsWith(string, caseSensitive))
382                return DidMatch;
383            return DidNotMatch;
384        }
385        return lookAheadSlowCase(string, caseSensitive);
386    }
387
388    LookAheadResult lookAheadSlowCase(const String& string, bool caseSensitive)
389    {
390        unsigned count = string.length();
391        if (count > length())
392            return NotEnoughCharacters;
393        UChar* consumedCharacters;
394        String consumedString = String::createUninitialized(count, consumedCharacters);
395        advance(count, consumedCharacters);
396        LookAheadResult result = DidNotMatch;
397        if (consumedString.startsWith(string, caseSensitive))
398            result = DidMatch;
399        prepend(SegmentedString(consumedString));
400        return result;
401    }
402
403    bool isComposite() const { return !m_substrings.isEmpty(); }
404
405    UChar m_pushedChar1;
406    UChar m_pushedChar2;
407    SegmentedSubstring m_currentString;
408    UChar m_currentChar;
409    int m_numberOfCharactersConsumedPriorToCurrentString;
410    int m_numberOfCharactersConsumedPriorToCurrentLine;
411    int m_currentLine;
412    Deque<SegmentedSubstring> m_substrings;
413    bool m_closed;
414    bool m_empty;
415    unsigned char m_fastPathFlags;
416    void (SegmentedString::*m_advanceFunc)();
417    void (SegmentedString::*m_advanceAndUpdateLineNumberFunc)();
418};
419
420}
421
422#endif
423