1/*
2 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2012 Apple Inc. All rights reserved.
3 * Copyright (C) 2005 Alexey Proskuryakov.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27#include "config.h"
28#include "core/editing/TextIterator.h"
29
30#include "HTMLNames.h"
31#include "bindings/v8/ExceptionStatePlaceholder.h"
32#include "core/dom/Document.h"
33#include "core/dom/NodeTraversal.h"
34#include "core/dom/Range.h"
35#include "core/dom/shadow/ShadowRoot.h"
36#include "core/editing/VisiblePosition.h"
37#include "core/editing/VisibleUnits.h"
38#include "core/editing/htmlediting.h"
39#include "core/html/HTMLElement.h"
40#include "core/html/HTMLTextFormControlElement.h"
41#include "core/platform/graphics/Font.h"
42#include "core/platform/text/TextBoundaries.h"
43#include "core/platform/text/TextBreakIteratorInternalICU.h"
44#include "core/rendering/InlineTextBox.h"
45#include "core/rendering/RenderImage.h"
46#include "core/rendering/RenderTableCell.h"
47#include "core/rendering/RenderTableRow.h"
48#include "core/rendering/RenderTextControl.h"
49#include "core/rendering/RenderTextFragment.h"
50#include "wtf/text/CString.h"
51#include "wtf/text/StringBuilder.h"
52#include "wtf/unicode/CharacterNames.h"
53#include <unicode/usearch.h>
54
55using namespace WTF::Unicode;
56using namespace std;
57
58namespace WebCore {
59
60using namespace HTMLNames;
61
62// Buffer that knows how to compare with a search target.
63// Keeps enough of the previous text to be able to search in the future, but no more.
64// Non-breaking spaces are always equal to normal spaces.
65// Case folding is also done if the CaseInsensitive option is specified.
66// Matches are further filtered if the AtWordStarts option is specified, although some
67// matches inside a word are permitted if TreatMedialCapitalAsWordStart is specified as well.
68class SearchBuffer {
69    WTF_MAKE_NONCOPYABLE(SearchBuffer);
70public:
71    SearchBuffer(const String& target, FindOptions);
72    ~SearchBuffer();
73
74    // Returns number of characters appended; guaranteed to be in the range [1, length].
75    template<typename CharType>
76    void append(const CharType*, size_t length);
77    size_t numberOfCharactersJustAppended() const { return m_numberOfCharactersJustAppended; }
78
79    bool needsMoreContext() const;
80    void prependContext(const UChar*, size_t length);
81    void reachedBreak();
82
83    // Result is the size in characters of what was found.
84    // And <startOffset> is the number of characters back to the start of what was found.
85    size_t search(size_t& startOffset);
86    bool atBreak() const;
87
88private:
89    bool isBadMatch(const UChar*, size_t length) const;
90    bool isWordStartMatch(size_t start, size_t length) const;
91
92    Vector<UChar> m_target;
93    FindOptions m_options;
94
95    Vector<UChar> m_buffer;
96    size_t m_overlap;
97    size_t m_prefixLength;
98    size_t m_numberOfCharactersJustAppended;
99    bool m_atBreak;
100    bool m_needsMoreContext;
101
102    bool m_targetRequiresKanaWorkaround;
103    Vector<UChar> m_normalizedTarget;
104    mutable Vector<UChar> m_normalizedMatch;
105};
106
107// --------
108
109static const unsigned bitsInWord = sizeof(unsigned) * 8;
110static const unsigned bitInWordMask = bitsInWord - 1;
111
112BitStack::BitStack()
113    : m_size(0)
114{
115}
116
117BitStack::~BitStack()
118{
119}
120
121void BitStack::push(bool bit)
122{
123    unsigned index = m_size / bitsInWord;
124    unsigned shift = m_size & bitInWordMask;
125    if (!shift && index == m_words.size()) {
126        m_words.grow(index + 1);
127        m_words[index] = 0;
128    }
129    unsigned& word = m_words[index];
130    unsigned mask = 1U << shift;
131    if (bit)
132        word |= mask;
133    else
134        word &= ~mask;
135    ++m_size;
136}
137
138void BitStack::pop()
139{
140    if (m_size)
141        --m_size;
142}
143
144bool BitStack::top() const
145{
146    if (!m_size)
147        return false;
148    unsigned shift = (m_size - 1) & bitInWordMask;
149    return m_words.last() & (1U << shift);
150}
151
152unsigned BitStack::size() const
153{
154    return m_size;
155}
156
157// --------
158
159#if !ASSERT_DISABLED
160
161static unsigned depthCrossingShadowBoundaries(Node* node)
162{
163    unsigned depth = 0;
164    for (Node* parent = node->parentOrShadowHostNode(); parent; parent = parent->parentOrShadowHostNode())
165        ++depth;
166    return depth;
167}
168
169#endif
170
171// This function is like Range::pastLastNode, except for the fact that it can climb up out of shadow trees.
172static Node* nextInPreOrderCrossingShadowBoundaries(Node* rangeEndContainer, int rangeEndOffset)
173{
174    if (!rangeEndContainer)
175        return 0;
176    if (rangeEndOffset >= 0 && !rangeEndContainer->offsetInCharacters()) {
177        if (Node* next = rangeEndContainer->childNode(rangeEndOffset))
178            return next;
179    }
180    for (Node* node = rangeEndContainer; node; node = node->parentOrShadowHostNode()) {
181        if (Node* next = node->nextSibling())
182            return next;
183    }
184    return 0;
185}
186
187// --------
188
189static inline bool fullyClipsContents(Node* node)
190{
191    RenderObject* renderer = node->renderer();
192    if (!renderer || !renderer->isBox() || !renderer->hasOverflowClip())
193        return false;
194    return toRenderBox(renderer)->size().isEmpty();
195}
196
197static inline bool ignoresContainerClip(Node* node)
198{
199    RenderObject* renderer = node->renderer();
200    if (!renderer || renderer->isText())
201        return false;
202    return renderer->style()->hasOutOfFlowPosition();
203}
204
205static void pushFullyClippedState(BitStack& stack, Node* node)
206{
207    ASSERT(stack.size() == depthCrossingShadowBoundaries(node));
208
209    // Push true if this node full clips its contents, or if a parent already has fully
210    // clipped and this is not a node that ignores its container's clip.
211    stack.push(fullyClipsContents(node) || (stack.top() && !ignoresContainerClip(node)));
212}
213
214static void setUpFullyClippedStack(BitStack& stack, Node* node)
215{
216    // Put the nodes in a vector so we can iterate in reverse order.
217    Vector<Node*, 100> ancestry;
218    for (Node* parent = node->parentOrShadowHostNode(); parent; parent = parent->parentOrShadowHostNode())
219        ancestry.append(parent);
220
221    // Call pushFullyClippedState on each node starting with the earliest ancestor.
222    size_t size = ancestry.size();
223    for (size_t i = 0; i < size; ++i)
224        pushFullyClippedState(stack, ancestry[size - i - 1]);
225    pushFullyClippedState(stack, node);
226
227    ASSERT(stack.size() == 1 + depthCrossingShadowBoundaries(node));
228}
229
230// --------
231
232TextIterator::TextIterator(const Range* r, TextIteratorBehavior behavior)
233    : m_startContainer(0)
234    , m_startOffset(0)
235    , m_endContainer(0)
236    , m_endOffset(0)
237    , m_positionNode(0)
238    , m_textLength(0)
239    , m_remainingTextBox(0)
240    , m_firstLetterText(0)
241    , m_sortedTextBoxesPosition(0)
242    , m_emitsCharactersBetweenAllVisiblePositions(behavior & TextIteratorEmitsCharactersBetweenAllVisiblePositions)
243    , m_entersTextControls(behavior & TextIteratorEntersTextControls)
244    , m_emitsTextWithoutTranscoding(behavior & TextIteratorEmitsTextsWithoutTranscoding)
245    , m_emitsOriginalText(behavior & TextIteratorEmitsOriginalText)
246    , m_handledFirstLetter(false)
247    , m_ignoresStyleVisibility(behavior & TextIteratorIgnoresStyleVisibility)
248    , m_emitsObjectReplacementCharacters(behavior & TextIteratorEmitsObjectReplacementCharacters)
249    , m_stopsOnFormControls(behavior & TextIteratorStopsOnFormControls)
250    , m_shouldStop(false)
251    , m_emitsImageAltText(behavior & TextIteratorEmitsImageAltText)
252{
253    if (!r)
254        return;
255
256    // get and validate the range endpoints
257    Node* startContainer = r->startContainer();
258    if (!startContainer)
259        return;
260    int startOffset = r->startOffset();
261    Node* endContainer = r->endContainer();
262    int endOffset = r->endOffset();
263
264    // Callers should be handing us well-formed ranges. If we discover that this isn't
265    // the case, we could consider changing this assertion to an early return.
266    ASSERT(r->boundaryPointsValid());
267
268    // remember range - this does not change
269    m_startContainer = startContainer;
270    m_startOffset = startOffset;
271    m_endContainer = endContainer;
272    m_endOffset = endOffset;
273
274    // set up the current node for processing
275    m_node = r->firstNode();
276    if (!m_node)
277        return;
278    setUpFullyClippedStack(m_fullyClippedStack, m_node);
279    m_offset = m_node == m_startContainer ? m_startOffset : 0;
280    m_handledNode = false;
281    m_handledChildren = false;
282
283    // calculate first out of bounds node
284    m_pastEndNode = nextInPreOrderCrossingShadowBoundaries(endContainer, endOffset);
285
286    // initialize node processing state
287    m_needsAnotherNewline = false;
288    m_textBox = 0;
289
290    // initialize record of previous node processing
291    m_hasEmitted = false;
292    m_lastTextNode = 0;
293    m_lastTextNodeEndedWithCollapsedSpace = false;
294    m_lastCharacter = 0;
295
296#ifndef NDEBUG
297    // need this just because of the assert in advance()
298    m_positionNode = m_node;
299#endif
300
301    // identify the first run
302    advance();
303}
304
305TextIterator::~TextIterator()
306{
307}
308
309void TextIterator::advance()
310{
311    if (m_shouldStop)
312        return;
313
314    // reset the run information
315    m_positionNode = 0;
316    m_textLength = 0;
317
318    // handle remembered node that needed a newline after the text node's newline
319    if (m_needsAnotherNewline) {
320        // Emit the extra newline, and position it *inside* m_node, after m_node's
321        // contents, in case it's a block, in the same way that we position the first
322        // newline.  The range for the emitted newline should start where the line
323        // break begins.
324        // FIXME: It would be cleaner if we emitted two newlines during the last
325        // iteration, instead of using m_needsAnotherNewline.
326        Node* baseNode = m_node->lastChild() ? m_node->lastChild() : m_node;
327        emitCharacter('\n', baseNode->parentNode(), baseNode, 1, 1);
328        m_needsAnotherNewline = false;
329        return;
330    }
331
332    if (!m_textBox && m_remainingTextBox) {
333        m_textBox = m_remainingTextBox;
334        m_remainingTextBox = 0;
335        m_firstLetterText = 0;
336        m_offset = 0;
337    }
338    // handle remembered text box
339    if (m_textBox) {
340        handleTextBox();
341        if (m_positionNode)
342            return;
343    }
344
345    while (m_node && m_node != m_pastEndNode) {
346        if (!m_shouldStop && m_stopsOnFormControls && HTMLFormControlElement::enclosingFormControlElement(m_node))
347            m_shouldStop = true;
348
349        // if the range ends at offset 0 of an element, represent the
350        // position, but not the content, of that element e.g. if the
351        // node is a blockflow element, emit a newline that
352        // precedes the element
353        if (m_node == m_endContainer && m_endOffset == 0) {
354            representNodeOffsetZero();
355            m_node = 0;
356            return;
357        }
358
359        RenderObject* renderer = m_node->renderer();
360        if (!renderer) {
361            m_handledNode = true;
362            m_handledChildren = true;
363        } else {
364            // handle current node according to its type
365            if (!m_handledNode) {
366                if (renderer->isText() && m_node->nodeType() == Node::TEXT_NODE) // FIXME: What about CDATA_SECTION_NODE?
367                    m_handledNode = handleTextNode();
368                else if (renderer && (renderer->isImage() || renderer->isWidget() ||
369                         (renderer->node() && renderer->node()->isElementNode() &&
370                          (toElement(renderer->node())->isFormControlElement()
371                          || toElement(renderer->node())->hasTagName(legendTag)
372                          || toElement(renderer->node())->hasTagName(meterTag)
373                          || toElement(renderer->node())->hasTagName(progressTag)))))
374                    m_handledNode = handleReplacedElement();
375                else
376                    m_handledNode = handleNonTextNode();
377                if (m_positionNode)
378                    return;
379            }
380        }
381
382        // find a new current node to handle in depth-first manner,
383        // calling exitNode() as we come back thru a parent node
384        Node* next = m_handledChildren ? 0 : m_node->firstChild();
385        m_offset = 0;
386        if (!next) {
387            next = m_node->nextSibling();
388            if (!next) {
389                bool pastEnd = NodeTraversal::next(m_node) == m_pastEndNode;
390                Node* parentNode = m_node->parentOrShadowHostNode();
391                while (!next && parentNode) {
392                    if ((pastEnd && parentNode == m_endContainer) || m_endContainer->isDescendantOf(parentNode))
393                        return;
394                    bool haveRenderer = m_node->renderer();
395                    m_node = parentNode;
396                    m_fullyClippedStack.pop();
397                    parentNode = m_node->parentOrShadowHostNode();
398                    if (haveRenderer)
399                        exitNode();
400                    if (m_positionNode) {
401                        m_handledNode = true;
402                        m_handledChildren = true;
403                        return;
404                    }
405                    next = m_node->nextSibling();
406                }
407            }
408            m_fullyClippedStack.pop();
409        }
410
411        // set the new current node
412        m_node = next;
413        if (m_node)
414            pushFullyClippedState(m_fullyClippedStack, m_node);
415        m_handledNode = false;
416        m_handledChildren = false;
417        m_handledFirstLetter = false;
418        m_firstLetterText = 0;
419
420        // how would this ever be?
421        if (m_positionNode)
422            return;
423    }
424}
425
426UChar TextIterator::characterAt(unsigned index) const
427{
428    ASSERT_WITH_SECURITY_IMPLICATION(index < static_cast<unsigned>(length()));
429    if (!(index < static_cast<unsigned>(length())))
430        return 0;
431
432    if (m_singleCharacterBuffer) {
433        ASSERT(!index);
434        ASSERT(length() == 1);
435        return m_singleCharacterBuffer;
436    }
437
438    return string()[startOffset() + index];
439}
440
441String TextIterator::substring(unsigned position, unsigned length) const
442{
443    ASSERT_WITH_SECURITY_IMPLICATION(position < static_cast<unsigned>(this->length()));
444    ASSERT_WITH_SECURITY_IMPLICATION(position + length <= static_cast<unsigned>(this->length()));
445    if (!length)
446        return emptyString();
447    if (m_singleCharacterBuffer) {
448        ASSERT(!position);
449        ASSERT(length == 1);
450        return String(&m_singleCharacterBuffer, 1);
451    }
452    return string().substring(startOffset() + position, length);
453}
454
455void TextIterator::appendTextToStringBuilder(StringBuilder& builder, unsigned position, unsigned maxLength) const
456{
457    unsigned lengthToAppend = std::min(static_cast<unsigned>(length()) - position, maxLength);
458    if (!lengthToAppend)
459        return;
460    if (m_singleCharacterBuffer) {
461        ASSERT(!position);
462        builder.append(m_singleCharacterBuffer);
463    } else {
464        builder.append(string(), startOffset() + position, lengthToAppend);
465    }
466}
467
468bool TextIterator::handleTextNode()
469{
470    if (m_fullyClippedStack.top() && !m_ignoresStyleVisibility)
471        return false;
472
473    RenderText* renderer = toRenderText(m_node->renderer());
474
475    m_lastTextNode = m_node;
476    String str = renderer->text();
477
478    // handle pre-formatted text
479    if (!renderer->style()->collapseWhiteSpace()) {
480        int runStart = m_offset;
481        if (m_lastTextNodeEndedWithCollapsedSpace && hasVisibleTextNode(renderer)) {
482            emitCharacter(' ', m_node, 0, runStart, runStart);
483            return false;
484        }
485        if (!m_handledFirstLetter && renderer->isTextFragment() && !m_offset) {
486            handleTextNodeFirstLetter(static_cast<RenderTextFragment*>(renderer));
487            if (m_firstLetterText) {
488                String firstLetter = m_firstLetterText->text();
489                emitText(m_node, m_firstLetterText, m_offset, m_offset + firstLetter.length());
490                m_firstLetterText = 0;
491                m_textBox = 0;
492                return false;
493            }
494        }
495        if (renderer->style()->visibility() != VISIBLE && !m_ignoresStyleVisibility)
496            return false;
497        int strLength = str.length();
498        int end = (m_node == m_endContainer) ? m_endOffset : INT_MAX;
499        int runEnd = min(strLength, end);
500
501        if (runStart >= runEnd)
502            return true;
503
504        emitText(m_node, runStart, runEnd);
505        return true;
506    }
507
508    if (renderer->firstTextBox())
509        m_textBox = renderer->firstTextBox();
510
511    bool shouldHandleFirstLetter = !m_handledFirstLetter && renderer->isTextFragment() && !m_offset;
512    if (shouldHandleFirstLetter)
513        handleTextNodeFirstLetter(static_cast<RenderTextFragment*>(renderer));
514
515    if (!renderer->firstTextBox() && str.length() > 0 && !shouldHandleFirstLetter) {
516        if (renderer->style()->visibility() != VISIBLE && !m_ignoresStyleVisibility)
517            return false;
518        m_lastTextNodeEndedWithCollapsedSpace = true; // entire block is collapsed space
519        return true;
520    }
521
522    if (m_firstLetterText)
523        renderer = m_firstLetterText;
524
525    // Used when text boxes are out of order (Hebrew/Arabic w/ embeded LTR text)
526    if (renderer->containsReversedText()) {
527        m_sortedTextBoxes.clear();
528        for (InlineTextBox* textBox = renderer->firstTextBox(); textBox; textBox = textBox->nextTextBox()) {
529            m_sortedTextBoxes.append(textBox);
530        }
531        std::sort(m_sortedTextBoxes.begin(), m_sortedTextBoxes.end(), InlineTextBox::compareByStart);
532        m_sortedTextBoxesPosition = 0;
533        m_textBox = m_sortedTextBoxes.isEmpty() ? 0 : m_sortedTextBoxes[0];
534    }
535
536    handleTextBox();
537    return true;
538}
539
540void TextIterator::handleTextBox()
541{
542    RenderText* renderer = m_firstLetterText ? m_firstLetterText : toRenderText(m_node->renderer());
543    if (renderer->style()->visibility() != VISIBLE && !m_ignoresStyleVisibility) {
544        m_textBox = 0;
545        return;
546    }
547    String str = renderer->text();
548    unsigned start = m_offset;
549    unsigned end = (m_node == m_endContainer) ? static_cast<unsigned>(m_endOffset) : INT_MAX;
550    while (m_textBox) {
551        unsigned textBoxStart = m_textBox->start();
552        unsigned runStart = max(textBoxStart, start);
553
554        // Check for collapsed space at the start of this run.
555        InlineTextBox* firstTextBox = renderer->containsReversedText() ? (m_sortedTextBoxes.isEmpty() ? 0 : m_sortedTextBoxes[0]) : renderer->firstTextBox();
556        bool needSpace = m_lastTextNodeEndedWithCollapsedSpace
557            || (m_textBox == firstTextBox && textBoxStart == runStart && runStart > 0);
558        if (needSpace && !isCollapsibleWhitespace(m_lastCharacter) && m_lastCharacter) {
559            if (m_lastTextNode == m_node && runStart > 0 && str[runStart - 1] == ' ') {
560                unsigned spaceRunStart = runStart - 1;
561                while (spaceRunStart > 0 && str[spaceRunStart - 1] == ' ')
562                    --spaceRunStart;
563                emitText(m_node, renderer, spaceRunStart, spaceRunStart + 1);
564            } else
565                emitCharacter(' ', m_node, 0, runStart, runStart);
566            return;
567        }
568        unsigned textBoxEnd = textBoxStart + m_textBox->len();
569        unsigned runEnd = min(textBoxEnd, end);
570
571        // Determine what the next text box will be, but don't advance yet
572        InlineTextBox* nextTextBox = 0;
573        if (renderer->containsReversedText()) {
574            if (m_sortedTextBoxesPosition + 1 < m_sortedTextBoxes.size())
575                nextTextBox = m_sortedTextBoxes[m_sortedTextBoxesPosition + 1];
576        } else
577            nextTextBox = m_textBox->nextTextBox();
578        ASSERT(!nextTextBox || nextTextBox->renderer() == renderer);
579
580        if (runStart < runEnd) {
581            // Handle either a single newline character (which becomes a space),
582            // or a run of characters that does not include a newline.
583            // This effectively translates newlines to spaces without copying the text.
584            if (str[runStart] == '\n') {
585                emitCharacter(' ', m_node, 0, runStart, runStart + 1);
586                m_offset = runStart + 1;
587            } else {
588                size_t subrunEnd = str.find('\n', runStart);
589                if (subrunEnd == notFound || subrunEnd > runEnd)
590                    subrunEnd = runEnd;
591
592                m_offset = subrunEnd;
593                emitText(m_node, renderer, runStart, subrunEnd);
594            }
595
596            // If we are doing a subrun that doesn't go to the end of the text box,
597            // come back again to finish handling this text box; don't advance to the next one.
598            if (static_cast<unsigned>(m_positionEndOffset) < textBoxEnd)
599                return;
600
601            // Advance and return
602            unsigned nextRunStart = nextTextBox ? nextTextBox->start() : str.length();
603            if (nextRunStart > runEnd)
604                m_lastTextNodeEndedWithCollapsedSpace = true; // collapsed space between runs or at the end
605            m_textBox = nextTextBox;
606            if (renderer->containsReversedText())
607                ++m_sortedTextBoxesPosition;
608            return;
609        }
610        // Advance and continue
611        m_textBox = nextTextBox;
612        if (renderer->containsReversedText())
613            ++m_sortedTextBoxesPosition;
614    }
615    if (!m_textBox && m_remainingTextBox) {
616        m_textBox = m_remainingTextBox;
617        m_remainingTextBox = 0;
618        m_firstLetterText = 0;
619        m_offset = 0;
620        handleTextBox();
621    }
622}
623
624static inline RenderText* firstRenderTextInFirstLetter(RenderObject* firstLetter)
625{
626    if (!firstLetter)
627        return 0;
628
629    // FIXME: Should this check descendent objects?
630    for (RenderObject* current = firstLetter->firstChild(); current; current = current->nextSibling()) {
631        if (current->isText())
632            return toRenderText(current);
633    }
634    return 0;
635}
636
637void TextIterator::handleTextNodeFirstLetter(RenderTextFragment* renderer)
638{
639    if (renderer->firstLetter()) {
640        RenderObject* r = renderer->firstLetter();
641        if (r->style()->visibility() != VISIBLE && !m_ignoresStyleVisibility)
642            return;
643        if (RenderText* firstLetter = firstRenderTextInFirstLetter(r)) {
644            m_handledFirstLetter = true;
645            m_remainingTextBox = m_textBox;
646            m_textBox = firstLetter->firstTextBox();
647            m_sortedTextBoxes.clear();
648            m_firstLetterText = firstLetter;
649        }
650    }
651    m_handledFirstLetter = true;
652}
653
654bool TextIterator::handleReplacedElement()
655{
656    if (m_fullyClippedStack.top())
657        return false;
658
659    RenderObject* renderer = m_node->renderer();
660    if (renderer->style()->visibility() != VISIBLE && !m_ignoresStyleVisibility)
661        return false;
662
663    if (m_lastTextNodeEndedWithCollapsedSpace) {
664        emitCharacter(' ', m_lastTextNode->parentNode(), m_lastTextNode, 1, 1);
665        return false;
666    }
667
668    if (m_entersTextControls && renderer->isTextControl()) {
669        if (HTMLElement* innerTextElement = toRenderTextControl(renderer)->textFormControlElement()->innerTextElement()) {
670            m_node = innerTextElement->containingShadowRoot();
671            pushFullyClippedState(m_fullyClippedStack, m_node);
672            m_offset = 0;
673            return false;
674        }
675    }
676
677    m_hasEmitted = true;
678
679    if (m_emitsObjectReplacementCharacters && renderer && renderer->isReplaced()) {
680        emitCharacter(objectReplacementCharacter, m_node->parentNode(), m_node, 0, 1);
681        return true;
682    }
683
684    if (m_emitsCharactersBetweenAllVisiblePositions) {
685        // We want replaced elements to behave like punctuation for boundary
686        // finding, and to simply take up space for the selection preservation
687        // code in moveParagraphs, so we use a comma.
688        emitCharacter(',', m_node->parentNode(), m_node, 0, 1);
689        return true;
690    }
691
692    m_positionNode = m_node->parentNode();
693    m_positionOffsetBaseNode = m_node;
694    m_positionStartOffset = 0;
695    m_positionEndOffset = 1;
696    m_singleCharacterBuffer = 0;
697
698    if (m_emitsImageAltText && renderer->isImage() && renderer->isRenderImage()) {
699        m_text = toRenderImage(renderer)->altText();
700        if (!m_text.isEmpty()) {
701            m_textLength = m_text.length();
702            m_lastCharacter = m_text[m_textLength - 1];
703            return true;
704        }
705    }
706
707    m_textLength = 0;
708    m_lastCharacter = 0;
709
710    return true;
711}
712
713bool TextIterator::hasVisibleTextNode(RenderText* renderer)
714{
715    if (renderer->style()->visibility() == VISIBLE)
716        return true;
717    if (renderer->isTextFragment()) {
718        RenderTextFragment* fragment = static_cast<RenderTextFragment*>(renderer);
719        if (fragment->firstLetter() && fragment->firstLetter()->style()->visibility() == VISIBLE)
720            return true;
721    }
722    return false;
723}
724
725static bool shouldEmitTabBeforeNode(Node* node)
726{
727    RenderObject* r = node->renderer();
728
729    // Table cells are delimited by tabs.
730    if (!r || !isTableCell(node))
731        return false;
732
733    // Want a tab before every cell other than the first one
734    RenderTableCell* rc = toRenderTableCell(r);
735    RenderTable* t = rc->table();
736    return t && (t->cellBefore(rc) || t->cellAbove(rc));
737}
738
739static bool shouldEmitNewlineForNode(Node* node, bool emitsOriginalText)
740{
741    RenderObject* renderer = node->renderer();
742
743    if (renderer ? !renderer->isBR() : !node->hasTagName(brTag))
744        return false;
745    return emitsOriginalText || !(node->isInShadowTree() && node->shadowHost()->hasTagName(inputTag));
746}
747
748static bool shouldEmitNewlinesBeforeAndAfterNode(Node* node)
749{
750    // Block flow (versus inline flow) is represented by having
751    // a newline both before and after the element.
752    RenderObject* r = node->renderer();
753    if (!r) {
754        return (node->hasTagName(blockquoteTag)
755                || node->hasTagName(ddTag)
756                || node->hasTagName(divTag)
757                || node->hasTagName(dlTag)
758                || node->hasTagName(dtTag)
759                || node->hasTagName(h1Tag)
760                || node->hasTagName(h2Tag)
761                || node->hasTagName(h3Tag)
762                || node->hasTagName(h4Tag)
763                || node->hasTagName(h5Tag)
764                || node->hasTagName(h6Tag)
765                || node->hasTagName(hrTag)
766                || node->hasTagName(liTag)
767                || node->hasTagName(listingTag)
768                || node->hasTagName(olTag)
769                || node->hasTagName(pTag)
770                || node->hasTagName(preTag)
771                || node->hasTagName(trTag)
772                || node->hasTagName(ulTag));
773    }
774
775    // Need to make an exception for table cells, because they are blocks, but we
776    // want them tab-delimited rather than having newlines before and after.
777    if (isTableCell(node))
778        return false;
779
780    // Need to make an exception for table row elements, because they are neither
781    // "inline" or "RenderBlock", but we want newlines for them.
782    if (r->isTableRow()) {
783        RenderTable* t = toRenderTableRow(r)->table();
784        if (t && !t->isInline())
785            return true;
786    }
787
788    return !r->isInline() && r->isRenderBlock()
789        && !r->isFloatingOrOutOfFlowPositioned() && !r->isBody() && !r->isRubyText();
790}
791
792static bool shouldEmitNewlineAfterNode(Node* node)
793{
794    // FIXME: It should be better but slower to create a VisiblePosition here.
795    if (!shouldEmitNewlinesBeforeAndAfterNode(node))
796        return false;
797    // Check if this is the very last renderer in the document.
798    // If so, then we should not emit a newline.
799    while ((node = NodeTraversal::nextSkippingChildren(node)))
800        if (node->renderer())
801            return true;
802    return false;
803}
804
805static bool shouldEmitNewlineBeforeNode(Node* node)
806{
807    return shouldEmitNewlinesBeforeAndAfterNode(node);
808}
809
810static bool shouldEmitExtraNewlineForNode(Node* node)
811{
812    // When there is a significant collapsed bottom margin, emit an extra
813    // newline for a more realistic result.  We end up getting the right
814    // result even without margin collapsing. For example: <div><p>text</p></div>
815    // will work right even if both the <div> and the <p> have bottom margins.
816    RenderObject* r = node->renderer();
817    if (!r || !r->isBox())
818        return false;
819
820    // NOTE: We only do this for a select set of nodes, and fwiw WinIE appears
821    // not to do this at all
822    if (node->hasTagName(h1Tag)
823        || node->hasTagName(h2Tag)
824        || node->hasTagName(h3Tag)
825        || node->hasTagName(h4Tag)
826        || node->hasTagName(h5Tag)
827        || node->hasTagName(h6Tag)
828        || node->hasTagName(pTag)) {
829        RenderStyle* style = r->style();
830        if (style) {
831            int bottomMargin = toRenderBox(r)->collapsedMarginAfter();
832            int fontSize = style->fontDescription().computedPixelSize();
833            if (bottomMargin * 2 >= fontSize)
834                return true;
835        }
836    }
837
838    return false;
839}
840
841static int collapsedSpaceLength(RenderText* renderer, int textEnd)
842{
843    const String& text = renderer->text();
844    int length = text.length();
845    for (int i = textEnd; i < length; ++i) {
846        if (!renderer->style()->isCollapsibleWhiteSpace(text[i]))
847            return i - textEnd;
848    }
849
850    return length - textEnd;
851}
852
853static int maxOffsetIncludingCollapsedSpaces(Node* node)
854{
855    int offset = caretMaxOffset(node);
856
857    if (node->renderer() && node->renderer()->isText())
858        offset += collapsedSpaceLength(toRenderText(node->renderer()), offset);
859
860    return offset;
861}
862
863// Whether or not we should emit a character as we enter m_node (if it's a container) or as we hit it (if it's atomic).
864bool TextIterator::shouldRepresentNodeOffsetZero()
865{
866    if (m_emitsCharactersBetweenAllVisiblePositions && m_node->renderer() && m_node->renderer()->isTable())
867        return true;
868
869    // Leave element positioned flush with start of a paragraph
870    // (e.g. do not insert tab before a table cell at the start of a paragraph)
871    if (m_lastCharacter == '\n')
872        return false;
873
874    // Otherwise, show the position if we have emitted any characters
875    if (m_hasEmitted)
876        return true;
877
878    // We've not emitted anything yet. Generally, there is no need for any positioning then.
879    // The only exception is when the element is visually not in the same line as
880    // the start of the range (e.g. the range starts at the end of the previous paragraph).
881    // NOTE: Creating VisiblePositions and comparing them is relatively expensive, so we
882    // make quicker checks to possibly avoid that. Another check that we could make is
883    // is whether the inline vs block flow changed since the previous visible element.
884    // I think we're already in a special enough case that that won't be needed, tho.
885
886    // No character needed if this is the first node in the range.
887    if (m_node == m_startContainer)
888        return false;
889
890    // If we are outside the start container's subtree, assume we need to emit.
891    // FIXME: m_startContainer could be an inline block
892    if (!m_node->isDescendantOf(m_startContainer))
893        return true;
894
895    // If we started as m_startContainer offset 0 and the current node is a descendant of
896    // the start container, we already had enough context to correctly decide whether to
897    // emit after a preceding block. We chose not to emit (m_hasEmitted is false),
898    // so don't second guess that now.
899    // NOTE: Is this really correct when m_node is not a leftmost descendant? Probably
900    // immaterial since we likely would have already emitted something by now.
901    if (m_startOffset == 0)
902        return false;
903
904    // If this node is unrendered or invisible the VisiblePosition checks below won't have much meaning.
905    // Additionally, if the range we are iterating over contains huge sections of unrendered content,
906    // we would create VisiblePositions on every call to this function without this check.
907    if (!m_node->renderer() || m_node->renderer()->style()->visibility() != VISIBLE
908        || (m_node->renderer()->isBlockFlow() && !toRenderBlock(m_node->renderer())->height() && !m_node->hasTagName(bodyTag)))
909        return false;
910
911    // The startPos.isNotNull() check is needed because the start could be before the body,
912    // and in that case we'll get null. We don't want to put in newlines at the start in that case.
913    // The currPos.isNotNull() check is needed because positions in non-HTML content
914    // (like SVG) do not have visible positions, and we don't want to emit for them either.
915    VisiblePosition startPos = VisiblePosition(Position(m_startContainer, m_startOffset, Position::PositionIsOffsetInAnchor), DOWNSTREAM);
916    VisiblePosition currPos = VisiblePosition(positionBeforeNode(m_node), DOWNSTREAM);
917    return startPos.isNotNull() && currPos.isNotNull() && !inSameLine(startPos, currPos);
918}
919
920bool TextIterator::shouldEmitSpaceBeforeAndAfterNode(Node* node)
921{
922    return node->renderer() && node->renderer()->isTable() && (node->renderer()->isInline() || m_emitsCharactersBetweenAllVisiblePositions);
923}
924
925void TextIterator::representNodeOffsetZero()
926{
927    // Emit a character to show the positioning of m_node.
928
929    // When we haven't been emitting any characters, shouldRepresentNodeOffsetZero() can
930    // create VisiblePositions, which is expensive.  So, we perform the inexpensive checks
931    // on m_node to see if it necessitates emitting a character first and will early return
932    // before encountering shouldRepresentNodeOffsetZero()s worse case behavior.
933    if (shouldEmitTabBeforeNode(m_node)) {
934        if (shouldRepresentNodeOffsetZero())
935            emitCharacter('\t', m_node->parentNode(), m_node, 0, 0);
936    } else if (shouldEmitNewlineBeforeNode(m_node)) {
937        if (shouldRepresentNodeOffsetZero())
938            emitCharacter('\n', m_node->parentNode(), m_node, 0, 0);
939    } else if (shouldEmitSpaceBeforeAndAfterNode(m_node)) {
940        if (shouldRepresentNodeOffsetZero())
941            emitCharacter(' ', m_node->parentNode(), m_node, 0, 0);
942    }
943}
944
945bool TextIterator::handleNonTextNode()
946{
947    if (shouldEmitNewlineForNode(m_node, m_emitsOriginalText))
948        emitCharacter('\n', m_node->parentNode(), m_node, 0, 1);
949    else if (m_emitsCharactersBetweenAllVisiblePositions && m_node->renderer() && m_node->renderer()->isHR())
950        emitCharacter(' ', m_node->parentNode(), m_node, 0, 1);
951    else
952        representNodeOffsetZero();
953
954    return true;
955}
956
957void TextIterator::exitNode()
958{
959    // prevent emitting a newline when exiting a collapsed block at beginning of the range
960    // FIXME: !m_hasEmitted does not necessarily mean there was a collapsed block... it could
961    // have been an hr (e.g.). Also, a collapsed block could have height (e.g. a table) and
962    // therefore look like a blank line.
963    if (!m_hasEmitted)
964        return;
965
966    // Emit with a position *inside* m_node, after m_node's contents, in
967    // case it is a block, because the run should start where the
968    // emitted character is positioned visually.
969    Node* baseNode = m_node->lastChild() ? m_node->lastChild() : m_node;
970    // FIXME: This shouldn't require the m_lastTextNode to be true, but we can't change that without making
971    // the logic in _web_attributedStringFromRange match.  We'll get that for free when we switch to use
972    // TextIterator in _web_attributedStringFromRange.
973    // See <rdar://problem/5428427> for an example of how this mismatch will cause problems.
974    if (m_lastTextNode && shouldEmitNewlineAfterNode(m_node)) {
975        // use extra newline to represent margin bottom, as needed
976        bool addNewline = shouldEmitExtraNewlineForNode(m_node);
977
978        // FIXME: We need to emit a '\n' as we leave an empty block(s) that
979        // contain a VisiblePosition when doing selection preservation.
980        if (m_lastCharacter != '\n') {
981            // insert a newline with a position following this block's contents.
982            emitCharacter('\n', baseNode->parentNode(), baseNode, 1, 1);
983            // remember whether to later add a newline for the current node
984            ASSERT(!m_needsAnotherNewline);
985            m_needsAnotherNewline = addNewline;
986        } else if (addNewline)
987            // insert a newline with a position following this block's contents.
988            emitCharacter('\n', baseNode->parentNode(), baseNode, 1, 1);
989    }
990
991    // If nothing was emitted, see if we need to emit a space.
992    if (!m_positionNode && shouldEmitSpaceBeforeAndAfterNode(m_node))
993        emitCharacter(' ', baseNode->parentNode(), baseNode, 1, 1);
994}
995
996void TextIterator::emitCharacter(UChar c, Node* textNode, Node* offsetBaseNode, int textStartOffset, int textEndOffset)
997{
998    m_hasEmitted = true;
999
1000    // remember information with which to construct the TextIterator::range()
1001    // NOTE: textNode is often not a text node, so the range will specify child nodes of positionNode
1002    m_positionNode = textNode;
1003    m_positionOffsetBaseNode = offsetBaseNode;
1004    m_positionStartOffset = textStartOffset;
1005    m_positionEndOffset = textEndOffset;
1006
1007    // remember information with which to construct the TextIterator::characters() and length()
1008    m_singleCharacterBuffer = c;
1009    ASSERT(m_singleCharacterBuffer);
1010    m_textLength = 1;
1011
1012    // remember some iteration state
1013    m_lastTextNodeEndedWithCollapsedSpace = false;
1014    m_lastCharacter = c;
1015}
1016
1017void TextIterator::emitText(Node* textNode, RenderObject* renderObject, int textStartOffset, int textEndOffset)
1018{
1019    RenderText* renderer = toRenderText(renderObject);
1020    m_text = m_emitsOriginalText ? renderer->originalText() : (m_emitsTextWithoutTranscoding ? renderer->textWithoutTranscoding() : renderer->text());
1021    ASSERT(!m_text.isEmpty());
1022    ASSERT(0 <= textStartOffset && textStartOffset < static_cast<int>(m_text.length()));
1023    ASSERT(0 <= textEndOffset && textEndOffset <= static_cast<int>(m_text.length()));
1024    ASSERT(textStartOffset <= textEndOffset);
1025
1026    m_positionNode = textNode;
1027    m_positionOffsetBaseNode = 0;
1028    m_positionStartOffset = textStartOffset;
1029    m_positionEndOffset = textEndOffset;
1030    m_singleCharacterBuffer = 0;
1031    m_textLength = textEndOffset - textStartOffset;
1032    m_lastCharacter = m_text[textEndOffset - 1];
1033
1034    m_lastTextNodeEndedWithCollapsedSpace = false;
1035    m_hasEmitted = true;
1036}
1037
1038void TextIterator::emitText(Node* textNode, int textStartOffset, int textEndOffset)
1039{
1040    emitText(textNode, m_node->renderer(), textStartOffset, textEndOffset);
1041}
1042
1043PassRefPtr<Range> TextIterator::range() const
1044{
1045    // use the current run information, if we have it
1046    if (m_positionNode) {
1047        if (m_positionOffsetBaseNode) {
1048            int index = m_positionOffsetBaseNode->nodeIndex();
1049            m_positionStartOffset += index;
1050            m_positionEndOffset += index;
1051            m_positionOffsetBaseNode = 0;
1052        }
1053        return Range::create(m_positionNode->document(), m_positionNode, m_positionStartOffset, m_positionNode, m_positionEndOffset);
1054    }
1055
1056    // otherwise, return the end of the overall range we were given
1057    if (m_endContainer)
1058        return Range::create(m_endContainer->document(), m_endContainer, m_endOffset, m_endContainer, m_endOffset);
1059
1060    return 0;
1061}
1062
1063Node* TextIterator::node() const
1064{
1065    RefPtr<Range> textRange = range();
1066    if (!textRange)
1067        return 0;
1068
1069    Node* node = textRange->startContainer();
1070    if (!node)
1071        return 0;
1072    if (node->offsetInCharacters())
1073        return node;
1074
1075    return node->childNode(textRange->startOffset());
1076}
1077
1078// --------
1079
1080SimplifiedBackwardsTextIterator::SimplifiedBackwardsTextIterator(const Range* r, TextIteratorBehavior behavior)
1081    : m_node(0)
1082    , m_offset(0)
1083    , m_handledNode(false)
1084    , m_handledChildren(false)
1085    , m_startNode(0)
1086    , m_startOffset(0)
1087    , m_endNode(0)
1088    , m_endOffset(0)
1089    , m_positionNode(0)
1090    , m_positionStartOffset(0)
1091    , m_positionEndOffset(0)
1092    , m_textOffset(0)
1093    , m_textLength(0)
1094    , m_lastTextNode(0)
1095    , m_lastCharacter(0)
1096    , m_singleCharacterBuffer(0)
1097    , m_havePassedStartNode(false)
1098    , m_shouldHandleFirstLetter(false)
1099    , m_stopsOnFormControls(behavior & TextIteratorStopsOnFormControls)
1100    , m_shouldStop(false)
1101    , m_emitsOriginalText(false)
1102{
1103    ASSERT(behavior == TextIteratorDefaultBehavior || behavior == TextIteratorStopsOnFormControls);
1104
1105    if (!r)
1106        return;
1107
1108    Node* startNode = r->startContainer();
1109    if (!startNode)
1110        return;
1111    Node* endNode = r->endContainer();
1112    int startOffset = r->startOffset();
1113    int endOffset = r->endOffset();
1114
1115    if (!startNode->offsetInCharacters()) {
1116        if (startOffset >= 0 && startOffset < static_cast<int>(startNode->childNodeCount())) {
1117            startNode = startNode->childNode(startOffset);
1118            startOffset = 0;
1119        }
1120    }
1121    if (!endNode->offsetInCharacters()) {
1122        if (endOffset > 0 && endOffset <= static_cast<int>(endNode->childNodeCount())) {
1123            endNode = endNode->childNode(endOffset - 1);
1124            endOffset = lastOffsetInNode(endNode);
1125        }
1126    }
1127
1128    m_node = endNode;
1129    setUpFullyClippedStack(m_fullyClippedStack, m_node);
1130    m_offset = endOffset;
1131    m_handledNode = false;
1132    m_handledChildren = endOffset == 0;
1133
1134    m_startNode = startNode;
1135    m_startOffset = startOffset;
1136    m_endNode = endNode;
1137    m_endOffset = endOffset;
1138
1139#ifndef NDEBUG
1140    // Need this just because of the assert.
1141    m_positionNode = endNode;
1142#endif
1143
1144    m_lastTextNode = 0;
1145    m_lastCharacter = '\n';
1146
1147    m_havePassedStartNode = false;
1148
1149    advance();
1150}
1151
1152void SimplifiedBackwardsTextIterator::advance()
1153{
1154    ASSERT(m_positionNode);
1155
1156    if (m_shouldStop)
1157        return;
1158
1159    if (m_stopsOnFormControls && HTMLFormControlElement::enclosingFormControlElement(m_node)) {
1160        m_shouldStop = true;
1161        return;
1162    }
1163
1164    m_positionNode = 0;
1165    m_textLength = 0;
1166
1167    while (m_node && !m_havePassedStartNode) {
1168        // Don't handle node if we start iterating at [node, 0].
1169        if (!m_handledNode && !(m_node == m_endNode && m_endOffset == 0)) {
1170            RenderObject* renderer = m_node->renderer();
1171            if (renderer && renderer->isText() && m_node->nodeType() == Node::TEXT_NODE) {
1172                // FIXME: What about CDATA_SECTION_NODE?
1173                if (renderer->style()->visibility() == VISIBLE && m_offset > 0)
1174                    m_handledNode = handleTextNode();
1175            } else if (renderer && (renderer->isImage() || renderer->isWidget())) {
1176                if (renderer->style()->visibility() == VISIBLE && m_offset > 0)
1177                    m_handledNode = handleReplacedElement();
1178            } else
1179                m_handledNode = handleNonTextNode();
1180            if (m_positionNode)
1181                return;
1182        }
1183
1184        if (!m_handledChildren && m_node->hasChildNodes()) {
1185            m_node = m_node->lastChild();
1186            pushFullyClippedState(m_fullyClippedStack, m_node);
1187        } else {
1188            // Exit empty containers as we pass over them or containers
1189            // where [container, 0] is where we started iterating.
1190            if (!m_handledNode
1191                    && canHaveChildrenForEditing(m_node)
1192                    && m_node->parentNode()
1193                    && (!m_node->lastChild() || (m_node == m_endNode && !m_endOffset))) {
1194                exitNode();
1195                if (m_positionNode) {
1196                    m_handledNode = true;
1197                    m_handledChildren = true;
1198                    return;
1199                }
1200            }
1201
1202            // Exit all other containers.
1203            while (!m_node->previousSibling()) {
1204                if (!advanceRespectingRange(m_node->parentOrShadowHostNode()))
1205                    break;
1206                m_fullyClippedStack.pop();
1207                exitNode();
1208                if (m_positionNode) {
1209                    m_handledNode = true;
1210                    m_handledChildren = true;
1211                    return;
1212                }
1213            }
1214
1215            m_fullyClippedStack.pop();
1216            if (advanceRespectingRange(m_node->previousSibling()))
1217                pushFullyClippedState(m_fullyClippedStack, m_node);
1218            else
1219                m_node = 0;
1220        }
1221
1222        // For the purpose of word boundary detection,
1223        // we should iterate all visible text and trailing (collapsed) whitespaces.
1224        m_offset = m_node ? maxOffsetIncludingCollapsedSpaces(m_node) : 0;
1225        m_handledNode = false;
1226        m_handledChildren = false;
1227
1228        if (m_positionNode)
1229            return;
1230    }
1231}
1232
1233bool SimplifiedBackwardsTextIterator::handleTextNode()
1234{
1235    m_lastTextNode = m_node;
1236
1237    int startOffset;
1238    int offsetInNode;
1239    RenderText* renderer = handleFirstLetter(startOffset, offsetInNode);
1240    if (!renderer)
1241        return true;
1242
1243    String text = renderer->text();
1244    if (!renderer->firstTextBox() && text.length() > 0)
1245        return true;
1246
1247    m_positionEndOffset = m_offset;
1248    m_offset = startOffset + offsetInNode;
1249    m_positionNode = m_node;
1250    m_positionStartOffset = m_offset;
1251
1252    ASSERT(0 <= m_positionStartOffset - offsetInNode && m_positionStartOffset - offsetInNode <= static_cast<int>(text.length()));
1253    ASSERT(1 <= m_positionEndOffset - offsetInNode && m_positionEndOffset - offsetInNode <= static_cast<int>(text.length()));
1254    ASSERT(m_positionStartOffset <= m_positionEndOffset);
1255
1256    m_textLength = m_positionEndOffset - m_positionStartOffset;
1257    m_textOffset = m_positionStartOffset - offsetInNode;
1258    m_textContainer = text;
1259    m_singleCharacterBuffer = 0;
1260    RELEASE_ASSERT(static_cast<unsigned>(m_textOffset + m_textLength) <= text.length());
1261
1262    m_lastCharacter = text[m_positionEndOffset - 1];
1263
1264    return !m_shouldHandleFirstLetter;
1265}
1266
1267RenderText* SimplifiedBackwardsTextIterator::handleFirstLetter(int& startOffset, int& offsetInNode)
1268{
1269    RenderText* renderer = toRenderText(m_node->renderer());
1270    startOffset = (m_node == m_startNode) ? m_startOffset : 0;
1271
1272    if (!renderer->isTextFragment()) {
1273        offsetInNode = 0;
1274        return renderer;
1275    }
1276
1277    RenderTextFragment* fragment = toRenderTextFragment(renderer);
1278    int offsetAfterFirstLetter = fragment->start();
1279    if (startOffset >= offsetAfterFirstLetter) {
1280        ASSERT(!m_shouldHandleFirstLetter);
1281        offsetInNode = offsetAfterFirstLetter;
1282        return renderer;
1283    }
1284
1285    if (!m_shouldHandleFirstLetter && offsetAfterFirstLetter < m_offset) {
1286        m_shouldHandleFirstLetter = true;
1287        offsetInNode = offsetAfterFirstLetter;
1288        return renderer;
1289    }
1290
1291    m_shouldHandleFirstLetter = false;
1292    offsetInNode = 0;
1293    return firstRenderTextInFirstLetter(fragment->firstLetter());
1294}
1295
1296bool SimplifiedBackwardsTextIterator::handleReplacedElement()
1297{
1298    unsigned index = m_node->nodeIndex();
1299    // We want replaced elements to behave like punctuation for boundary
1300    // finding, and to simply take up space for the selection preservation
1301    // code in moveParagraphs, so we use a comma.  Unconditionally emit
1302    // here because this iterator is only used for boundary finding.
1303    emitCharacter(',', m_node->parentNode(), index, index + 1);
1304    return true;
1305}
1306
1307bool SimplifiedBackwardsTextIterator::handleNonTextNode()
1308{
1309    // We can use a linefeed in place of a tab because this simple iterator is only used to
1310    // find boundaries, not actual content.  A linefeed breaks words, sentences, and paragraphs.
1311    if (shouldEmitNewlineForNode(m_node, m_emitsOriginalText) || shouldEmitNewlineAfterNode(m_node) || shouldEmitTabBeforeNode(m_node)) {
1312        unsigned index = m_node->nodeIndex();
1313        // The start of this emitted range is wrong. Ensuring correctness would require
1314        // VisiblePositions and so would be slow. previousBoundary expects this.
1315        emitCharacter('\n', m_node->parentNode(), index + 1, index + 1);
1316    }
1317    return true;
1318}
1319
1320void SimplifiedBackwardsTextIterator::exitNode()
1321{
1322    if (shouldEmitNewlineForNode(m_node, m_emitsOriginalText) || shouldEmitNewlineBeforeNode(m_node) || shouldEmitTabBeforeNode(m_node)) {
1323        // The start of this emitted range is wrong. Ensuring correctness would require
1324        // VisiblePositions and so would be slow. previousBoundary expects this.
1325        emitCharacter('\n', m_node, 0, 0);
1326    }
1327}
1328
1329void SimplifiedBackwardsTextIterator::emitCharacter(UChar c, Node* node, int startOffset, int endOffset)
1330{
1331    m_singleCharacterBuffer = c;
1332    m_positionNode = node;
1333    m_positionStartOffset = startOffset;
1334    m_positionEndOffset = endOffset;
1335    m_textOffset = 0;
1336    m_textLength = 1;
1337    m_lastCharacter = c;
1338}
1339
1340bool SimplifiedBackwardsTextIterator::advanceRespectingRange(Node* next)
1341{
1342    if (!next)
1343        return false;
1344    m_havePassedStartNode |= m_node == m_startNode;
1345    if (m_havePassedStartNode)
1346        return false;
1347    m_node = next;
1348    return true;
1349}
1350
1351PassRefPtr<Range> SimplifiedBackwardsTextIterator::range() const
1352{
1353    if (m_positionNode)
1354        return Range::create(m_positionNode->document(), m_positionNode, m_positionStartOffset, m_positionNode, m_positionEndOffset);
1355
1356    return Range::create(m_startNode->document(), m_startNode, m_startOffset, m_startNode, m_startOffset);
1357}
1358
1359// --------
1360
1361CharacterIterator::CharacterIterator(const Range* r, TextIteratorBehavior behavior)
1362    : m_offset(0)
1363    , m_runOffset(0)
1364    , m_atBreak(true)
1365    , m_textIterator(r, behavior)
1366{
1367    while (!atEnd() && m_textIterator.length() == 0)
1368        m_textIterator.advance();
1369}
1370
1371PassRefPtr<Range> CharacterIterator::range() const
1372{
1373    RefPtr<Range> r = m_textIterator.range();
1374    if (!m_textIterator.atEnd()) {
1375        if (m_textIterator.length() <= 1) {
1376            ASSERT(m_runOffset == 0);
1377        } else {
1378            Node* n = r->startContainer();
1379            ASSERT(n == r->endContainer());
1380            int offset = r->startOffset() + m_runOffset;
1381            r->setStart(n, offset, ASSERT_NO_EXCEPTION);
1382            r->setEnd(n, offset + 1, ASSERT_NO_EXCEPTION);
1383        }
1384    }
1385    return r.release();
1386}
1387
1388void CharacterIterator::advance(int count)
1389{
1390    if (count <= 0) {
1391        ASSERT(count == 0);
1392        return;
1393    }
1394
1395    m_atBreak = false;
1396
1397    // easy if there is enough left in the current m_textIterator run
1398    int remaining = m_textIterator.length() - m_runOffset;
1399    if (count < remaining) {
1400        m_runOffset += count;
1401        m_offset += count;
1402        return;
1403    }
1404
1405    // exhaust the current m_textIterator run
1406    count -= remaining;
1407    m_offset += remaining;
1408
1409    // move to a subsequent m_textIterator run
1410    for (m_textIterator.advance(); !atEnd(); m_textIterator.advance()) {
1411        int runLength = m_textIterator.length();
1412        if (runLength == 0)
1413            m_atBreak = true;
1414        else {
1415            // see whether this is m_textIterator to use
1416            if (count < runLength) {
1417                m_runOffset = count;
1418                m_offset += count;
1419                return;
1420            }
1421
1422            // exhaust this m_textIterator run
1423            count -= runLength;
1424            m_offset += runLength;
1425        }
1426    }
1427
1428    // ran to the end of the m_textIterator... no more runs left
1429    m_atBreak = true;
1430    m_runOffset = 0;
1431}
1432
1433String CharacterIterator::string(int numChars)
1434{
1435    StringBuilder result;
1436    result.reserveCapacity(numChars);
1437    while (numChars > 0 && !atEnd()) {
1438        int runSize = min(numChars, length());
1439        m_textIterator.appendTextToStringBuilder(result, m_runOffset, runSize);
1440        numChars -= runSize;
1441        advance(runSize);
1442    }
1443    return result.toString();
1444}
1445
1446static PassRefPtr<Range> characterSubrange(CharacterIterator& it, int offset, int length)
1447{
1448    it.advance(offset);
1449    RefPtr<Range> start = it.range();
1450
1451    if (length > 1)
1452        it.advance(length - 1);
1453    RefPtr<Range> end = it.range();
1454
1455    return Range::create(start->startContainer()->document(),
1456        start->startContainer(), start->startOffset(),
1457        end->endContainer(), end->endOffset());
1458}
1459
1460BackwardsCharacterIterator::BackwardsCharacterIterator(const Range* range, TextIteratorBehavior behavior)
1461    : m_offset(0)
1462    , m_runOffset(0)
1463    , m_atBreak(true)
1464    , m_textIterator(range, behavior)
1465{
1466    while (!atEnd() && !m_textIterator.length())
1467        m_textIterator.advance();
1468}
1469
1470PassRefPtr<Range> BackwardsCharacterIterator::range() const
1471{
1472    RefPtr<Range> r = m_textIterator.range();
1473    if (!m_textIterator.atEnd()) {
1474        if (m_textIterator.length() <= 1)
1475            ASSERT(m_runOffset == 0);
1476        else {
1477            Node* n = r->startContainer();
1478            ASSERT(n == r->endContainer());
1479            int offset = r->endOffset() - m_runOffset;
1480            r->setStart(n, offset - 1, ASSERT_NO_EXCEPTION);
1481            r->setEnd(n, offset, ASSERT_NO_EXCEPTION);
1482        }
1483    }
1484    return r.release();
1485}
1486
1487void BackwardsCharacterIterator::advance(int count)
1488{
1489    if (count <= 0) {
1490        ASSERT(!count);
1491        return;
1492    }
1493
1494    m_atBreak = false;
1495
1496    int remaining = m_textIterator.length() - m_runOffset;
1497    if (count < remaining) {
1498        m_runOffset += count;
1499        m_offset += count;
1500        return;
1501    }
1502
1503    count -= remaining;
1504    m_offset += remaining;
1505
1506    for (m_textIterator.advance(); !atEnd(); m_textIterator.advance()) {
1507        int runLength = m_textIterator.length();
1508        if (runLength == 0)
1509            m_atBreak = true;
1510        else {
1511            if (count < runLength) {
1512                m_runOffset = count;
1513                m_offset += count;
1514                return;
1515            }
1516
1517            count -= runLength;
1518            m_offset += runLength;
1519        }
1520    }
1521
1522    m_atBreak = true;
1523    m_runOffset = 0;
1524}
1525
1526// --------
1527
1528WordAwareIterator::WordAwareIterator(const Range* range)
1529    : m_didLookAhead(true) // So we consider the first chunk from the text iterator.
1530    , m_textIterator(range)
1531{
1532    advance(); // Get in position over the first chunk of text.
1533}
1534
1535WordAwareIterator::~WordAwareIterator()
1536{
1537}
1538
1539// FIXME: Performance could be bad for huge spans next to each other that don't fall on word boundaries.
1540
1541void WordAwareIterator::advance()
1542{
1543    m_buffer.clear();
1544
1545    // If last time we did a look-ahead, start with that looked-ahead chunk now
1546    if (!m_didLookAhead) {
1547        ASSERT(!m_textIterator.atEnd());
1548        m_textIterator.advance();
1549    }
1550    m_didLookAhead = false;
1551
1552    // Go to next non-empty chunk.
1553    while (!m_textIterator.atEnd() && m_textIterator.length() == 0)
1554        m_textIterator.advance();
1555
1556    m_range = m_textIterator.range();
1557
1558    if (m_textIterator.atEnd())
1559        return;
1560
1561    while (1) {
1562        // If this chunk ends in whitespace we can just use it as our chunk.
1563        if (isSpaceOrNewline(m_textIterator.characterAt(m_textIterator.length() - 1)))
1564            return;
1565
1566        // If this is the first chunk that failed, save it in m_buffer before look ahead.
1567        if (m_buffer.isEmpty())
1568            m_textIterator.appendTextTo(m_buffer);
1569
1570        // Look ahead to next chunk. If it is whitespace or a break, we can use the previous stuff
1571        m_textIterator.advance();
1572        if (m_textIterator.atEnd() || !m_textIterator.length() || isSpaceOrNewline(m_textIterator.characterAt(0))) {
1573            m_didLookAhead = true;
1574            return;
1575        }
1576
1577        // Start gobbling chunks until we get to a suitable stopping point
1578        m_textIterator.appendTextTo(m_buffer);
1579        m_range->setEnd(m_textIterator.range()->endContainer(), m_textIterator.range()->endOffset(), IGNORE_EXCEPTION);
1580    }
1581}
1582
1583int WordAwareIterator::length() const
1584{
1585    if (!m_buffer.isEmpty())
1586        return m_buffer.size();
1587    return m_textIterator.length();
1588}
1589
1590String WordAwareIterator::substring(unsigned position, unsigned length) const
1591{
1592    if (!m_buffer.isEmpty())
1593        return String(m_buffer.data() + position, length);
1594    return m_textIterator.substring(position, length);
1595}
1596
1597UChar WordAwareIterator::characterAt(unsigned index) const
1598{
1599    if (!m_buffer.isEmpty())
1600        return m_buffer[index];
1601    return m_textIterator.characterAt(index);
1602}
1603
1604// --------
1605
1606static inline UChar foldQuoteMarkOrSoftHyphen(UChar c)
1607{
1608    switch (c) {
1609        case hebrewPunctuationGershayim:
1610        case leftDoubleQuotationMark:
1611        case rightDoubleQuotationMark:
1612            return '"';
1613        case hebrewPunctuationGeresh:
1614        case leftSingleQuotationMark:
1615        case rightSingleQuotationMark:
1616            return '\'';
1617        case softHyphen:
1618            // Replace soft hyphen with an ignorable character so that their presence or absence will
1619            // not affect string comparison.
1620            return 0;
1621        default:
1622            return c;
1623    }
1624}
1625
1626static inline void foldQuoteMarksAndSoftHyphens(UChar* data, size_t length)
1627{
1628    for (size_t i = 0; i < length; ++i)
1629        data[i] = foldQuoteMarkOrSoftHyphen(data[i]);
1630}
1631
1632static const size_t minimumSearchBufferSize = 8192;
1633
1634#ifndef NDEBUG
1635static bool searcherInUse;
1636#endif
1637
1638static UStringSearch* createSearcher()
1639{
1640    // Provide a non-empty pattern and non-empty text so usearch_open will not fail,
1641    // but it doesn't matter exactly what it is, since we don't perform any searches
1642    // without setting both the pattern and the text.
1643    UErrorCode status = U_ZERO_ERROR;
1644    String searchCollatorName = currentSearchLocaleID() + String("@collation=search");
1645    UStringSearch* searcher = usearch_open(&newlineCharacter, 1, &newlineCharacter, 1, searchCollatorName.utf8().data(), 0, &status);
1646    ASSERT(status == U_ZERO_ERROR || status == U_USING_FALLBACK_WARNING || status == U_USING_DEFAULT_WARNING);
1647    return searcher;
1648}
1649
1650static UStringSearch* searcher()
1651{
1652    static UStringSearch* searcher = createSearcher();
1653    return searcher;
1654}
1655
1656static inline void lockSearcher()
1657{
1658#ifndef NDEBUG
1659    ASSERT(!searcherInUse);
1660    searcherInUse = true;
1661#endif
1662}
1663
1664static inline void unlockSearcher()
1665{
1666#ifndef NDEBUG
1667    ASSERT(searcherInUse);
1668    searcherInUse = false;
1669#endif
1670}
1671
1672// ICU's search ignores the distinction between small kana letters and ones
1673// that are not small, and also characters that differ only in the voicing
1674// marks when considering only primary collation strength differences.
1675// This is not helpful for end users, since these differences make words
1676// distinct, so for our purposes we need these to be considered.
1677// The Unicode folks do not think the collation algorithm should be
1678// changed. To work around this, we would like to tailor the ICU searcher,
1679// but we can't get that to work yet. So instead, we check for cases where
1680// these differences occur, and skip those matches.
1681
1682// We refer to the above technique as the "kana workaround". The next few
1683// functions are helper functinos for the kana workaround.
1684
1685static inline bool isKanaLetter(UChar character)
1686{
1687    // Hiragana letters.
1688    if (character >= 0x3041 && character <= 0x3096)
1689        return true;
1690
1691    // Katakana letters.
1692    if (character >= 0x30A1 && character <= 0x30FA)
1693        return true;
1694    if (character >= 0x31F0 && character <= 0x31FF)
1695        return true;
1696
1697    // Halfwidth katakana letters.
1698    if (character >= 0xFF66 && character <= 0xFF9D && character != 0xFF70)
1699        return true;
1700
1701    return false;
1702}
1703
1704static inline bool isSmallKanaLetter(UChar character)
1705{
1706    ASSERT(isKanaLetter(character));
1707
1708    switch (character) {
1709    case 0x3041: // HIRAGANA LETTER SMALL A
1710    case 0x3043: // HIRAGANA LETTER SMALL I
1711    case 0x3045: // HIRAGANA LETTER SMALL U
1712    case 0x3047: // HIRAGANA LETTER SMALL E
1713    case 0x3049: // HIRAGANA LETTER SMALL O
1714    case 0x3063: // HIRAGANA LETTER SMALL TU
1715    case 0x3083: // HIRAGANA LETTER SMALL YA
1716    case 0x3085: // HIRAGANA LETTER SMALL YU
1717    case 0x3087: // HIRAGANA LETTER SMALL YO
1718    case 0x308E: // HIRAGANA LETTER SMALL WA
1719    case 0x3095: // HIRAGANA LETTER SMALL KA
1720    case 0x3096: // HIRAGANA LETTER SMALL KE
1721    case 0x30A1: // KATAKANA LETTER SMALL A
1722    case 0x30A3: // KATAKANA LETTER SMALL I
1723    case 0x30A5: // KATAKANA LETTER SMALL U
1724    case 0x30A7: // KATAKANA LETTER SMALL E
1725    case 0x30A9: // KATAKANA LETTER SMALL O
1726    case 0x30C3: // KATAKANA LETTER SMALL TU
1727    case 0x30E3: // KATAKANA LETTER SMALL YA
1728    case 0x30E5: // KATAKANA LETTER SMALL YU
1729    case 0x30E7: // KATAKANA LETTER SMALL YO
1730    case 0x30EE: // KATAKANA LETTER SMALL WA
1731    case 0x30F5: // KATAKANA LETTER SMALL KA
1732    case 0x30F6: // KATAKANA LETTER SMALL KE
1733    case 0x31F0: // KATAKANA LETTER SMALL KU
1734    case 0x31F1: // KATAKANA LETTER SMALL SI
1735    case 0x31F2: // KATAKANA LETTER SMALL SU
1736    case 0x31F3: // KATAKANA LETTER SMALL TO
1737    case 0x31F4: // KATAKANA LETTER SMALL NU
1738    case 0x31F5: // KATAKANA LETTER SMALL HA
1739    case 0x31F6: // KATAKANA LETTER SMALL HI
1740    case 0x31F7: // KATAKANA LETTER SMALL HU
1741    case 0x31F8: // KATAKANA LETTER SMALL HE
1742    case 0x31F9: // KATAKANA LETTER SMALL HO
1743    case 0x31FA: // KATAKANA LETTER SMALL MU
1744    case 0x31FB: // KATAKANA LETTER SMALL RA
1745    case 0x31FC: // KATAKANA LETTER SMALL RI
1746    case 0x31FD: // KATAKANA LETTER SMALL RU
1747    case 0x31FE: // KATAKANA LETTER SMALL RE
1748    case 0x31FF: // KATAKANA LETTER SMALL RO
1749    case 0xFF67: // HALFWIDTH KATAKANA LETTER SMALL A
1750    case 0xFF68: // HALFWIDTH KATAKANA LETTER SMALL I
1751    case 0xFF69: // HALFWIDTH KATAKANA LETTER SMALL U
1752    case 0xFF6A: // HALFWIDTH KATAKANA LETTER SMALL E
1753    case 0xFF6B: // HALFWIDTH KATAKANA LETTER SMALL O
1754    case 0xFF6C: // HALFWIDTH KATAKANA LETTER SMALL YA
1755    case 0xFF6D: // HALFWIDTH KATAKANA LETTER SMALL YU
1756    case 0xFF6E: // HALFWIDTH KATAKANA LETTER SMALL YO
1757    case 0xFF6F: // HALFWIDTH KATAKANA LETTER SMALL TU
1758        return true;
1759    }
1760    return false;
1761}
1762
1763enum VoicedSoundMarkType { NoVoicedSoundMark, VoicedSoundMark, SemiVoicedSoundMark };
1764
1765static inline VoicedSoundMarkType composedVoicedSoundMark(UChar character)
1766{
1767    ASSERT(isKanaLetter(character));
1768
1769    switch (character) {
1770    case 0x304C: // HIRAGANA LETTER GA
1771    case 0x304E: // HIRAGANA LETTER GI
1772    case 0x3050: // HIRAGANA LETTER GU
1773    case 0x3052: // HIRAGANA LETTER GE
1774    case 0x3054: // HIRAGANA LETTER GO
1775    case 0x3056: // HIRAGANA LETTER ZA
1776    case 0x3058: // HIRAGANA LETTER ZI
1777    case 0x305A: // HIRAGANA LETTER ZU
1778    case 0x305C: // HIRAGANA LETTER ZE
1779    case 0x305E: // HIRAGANA LETTER ZO
1780    case 0x3060: // HIRAGANA LETTER DA
1781    case 0x3062: // HIRAGANA LETTER DI
1782    case 0x3065: // HIRAGANA LETTER DU
1783    case 0x3067: // HIRAGANA LETTER DE
1784    case 0x3069: // HIRAGANA LETTER DO
1785    case 0x3070: // HIRAGANA LETTER BA
1786    case 0x3073: // HIRAGANA LETTER BI
1787    case 0x3076: // HIRAGANA LETTER BU
1788    case 0x3079: // HIRAGANA LETTER BE
1789    case 0x307C: // HIRAGANA LETTER BO
1790    case 0x3094: // HIRAGANA LETTER VU
1791    case 0x30AC: // KATAKANA LETTER GA
1792    case 0x30AE: // KATAKANA LETTER GI
1793    case 0x30B0: // KATAKANA LETTER GU
1794    case 0x30B2: // KATAKANA LETTER GE
1795    case 0x30B4: // KATAKANA LETTER GO
1796    case 0x30B6: // KATAKANA LETTER ZA
1797    case 0x30B8: // KATAKANA LETTER ZI
1798    case 0x30BA: // KATAKANA LETTER ZU
1799    case 0x30BC: // KATAKANA LETTER ZE
1800    case 0x30BE: // KATAKANA LETTER ZO
1801    case 0x30C0: // KATAKANA LETTER DA
1802    case 0x30C2: // KATAKANA LETTER DI
1803    case 0x30C5: // KATAKANA LETTER DU
1804    case 0x30C7: // KATAKANA LETTER DE
1805    case 0x30C9: // KATAKANA LETTER DO
1806    case 0x30D0: // KATAKANA LETTER BA
1807    case 0x30D3: // KATAKANA LETTER BI
1808    case 0x30D6: // KATAKANA LETTER BU
1809    case 0x30D9: // KATAKANA LETTER BE
1810    case 0x30DC: // KATAKANA LETTER BO
1811    case 0x30F4: // KATAKANA LETTER VU
1812    case 0x30F7: // KATAKANA LETTER VA
1813    case 0x30F8: // KATAKANA LETTER VI
1814    case 0x30F9: // KATAKANA LETTER VE
1815    case 0x30FA: // KATAKANA LETTER VO
1816        return VoicedSoundMark;
1817    case 0x3071: // HIRAGANA LETTER PA
1818    case 0x3074: // HIRAGANA LETTER PI
1819    case 0x3077: // HIRAGANA LETTER PU
1820    case 0x307A: // HIRAGANA LETTER PE
1821    case 0x307D: // HIRAGANA LETTER PO
1822    case 0x30D1: // KATAKANA LETTER PA
1823    case 0x30D4: // KATAKANA LETTER PI
1824    case 0x30D7: // KATAKANA LETTER PU
1825    case 0x30DA: // KATAKANA LETTER PE
1826    case 0x30DD: // KATAKANA LETTER PO
1827        return SemiVoicedSoundMark;
1828    }
1829    return NoVoicedSoundMark;
1830}
1831
1832static inline bool isCombiningVoicedSoundMark(UChar character)
1833{
1834    switch (character) {
1835    case 0x3099: // COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK
1836    case 0x309A: // COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
1837        return true;
1838    }
1839    return false;
1840}
1841
1842static inline bool containsKanaLetters(const String& pattern)
1843{
1844    unsigned length = pattern.length();
1845    for (unsigned i = 0; i < length; ++i) {
1846        if (isKanaLetter(pattern[i]))
1847            return true;
1848    }
1849    return false;
1850}
1851
1852static void normalizeCharacters(const UChar* characters, unsigned length, Vector<UChar>& buffer)
1853{
1854    ASSERT(length);
1855
1856    buffer.resize(length);
1857
1858    UErrorCode status = U_ZERO_ERROR;
1859    size_t bufferSize = unorm_normalize(characters, length, UNORM_NFC, 0, buffer.data(), length, &status);
1860    ASSERT(status == U_ZERO_ERROR || status == U_STRING_NOT_TERMINATED_WARNING || status == U_BUFFER_OVERFLOW_ERROR);
1861    ASSERT(bufferSize);
1862
1863    buffer.resize(bufferSize);
1864
1865    if (status == U_ZERO_ERROR || status == U_STRING_NOT_TERMINATED_WARNING)
1866        return;
1867
1868    status = U_ZERO_ERROR;
1869    unorm_normalize(characters, length, UNORM_NFC, 0, buffer.data(), bufferSize, &status);
1870    ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
1871}
1872
1873static bool isNonLatin1Separator(UChar32 character)
1874{
1875    ASSERT_ARG(character, character >= 256);
1876
1877    return U_GET_GC_MASK(character) & (U_GC_S_MASK | U_GC_P_MASK | U_GC_Z_MASK | U_GC_CF_MASK);
1878}
1879
1880static inline bool isSeparator(UChar32 character)
1881{
1882    static const bool latin1SeparatorTable[256] = {
1883        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1884        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1885        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // space ! " # $ % & ' ( ) * + , - . /
1886        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, //                         : ; < = > ?
1887        1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //   @
1888        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, //                         [ \ ] ^ _
1889        1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //   `
1890        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, //                           { | } ~
1891        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1892        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1893        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,
1894        1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,
1895        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1896        0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
1897        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1898        0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0
1899    };
1900
1901    if (character < 256)
1902        return latin1SeparatorTable[character];
1903
1904    return isNonLatin1Separator(character);
1905}
1906
1907inline SearchBuffer::SearchBuffer(const String& target, FindOptions options)
1908    : m_options(options)
1909    , m_prefixLength(0)
1910    , m_numberOfCharactersJustAppended(0)
1911    , m_atBreak(true)
1912    , m_needsMoreContext(options & AtWordStarts)
1913    , m_targetRequiresKanaWorkaround(containsKanaLetters(target))
1914{
1915    ASSERT(!target.isEmpty());
1916    target.appendTo(m_target);
1917
1918    // FIXME: We'd like to tailor the searcher to fold quote marks for us instead
1919    // of doing it in a separate replacement pass here, but ICU doesn't offer a way
1920    // to add tailoring on top of the locale-specific tailoring as of this writing.
1921    foldQuoteMarksAndSoftHyphens(m_target.data(), m_target.size());
1922
1923    size_t targetLength = m_target.size();
1924    m_buffer.reserveInitialCapacity(max(targetLength * 8, minimumSearchBufferSize));
1925    m_overlap = m_buffer.capacity() / 4;
1926
1927    if ((m_options & AtWordStarts) && targetLength) {
1928        UChar32 targetFirstCharacter;
1929        U16_GET(m_target.data(), 0, 0, targetLength, targetFirstCharacter);
1930        // Characters in the separator category never really occur at the beginning of a word,
1931        // so if the target begins with such a character, we just ignore the AtWordStart option.
1932        if (isSeparator(targetFirstCharacter)) {
1933            m_options &= ~AtWordStarts;
1934            m_needsMoreContext = false;
1935        }
1936    }
1937
1938    // Grab the single global searcher.
1939    // If we ever have a reason to do more than once search buffer at once, we'll have
1940    // to move to multiple searchers.
1941    lockSearcher();
1942
1943    UStringSearch* searcher = WebCore::searcher();
1944    UCollator* collator = usearch_getCollator(searcher);
1945
1946    UCollationStrength strength = m_options & CaseInsensitive ? UCOL_PRIMARY : UCOL_TERTIARY;
1947    if (ucol_getStrength(collator) != strength) {
1948        ucol_setStrength(collator, strength);
1949        usearch_reset(searcher);
1950    }
1951
1952    UErrorCode status = U_ZERO_ERROR;
1953    usearch_setPattern(searcher, m_target.data(), targetLength, &status);
1954    ASSERT(status == U_ZERO_ERROR);
1955
1956    // The kana workaround requires a normalized copy of the target string.
1957    if (m_targetRequiresKanaWorkaround)
1958        normalizeCharacters(m_target.data(), m_target.size(), m_normalizedTarget);
1959}
1960
1961inline SearchBuffer::~SearchBuffer()
1962{
1963    // Leave the static object pointing to a valid string.
1964    UErrorCode status = U_ZERO_ERROR;
1965    usearch_setPattern(WebCore::searcher(), &newlineCharacter, 1, &status);
1966    ASSERT(status == U_ZERO_ERROR);
1967
1968    unlockSearcher();
1969}
1970
1971template<typename CharType>
1972inline void SearchBuffer::append(const CharType* characters, size_t length)
1973{
1974    ASSERT(length);
1975
1976    if (m_atBreak) {
1977        m_buffer.shrink(0);
1978        m_prefixLength = 0;
1979        m_atBreak = false;
1980    } else if (m_buffer.size() == m_buffer.capacity()) {
1981        memcpy(m_buffer.data(), m_buffer.data() + m_buffer.size() - m_overlap, m_overlap * sizeof(UChar));
1982        m_prefixLength -= min(m_prefixLength, m_buffer.size() - m_overlap);
1983        m_buffer.shrink(m_overlap);
1984    }
1985
1986    size_t oldLength = m_buffer.size();
1987    size_t usableLength = min(m_buffer.capacity() - oldLength, length);
1988    ASSERT(usableLength);
1989    m_buffer.resize(oldLength + usableLength);
1990    UChar* destination = m_buffer.data() + oldLength;
1991    StringImpl::copyChars(destination, characters, usableLength);
1992    foldQuoteMarksAndSoftHyphens(destination, usableLength);
1993    m_numberOfCharactersJustAppended = usableLength;
1994}
1995
1996inline bool SearchBuffer::needsMoreContext() const
1997{
1998    return m_needsMoreContext;
1999}
2000
2001inline void SearchBuffer::prependContext(const UChar* characters, size_t length)
2002{
2003    ASSERT(m_needsMoreContext);
2004    ASSERT(m_prefixLength == m_buffer.size());
2005
2006    if (!length)
2007        return;
2008
2009    m_atBreak = false;
2010
2011    size_t wordBoundaryContextStart = length;
2012    if (wordBoundaryContextStart) {
2013        U16_BACK_1(characters, 0, wordBoundaryContextStart);
2014        wordBoundaryContextStart = startOfLastWordBoundaryContext(characters, wordBoundaryContextStart);
2015    }
2016
2017    size_t usableLength = min(m_buffer.capacity() - m_prefixLength, length - wordBoundaryContextStart);
2018    m_buffer.prepend(characters + length - usableLength, usableLength);
2019    m_prefixLength += usableLength;
2020
2021    if (wordBoundaryContextStart || m_prefixLength == m_buffer.capacity())
2022        m_needsMoreContext = false;
2023}
2024
2025inline bool SearchBuffer::atBreak() const
2026{
2027    return m_atBreak;
2028}
2029
2030inline void SearchBuffer::reachedBreak()
2031{
2032    m_atBreak = true;
2033}
2034
2035inline bool SearchBuffer::isBadMatch(const UChar* match, size_t matchLength) const
2036{
2037    // This function implements the kana workaround. If usearch treats
2038    // it as a match, but we do not want to, then it's a "bad match".
2039    if (!m_targetRequiresKanaWorkaround)
2040        return false;
2041
2042    // Normalize into a match buffer. We reuse a single buffer rather than
2043    // creating a new one each time.
2044    normalizeCharacters(match, matchLength, m_normalizedMatch);
2045
2046    const UChar* a = m_normalizedTarget.begin();
2047    const UChar* aEnd = m_normalizedTarget.end();
2048
2049    const UChar* b = m_normalizedMatch.begin();
2050    const UChar* bEnd = m_normalizedMatch.end();
2051
2052    while (true) {
2053        // Skip runs of non-kana-letter characters. This is necessary so we can
2054        // correctly handle strings where the target and match have different-length
2055        // runs of characters that match, while still double checking the correctness
2056        // of matches of kana letters with other kana letters.
2057        while (a != aEnd && !isKanaLetter(*a))
2058            ++a;
2059        while (b != bEnd && !isKanaLetter(*b))
2060            ++b;
2061
2062        // If we reached the end of either the target or the match, we should have
2063        // reached the end of both; both should have the same number of kana letters.
2064        if (a == aEnd || b == bEnd) {
2065            ASSERT(a == aEnd);
2066            ASSERT(b == bEnd);
2067            return false;
2068        }
2069
2070        // Check for differences in the kana letter character itself.
2071        if (isSmallKanaLetter(*a) != isSmallKanaLetter(*b))
2072            return true;
2073        if (composedVoicedSoundMark(*a) != composedVoicedSoundMark(*b))
2074            return true;
2075        ++a;
2076        ++b;
2077
2078        // Check for differences in combining voiced sound marks found after the letter.
2079        while (1) {
2080            if (!(a != aEnd && isCombiningVoicedSoundMark(*a))) {
2081                if (b != bEnd && isCombiningVoicedSoundMark(*b))
2082                    return true;
2083                break;
2084            }
2085            if (!(b != bEnd && isCombiningVoicedSoundMark(*b)))
2086                return true;
2087            if (*a != *b)
2088                return true;
2089            ++a;
2090            ++b;
2091        }
2092    }
2093}
2094
2095inline bool SearchBuffer::isWordStartMatch(size_t start, size_t length) const
2096{
2097    ASSERT(m_options & AtWordStarts);
2098
2099    if (!start)
2100        return true;
2101
2102    int size = m_buffer.size();
2103    int offset = start;
2104    UChar32 firstCharacter;
2105    U16_GET(m_buffer.data(), 0, offset, size, firstCharacter);
2106
2107    if (m_options & TreatMedialCapitalAsWordStart) {
2108        UChar32 previousCharacter;
2109        U16_PREV(m_buffer.data(), 0, offset, previousCharacter);
2110
2111        if (isSeparator(firstCharacter)) {
2112            // The start of a separator run is a word start (".org" in "webkit.org").
2113            if (!isSeparator(previousCharacter))
2114                return true;
2115        } else if (isASCIIUpper(firstCharacter)) {
2116            // The start of an uppercase run is a word start ("Kit" in "WebKit").
2117            if (!isASCIIUpper(previousCharacter))
2118                return true;
2119            // The last character of an uppercase run followed by a non-separator, non-digit
2120            // is a word start ("Request" in "XMLHTTPRequest").
2121            offset = start;
2122            U16_FWD_1(m_buffer.data(), offset, size);
2123            UChar32 nextCharacter = 0;
2124            if (offset < size)
2125                U16_GET(m_buffer.data(), 0, offset, size, nextCharacter);
2126            if (!isASCIIUpper(nextCharacter) && !isASCIIDigit(nextCharacter) && !isSeparator(nextCharacter))
2127                return true;
2128        } else if (isASCIIDigit(firstCharacter)) {
2129            // The start of a digit run is a word start ("2" in "WebKit2").
2130            if (!isASCIIDigit(previousCharacter))
2131                return true;
2132        } else if (isSeparator(previousCharacter) || isASCIIDigit(previousCharacter)) {
2133            // The start of a non-separator, non-uppercase, non-digit run is a word start,
2134            // except after an uppercase. ("org" in "webkit.org", but not "ore" in "WebCore").
2135            return true;
2136        }
2137    }
2138
2139    // Chinese and Japanese lack word boundary marks, and there is no clear agreement on what constitutes
2140    // a word, so treat the position before any CJK character as a word start.
2141    if (Font::isCJKIdeographOrSymbol(firstCharacter))
2142        return true;
2143
2144    size_t wordBreakSearchStart = start + length;
2145    while (wordBreakSearchStart > start)
2146        wordBreakSearchStart = findNextWordFromIndex(m_buffer.data(), m_buffer.size(), wordBreakSearchStart, false /* backwards */);
2147    return wordBreakSearchStart == start;
2148}
2149
2150inline size_t SearchBuffer::search(size_t& start)
2151{
2152    size_t size = m_buffer.size();
2153    if (m_atBreak) {
2154        if (!size)
2155            return 0;
2156    } else {
2157        if (size != m_buffer.capacity())
2158            return 0;
2159    }
2160
2161    UStringSearch* searcher = WebCore::searcher();
2162
2163    UErrorCode status = U_ZERO_ERROR;
2164    usearch_setText(searcher, m_buffer.data(), size, &status);
2165    ASSERT(status == U_ZERO_ERROR);
2166
2167    usearch_setOffset(searcher, m_prefixLength, &status);
2168    ASSERT(status == U_ZERO_ERROR);
2169
2170    int matchStart = usearch_next(searcher, &status);
2171    ASSERT(status == U_ZERO_ERROR);
2172
2173nextMatch:
2174    if (!(matchStart >= 0 && static_cast<size_t>(matchStart) < size)) {
2175        ASSERT(matchStart == USEARCH_DONE);
2176        return 0;
2177    }
2178
2179    // Matches that start in the overlap area are only tentative.
2180    // The same match may appear later, matching more characters,
2181    // possibly including a combining character that's not yet in the buffer.
2182    if (!m_atBreak && static_cast<size_t>(matchStart) >= size - m_overlap) {
2183        size_t overlap = m_overlap;
2184        if (m_options & AtWordStarts) {
2185            // Ensure that there is sufficient context before matchStart the next time around for
2186            // determining if it is at a word boundary.
2187            int wordBoundaryContextStart = matchStart;
2188            U16_BACK_1(m_buffer.data(), 0, wordBoundaryContextStart);
2189            wordBoundaryContextStart = startOfLastWordBoundaryContext(m_buffer.data(), wordBoundaryContextStart);
2190            overlap = min(size - 1, max(overlap, size - wordBoundaryContextStart));
2191        }
2192        memcpy(m_buffer.data(), m_buffer.data() + size - overlap, overlap * sizeof(UChar));
2193        m_prefixLength -= min(m_prefixLength, size - overlap);
2194        m_buffer.shrink(overlap);
2195        return 0;
2196    }
2197
2198    size_t matchedLength = usearch_getMatchedLength(searcher);
2199    ASSERT_WITH_SECURITY_IMPLICATION(matchStart + matchedLength <= size);
2200
2201    // If this match is "bad", move on to the next match.
2202    if (isBadMatch(m_buffer.data() + matchStart, matchedLength) || ((m_options & AtWordStarts) && !isWordStartMatch(matchStart, matchedLength))) {
2203        matchStart = usearch_next(searcher, &status);
2204        ASSERT(status == U_ZERO_ERROR);
2205        goto nextMatch;
2206    }
2207
2208    size_t newSize = size - (matchStart + 1);
2209    memmove(m_buffer.data(), m_buffer.data() + matchStart + 1, newSize * sizeof(UChar));
2210    m_prefixLength -= min<size_t>(m_prefixLength, matchStart + 1);
2211    m_buffer.shrink(newSize);
2212
2213    start = size - matchStart;
2214    return matchedLength;
2215}
2216
2217// --------
2218
2219int TextIterator::rangeLength(const Range* r, bool forSelectionPreservation)
2220{
2221    int length = 0;
2222    for (TextIterator it(r, forSelectionPreservation ? TextIteratorEmitsCharactersBetweenAllVisiblePositions : TextIteratorDefaultBehavior); !it.atEnd(); it.advance())
2223        length += it.length();
2224
2225    return length;
2226}
2227
2228PassRefPtr<Range> TextIterator::subrange(Range* entireRange, int characterOffset, int characterCount)
2229{
2230    CharacterIterator entireRangeIterator(entireRange);
2231    return characterSubrange(entireRangeIterator, characterOffset, characterCount);
2232}
2233
2234PassRefPtr<Range> TextIterator::rangeFromLocationAndLength(ContainerNode* scope, int rangeLocation, int rangeLength, bool forSelectionPreservation)
2235{
2236    RefPtr<Range> resultRange = scope->document()->createRange();
2237
2238    int docTextPosition = 0;
2239    int rangeEnd = rangeLocation + rangeLength;
2240    bool startRangeFound = false;
2241
2242    RefPtr<Range> textRunRange;
2243
2244    TextIterator it(rangeOfContents(scope).get(), forSelectionPreservation ? TextIteratorEmitsCharactersBetweenAllVisiblePositions : TextIteratorDefaultBehavior);
2245
2246    // FIXME: the atEnd() check shouldn't be necessary, workaround for <http://bugs.webkit.org/show_bug.cgi?id=6289>.
2247    if (rangeLocation == 0 && rangeLength == 0 && it.atEnd()) {
2248        textRunRange = it.range();
2249
2250        resultRange->setStart(textRunRange->startContainer(), 0, ASSERT_NO_EXCEPTION);
2251        resultRange->setEnd(textRunRange->startContainer(), 0, ASSERT_NO_EXCEPTION);
2252
2253        return resultRange.release();
2254    }
2255
2256    for (; !it.atEnd(); it.advance()) {
2257        int len = it.length();
2258        textRunRange = it.range();
2259
2260        bool foundStart = rangeLocation >= docTextPosition && rangeLocation <= docTextPosition + len;
2261        bool foundEnd = rangeEnd >= docTextPosition && rangeEnd <= docTextPosition + len;
2262
2263        // Fix textRunRange->endPosition(), but only if foundStart || foundEnd, because it is only
2264        // in those cases that textRunRange is used.
2265        if (foundEnd) {
2266            // FIXME: This is a workaround for the fact that the end of a run is often at the wrong
2267            // position for emitted '\n's.
2268            if (len == 1 && it.characterAt(0) == '\n') {
2269                scope->document()->updateLayoutIgnorePendingStylesheets();
2270                it.advance();
2271                if (!it.atEnd()) {
2272                    RefPtr<Range> range = it.range();
2273                    textRunRange->setEnd(range->startContainer(), range->startOffset(), ASSERT_NO_EXCEPTION);
2274                } else {
2275                    Position runStart = textRunRange->startPosition();
2276                    Position runEnd = VisiblePosition(runStart).next().deepEquivalent();
2277                    if (runEnd.isNotNull())
2278                        textRunRange->setEnd(runEnd.containerNode(), runEnd.computeOffsetInContainerNode(), ASSERT_NO_EXCEPTION);
2279                }
2280            }
2281        }
2282
2283        if (foundStart) {
2284            startRangeFound = true;
2285            if (textRunRange->startContainer()->isTextNode()) {
2286                int offset = rangeLocation - docTextPosition;
2287                resultRange->setStart(textRunRange->startContainer(), offset + textRunRange->startOffset(), IGNORE_EXCEPTION);
2288            } else {
2289                if (rangeLocation == docTextPosition)
2290                    resultRange->setStart(textRunRange->startContainer(), textRunRange->startOffset(), IGNORE_EXCEPTION);
2291                else
2292                    resultRange->setStart(textRunRange->endContainer(), textRunRange->endOffset(), IGNORE_EXCEPTION);
2293            }
2294        }
2295
2296        if (foundEnd) {
2297            if (textRunRange->startContainer()->isTextNode()) {
2298                int offset = rangeEnd - docTextPosition;
2299                resultRange->setEnd(textRunRange->startContainer(), offset + textRunRange->startOffset(), IGNORE_EXCEPTION);
2300            } else {
2301                if (rangeEnd == docTextPosition)
2302                    resultRange->setEnd(textRunRange->startContainer(), textRunRange->startOffset(), IGNORE_EXCEPTION);
2303                else
2304                    resultRange->setEnd(textRunRange->endContainer(), textRunRange->endOffset(), IGNORE_EXCEPTION);
2305            }
2306            docTextPosition += len;
2307            break;
2308        }
2309        docTextPosition += len;
2310    }
2311
2312    if (!startRangeFound)
2313        return 0;
2314
2315    if (rangeLength != 0 && rangeEnd > docTextPosition) { // rangeEnd is out of bounds
2316        resultRange->setEnd(textRunRange->endContainer(), textRunRange->endOffset(), IGNORE_EXCEPTION);
2317    }
2318
2319    return resultRange.release();
2320}
2321
2322bool TextIterator::getLocationAndLengthFromRange(Node* scope, const Range* range, size_t& location, size_t& length)
2323{
2324    location = notFound;
2325    length = 0;
2326
2327    if (!range->startContainer())
2328        return false;
2329
2330    // The critical assumption is that this only gets called with ranges that
2331    // concentrate on a given area containing the selection root. This is done
2332    // because of text fields and textareas. The DOM for those is not
2333    // directly in the document DOM, so ensure that the range does not cross a
2334    // boundary of one of those.
2335    if (range->startContainer() != scope && !range->startContainer()->isDescendantOf(scope))
2336        return false;
2337    if (range->endContainer() != scope && !range->endContainer()->isDescendantOf(scope))
2338        return false;
2339
2340    RefPtr<Range> testRange = Range::create(scope->document(), scope, 0, range->startContainer(), range->startOffset());
2341    ASSERT(testRange->startContainer() == scope);
2342    location = TextIterator::rangeLength(testRange.get());
2343
2344    testRange->setEnd(range->endContainer(), range->endOffset(), IGNORE_EXCEPTION);
2345    ASSERT(testRange->startContainer() == scope);
2346    length = TextIterator::rangeLength(testRange.get()) - location;
2347    return true;
2348}
2349
2350// --------
2351
2352String plainText(const Range* r, TextIteratorBehavior defaultBehavior, bool isDisplayString)
2353{
2354    // The initial buffer size can be critical for performance: https://bugs.webkit.org/show_bug.cgi?id=81192
2355    static const unsigned initialCapacity = 1 << 15;
2356
2357    unsigned bufferLength = 0;
2358    StringBuilder builder;
2359    builder.reserveCapacity(initialCapacity);
2360    TextIteratorBehavior behavior = defaultBehavior;
2361    if (!isDisplayString)
2362        behavior = static_cast<TextIteratorBehavior>(behavior | TextIteratorEmitsTextsWithoutTranscoding);
2363
2364    for (TextIterator it(r, behavior); !it.atEnd(); it.advance()) {
2365        it.appendTextToStringBuilder(builder);
2366        bufferLength += it.length();
2367    }
2368
2369    if (!bufferLength)
2370        return emptyString();
2371
2372    String result = builder.toString();
2373
2374    if (isDisplayString && r->ownerDocument())
2375        r->ownerDocument()->displayStringModifiedByEncoding(result);
2376
2377    return result;
2378}
2379
2380static PassRefPtr<Range> collapsedToBoundary(const Range* range, bool forward)
2381{
2382    RefPtr<Range> result = range->cloneRange(ASSERT_NO_EXCEPTION);
2383    result->collapse(!forward, ASSERT_NO_EXCEPTION);
2384    return result.release();
2385}
2386
2387static size_t findPlainText(CharacterIterator& it, const String& target, FindOptions options, size_t& matchStart)
2388{
2389    matchStart = 0;
2390    size_t matchLength = 0;
2391
2392    SearchBuffer buffer(target, options);
2393
2394    if (buffer.needsMoreContext()) {
2395        RefPtr<Range> startRange = it.range();
2396        RefPtr<Range> beforeStartRange = startRange->ownerDocument()->createRange();
2397        beforeStartRange->setEnd(startRange->startContainer(), startRange->startOffset(), IGNORE_EXCEPTION);
2398        for (SimplifiedBackwardsTextIterator backwardsIterator(beforeStartRange.get()); !backwardsIterator.atEnd(); backwardsIterator.advance()) {
2399            Vector<UChar, 1024> characters;
2400            backwardsIterator.prependTextTo(characters);
2401            buffer.prependContext(characters.data(), characters.size());
2402            if (!buffer.needsMoreContext())
2403                break;
2404        }
2405    }
2406
2407    while (!it.atEnd()) {
2408        it.appendTextTo(buffer);
2409        it.advance(buffer.numberOfCharactersJustAppended());
2410tryAgain:
2411        size_t matchStartOffset;
2412        if (size_t newMatchLength = buffer.search(matchStartOffset)) {
2413            // Note that we found a match, and where we found it.
2414            size_t lastCharacterInBufferOffset = it.characterOffset();
2415            ASSERT(lastCharacterInBufferOffset >= matchStartOffset);
2416            matchStart = lastCharacterInBufferOffset - matchStartOffset;
2417            matchLength = newMatchLength;
2418            // If searching forward, stop on the first match.
2419            // If searching backward, don't stop, so we end up with the last match.
2420            if (!(options & Backwards))
2421                break;
2422            goto tryAgain;
2423        }
2424        if (it.atBreak() && !buffer.atBreak()) {
2425            buffer.reachedBreak();
2426            goto tryAgain;
2427        }
2428    }
2429
2430    return matchLength;
2431}
2432
2433PassRefPtr<Range> findPlainText(const Range* range, const String& target, FindOptions options)
2434{
2435    // CharacterIterator requires renderers to be up-to-date
2436    range->ownerDocument()->updateLayout();
2437
2438    // First, find the text.
2439    size_t matchStart;
2440    size_t matchLength;
2441    {
2442        CharacterIterator findIterator(range, TextIteratorEntersTextControls);
2443        matchLength = findPlainText(findIterator, target, options, matchStart);
2444        if (!matchLength)
2445            return collapsedToBoundary(range, !(options & Backwards));
2446    }
2447
2448    // Then, find the document position of the start and the end of the text.
2449    CharacterIterator computeRangeIterator(range, TextIteratorEntersTextControls);
2450    return characterSubrange(computeRangeIterator, matchStart, matchLength);
2451}
2452
2453}
2454