15c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)/*
25c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * Copyright (C) 2010 Google, Inc. All Rights Reserved.
35c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) *
45c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * Redistribution and use in source and binary forms, with or without
55c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * modification, are permitted provided that the following conditions
65c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * are met:
75c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * 1. Redistributions of source code must retain the above copyright
85c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) *    notice, this list of conditions and the following disclaimer.
95c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * 2. Redistributions in binary form must reproduce the above copyright
105c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) *    notice, this list of conditions and the following disclaimer in the
115c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) *    documentation and/or other materials provided with the distribution.
125c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) *
135c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
145c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
155c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
165c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
175c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
185c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
195c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
205c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
215c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
225c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
2302772c6a72f1ee0b226341a4f4439970c29fc861Ben Murdoch * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
245c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) */
255c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
265c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)#include "config.h"
2753e740f4a82e17f3ae59772501622dc354e42336Torne (Richard Coles)#include "core/html/parser/HTMLEntitySearch.h"
285c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
2953e740f4a82e17f3ae59772501622dc354e42336Torne (Richard Coles)#include "core/html/parser/HTMLEntityTable.h"
305c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
315c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)namespace WebCore {
325c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
335c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)static const HTMLEntityTableEntry* halfway(const HTMLEntityTableEntry* left, const HTMLEntityTableEntry* right)
345c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles){
355c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    return &left[(right - left) / 2];
365c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)}
375c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
385c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)HTMLEntitySearch::HTMLEntitySearch()
395c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    : m_currentLength(0)
405c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    , m_mostRecentMatch(0)
415c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    , m_first(HTMLEntityTable::firstEntry())
425c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    , m_last(HTMLEntityTable::lastEntry())
435c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles){
445c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)}
455c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
465c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)HTMLEntitySearch::CompareResult HTMLEntitySearch::compare(const HTMLEntityTableEntry* entry, UChar nextCharacter) const
475c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles){
485c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    if (entry->length < m_currentLength + 1)
495c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        return Before;
505c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    UChar entryNextCharacter = entry->entity[m_currentLength];
515c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    if (entryNextCharacter == nextCharacter)
525c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        return Prefix;
535c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    return entryNextCharacter < nextCharacter ? Before : After;
545c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)}
555c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
565c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)const HTMLEntityTableEntry* HTMLEntitySearch::findFirst(UChar nextCharacter) const
575c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles){
585c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    const HTMLEntityTableEntry* left = m_first;
595c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    const HTMLEntityTableEntry* right = m_last;
605c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    if (left == right)
615c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        return left;
625c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    CompareResult result = compare(left, nextCharacter);
635c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    if (result == Prefix)
645c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        return left;
655c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    if (result == After)
665c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        return right;
675c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    while (left + 1 < right) {
685c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        const HTMLEntityTableEntry* probe = halfway(left, right);
695c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        result = compare(probe, nextCharacter);
705c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        if (result == Before)
715c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)            left = probe;
725c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        else {
735c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)            ASSERT(result == After || result == Prefix);
745c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)            right = probe;
755c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        }
765c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    }
775c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    ASSERT(left + 1 == right);
785c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    return right;
795c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)}
805c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
815c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)const HTMLEntityTableEntry* HTMLEntitySearch::findLast(UChar nextCharacter) const
825c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles){
835c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    const HTMLEntityTableEntry* left = m_first;
845c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    const HTMLEntityTableEntry* right = m_last;
855c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    if (left == right)
865c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        return right;
875c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    CompareResult result = compare(right, nextCharacter);
885c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    if (result == Prefix)
895c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        return right;
905c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    if (result == Before)
915c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        return left;
925c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    while (left + 1 < right) {
935c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        const HTMLEntityTableEntry* probe = halfway(left, right);
945c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        result = compare(probe, nextCharacter);
955c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        if (result == After)
965c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)            right = probe;
975c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        else {
985c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)            ASSERT(result == Before || result == Prefix);
995c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)            left = probe;
1005c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        }
1015c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    }
1025c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    ASSERT(left + 1 == right);
1035c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    return left;
1045c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)}
1055c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
1065c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)void HTMLEntitySearch::advance(UChar nextCharacter)
1075c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles){
1085c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    ASSERT(isEntityPrefix());
1095c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    if (!m_currentLength) {
1105c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        m_first = HTMLEntityTable::firstEntryStartingWith(nextCharacter);
1115c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        m_last = HTMLEntityTable::lastEntryStartingWith(nextCharacter);
1125c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        if (!m_first || !m_last)
1135c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)            return fail();
1145c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    } else {
1155c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        m_first = findFirst(nextCharacter);
1165c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        m_last = findLast(nextCharacter);
1175c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        if (m_first == m_last && compare(m_first, nextCharacter) != Prefix)
1185c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)            return fail();
1195c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    }
1205c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    ++m_currentLength;
1215c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    if (m_first->length != m_currentLength) {
1225c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)        return;
1235c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    }
1245c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)    m_mostRecentMatch = m_first;
1255c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)}
1265c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
1275c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)}
128