15c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)/* 25c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * Copyright (C) 2010 Google, Inc. All Rights Reserved. 35c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * 45c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * Redistribution and use in source and binary forms, with or without 55c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * modification, are permitted provided that the following conditions 65c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * are met: 75c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * 1. Redistributions of source code must retain the above copyright 85c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * notice, this list of conditions and the following disclaimer. 95c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * 2. Redistributions in binary form must reproduce the above copyright 105c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * notice, this list of conditions and the following disclaimer in the 115c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * documentation and/or other materials provided with the distribution. 125c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * 135c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY 145c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 155c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 165c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR 175c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 185c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 195c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 205c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 215c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 225c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 2302772c6a72f1ee0b226341a4f4439970c29fc861Ben Murdoch * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 245c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) */ 255c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) 265c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)#include "config.h" 2753e740f4a82e17f3ae59772501622dc354e42336Torne (Richard Coles)#include "core/html/parser/HTMLEntitySearch.h" 285c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) 2953e740f4a82e17f3ae59772501622dc354e42336Torne (Richard Coles)#include "core/html/parser/HTMLEntityTable.h" 305c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) 315c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)namespace WebCore { 325c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) 335c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)static const HTMLEntityTableEntry* halfway(const HTMLEntityTableEntry* left, const HTMLEntityTableEntry* right) 345c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles){ 355c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) return &left[(right - left) / 2]; 365c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)} 375c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) 385c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)HTMLEntitySearch::HTMLEntitySearch() 395c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) : m_currentLength(0) 405c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) , m_mostRecentMatch(0) 415c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) , m_first(HTMLEntityTable::firstEntry()) 425c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) , m_last(HTMLEntityTable::lastEntry()) 435c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles){ 445c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)} 455c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) 465c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)HTMLEntitySearch::CompareResult HTMLEntitySearch::compare(const HTMLEntityTableEntry* entry, UChar nextCharacter) const 475c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles){ 485c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) if (entry->length < m_currentLength + 1) 495c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) return Before; 505c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) UChar entryNextCharacter = entry->entity[m_currentLength]; 515c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) if (entryNextCharacter == nextCharacter) 525c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) return Prefix; 535c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) return entryNextCharacter < nextCharacter ? Before : After; 545c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)} 555c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) 565c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)const HTMLEntityTableEntry* HTMLEntitySearch::findFirst(UChar nextCharacter) const 575c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles){ 585c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) const HTMLEntityTableEntry* left = m_first; 595c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) const HTMLEntityTableEntry* right = m_last; 605c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) if (left == right) 615c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) return left; 625c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) CompareResult result = compare(left, nextCharacter); 635c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) if (result == Prefix) 645c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) return left; 655c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) if (result == After) 665c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) return right; 675c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) while (left + 1 < right) { 685c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) const HTMLEntityTableEntry* probe = halfway(left, right); 695c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) result = compare(probe, nextCharacter); 705c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) if (result == Before) 715c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) left = probe; 725c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) else { 735c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) ASSERT(result == After || result == Prefix); 745c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) right = probe; 755c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) } 765c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) } 775c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) ASSERT(left + 1 == right); 785c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) return right; 795c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)} 805c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) 815c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)const HTMLEntityTableEntry* HTMLEntitySearch::findLast(UChar nextCharacter) const 825c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles){ 835c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) const HTMLEntityTableEntry* left = m_first; 845c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) const HTMLEntityTableEntry* right = m_last; 855c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) if (left == right) 865c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) return right; 875c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) CompareResult result = compare(right, nextCharacter); 885c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) if (result == Prefix) 895c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) return right; 905c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) if (result == Before) 915c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) return left; 925c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) while (left + 1 < right) { 935c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) const HTMLEntityTableEntry* probe = halfway(left, right); 945c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) result = compare(probe, nextCharacter); 955c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) if (result == After) 965c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) right = probe; 975c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) else { 985c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) ASSERT(result == Before || result == Prefix); 995c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) left = probe; 1005c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) } 1015c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) } 1025c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) ASSERT(left + 1 == right); 1035c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) return left; 1045c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)} 1055c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) 1065c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)void HTMLEntitySearch::advance(UChar nextCharacter) 1075c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles){ 1085c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) ASSERT(isEntityPrefix()); 1095c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) if (!m_currentLength) { 1105c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) m_first = HTMLEntityTable::firstEntryStartingWith(nextCharacter); 1115c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) m_last = HTMLEntityTable::lastEntryStartingWith(nextCharacter); 1125c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) if (!m_first || !m_last) 1135c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) return fail(); 1145c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) } else { 1155c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) m_first = findFirst(nextCharacter); 1165c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) m_last = findLast(nextCharacter); 1175c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) if (m_first == m_last && compare(m_first, nextCharacter) != Prefix) 1185c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) return fail(); 1195c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) } 1205c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) ++m_currentLength; 1215c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) if (m_first->length != m_currentLength) { 1225c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) return; 1235c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) } 1245c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) m_mostRecentMatch = m_first; 1255c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)} 1265c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles) 1275c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)} 128