/*
 * Copyright (C) 2008 Apple Inc. All Rights Reserved.
 * Copyright (C) 2010 Google, Inc. All Rights Reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
265c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
275c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)#ifndef HTMLEntityParser_h
285c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)#define HTMLEntityParser_h
295c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
301e202183a5dc46166763171984b285173f8585e5Torne (Richard Coles)#include "platform/text/SegmentedString.h"
315c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
325c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)namespace WebCore {
335c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
343c9e4aeaee9f9b0a9a814da07bcb33319c7ea363Ben Murdochclass DecodedHTMLEntity {
353c9e4aeaee9f9b0a9a814da07bcb33319c7ea363Ben Murdochprivate:
363c9e4aeaee9f9b0a9a814da07bcb33319c7ea363Ben Murdoch    // HTML entities contain at most four UTF-16 code units.
373c9e4aeaee9f9b0a9a814da07bcb33319c7ea363Ben Murdoch    static const unsigned kMaxLength = 4;
383c9e4aeaee9f9b0a9a814da07bcb33319c7ea363Ben Murdoch
393c9e4aeaee9f9b0a9a814da07bcb33319c7ea363Ben Murdochpublic:
403c9e4aeaee9f9b0a9a814da07bcb33319c7ea363Ben Murdoch    DecodedHTMLEntity() : length(0) { }
413c9e4aeaee9f9b0a9a814da07bcb33319c7ea363Ben Murdoch
423c9e4aeaee9f9b0a9a814da07bcb33319c7ea363Ben Murdoch    bool isEmpty() const { return !length; }
433c9e4aeaee9f9b0a9a814da07bcb33319c7ea363Ben Murdoch
443c9e4aeaee9f9b0a9a814da07bcb33319c7ea363Ben Murdoch    void append(UChar c)
453c9e4aeaee9f9b0a9a814da07bcb33319c7ea363Ben Murdoch    {
463c9e4aeaee9f9b0a9a814da07bcb33319c7ea363Ben Murdoch        RELEASE_ASSERT(length < kMaxLength);
473c9e4aeaee9f9b0a9a814da07bcb33319c7ea363Ben Murdoch        data[length++] = c;
483c9e4aeaee9f9b0a9a814da07bcb33319c7ea363Ben Murdoch    }
493c9e4aeaee9f9b0a9a814da07bcb33319c7ea363Ben Murdoch
503c9e4aeaee9f9b0a9a814da07bcb33319c7ea363Ben Murdoch    void append(UChar32 c)
513c9e4aeaee9f9b0a9a814da07bcb33319c7ea363Ben Murdoch    {
523c9e4aeaee9f9b0a9a814da07bcb33319c7ea363Ben Murdoch        if (U_IS_BMP(c)) {
533c9e4aeaee9f9b0a9a814da07bcb33319c7ea363Ben Murdoch            append(static_cast<UChar>(c));
543c9e4aeaee9f9b0a9a814da07bcb33319c7ea363Ben Murdoch            return;
553c9e4aeaee9f9b0a9a814da07bcb33319c7ea363Ben Murdoch        }
563c9e4aeaee9f9b0a9a814da07bcb33319c7ea363Ben Murdoch        append(U16_LEAD(c));
573c9e4aeaee9f9b0a9a814da07bcb33319c7ea363Ben Murdoch        append(U16_TRAIL(c));
583c9e4aeaee9f9b0a9a814da07bcb33319c7ea363Ben Murdoch    }
593c9e4aeaee9f9b0a9a814da07bcb33319c7ea363Ben Murdoch
603c9e4aeaee9f9b0a9a814da07bcb33319c7ea363Ben Murdoch    unsigned length;
613c9e4aeaee9f9b0a9a814da07bcb33319c7ea363Ben Murdoch    UChar data[kMaxLength];
623c9e4aeaee9f9b0a9a814da07bcb33319c7ea363Ben Murdoch};
633c9e4aeaee9f9b0a9a814da07bcb33319c7ea363Ben Murdoch
643c9e4aeaee9f9b0a9a814da07bcb33319c7ea363Ben Murdochbool consumeHTMLEntity(SegmentedString&, DecodedHTMLEntity& decodedEntity, bool& notEnoughCharacters, UChar additionalAllowedCharacter = '\0');
655c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
665c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)// Used by the XML parser.  Not suitable for use in HTML parsing.  Use consumeHTMLEntity instead.
67926b001d589ce2f10facb93dd4b87578ea35a855Torne (Richard Coles)size_t decodeNamedEntityToUCharArray(const char*, UChar result[4]);
685c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
695c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)}
705c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)
715c87bf8b86a7c82ef50fb7a89697d8e02e2553beTorne (Richard Coles)#endif
72