1/*
2 * Copyright (C) 2010 Google, Inc. All Rights Reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 *    notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 *    notice, this list of conditions and the following disclaimer in the
11 *    documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26#include "config.h"
27#include "core/dom/DecodedDataDocumentParser.h"
28
29#include "bindings/v8/ExceptionStatePlaceholder.h"
30#include "core/dom/Document.h"
31#include "core/dom/Element.h"
32#include "core/loader/TextResourceDecoder.h"
33#include "wtf/text/TextEncodingRegistry.h"
34
35namespace WebCore {
36
37namespace {
38
39class TitleEncodingFixer {
40public:
41    explicit TitleEncodingFixer(Document* document)
42        : m_document(document)
43        , m_firstEncoding(document->decoder()->encoding())
44    {
45    }
46
47    // It's possible for the encoding of the document to change while we're decoding
48    // data. That can only occur while we're processing the <head> portion of the
49    // document. There isn't much user-visible content in the <head>, but there is
50    // the <title> element. This function detects that situation and re-decodes the
51    // document's title so that the user doesn't see an incorrectly decoded title
52    // in the title bar.
53    inline void fixTitleEncodingIfNeeded()
54    {
55        if (m_firstEncoding == m_document->decoder()->encoding())
56            return; // In the common case, the encoding doesn't change and there isn't any work to do.
57        fixTitleEncoding();
58    }
59
60private:
61    void fixTitleEncoding();
62
63    Document* m_document;
64    WTF::TextEncoding m_firstEncoding;
65};
66
67void TitleEncodingFixer::fixTitleEncoding()
68{
69    RefPtr<Element> titleElement = m_document->titleElement();
70    if (!titleElement
71        || titleElement->firstElementChild()
72        || m_firstEncoding != Latin1Encoding()
73        || !titleElement->textContent().containsOnlyLatin1())
74        return; // Either we don't have a title yet or something bizzare as happened and we give up.
75    CString originalBytes = titleElement->textContent().latin1();
76    OwnPtr<TextCodec> codec = newTextCodec(m_document->decoder()->encoding());
77    String correctlyDecodedTitle = codec->decode(originalBytes.data(), originalBytes.length(), true);
78    titleElement->setTextContent(correctlyDecodedTitle, IGNORE_EXCEPTION);
79}
80
81}
82
83DecodedDataDocumentParser::DecodedDataDocumentParser(Document* document)
84    : DocumentParser(document)
85{
86}
87
88size_t DecodedDataDocumentParser::appendBytes(const char* data, size_t length)
89{
90    if (!length)
91        return 0;
92
93    TitleEncodingFixer encodingFixer(document());
94
95    String decoded = document()->decoder()->decode(data, length);
96
97    encodingFixer.fixTitleEncodingIfNeeded();
98
99    if (decoded.isEmpty())
100        return 0;
101
102    size_t consumedChars = decoded.length();
103    append(decoded.releaseImpl());
104
105    return consumedChars;
106}
107
108size_t DecodedDataDocumentParser::flush()
109{
110    // null decoder indicates there is no data received.
111    // We have nothing to do in that case.
112    TextResourceDecoder* decoder = document()->decoder();
113    if (!decoder)
114        return 0;
115    String remainingData = decoder->flush();
116    if (remainingData.isEmpty())
117        return 0;
118
119    size_t consumedChars = remainingData.length();
120    append(remainingData.releaseImpl());
121
122    return consumedChars;
123}
124
125};
126