/*
    Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de)
    Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2012 Apple Inc. All rights reserved.
    Copyright (C) 2005, 2006, 2007 Alexey Proskuryakov (ap@nypop.com)

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public
    License as published by the Free Software Foundation; either
    version 2 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Library General Public License for more details.

    You should have received a copy of the GNU Library General Public License
    along with this library; see the file COPYING.LIB.  If not, write to
    the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
    Boston, MA 02110-1301, USA.
*/
215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "config.h"
245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "core/html/parser/TextResourceDecoder.h"
255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
26868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)#include "core/HTMLNames.h"
27868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)#include "core/dom/DOMImplementation.h"
282a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "core/html/parser/HTMLMetaCharsetParser.h"
292a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "platform/text/TextEncodingDetector.h"
305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "wtf/StringExtras.h"
315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "wtf/text/TextCodec.h"
325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "wtf/text/TextEncodingRegistry.h"
335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)using namespace WTF;
355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace blink {
375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)using namespace HTMLNames;
395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
// Returns true when the five bytes starting at |p| are exactly b0..b4.
// The bytes are compared left to right and the comparison stops at the first
// mismatch; the caller is expected to provide at least five readable bytes.
static inline bool bytesEqual(const char* p, char b0, char b1, char b2, char b3, char b4)
{
    return p[0] == b0 && p[1] == b1 && p[2] == b2 && p[3] == b3 && p[4] == b4;
}
445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
// Six-byte overload: returns true when the six bytes starting at |p| are
// exactly b0..b5. Compared left to right, stopping at the first mismatch;
// the caller is expected to provide at least six readable bytes.
static inline bool bytesEqual(const char* p, char b0, char b1, char b2, char b3, char b4, char b5)
{
    return p[0] == b0 && p[1] == b1 && p[2] == b2 && p[3] == b3 && p[4] == b4 && p[5] == b5;
}
495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
// Eight-byte overload: returns true when the eight bytes starting at |p| are
// exactly b0..b7. Compared left to right, stopping at the first mismatch;
// the caller is expected to provide at least eight readable bytes.
static inline bool bytesEqual(const char* p, char b0, char b1, char b2, char b3, char b4, char b5, char b6, char b7)
{
    return p[0] == b0 && p[1] == b1 && p[2] == b2 && p[3] == b3 && p[4] == b4 && p[5] == b5 && p[6] == b6 && p[7] == b7;
}
545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
// Ten-byte overload: returns true when the ten bytes starting at |p| are
// exactly b0..b9. Compared left to right, stopping at the first mismatch;
// the caller is expected to provide at least ten readable bytes.
static inline bool bytesEqual(const char* p, char b0, char b1, char b2, char b3, char b4, char b5, char b6, char b7, char b8, char b9)
{
    return p[0] == b0 && p[1] == b1 && p[2] == b2 && p[3] == b3 && p[4] == b4 && p[5] == b5 && p[6] == b6 && p[7] == b7 && p[8] == b8 && p[9] == b9;
}
592a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
// You might think we should put these find functions elsewhere, perhaps with the
// similar functions that operate on UChar, but arguably only the decoder has
// a reason to process strings of char rather than UChar.

// Searches the first |subjectLength| bytes of |subject| for the NUL-terminated
// string |target| (case-sensitive, byte-wise). |subject| does not need to be
// NUL-terminated and may contain embedded NUL bytes.
// Returns the offset of the first occurrence, or -1 if there is none. An empty
// |target| matches at offset 0.
static int find(const char* subject, size_t subjectLength, const char* target)
{
    size_t targetLength = strlen(target);
    if (targetLength > subjectLength)
        return -1;
    // Handled up front so that memcmp() is never called with a zero length.
    if (!targetLength)
        return 0;

    // Last offset at which |target| still fits inside |subject|.
    size_t lastStart = subjectLength - targetLength;
    for (size_t i = 0; i <= lastStart; ++i) {
        if (!memcmp(subject + i, target, targetLength))
            return static_cast<int>(i);
    }
    return -1;
}
825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static WTF::TextEncoding findTextEncoding(const char* encodingName, int length)
845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){
855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    Vector<char, 64> buffer(length + 1);
865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    memcpy(buffer.data(), encodingName, length);
872a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    buffer[length] = '\0';
885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return buffer.data();
895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)TextResourceDecoder::ContentType TextResourceDecoder::determineContentType(const String& mimeType)
925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){
935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (equalIgnoringCase(mimeType, "text/css"))
94        return CSSContent;
95    if (equalIgnoringCase(mimeType, "text/html"))
96        return HTMLContent;
97    if (DOMImplementation::isXMLMIMEType(mimeType))
98        return XMLContent;
99    return PlainTextContent;
100}
101
102const WTF::TextEncoding& TextResourceDecoder::defaultEncoding(ContentType contentType, const WTF::TextEncoding& specifiedDefaultEncoding)
103{
104    // Despite 8.5 "Text/xml with Omitted Charset" of RFC 3023, we assume UTF-8 instead of US-ASCII
105    // for text/xml. This matches Firefox.
106    if (contentType == XMLContent)
107        return UTF8Encoding();
108    if (!specifiedDefaultEncoding.isValid())
109        return Latin1Encoding();
110    return specifiedDefaultEncoding;
111}
112
113TextResourceDecoder::TextResourceDecoder(const String& mimeType, const WTF::TextEncoding& specifiedDefaultEncoding, bool usesEncodingDetector)
114    : m_contentType(determineContentType(mimeType))
115    , m_encoding(defaultEncoding(m_contentType, specifiedDefaultEncoding))
116    , m_source(DefaultEncoding)
117    , m_hintEncoding(0)
118    , m_checkedForBOM(false)
119    , m_checkedForCSSCharset(false)
120    , m_checkedForXMLCharset(false)
121    , m_checkedForMetaCharset(false)
122    , m_useLenientXMLDecoding(false)
123    , m_sawError(false)
124    , m_usesEncodingDetector(usesEncodingDetector)
125{
126}
127
128TextResourceDecoder::~TextResourceDecoder()
129{
130}
131
132void TextResourceDecoder::setEncoding(const WTF::TextEncoding& encoding, EncodingSource source)
133{
134    // In case the encoding didn't exist, we keep the old one (helps some sites specifying invalid encodings).
135    if (!encoding.isValid())
136        return;
137
138    // When encoding comes from meta tag (i.e. it cannot be XML files sent via XHR),
139    // treat x-user-defined as windows-1252 (bug 18270)
140    if (source == EncodingFromMetaTag && !strcasecmp(encoding.name(), "x-user-defined"))
141        m_encoding = "windows-1252";
142    else if (source == EncodingFromMetaTag || source == EncodingFromXMLHeader || source == EncodingFromCSSCharset)
143        m_encoding = encoding.closestByteBasedEquivalent();
144    else
145        m_encoding = encoding;
146
147    m_codec.clear();
148    m_source = source;
149}
150
151// Returns the position of the encoding string.
152static int findXMLEncoding(const char* str, int len, int& encodingLength)
153{
154    int pos = find(str, len, "encoding");
155    if (pos == -1)
156        return -1;
157    pos += 8;
158
159    // Skip spaces and stray control characters.
160    while (pos < len && str[pos] <= ' ')
161        ++pos;
162
163    // Skip equals sign.
164    if (pos >= len || str[pos] != '=')
165        return -1;
166    ++pos;
167
168    // Skip spaces and stray control characters.
169    while (pos < len && str[pos] <= ' ')
170        ++pos;
171
172    // Skip quotation mark.
173    if (pos >= len)
174        return - 1;
175    char quoteMark = str[pos];
176    if (quoteMark != '"' && quoteMark != '\'')
177        return -1;
178    ++pos;
179
180    // Find the trailing quotation mark.
181    int end = pos;
182    while (end < len && str[end] != quoteMark)
183        ++end;
184    if (end >= len)
185        return -1;
186
187    encodingLength = end - pos;
188    return pos;
189}
190
191size_t TextResourceDecoder::checkForBOM(const char* data, size_t len)
192{
193    // Check for UTF-16/32 or UTF-8 BOM mark at the beginning, which is a sure sign of a Unicode encoding.
194    // We let it override even a user-chosen encoding.
195    ASSERT(!m_checkedForBOM);
196
197    size_t lengthOfBOM = 0;
198
199    size_t bufferLength = m_buffer.size();
200
201    size_t buf1Len = bufferLength;
202    size_t buf2Len = len;
203    const unsigned char* buf1 = reinterpret_cast<const unsigned char*>(m_buffer.data());
204    const unsigned char* buf2 = reinterpret_cast<const unsigned char*>(data);
205    unsigned char c1 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0;
206    unsigned char c2 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0;
207    unsigned char c3 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0;
208    unsigned char c4 = buf2Len ? (--buf2Len, *buf2++) : 0;
209
210    // Check for the BOM.
211    if (c1 == 0xFF && c2 == 0xFE) {
212        if (c3 || c4) {
213            setEncoding(UTF16LittleEndianEncoding(), AutoDetectedEncoding);
214            lengthOfBOM = 2;
215        } else {
216            setEncoding(UTF32LittleEndianEncoding(), AutoDetectedEncoding);
217            lengthOfBOM = 4;
218        }
219    } else if (c1 == 0xEF && c2 == 0xBB && c3 == 0xBF) {
220        setEncoding(UTF8Encoding(), AutoDetectedEncoding);
221        lengthOfBOM = 3;
222    } else if (c1 == 0xFE && c2 == 0xFF) {
223        setEncoding(UTF16BigEndianEncoding(), AutoDetectedEncoding);
224        lengthOfBOM = 2;
225    } else if (!c1 && !c2 && c3 == 0xFE && c4 == 0xFF) {
226        setEncoding(UTF32BigEndianEncoding(), AutoDetectedEncoding);
227        lengthOfBOM = 4;
228    }
229
230    if (lengthOfBOM || bufferLength + len >= 4)
231        m_checkedForBOM = true;
232
233    return lengthOfBOM;
234}
235
236bool TextResourceDecoder::checkForCSSCharset(const char* data, size_t len, bool& movedDataToBuffer)
237{
238    if (m_source != DefaultEncoding && m_source != EncodingFromParentFrame) {
239        m_checkedForCSSCharset = true;
240        return true;
241    }
242
243    size_t oldSize = m_buffer.size();
244    m_buffer.grow(oldSize + len);
245    memcpy(m_buffer.data() + oldSize, data, len);
246
247    movedDataToBuffer = true;
248
249    if (m_buffer.size() <= 13) // strlen('@charset "x";') == 13
250        return false;
251
252    const char* dataStart = m_buffer.data();
253    const char* dataEnd = dataStart + m_buffer.size();
254
255    if (bytesEqual(dataStart, '@', 'c', 'h', 'a', 'r', 's', 'e', 't', ' ', '"')) {
256        dataStart += 10;
257        const char* pos = dataStart;
258
259        while (pos < dataEnd && *pos != '"')
260            ++pos;
261        if (pos == dataEnd)
262            return false;
263
264        int encodingNameLength = pos - dataStart;
265
266        ++pos;
267        if (pos == dataEnd)
268            return false;
269
270        if (*pos == ';')
271            setEncoding(findTextEncoding(dataStart, encodingNameLength), EncodingFromCSSCharset);
272    }
273
274    m_checkedForCSSCharset = true;
275    return true;
276}
277
278bool TextResourceDecoder::checkForXMLCharset(const char* data, size_t len, bool& movedDataToBuffer)
279{
280    if (m_source != DefaultEncoding && m_source != EncodingFromParentFrame) {
281        m_checkedForXMLCharset = true;
282        return true;
283    }
284
285    // This is not completely efficient, since the function might go
286    // through the HTML head several times.
287
288    size_t oldSize = m_buffer.size();
289    m_buffer.grow(oldSize + len);
290    memcpy(m_buffer.data() + oldSize, data, len);
291
292    movedDataToBuffer = true;
293
294    const char* ptr = m_buffer.data();
295    const char* pEnd = ptr + m_buffer.size();
296
297    // Is there enough data available to check for XML declaration?
298    if (m_buffer.size() < 8)
299        return false;
300
301    // Handle XML declaration, which can have encoding in it. This encoding is honored even for HTML documents.
302    // It is an error for an XML declaration not to be at the start of an XML document, and it is ignored in HTML documents in such case.
303    if (bytesEqual(ptr, '<', '?', 'x', 'm', 'l')) {
304        const char* xmlDeclarationEnd = ptr;
305        while (xmlDeclarationEnd != pEnd && *xmlDeclarationEnd != '>')
306            ++xmlDeclarationEnd;
307        if (xmlDeclarationEnd == pEnd)
308            return false;
309        // No need for +1, because we have an extra "?" to lose at the end of XML declaration.
310        int len = 0;
311        int pos = findXMLEncoding(ptr, xmlDeclarationEnd - ptr, len);
312        if (pos != -1)
313            setEncoding(findTextEncoding(ptr + pos, len), EncodingFromXMLHeader);
314        // continue looking for a charset - it may be specified in an HTTP-Equiv meta
315    } else if (bytesEqual(ptr, '<', 0, '?', 0, 'x', 0)) {
316        setEncoding(UTF16LittleEndianEncoding(), AutoDetectedEncoding);
317    } else if (bytesEqual(ptr, 0, '<', 0, '?', 0, 'x')) {
318        setEncoding(UTF16BigEndianEncoding(), AutoDetectedEncoding);
319    } else if (bytesEqual(ptr, '<', 0, 0, 0, '?', 0, 0, 0)) {
320        setEncoding(UTF32LittleEndianEncoding(), AutoDetectedEncoding);
321    } else if (bytesEqual(ptr, 0, 0, 0, '<', 0, 0, 0, '?')) {
322        setEncoding(UTF32BigEndianEncoding(), AutoDetectedEncoding);
323    }
324
325    m_checkedForXMLCharset = true;
326    return true;
327}
328
329void TextResourceDecoder::checkForMetaCharset(const char* data, size_t length)
330{
331    if (m_source == UserChosenEncoding || m_source == EncodingFromHTTPHeader || m_source == AutoDetectedEncoding) {
332        m_checkedForMetaCharset = true;
333        return;
334    }
335
336    if (!m_charsetParser)
337        m_charsetParser = HTMLMetaCharsetParser::create();
338
339    if (!m_charsetParser->checkForMetaCharset(data, length))
340        return;
341
342    setEncoding(m_charsetParser->encoding(), EncodingFromMetaTag);
343    m_charsetParser.clear();
344    m_checkedForMetaCharset = true;
345    return;
346}
347
348// We use the encoding detector in two cases:
349//   1. Encoding detector is turned ON and no other encoding source is
350//      available (that is, it's DefaultEncoding).
351//   2. Encoding detector is turned ON and the encoding is set to
352//      the encoding of the parent frame, which is also auto-detected.
353//   Note that condition #2 is NOT satisfied unless parent-child frame
354//   relationship is compliant to the same-origin policy. If they're from
355//   different domains, |m_source| would not be set to EncodingFromParentFrame
356//   in the first place.
357bool TextResourceDecoder::shouldAutoDetect() const
358{
359    // Just checking m_hintEncoding suffices here because it's only set
360    // in setHintEncoding when the source is AutoDetectedEncoding.
361    return m_usesEncodingDetector
362        && (m_source == DefaultEncoding || (m_source == EncodingFromParentFrame && m_hintEncoding));
363}
364
365String TextResourceDecoder::decode(const char* data, size_t len)
366{
367    size_t lengthOfBOM = 0;
368    if (!m_checkedForBOM)
369        lengthOfBOM = checkForBOM(data, len);
370
371    bool movedDataToBuffer = false;
372
373    if (m_contentType == CSSContent && !m_checkedForCSSCharset) {
374        if (!checkForCSSCharset(data, len, movedDataToBuffer))
375            return emptyString();
376    }
377
378    if ((m_contentType == HTMLContent || m_contentType == XMLContent) && !m_checkedForXMLCharset) {
379        if (!checkForXMLCharset(data, len, movedDataToBuffer))
380            return emptyString();
381    }
382
383    const char* dataForDecode = data + lengthOfBOM;
384    size_t lengthForDecode = len - lengthOfBOM;
385
386    if (!m_buffer.isEmpty()) {
387        if (!movedDataToBuffer) {
388            size_t oldSize = m_buffer.size();
389            m_buffer.grow(oldSize + len);
390            memcpy(m_buffer.data() + oldSize, data, len);
391        }
392
393        dataForDecode = m_buffer.data() + lengthOfBOM;
394        lengthForDecode = m_buffer.size() - lengthOfBOM;
395    }
396
397    if (m_contentType == HTMLContent && !m_checkedForMetaCharset)
398        checkForMetaCharset(dataForDecode, lengthForDecode);
399
400    if (shouldAutoDetect()) {
401        WTF::TextEncoding detectedEncoding;
402        if (detectTextEncoding(data, len, m_hintEncoding, &detectedEncoding))
403            setEncoding(detectedEncoding, EncodingFromContentSniffing);
404    }
405
406    ASSERT(m_encoding.isValid());
407
408    if (!m_codec)
409        m_codec = newTextCodec(m_encoding);
410
411    String result = m_codec->decode(dataForDecode, lengthForDecode, DoNotFlush, m_contentType == XMLContent && !m_useLenientXMLDecoding, m_sawError);
412
413    m_buffer.clear();
414    return result;
415}
416
417String TextResourceDecoder::flush()
418{
419    // If we can not identify the encoding even after a document is completely
420    // loaded, we need to detect the encoding if other conditions for
421    // autodetection is satisfied.
422    if (m_buffer.size() && shouldAutoDetect()
423        && ((!m_checkedForXMLCharset && (m_contentType == HTMLContent || m_contentType == XMLContent)) || (!m_checkedForCSSCharset && (m_contentType == CSSContent)))) {
424        WTF::TextEncoding detectedEncoding;
425        if (detectTextEncoding(m_buffer.data(), m_buffer.size(), m_hintEncoding, &detectedEncoding))
426            setEncoding(detectedEncoding, EncodingFromContentSniffing);
427    }
428
429    if (!m_codec)
430        m_codec = newTextCodec(m_encoding);
431
432    String result = m_codec->decode(m_buffer.data(), m_buffer.size(), FetchEOF, m_contentType == XMLContent && !m_useLenientXMLDecoding, m_sawError);
433    m_buffer.clear();
434    m_codec.clear();
435    m_checkedForBOM = false; // Skip BOM again when re-decoding.
436    return result;
437}
438
439}
440