15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)/* 25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de) 35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2012 Apple Inc. All rights reserved. 45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Copyright (C) 2005, 2006, 2007 Alexey Proskuryakov (ap@nypop.com) 55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) This library is free software; you can redistribute it and/or 75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) modify it under the terms of the GNU Library General Public 8868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) License as published by the Free Software Foundation; either 9eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch version 2 of the License, or (at your option) any later version. 105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) This library is distributed in the hope that it will be useful, 125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) but WITHOUT ANY WARRANTY; without even the implied warranty of 132a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Library General Public License for more details. 155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) You should have received a copy of the GNU Library General Public License 175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) along with this library; see the file COPYING.LIB. If not, write to 185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Boston, MA 02110-1301, USA. 205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)*/ 215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "config.h" 245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "core/html/parser/TextResourceDecoder.h" 255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 26868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)#include "core/HTMLNames.h" 27868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)#include "core/dom/DOMImplementation.h" 282a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "core/html/parser/HTMLMetaCharsetParser.h" 292a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "platform/text/TextEncodingDetector.h" 305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "wtf/StringExtras.h" 315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "wtf/text/TextCodec.h" 325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "wtf/text/TextEncodingRegistry.h" 335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)using namespace WTF; 355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace blink { 375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)using namespace HTMLNames; 395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 4090dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)static inline bool bytesEqual(const char* p, char b0, char b1, char b2, char b3, char b4) 41868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles){ 425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return p[0] == b0 && p[1] == b1 && p[2] == b2 && p[3] == b3 && p[4] == b4; 435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static inline bool bytesEqual(const char* p, char b0, char b1, char b2, char b3, char b4, char b5) 465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return p[0] == b0 && p[1] == b1 && p[2] == b2 && p[3] == b3 && p[4] == b4 && p[5] == b5; 485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static inline bool bytesEqual(const char* p, char b0, char b1, char b2, char b3, char b4, char b5, char b6, char b7) 515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return p[0] == b0 && p[1] == b1 && p[2] == b2 && p[3] == b3 && p[4] == b4 && p[5] == b5 && p[6] == b6 && p[7] == b7; 535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static inline bool bytesEqual(const char* p, char b0, char b1, char b2, char b3, char b4, char b5, char b6, char b7, char b8, char b9) 565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 572a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return p[0] == b0 && p[1] == b1 && p[2] == b2 && p[3] == b3 && p[4] == b4 && p[5] == b5 && p[6] == b6 && p[7] == b7 && p[8] == b8 && p[9] == b9; 582a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)} 592a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 602a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)// You might think we should put these find functions elsewhere, perhaps with the 615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// similar functions that operate on UChar, but arguably only the decoder has 625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// a reason to process strings of char rather than UChar. 635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static int find(const char* subject, size_t subjectLength, const char* target) 655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) size_t targetLength = strlen(target); 675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (targetLength > subjectLength) 685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return -1; 695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (size_t i = 0; i <= subjectLength - targetLength; ++i) { 705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool match = true; 715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (size_t j = 0; j < targetLength; ++j) { 725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (subject[i + j] != target[j]) { 735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) match = false; 745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) break; 755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (match) 785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return i; 795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return -1; 815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static WTF::TextEncoding findTextEncoding(const char* encodingName, int length) 845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Vector<char, 64> buffer(length + 1); 865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) memcpy(buffer.data(), encodingName, length); 872a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) buffer[length] = '\0'; 885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return buffer.data(); 895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)TextResourceDecoder::ContentType TextResourceDecoder::determineContentType(const String& mimeType) 925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles){ 935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (equalIgnoringCase(mimeType, "text/css")) 94 return CSSContent; 95 if (equalIgnoringCase(mimeType, "text/html")) 96 return HTMLContent; 97 if (DOMImplementation::isXMLMIMEType(mimeType)) 98 return XMLContent; 99 return PlainTextContent; 100} 101 102const WTF::TextEncoding& TextResourceDecoder::defaultEncoding(ContentType contentType, const WTF::TextEncoding& specifiedDefaultEncoding) 103{ 104 // Despite 8.5 "Text/xml with Omitted Charset" of RFC 3023, we assume UTF-8 instead of US-ASCII 105 // for text/xml. This matches Firefox. 106 if (contentType == XMLContent) 107 return UTF8Encoding(); 108 if (!specifiedDefaultEncoding.isValid()) 109 return Latin1Encoding(); 110 return specifiedDefaultEncoding; 111} 112 113TextResourceDecoder::TextResourceDecoder(const String& mimeType, const WTF::TextEncoding& specifiedDefaultEncoding, bool usesEncodingDetector) 114 : m_contentType(determineContentType(mimeType)) 115 , m_encoding(defaultEncoding(m_contentType, specifiedDefaultEncoding)) 116 , m_source(DefaultEncoding) 117 , m_hintEncoding(0) 118 , m_checkedForBOM(false) 119 , m_checkedForCSSCharset(false) 120 , m_checkedForXMLCharset(false) 121 , m_checkedForMetaCharset(false) 122 , m_useLenientXMLDecoding(false) 123 , m_sawError(false) 124 , m_usesEncodingDetector(usesEncodingDetector) 125{ 126} 127 128TextResourceDecoder::~TextResourceDecoder() 129{ 130} 131 132void TextResourceDecoder::setEncoding(const WTF::TextEncoding& encoding, EncodingSource source) 133{ 134 // In case the encoding didn't exist, we keep the old one (helps some sites specifying invalid encodings). 135 if (!encoding.isValid()) 136 return; 137 138 // When encoding comes from meta tag (i.e. it cannot be XML files sent via XHR), 139 // treat x-user-defined as windows-1252 (bug 18270) 140 if (source == EncodingFromMetaTag && !strcasecmp(encoding.name(), "x-user-defined")) 141 m_encoding = "windows-1252"; 142 else if (source == EncodingFromMetaTag || source == EncodingFromXMLHeader || source == EncodingFromCSSCharset) 143 m_encoding = encoding.closestByteBasedEquivalent(); 144 else 145 m_encoding = encoding; 146 147 m_codec.clear(); 148 m_source = source; 149} 150 151// Returns the position of the encoding string. 152static int findXMLEncoding(const char* str, int len, int& encodingLength) 153{ 154 int pos = find(str, len, "encoding"); 155 if (pos == -1) 156 return -1; 157 pos += 8; 158 159 // Skip spaces and stray control characters. 160 while (pos < len && str[pos] <= ' ') 161 ++pos; 162 163 // Skip equals sign. 164 if (pos >= len || str[pos] != '=') 165 return -1; 166 ++pos; 167 168 // Skip spaces and stray control characters. 169 while (pos < len && str[pos] <= ' ') 170 ++pos; 171 172 // Skip quotation mark. 173 if (pos >= len) 174 return - 1; 175 char quoteMark = str[pos]; 176 if (quoteMark != '"' && quoteMark != '\'') 177 return -1; 178 ++pos; 179 180 // Find the trailing quotation mark. 181 int end = pos; 182 while (end < len && str[end] != quoteMark) 183 ++end; 184 if (end >= len) 185 return -1; 186 187 encodingLength = end - pos; 188 return pos; 189} 190 191size_t TextResourceDecoder::checkForBOM(const char* data, size_t len) 192{ 193 // Check for UTF-16/32 or UTF-8 BOM mark at the beginning, which is a sure sign of a Unicode encoding. 194 // We let it override even a user-chosen encoding. 195 ASSERT(!m_checkedForBOM); 196 197 size_t lengthOfBOM = 0; 198 199 size_t bufferLength = m_buffer.size(); 200 201 size_t buf1Len = bufferLength; 202 size_t buf2Len = len; 203 const unsigned char* buf1 = reinterpret_cast<const unsigned char*>(m_buffer.data()); 204 const unsigned char* buf2 = reinterpret_cast<const unsigned char*>(data); 205 unsigned char c1 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0; 206 unsigned char c2 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0; 207 unsigned char c3 = buf1Len ? (--buf1Len, *buf1++) : buf2Len ? (--buf2Len, *buf2++) : 0; 208 unsigned char c4 = buf2Len ? (--buf2Len, *buf2++) : 0; 209 210 // Check for the BOM. 211 if (c1 == 0xFF && c2 == 0xFE) { 212 if (c3 || c4) { 213 setEncoding(UTF16LittleEndianEncoding(), AutoDetectedEncoding); 214 lengthOfBOM = 2; 215 } else { 216 setEncoding(UTF32LittleEndianEncoding(), AutoDetectedEncoding); 217 lengthOfBOM = 4; 218 } 219 } else if (c1 == 0xEF && c2 == 0xBB && c3 == 0xBF) { 220 setEncoding(UTF8Encoding(), AutoDetectedEncoding); 221 lengthOfBOM = 3; 222 } else if (c1 == 0xFE && c2 == 0xFF) { 223 setEncoding(UTF16BigEndianEncoding(), AutoDetectedEncoding); 224 lengthOfBOM = 2; 225 } else if (!c1 && !c2 && c3 == 0xFE && c4 == 0xFF) { 226 setEncoding(UTF32BigEndianEncoding(), AutoDetectedEncoding); 227 lengthOfBOM = 4; 228 } 229 230 if (lengthOfBOM || bufferLength + len >= 4) 231 m_checkedForBOM = true; 232 233 return lengthOfBOM; 234} 235 236bool TextResourceDecoder::checkForCSSCharset(const char* data, size_t len, bool& movedDataToBuffer) 237{ 238 if (m_source != DefaultEncoding && m_source != EncodingFromParentFrame) { 239 m_checkedForCSSCharset = true; 240 return true; 241 } 242 243 size_t oldSize = m_buffer.size(); 244 m_buffer.grow(oldSize + len); 245 memcpy(m_buffer.data() + oldSize, data, len); 246 247 movedDataToBuffer = true; 248 249 if (m_buffer.size() <= 13) // strlen('@charset "x";') == 13 250 return false; 251 252 const char* dataStart = m_buffer.data(); 253 const char* dataEnd = dataStart + m_buffer.size(); 254 255 if (bytesEqual(dataStart, '@', 'c', 'h', 'a', 'r', 's', 'e', 't', ' ', '"')) { 256 dataStart += 10; 257 const char* pos = dataStart; 258 259 while (pos < dataEnd && *pos != '"') 260 ++pos; 261 if (pos == dataEnd) 262 return false; 263 264 int encodingNameLength = pos - dataStart; 265 266 ++pos; 267 if (pos == dataEnd) 268 return false; 269 270 if (*pos == ';') 271 setEncoding(findTextEncoding(dataStart, encodingNameLength), EncodingFromCSSCharset); 272 } 273 274 m_checkedForCSSCharset = true; 275 return true; 276} 277 278bool TextResourceDecoder::checkForXMLCharset(const char* data, size_t len, bool& movedDataToBuffer) 279{ 280 if (m_source != DefaultEncoding && m_source != EncodingFromParentFrame) { 281 m_checkedForXMLCharset = true; 282 return true; 283 } 284 285 // This is not completely efficient, since the function might go 286 // through the HTML head several times. 287 288 size_t oldSize = m_buffer.size(); 289 m_buffer.grow(oldSize + len); 290 memcpy(m_buffer.data() + oldSize, data, len); 291 292 movedDataToBuffer = true; 293 294 const char* ptr = m_buffer.data(); 295 const char* pEnd = ptr + m_buffer.size(); 296 297 // Is there enough data available to check for XML declaration? 298 if (m_buffer.size() < 8) 299 return false; 300 301 // Handle XML declaration, which can have encoding in it. This encoding is honored even for HTML documents. 302 // It is an error for an XML declaration not to be at the start of an XML document, and it is ignored in HTML documents in such case. 303 if (bytesEqual(ptr, '<', '?', 'x', 'm', 'l')) { 304 const char* xmlDeclarationEnd = ptr; 305 while (xmlDeclarationEnd != pEnd && *xmlDeclarationEnd != '>') 306 ++xmlDeclarationEnd; 307 if (xmlDeclarationEnd == pEnd) 308 return false; 309 // No need for +1, because we have an extra "?" to lose at the end of XML declaration. 310 int len = 0; 311 int pos = findXMLEncoding(ptr, xmlDeclarationEnd - ptr, len); 312 if (pos != -1) 313 setEncoding(findTextEncoding(ptr + pos, len), EncodingFromXMLHeader); 314 // continue looking for a charset - it may be specified in an HTTP-Equiv meta 315 } else if (bytesEqual(ptr, '<', 0, '?', 0, 'x', 0)) { 316 setEncoding(UTF16LittleEndianEncoding(), AutoDetectedEncoding); 317 } else if (bytesEqual(ptr, 0, '<', 0, '?', 0, 'x')) { 318 setEncoding(UTF16BigEndianEncoding(), AutoDetectedEncoding); 319 } else if (bytesEqual(ptr, '<', 0, 0, 0, '?', 0, 0, 0)) { 320 setEncoding(UTF32LittleEndianEncoding(), AutoDetectedEncoding); 321 } else if (bytesEqual(ptr, 0, 0, 0, '<', 0, 0, 0, '?')) { 322 setEncoding(UTF32BigEndianEncoding(), AutoDetectedEncoding); 323 } 324 325 m_checkedForXMLCharset = true; 326 return true; 327} 328 329void TextResourceDecoder::checkForMetaCharset(const char* data, size_t length) 330{ 331 if (m_source == UserChosenEncoding || m_source == EncodingFromHTTPHeader || m_source == AutoDetectedEncoding) { 332 m_checkedForMetaCharset = true; 333 return; 334 } 335 336 if (!m_charsetParser) 337 m_charsetParser = HTMLMetaCharsetParser::create(); 338 339 if (!m_charsetParser->checkForMetaCharset(data, length)) 340 return; 341 342 setEncoding(m_charsetParser->encoding(), EncodingFromMetaTag); 343 m_charsetParser.clear(); 344 m_checkedForMetaCharset = true; 345 return; 346} 347 348// We use the encoding detector in two cases: 349// 1. Encoding detector is turned ON and no other encoding source is 350// available (that is, it's DefaultEncoding). 351// 2. Encoding detector is turned ON and the encoding is set to 352// the encoding of the parent frame, which is also auto-detected. 353// Note that condition #2 is NOT satisfied unless parent-child frame 354// relationship is compliant to the same-origin policy. If they're from 355// different domains, |m_source| would not be set to EncodingFromParentFrame 356// in the first place. 357bool TextResourceDecoder::shouldAutoDetect() const 358{ 359 // Just checking m_hintEncoding suffices here because it's only set 360 // in setHintEncoding when the source is AutoDetectedEncoding. 361 return m_usesEncodingDetector 362 && (m_source == DefaultEncoding || (m_source == EncodingFromParentFrame && m_hintEncoding)); 363} 364 365String TextResourceDecoder::decode(const char* data, size_t len) 366{ 367 size_t lengthOfBOM = 0; 368 if (!m_checkedForBOM) 369 lengthOfBOM = checkForBOM(data, len); 370 371 bool movedDataToBuffer = false; 372 373 if (m_contentType == CSSContent && !m_checkedForCSSCharset) { 374 if (!checkForCSSCharset(data, len, movedDataToBuffer)) 375 return emptyString(); 376 } 377 378 if ((m_contentType == HTMLContent || m_contentType == XMLContent) && !m_checkedForXMLCharset) { 379 if (!checkForXMLCharset(data, len, movedDataToBuffer)) 380 return emptyString(); 381 } 382 383 const char* dataForDecode = data + lengthOfBOM; 384 size_t lengthForDecode = len - lengthOfBOM; 385 386 if (!m_buffer.isEmpty()) { 387 if (!movedDataToBuffer) { 388 size_t oldSize = m_buffer.size(); 389 m_buffer.grow(oldSize + len); 390 memcpy(m_buffer.data() + oldSize, data, len); 391 } 392 393 dataForDecode = m_buffer.data() + lengthOfBOM; 394 lengthForDecode = m_buffer.size() - lengthOfBOM; 395 } 396 397 if (m_contentType == HTMLContent && !m_checkedForMetaCharset) 398 checkForMetaCharset(dataForDecode, lengthForDecode); 399 400 if (shouldAutoDetect()) { 401 WTF::TextEncoding detectedEncoding; 402 if (detectTextEncoding(data, len, m_hintEncoding, &detectedEncoding)) 403 setEncoding(detectedEncoding, EncodingFromContentSniffing); 404 } 405 406 ASSERT(m_encoding.isValid()); 407 408 if (!m_codec) 409 m_codec = newTextCodec(m_encoding); 410 411 String result = m_codec->decode(dataForDecode, lengthForDecode, DoNotFlush, m_contentType == XMLContent && !m_useLenientXMLDecoding, m_sawError); 412 413 m_buffer.clear(); 414 return result; 415} 416 417String TextResourceDecoder::flush() 418{ 419 // If we can not identify the encoding even after a document is completely 420 // loaded, we need to detect the encoding if other conditions for 421 // autodetection is satisfied. 422 if (m_buffer.size() && shouldAutoDetect() 423 && ((!m_checkedForXMLCharset && (m_contentType == HTMLContent || m_contentType == XMLContent)) || (!m_checkedForCSSCharset && (m_contentType == CSSContent)))) { 424 WTF::TextEncoding detectedEncoding; 425 if (detectTextEncoding(m_buffer.data(), m_buffer.size(), m_hintEncoding, &detectedEncoding)) 426 setEncoding(detectedEncoding, EncodingFromContentSniffing); 427 } 428 429 if (!m_codec) 430 m_codec = newTextCodec(m_encoding); 431 432 String result = m_codec->decode(m_buffer.data(), m_buffer.size(), FetchEOF, m_contentType == XMLContent && !m_useLenientXMLDecoding, m_sawError); 433 m_buffer.clear(); 434 m_codec.clear(); 435 m_checkedForBOM = false; // Skip BOM again when re-decoding. 436 return result; 437} 438 439} 440