15abb8606fa57c3ebfc8b3c3dbc3fa4a25d2ae306Iain Merrick/* 25abb8606fa57c3ebfc8b3c3dbc3fa4a25d2ae306Iain Merrick * Copyright (C) 2010 Google Inc. All rights reserved. 35abb8606fa57c3ebfc8b3c3dbc3fa4a25d2ae306Iain Merrick * 45abb8606fa57c3ebfc8b3c3dbc3fa4a25d2ae306Iain Merrick * Redistribution and use in source and binary forms, with or without 55abb8606fa57c3ebfc8b3c3dbc3fa4a25d2ae306Iain Merrick * modification, are permitted provided that the following conditions 65abb8606fa57c3ebfc8b3c3dbc3fa4a25d2ae306Iain Merrick * are met: 75abb8606fa57c3ebfc8b3c3dbc3fa4a25d2ae306Iain Merrick * 1. Redistributions of source code must retain the above copyright 85abb8606fa57c3ebfc8b3c3dbc3fa4a25d2ae306Iain Merrick * notice, this list of conditions and the following disclaimer. 95abb8606fa57c3ebfc8b3c3dbc3fa4a25d2ae306Iain Merrick * 2. Redistributions in binary form must reproduce the above copyright 105abb8606fa57c3ebfc8b3c3dbc3fa4a25d2ae306Iain Merrick * notice, this list of conditions and the following disclaimer in the 115abb8606fa57c3ebfc8b3c3dbc3fa4a25d2ae306Iain Merrick * documentation and/or other materials provided with the distribution. 125abb8606fa57c3ebfc8b3c3dbc3fa4a25d2ae306Iain Merrick * 135abb8606fa57c3ebfc8b3c3dbc3fa4a25d2ae306Iain Merrick * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY 145abb8606fa57c3ebfc8b3c3dbc3fa4a25d2ae306Iain Merrick * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 155abb8606fa57c3ebfc8b3c3dbc3fa4a25d2ae306Iain Merrick * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 165abb8606fa57c3ebfc8b3c3dbc3fa4a25d2ae306Iain Merrick * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR 175abb8606fa57c3ebfc8b3c3dbc3fa4a25d2ae306Iain Merrick * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 185abb8606fa57c3ebfc8b3c3dbc3fa4a25d2ae306Iain Merrick * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 195abb8606fa57c3ebfc8b3c3dbc3fa4a25d2ae306Iain Merrick * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 205abb8606fa57c3ebfc8b3c3dbc3fa4a25d2ae306Iain Merrick * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 215abb8606fa57c3ebfc8b3c3dbc3fa4a25d2ae306Iain Merrick * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 225abb8606fa57c3ebfc8b3c3dbc3fa4a25d2ae306Iain Merrick * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 235abb8606fa57c3ebfc8b3c3dbc3fa4a25d2ae306Iain Merrick */ 245abb8606fa57c3ebfc8b3c3dbc3fa4a25d2ae306Iain Merrick 255abb8606fa57c3ebfc8b3c3dbc3fa4a25d2ae306Iain Merrick#include "config.h" 265abb8606fa57c3ebfc8b3c3dbc3fa4a25d2ae306Iain Merrick#include "TextDocumentParser.h" 275abb8606fa57c3ebfc8b3c3dbc3fa4a25d2ae306Iain Merrick 285abb8606fa57c3ebfc8b3c3dbc3fa4a25d2ae306Iain Merrick#include "HTMLDocument.h" 295abb8606fa57c3ebfc8b3c3dbc3fa4a25d2ae306Iain Merrick#include "HTMLNames.h" 305abb8606fa57c3ebfc8b3c3dbc3fa4a25d2ae306Iain Merrick#include "HTMLTokenizer.h" 315abb8606fa57c3ebfc8b3c3dbc3fa4a25d2ae306Iain Merrick#include "HTMLTreeBuilder.h" 325abb8606fa57c3ebfc8b3c3dbc3fa4a25d2ae306Iain Merrick 335abb8606fa57c3ebfc8b3c3dbc3fa4a25d2ae306Iain Merricknamespace WebCore { 345abb8606fa57c3ebfc8b3c3dbc3fa4a25d2ae306Iain Merrick 355abb8606fa57c3ebfc8b3c3dbc3fa4a25d2ae306Iain Merrickusing namespace HTMLNames; 365abb8606fa57c3ebfc8b3c3dbc3fa4a25d2ae306Iain Merrick 375abb8606fa57c3ebfc8b3c3dbc3fa4a25d2ae306Iain MerrickTextDocumentParser::TextDocumentParser(HTMLDocument* document) 385abb8606fa57c3ebfc8b3c3dbc3fa4a25d2ae306Iain Merrick : HTMLDocumentParser(document, false) 395abb8606fa57c3ebfc8b3c3dbc3fa4a25d2ae306Iain Merrick , m_haveInsertedFakePreElement(false) 405abb8606fa57c3ebfc8b3c3dbc3fa4a25d2ae306Iain Merrick{ 415abb8606fa57c3ebfc8b3c3dbc3fa4a25d2ae306Iain Merrick tokenizer()->setState(HTMLTokenizer::PLAINTEXTState); 425abb8606fa57c3ebfc8b3c3dbc3fa4a25d2ae306Iain Merrick} 435abb8606fa57c3ebfc8b3c3dbc3fa4a25d2ae306Iain Merrick 445abb8606fa57c3ebfc8b3c3dbc3fa4a25d2ae306Iain MerrickTextDocumentParser::~TextDocumentParser() 455abb8606fa57c3ebfc8b3c3dbc3fa4a25d2ae306Iain Merrick{ 465abb8606fa57c3ebfc8b3c3dbc3fa4a25d2ae306Iain Merrick} 475abb8606fa57c3ebfc8b3c3dbc3fa4a25d2ae306Iain Merrick 485abb8606fa57c3ebfc8b3c3dbc3fa4a25d2ae306Iain Merrickvoid TextDocumentParser::append(const SegmentedString& text) 495abb8606fa57c3ebfc8b3c3dbc3fa4a25d2ae306Iain Merrick{ 505abb8606fa57c3ebfc8b3c3dbc3fa4a25d2ae306Iain Merrick if (!m_haveInsertedFakePreElement) 515abb8606fa57c3ebfc8b3c3dbc3fa4a25d2ae306Iain Merrick insertFakePreElement(); 525abb8606fa57c3ebfc8b3c3dbc3fa4a25d2ae306Iain Merrick HTMLDocumentParser::append(text); 535abb8606fa57c3ebfc8b3c3dbc3fa4a25d2ae306Iain Merrick} 545abb8606fa57c3ebfc8b3c3dbc3fa4a25d2ae306Iain Merrick 555abb8606fa57c3ebfc8b3c3dbc3fa4a25d2ae306Iain Merrickvoid TextDocumentParser::insertFakePreElement() 565abb8606fa57c3ebfc8b3c3dbc3fa4a25d2ae306Iain Merrick{ 575abb8606fa57c3ebfc8b3c3dbc3fa4a25d2ae306Iain Merrick // In principle, we should create a specialized tree builder for 585abb8606fa57c3ebfc8b3c3dbc3fa4a25d2ae306Iain Merrick // TextDocuments, but instead we re-use the existing HTMLTreeBuilder. 595abb8606fa57c3ebfc8b3c3dbc3fa4a25d2ae306Iain Merrick // We create a fake token and give it to the tree builder rather than 605abb8606fa57c3ebfc8b3c3dbc3fa4a25d2ae306Iain Merrick // sending fake bytes through the front-end of the parser to avoid 615abb8606fa57c3ebfc8b3c3dbc3fa4a25d2ae306Iain Merrick // distrubing the line/column number calculations. 625abb8606fa57c3ebfc8b3c3dbc3fa4a25d2ae306Iain Merrick 635abb8606fa57c3ebfc8b3c3dbc3fa4a25d2ae306Iain Merrick RefPtr<Attribute> styleAttribute = Attribute::createMapped("style", "word-wrap: break-word; white-space: pre-wrap;"); 645abb8606fa57c3ebfc8b3c3dbc3fa4a25d2ae306Iain Merrick RefPtr<NamedNodeMap> attributes = NamedNodeMap::create(); 655abb8606fa57c3ebfc8b3c3dbc3fa4a25d2ae306Iain Merrick attributes->insertAttribute(styleAttribute.release(), false); 665abb8606fa57c3ebfc8b3c3dbc3fa4a25d2ae306Iain Merrick AtomicHTMLToken fakePre(HTMLToken::StartTag, preTag.localName(), attributes.release()); 675abb8606fa57c3ebfc8b3c3dbc3fa4a25d2ae306Iain Merrick 685abb8606fa57c3ebfc8b3c3dbc3fa4a25d2ae306Iain Merrick treeBuilder()->constructTreeFromAtomicToken(fakePre); 695abb8606fa57c3ebfc8b3c3dbc3fa4a25d2ae306Iain Merrick m_haveInsertedFakePreElement = true; 705abb8606fa57c3ebfc8b3c3dbc3fa4a25d2ae306Iain Merrick} 715abb8606fa57c3ebfc8b3c3dbc3fa4a25d2ae306Iain Merrick 725abb8606fa57c3ebfc8b3c3dbc3fa4a25d2ae306Iain Merrick} 73