1/*
2 * Copyright (C) 2013 Google Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are
6 * met:
7 *
8 *     * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 *     * Redistributions in binary form must reproduce the above
11 * copyright notice, this list of conditions and the following disclaimer
12 * in the documentation and/or other materials provided with the
13 * distribution.
14 *     * Neither the name of Google Inc. nor the names of its
15 * contributors may be used to endorse or promote products derived from
16 * this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31#include "config.h"
32#include "core/frame/SmartClip.h"
33
34#include "core/dom/ContainerNode.h"
35#include "core/dom/Document.h"
36#include "core/dom/NodeTraversal.h"
37#include "core/frame/LocalDOMWindow.h"
38#include "core/frame/FrameView.h"
39#include "core/html/HTMLFrameOwnerElement.h"
40#include "core/page/Page.h"
41#include "core/rendering/RenderObject.h"
42#include "wtf/text/StringBuilder.h"
43
44namespace blink {
45
46static IntRect applyScaleWithoutCollapsingToZero(const IntRect& rect, float scale)
47{
48    IntRect result = rect;
49    result.scale(scale);
50    if (rect.width() > 0 && !result.width())
51        result.setWidth(1);
52    if (rect.height() > 0 && !result.height())
53        result.setHeight(1);
54    return result;
55}
56
57static Node* nodeInsideFrame(Node* node)
58{
59    if (node->isFrameOwnerElement())
60        return toHTMLFrameOwnerElement(node)->contentDocument();
61    return 0;
62}
63
64IntRect SmartClipData::rect() const
65{
66    return m_rect;
67}
68
69const String& SmartClipData::clipData() const
70{
71    return m_string;
72}
73
74SmartClip::SmartClip(PassRefPtrWillBeRawPtr<LocalFrame> frame)
75    : m_frame(frame)
76{
77}
78
79SmartClipData SmartClip::dataForRect(const IntRect& cropRect)
80{
81    IntRect resizedCropRect = applyScaleWithoutCollapsingToZero(cropRect, 1 / pageScaleFactor());
82
83    Node* bestNode = findBestOverlappingNode(m_frame->document(), resizedCropRect);
84    if (!bestNode)
85        return SmartClipData();
86
87    if (Node* nodeFromFrame = nodeInsideFrame(bestNode)) {
88        // FIXME: This code only hit-tests a single iframe. It seems like we ought support nested frames.
89        if (Node* bestNodeInFrame = findBestOverlappingNode(nodeFromFrame, resizedCropRect))
90            bestNode = bestNodeInFrame;
91    }
92
93    WillBeHeapVector<RawPtrWillBeMember<Node> > hitNodes;
94    collectOverlappingChildNodes(bestNode, resizedCropRect, hitNodes);
95
96    if (hitNodes.isEmpty() || hitNodes.size() == bestNode->countChildren()) {
97        hitNodes.clear();
98        hitNodes.append(bestNode);
99    }
100
101    // Unite won't work with the empty rect, so we initialize to the first rect.
102    IntRect unitedRects = hitNodes[0]->pixelSnappedBoundingBox();
103    StringBuilder collectedText;
104    for (size_t i = 0; i < hitNodes.size(); ++i) {
105        collectedText.append(extractTextFromNode(hitNodes[i]));
106        unitedRects.unite(hitNodes[i]->pixelSnappedBoundingBox());
107    }
108
109    return SmartClipData(bestNode, convertRectToWindow(unitedRects), collectedText.toString());
110}
111
112float SmartClip::pageScaleFactor()
113{
114    return m_frame->page()->pageScaleFactor();
115}
116
117// This function is a bit of a mystery. If you understand what it does, please
118// consider adding a more descriptive name.
119Node* SmartClip::minNodeContainsNodes(Node* minNode, Node* newNode)
120{
121    if (!newNode)
122        return minNode;
123    if (!minNode)
124        return newNode;
125
126    IntRect minNodeRect = minNode->pixelSnappedBoundingBox();
127    IntRect newNodeRect = newNode->pixelSnappedBoundingBox();
128
129    Node* parentMinNode = minNode->parentNode();
130    Node* parentNewNode = newNode->parentNode();
131
132    if (minNodeRect.contains(newNodeRect)) {
133        if (parentMinNode && parentNewNode && parentNewNode->parentNode() == parentMinNode)
134            return parentMinNode;
135        return minNode;
136    }
137
138    if (newNodeRect.contains(minNodeRect)) {
139        if (parentMinNode && parentNewNode && parentMinNode->parentNode() == parentNewNode)
140            return parentNewNode;
141        return newNode;
142    }
143
144    // This loop appears to find the nearest ancestor of minNode (in DOM order)
145    // that contains the newNodeRect. It's very unclear to me why that's an
146    // interesting node to find. Presumably this loop will often just return
147    // the documentElement.
148    Node* node = minNode;
149    while (node) {
150        if (node->renderer()) {
151            IntRect nodeRect = node->pixelSnappedBoundingBox();
152            if (nodeRect.contains(newNodeRect)) {
153                return node;
154            }
155        }
156        node = node->parentNode();
157    }
158
159    return 0;
160}
161
162Node* SmartClip::findBestOverlappingNode(Node* rootNode, const IntRect& cropRect)
163{
164    if (!rootNode)
165        return 0;
166
167    IntRect resizedCropRect = rootNode->document().view()->windowToContents(cropRect);
168
169    Node* node = rootNode;
170    Node* minNode = 0;
171
172    while (node) {
173        IntRect nodeRect = node->pixelSnappedBoundingBox();
174
175        if (node->isElementNode() && equalIgnoringCase(toElement(node)->fastGetAttribute(HTMLNames::aria_hiddenAttr), "true")) {
176            node = NodeTraversal::nextSkippingChildren(*node, rootNode);
177            continue;
178        }
179
180        RenderObject* renderer = node->renderer();
181        if (renderer && !nodeRect.isEmpty()) {
182            if (renderer->isText()
183                || renderer->isRenderImage()
184                || node->isFrameOwnerElement()
185                || (renderer->style()->hasBackgroundImage() && !shouldSkipBackgroundImage(node))) {
186                if (resizedCropRect.intersects(nodeRect)) {
187                    minNode = minNodeContainsNodes(minNode, node);
188                } else {
189                    node = NodeTraversal::nextSkippingChildren(*node, rootNode);
190                    continue;
191                }
192            }
193        }
194        node = NodeTraversal::next(*node, rootNode);
195    }
196
197    return minNode;
198}
199
200// This function appears to heuristically guess whether to include a background
201// image in the smart clip. It seems to want to include sprites created from
202// CSS background images but to skip actual backgrounds.
203bool SmartClip::shouldSkipBackgroundImage(Node* node)
204{
205    ASSERT(node);
206    // Apparently we're only interested in background images on spans and divs.
207    if (!isHTMLSpanElement(*node) && !isHTMLDivElement(*node))
208        return true;
209
210    // This check actually makes a bit of sense. If you're going to sprite an
211    // image out of a CSS background, you're probably going to specify a height
212    // or a width. On the other hand, if we've got a legit background image,
213    // it's very likely the height or the width will be set to auto.
214    RenderObject* renderer = node->renderer();
215    if (renderer && (renderer->style()->logicalHeight().isAuto() || renderer->style()->logicalWidth().isAuto()))
216        return true;
217
218    return false;
219}
220
221void SmartClip::collectOverlappingChildNodes(Node* parentNode, const IntRect& cropRect, WillBeHeapVector<RawPtrWillBeMember<Node> >& hitNodes)
222{
223    if (!parentNode)
224        return;
225    IntRect resizedCropRect = parentNode->document().view()->windowToContents(cropRect);
226    for (Node* child = parentNode->firstChild(); child; child = child->nextSibling()) {
227        IntRect childRect = child->pixelSnappedBoundingBox();
228        if (resizedCropRect.intersects(childRect))
229            hitNodes.append(child);
230    }
231}
232
233IntRect SmartClip::convertRectToWindow(const IntRect& nodeRect)
234{
235    IntRect result = m_frame->document()->view()->contentsToWindow(nodeRect);
236    result.scale(pageScaleFactor());
237    return result;
238}
239
240String SmartClip::extractTextFromNode(Node* node)
241{
242    // Science has proven that no text nodes are ever positioned at y == -99999.
243    int prevYPos = -99999;
244
245    StringBuilder result;
246    for (Node* currentNode = node; currentNode; currentNode = NodeTraversal::next(*currentNode, node)) {
247        RenderStyle* style = currentNode->computedStyle();
248        if (style && style->userSelect() == SELECT_NONE)
249            continue;
250
251        if (Node* nodeFromFrame = nodeInsideFrame(currentNode))
252            result.append(extractTextFromNode(nodeFromFrame));
253
254        IntRect nodeRect = currentNode->pixelSnappedBoundingBox();
255        if (currentNode->renderer() && !nodeRect.isEmpty()) {
256            if (currentNode->isTextNode()) {
257                String nodeValue = currentNode->nodeValue();
258
259                // It's unclear why we blacklist solitary "\n" node values.
260                // Maybe we're trying to ignore <br> tags somehow?
261                if (nodeValue == "\n")
262                    nodeValue = "";
263
264                if (nodeRect.y() != prevYPos) {
265                    prevYPos = nodeRect.y();
266                    result.append('\n');
267                }
268
269                result.append(nodeValue);
270            }
271        }
272    }
273
274    return result.toString();
275}
276
277} // namespace blink
278