1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "chrome/renderer/chrome_render_view_observer.h"
6
7#include "base/bind.h"
8#include "base/bind_helpers.h"
9#include "base/command_line.h"
10#include "base/debug/trace_event.h"
11#include "base/message_loop/message_loop.h"
12#include "base/metrics/histogram.h"
13#include "base/strings/string_util.h"
14#include "base/strings/utf_string_conversions.h"
15#include "chrome/common/chrome_constants.h"
16#include "chrome/common/chrome_switches.h"
17#include "chrome/common/prerender_messages.h"
18#include "chrome/common/render_messages.h"
19#include "chrome/common/url_constants.h"
20#include "chrome/renderer/isolated_world_ids.h"
21#include "chrome/renderer/prerender/prerender_helper.h"
22#include "chrome/renderer/safe_browsing/phishing_classifier_delegate.h"
23#include "chrome/renderer/web_apps.h"
24#include "chrome/renderer/webview_color_overlay.h"
25#include "components/translate/content/renderer/translate_helper.h"
26#include "components/web_cache/renderer/web_cache_render_process_observer.h"
27#include "content/public/common/bindings_policy.h"
28#include "content/public/renderer/content_renderer_client.h"
29#include "content/public/renderer/render_frame.h"
30#include "content/public/renderer/render_view.h"
31#include "extensions/common/constants.h"
32#include "extensions/renderer/extension_groups.h"
33#include "net/base/data_url.h"
34#include "skia/ext/platform_canvas.h"
35#include "third_party/WebKit/public/platform/WebCString.h"
36#include "third_party/WebKit/public/platform/WebRect.h"
37#include "third_party/WebKit/public/platform/WebSize.h"
38#include "third_party/WebKit/public/platform/WebString.h"
39#include "third_party/WebKit/public/platform/WebURLRequest.h"
40#include "third_party/WebKit/public/platform/WebVector.h"
41#include "third_party/WebKit/public/web/WebAXObject.h"
42#include "third_party/WebKit/public/web/WebDataSource.h"
43#include "third_party/WebKit/public/web/WebDocument.h"
44#include "third_party/WebKit/public/web/WebElement.h"
45#include "third_party/WebKit/public/web/WebInputEvent.h"
46#include "third_party/WebKit/public/web/WebLocalFrame.h"
47#include "third_party/WebKit/public/web/WebNode.h"
48#include "third_party/WebKit/public/web/WebNodeList.h"
49#include "third_party/WebKit/public/web/WebView.h"
50#include "ui/base/ui_base_switches_util.h"
51#include "ui/gfx/favicon_size.h"
52#include "ui/gfx/size.h"
53#include "ui/gfx/size_f.h"
54#include "ui/gfx/skbitmap_operations.h"
55#include "v8/include/v8-testing.h"
56
57#if defined(ENABLE_EXTENSIONS)
58#include "chrome/common/extensions/chrome_extension_messages.h"
59#endif
60
61using blink::WebAXObject;
62using blink::WebCString;
63using blink::WebDataSource;
64using blink::WebDocument;
65using blink::WebElement;
66using blink::WebFrame;
67using blink::WebGestureEvent;
68using blink::WebIconURL;
69using blink::WebLocalFrame;
70using blink::WebNode;
71using blink::WebNodeList;
72using blink::WebRect;
73using blink::WebSecurityOrigin;
74using blink::WebSize;
75using blink::WebString;
76using blink::WebTouchEvent;
77using blink::WebURL;
78using blink::WebURLRequest;
79using blink::WebView;
80using blink::WebVector;
81using blink::WebWindowFeatures;
82
83// Delay in milliseconds that we'll wait before capturing the page contents
84// and thumbnail.
85static const int kDelayForCaptureMs = 500;
86
// Typically, we capture the page data once the page is loaded.
// Sometimes the page never finishes loading, which prevents the page capture.
// To work around this problem, we always perform a capture after the following
// delay.
91static const int kDelayForForcedCaptureMs = 6000;
92
// Define this to write the time necessary for thumbnail/DOM text retrieval,
// respectively, into the system debug log.
95// #define TIME_TEXT_RETRIEVAL
96
// Maximum number of characters in the document to index. Any text beyond this
// point will be clipped.
99static const size_t kMaxIndexChars = 65535;
100
101// Constants for UMA statistic collection.
102static const char kTranslateCaptureText[] = "Translate.CaptureText";
103
104namespace {
105
106#if defined(OS_ANDROID)
107// Parses the DOM for a <meta> tag with a particular name.
108// |meta_tag_content| is set to the contents of the 'content' attribute.
109// |found_tag| is set to true if the tag was successfully found.
110// Returns true if the document was parsed without errors.
111bool RetrieveMetaTagContent(const WebFrame* main_frame,
112                            const GURL& expected_url,
113                            const std::string& meta_tag_name,
114                            bool* found_tag,
115                            std::string* meta_tag_content) {
116  WebDocument document =
117      main_frame ? main_frame->document() : WebDocument();
118  WebElement head = document.isNull() ? WebElement() : document.head();
119  GURL document_url = document.isNull() ? GURL() : GURL(document.url());
120
121  // Search the DOM for the <meta> tag with the given name.
122  *found_tag = false;
123  *meta_tag_content = "";
124  if (!head.isNull()) {
125    WebNodeList children = head.childNodes();
126    for (unsigned i = 0; i < children.length(); ++i) {
127      WebNode child = children.item(i);
128      if (!child.isElementNode())
129        continue;
130      WebElement elem = child.to<WebElement>();
131      if (elem.hasHTMLTagName("meta")) {
132        if (elem.hasAttribute("name") && elem.hasAttribute("content")) {
133          std::string name = elem.getAttribute("name").utf8();
134          if (name == meta_tag_name) {
135            *meta_tag_content = elem.getAttribute("content").utf8();
136            *found_tag = true;
137            break;
138          }
139        }
140      }
141    }
142  }
143
144  // Make sure we're checking the right page and that the length of the content
145  // string is reasonable.
146  bool success = document_url == expected_url;
147  if (meta_tag_content->size() > chrome::kMaxMetaTagAttributeLength) {
148    *meta_tag_content = "";
149    success = false;
150  }
151
152  return success;
153}
154#endif
155
156}  // namespace
157
158ChromeRenderViewObserver::ChromeRenderViewObserver(
159    content::RenderView* render_view,
160    web_cache::WebCacheRenderProcessObserver* web_cache_render_process_observer)
161    : content::RenderViewObserver(render_view),
162      web_cache_render_process_observer_(web_cache_render_process_observer),
163      translate_helper_(new translate::TranslateHelper(
164          render_view,
165          chrome::ISOLATED_WORLD_ID_TRANSLATE,
166          extensions::EXTENSION_GROUP_INTERNAL_TRANSLATE_SCRIPTS,
167          extensions::kExtensionScheme)),
168      phishing_classifier_(NULL),
169      capture_timer_(false, false) {
170  const CommandLine& command_line = *CommandLine::ForCurrentProcess();
171  if (!command_line.HasSwitch(switches::kDisableClientSidePhishingDetection))
172    OnSetClientSidePhishingDetection(true);
173}
174
175ChromeRenderViewObserver::~ChromeRenderViewObserver() {
176}
177
178bool ChromeRenderViewObserver::OnMessageReceived(const IPC::Message& message) {
179  bool handled = true;
180  IPC_BEGIN_MESSAGE_MAP(ChromeRenderViewObserver, message)
181#if !defined(OS_ANDROID) && !defined(OS_IOS)
182    IPC_MESSAGE_HANDLER(ChromeViewMsg_WebUIJavaScript, OnWebUIJavaScript)
183#endif
184#if defined(ENABLE_EXTENSIONS)
185    IPC_MESSAGE_HANDLER(ChromeViewMsg_SetVisuallyDeemphasized,
186                        OnSetVisuallyDeemphasized)
187#endif
188#if defined(OS_ANDROID)
189    IPC_MESSAGE_HANDLER(ChromeViewMsg_UpdateTopControlsState,
190                        OnUpdateTopControlsState)
191    IPC_MESSAGE_HANDLER(ChromeViewMsg_RetrieveMetaTagContent,
192                        OnRetrieveMetaTagContent)
193#endif
194    IPC_MESSAGE_HANDLER(ChromeViewMsg_GetWebApplicationInfo,
195                        OnGetWebApplicationInfo)
196    IPC_MESSAGE_HANDLER(ChromeViewMsg_SetClientSidePhishingDetection,
197                        OnSetClientSidePhishingDetection)
198    IPC_MESSAGE_HANDLER(ChromeViewMsg_SetWindowFeatures, OnSetWindowFeatures)
199    IPC_MESSAGE_UNHANDLED(handled = false)
200  IPC_END_MESSAGE_MAP()
201
202  return handled;
203}
204
205#if !defined(OS_ANDROID) && !defined(OS_IOS)
206void ChromeRenderViewObserver::OnWebUIJavaScript(
207    const base::string16& javascript) {
208  webui_javascript_.push_back(javascript);
209}
210#endif
211
212#if defined(OS_ANDROID)
213void ChromeRenderViewObserver::OnUpdateTopControlsState(
214    content::TopControlsState constraints,
215    content::TopControlsState current,
216    bool animate) {
217  render_view()->UpdateTopControlsState(constraints, current, animate);
218}
219
220void ChromeRenderViewObserver::OnRetrieveMetaTagContent(
221    const GURL& expected_url,
222    const std::string tag_name) {
223  bool found_tag;
224  std::string content_str;
225  bool parsed_successfully = RetrieveMetaTagContent(
226      render_view()->GetWebView()->mainFrame(),
227      expected_url,
228      tag_name,
229      &found_tag,
230      &content_str);
231
232  Send(new ChromeViewHostMsg_DidRetrieveMetaTagContent(
233      routing_id(),
234      parsed_successfully && found_tag,
235      tag_name,
236      content_str,
237      expected_url));
238}
239#endif
240
241void ChromeRenderViewObserver::OnGetWebApplicationInfo() {
242  WebFrame* main_frame = render_view()->GetWebView()->mainFrame();
243  DCHECK(main_frame);
244
245  WebApplicationInfo web_app_info;
246  web_apps::ParseWebAppFromWebDocument(main_frame, &web_app_info);
247
248  // The warning below is specific to mobile but it doesn't hurt to show it even
249  // if the Chromium build is running on a desktop. It will get more exposition.
250  if (web_app_info.mobile_capable ==
251        WebApplicationInfo::MOBILE_CAPABLE_APPLE) {
252    blink::WebConsoleMessage message(
253        blink::WebConsoleMessage::LevelWarning,
254        "<meta name=\"apple-mobile-web-app-capable\" content=\"yes\"> is "
255        "deprecated. Please include <meta name=\"mobile-web-app-capable\" "
256        "content=\"yes\"> - "
257        "http://developers.google.com/chrome/mobile/docs/installtohomescreen");
258    main_frame->addMessageToConsole(message);
259  }
260
261  // Prune out any data URLs in the set of icons.  The browser process expects
262  // any icon with a data URL to have originated from a favicon.  We don't want
263  // to decode arbitrary data URLs in the browser process.  See
264  // http://b/issue?id=1162972
265  for (std::vector<WebApplicationInfo::IconInfo>::iterator it =
266          web_app_info.icons.begin(); it != web_app_info.icons.end();) {
267    if (it->url.SchemeIs(url::kDataScheme))
268      it = web_app_info.icons.erase(it);
269    else
270      ++it;
271  }
272
273  // Truncate the strings we send to the browser process.
274  web_app_info.title =
275      web_app_info.title.substr(0, chrome::kMaxMetaTagAttributeLength);
276  web_app_info.description =
277      web_app_info.description.substr(0, chrome::kMaxMetaTagAttributeLength);
278
279  Send(new ChromeViewHostMsg_DidGetWebApplicationInfo(
280      routing_id(), web_app_info));
281}
282
283void ChromeRenderViewObserver::OnSetWindowFeatures(
284    const WebWindowFeatures& window_features) {
285  render_view()->GetWebView()->setWindowFeatures(window_features);
286}
287
288void ChromeRenderViewObserver::Navigate(const GURL& url) {
289  // Execute cache clear operations that were postponed until a navigation
290  // event (including tab reload).
291  if (web_cache_render_process_observer_)
292    web_cache_render_process_observer_->ExecutePendingClearCache();
293  // Let translate_helper do any preparatory work for loading a URL.
294  if (translate_helper_)
295    translate_helper_->PrepareForUrl(url);
296}
297
298void ChromeRenderViewObserver::OnSetClientSidePhishingDetection(
299    bool enable_phishing_detection) {
300#if defined(FULL_SAFE_BROWSING) && !defined(OS_CHROMEOS)
301  phishing_classifier_ = enable_phishing_detection ?
302      safe_browsing::PhishingClassifierDelegate::Create(render_view(), NULL) :
303      NULL;
304#endif
305}
306
307#if defined(ENABLE_EXTENSIONS)
308void ChromeRenderViewObserver::OnSetVisuallyDeemphasized(bool deemphasized) {
309  bool already_deemphasized = !!dimmed_color_overlay_.get();
310  if (already_deemphasized == deemphasized)
311    return;
312
313  if (deemphasized) {
314    // 70% opaque grey.
315    SkColor greyish = SkColorSetARGB(178, 0, 0, 0);
316    dimmed_color_overlay_.reset(
317        new WebViewColorOverlay(render_view(), greyish));
318  } else {
319    dimmed_color_overlay_.reset();
320  }
321}
322#endif
323
324void ChromeRenderViewObserver::DidStartLoading() {
325  if ((render_view()->GetEnabledBindings() & content::BINDINGS_POLICY_WEB_UI) &&
326      !webui_javascript_.empty()) {
327    for (size_t i = 0; i < webui_javascript_.size(); ++i) {
328      render_view()->GetMainRenderFrame()->ExecuteJavaScript(
329          webui_javascript_[i]);
330    }
331    webui_javascript_.clear();
332  }
333}
334
335void ChromeRenderViewObserver::DidStopLoading() {
336  WebFrame* main_frame = render_view()->GetWebView()->mainFrame();
337  GURL osdd_url = main_frame->document().openSearchDescriptionURL();
338  if (!osdd_url.is_empty()) {
339    Send(new ChromeViewHostMsg_PageHasOSDD(
340        routing_id(), main_frame->document().url(), osdd_url,
341        search_provider::AUTODETECTED_PROVIDER));
342  }
343
344  // Don't capture pages including refresh meta tag.
345  if (HasRefreshMetaTag(main_frame))
346    return;
347
348  CapturePageInfoLater(
349      false,  // preliminary_capture
350      base::TimeDelta::FromMilliseconds(
351          render_view()->GetContentStateImmediately() ?
352              0 : kDelayForCaptureMs));
353}
354
355void ChromeRenderViewObserver::DidCommitProvisionalLoad(
356    WebLocalFrame* frame, bool is_new_navigation) {
357  // Don't capture pages being not new, or including refresh meta tag.
358  if (!is_new_navigation || HasRefreshMetaTag(frame))
359    return;
360
361  CapturePageInfoLater(
362      true,  // preliminary_capture
363      base::TimeDelta::FromMilliseconds(kDelayForForcedCaptureMs));
364}
365
366void ChromeRenderViewObserver::CapturePageInfoLater(bool preliminary_capture,
367                                                    base::TimeDelta delay) {
368  capture_timer_.Start(
369      FROM_HERE,
370      delay,
371      base::Bind(&ChromeRenderViewObserver::CapturePageInfo,
372                 base::Unretained(this),
373                 preliminary_capture));
374}
375
376void ChromeRenderViewObserver::CapturePageInfo(bool preliminary_capture) {
377  if (!render_view()->GetWebView())
378    return;
379
380  WebFrame* main_frame = render_view()->GetWebView()->mainFrame();
381  if (!main_frame)
382    return;
383
384  // Don't index/capture pages that are in view source mode.
385  if (main_frame->isViewSourceModeEnabled())
386    return;
387
388  // Don't index/capture pages that failed to load.  This only checks the top
389  // level frame so the thumbnail may contain a frame that failed to load.
390  WebDataSource* ds = main_frame->dataSource();
391  if (ds && ds->hasUnreachableURL())
392    return;
393
394  // Don't index/capture pages that are being prerendered.
395  if (prerender::PrerenderHelper::IsPrerendering(
396          render_view()->GetMainRenderFrame())) {
397    return;
398  }
399
400  // Retrieve the frame's full text (up to kMaxIndexChars), and pass it to the
401  // translate helper for language detection and possible translation.
402  base::string16 contents;
403  base::TimeTicks capture_begin_time = base::TimeTicks::Now();
404  CaptureText(main_frame, &contents);
405  UMA_HISTOGRAM_TIMES(kTranslateCaptureText,
406                      base::TimeTicks::Now() - capture_begin_time);
407  if (translate_helper_)
408    translate_helper_->PageCaptured(contents);
409
410  TRACE_EVENT0("renderer", "ChromeRenderViewObserver::CapturePageInfo");
411
412#if defined(FULL_SAFE_BROWSING)
413  // Will swap out the string.
414  if (phishing_classifier_)
415    phishing_classifier_->PageCaptured(&contents, preliminary_capture);
416#endif
417}
418
419void ChromeRenderViewObserver::CaptureText(WebFrame* frame,
420                                           base::string16* contents) {
421  contents->clear();
422  if (!frame)
423    return;
424
425#ifdef TIME_TEXT_RETRIEVAL
426  double begin = time_util::GetHighResolutionTimeNow();
427#endif
428
429  // get the contents of the frame
430  *contents = frame->contentAsText(kMaxIndexChars);
431
432#ifdef TIME_TEXT_RETRIEVAL
433  double end = time_util::GetHighResolutionTimeNow();
434  char buf[128];
435  sprintf_s(buf, "%d chars retrieved for indexing in %gms\n",
436            contents.size(), (end - begin)*1000);
437  OutputDebugStringA(buf);
438#endif
439
440  // When the contents are clipped to the maximum, we don't want to have a
441  // partial word indexed at the end that might have been clipped. Therefore,
442  // terminate the string at the last space to ensure no words are clipped.
443  if (contents->size() == kMaxIndexChars) {
444    size_t last_space_index = contents->find_last_of(base::kWhitespaceUTF16);
445    if (last_space_index != base::string16::npos)
446      contents->resize(last_space_index);
447  }
448}
449
450bool ChromeRenderViewObserver::HasRefreshMetaTag(WebFrame* frame) {
451  if (!frame)
452    return false;
453  WebElement head = frame->document().head();
454  if (head.isNull() || !head.hasChildNodes())
455    return false;
456
457  const WebString tag_name(base::ASCIIToUTF16("meta"));
458  const WebString attribute_name(base::ASCIIToUTF16("http-equiv"));
459
460  WebNodeList children = head.childNodes();
461  for (size_t i = 0; i < children.length(); ++i) {
462    WebNode node = children.item(i);
463    if (!node.isElementNode())
464      continue;
465    WebElement element = node.to<WebElement>();
466    if (!element.hasHTMLTagName(tag_name))
467      continue;
468    WebString value = element.getAttribute(attribute_name);
469    if (value.isNull() || !LowerCaseEqualsASCII(value, "refresh"))
470      continue;
471    return true;
472  }
473  return false;
474}
475