1// Copyright (c) 2012 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "chrome/renderer/chrome_render_view_observer.h" 6 7#include "base/bind.h" 8#include "base/bind_helpers.h" 9#include "base/command_line.h" 10#include "base/debug/trace_event.h" 11#include "base/message_loop/message_loop.h" 12#include "base/metrics/histogram.h" 13#include "base/strings/string_util.h" 14#include "base/strings/utf_string_conversions.h" 15#include "chrome/common/chrome_constants.h" 16#include "chrome/common/chrome_switches.h" 17#include "chrome/common/prerender_messages.h" 18#include "chrome/common/render_messages.h" 19#include "chrome/common/url_constants.h" 20#include "chrome/renderer/isolated_world_ids.h" 21#include "chrome/renderer/prerender/prerender_helper.h" 22#include "chrome/renderer/safe_browsing/phishing_classifier_delegate.h" 23#include "chrome/renderer/web_apps.h" 24#include "chrome/renderer/webview_color_overlay.h" 25#include "components/translate/content/renderer/translate_helper.h" 26#include "components/web_cache/renderer/web_cache_render_process_observer.h" 27#include "content/public/common/bindings_policy.h" 28#include "content/public/renderer/content_renderer_client.h" 29#include "content/public/renderer/render_frame.h" 30#include "content/public/renderer/render_view.h" 31#include "extensions/common/constants.h" 32#include "extensions/renderer/extension_groups.h" 33#include "net/base/data_url.h" 34#include "skia/ext/platform_canvas.h" 35#include "third_party/WebKit/public/platform/WebCString.h" 36#include "third_party/WebKit/public/platform/WebRect.h" 37#include "third_party/WebKit/public/platform/WebSize.h" 38#include "third_party/WebKit/public/platform/WebString.h" 39#include "third_party/WebKit/public/platform/WebURLRequest.h" 40#include "third_party/WebKit/public/platform/WebVector.h" 41#include "third_party/WebKit/public/web/WebAXObject.h" 42#include "third_party/WebKit/public/web/WebDataSource.h" 43#include "third_party/WebKit/public/web/WebDocument.h" 44#include "third_party/WebKit/public/web/WebElement.h" 45#include "third_party/WebKit/public/web/WebInputEvent.h" 46#include "third_party/WebKit/public/web/WebLocalFrame.h" 47#include "third_party/WebKit/public/web/WebNode.h" 48#include "third_party/WebKit/public/web/WebNodeList.h" 49#include "third_party/WebKit/public/web/WebView.h" 50#include "ui/base/ui_base_switches_util.h" 51#include "ui/gfx/favicon_size.h" 52#include "ui/gfx/size.h" 53#include "ui/gfx/size_f.h" 54#include "ui/gfx/skbitmap_operations.h" 55#include "v8/include/v8-testing.h" 56 57#if defined(ENABLE_EXTENSIONS) 58#include "chrome/common/extensions/chrome_extension_messages.h" 59#endif 60 61using blink::WebAXObject; 62using blink::WebCString; 63using blink::WebDataSource; 64using blink::WebDocument; 65using blink::WebElement; 66using blink::WebFrame; 67using blink::WebGestureEvent; 68using blink::WebIconURL; 69using blink::WebLocalFrame; 70using blink::WebNode; 71using blink::WebNodeList; 72using blink::WebRect; 73using blink::WebSecurityOrigin; 74using blink::WebSize; 75using blink::WebString; 76using blink::WebTouchEvent; 77using blink::WebURL; 78using blink::WebURLRequest; 79using blink::WebView; 80using blink::WebVector; 81using blink::WebWindowFeatures; 82 83// Delay in milliseconds that we'll wait before capturing the page contents 84// and thumbnail. 85static const int kDelayForCaptureMs = 500; 86 87// Typically, we capture the page data once the page is loaded. 88// Sometimes, the page never finishes to load, preventing the page capture 89// To workaround this problem, we always perform a capture after the following 90// delay. 91static const int kDelayForForcedCaptureMs = 6000; 92 93// define to write the time necessary for thumbnail/DOM text retrieval, 94// respectively, into the system debug log 95// #define TIME_TEXT_RETRIEVAL 96 97// maximum number of characters in the document to index, any text beyond this 98// point will be clipped 99static const size_t kMaxIndexChars = 65535; 100 101// Constants for UMA statistic collection. 102static const char kTranslateCaptureText[] = "Translate.CaptureText"; 103 104namespace { 105 106#if defined(OS_ANDROID) 107// Parses the DOM for a <meta> tag with a particular name. 108// |meta_tag_content| is set to the contents of the 'content' attribute. 109// |found_tag| is set to true if the tag was successfully found. 110// Returns true if the document was parsed without errors. 111bool RetrieveMetaTagContent(const WebFrame* main_frame, 112 const GURL& expected_url, 113 const std::string& meta_tag_name, 114 bool* found_tag, 115 std::string* meta_tag_content) { 116 WebDocument document = 117 main_frame ? main_frame->document() : WebDocument(); 118 WebElement head = document.isNull() ? WebElement() : document.head(); 119 GURL document_url = document.isNull() ? GURL() : GURL(document.url()); 120 121 // Search the DOM for the <meta> tag with the given name. 122 *found_tag = false; 123 *meta_tag_content = ""; 124 if (!head.isNull()) { 125 WebNodeList children = head.childNodes(); 126 for (unsigned i = 0; i < children.length(); ++i) { 127 WebNode child = children.item(i); 128 if (!child.isElementNode()) 129 continue; 130 WebElement elem = child.to<WebElement>(); 131 if (elem.hasHTMLTagName("meta")) { 132 if (elem.hasAttribute("name") && elem.hasAttribute("content")) { 133 std::string name = elem.getAttribute("name").utf8(); 134 if (name == meta_tag_name) { 135 *meta_tag_content = elem.getAttribute("content").utf8(); 136 *found_tag = true; 137 break; 138 } 139 } 140 } 141 } 142 } 143 144 // Make sure we're checking the right page and that the length of the content 145 // string is reasonable. 146 bool success = document_url == expected_url; 147 if (meta_tag_content->size() > chrome::kMaxMetaTagAttributeLength) { 148 *meta_tag_content = ""; 149 success = false; 150 } 151 152 return success; 153} 154#endif 155 156} // namespace 157 158ChromeRenderViewObserver::ChromeRenderViewObserver( 159 content::RenderView* render_view, 160 web_cache::WebCacheRenderProcessObserver* web_cache_render_process_observer) 161 : content::RenderViewObserver(render_view), 162 web_cache_render_process_observer_(web_cache_render_process_observer), 163 translate_helper_(new translate::TranslateHelper( 164 render_view, 165 chrome::ISOLATED_WORLD_ID_TRANSLATE, 166 extensions::EXTENSION_GROUP_INTERNAL_TRANSLATE_SCRIPTS, 167 extensions::kExtensionScheme)), 168 phishing_classifier_(NULL), 169 capture_timer_(false, false) { 170 const CommandLine& command_line = *CommandLine::ForCurrentProcess(); 171 if (!command_line.HasSwitch(switches::kDisableClientSidePhishingDetection)) 172 OnSetClientSidePhishingDetection(true); 173} 174 175ChromeRenderViewObserver::~ChromeRenderViewObserver() { 176} 177 178bool ChromeRenderViewObserver::OnMessageReceived(const IPC::Message& message) { 179 bool handled = true; 180 IPC_BEGIN_MESSAGE_MAP(ChromeRenderViewObserver, message) 181#if !defined(OS_ANDROID) && !defined(OS_IOS) 182 IPC_MESSAGE_HANDLER(ChromeViewMsg_WebUIJavaScript, OnWebUIJavaScript) 183#endif 184#if defined(ENABLE_EXTENSIONS) 185 IPC_MESSAGE_HANDLER(ChromeViewMsg_SetVisuallyDeemphasized, 186 OnSetVisuallyDeemphasized) 187#endif 188#if defined(OS_ANDROID) 189 IPC_MESSAGE_HANDLER(ChromeViewMsg_UpdateTopControlsState, 190 OnUpdateTopControlsState) 191 IPC_MESSAGE_HANDLER(ChromeViewMsg_RetrieveMetaTagContent, 192 OnRetrieveMetaTagContent) 193#endif 194 IPC_MESSAGE_HANDLER(ChromeViewMsg_GetWebApplicationInfo, 195 OnGetWebApplicationInfo) 196 IPC_MESSAGE_HANDLER(ChromeViewMsg_SetClientSidePhishingDetection, 197 OnSetClientSidePhishingDetection) 198 IPC_MESSAGE_HANDLER(ChromeViewMsg_SetWindowFeatures, OnSetWindowFeatures) 199 IPC_MESSAGE_UNHANDLED(handled = false) 200 IPC_END_MESSAGE_MAP() 201 202 return handled; 203} 204 205#if !defined(OS_ANDROID) && !defined(OS_IOS) 206void ChromeRenderViewObserver::OnWebUIJavaScript( 207 const base::string16& javascript) { 208 webui_javascript_.push_back(javascript); 209} 210#endif 211 212#if defined(OS_ANDROID) 213void ChromeRenderViewObserver::OnUpdateTopControlsState( 214 content::TopControlsState constraints, 215 content::TopControlsState current, 216 bool animate) { 217 render_view()->UpdateTopControlsState(constraints, current, animate); 218} 219 220void ChromeRenderViewObserver::OnRetrieveMetaTagContent( 221 const GURL& expected_url, 222 const std::string tag_name) { 223 bool found_tag; 224 std::string content_str; 225 bool parsed_successfully = RetrieveMetaTagContent( 226 render_view()->GetWebView()->mainFrame(), 227 expected_url, 228 tag_name, 229 &found_tag, 230 &content_str); 231 232 Send(new ChromeViewHostMsg_DidRetrieveMetaTagContent( 233 routing_id(), 234 parsed_successfully && found_tag, 235 tag_name, 236 content_str, 237 expected_url)); 238} 239#endif 240 241void ChromeRenderViewObserver::OnGetWebApplicationInfo() { 242 WebFrame* main_frame = render_view()->GetWebView()->mainFrame(); 243 DCHECK(main_frame); 244 245 WebApplicationInfo web_app_info; 246 web_apps::ParseWebAppFromWebDocument(main_frame, &web_app_info); 247 248 // The warning below is specific to mobile but it doesn't hurt to show it even 249 // if the Chromium build is running on a desktop. It will get more exposition. 250 if (web_app_info.mobile_capable == 251 WebApplicationInfo::MOBILE_CAPABLE_APPLE) { 252 blink::WebConsoleMessage message( 253 blink::WebConsoleMessage::LevelWarning, 254 "<meta name=\"apple-mobile-web-app-capable\" content=\"yes\"> is " 255 "deprecated. Please include <meta name=\"mobile-web-app-capable\" " 256 "content=\"yes\"> - " 257 "http://developers.google.com/chrome/mobile/docs/installtohomescreen"); 258 main_frame->addMessageToConsole(message); 259 } 260 261 // Prune out any data URLs in the set of icons. The browser process expects 262 // any icon with a data URL to have originated from a favicon. We don't want 263 // to decode arbitrary data URLs in the browser process. See 264 // http://b/issue?id=1162972 265 for (std::vector<WebApplicationInfo::IconInfo>::iterator it = 266 web_app_info.icons.begin(); it != web_app_info.icons.end();) { 267 if (it->url.SchemeIs(url::kDataScheme)) 268 it = web_app_info.icons.erase(it); 269 else 270 ++it; 271 } 272 273 // Truncate the strings we send to the browser process. 274 web_app_info.title = 275 web_app_info.title.substr(0, chrome::kMaxMetaTagAttributeLength); 276 web_app_info.description = 277 web_app_info.description.substr(0, chrome::kMaxMetaTagAttributeLength); 278 279 Send(new ChromeViewHostMsg_DidGetWebApplicationInfo( 280 routing_id(), web_app_info)); 281} 282 283void ChromeRenderViewObserver::OnSetWindowFeatures( 284 const WebWindowFeatures& window_features) { 285 render_view()->GetWebView()->setWindowFeatures(window_features); 286} 287 288void ChromeRenderViewObserver::Navigate(const GURL& url) { 289 // Execute cache clear operations that were postponed until a navigation 290 // event (including tab reload). 291 if (web_cache_render_process_observer_) 292 web_cache_render_process_observer_->ExecutePendingClearCache(); 293 // Let translate_helper do any preparatory work for loading a URL. 294 if (translate_helper_) 295 translate_helper_->PrepareForUrl(url); 296} 297 298void ChromeRenderViewObserver::OnSetClientSidePhishingDetection( 299 bool enable_phishing_detection) { 300#if defined(FULL_SAFE_BROWSING) && !defined(OS_CHROMEOS) 301 phishing_classifier_ = enable_phishing_detection ? 302 safe_browsing::PhishingClassifierDelegate::Create(render_view(), NULL) : 303 NULL; 304#endif 305} 306 307#if defined(ENABLE_EXTENSIONS) 308void ChromeRenderViewObserver::OnSetVisuallyDeemphasized(bool deemphasized) { 309 bool already_deemphasized = !!dimmed_color_overlay_.get(); 310 if (already_deemphasized == deemphasized) 311 return; 312 313 if (deemphasized) { 314 // 70% opaque grey. 315 SkColor greyish = SkColorSetARGB(178, 0, 0, 0); 316 dimmed_color_overlay_.reset( 317 new WebViewColorOverlay(render_view(), greyish)); 318 } else { 319 dimmed_color_overlay_.reset(); 320 } 321} 322#endif 323 324void ChromeRenderViewObserver::DidStartLoading() { 325 if ((render_view()->GetEnabledBindings() & content::BINDINGS_POLICY_WEB_UI) && 326 !webui_javascript_.empty()) { 327 for (size_t i = 0; i < webui_javascript_.size(); ++i) { 328 render_view()->GetMainRenderFrame()->ExecuteJavaScript( 329 webui_javascript_[i]); 330 } 331 webui_javascript_.clear(); 332 } 333} 334 335void ChromeRenderViewObserver::DidStopLoading() { 336 WebFrame* main_frame = render_view()->GetWebView()->mainFrame(); 337 GURL osdd_url = main_frame->document().openSearchDescriptionURL(); 338 if (!osdd_url.is_empty()) { 339 Send(new ChromeViewHostMsg_PageHasOSDD( 340 routing_id(), main_frame->document().url(), osdd_url, 341 search_provider::AUTODETECTED_PROVIDER)); 342 } 343 344 // Don't capture pages including refresh meta tag. 345 if (HasRefreshMetaTag(main_frame)) 346 return; 347 348 CapturePageInfoLater( 349 false, // preliminary_capture 350 base::TimeDelta::FromMilliseconds( 351 render_view()->GetContentStateImmediately() ? 352 0 : kDelayForCaptureMs)); 353} 354 355void ChromeRenderViewObserver::DidCommitProvisionalLoad( 356 WebLocalFrame* frame, bool is_new_navigation) { 357 // Don't capture pages being not new, or including refresh meta tag. 358 if (!is_new_navigation || HasRefreshMetaTag(frame)) 359 return; 360 361 CapturePageInfoLater( 362 true, // preliminary_capture 363 base::TimeDelta::FromMilliseconds(kDelayForForcedCaptureMs)); 364} 365 366void ChromeRenderViewObserver::CapturePageInfoLater(bool preliminary_capture, 367 base::TimeDelta delay) { 368 capture_timer_.Start( 369 FROM_HERE, 370 delay, 371 base::Bind(&ChromeRenderViewObserver::CapturePageInfo, 372 base::Unretained(this), 373 preliminary_capture)); 374} 375 376void ChromeRenderViewObserver::CapturePageInfo(bool preliminary_capture) { 377 if (!render_view()->GetWebView()) 378 return; 379 380 WebFrame* main_frame = render_view()->GetWebView()->mainFrame(); 381 if (!main_frame) 382 return; 383 384 // Don't index/capture pages that are in view source mode. 385 if (main_frame->isViewSourceModeEnabled()) 386 return; 387 388 // Don't index/capture pages that failed to load. This only checks the top 389 // level frame so the thumbnail may contain a frame that failed to load. 390 WebDataSource* ds = main_frame->dataSource(); 391 if (ds && ds->hasUnreachableURL()) 392 return; 393 394 // Don't index/capture pages that are being prerendered. 395 if (prerender::PrerenderHelper::IsPrerendering( 396 render_view()->GetMainRenderFrame())) { 397 return; 398 } 399 400 // Retrieve the frame's full text (up to kMaxIndexChars), and pass it to the 401 // translate helper for language detection and possible translation. 402 base::string16 contents; 403 base::TimeTicks capture_begin_time = base::TimeTicks::Now(); 404 CaptureText(main_frame, &contents); 405 UMA_HISTOGRAM_TIMES(kTranslateCaptureText, 406 base::TimeTicks::Now() - capture_begin_time); 407 if (translate_helper_) 408 translate_helper_->PageCaptured(contents); 409 410 TRACE_EVENT0("renderer", "ChromeRenderViewObserver::CapturePageInfo"); 411 412#if defined(FULL_SAFE_BROWSING) 413 // Will swap out the string. 414 if (phishing_classifier_) 415 phishing_classifier_->PageCaptured(&contents, preliminary_capture); 416#endif 417} 418 419void ChromeRenderViewObserver::CaptureText(WebFrame* frame, 420 base::string16* contents) { 421 contents->clear(); 422 if (!frame) 423 return; 424 425#ifdef TIME_TEXT_RETRIEVAL 426 double begin = time_util::GetHighResolutionTimeNow(); 427#endif 428 429 // get the contents of the frame 430 *contents = frame->contentAsText(kMaxIndexChars); 431 432#ifdef TIME_TEXT_RETRIEVAL 433 double end = time_util::GetHighResolutionTimeNow(); 434 char buf[128]; 435 sprintf_s(buf, "%d chars retrieved for indexing in %gms\n", 436 contents.size(), (end - begin)*1000); 437 OutputDebugStringA(buf); 438#endif 439 440 // When the contents are clipped to the maximum, we don't want to have a 441 // partial word indexed at the end that might have been clipped. Therefore, 442 // terminate the string at the last space to ensure no words are clipped. 443 if (contents->size() == kMaxIndexChars) { 444 size_t last_space_index = contents->find_last_of(base::kWhitespaceUTF16); 445 if (last_space_index != base::string16::npos) 446 contents->resize(last_space_index); 447 } 448} 449 450bool ChromeRenderViewObserver::HasRefreshMetaTag(WebFrame* frame) { 451 if (!frame) 452 return false; 453 WebElement head = frame->document().head(); 454 if (head.isNull() || !head.hasChildNodes()) 455 return false; 456 457 const WebString tag_name(base::ASCIIToUTF16("meta")); 458 const WebString attribute_name(base::ASCIIToUTF16("http-equiv")); 459 460 WebNodeList children = head.childNodes(); 461 for (size_t i = 0; i < children.length(); ++i) { 462 WebNode node = children.item(i); 463 if (!node.isElementNode()) 464 continue; 465 WebElement element = node.to<WebElement>(); 466 if (!element.hasHTMLTagName(tag_name)) 467 continue; 468 WebString value = element.getAttribute(attribute_name); 469 if (value.isNull() || !LowerCaseEqualsASCII(value, "refresh")) 470 continue; 471 return true; 472 } 473 return false; 474} 475