1// Copyright 2014 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "components/dom_distiller/content/dom_distiller_viewer_source.h" 6 7#include <sstream> 8#include <string> 9#include <vector> 10 11#include "base/memory/ref_counted_memory.h" 12#include "base/memory/scoped_ptr.h" 13#include "base/message_loop/message_loop.h" 14#include "base/strings/utf_string_conversions.h" 15#include "components/dom_distiller/core/task_tracker.h" 16#include "components/dom_distiller/core/url_constants.h" 17#include "components/dom_distiller/core/viewer.h" 18#include "content/public/browser/navigation_details.h" 19#include "content/public/browser/navigation_entry.h" 20#include "content/public/browser/render_frame_host.h" 21#include "content/public/browser/render_view_host.h" 22#include "content/public/browser/web_contents.h" 23#include "content/public/browser/web_contents_observer.h" 24#include "net/base/url_util.h" 25#include "net/url_request/url_request.h" 26 27namespace dom_distiller { 28 29// Handles receiving data asynchronously for a specific entry, and passing 30// it along to the data callback for the data source. Lifetime matches that of 31// the current main frame's page in the Viewer instance. 32class DomDistillerViewerSource::RequestViewerHandle 33 : public ViewRequestDelegate, 34 public content::WebContentsObserver { 35 public: 36 explicit RequestViewerHandle( 37 content::WebContents* web_contents, 38 const std::string& expected_scheme, 39 const std::string& expected_request_path, 40 const content::URLDataSource::GotDataCallback& callback); 41 virtual ~RequestViewerHandle(); 42 43 // ViewRequestDelegate implementation. 44 virtual void OnArticleReady( 45 const DistilledArticleProto* article_proto) OVERRIDE; 46 47 virtual void OnArticleUpdated( 48 ArticleDistillationUpdate article_update) OVERRIDE; 49 50 void TakeViewerHandle(scoped_ptr<ViewerHandle> viewer_handle); 51 52 // WebContentsObserver: 53 virtual void DidNavigateMainFrame( 54 const content::LoadCommittedDetails& details, 55 const content::FrameNavigateParams& params) OVERRIDE; 56 virtual void RenderProcessGone(base::TerminationStatus status) OVERRIDE; 57 virtual void WebContentsDestroyed() OVERRIDE; 58 virtual void DidFinishLoad( 59 int64 frame_id, 60 const GURL& validated_url, 61 bool is_main_frame, 62 content::RenderViewHost* render_view_host) OVERRIDE; 63 64 private: 65 // Sends JavaScript to the attached Viewer, buffering data if the viewer isn't 66 // ready. 67 void SendJavaScript(const std::string& buffer); 68 69 // Cancels the current view request. Once called, no updates will be 70 // propagated to the view, and the request to DomDistillerService will be 71 // cancelled. 72 void Cancel(); 73 74 // The handle to the view request towards the DomDistillerService. It 75 // needs to be kept around to ensure the distillation request finishes. 76 scoped_ptr<ViewerHandle> viewer_handle_; 77 78 // WebContents associated with the Viewer's render process. 79 content::WebContents* web_contents_; 80 81 // The scheme hosting the current view request; 82 std::string expected_scheme_; 83 84 // The query path for the current view request. 85 std::string expected_request_path_; 86 87 // Holds the callback to where the data retrieved is sent back. 88 content::URLDataSource::GotDataCallback callback_; 89 90 // Number of pages of the distilled article content that have been rendered by 91 // the viewer. 92 int page_count_; 93 94 // Whether the page is sufficiently initialized to handle updates from the 95 // distiller. 96 bool waiting_for_page_ready_; 97 98 // Temporary store of pending JavaScript if the page isn't ready to receive 99 // data from distillation. 100 std::string buffer_; 101}; 102 103DomDistillerViewerSource::RequestViewerHandle::RequestViewerHandle( 104 content::WebContents* web_contents, 105 const std::string& expected_scheme, 106 const std::string& expected_request_path, 107 const content::URLDataSource::GotDataCallback& callback) 108 : web_contents_(web_contents), 109 expected_scheme_(expected_scheme), 110 expected_request_path_(expected_request_path), 111 callback_(callback), 112 page_count_(0), 113 waiting_for_page_ready_(true) { 114 content::WebContentsObserver::Observe(web_contents_); 115} 116 117DomDistillerViewerSource::RequestViewerHandle::~RequestViewerHandle() { 118 // Balanced with constructor although can be a no-op if frame navigated away. 119 content::WebContentsObserver::Observe(NULL); 120} 121 122void DomDistillerViewerSource::RequestViewerHandle::SendJavaScript( 123 const std::string& buffer) { 124 if (waiting_for_page_ready_) { 125 buffer_ += buffer; 126 } else { 127 if (web_contents_) { 128 web_contents_->GetMainFrame()->ExecuteJavaScript( 129 base::UTF8ToUTF16(buffer)); 130 } 131 } 132} 133 134void DomDistillerViewerSource::RequestViewerHandle::DidNavigateMainFrame( 135 const content::LoadCommittedDetails& details, 136 const content::FrameNavigateParams& params) { 137 const GURL& navigation = details.entry->GetURL(); 138 if (details.is_in_page || ( 139 navigation.SchemeIs(expected_scheme_.c_str()) && 140 expected_request_path_ == navigation.query())) { 141 // In-page navigations, as well as the main view request can be ignored. 142 return; 143 } 144 145 Cancel(); 146 147} 148 149void DomDistillerViewerSource::RequestViewerHandle::RenderProcessGone( 150 base::TerminationStatus status) { 151 Cancel(); 152} 153 154void DomDistillerViewerSource::RequestViewerHandle::WebContentsDestroyed() { 155 Cancel(); 156} 157 158void DomDistillerViewerSource::RequestViewerHandle::Cancel() { 159 // Ensure we don't send any incremental updates to the Viewer. 160 web_contents_ = NULL; 161 162 // No need to listen for notifications. 163 content::WebContentsObserver::Observe(NULL); 164 165 // Schedule the Viewer for deletion. Ensures distillation is cancelled, and 166 // any pending data stored in |buffer_| is released. 167 base::MessageLoop::current()->DeleteSoon(FROM_HERE, this); 168} 169 170void DomDistillerViewerSource::RequestViewerHandle::DidFinishLoad( 171 int64 frame_id, 172 const GURL& validated_url, 173 bool is_main_frame, 174 content::RenderViewHost* render_view_host) { 175 if (!is_main_frame || web_contents_ == NULL) { 176 return; 177 } 178 waiting_for_page_ready_ = false; 179 if (buffer_.empty()) { 180 return; 181 } 182 if (web_contents_) { 183 web_contents_->GetMainFrame()->ExecuteJavaScript( 184 base::UTF8ToUTF16(buffer_)); 185 } 186 buffer_.clear(); 187} 188 189void DomDistillerViewerSource::RequestViewerHandle::OnArticleReady( 190 const DistilledArticleProto* article_proto) { 191 if (page_count_ == 0) { 192 // This is a single-page article. 193 std::string unsafe_page_html = viewer::GetUnsafeArticleHtml(article_proto); 194 callback_.Run(base::RefCountedString::TakeString(&unsafe_page_html)); 195 } else if (page_count_ == article_proto->pages_size()) { 196 // We may still be showing the "Loading" indicator. 197 SendJavaScript(viewer::GetToggleLoadingIndicatorJs(true)); 198 } else { 199 // It's possible that we didn't get some incremental updates from the 200 // distiller. Ensure all remaining pages are flushed to the viewer. 201 for (;page_count_ < article_proto->pages_size(); page_count_++) { 202 const DistilledPageProto& page = article_proto->pages(page_count_); 203 SendJavaScript( 204 viewer::GetUnsafeIncrementalDistilledPageJs( 205 &page, 206 page_count_ == article_proto->pages_size())); 207 } 208 } 209 // No need to hold on to the ViewerHandle now that distillation is complete. 210 viewer_handle_.reset(); 211} 212 213void DomDistillerViewerSource::RequestViewerHandle::OnArticleUpdated( 214 ArticleDistillationUpdate article_update) { 215 for (;page_count_ < static_cast<int>(article_update.GetPagesSize()); 216 page_count_++) { 217 const DistilledPageProto& page = 218 article_update.GetDistilledPage(page_count_); 219 if (page_count_ == 0) { 220 // This is the first page, so send Viewer page scaffolding too. 221 std::string unsafe_page_html = viewer::GetUnsafePartialArticleHtml(&page); 222 callback_.Run(base::RefCountedString::TakeString(&unsafe_page_html)); 223 } else { 224 SendJavaScript( 225 viewer::GetUnsafeIncrementalDistilledPageJs(&page, false)); 226 } 227 } 228} 229 230void DomDistillerViewerSource::RequestViewerHandle::TakeViewerHandle( 231 scoped_ptr<ViewerHandle> viewer_handle) { 232 viewer_handle_ = viewer_handle.Pass(); 233} 234 235DomDistillerViewerSource::DomDistillerViewerSource( 236 DomDistillerServiceInterface* dom_distiller_service, 237 const std::string& scheme) 238 : scheme_(scheme), dom_distiller_service_(dom_distiller_service) { 239} 240 241DomDistillerViewerSource::~DomDistillerViewerSource() { 242} 243 244std::string DomDistillerViewerSource::GetSource() const { 245 return scheme_ + "://"; 246} 247 248void DomDistillerViewerSource::StartDataRequest( 249 const std::string& path, 250 int render_process_id, 251 int render_frame_id, 252 const content::URLDataSource::GotDataCallback& callback) { 253 content::RenderFrameHost* render_frame_host = 254 content::RenderFrameHost::FromID(render_process_id, render_frame_id); 255 DCHECK(render_frame_host); 256 content::RenderViewHost* render_view_host = 257 render_frame_host->GetRenderViewHost(); 258 DCHECK(render_view_host); 259 CHECK_EQ(0, render_view_host->GetEnabledBindings()); 260 261 if (kViewerCssPath == path) { 262 std::string css = viewer::GetCss(); 263 callback.Run(base::RefCountedString::TakeString(&css)); 264 return; 265 } 266 if (kViewerJsPath == path) { 267 std::string js = viewer::GetJavaScript(); 268 callback.Run(base::RefCountedString::TakeString(&js)); 269 return; 270 } 271 content::WebContents* web_contents = 272 content::WebContents::FromRenderFrameHost( 273 content::RenderFrameHost::FromID(render_process_id, 274 render_frame_id)); 275 DCHECK(web_contents); 276 // An empty |path| is invalid, but guard against it. If not empty, assume 277 // |path| starts with '?', which is stripped away. 278 const std::string path_after_query_separator = 279 path.size() > 0 ? path.substr(1) : ""; 280 RequestViewerHandle* request_viewer_handle = new RequestViewerHandle( 281 web_contents, scheme_, path_after_query_separator, callback); 282 scoped_ptr<ViewerHandle> viewer_handle = viewer::CreateViewRequest( 283 dom_distiller_service_, path, request_viewer_handle); 284 285 if (viewer_handle) { 286 // The service returned a |ViewerHandle| and guarantees it will call 287 // the |RequestViewerHandle|, so passing ownership to it, to ensure the 288 // request is not cancelled. The |RequestViewerHandle| will delete itself 289 // after receiving the callback. 290 request_viewer_handle->TakeViewerHandle(viewer_handle.Pass()); 291 } else { 292 // The service did not return a |ViewerHandle|, which means the 293 // |RequestViewerHandle| will never be called, so clean up now. 294 delete request_viewer_handle; 295 296 std::string error_page_html = viewer::GetErrorPageHtml(); 297 callback.Run(base::RefCountedString::TakeString(&error_page_html)); 298 } 299}; 300 301std::string DomDistillerViewerSource::GetMimeType( 302 const std::string& path) const { 303 if (kViewerCssPath == path) { 304 return "text/css"; 305 } 306 if (kViewerJsPath == path) { 307 return "text/javascript"; 308 } 309 return "text/html"; 310} 311 312bool DomDistillerViewerSource::ShouldServiceRequest( 313 const net::URLRequest* request) const { 314 return request->url().SchemeIs(scheme_.c_str()); 315} 316 317// TODO(nyquist): Start tracking requests using this method. 318void DomDistillerViewerSource::WillServiceRequest( 319 const net::URLRequest* request, 320 std::string* path) const { 321} 322 323std::string DomDistillerViewerSource::GetContentSecurityPolicyObjectSrc() 324 const { 325 return "object-src 'none'; style-src 'self';"; 326} 327 328} // namespace dom_distiller 329