1// Copyright 2014 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "components/dom_distiller/content/dom_distiller_viewer_source.h"
6
7#include <sstream>
8#include <string>
9#include <vector>
10
11#include "base/memory/ref_counted_memory.h"
12#include "base/memory/scoped_ptr.h"
13#include "base/message_loop/message_loop.h"
14#include "base/strings/utf_string_conversions.h"
15#include "components/dom_distiller/core/task_tracker.h"
16#include "components/dom_distiller/core/url_constants.h"
17#include "components/dom_distiller/core/viewer.h"
18#include "content/public/browser/navigation_details.h"
19#include "content/public/browser/navigation_entry.h"
20#include "content/public/browser/render_frame_host.h"
21#include "content/public/browser/render_view_host.h"
22#include "content/public/browser/web_contents.h"
23#include "content/public/browser/web_contents_observer.h"
24#include "net/base/url_util.h"
25#include "net/url_request/url_request.h"
26
27namespace dom_distiller {
28
29// Handles receiving data asynchronously for a specific entry, and passing
30// it along to the data callback for the data source. Lifetime matches that of
31// the current main frame's page in the Viewer instance.
32class DomDistillerViewerSource::RequestViewerHandle
33    : public ViewRequestDelegate,
34      public content::WebContentsObserver {
35 public:
36  explicit RequestViewerHandle(
37      content::WebContents* web_contents,
38      const std::string& expected_scheme,
39      const std::string& expected_request_path,
40      const content::URLDataSource::GotDataCallback& callback);
41  virtual ~RequestViewerHandle();
42
43  // ViewRequestDelegate implementation.
44  virtual void OnArticleReady(
45      const DistilledArticleProto* article_proto) OVERRIDE;
46
47  virtual void OnArticleUpdated(
48      ArticleDistillationUpdate article_update) OVERRIDE;
49
50  void TakeViewerHandle(scoped_ptr<ViewerHandle> viewer_handle);
51
52  // WebContentsObserver:
53  virtual void DidNavigateMainFrame(
54      const content::LoadCommittedDetails& details,
55      const content::FrameNavigateParams& params) OVERRIDE;
56  virtual void RenderProcessGone(base::TerminationStatus status) OVERRIDE;
57  virtual void WebContentsDestroyed() OVERRIDE;
58  virtual void DidFinishLoad(
59      int64 frame_id,
60      const GURL& validated_url,
61      bool is_main_frame,
62      content::RenderViewHost* render_view_host) OVERRIDE;
63
64 private:
65  // Sends JavaScript to the attached Viewer, buffering data if the viewer isn't
66  // ready.
67  void SendJavaScript(const std::string& buffer);
68
69  // Cancels the current view request. Once called, no updates will be
70  // propagated to the view, and the request to DomDistillerService will be
71  // cancelled.
72  void Cancel();
73
74  // The handle to the view request towards the DomDistillerService. It
75  // needs to be kept around to ensure the distillation request finishes.
76  scoped_ptr<ViewerHandle> viewer_handle_;
77
78  // WebContents associated with the Viewer's render process.
79  content::WebContents* web_contents_;
80
81  // The scheme hosting the current view request;
82  std::string expected_scheme_;
83
84  // The query path for the current view request.
85  std::string expected_request_path_;
86
87  // Holds the callback to where the data retrieved is sent back.
88  content::URLDataSource::GotDataCallback callback_;
89
90  // Number of pages of the distilled article content that have been rendered by
91  // the viewer.
92  int page_count_;
93
94  // Whether the page is sufficiently initialized to handle updates from the
95  // distiller.
96  bool waiting_for_page_ready_;
97
98  // Temporary store of pending JavaScript if the page isn't ready to receive
99  // data from distillation.
100  std::string buffer_;
101};
102
103DomDistillerViewerSource::RequestViewerHandle::RequestViewerHandle(
104    content::WebContents* web_contents,
105    const std::string& expected_scheme,
106    const std::string& expected_request_path,
107    const content::URLDataSource::GotDataCallback& callback)
108    : web_contents_(web_contents),
109      expected_scheme_(expected_scheme),
110      expected_request_path_(expected_request_path),
111      callback_(callback),
112      page_count_(0),
113      waiting_for_page_ready_(true) {
114  content::WebContentsObserver::Observe(web_contents_);
115}
116
117DomDistillerViewerSource::RequestViewerHandle::~RequestViewerHandle() {
118  // Balanced with constructor although can be a no-op if frame navigated away.
119  content::WebContentsObserver::Observe(NULL);
120}
121
122void DomDistillerViewerSource::RequestViewerHandle::SendJavaScript(
123    const std::string& buffer) {
124  if (waiting_for_page_ready_) {
125    buffer_ += buffer;
126  } else {
127    if (web_contents_) {
128      web_contents_->GetMainFrame()->ExecuteJavaScript(
129          base::UTF8ToUTF16(buffer));
130    }
131  }
132}
133
134void DomDistillerViewerSource::RequestViewerHandle::DidNavigateMainFrame(
135    const content::LoadCommittedDetails& details,
136    const content::FrameNavigateParams& params) {
137  const GURL& navigation = details.entry->GetURL();
138  if (details.is_in_page || (
139      navigation.SchemeIs(expected_scheme_.c_str()) &&
140      expected_request_path_ == navigation.query())) {
141    // In-page navigations, as well as the main view request can be ignored.
142    return;
143  }
144
145  Cancel();
146
147}
148
149void DomDistillerViewerSource::RequestViewerHandle::RenderProcessGone(
150    base::TerminationStatus status) {
151  Cancel();
152}
153
154void DomDistillerViewerSource::RequestViewerHandle::WebContentsDestroyed() {
155  Cancel();
156}
157
158void DomDistillerViewerSource::RequestViewerHandle::Cancel() {
159  // Ensure we don't send any incremental updates to the Viewer.
160  web_contents_ = NULL;
161
162  // No need to listen for notifications.
163  content::WebContentsObserver::Observe(NULL);
164
165  // Schedule the Viewer for deletion. Ensures distillation is cancelled, and
166  // any pending data stored in |buffer_| is released.
167  base::MessageLoop::current()->DeleteSoon(FROM_HERE, this);
168}
169
170void DomDistillerViewerSource::RequestViewerHandle::DidFinishLoad(
171     int64 frame_id,
172     const GURL& validated_url,
173     bool is_main_frame,
174     content::RenderViewHost* render_view_host)  {
175  if (!is_main_frame || web_contents_ == NULL) {
176    return;
177  }
178  waiting_for_page_ready_ = false;
179  if (buffer_.empty()) {
180    return;
181  }
182  if (web_contents_) {
183    web_contents_->GetMainFrame()->ExecuteJavaScript(
184        base::UTF8ToUTF16(buffer_));
185  }
186  buffer_.clear();
187}
188
189void DomDistillerViewerSource::RequestViewerHandle::OnArticleReady(
190    const DistilledArticleProto* article_proto) {
191  if (page_count_ == 0) {
192    // This is a single-page article.
193    std::string unsafe_page_html = viewer::GetUnsafeArticleHtml(article_proto);
194    callback_.Run(base::RefCountedString::TakeString(&unsafe_page_html));
195  } else if (page_count_ == article_proto->pages_size()) {
196    // We may still be showing the "Loading" indicator.
197    SendJavaScript(viewer::GetToggleLoadingIndicatorJs(true));
198  } else {
199    // It's possible that we didn't get some incremental updates from the
200    // distiller. Ensure all remaining pages are flushed to the viewer.
201    for (;page_count_ < article_proto->pages_size(); page_count_++) {
202      const DistilledPageProto& page = article_proto->pages(page_count_);
203      SendJavaScript(
204          viewer::GetUnsafeIncrementalDistilledPageJs(
205              &page,
206              page_count_ == article_proto->pages_size()));
207    }
208  }
209  // No need to hold on to the ViewerHandle now that distillation is complete.
210  viewer_handle_.reset();
211}
212
213void DomDistillerViewerSource::RequestViewerHandle::OnArticleUpdated(
214    ArticleDistillationUpdate article_update) {
215  for (;page_count_ < static_cast<int>(article_update.GetPagesSize());
216       page_count_++) {
217    const DistilledPageProto& page =
218        article_update.GetDistilledPage(page_count_);
219    if (page_count_ == 0) {
220      // This is the first page, so send Viewer page scaffolding too.
221      std::string unsafe_page_html = viewer::GetUnsafePartialArticleHtml(&page);
222      callback_.Run(base::RefCountedString::TakeString(&unsafe_page_html));
223    } else {
224      SendJavaScript(
225          viewer::GetUnsafeIncrementalDistilledPageJs(&page, false));
226    }
227  }
228}
229
230void DomDistillerViewerSource::RequestViewerHandle::TakeViewerHandle(
231    scoped_ptr<ViewerHandle> viewer_handle) {
232  viewer_handle_ = viewer_handle.Pass();
233}
234
235DomDistillerViewerSource::DomDistillerViewerSource(
236    DomDistillerServiceInterface* dom_distiller_service,
237    const std::string& scheme)
238    : scheme_(scheme), dom_distiller_service_(dom_distiller_service) {
239}
240
241DomDistillerViewerSource::~DomDistillerViewerSource() {
242}
243
244std::string DomDistillerViewerSource::GetSource() const {
245  return scheme_ + "://";
246}
247
248void DomDistillerViewerSource::StartDataRequest(
249    const std::string& path,
250    int render_process_id,
251    int render_frame_id,
252    const content::URLDataSource::GotDataCallback& callback) {
253  content::RenderFrameHost* render_frame_host =
254      content::RenderFrameHost::FromID(render_process_id, render_frame_id);
255  DCHECK(render_frame_host);
256  content::RenderViewHost* render_view_host =
257      render_frame_host->GetRenderViewHost();
258  DCHECK(render_view_host);
259  CHECK_EQ(0, render_view_host->GetEnabledBindings());
260
261  if (kViewerCssPath == path) {
262    std::string css = viewer::GetCss();
263    callback.Run(base::RefCountedString::TakeString(&css));
264    return;
265  }
266  if (kViewerJsPath == path) {
267    std::string js = viewer::GetJavaScript();
268    callback.Run(base::RefCountedString::TakeString(&js));
269    return;
270  }
271  content::WebContents* web_contents =
272      content::WebContents::FromRenderFrameHost(
273          content::RenderFrameHost::FromID(render_process_id,
274                                           render_frame_id));
275  DCHECK(web_contents);
276  // An empty |path| is invalid, but guard against it. If not empty, assume
277  // |path| starts with '?', which is stripped away.
278  const std::string path_after_query_separator =
279      path.size() > 0 ? path.substr(1) : "";
280  RequestViewerHandle* request_viewer_handle = new RequestViewerHandle(
281      web_contents, scheme_, path_after_query_separator, callback);
282  scoped_ptr<ViewerHandle> viewer_handle = viewer::CreateViewRequest(
283      dom_distiller_service_, path, request_viewer_handle);
284
285  if (viewer_handle) {
286    // The service returned a |ViewerHandle| and guarantees it will call
287    // the |RequestViewerHandle|, so passing ownership to it, to ensure the
288    // request is not cancelled. The |RequestViewerHandle| will delete itself
289    // after receiving the callback.
290    request_viewer_handle->TakeViewerHandle(viewer_handle.Pass());
291  } else {
292    // The service did not return a |ViewerHandle|, which means the
293    // |RequestViewerHandle| will never be called, so clean up now.
294    delete request_viewer_handle;
295
296    std::string error_page_html = viewer::GetErrorPageHtml();
297    callback.Run(base::RefCountedString::TakeString(&error_page_html));
298  }
299};
300
301std::string DomDistillerViewerSource::GetMimeType(
302    const std::string& path) const {
303  if (kViewerCssPath == path) {
304    return "text/css";
305  }
306  if (kViewerJsPath == path) {
307    return "text/javascript";
308  }
309  return "text/html";
310}
311
312bool DomDistillerViewerSource::ShouldServiceRequest(
313    const net::URLRequest* request) const {
314  return request->url().SchemeIs(scheme_.c_str());
315}
316
317// TODO(nyquist): Start tracking requests using this method.
318void DomDistillerViewerSource::WillServiceRequest(
319    const net::URLRequest* request,
320    std::string* path) const {
321}
322
323std::string DomDistillerViewerSource::GetContentSecurityPolicyObjectSrc()
324    const {
325  return "object-src 'none'; style-src 'self';";
326}
327
328}  // namespace dom_distiller
329