distiller_page.cc revision 5f1c94371a64b3196d4be9466099bb892df9b88e
1// Copyright 2013 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "components/dom_distiller/core/distiller_page.h"
6
7#include "base/bind.h"
8#include "base/json/json_writer.h"
9#include "base/logging.h"
10#include "base/message_loop/message_loop.h"
11#include "base/strings/string_util.h"
12#include "base/strings/utf_string_conversions.h"
13#include "grit/component_resources.h"
14#include "third_party/dom_distiller_js/dom_distiller.pb.h"
15#include "third_party/dom_distiller_js/dom_distiller_json_converter.h"
16#include "ui/base/resource/resource_bundle.h"
17#include "url/gurl.h"
18
19namespace dom_distiller {
20
21namespace {
22
// Token in the distiller script that GetDistillerScriptWithOptions() replaces
// with the JSON-serialized options. Declared as an array rather than a
// pointer so that the constant itself cannot be re-pointed at runtime.
const char kOptionsPlaceholder[] = "$$OPTIONS";
24
25std::string GetDistillerScriptWithOptions(
26    const dom_distiller::proto::DomDistillerOptions& options) {
27  std::string script = ResourceBundle::GetSharedInstance()
28                           .GetRawDataResource(IDR_DISTILLER_JS)
29                           .as_string();
30  if (script.empty()) {
31    return "";
32  }
33
34  scoped_ptr<base::Value> options_value(
35      dom_distiller::proto::json::DomDistillerOptions::WriteToValue(options));
36  std::string options_json;
37  if (!base::JSONWriter::Write(options_value.get(), &options_json)) {
38    NOTREACHED();
39  }
40  size_t options_offset = script.find(kOptionsPlaceholder);
41  DCHECK_NE(std::string::npos, options_offset);
42  DCHECK_EQ(std::string::npos,
43            script.find(kOptionsPlaceholder, options_offset + 1));
44  script =
45      script.replace(options_offset, strlen(kOptionsPlaceholder), options_json);
46  return script;
47}
48
49}
50
51DistilledPageInfo::DistilledPageInfo() {}
52
53DistilledPageInfo::~DistilledPageInfo() {}
54
55DistilledPageInfo::MarkupArticle::MarkupArticle() {}
56
57DistilledPageInfo::MarkupArticle::~MarkupArticle() {}
58
59DistilledPageInfo::MarkupImage::MarkupImage() {}
60
61DistilledPageInfo::MarkupImage::~MarkupImage() {}
62
63DistilledPageInfo::MarkupInfo::MarkupInfo() {}
64
65DistilledPageInfo::MarkupInfo::~MarkupInfo() {}
66
67DistillerPageFactory::~DistillerPageFactory() {}
68
69DistillerPage::DistillerPage() : ready_(true) {}
70
71DistillerPage::~DistillerPage() {}
72
73void DistillerPage::DistillPage(
74    const GURL& gurl,
75    const dom_distiller::proto::DomDistillerOptions options,
76    const DistillerPageCallback& callback) {
77  DCHECK(ready_);
78  // It is only possible to distill one page at a time. |ready_| is reset when
79  // the callback to OnDistillationDone happens.
80  ready_ = false;
81  distiller_page_callback_ = callback;
82  DistillPageImpl(gurl, GetDistillerScriptWithOptions(options));
83}
84
85void DistillerPage::OnDistillationDone(const GURL& page_url,
86                                       const base::Value* value) {
87  DCHECK(!ready_);
88  ready_ = true;
89
90  scoped_ptr<DistilledPageInfo> page_info(new DistilledPageInfo());
91  bool found_content = !value->IsType(base::Value::TYPE_NULL);
92  if (found_content) {
93    dom_distiller::proto::DomDistillerResult distiller_result =
94        dom_distiller::proto::json::DomDistillerResult::ReadFromValue(value);
95
96    page_info->title = distiller_result.title();
97    page_info->html = distiller_result.distilled_content().html();
98    page_info->next_page_url = distiller_result.pagination_info().next_page();
99    page_info->prev_page_url = distiller_result.pagination_info().prev_page();
100    for (int i = 0; i < distiller_result.image_urls_size(); ++i) {
101      const std::string image_url = distiller_result.image_urls(i);
102      if (GURL(image_url).is_valid()) {
103        page_info->image_urls.push_back(image_url);
104      }
105    }
106    const dom_distiller::proto::MarkupInfo& src_markup_info =
107        distiller_result.markup_info();
108    DistilledPageInfo::MarkupInfo& dst_markup_info = page_info->markup_info;
109    dst_markup_info.title = src_markup_info.title();
110    dst_markup_info.type = src_markup_info.type();
111    dst_markup_info.url = src_markup_info.url();
112    dst_markup_info.description = src_markup_info.description();
113    dst_markup_info.publisher = src_markup_info.publisher();
114    dst_markup_info.copyright = src_markup_info.copyright();
115    dst_markup_info.author = src_markup_info.author();
116
117    const dom_distiller::proto::MarkupArticle& src_article =
118        src_markup_info.article();
119    DistilledPageInfo::MarkupArticle& dst_article = dst_markup_info.article;
120    dst_article.published_time = src_article.published_time();
121    dst_article.modified_time = src_article.modified_time();
122    dst_article.expiration_time = src_article.expiration_time();
123    dst_article.section = src_article.section();
124    for (int i = 0; i < src_article.authors_size(); ++i) {
125      dst_article.authors.push_back(src_article.authors(i));
126    }
127
128    for (int i = 0; i < src_markup_info.images_size(); ++i) {
129      const dom_distiller::proto::MarkupImage& src_image =
130          src_markup_info.images(i);
131      DistilledPageInfo::MarkupImage dst_image;
132      dst_image.url = src_image.url();
133      dst_image.secure_url = src_image.secure_url();
134      dst_image.type = src_image.type();
135      dst_image.caption = src_image.caption();
136      dst_image.width = src_image.width();
137      dst_image.height = src_image.height();
138      dst_markup_info.images.push_back(dst_image);
139    }
140  }
141
142  base::MessageLoop::current()->PostTask(
143      FROM_HERE,
144      base::Bind(
145          distiller_page_callback_, base::Passed(&page_info), found_content));
146}
147
148}  // namespace dom_distiller
149