// Copyright 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
5f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#include "components/dom_distiller/core/distiller.h"
6f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
7f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#include <map>
8a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)#include <vector>
9f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
10a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)#include "base/auto_reset.h"
11f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#include "base/bind.h"
12f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#include "base/callback.h"
135d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)#include "base/location.h"
145d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)#include "base/message_loop/message_loop.h"
155d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)#include "base/strings/string_number_conversions.h"
16f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#include "base/strings/utf_string_conversions.h"
17f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#include "base/values.h"
18f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#include "components/dom_distiller/core/distiller_page.h"
19f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#include "components/dom_distiller/core/distiller_url_fetcher.h"
205d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)#include "components/dom_distiller/core/proto/distilled_article.pb.h"
21f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#include "components/dom_distiller/core/proto/distilled_page.pb.h"
22f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#include "net/url_request/url_request_context_getter.h"
235d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)
namespace {

// Default upper bound on how many distilled pages one article may contain.
// DistillerImpl starts with this limit (see its constructor).
const size_t kMaxPagesInArticle = 32;

}  // namespace
28f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
29f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)namespace dom_distiller {
30f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
31f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)DistillerFactoryImpl::DistillerFactoryImpl(
32cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)    scoped_ptr<DistillerURLFetcherFactory> distiller_url_fetcher_factory,
33cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)    const dom_distiller::proto::DomDistillerOptions& dom_distiller_options)
34cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)    : distiller_url_fetcher_factory_(distiller_url_fetcher_factory.Pass()),
35cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)      dom_distiller_options_(dom_distiller_options) {
365c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu}
37f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
38f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)DistillerFactoryImpl::~DistillerFactoryImpl() {}
39f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
40f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)scoped_ptr<Distiller> DistillerFactoryImpl::CreateDistiller() {
41cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)  scoped_ptr<DistillerImpl> distiller(new DistillerImpl(
42cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)      *distiller_url_fetcher_factory_, dom_distiller_options_));
43a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)  return distiller.PassAs<Distiller>();
44f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)}
45f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
465d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)DistillerImpl::DistilledPageData::DistilledPageData() {}
475d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)
485d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)DistillerImpl::DistilledPageData::~DistilledPageData() {}
495d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)
50f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)DistillerImpl::DistillerImpl(
51cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)    const DistillerURLFetcherFactory& distiller_url_fetcher_factory,
52cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)    const dom_distiller::proto::DomDistillerOptions& dom_distiller_options)
535d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)    : distiller_url_fetcher_factory_(distiller_url_fetcher_factory),
54cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)      dom_distiller_options_(dom_distiller_options),
55a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)      max_pages_in_article_(kMaxPagesInArticle),
56a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)      destruction_allowed_(true),
57a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)      weak_factory_(this) {
58f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)}
59f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
60a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)DistillerImpl::~DistillerImpl() {
61a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  DCHECK(destruction_allowed_);
62a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)}
63f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
645d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)void DistillerImpl::SetMaxNumPagesInArticle(size_t max_num_pages) {
655d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  max_pages_in_article_ = max_num_pages;
665d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)}
675d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)
685d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)bool DistillerImpl::AreAllPagesFinished() const {
695d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  return started_pages_index_.empty() && waiting_pages_.empty();
705d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)}
715d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)
725d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)size_t DistillerImpl::TotalPageCount() const {
735d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  return waiting_pages_.size() + started_pages_index_.size() +
745d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)         finished_pages_index_.size();
75f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)}
76f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
775d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)void DistillerImpl::AddToDistillationQueue(int page_num, const GURL& url) {
785d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  if (!IsPageNumberInUse(page_num) && url.is_valid() &&
795d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)      TotalPageCount() < max_pages_in_article_ &&
805d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)      seen_urls_.find(url.spec()) == seen_urls_.end()) {
815d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)    waiting_pages_[page_num] = url;
825d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  }
83f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)}
84f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
855d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)bool DistillerImpl::IsPageNumberInUse(int page_num) const {
865d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  return waiting_pages_.find(page_num) != waiting_pages_.end() ||
875d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)         started_pages_index_.find(page_num) != started_pages_index_.end() ||
885d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)         finished_pages_index_.find(page_num) != finished_pages_index_.end();
89f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)}
90f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
915d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)DistillerImpl::DistilledPageData* DistillerImpl::GetPageAtIndex(size_t index)
925d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)    const {
935d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  DCHECK_LT(index, pages_.size());
945d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  DistilledPageData* page_data = pages_[index];
955d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  DCHECK(page_data);
965d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  return page_data;
97f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)}
98f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
995d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)void DistillerImpl::DistillPage(const GURL& url,
1005c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu                                scoped_ptr<DistillerPage> distiller_page,
101a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)                                const DistillationFinishedCallback& finished_cb,
102a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)                                const DistillationUpdateCallback& update_cb) {
1035d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  DCHECK(AreAllPagesFinished());
1045c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu  distiller_page_ = distiller_page.Pass();
105a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  finished_cb_ = finished_cb;
106a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  update_cb_ = update_cb;
1075d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)
1085d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  AddToDistillationQueue(0, url);
1095d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  DistillNextPage();
1105d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)}
1115d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)
1125d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)void DistillerImpl::DistillNextPage() {
1135d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  if (!waiting_pages_.empty()) {
1145d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)    std::map<int, GURL>::iterator front = waiting_pages_.begin();
1155d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)    int page_num = front->first;
1165d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)    const GURL url = front->second;
1175d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)
1185d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)    waiting_pages_.erase(front);
1195d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)    DCHECK(url.is_valid());
1205d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)    DCHECK(started_pages_index_.find(page_num) == started_pages_index_.end());
1215d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)    DCHECK(finished_pages_index_.find(page_num) == finished_pages_index_.end());
1225d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)    seen_urls_.insert(url.spec());
1235d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)    pages_.push_back(new DistilledPageData());
1245d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)    started_pages_index_[page_num] = pages_.size() - 1;
125a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch    distiller_page_->DistillPage(
1265d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)        url,
127cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)        dom_distiller_options_,
1285d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)        base::Bind(&DistillerImpl::OnPageDistillationFinished,
129a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)                   weak_factory_.GetWeakPtr(),
1305d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)                   page_num,
1315d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)                   url));
132f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  }
1335d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)}
1345d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)
1355d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)void DistillerImpl::OnPageDistillationFinished(
1365d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)    int page_num,
1375d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)    const GURL& page_url,
1381320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    scoped_ptr<proto::DomDistillerResult> distiller_result,
1395d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)    bool distillation_successful) {
1405d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  DCHECK(started_pages_index_.find(page_num) != started_pages_index_.end());
1415d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  if (distillation_successful) {
1421320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    DCHECK(distiller_result.get());
1435d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)    DistilledPageData* page_data =
1445d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)        GetPageAtIndex(started_pages_index_[page_num]);
145a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)    page_data->distilled_page_proto =
146a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)        new base::RefCountedData<DistilledPageProto>();
1475d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)    page_data->page_num = page_num;
1481320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    if (distiller_result->has_title()) {
1491320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      page_data->distilled_page_proto->data.set_title(
1501320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci          distiller_result->title());
1511320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    }
152a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)    page_data->distilled_page_proto->data.set_url(page_url.spec());
1531320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    if (distiller_result->has_distilled_content() &&
1541320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci        distiller_result->distilled_content().has_html()) {
1551320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      page_data->distilled_page_proto->data.set_html(
1561320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci          distiller_result->distilled_content().html());
1571320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    }
1581320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    if (distiller_result->has_debug_info() &&
1591320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci        distiller_result->debug_info().has_log()) {
1601320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      page_data->distilled_page_proto->data.mutable_debug_info()->set_log(
1611320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci          distiller_result->debug_info().log());
1625d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)    }
1635d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)
1641320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    if (distiller_result->has_pagination_info()) {
1651320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      proto::PaginationInfo pagination_info =
1661320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci          distiller_result->pagination_info();
1671320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      if (pagination_info.has_next_page()) {
1681320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci        GURL next_page_url(pagination_info.next_page());
1691320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci        if (next_page_url.is_valid()) {
1701320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci          // The pages should be in same origin.
1711320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci          DCHECK_EQ(next_page_url.GetOrigin(), page_url.GetOrigin());
1721320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci          AddToDistillationQueue(page_num + 1, next_page_url);
1731320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci        }
1741320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      }
1751320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
1761320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      if (pagination_info.has_prev_page()) {
1771320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci        GURL prev_page_url(pagination_info.prev_page());
1781320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci        if (prev_page_url.is_valid()) {
1791320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci          DCHECK_EQ(prev_page_url.GetOrigin(), page_url.GetOrigin());
1801320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci          AddToDistillationQueue(page_num - 1, prev_page_url);
1811320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci        }
1821320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      }
1835d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)    }
1845d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)
1851320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci    for (int img_num = 0; img_num < distiller_result->image_urls_size();
1865d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)         ++img_num) {
1875d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)      std::string image_id =
1885d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)          base::IntToString(page_num + 1) + "_" + base::IntToString(img_num);
1891320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci      FetchImage(page_num, image_id, distiller_result->image_urls(img_num));
190f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)    }
1915d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)
1925d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)    AddPageIfDone(page_num);
1935d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)    DistillNextPage();
1945d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  } else {
1955d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)    started_pages_index_.erase(page_num);
1965d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)    RunDistillerCallbackIfDone();
197f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  }
198f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)}
199f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
2005d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)void DistillerImpl::FetchImage(int page_num,
2015d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)                               const std::string& image_id,
202f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                               const std::string& item) {
2035d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  DCHECK(started_pages_index_.find(page_num) != started_pages_index_.end());
2045d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  DistilledPageData* page_data = GetPageAtIndex(started_pages_index_[page_num]);
205f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  DistillerURLFetcher* fetcher =
206f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)      distiller_url_fetcher_factory_.CreateDistillerURLFetcher();
2075d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  page_data->image_fetchers_.push_back(fetcher);
2085d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)
209f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  fetcher->FetchURL(item,
210f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                    base::Bind(&DistillerImpl::OnFetchImageDone,
211a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)                               weak_factory_.GetWeakPtr(),
2125d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)                               page_num,
2135d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)                               base::Unretained(fetcher),
2145d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)                               image_id));
215f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)}
216f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
2175d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)void DistillerImpl::OnFetchImageDone(int page_num,
2185d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)                                     DistillerURLFetcher* url_fetcher,
2195d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)                                     const std::string& id,
220f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)                                     const std::string& response) {
2215d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  DCHECK(started_pages_index_.find(page_num) != started_pages_index_.end());
2225d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  DistilledPageData* page_data = GetPageAtIndex(started_pages_index_[page_num]);
2231320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  DCHECK(page_data->distilled_page_proto.get());
2245d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  DCHECK(url_fetcher);
2255d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  ScopedVector<DistillerURLFetcher>::iterator fetcher_it =
2265d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)      std::find(page_data->image_fetchers_.begin(),
2275d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)                page_data->image_fetchers_.end(),
2285d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)                url_fetcher);
2295d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)
2305d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  DCHECK(fetcher_it != page_data->image_fetchers_.end());
2315d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  // Delete the |url_fetcher| by DeleteSoon since the OnFetchImageDone
2325d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  // callback is invoked by the |url_fetcher|.
2335d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  page_data->image_fetchers_.weak_erase(fetcher_it);
2345d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  base::MessageLoop::current()->DeleteSoon(FROM_HERE, url_fetcher);
2355d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)
236a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  DistilledPageProto_Image* image =
237a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)      page_data->distilled_page_proto->data.add_image();
238f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  image->set_name(id);
239f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  image->set_data(response);
2405d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)
2415d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  AddPageIfDone(page_num);
2425d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)}
2435d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)
2445d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)void DistillerImpl::AddPageIfDone(int page_num) {
2455d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  DCHECK(started_pages_index_.find(page_num) != started_pages_index_.end());
2465d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  DCHECK(finished_pages_index_.find(page_num) == finished_pages_index_.end());
2475d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  DistilledPageData* page_data = GetPageAtIndex(started_pages_index_[page_num]);
2485d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  if (page_data->image_fetchers_.empty()) {
2495d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)    finished_pages_index_[page_num] = started_pages_index_[page_num];
2505d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)    started_pages_index_.erase(page_num);
251a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)    const ArticleDistillationUpdate& article_update =
252a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)        CreateDistillationUpdate();
253a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)    DCHECK_EQ(article_update.GetPagesSize(), finished_pages_index_.size());
254a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)    update_cb_.Run(article_update);
2555d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)    RunDistillerCallbackIfDone();
2565d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  }
2575d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)}
2585d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)
259a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)const ArticleDistillationUpdate DistillerImpl::CreateDistillationUpdate()
260a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)    const {
261a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  bool has_prev_page = false;
262a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  bool has_next_page = false;
263a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  if (!finished_pages_index_.empty()) {
264a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)    int prev_page_num = finished_pages_index_.begin()->first - 1;
265a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)    int next_page_num = finished_pages_index_.rbegin()->first + 1;
266a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)    has_prev_page = IsPageNumberInUse(prev_page_num);
267a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)    has_next_page = IsPageNumberInUse(next_page_num);
268a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  }
269a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)
270a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  std::vector<scoped_refptr<ArticleDistillationUpdate::RefCountedPageProto> >
271a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)      update_pages;
272a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  for (std::map<int, size_t>::const_iterator it = finished_pages_index_.begin();
273a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)       it != finished_pages_index_.end();
274a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)       ++it) {
275a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)    update_pages.push_back(pages_[it->second]->distilled_page_proto);
276a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  }
277a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  return ArticleDistillationUpdate(update_pages, has_next_page, has_prev_page);
278a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)}
279a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)
2805d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)void DistillerImpl::RunDistillerCallbackIfDone() {
281a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  DCHECK(!finished_cb_.is_null());
2825d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  if (AreAllPagesFinished()) {
2835d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)    bool first_page = true;
2845d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)    scoped_ptr<DistilledArticleProto> article_proto(
2855d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)        new DistilledArticleProto());
2865d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)    // Stitch the pages back into the article.
2875d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)    for (std::map<int, size_t>::iterator it = finished_pages_index_.begin();
2885d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)         it != finished_pages_index_.end();) {
2895d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)      DistilledPageData* page_data = GetPageAtIndex(it->second);
290a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)      *(article_proto->add_pages()) = page_data->distilled_page_proto->data;
2915d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)
2925d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)      if (first_page) {
293cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)        article_proto->set_title(page_data->distilled_page_proto->data.title());
2945d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)        first_page = false;
2955d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)      }
2965d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)
2975d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)      finished_pages_index_.erase(it++);
2985d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)    }
2995d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)
3005d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)    pages_.clear();
3015d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)    DCHECK_LE(static_cast<size_t>(article_proto->pages_size()),
3025d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)              max_pages_in_article_);
3035d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)
3045d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)    DCHECK(pages_.empty());
3055d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)    DCHECK(finished_pages_index_.empty());
306a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)
307a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)    base::AutoReset<bool> dont_delete_this_in_callback(&destruction_allowed_,
308a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)                                                       false);
309a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)    finished_cb_.Run(article_proto.Pass());
310a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)    finished_cb_.Reset();
311f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)  }
312f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)}
313f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)
314f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)}  // namespace dom_distiller
315