1f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)// Copyright 2013 The Chromium Authors. All rights reserved. 2f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)// Use of this source code is governed by a BSD-style license that can be 3f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)// found in the LICENSE file. 4f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 5f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#include "components/dom_distiller/core/distiller.h" 6f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 7f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#include <map> 8a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)#include <vector> 9f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 10a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)#include "base/auto_reset.h" 11f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#include "base/bind.h" 12f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#include "base/callback.h" 135d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)#include "base/location.h" 145d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)#include "base/message_loop/message_loop.h" 155d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)#include "base/strings/string_number_conversions.h" 16f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#include "base/strings/utf_string_conversions.h" 17f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#include "base/values.h" 18f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#include "components/dom_distiller/core/distiller_page.h" 19f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#include "components/dom_distiller/core/distiller_url_fetcher.h" 205d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)#include "components/dom_distiller/core/proto/distilled_article.pb.h" 21f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#include "components/dom_distiller/core/proto/distilled_page.pb.h" 22f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#include "net/url_request/url_request_context_getter.h" 235d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 245d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)namespace { 255d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)// Maximum number of distilled pages in an article. 265d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)const size_t kMaxPagesInArticle = 32; 275d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)} 28f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 29f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)namespace dom_distiller { 30f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 31f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)DistillerFactoryImpl::DistillerFactoryImpl( 32cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) scoped_ptr<DistillerURLFetcherFactory> distiller_url_fetcher_factory, 33cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) const dom_distiller::proto::DomDistillerOptions& dom_distiller_options) 34cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) : distiller_url_fetcher_factory_(distiller_url_fetcher_factory.Pass()), 35cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) dom_distiller_options_(dom_distiller_options) { 365c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu} 37f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 38f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)DistillerFactoryImpl::~DistillerFactoryImpl() {} 39f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 40f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)scoped_ptr<Distiller> DistillerFactoryImpl::CreateDistiller() { 41cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) scoped_ptr<DistillerImpl> distiller(new DistillerImpl( 42cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) *distiller_url_fetcher_factory_, dom_distiller_options_)); 43a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) return distiller.PassAs<Distiller>(); 44f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)} 45f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 465d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)DistillerImpl::DistilledPageData::DistilledPageData() {} 475d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 485d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)DistillerImpl::DistilledPageData::~DistilledPageData() {} 495d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 50f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)DistillerImpl::DistillerImpl( 51cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) const DistillerURLFetcherFactory& distiller_url_fetcher_factory, 52cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) const dom_distiller::proto::DomDistillerOptions& dom_distiller_options) 535d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) : distiller_url_fetcher_factory_(distiller_url_fetcher_factory), 54cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) dom_distiller_options_(dom_distiller_options), 55a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) max_pages_in_article_(kMaxPagesInArticle), 56a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) destruction_allowed_(true), 57a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) weak_factory_(this) { 58f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)} 59f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 60a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)DistillerImpl::~DistillerImpl() { 61a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) DCHECK(destruction_allowed_); 62a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)} 63f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 645d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)void DistillerImpl::SetMaxNumPagesInArticle(size_t max_num_pages) { 655d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) max_pages_in_article_ = max_num_pages; 665d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)} 675d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 685d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)bool DistillerImpl::AreAllPagesFinished() const { 695d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) return started_pages_index_.empty() && waiting_pages_.empty(); 705d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)} 715d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 725d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)size_t DistillerImpl::TotalPageCount() const { 735d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) return waiting_pages_.size() + started_pages_index_.size() + 745d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) finished_pages_index_.size(); 75f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)} 76f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 775d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)void DistillerImpl::AddToDistillationQueue(int page_num, const GURL& url) { 785d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) if (!IsPageNumberInUse(page_num) && url.is_valid() && 795d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) TotalPageCount() < max_pages_in_article_ && 805d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) seen_urls_.find(url.spec()) == seen_urls_.end()) { 815d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) waiting_pages_[page_num] = url; 825d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) } 83f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)} 84f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 855d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)bool DistillerImpl::IsPageNumberInUse(int page_num) const { 865d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) return waiting_pages_.find(page_num) != waiting_pages_.end() || 875d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) started_pages_index_.find(page_num) != started_pages_index_.end() || 885d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) finished_pages_index_.find(page_num) != finished_pages_index_.end(); 89f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)} 90f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 915d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)DistillerImpl::DistilledPageData* DistillerImpl::GetPageAtIndex(size_t index) 925d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) const { 935d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) DCHECK_LT(index, pages_.size()); 945d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) DistilledPageData* page_data = pages_[index]; 955d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) DCHECK(page_data); 965d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) return page_data; 97f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)} 98f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 995d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)void DistillerImpl::DistillPage(const GURL& url, 1005c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu scoped_ptr<DistillerPage> distiller_page, 101a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) const DistillationFinishedCallback& finished_cb, 102a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) const DistillationUpdateCallback& update_cb) { 1035d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) DCHECK(AreAllPagesFinished()); 1045c02ac1a9c1b504631c0a3d2b6e737b5d738bae1Bo Liu distiller_page_ = distiller_page.Pass(); 105a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) finished_cb_ = finished_cb; 106a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) update_cb_ = update_cb; 1075d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 1085d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) AddToDistillationQueue(0, url); 1095d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) DistillNextPage(); 1105d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)} 1115d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 1125d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)void DistillerImpl::DistillNextPage() { 1135d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) if (!waiting_pages_.empty()) { 1145d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) std::map<int, GURL>::iterator front = waiting_pages_.begin(); 1155d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) int page_num = front->first; 1165d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) const GURL url = front->second; 1175d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 1185d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) waiting_pages_.erase(front); 1195d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) DCHECK(url.is_valid()); 1205d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) DCHECK(started_pages_index_.find(page_num) == started_pages_index_.end()); 1215d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) DCHECK(finished_pages_index_.find(page_num) == finished_pages_index_.end()); 1225d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) seen_urls_.insert(url.spec()); 1235d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) pages_.push_back(new DistilledPageData()); 1245d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) started_pages_index_[page_num] = pages_.size() - 1; 125a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch distiller_page_->DistillPage( 1265d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) url, 127cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) dom_distiller_options_, 1285d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) base::Bind(&DistillerImpl::OnPageDistillationFinished, 129a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) weak_factory_.GetWeakPtr(), 1305d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) page_num, 1315d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) url)); 132f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) } 1335d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)} 1345d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 1355d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)void DistillerImpl::OnPageDistillationFinished( 1365d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) int page_num, 1375d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) const GURL& page_url, 1381320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci scoped_ptr<proto::DomDistillerResult> distiller_result, 1395d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) bool distillation_successful) { 1405d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) DCHECK(started_pages_index_.find(page_num) != started_pages_index_.end()); 1415d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) if (distillation_successful) { 1421320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci DCHECK(distiller_result.get()); 1435d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) DistilledPageData* page_data = 1445d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) GetPageAtIndex(started_pages_index_[page_num]); 145a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) page_data->distilled_page_proto = 146a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) new base::RefCountedData<DistilledPageProto>(); 1475d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) page_data->page_num = page_num; 1481320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (distiller_result->has_title()) { 1491320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci page_data->distilled_page_proto->data.set_title( 1501320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci distiller_result->title()); 1511320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 152a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) page_data->distilled_page_proto->data.set_url(page_url.spec()); 1531320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (distiller_result->has_distilled_content() && 1541320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci distiller_result->distilled_content().has_html()) { 1551320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci page_data->distilled_page_proto->data.set_html( 1561320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci distiller_result->distilled_content().html()); 1571320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 1581320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (distiller_result->has_debug_info() && 1591320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci distiller_result->debug_info().has_log()) { 1601320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci page_data->distilled_page_proto->data.mutable_debug_info()->set_log( 1611320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci distiller_result->debug_info().log()); 1625d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) } 1635d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 1641320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (distiller_result->has_pagination_info()) { 1651320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci proto::PaginationInfo pagination_info = 1661320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci distiller_result->pagination_info(); 1671320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (pagination_info.has_next_page()) { 1681320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci GURL next_page_url(pagination_info.next_page()); 1691320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (next_page_url.is_valid()) { 1701320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci // The pages should be in same origin. 1711320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci DCHECK_EQ(next_page_url.GetOrigin(), page_url.GetOrigin()); 1721320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci AddToDistillationQueue(page_num + 1, next_page_url); 1731320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 1741320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 1751320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 1761320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (pagination_info.has_prev_page()) { 1771320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci GURL prev_page_url(pagination_info.prev_page()); 1781320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (prev_page_url.is_valid()) { 1791320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci DCHECK_EQ(prev_page_url.GetOrigin(), page_url.GetOrigin()); 1801320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci AddToDistillationQueue(page_num - 1, prev_page_url); 1811320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 1821320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 1835d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) } 1845d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 1851320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci for (int img_num = 0; img_num < distiller_result->image_urls_size(); 1865d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) ++img_num) { 1875d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) std::string image_id = 1885d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) base::IntToString(page_num + 1) + "_" + base::IntToString(img_num); 1891320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci FetchImage(page_num, image_id, distiller_result->image_urls(img_num)); 190f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) } 1915d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 1925d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) AddPageIfDone(page_num); 1935d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) DistillNextPage(); 1945d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) } else { 1955d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) started_pages_index_.erase(page_num); 1965d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) RunDistillerCallbackIfDone(); 197f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) } 198f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)} 199f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 2005d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)void DistillerImpl::FetchImage(int page_num, 2015d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) const std::string& image_id, 202f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) const std::string& item) { 2035d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) DCHECK(started_pages_index_.find(page_num) != started_pages_index_.end()); 2045d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) DistilledPageData* page_data = GetPageAtIndex(started_pages_index_[page_num]); 205f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) DistillerURLFetcher* fetcher = 206f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) distiller_url_fetcher_factory_.CreateDistillerURLFetcher(); 2075d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) page_data->image_fetchers_.push_back(fetcher); 2085d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 209f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) fetcher->FetchURL(item, 210f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) base::Bind(&DistillerImpl::OnFetchImageDone, 211a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) weak_factory_.GetWeakPtr(), 2125d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) page_num, 2135d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) base::Unretained(fetcher), 2145d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) image_id)); 215f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)} 216f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 2175d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)void DistillerImpl::OnFetchImageDone(int page_num, 2185d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) DistillerURLFetcher* url_fetcher, 2195d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) const std::string& id, 220f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) const std::string& response) { 2215d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) DCHECK(started_pages_index_.find(page_num) != started_pages_index_.end()); 2225d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) DistilledPageData* page_data = GetPageAtIndex(started_pages_index_[page_num]); 2231320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci DCHECK(page_data->distilled_page_proto.get()); 2245d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) DCHECK(url_fetcher); 2255d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) ScopedVector<DistillerURLFetcher>::iterator fetcher_it = 2265d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) std::find(page_data->image_fetchers_.begin(), 2275d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) page_data->image_fetchers_.end(), 2285d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) url_fetcher); 2295d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 2305d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) DCHECK(fetcher_it != page_data->image_fetchers_.end()); 2315d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) // Delete the |url_fetcher| by DeleteSoon since the OnFetchImageDone 2325d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) // callback is invoked by the |url_fetcher|. 2335d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) page_data->image_fetchers_.weak_erase(fetcher_it); 2345d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) base::MessageLoop::current()->DeleteSoon(FROM_HERE, url_fetcher); 2355d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 236a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) DistilledPageProto_Image* image = 237a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) page_data->distilled_page_proto->data.add_image(); 238f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) image->set_name(id); 239f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) image->set_data(response); 2405d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 2415d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) AddPageIfDone(page_num); 2425d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)} 2435d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 2445d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)void DistillerImpl::AddPageIfDone(int page_num) { 2455d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) DCHECK(started_pages_index_.find(page_num) != started_pages_index_.end()); 2465d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) DCHECK(finished_pages_index_.find(page_num) == finished_pages_index_.end()); 2475d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) DistilledPageData* page_data = GetPageAtIndex(started_pages_index_[page_num]); 2485d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) if (page_data->image_fetchers_.empty()) { 2495d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) finished_pages_index_[page_num] = started_pages_index_[page_num]; 2505d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) started_pages_index_.erase(page_num); 251a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) const ArticleDistillationUpdate& article_update = 252a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) CreateDistillationUpdate(); 253a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) DCHECK_EQ(article_update.GetPagesSize(), finished_pages_index_.size()); 254a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) update_cb_.Run(article_update); 2555d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) RunDistillerCallbackIfDone(); 2565d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) } 2575d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)} 2585d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 259a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)const ArticleDistillationUpdate DistillerImpl::CreateDistillationUpdate() 260a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) const { 261a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) bool has_prev_page = false; 262a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) bool has_next_page = false; 263a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) if (!finished_pages_index_.empty()) { 264a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) int prev_page_num = finished_pages_index_.begin()->first - 1; 265a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) int next_page_num = finished_pages_index_.rbegin()->first + 1; 266a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) has_prev_page = IsPageNumberInUse(prev_page_num); 267a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) has_next_page = IsPageNumberInUse(next_page_num); 268a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) } 269a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) 270a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) std::vector<scoped_refptr<ArticleDistillationUpdate::RefCountedPageProto> > 271a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) update_pages; 272a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) for (std::map<int, size_t>::const_iterator it = finished_pages_index_.begin(); 273a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) it != finished_pages_index_.end(); 274a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) ++it) { 275a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) update_pages.push_back(pages_[it->second]->distilled_page_proto); 276a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) } 277a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) return ArticleDistillationUpdate(update_pages, has_next_page, has_prev_page); 278a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)} 279a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) 2805d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)void DistillerImpl::RunDistillerCallbackIfDone() { 281a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) DCHECK(!finished_cb_.is_null()); 2825d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) if (AreAllPagesFinished()) { 2835d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) bool first_page = true; 2845d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) scoped_ptr<DistilledArticleProto> article_proto( 2855d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) new DistilledArticleProto()); 2865d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) // Stitch the pages back into the article. 2875d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) for (std::map<int, size_t>::iterator it = finished_pages_index_.begin(); 2885d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) it != finished_pages_index_.end();) { 2895d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) DistilledPageData* page_data = GetPageAtIndex(it->second); 290a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) *(article_proto->add_pages()) = page_data->distilled_page_proto->data; 2915d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 2925d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) if (first_page) { 293cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) article_proto->set_title(page_data->distilled_page_proto->data.title()); 2945d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) first_page = false; 2955d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) } 2965d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 2975d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) finished_pages_index_.erase(it++); 2985d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) } 2995d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 3005d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) pages_.clear(); 3015d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) DCHECK_LE(static_cast<size_t>(article_proto->pages_size()), 3025d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) max_pages_in_article_); 3035d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 3045d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) DCHECK(pages_.empty()); 3055d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) DCHECK(finished_pages_index_.empty()); 306a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) 307a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) base::AutoReset<bool> dont_delete_this_in_callback(&destruction_allowed_, 308a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) false); 309a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) finished_cb_.Run(article_proto.Pass()); 310a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) finished_cb_.Reset(); 311f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) } 312f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)} 313f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 314f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)} // namespace dom_distiller 315