1f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)// Copyright 2013 The Chromium Authors. All rights reserved. 2f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)// Use of this source code is governed by a BSD-style license that can be 3f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)// found in the LICENSE file. 4f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 5f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#include "components/dom_distiller/core/distiller_page.h" 6f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 7a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch#include "base/bind.h" 8cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)#include "base/json/json_writer.h" 9f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#include "base/logging.h" 10a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch#include "base/message_loop/message_loop.h" 111320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#include "base/metrics/histogram.h" 12cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)#include "base/strings/string_util.h" 13cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)#include "base/strings/utf_string_conversions.h" 141320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#include "base/time/time.h" 151320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#include "grit/components_resources.h" 16cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)#include "third_party/dom_distiller_js/dom_distiller.pb.h" 17cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)#include "third_party/dom_distiller_js/dom_distiller_json_converter.h" 18a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch#include "ui/base/resource/resource_bundle.h" 19f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#include "url/gurl.h" 20f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 21f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)namespace dom_distiller { 22f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 23cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)namespace { 24cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) 25cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)const char* kOptionsPlaceholder = "$$OPTIONS"; 26cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) 27cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)std::string GetDistillerScriptWithOptions( 28cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) const dom_distiller::proto::DomDistillerOptions& options) { 29cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) std::string script = ResourceBundle::GetSharedInstance() 30cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) .GetRawDataResource(IDR_DISTILLER_JS) 31cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) .as_string(); 32cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) if (script.empty()) { 33cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) return ""; 34cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) } 35cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) 36cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) scoped_ptr<base::Value> options_value( 37cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) dom_distiller::proto::json::DomDistillerOptions::WriteToValue(options)); 38cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) std::string options_json; 39cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) if (!base::JSONWriter::Write(options_value.get(), &options_json)) { 40cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) NOTREACHED(); 41cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) } 42cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) size_t options_offset = script.find(kOptionsPlaceholder); 43cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) DCHECK_NE(std::string::npos, options_offset); 44cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) DCHECK_EQ(std::string::npos, 45cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) script.find(kOptionsPlaceholder, options_offset + 1)); 46cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) script = 47cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) script.replace(options_offset, strlen(kOptionsPlaceholder), options_json); 48cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) return script; 49cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)} 50cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) 51cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)} 52cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) 53f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)DistillerPageFactory::~DistillerPageFactory() {} 54f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 550529e5d033099cbfc42635f6f6183833b09dff6eBen MurdochDistillerPage::DistillerPage() : ready_(true) {} 56f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 57a02191e04bc25c4935f804f2c080ae28663d096dBen MurdochDistillerPage::~DistillerPage() {} 58f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 59cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)void DistillerPage::DistillPage( 60cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) const GURL& gurl, 61cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) const dom_distiller::proto::DomDistillerOptions options, 62cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) const DistillerPageCallback& callback) { 630529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch DCHECK(ready_); 640529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch // It is only possible to distill one page at a time. |ready_| is reset when 650529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch // the callback to OnDistillationDone happens. 660529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch ready_ = false; 67a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch distiller_page_callback_ = callback; 68cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) DistillPageImpl(gurl, GetDistillerScriptWithOptions(options)); 69f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)} 70f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 710529e5d033099cbfc42635f6f6183833b09dff6eBen Murdochvoid DistillerPage::OnDistillationDone(const GURL& page_url, 720529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch const base::Value* value) { 730529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch DCHECK(!ready_); 740529e5d033099cbfc42635f6f6183833b09dff6eBen Murdoch ready_ = true; 75cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) 761320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci scoped_ptr<dom_distiller::proto::DomDistillerResult> distiller_result( 771320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci new dom_distiller::proto::DomDistillerResult()); 781320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci bool found_content; 791320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (value->IsType(base::Value::TYPE_NULL)) { 801320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci found_content = false; 811320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } else { 821320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci found_content = 831320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci dom_distiller::proto::json::DomDistillerResult::ReadFromValue( 841320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci value, distiller_result.get()); 851320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (!found_content) { 861320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci DVLOG(1) << "Unable to parse DomDistillerResult."; 871320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } else { 881320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (distiller_result->has_timing_info()) { 891320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci const dom_distiller::proto::TimingInfo& timing = 901320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci distiller_result->timing_info(); 911320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (timing.has_markup_parsing_time()) { 921320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci UMA_HISTOGRAM_TIMES( 931320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci "DomDistiller.Time.MarkupParsing", 941320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci base::TimeDelta::FromMillisecondsD(timing.markup_parsing_time())); 951320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 961320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (timing.has_document_construction_time()) { 971320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci UMA_HISTOGRAM_TIMES( 981320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci "DomDistiller.Time.DocumentConstruction", 991320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci base::TimeDelta::FromMillisecondsD( 1001320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci timing.document_construction_time())); 1011320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 1021320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (timing.has_article_processing_time()) { 1031320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci UMA_HISTOGRAM_TIMES( 1041320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci "DomDistiller.Time.ArticleProcessing", 1051320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci base::TimeDelta::FromMillisecondsD( 1061320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci timing.article_processing_time())); 1071320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 1081320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (timing.has_formatting_time()) { 1091320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci UMA_HISTOGRAM_TIMES( 1101320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci "DomDistiller.Time.Formatting", 1111320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci base::TimeDelta::FromMillisecondsD(timing.formatting_time())); 1121320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 1131320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (timing.has_total_time()) { 1141320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci UMA_HISTOGRAM_TIMES( 1151320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci "DomDistiller.Time.DistillationTotal", 1161320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci base::TimeDelta::FromMillisecondsD(timing.total_time())); 1171320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 1181320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 1191320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (distiller_result->has_statistics_info()) { 1201320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci const dom_distiller::proto::StatisticsInfo& statistics = 1211320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci distiller_result->statistics_info(); 1221320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (statistics.has_word_count()) { 1231320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci UMA_HISTOGRAM_CUSTOM_COUNTS( 1241320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci "DomDistiller.Statistics.WordCount", 1251320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci statistics.word_count(), 1261320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 1, 4000, 50); 1271320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 128a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch } 1295f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles) } 130a02191e04bc25c4935f804f2c080ae28663d096dBen Murdoch } 131cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) 132cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) base::MessageLoop::current()->PostTask( 133cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) FROM_HERE, 1341320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci base::Bind(distiller_page_callback_, 1351320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci base::Passed(&distiller_result), 1361320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci found_content)); 137f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)} 138f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 139f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)} // namespace dom_distiller 140