1// Copyright 2013 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "components/precache/core/precache_fetcher.h"
6
7#include <string>
8
9#include "base/bind.h"
10#include "base/callback.h"
11#include "base/command_line.h"
12#include "base/compiler_specific.h"
13#include "base/containers/hash_tables.h"
14#include "components/precache/core/precache_switches.h"
15#include "components/precache/core/proto/precache.pb.h"
16#include "net/base/escape.h"
17#include "net/base/load_flags.h"
18#include "net/url_request/url_fetcher.h"
19#include "net/url_request/url_fetcher_delegate.h"
20#include "net/url_request/url_request_context_getter.h"
21#include "net/url_request/url_request_status.h"
22
23using net::URLFetcher;
24
25namespace precache {
26
27namespace {
28
29GURL GetConfigURL() {
30  const CommandLine& command_line = *CommandLine::ForCurrentProcess();
31  if (command_line.HasSwitch(switches::kPrecacheConfigSettingsURL)) {
32    return GURL(
33        command_line.GetSwitchValueASCII(switches::kPrecacheConfigSettingsURL));
34  }
35
36#if defined(PRECACHE_CONFIG_SETTINGS_URL)
37  return GURL(PRECACHE_CONFIG_SETTINGS_URL);
38#else
39  // The precache config settings URL could not be determined, so return an
40  // empty, invalid GURL.
41  return GURL();
42#endif
43}
44
45std::string GetManifestURLPrefix() {
46  const CommandLine& command_line = *CommandLine::ForCurrentProcess();
47  if (command_line.HasSwitch(switches::kPrecacheManifestURLPrefix)) {
48    return command_line.GetSwitchValueASCII(
49        switches::kPrecacheManifestURLPrefix);
50  }
51
52#if defined(PRECACHE_MANIFEST_URL_PREFIX)
53  return PRECACHE_MANIFEST_URL_PREFIX;
54#else
55  // The precache manifest URL prefix could not be determined, so return an
56  // empty string.
57  return std::string();
58#endif
59}
60
61// Construct the URL of the precache manifest for the given starting URL.
62// The server is expecting a request for a URL consisting of the manifest URL
63// prefix followed by the doubly escaped starting URL.
64GURL ConstructManifestURL(const GURL& starting_url) {
65  return GURL(
66      GetManifestURLPrefix() +
67      net::EscapeQueryParamValue(
68          net::EscapeQueryParamValue(starting_url.spec(), false), false));
69}
70
71// Attempts to parse a protobuf message from the response string of a
72// URLFetcher. If parsing is successful, the message parameter will contain the
73// parsed protobuf and this function will return true. Otherwise, returns false.
74bool ParseProtoFromFetchResponse(const URLFetcher& source,
75                                 ::google::protobuf::MessageLite* message) {
76  std::string response_string;
77
78  if (!source.GetStatus().is_success()) {
79    DLOG(WARNING) << "Fetch failed: " << source.GetOriginalURL().spec();
80    return false;
81  }
82  if (!source.GetResponseAsString(&response_string)) {
83    DLOG(WARNING) << "No response string present: "
84                  << source.GetOriginalURL().spec();
85    return false;
86  }
87  if (!message->ParseFromString(response_string)) {
88    DLOG(WARNING) << "Unable to parse proto served from "
89                  << source.GetOriginalURL().spec();
90    return false;
91  }
92  return true;
93}
94
95}  // namespace
96
97// Class that fetches a URL, and runs the specified callback when the fetch is
98// complete. This class exists so that a different method can be run in
99// response to different kinds of fetches, e.g. OnConfigFetchComplete when
100// configuration settings are fetched, OnManifestFetchComplete when a manifest
101// is fetched, etc.
102class PrecacheFetcher::Fetcher : public net::URLFetcherDelegate {
103 public:
104  // Construct a new Fetcher. This will create and start a new URLFetcher for
105  // the specified URL using the specified request context.
106  Fetcher(net::URLRequestContextGetter* request_context, const GURL& url,
107          const base::Callback<void(const URLFetcher&)>& callback);
108  virtual ~Fetcher() {}
109  virtual void OnURLFetchComplete(const URLFetcher* source) OVERRIDE;
110
111 private:
112  const base::Callback<void(const URLFetcher&)> callback_;
113  scoped_ptr<URLFetcher> url_fetcher_;
114
115  DISALLOW_COPY_AND_ASSIGN(Fetcher);
116};
117
118PrecacheFetcher::Fetcher::Fetcher(
119    net::URLRequestContextGetter* request_context, const GURL& url,
120    const base::Callback<void(const URLFetcher&)>& callback)
121    : callback_(callback) {
122  url_fetcher_.reset(URLFetcher::Create(url, URLFetcher::GET, this));
123  url_fetcher_->SetRequestContext(request_context);
124  url_fetcher_->SetLoadFlags(net::LOAD_DO_NOT_PROMPT_FOR_LOGIN);
125  url_fetcher_->Start();
126}
127
128void PrecacheFetcher::Fetcher::OnURLFetchComplete(const URLFetcher* source) {
129  callback_.Run(*source);
130}
131
132PrecacheFetcher::PrecacheFetcher(
133    const std::list<GURL>& starting_urls,
134    net::URLRequestContextGetter* request_context,
135    PrecacheFetcher::PrecacheDelegate* precache_delegate)
136    : starting_urls_(starting_urls),
137      request_context_(request_context),
138      precache_delegate_(precache_delegate) {
139  DCHECK(request_context_.get());  // Request context must be non-NULL.
140  DCHECK(precache_delegate_);  // Precache delegate must be non-NULL.
141
142  DCHECK_NE(GURL(), GetConfigURL())
143      << "Could not determine the precache config settings URL.";
144  DCHECK_NE(std::string(), GetManifestURLPrefix())
145      << "Could not determine the precache manifest URL prefix.";
146}
147
148PrecacheFetcher::~PrecacheFetcher() {
149}
150
151void PrecacheFetcher::Start() {
152  DCHECK(!fetcher_);  // Start shouldn't be called repeatedly.
153
154  GURL config_url = GetConfigURL();
155  DCHECK(config_url.is_valid());
156
157  // Fetch the precache configuration settings from the server.
158  fetcher_.reset(new Fetcher(request_context_.get(),
159                             config_url,
160                             base::Bind(&PrecacheFetcher::OnConfigFetchComplete,
161                                        base::Unretained(this))));
162}
163
164void PrecacheFetcher::StartNextFetch() {
165  if (!resource_urls_to_fetch_.empty()) {
166    // Fetch the next resource URL.
167    fetcher_.reset(
168        new Fetcher(request_context_.get(),
169                    resource_urls_to_fetch_.front(),
170                    base::Bind(&PrecacheFetcher::OnResourceFetchComplete,
171                               base::Unretained(this))));
172
173    resource_urls_to_fetch_.pop_front();
174    return;
175  }
176
177  if (!manifest_urls_to_fetch_.empty()) {
178    // Fetch the next manifest URL.
179    fetcher_.reset(
180        new Fetcher(request_context_.get(),
181                    manifest_urls_to_fetch_.front(),
182                    base::Bind(&PrecacheFetcher::OnManifestFetchComplete,
183                               base::Unretained(this))));
184
185    manifest_urls_to_fetch_.pop_front();
186    return;
187  }
188
189  // There are no more URLs to fetch, so end the precache cycle.
190  precache_delegate_->OnDone();
191  // OnDone may have deleted this PrecacheFetcher, so don't do anything after it
192  // is called.
193}
194
195void PrecacheFetcher::OnConfigFetchComplete(const URLFetcher& source) {
196  PrecacheConfigurationSettings config;
197
198  if (ParseProtoFromFetchResponse(source, &config)) {
199    // Keep track of starting URLs that manifests are being fetched for, in
200    // order to remove duplicates. This is a hash set on strings, and not GURLs,
201    // because there is no hash function defined for GURL.
202    base::hash_set<std::string> unique_starting_urls;
203
204    // Attempt to fetch manifests for starting URLs up to the maximum top sites
205    // count. If a manifest does not exist for a particular starting URL, then
206    // the fetch will fail, and that starting URL will be ignored.
207    int64 rank = 0;
208    for (std::list<GURL>::const_iterator it = starting_urls_.begin();
209         it != starting_urls_.end() && rank < config.top_sites_count();
210         ++it, ++rank) {
211      if (unique_starting_urls.find(it->spec()) == unique_starting_urls.end()) {
212        // Only add a fetch for the manifest URL if this manifest isn't already
213        // going to be fetched.
214        manifest_urls_to_fetch_.push_back(ConstructManifestURL(*it));
215        unique_starting_urls.insert(it->spec());
216      }
217    }
218
219    for (int i = 0; i < config.forced_starting_url_size(); ++i) {
220      // Convert the string URL into a GURL and take the spec() of it so that
221      // the URL string gets canonicalized.
222      GURL url(config.forced_starting_url(i));
223      if (unique_starting_urls.find(url.spec()) == unique_starting_urls.end()) {
224        // Only add a fetch for the manifest URL if this manifest isn't already
225        // going to be fetched.
226        manifest_urls_to_fetch_.push_back(ConstructManifestURL(url));
227        unique_starting_urls.insert(url.spec());
228      }
229    }
230  }
231
232  StartNextFetch();
233}
234
235void PrecacheFetcher::OnManifestFetchComplete(const URLFetcher& source) {
236  PrecacheManifest manifest;
237
238  if (ParseProtoFromFetchResponse(source, &manifest)) {
239    for (int i = 0; i < manifest.resource_size(); ++i) {
240      if (manifest.resource(i).has_url()) {
241        resource_urls_to_fetch_.push_back(GURL(manifest.resource(i).url()));
242      }
243    }
244  }
245
246  StartNextFetch();
247}
248
249void PrecacheFetcher::OnResourceFetchComplete(const URLFetcher& source) {
250  // The resource has already been put in the cache during the fetch process, so
251  // nothing more needs to be done for the resource.
252  StartNextFetch();
253}
254
255}  // namespace precache
256