1// Copyright (c) 2013 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "components/policy/core/common/cloud/external_policy_data_updater.h"
6
7#include "base/bind.h"
8#include "base/bind_helpers.h"
9#include "base/callback.h"
10#include "base/location.h"
11#include "base/logging.h"
12#include "base/sequenced_task_runner.h"
13#include "base/stl_util.h"
14#include "components/policy/core/common/cloud/external_policy_data_fetcher.h"
15#include "crypto/sha2.h"
16#include "net/base/backoff_entry.h"
17#include "url/gurl.h"
18
19namespace policy {
20
21namespace {
22
23// Policies for exponential backoff of failed requests. There are 3 policies for
24// different classes of errors.
25
26// For temporary errors (HTTP 500, RST, etc).
27const net::BackoffEntry::Policy kRetrySoonPolicy = {
28  // Number of initial errors to ignore before starting to back off.
29  0,
30
31  // Initial delay in ms: 60 seconds.
32  1000 * 60,
33
34  // Factor by which the waiting time is multiplied.
35  2,
36
37  // Fuzzing percentage; this spreads delays randomly between 80% and 100%
38  // of the calculated time.
39  0.20,
40
41  // Maximum delay in ms: 12 hours.
42  1000 * 60 * 60 * 12,
43
44  // When to discard an entry: never.
45  -1,
46
47  // |always_use_initial_delay|; false means that the initial delay is
48  // applied after the first error, and starts backing off from there.
49  false,
50};
51
52// For other errors (request failed, server errors).
53const net::BackoffEntry::Policy kRetryLaterPolicy = {
54  // Number of initial errors to ignore before starting to back off.
55  0,
56
57  // Initial delay in ms: 1 hour.
58  1000 * 60 * 60,
59
60  // Factor by which the waiting time is multiplied.
61  2,
62
63  // Fuzzing percentage; this spreads delays randomly between 80% and 100%
64  // of the calculated time.
65  0.20,
66
67  // Maximum delay in ms: 12 hours.
68  1000 * 60 * 60 * 12,
69
70  // When to discard an entry: never.
71  -1,
72
73  // |always_use_initial_delay|; false means that the initial delay is
74  // applied after the first error, and starts backing off from there.
75  false,
76};
77
78// When the data fails validation (maybe because the policy URL and the data
79// served at that URL are out of sync). This essentially retries every 12 hours,
80// with some random jitter.
81const net::BackoffEntry::Policy kRetryMuchLaterPolicy = {
82  // Number of initial errors to ignore before starting to back off.
83  0,
84
85  // Initial delay in ms: 12 hours.
86  1000 * 60 * 60 * 12,
87
88  // Factor by which the waiting time is multiplied.
89  2,
90
91  // Fuzzing percentage; this spreads delays randomly between 80% and 100%
92  // of the calculated time.
93  0.20,
94
95  // Maximum delay in ms: 12 hours.
96  1000 * 60 * 60 * 12,
97
98  // When to discard an entry: never.
99  -1,
100
101  // |always_use_initial_delay|; false means that the initial delay is
102  // applied after the first error, and starts backing off from there.
103  false,
104};
105
// Upper bound on the retries granted to a request whose failure is unlikely
// to go away by itself (e.g. HTTP 4xx replies). Each FetchJob starts with this
// many retries and stops rescheduling itself once they are used up.
const int kMaxLimitedRetries = 3;
109
110}  // namespace
111
112class ExternalPolicyDataUpdater::FetchJob
113    : public base::SupportsWeakPtr<FetchJob> {
114 public:
115  FetchJob(ExternalPolicyDataUpdater* updater,
116           const std::string& key,
117           const ExternalPolicyDataUpdater::Request& request,
118           const ExternalPolicyDataUpdater::FetchSuccessCallback& callback);
119  virtual ~FetchJob();
120
121  const std::string& key() const;
122  const ExternalPolicyDataUpdater::Request& request() const;
123
124  void Start();
125
126  void OnFetchFinished(ExternalPolicyDataFetcher::Result result,
127                       scoped_ptr<std::string> data);
128
129 private:
130  void OnFailed(net::BackoffEntry* backoff_entry);
131  void Reschedule();
132
133  // Always valid as long as |this| is alive.
134  ExternalPolicyDataUpdater* updater_;
135
136  const std::string key_;
137  const ExternalPolicyDataUpdater::Request request_;
138  ExternalPolicyDataUpdater::FetchSuccessCallback callback_;
139
140  // If the job is currently running, a corresponding |fetch_job_| exists in the
141  // |external_policy_data_fetcher_|. The job must eventually call back to the
142  // |updater_|'s OnJobSucceeded() or OnJobFailed() method in this case.
143  // If the job is currently not running, |fetch_job_| is NULL and no callbacks
144  // should be invoked.
145  ExternalPolicyDataFetcher::Job* fetch_job_;  // Not owned.
146
147  // Some errors should trigger a limited number of retries, even with backoff.
148  // This counts down the number of such retries to stop retrying once the limit
149  // is reached.
150  int limited_retries_remaining_;
151
152  // Various delays to retry a failed download, depending on the failure reason.
153  net::BackoffEntry retry_soon_entry_;
154  net::BackoffEntry retry_later_entry_;
155  net::BackoffEntry retry_much_later_entry_;
156
157  DISALLOW_COPY_AND_ASSIGN(FetchJob);
158};
159
160ExternalPolicyDataUpdater::Request::Request() {
161}
162
163ExternalPolicyDataUpdater::Request::Request(const std::string& url,
164                                            const std::string& hash,
165                                            int64 max_size)
166    : url(url), hash(hash), max_size(max_size) {
167}
168
169bool ExternalPolicyDataUpdater::Request::operator==(
170    const Request& other) const {
171  return url == other.url && hash == other.hash && max_size == other.max_size;
172}
173
174ExternalPolicyDataUpdater::FetchJob::FetchJob(
175    ExternalPolicyDataUpdater* updater,
176    const std::string& key,
177    const ExternalPolicyDataUpdater::Request& request,
178    const ExternalPolicyDataUpdater::FetchSuccessCallback& callback)
179    : updater_(updater),
180      key_(key),
181      request_(request),
182      callback_(callback),
183      fetch_job_(NULL),
184      limited_retries_remaining_(kMaxLimitedRetries),
185      retry_soon_entry_(&kRetrySoonPolicy),
186      retry_later_entry_(&kRetryLaterPolicy),
187      retry_much_later_entry_(&kRetryMuchLaterPolicy) {
188}
189
190ExternalPolicyDataUpdater::FetchJob::~FetchJob() {
191  if (fetch_job_) {
192    // Cancel the fetch job in the |external_policy_data_fetcher_|.
193    updater_->external_policy_data_fetcher_->CancelJob(fetch_job_);
194    // Inform the |updater_| that the job was canceled.
195    updater_->OnJobFailed(this);
196  }
197}
198
199const std::string& ExternalPolicyDataUpdater::FetchJob::key() const {
200  return key_;
201}
202
203const ExternalPolicyDataUpdater::Request&
204    ExternalPolicyDataUpdater::FetchJob::request() const {
205  return request_;
206}
207
208void ExternalPolicyDataUpdater::FetchJob::Start() {
209  DCHECK(!fetch_job_);
210  // Start a fetch job in the |external_policy_data_fetcher_|. This will
211  // eventually call back to OnFetchFinished() with the result.
212  fetch_job_ = updater_->external_policy_data_fetcher_->StartJob(
213      GURL(request_.url), request_.max_size,
214      base::Bind(&ExternalPolicyDataUpdater::FetchJob::OnFetchFinished,
215                 base::Unretained(this)));
216}
217
218void ExternalPolicyDataUpdater::FetchJob::OnFetchFinished(
219    ExternalPolicyDataFetcher::Result result,
220    scoped_ptr<std::string> data) {
221  // The fetch job in the |external_policy_data_fetcher_| is finished.
222  fetch_job_ = NULL;
223
224  switch (result) {
225    case ExternalPolicyDataFetcher::CONNECTION_INTERRUPTED:
226      // The connection was interrupted. Try again soon.
227      OnFailed(&retry_soon_entry_);
228      return;
229    case ExternalPolicyDataFetcher::NETWORK_ERROR:
230      // Another network error occurred. Try again later.
231      OnFailed(&retry_later_entry_);
232      return;
233    case ExternalPolicyDataFetcher::SERVER_ERROR:
234      // Problem at the server. Try again soon.
235      OnFailed(&retry_soon_entry_);
236      return;
237    case ExternalPolicyDataFetcher::CLIENT_ERROR:
238      // Client error. This is unlikely to go away. Try again later, and give up
239      // retrying after 3 attempts.
240      OnFailed(limited_retries_remaining_ ? &retry_later_entry_ : NULL);
241      if (limited_retries_remaining_)
242        --limited_retries_remaining_;
243      return;
244    case ExternalPolicyDataFetcher::HTTP_ERROR:
245      // Any other type of HTTP failure. Try again later.
246      OnFailed(&retry_later_entry_);
247      return;
248    case ExternalPolicyDataFetcher::MAX_SIZE_EXCEEDED:
249      // Received |data| exceeds maximum allowed size. This may be because the
250      // data being served is stale. Try again much later.
251      OnFailed(&retry_much_later_entry_);
252      return;
253    case ExternalPolicyDataFetcher::SUCCESS:
254      break;
255  }
256
257  if (crypto::SHA256HashString(*data) != request_.hash) {
258    // Received |data| does not match expected hash. This may be because the
259    // data being served is stale. Try again much later.
260    OnFailed(&retry_much_later_entry_);
261    return;
262  }
263
264  // If the callback rejects the data, try again much later.
265  if (!callback_.Run(*data)) {
266    OnFailed(&retry_much_later_entry_);
267    return;
268  }
269
270  // Signal success.
271  updater_->OnJobSucceeded(this);
272}
273
274void ExternalPolicyDataUpdater::FetchJob::OnFailed(net::BackoffEntry* entry) {
275  if (entry) {
276    entry->InformOfRequest(false);
277
278    // This function may have been invoked because the job was obsoleted and is
279    // in the process of being deleted. If this is the case, the WeakPtr will
280    // become invalid and the delayed task will never run.
281    updater_->task_runner_->PostDelayedTask(
282        FROM_HERE,
283        base::Bind(&FetchJob::Reschedule, AsWeakPtr()),
284        entry->GetTimeUntilRelease());
285  }
286
287  updater_->OnJobFailed(this);
288}
289
290void ExternalPolicyDataUpdater::FetchJob::Reschedule() {
291  updater_->ScheduleJob(this);
292}
293
294ExternalPolicyDataUpdater::ExternalPolicyDataUpdater(
295    scoped_refptr<base::SequencedTaskRunner> task_runner,
296    scoped_ptr<ExternalPolicyDataFetcher> external_policy_data_fetcher,
297    size_t max_parallel_fetches)
298    : task_runner_(task_runner),
299      external_policy_data_fetcher_(external_policy_data_fetcher.release()),
300      max_parallel_jobs_(max_parallel_fetches),
301      running_jobs_(0),
302      shutting_down_(false) {
303  DCHECK(task_runner_->RunsTasksOnCurrentThread());
304}
305
306ExternalPolicyDataUpdater::~ExternalPolicyDataUpdater() {
307  DCHECK(task_runner_->RunsTasksOnCurrentThread());
308  shutting_down_ = true;
309  STLDeleteValues(&job_map_);
310}
311
312void ExternalPolicyDataUpdater::FetchExternalData(
313    const std::string key,
314    const Request& request,
315    const FetchSuccessCallback& callback) {
316  DCHECK(task_runner_->RunsTasksOnCurrentThread());
317
318  // Check whether a job exists for this |key| already.
319  FetchJob* job = job_map_[key];
320  if (job) {
321    // If the current |job| is handling the given |request| already, nothing
322    // needs to be done.
323    if (job->request() == request)
324      return;
325
326    // Otherwise, the current |job| is obsolete. If the |job| is on the queue,
327    // its WeakPtr will be invalidated and skipped by StartNextJobs(). If |job|
328    // is currently running, it will call OnJobFailed() immediately.
329    delete job;
330    job_map_.erase(key);
331  }
332
333  // Start a new job to handle |request|.
334  job = new FetchJob(this, key, request, callback);
335  job_map_[key] = job;
336  ScheduleJob(job);
337}
338
339void ExternalPolicyDataUpdater::CancelExternalDataFetch(
340    const std::string& key) {
341  DCHECK(task_runner_->RunsTasksOnCurrentThread());
342
343  // If a |job| exists for this |key|, delete it. If the |job| is on the queue,
344  // its WeakPtr will be invalidated and skipped by StartNextJobs(). If |job| is
345  // currently running, it will call OnJobFailed() immediately.
346  std::map<std::string, FetchJob*>::iterator job = job_map_.find(key);
347  if (job != job_map_.end()) {
348    delete job->second;
349    job_map_.erase(job);
350  }
351}
352
353void ExternalPolicyDataUpdater::StartNextJobs() {
354  if (shutting_down_)
355    return;
356
357  while (running_jobs_ < max_parallel_jobs_ && !job_queue_.empty()) {
358    FetchJob* job = job_queue_.front().get();
359    job_queue_.pop();
360
361    // Some of the jobs may have been invalidated, and have to be skipped.
362    if (job) {
363      ++running_jobs_;
364      // A started job will always call OnJobSucceeded() or OnJobFailed().
365      job->Start();
366    }
367  }
368}
369
370void ExternalPolicyDataUpdater::ScheduleJob(FetchJob* job) {
371  DCHECK_EQ(job_map_[job->key()], job);
372
373  job_queue_.push(job->AsWeakPtr());
374
375  StartNextJobs();
376}
377
378void ExternalPolicyDataUpdater::OnJobSucceeded(FetchJob* job) {
379  DCHECK(running_jobs_);
380  DCHECK_EQ(job_map_[job->key()], job);
381
382  --running_jobs_;
383  job_map_.erase(job->key());
384  delete job;
385
386  StartNextJobs();
387}
388
389void ExternalPolicyDataUpdater::OnJobFailed(FetchJob* job) {
390  DCHECK(running_jobs_);
391  DCHECK_EQ(job_map_[job->key()], job);
392
393  --running_jobs_;
394
395  // The job is not deleted when it fails because a retry attempt may have been
396  // scheduled.
397  StartNextJobs();
398}
399
400}  // namespace policy
401