// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5#ifndef WEBKIT_BROWSER_APPCACHE_APPCACHE_UPDATE_JOB_H_
6#define WEBKIT_BROWSER_APPCACHE_APPCACHE_UPDATE_JOB_H_
7
8#include <deque>
9#include <map>
10#include <set>
11#include <string>
12#include <vector>
13
14#include "base/gtest_prod_util.h"
15#include "base/memory/ref_counted.h"
16#include "net/base/completion_callback.h"
17#include "net/http/http_response_headers.h"
18#include "net/url_request/url_request.h"
19#include "url/gurl.h"
20#include "webkit/browser/appcache/appcache.h"
21#include "webkit/browser/appcache/appcache_host.h"
22#include "webkit/browser/appcache/appcache_response.h"
23#include "webkit/browser/appcache/appcache_service.h"
24#include "webkit/browser/appcache/appcache_storage.h"
25#include "webkit/browser/webkit_storage_browser_export.h"
26#include "webkit/common/appcache/appcache_interfaces.h"
27
28namespace appcache {
29
30class HostNotifier;
31
32// Application cache Update algorithm and state.
33class WEBKIT_STORAGE_BROWSER_EXPORT AppCacheUpdateJob
34    : public AppCacheStorage::Delegate,
35      public AppCacheHost::Observer,
36      public AppCacheService::Observer {
37 public:
38  AppCacheUpdateJob(AppCacheService* service, AppCacheGroup* group);
39  virtual ~AppCacheUpdateJob();
40
41  // Triggers the update process or adds more info if this update is already
42  // in progress.
43  void StartUpdate(AppCacheHost* host, const GURL& new_master_resource);
44
45 private:
46  friend class AppCacheUpdateJobTest;
47  class URLFetcher;
48
49  // Master entries have multiple hosts, for example, the same page is opened
50  // in different tabs.
51  typedef std::vector<AppCacheHost*> PendingHosts;
52  typedef std::map<GURL, PendingHosts> PendingMasters;
53  typedef std::map<GURL, URLFetcher*> PendingUrlFetches;
54  typedef std::map<int64, GURL> LoadingResponses;
55
56  static const int kRerunDelayMs = 1000;
57
58  // TODO(michaeln): Rework the set of states vs update types vs stored states.
59  // The NO_UPDATE state is really more of an update type. For all update types
60  // storing the results is relevant.
61
62  enum UpdateType {
63    UNKNOWN_TYPE,
64    UPGRADE_ATTEMPT,
65    CACHE_ATTEMPT,
66  };
67
68  enum InternalUpdateState {
69    FETCH_MANIFEST,
70    NO_UPDATE,
71    DOWNLOADING,
72
73    // Every state after this comment indicates the update is terminating.
74    REFETCH_MANIFEST,
75    CACHE_FAILURE,
76    CANCELLED,
77    COMPLETED,
78  };
79
80  enum StoredState {
81    UNSTORED,
82    STORING,
83    STORED,
84  };
85
86  struct UrlToFetch {
87    UrlToFetch(const GURL& url, bool checked, AppCacheResponseInfo* info);
88    ~UrlToFetch();
89
90    GURL url;
91    bool storage_checked;
92    scoped_refptr<AppCacheResponseInfo> existing_response_info;
93  };
94
95  class URLFetcher : public net::URLRequest::Delegate {
96   public:
97    enum FetchType {
98      MANIFEST_FETCH,
99      URL_FETCH,
100      MASTER_ENTRY_FETCH,
101      MANIFEST_REFETCH,
102    };
103    URLFetcher(const GURL& url,
104               FetchType fetch_type,
105               AppCacheUpdateJob* job);
106    virtual ~URLFetcher();
107    void Start();
108    FetchType fetch_type() const { return fetch_type_; }
109    net::URLRequest* request() const { return request_.get(); }
110    const AppCacheEntry& existing_entry() const { return existing_entry_; }
111    const std::string& manifest_data() const { return manifest_data_; }
112    AppCacheResponseWriter* response_writer() const {
113      return response_writer_.get();
114    }
115    void set_existing_response_headers(net::HttpResponseHeaders* headers) {
116      existing_response_headers_ = headers;
117    }
118    void set_existing_entry(const AppCacheEntry& entry) {
119      existing_entry_ = entry;
120    }
121
122   private:
123    // URLRequest::Delegate overrides
124    virtual void OnReceivedRedirect(net::URLRequest* request,
125                                    const GURL& new_url,
126                                    bool* defer_redirect) OVERRIDE;
127    virtual void OnResponseStarted(net::URLRequest* request) OVERRIDE;
128    virtual void OnReadCompleted(net::URLRequest* request,
129                                 int bytes_read) OVERRIDE;
130
131    void AddConditionalHeaders(const net::HttpResponseHeaders* headers);
132    void OnWriteComplete(int result);
133    void ReadResponseData();
134    bool ConsumeResponseData(int bytes_read);
135    void OnResponseCompleted();
136    bool MaybeRetryRequest();
137
138    GURL url_;
139    AppCacheUpdateJob* job_;
140    FetchType fetch_type_;
141    int retry_503_attempts_;
142    scoped_refptr<net::IOBuffer> buffer_;
143    scoped_ptr<net::URLRequest> request_;
144    AppCacheEntry existing_entry_;
145    scoped_refptr<net::HttpResponseHeaders> existing_response_headers_;
146    std::string manifest_data_;
147    scoped_ptr<AppCacheResponseWriter> response_writer_;
148  };  // class URLFetcher
149
150  AppCacheResponseWriter* CreateResponseWriter();
151
152  // Methods for AppCacheStorage::Delegate.
153  virtual void OnResponseInfoLoaded(AppCacheResponseInfo* response_info,
154                                    int64 response_id) OVERRIDE;
155  virtual void OnGroupAndNewestCacheStored(AppCacheGroup* group,
156                                           AppCache* newest_cache,
157                                           bool success,
158                                           bool would_exceed_quota) OVERRIDE;
159  virtual void OnGroupMadeObsolete(AppCacheGroup* group, bool success) OVERRIDE;
160
161  // Methods for AppCacheHost::Observer.
162  virtual void OnCacheSelectionComplete(AppCacheHost* host) OVERRIDE {}  // N/A
163  virtual void OnDestructionImminent(AppCacheHost* host) OVERRIDE;
164
165  // Methods for AppCacheService::Observer.
166  virtual void OnServiceReinitialized(
167      AppCacheStorageReference* old_storage) OVERRIDE;
168
169  void HandleCacheFailure(const std::string& error_message);
170
171  void FetchManifest(bool is_first_fetch);
172  void HandleManifestFetchCompleted(URLFetcher* fetcher);
173  void ContinueHandleManifestFetchCompleted(bool changed);
174
175  void HandleUrlFetchCompleted(URLFetcher* fetcher);
176  void HandleMasterEntryFetchCompleted(URLFetcher* fetcher);
177
178  void HandleManifestRefetchCompleted(URLFetcher* fetcher);
179  void OnManifestInfoWriteComplete(int result);
180  void OnManifestDataWriteComplete(int result);
181
182  void StoreGroupAndCache();
183
184  void NotifySingleHost(AppCacheHost* host, EventID event_id);
185  void NotifyAllAssociatedHosts(EventID event_id);
186  void NotifyAllProgress(const GURL& url);
187  void NotifyAllFinalProgress();
188  void NotifyAllError(const std::string& error_message);
189  void AddAllAssociatedHostsToNotifier(HostNotifier* notifier);
190
191  // Checks if manifest is byte for byte identical with the manifest
192  // in the newest application cache.
193  void CheckIfManifestChanged();
194  void OnManifestDataReadComplete(int result);
195
196  // Creates the list of files that may need to be fetched and initiates
197  // fetches. Section 6.9.4 steps 12-17
198  void BuildUrlFileList(const Manifest& manifest);
199  void AddUrlToFileList(const GURL& url, int type);
200  void FetchUrls();
201  void CancelAllUrlFetches();
202  bool ShouldSkipUrlFetch(const AppCacheEntry& entry);
203
204  // If entry already exists in the cache currently being updated, merge
205  // the entry type information with the existing entry.
206  // Returns true if entry exists in cache currently being updated.
207  bool AlreadyFetchedEntry(const GURL& url, int entry_type);
208
209  // TODO(jennb): Delete when update no longer fetches master entries directly.
210  // Creates the list of master entries that need to be fetched and initiates
211  // fetches.
212  void AddMasterEntryToFetchList(AppCacheHost* host, const GURL& url,
213                                 bool is_new);
214  void FetchMasterEntries();
215  void CancelAllMasterEntryFetches(const std::string& error_message);
216
217  // Asynchronously loads the entry from the newest complete cache if the
218  // HTTP caching semantics allow.
219  // Returns false if immediately obvious that data cannot be loaded from
220  // newest complete cache.
221  bool MaybeLoadFromNewestCache(const GURL& url, AppCacheEntry& entry);
222  void LoadFromNewestCacheFailed(const GURL& url,
223                                 AppCacheResponseInfo* newest_response_info);
224
225  // Does nothing if update process is still waiting for pending master
226  // entries or URL fetches to complete downloading. Otherwise, completes
227  // the update process.
228  void MaybeCompleteUpdate();
229
230  // Schedules a rerun of the entire update with the same parameters as
231  // this update job after a short delay.
232  void ScheduleUpdateRetry(int delay_ms);
233
234  void Cancel();
235  void ClearPendingMasterEntries();
236  void DiscardInprogressCache();
237  void DiscardDuplicateResponses();
238
239  // Deletes this object after letting the stack unwind.
240  void DeleteSoon();
241
242  bool IsTerminating() { return internal_state_ >= REFETCH_MANIFEST ||
243                                stored_state_ != UNSTORED; }
244
245  AppCacheService* service_;
246  const GURL manifest_url_;  // here for easier access
247
248  // Defined prior to refs to AppCaches and Groups because destruction
249  // order matters, the disabled_storage_reference_ must outlive those
250  // objects.
251  scoped_refptr<AppCacheStorageReference> disabled_storage_reference_;
252
253  scoped_refptr<AppCache> inprogress_cache_;
254
255  AppCacheGroup* group_;
256
257  UpdateType update_type_;
258  InternalUpdateState internal_state_;
259
260  PendingMasters pending_master_entries_;
261  size_t master_entries_completed_;
262
263  // TODO(jennb): Delete when update no longer fetches master entries directly.
264  // Helper containers to track which pending master entries have yet to be
265  // fetched and which are currently being fetched. Master entries that
266  // are listed in the manifest may be fetched as a regular URL instead of
267  // as a separate master entry fetch to optimize against duplicate fetches.
268  std::set<GURL> master_entries_to_fetch_;
269  PendingUrlFetches master_entry_fetches_;
270
271  // URLs of files to fetch along with their flags.
272  AppCache::EntryMap url_file_list_;
273  size_t url_fetches_completed_;
274
275  // Helper container to track which urls have not been fetched yet. URLs are
276  // removed when the fetch is initiated. Flag indicates whether an attempt
277  // to load the URL from storage has already been tried and failed.
278  std::deque<UrlToFetch> urls_to_fetch_;
279
280  // Helper container to track which urls are being loaded from response
281  // storage.
282  LoadingResponses loading_responses_;
283
284  // Keep track of pending URL requests so we can cancel them if necessary.
285  URLFetcher* manifest_fetcher_;
286  PendingUrlFetches pending_url_fetches_;
287
288  // Temporary storage of manifest response data for parsing and comparison.
289  std::string manifest_data_;
290  scoped_ptr<net::HttpResponseInfo> manifest_response_info_;
291  scoped_ptr<AppCacheResponseWriter> manifest_response_writer_;
292  scoped_refptr<net::IOBuffer> read_manifest_buffer_;
293  std::string loaded_manifest_data_;
294  scoped_ptr<AppCacheResponseReader> manifest_response_reader_;
295
296  // New master entries added to the cache by this job, used to cleanup
297  // in error conditions.
298  std::vector<GURL> added_master_entries_;
299
300  // Response ids stored by this update job, used to cleanup in
301  // error conditions.
302  std::vector<int64> stored_response_ids_;
303
304  // In some cases we fetch the same resource multiple times, and then
305  // have to delete the duplicates upon successful update. These ids
306  // are also in the stored_response_ids_ collection so we only schedule
307  // these for deletion on success.
308  // TODO(michaeln): Rework when we no longer fetches master entries directly.
309  std::vector<int64> duplicate_response_ids_;
310
311  // Whether we've stored the resulting group/cache yet.
312  StoredState stored_state_;
313
314  AppCacheStorage* storage_;
315
316  FRIEND_TEST_ALL_PREFIXES(AppCacheGroupTest, QueueUpdate);
317
318  DISALLOW_COPY_AND_ASSIGN(AppCacheUpdateJob);
319};
320
321}  // namespace appcache
322
323#endif  // WEBKIT_BROWSER_APPCACHE_APPCACHE_UPDATE_JOB_H_
324