save_package.h revision 424c4d7b64af9d0d8fd9624f381f469654d5e3d2
1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef CONTENT_BROWSER_DOWNLOAD_SAVE_PACKAGE_H_
6#define CONTENT_BROWSER_DOWNLOAD_SAVE_PACKAGE_H_
7
8#include <queue>
9#include <set>
10#include <string>
11#include <vector>
12
13#include "base/basictypes.h"
14#include "base/containers/hash_tables.h"
15#include "base/files/file_path.h"
16#include "base/gtest_prod_util.h"
17#include "base/memory/ref_counted.h"
18#include "base/memory/weak_ptr.h"
19#include "base/time/time.h"
20#include "content/common/content_export.h"
21#include "content/public/browser/download_item.h"
22#include "content/public/browser/download_manager_delegate.h"
23#include "content/public/browser/save_page_type.h"
24#include "content/public/browser/web_contents_observer.h"
25#include "content/public/common/referrer.h"
26#include "net/base/net_errors.h"
27#include "url/gurl.h"
28
// NOTE(review): "url/gurl.h" is already included above, so this forward
// declaration looks redundant -- confirm before removing.
29class GURL;
30
31namespace content {
// Forward declarations. Within this header these types are only used through
// pointers, so their full definitions are not required here.
32class DownloadItemImpl;
33class DownloadManagerImpl;
34class WebContents;
35class SaveFileManager;
36class SaveItem;
37class SavePackage;
38struct SaveFileCreateInfo;
39
40// The SavePackage object manages the process of saving a page as only-html,
41// complete-html or MHTML, and provides the information for displaying saving
42// status.  Saving a page as only-html means that we save the web page to a
43// single HTML file regardless of internal sub-resources and sub-frames.  Saving
44// a page as complete-html means we save not only the main html file the user
45// told it to save but also a directory for the auxiliary files such as all
46// sub-frame html files, image files, css files and js files.  Saving a page as
47// MHTML means the same thing as complete-html, but it uses the MHTML format to
48// contain the html and all auxiliary files in a single text file.
49//
50// Each page saving job may include one or multiple files which need to be
51// saved. Each file is represented by a SaveItem, and all SaveItems are owned
52// by the SavePackage. SaveItems are created when a user initiates a page
53// saving job, and exist for the duration of one contents' lifetime.
54class CONTENT_EXPORT SavePackage
55    : public base::RefCountedThreadSafe<SavePackage>,
56      public WebContentsObserver,
57      public DownloadItem::Observer,
58      public base::SupportsWeakPtr<SavePackage> {
59 public:
60  enum WaitState {
61    // State when created but not initialized.
62    INITIALIZE = 0,
63    // State after initializing, but not yet saving.
64    START_PROCESS,
65    // Waiting on a list of savable resources from the backend.
66    RESOURCES_LIST,
67    // Waiting for data sent from net IO or from the file system.
68    NET_FILES,
69    // Waiting for html DOM data sent from the render process.
70    HTML_DATA,
71    // Saving page finished successfully.
72    SUCCESSFUL,
73    // Failed to save page.
74    FAILED
75  };
76
77  static const base::FilePath::CharType kDefaultHtmlExtension[];
78
79  // Constructor for user initiated page saving. This constructor results in a
80  // SavePackage that will generate and sanitize a suggested name for the user
81  // in the "Save As" dialog box.
82  explicit SavePackage(WebContents* web_contents);
83
84  // This constructor is used only for testing. We can bypass the file and
85  // directory name generation / sanitization by providing well known paths
86  // better suited for tests.
87  SavePackage(WebContents* web_contents,
88              SavePageType save_type,
89              const base::FilePath& file_full_path,
90              const base::FilePath& directory_full_path);
91
92  // Initializes the SavePackage. Returns true if it initializes properly.  This
93  // method must be called on the UI thread because using g_browser_process on
94  // a non-UI thread can cause crashes during shutdown.
95  // |cb| will be called when the DownloadItem is created, before data is
96  // written to disk.
97  bool Init(const SavePackageDownloadCreatedCallback& cb);
98
99  // Cancels all in-progress requests; may be triggered by the user or by an
  // internal error.
100  void Cancel(bool user_action);
101
102  void Finish();
103
104  // Notifications sent from the file thread to the UI thread.
105  void StartSave(const SaveFileCreateInfo* info);
106  bool UpdateSaveProgress(int32 save_id, int64 size, bool write_success);
107  void SaveFinished(int32 save_id, int64 size, bool is_success);
108  void SaveFailed(const GURL& save_url);
109  void SaveCanceled(SaveItem* save_item);
110
111  // Rough percent complete, -1 means we don't know (since we didn't receive a
112  // total size).
113  int PercentComplete();
114
  // Simple state accessors. canceled() reports true when either the user
  // canceled the job or a disk error occurred.
115  bool canceled() const { return user_canceled_ || disk_error_occurred_; }
116  bool finished() const { return finished_; }
117  SavePageType save_type() const { return save_type_; }
118  int contents_id() const { return contents_id_; }
119  int id() const { return unique_id_; }
120  WebContents* web_contents() const;
121
122  void GetSaveInfo();
123
124 private:
  // Gives base::RefCountedThreadSafe access to the private destructor below.
125  friend class base::RefCountedThreadSafe<SavePackage>;
126
127  void InitWithDownloadItem(
128      const SavePackageDownloadCreatedCallback& download_created_callback,
129      DownloadItemImpl* item);
130
131  // Callback for WebContents::GenerateMHTML().
132  void OnMHTMLGenerated(int64 size);
133
134  // For testing only; takes no SavePageType argument.
135  SavePackage(WebContents* web_contents,
136              const base::FilePath& file_full_path,
137              const base::FilePath& directory_full_path);
138
139  virtual ~SavePackage();
140
141  // Notes from Init() above apply here as well.
142  void InternalInit();
143
144  void Stop();
145  void CheckFinish();
146  void SaveNextFile(bool process_all_remainder_items);
147  void DoSavingProcess();
148
149  // WebContentsObserver implementation.
150  virtual bool OnMessageReceived(const IPC::Message& message) OVERRIDE;
151
152  // DownloadItem::Observer implementation.
153  virtual void OnDownloadDestroyed(DownloadItem* download) OVERRIDE;
154
155  // Update the download history of this item upon completion.
156  void FinalizeDownloadEntry();
157
158  // Detach from DownloadManager.
159  void StopObservation();
160
161  // Returns the max length of a path for a specific base directory.
162  // This is needed on POSIX, which restricts the length of file names in
163  // addition to the restriction on the length of path names.
164  // |base_dir| is assumed to be a directory name with no trailing slash.
165  static uint32 GetMaxPathLengthForDirectory(const base::FilePath& base_dir);
166
167  static bool GetSafePureFileName(
168      const base::FilePath& dir_path,
169      const base::FilePath::StringType& file_name_ext,
170      uint32 max_file_path_len,
171      base::FilePath::StringType* pure_file_name);
172
173  // Create a file name based on the response from the server.
174  bool GenerateFileName(const std::string& disposition,
175                        const GURL& url,
176                        bool need_html_ext,
177                        base::FilePath::StringType* generated_name);
178
179  // Get all savable resource links from the current web page, including the
180  // main frame and sub-frames.
181  void GetAllSavableResourceLinksForCurrentPage();
182  // Get html data by serializing all frames of the current page with lists
183  // which contain all resource links that have a local copy.
184  void GetSerializedHtmlDataForCurrentPageWithLocalLinks();
185
186  // Look up a SaveItem by save id in the in-progress map.
187  SaveItem* LookupItemInProcessBySaveId(int32 save_id);
188
189  // Remove a SaveItem from the in-progress map and put it in a saved map.
190  void PutInProgressItemToSavedMap(SaveItem* save_item);
191
192  // Retrieves the URL to be saved from the WebContents.
193  GURL GetUrlToBeSaved();
194
195  void CreateDirectoryOnFileThread(const base::FilePath& website_save_dir,
196                                   const base::FilePath& download_save_dir,
197                                   bool skip_dir_check,
198                                   const std::string& mime_type,
199                                   const std::string& accept_langs);
200  void ContinueGetSaveInfo(const base::FilePath& suggested_path,
201                           bool can_save_as_complete);
202  void OnPathPicked(
203      const base::FilePath& final_name,
204      SavePageType type,
205      const SavePackageDownloadCreatedCallback& cb);
206  void OnReceivedSavableResourceLinksForCurrentPage(
207      const std::vector<GURL>& resources_list,
208      const std::vector<Referrer>& referrers_list,
209      const std::vector<GURL>& frames_list);
210
211  void OnReceivedSerializedHtmlData(const GURL& frame_url,
212                                    const std::string& data,
213                                    int32 status);
214
215  typedef base::hash_map<std::string, SaveItem*> SaveUrlItemMap;
216  // in_progress_items_ maps all saving jobs that are in the in-progress state.
217  SaveUrlItemMap in_progress_items_;
218  // saved_failed_items_ maps all saving jobs which have failed.
219  SaveUrlItemMap saved_failed_items_;
220
221  // The number of in-process SaveItems.
222  int in_process_count() const {
223    return static_cast<int>(in_progress_items_.size());
224  }
225
226  // The number of all SaveItems which have completed, including successful
227  // items and failed items.
228  int completed_count() const {
229    return static_cast<int>(saved_success_items_.size() +
230                            saved_failed_items_.size());
231  }
232
233  // The current speed in files per second. This is used to update the
234  // DownloadItem associated to this SavePackage. The files per second is
235  // presented by the DownloadItem to the UI as bytes per second, which is
236  // not correct but matches the way the total and received number of files is
237  // presented as the total and received bytes.
238  int64 CurrentSpeed() const;
239
240  // Helper function for preparing the suggested name for the SaveAs Dialog.
241  // The suggested name is determined by the web document's title.
242  base::FilePath GetSuggestedNameForSaveAs(
243      bool can_save_as_complete,
244      const std::string& contents_mime_type,
245      const std::string& accept_langs);
246
247  // Ensures that the file name has a proper extension for HTML by adding ".htm"
248  // if necessary.
249  static base::FilePath EnsureHtmlExtension(const base::FilePath& name);
250
251  // Ensures that the file name has a proper extension for supported formats
252  // if necessary.
253  static base::FilePath EnsureMimeExtension(const base::FilePath& name,
254      const std::string& contents_mime_type);
255
256  // Returns the extension for supported MIME types (for example, for
257  // "text/plain" it returns "txt").
258  static const base::FilePath::CharType* ExtensionForMimeType(
259      const std::string& contents_mime_type);
260
261  typedef std::queue<SaveItem*> SaveItemQueue;
262  // A queue for items we are about to start saving.
263  SaveItemQueue waiting_item_queue_;
264
265  typedef base::hash_map<int32, SaveItem*> SavedItemMap;
266  // saved_success_items_ maps all saving jobs which were successfully saved.
267  SavedItemMap saved_success_items_;
268
269  // Non-owning pointer for handling file writing on the file thread.
270  SaveFileManager* file_manager_;
271
272  // DownloadManager owns the DownloadItem and handles history and UI.
273  DownloadManagerImpl* download_manager_;
274  DownloadItemImpl* download_;
275
276  // The URL of the page the user wants to save.
277  GURL page_url_;
278  base::FilePath saved_main_file_path_;
279  base::FilePath saved_main_directory_path_;
280
281  // The title of the page the user wants to save.
282  string16 title_;
283
284  // Used to calculate package download speed (in files per second).
285  base::TimeTicks start_tick_;
286
287  // Indicates whether the actual saving job is finishing or not.
288  bool finished_;
289
290  // Indicates whether a call to Finish() has been scheduled.
291  bool mhtml_finishing_;
292
293  // Indicates whether the user canceled the saving job.
294  bool user_canceled_;
295
296  // Indicates whether a disk error occurred.
297  bool disk_error_occurred_;
298
299  // Type of save: the page is being saved as only-html or complete-html.
300  SavePageType save_type_;
301
302  // Total number of resources that need to be saved.
303  size_t all_save_items_count_;
304
305  typedef std::set<base::FilePath::StringType,
306                   bool (*)(const base::FilePath::StringType&,
307                            const base::FilePath::StringType&)> FileNameSet;
308  // This set is used to eliminate duplicate file names in the saving directory.
  // Its ordering comes from the function-pointer comparator in the typedef.
309  FileNameSet file_name_set_;
310
311  typedef base::hash_map<base::FilePath::StringType, uint32> FileNameCountMap;
312  // This map is used to track the serial number for a specified filename.
313  FileNameCountMap file_name_count_map_;
314
315  // Indicates the current waiting state when SavePackage tries to get
316  // something from outside.
317  WaitState wait_state_;
318
319  // A contents can only have one SavePackage at a time.
320  // Currently render_process_id is used as the contents' unique id.
321  const int contents_id_;
322
323  // Unique ID for this SavePackage.
324  const int unique_id_;
325
326  // Variables to record errors that happened so we can record them via
327  // UMA statistics.
328  bool wrote_to_completed_file_;
329  bool wrote_to_failed_file_;
330
  // Grant the unit tests access to private members.
331  friend class SavePackageTest;
332  FRIEND_TEST_ALL_PREFIXES(SavePackageTest, TestSuggestedSaveNames);
333  FRIEND_TEST_ALL_PREFIXES(SavePackageTest, TestLongSafePureFilename);
334
335  DISALLOW_COPY_AND_ASSIGN(SavePackage);
336};
337
338}  // namespace content
339
340#endif  // CONTENT_BROWSER_DOWNLOAD_SAVE_PACKAGE_H_
341