// save_package.h revision 72a454cd3513ac24fbdd0e0cb9ad70b86a99b801
// Copyright (c) 2010 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5#ifndef CHROME_BROWSER_DOWNLOAD_SAVE_PACKAGE_H_
6#define CHROME_BROWSER_DOWNLOAD_SAVE_PACKAGE_H_
7#pragma once
8
9#include <queue>
10#include <string>
11#include <vector>
12
13#include "base/basictypes.h"
14#include "base/file_path.h"
15#include "base/gtest_prod_util.h"
16#include "base/hash_tables.h"
17#include "base/ref_counted.h"
18#include "base/task.h"
19#include "chrome/browser/tab_contents/tab_contents_observer.h"
20#include "chrome/browser/ui/shell_dialogs.h"
21#include "googleurl/src/gurl.h"
22
// Forward declarations of project classes. Declaring them here keeps the
// names usable as incomplete types (pointers, references) without pulling in
// their headers.
class SaveFileManager;
class SaveItem;
class SavePackage;
class DownloadItem;
class DownloadManager;
class GURL;
class MessageLoop;
class PrefService;
class Profile;
class TabContents;
class URLRequestContextGetter;
35
// Forward declarations of types that live in namespace base.
namespace base {
class Thread;
class Time;
}  // namespace base
40
// Forward declarations of project structs.
struct SaveFileCreateInfo;
struct SavePackageParam;
43
44// The SavePackage object manages the process of saving a page as only-html or
45// complete-html and providing the information for displaying saving status.
46// Saving page as only-html means means that we save web page to a single HTML
47// file regardless internal sub resources and sub frames.
48// Saving page as complete-html page means we save not only the main html file
49// the user told it to save but also a directory for the auxiliary files such
50// as all sub-frame html files, image files, css files and js files.
51//
52// Each page saving job may include one or multiple files which need to be
53// saved. Each file is represented by a SaveItem, and all SaveItems are owned
54// by the SavePackage. SaveItems are created when a user initiates a page
55// saving job, and exist for the duration of one tab's life time.
56class SavePackage : public base::RefCountedThreadSafe<SavePackage>,
57                    public TabContentsObserver,
58                    public SelectFileDialog::Listener {
59 public:
60  enum SavePackageType {
61    // The value of the save type before its set by the user.
62    SAVE_TYPE_UNKNOWN = -1,
63    // User chose to save only the HTML of the page.
64    SAVE_AS_ONLY_HTML = 0,
65    // User chose to save complete-html page.
66    SAVE_AS_COMPLETE_HTML = 1
67  };
68
69  enum WaitState {
70    // State when created but not initialized.
71    INITIALIZE = 0,
72    // State when after initializing, but not yet saving.
73    START_PROCESS,
74    // Waiting on a list of savable resources from the backend.
75    RESOURCES_LIST,
76    // Waiting for data sent from net IO or from file system.
77    NET_FILES,
78    // Waiting for html DOM data sent from render process.
79    HTML_DATA,
80    // Saving page finished successfully.
81    SUCCESSFUL,
82    // Failed to save page.
83    FAILED
84  };
85
86  // Constructor for user initiated page saving. This constructor results in a
87  // SavePackage that will generate and sanitize a suggested name for the user
88  // in the "Save As" dialog box.
89  explicit SavePackage(TabContents* tab_contents);
90
91  // This contructor is used only for testing. We can bypass the file and
92  // directory name generation / sanitization by providing well known paths
93  // better suited for tests.
94  SavePackage(TabContents* tab_contents,
95              SavePackageType save_type,
96              const FilePath& file_full_path,
97              const FilePath& directory_full_path);
98
99  // Initialize the SavePackage. Returns true if it initializes properly.
100  // Need to make sure that this method must be called in the UI thread because
101  // using g_browser_process on a non-UI thread can cause crashes during
102  // shutdown.
103  bool Init();
104
105  void Cancel(bool user_action);
106
107  void Finish();
108
109  // Notifications sent from the file thread to the UI thread.
110  void StartSave(const SaveFileCreateInfo* info);
111  bool UpdateSaveProgress(int32 save_id, int64 size, bool write_success);
112  void SaveFinished(int32 save_id, int64 size, bool is_success);
113  void SaveFailed(const GURL& save_url);
114  void SaveCanceled(SaveItem* save_item);
115
116  // Rough percent complete, -1 means we don't know (since we didn't receive a
117  // total size).
118  int PercentComplete();
119
120  // Show or Open a saved page via the Windows shell.
121  void ShowDownloadInShell();
122
123  bool canceled() const { return user_canceled_ || disk_error_occurred_; }
124  bool finished() const { return finished_; }
125  SavePackageType save_type() const { return save_type_; }
126  int tab_id() const { return tab_id_; }
127  int id() const { return unique_id_; }
128
129  void GetSaveInfo();
130
131  // Statics -------------------------------------------------------------------
132
133  // Used to disable prompting the user for a directory/filename of the saved
134  // web page.  This is available for testing.
135  static void SetShouldPromptUser(bool should_prompt);
136
137  // Check whether we can do the saving page operation for the specified URL.
138  static bool IsSavableURL(const GURL& url);
139
140  // Check whether we can do the saving page operation for the contents which
141  // have the specified MIME type.
142  static bool IsSavableContents(const std::string& contents_mime_type);
143
144  // SelectFileDialog::Listener ------------------------------------------------
145  virtual void FileSelected(const FilePath& path, int index, void* params);
146  virtual void FileSelectionCanceled(void* params);
147
148 private:
149  friend class base::RefCountedThreadSafe<SavePackage>;
150
151  // For testing only.
152  SavePackage(TabContents* tab_contents,
153              const FilePath& file_full_path,
154              const FilePath& directory_full_path);
155
156  ~SavePackage();
157
158  // Notes from Init() above applies here as well.
159  void InternalInit();
160
161  void Stop();
162  void CheckFinish();
163  void SaveNextFile(bool process_all_remainder_items);
164  void DoSavingProcess();
165
166  // TabContentsObserver implementation.
167  virtual bool OnMessageReceived(const IPC::Message& message);
168
169  // Return max length of a path for a specific base directory.
170  // This is needed on POSIX, which restrict the length of file names in
171  // addition to the restriction on the length of path names.
172  // |base_dir| is assumed to be a directory name with no trailing slash.
173  static uint32 GetMaxPathLengthForDirectory(const FilePath& base_dir);
174
175  static bool GetSafePureFileName(const FilePath& dir_path,
176                                  const FilePath::StringType& file_name_ext,
177                                  uint32 max_file_path_len,
178                                  FilePath::StringType* pure_file_name);
179
180  // Create a file name based on the response from the server.
181  bool GenerateFileName(const std::string& disposition,
182                        const GURL& url,
183                        bool need_html_ext,
184                        FilePath::StringType* generated_name);
185
186  // Get all savable resource links from current web page, include main
187  // frame and sub-frame.
188  void GetAllSavableResourceLinksForCurrentPage();
189  // Get html data by serializing all frames of current page with lists
190  // which contain all resource links that have local copy.
191  void GetSerializedHtmlDataForCurrentPageWithLocalLinks();
192
193  SaveItem* LookupItemInProcessBySaveId(int32 save_id);
194  void PutInProgressItemToSavedMap(SaveItem* save_item);
195
196  // Retrieves the URL to be saved from tab_contents_ variable.
197  GURL GetUrlToBeSaved();
198
199  void CreateDirectoryOnFileThread(const FilePath& website_save_dir,
200                                   const FilePath& download_save_dir,
201                                   const std::string& mime_type);
202  void ContinueGetSaveInfo(const FilePath& suggested_path,
203                           bool can_save_as_complete);
204  void ContinueSave(const FilePath& final_name, int index);
205
206  void OnReceivedSavableResourceLinksForCurrentPage(
207      const std::vector<GURL>& resources_list,
208      const std::vector<GURL>& referrers_list,
209      const std::vector<GURL>& frames_list);
210
211  void OnReceivedSerializedHtmlData(const GURL& frame_url,
212                                    const std::string& data,
213                                    int32 status);
214
215
216  typedef base::hash_map<std::string, SaveItem*> SaveUrlItemMap;
217  // in_progress_items_ is map of all saving job in in-progress state.
218  SaveUrlItemMap in_progress_items_;
219  // saved_failed_items_ is map of all saving job which are failed.
220  SaveUrlItemMap saved_failed_items_;
221
222  // The number of in process SaveItems.
223  int in_process_count() const {
224    return static_cast<int>(in_progress_items_.size());
225  }
226
227  // The number of all SaveItems which have completed, including success items
228  // and failed items.
229  int completed_count() const {
230    return static_cast<int>(saved_success_items_.size() +
231                            saved_failed_items_.size());
232  }
233
234  // Retrieve the preference for the directory to save pages to.
235  static FilePath GetSaveDirPreference(PrefService* prefs);
236
237  // Helper function for preparing suggested name for the SaveAs Dialog. The
238  // suggested name is determined by the web document's title.
239  FilePath GetSuggestedNameForSaveAs(
240      bool can_save_as_complete,
241      const std::string& contents_mime_type);
242
243  // Ensures that the file name has a proper extension for HTML by adding ".htm"
244  // if necessary.
245  static FilePath EnsureHtmlExtension(const FilePath& name);
246
247  // Ensures that the file name has a proper extension for supported formats
248  // if necessary.
249  static FilePath EnsureMimeExtension(const FilePath& name,
250      const std::string& contents_mime_type);
251
252  // Returns extension for supported MIME types (for example, for "text/plain"
253  // it returns "txt").
254  static const FilePath::CharType* ExtensionForMimeType(
255      const std::string& contents_mime_type);
256
257  typedef std::queue<SaveItem*> SaveItemQueue;
258  // A queue for items we are about to start saving.
259  SaveItemQueue waiting_item_queue_;
260
261  typedef base::hash_map<int32, SaveItem*> SavedItemMap;
262  // saved_success_items_ is map of all saving job which are successfully saved.
263  SavedItemMap saved_success_items_;
264
265  // The request context which provides application-specific context for
266  // net::URLRequest instances.
267  scoped_refptr<URLRequestContextGetter> request_context_getter_;
268
269  // Non-owning pointer for handling file writing on the file thread.
270  SaveFileManager* file_manager_;
271
272  TabContents* tab_contents_;
273
274  // We use a fake DownloadItem here in order to reuse the DownloadItemView.
275  // This class owns the pointer.
276  DownloadItem* download_;
277
278  // The URL of the page the user wants to save.
279  GURL page_url_;
280  FilePath saved_main_file_path_;
281  FilePath saved_main_directory_path_;
282
283  // The title of the page the user wants to save.
284  string16 title_;
285
286  // Indicates whether the actual saving job is finishing or not.
287  bool finished_;
288
289  // Indicates whether user canceled the saving job.
290  bool user_canceled_;
291
292  // Indicates whether user get disk error.
293  bool disk_error_occurred_;
294
295  // Type about saving page as only-html or complete-html.
296  SavePackageType save_type_;
297
298  // Number of all need to be saved resources.
299  size_t all_save_items_count_;
300
301  typedef base::hash_set<FilePath::StringType> FileNameSet;
302  // This set is used to eliminate duplicated file names in saving directory.
303  FileNameSet file_name_set_;
304
305  typedef base::hash_map<FilePath::StringType, uint32> FileNameCountMap;
306  // This map is used to track serial number for specified filename.
307  FileNameCountMap file_name_count_map_;
308
309  // Indicates current waiting state when SavePackage try to get something
310  // from outside.
311  WaitState wait_state_;
312
313  // Since for one tab, it can only have one SavePackage in same time.
314  // Now we actually use render_process_id as tab's unique id.
315  const int tab_id_;
316
317  // Unique ID for this SavePackage.
318  const int unique_id_;
319
320  // For managing select file dialogs.
321  scoped_refptr<SelectFileDialog> select_file_dialog_;
322
323  friend class SavePackageTest;
324  FRIEND_TEST_ALL_PREFIXES(SavePackageTest, TestSuggestedSaveNames);
325  FRIEND_TEST_ALL_PREFIXES(SavePackageTest, TestLongSafePureFilename);
326
327  ScopedRunnableMethodFactory<SavePackage> method_factory_;
328
329  DISALLOW_COPY_AND_ASSIGN(SavePackage);
330};
331
332#endif  // CHROME_BROWSER_DOWNLOAD_SAVE_PACKAGE_H_
333