1// Copyright (c) 2011 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef CHROME_BROWSER_DOWNLOAD_SAVE_PACKAGE_H_
6#define CHROME_BROWSER_DOWNLOAD_SAVE_PACKAGE_H_
7#pragma once
8
9#include <queue>
10#include <string>
11#include <vector>
12
13#include "base/basictypes.h"
14#include "base/file_path.h"
15#include "base/gtest_prod_util.h"
16#include "base/hash_tables.h"
17#include "base/memory/ref_counted.h"
18#include "base/task.h"
19#include "chrome/browser/ui/shell_dialogs.h"
20#include "content/browser/tab_contents/tab_contents_observer.h"
21#include "googleurl/src/gurl.h"
22
// Forward declarations. The full definitions of these types live in other
// headers. Note that GURL is additionally fully defined through the
// googleurl/src/gurl.h include in this file.
class DownloadItem;
class DownloadManager;
class GURL;
class MessageLoop;
class PrefService;
class Profile;
struct SaveFileCreateInfo;
class SaveFileManager;
class SaveItem;
class SavePackage;
struct SavePackageParam;
class TabContents;

namespace base {
class Thread;
class Time;
}  // namespace base

namespace net {
class URLRequestContextGetter;
}  // namespace net
44
45
46// The SavePackage object manages the process of saving a page as only-html or
47// complete-html and providing the information for displaying saving status.
48// Saving page as only-html means means that we save web page to a single HTML
49// file regardless internal sub resources and sub frames.
50// Saving page as complete-html page means we save not only the main html file
51// the user told it to save but also a directory for the auxiliary files such
52// as all sub-frame html files, image files, css files and js files.
53//
54// Each page saving job may include one or multiple files which need to be
55// saved. Each file is represented by a SaveItem, and all SaveItems are owned
56// by the SavePackage. SaveItems are created when a user initiates a page
57// saving job, and exist for the duration of one tab's life time.
58class SavePackage : public base::RefCountedThreadSafe<SavePackage>,
59                    public TabContentsObserver,
60                    public SelectFileDialog::Listener {
61 public:
62  enum SavePackageType {
63    // The value of the save type before its set by the user.
64    SAVE_TYPE_UNKNOWN = -1,
65    // User chose to save only the HTML of the page.
66    SAVE_AS_ONLY_HTML = 0,
67    // User chose to save complete-html page.
68    SAVE_AS_COMPLETE_HTML = 1
69  };
70
71  enum WaitState {
72    // State when created but not initialized.
73    INITIALIZE = 0,
74    // State when after initializing, but not yet saving.
75    START_PROCESS,
76    // Waiting on a list of savable resources from the backend.
77    RESOURCES_LIST,
78    // Waiting for data sent from net IO or from file system.
79    NET_FILES,
80    // Waiting for html DOM data sent from render process.
81    HTML_DATA,
82    // Saving page finished successfully.
83    SUCCESSFUL,
84    // Failed to save page.
85    FAILED
86  };
87
88  // Constructor for user initiated page saving. This constructor results in a
89  // SavePackage that will generate and sanitize a suggested name for the user
90  // in the "Save As" dialog box.
91  explicit SavePackage(TabContents* tab_contents);
92
93  // This contructor is used only for testing. We can bypass the file and
94  // directory name generation / sanitization by providing well known paths
95  // better suited for tests.
96  SavePackage(TabContents* tab_contents,
97              SavePackageType save_type,
98              const FilePath& file_full_path,
99              const FilePath& directory_full_path);
100
101  // Initialize the SavePackage. Returns true if it initializes properly.
102  // Need to make sure that this method must be called in the UI thread because
103  // using g_browser_process on a non-UI thread can cause crashes during
104  // shutdown.
105  bool Init();
106
107  void Cancel(bool user_action);
108
109  void Finish();
110
111  // Notifications sent from the file thread to the UI thread.
112  void StartSave(const SaveFileCreateInfo* info);
113  bool UpdateSaveProgress(int32 save_id, int64 size, bool write_success);
114  void SaveFinished(int32 save_id, int64 size, bool is_success);
115  void SaveFailed(const GURL& save_url);
116  void SaveCanceled(SaveItem* save_item);
117
118  // Rough percent complete, -1 means we don't know (since we didn't receive a
119  // total size).
120  int PercentComplete();
121
122  // Show or Open a saved page via the Windows shell.
123  void ShowDownloadInShell();
124
125  bool canceled() const { return user_canceled_ || disk_error_occurred_; }
126  bool finished() const { return finished_; }
127  SavePackageType save_type() const { return save_type_; }
128  int tab_id() const { return tab_id_; }
129  int id() const { return unique_id_; }
130
131  void GetSaveInfo();
132
133  // Statics -------------------------------------------------------------------
134
135  // Used to disable prompting the user for a directory/filename of the saved
136  // web page.  This is available for testing.
137  static void SetShouldPromptUser(bool should_prompt);
138
139  // Check whether we can do the saving page operation for the specified URL.
140  static bool IsSavableURL(const GURL& url);
141
142  // Check whether we can do the saving page operation for the contents which
143  // have the specified MIME type.
144  static bool IsSavableContents(const std::string& contents_mime_type);
145
146  // SelectFileDialog::Listener ------------------------------------------------
147  virtual void FileSelected(const FilePath& path, int index, void* params);
148  virtual void FileSelectionCanceled(void* params);
149
150 private:
151  friend class base::RefCountedThreadSafe<SavePackage>;
152
153  // For testing only.
154  SavePackage(TabContents* tab_contents,
155              const FilePath& file_full_path,
156              const FilePath& directory_full_path);
157
158  ~SavePackage();
159
160  // Notes from Init() above applies here as well.
161  void InternalInit();
162
163  void Stop();
164  void CheckFinish();
165  void SaveNextFile(bool process_all_remainder_items);
166  void DoSavingProcess();
167
168  // TabContentsObserver implementation.
169  virtual bool OnMessageReceived(const IPC::Message& message);
170
171  // Return max length of a path for a specific base directory.
172  // This is needed on POSIX, which restrict the length of file names in
173  // addition to the restriction on the length of path names.
174  // |base_dir| is assumed to be a directory name with no trailing slash.
175  static uint32 GetMaxPathLengthForDirectory(const FilePath& base_dir);
176
177  static bool GetSafePureFileName(const FilePath& dir_path,
178                                  const FilePath::StringType& file_name_ext,
179                                  uint32 max_file_path_len,
180                                  FilePath::StringType* pure_file_name);
181
182  // Create a file name based on the response from the server.
183  bool GenerateFileName(const std::string& disposition,
184                        const GURL& url,
185                        bool need_html_ext,
186                        FilePath::StringType* generated_name);
187
188  // Get all savable resource links from current web page, include main
189  // frame and sub-frame.
190  void GetAllSavableResourceLinksForCurrentPage();
191  // Get html data by serializing all frames of current page with lists
192  // which contain all resource links that have local copy.
193  void GetSerializedHtmlDataForCurrentPageWithLocalLinks();
194
195  SaveItem* LookupItemInProcessBySaveId(int32 save_id);
196  void PutInProgressItemToSavedMap(SaveItem* save_item);
197
198  // Retrieves the URL to be saved from tab_contents_ variable.
199  GURL GetUrlToBeSaved();
200
201  void CreateDirectoryOnFileThread(const FilePath& website_save_dir,
202                                   const FilePath& download_save_dir,
203                                   const std::string& mime_type);
204  void ContinueGetSaveInfo(const FilePath& suggested_path,
205                           bool can_save_as_complete);
206  void ContinueSave(const FilePath& final_name, int index);
207
208  void OnReceivedSavableResourceLinksForCurrentPage(
209      const std::vector<GURL>& resources_list,
210      const std::vector<GURL>& referrers_list,
211      const std::vector<GURL>& frames_list);
212
213  void OnReceivedSerializedHtmlData(const GURL& frame_url,
214                                    const std::string& data,
215                                    int32 status);
216
217
218  typedef base::hash_map<std::string, SaveItem*> SaveUrlItemMap;
219  // in_progress_items_ is map of all saving job in in-progress state.
220  SaveUrlItemMap in_progress_items_;
221  // saved_failed_items_ is map of all saving job which are failed.
222  SaveUrlItemMap saved_failed_items_;
223
224  // The number of in process SaveItems.
225  int in_process_count() const {
226    return static_cast<int>(in_progress_items_.size());
227  }
228
229  // The number of all SaveItems which have completed, including success items
230  // and failed items.
231  int completed_count() const {
232    return static_cast<int>(saved_success_items_.size() +
233                            saved_failed_items_.size());
234  }
235
236  // Retrieve the preference for the directory to save pages to.
237  static FilePath GetSaveDirPreference(PrefService* prefs);
238
239  // Helper function for preparing suggested name for the SaveAs Dialog. The
240  // suggested name is determined by the web document's title.
241  FilePath GetSuggestedNameForSaveAs(
242      bool can_save_as_complete,
243      const std::string& contents_mime_type);
244
245  // Ensures that the file name has a proper extension for HTML by adding ".htm"
246  // if necessary.
247  static FilePath EnsureHtmlExtension(const FilePath& name);
248
249  // Ensures that the file name has a proper extension for supported formats
250  // if necessary.
251  static FilePath EnsureMimeExtension(const FilePath& name,
252      const std::string& contents_mime_type);
253
254  // Returns extension for supported MIME types (for example, for "text/plain"
255  // it returns "txt").
256  static const FilePath::CharType* ExtensionForMimeType(
257      const std::string& contents_mime_type);
258
259  typedef std::queue<SaveItem*> SaveItemQueue;
260  // A queue for items we are about to start saving.
261  SaveItemQueue waiting_item_queue_;
262
263  typedef base::hash_map<int32, SaveItem*> SavedItemMap;
264  // saved_success_items_ is map of all saving job which are successfully saved.
265  SavedItemMap saved_success_items_;
266
267  // The request context which provides application-specific context for
268  // net::URLRequest instances.
269  scoped_refptr<net::URLRequestContextGetter> request_context_getter_;
270
271  // Non-owning pointer for handling file writing on the file thread.
272  SaveFileManager* file_manager_;
273
274  // We use a fake DownloadItem here in order to reuse the DownloadItemView.
275  // This class owns the pointer.
276  DownloadItem* download_;
277
278  // The URL of the page the user wants to save.
279  GURL page_url_;
280  FilePath saved_main_file_path_;
281  FilePath saved_main_directory_path_;
282
283  // The title of the page the user wants to save.
284  string16 title_;
285
286  // Indicates whether the actual saving job is finishing or not.
287  bool finished_;
288
289  // Indicates whether user canceled the saving job.
290  bool user_canceled_;
291
292  // Indicates whether user get disk error.
293  bool disk_error_occurred_;
294
295  // Type about saving page as only-html or complete-html.
296  SavePackageType save_type_;
297
298  // Number of all need to be saved resources.
299  size_t all_save_items_count_;
300
301  typedef base::hash_set<FilePath::StringType> FileNameSet;
302  // This set is used to eliminate duplicated file names in saving directory.
303  FileNameSet file_name_set_;
304
305  typedef base::hash_map<FilePath::StringType, uint32> FileNameCountMap;
306  // This map is used to track serial number for specified filename.
307  FileNameCountMap file_name_count_map_;
308
309  // Indicates current waiting state when SavePackage try to get something
310  // from outside.
311  WaitState wait_state_;
312
313  // Since for one tab, it can only have one SavePackage in same time.
314  // Now we actually use render_process_id as tab's unique id.
315  const int tab_id_;
316
317  // Unique ID for this SavePackage.
318  const int unique_id_;
319
320  // For managing select file dialogs.
321  scoped_refptr<SelectFileDialog> select_file_dialog_;
322
323  friend class SavePackageTest;
324  FRIEND_TEST_ALL_PREFIXES(SavePackageTest, TestSuggestedSaveNames);
325  FRIEND_TEST_ALL_PREFIXES(SavePackageTest, TestLongSafePureFilename);
326
327  ScopedRunnableMethodFactory<SavePackage> method_factory_;
328
329  DISALLOW_COPY_AND_ASSIGN(SavePackage);
330};
331
332#endif  // CHROME_BROWSER_DOWNLOAD_SAVE_PACKAGE_H_
333