1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "content/browser/download/save_package.h"
6
7#include <algorithm>
8
9#include "base/bind.h"
10#include "base/files/file_path.h"
11#include "base/files/file_util.h"
12#include "base/i18n/file_util_icu.h"
13#include "base/logging.h"
14#include "base/message_loop/message_loop.h"
15#include "base/stl_util.h"
16#include "base/strings/string_piece.h"
17#include "base/strings/string_split.h"
18#include "base/strings/sys_string_conversions.h"
19#include "base/strings/utf_string_conversions.h"
20#include "base/threading/thread.h"
21#include "content/browser/download/download_item_impl.h"
22#include "content/browser/download/download_manager_impl.h"
23#include "content/browser/download/download_stats.h"
24#include "content/browser/download/save_file.h"
25#include "content/browser/download/save_file_manager.h"
26#include "content/browser/download/save_item.h"
27#include "content/browser/loader/resource_dispatcher_host_impl.h"
28#include "content/browser/renderer_host/render_process_host_impl.h"
29#include "content/browser/renderer_host/render_view_host_delegate.h"
30#include "content/browser/renderer_host/render_view_host_impl.h"
31#include "content/common/view_messages.h"
32#include "content/public/browser/browser_context.h"
33#include "content/public/browser/browser_thread.h"
34#include "content/public/browser/content_browser_client.h"
35#include "content/public/browser/download_manager_delegate.h"
36#include "content/public/browser/navigation_entry.h"
37#include "content/public/browser/notification_service.h"
38#include "content/public/browser/notification_types.h"
39#include "content/public/browser/resource_context.h"
40#include "content/public/browser/web_contents.h"
41#include "net/base/filename_util.h"
42#include "net/base/io_buffer.h"
43#include "net/base/mime_util.h"
44#include "net/url_request/url_request_context.h"
45#include "third_party/WebKit/public/web/WebPageSerializerClient.h"
46#include "url/url_constants.h"
47
48using base::Time;
49using blink::WebPageSerializerClient;
50
51namespace content {
52namespace {
53
// A counter for uniquely identifying each save package. Every SavePackage
// constructor takes the current value as its unique id and then increments it.
int g_save_package_id = 0;

// Default name which will be used when we cannot get a proper name from the
// resource URL.
const char kDefaultSaveName[] = "saved_resource";

// Exclusive upper bound for the ordinal number appended when resolving name
// conflicts between files that share the same base file name.
const int32 kMaxFileOrdinalNumber = 9999;

// Maximum length for a file path. Since Windows has the MAX_PATH limitation
// for file paths, we need to make sure the path length of every saved file
// is less than MAX_PATH.
#if defined(OS_WIN)
const uint32 kMaxFilePathLength = MAX_PATH - 1;
#elif defined(OS_POSIX)
const uint32 kMaxFilePathLength = PATH_MAX - 1;
#endif

// Maximum length of the ordinal number part of a file name. The largest
// ordinal actually appended is 9998 (kMaxFileOrdinalNumber - 1), so the
// longest ordinal number part is "(9998)", i.e. 6 characters.
const uint32 kMaxFileOrdinalNumberPartLength = 6;
78
79// Strip current ordinal number, if any. Should only be used on pure
80// file names, i.e. those stripped of their extensions.
81// TODO(estade): improve this to not choke on alternate encodings.
82base::FilePath::StringType StripOrdinalNumber(
83    const base::FilePath::StringType& pure_file_name) {
84  base::FilePath::StringType::size_type r_paren_index =
85      pure_file_name.rfind(FILE_PATH_LITERAL(')'));
86  base::FilePath::StringType::size_type l_paren_index =
87      pure_file_name.rfind(FILE_PATH_LITERAL('('));
88  if (l_paren_index >= r_paren_index)
89    return pure_file_name;
90
91  for (base::FilePath::StringType::size_type i = l_paren_index + 1;
92       i != r_paren_index; ++i) {
93    if (!IsAsciiDigit(pure_file_name[i]))
94      return pure_file_name;
95  }
96
97  return pure_file_name.substr(0, l_paren_index);
98}
99
// Returns true when contents with the MIME type |contents_mime_type| can be
// saved as complete-HTML. Only "text/html" and "application/xhtml+xml"
// qualify; the comparison is exact (case-sensitive, no parameters).
bool CanSaveAsComplete(const std::string& contents_mime_type) {
  if (contents_mime_type == "text/html")
    return true;
  return contents_mime_type == "application/xhtml+xml";
}
107
108// Request handle for SavePackage downloads. Currently doesn't support
109// pause/resume/cancel, but returns a WebContents.
110class SavePackageRequestHandle : public DownloadRequestHandleInterface {
111 public:
112  SavePackageRequestHandle(base::WeakPtr<SavePackage> save_package)
113      : save_package_(save_package) {}
114
115  // DownloadRequestHandleInterface
116  virtual WebContents* GetWebContents() const OVERRIDE {
117    return save_package_.get() ? save_package_->web_contents() : NULL;
118  }
119  virtual DownloadManager* GetDownloadManager() const OVERRIDE {
120    return NULL;
121  }
122  virtual void PauseRequest() const OVERRIDE {}
123  virtual void ResumeRequest() const OVERRIDE {}
124  virtual void CancelRequest() const OVERRIDE {}
125  virtual std::string DebugString() const OVERRIDE {
126    return "SavePackage DownloadRequestHandle";
127  }
128
129 private:
130  base::WeakPtr<SavePackage> save_package_;
131};
132
133}  // namespace
134
// Extension (without the leading dot) given to saved HTML resources.
// GenerateFileName() appends it after a "." separator when an HTML extension
// is required.
const base::FilePath::CharType SavePackage::kDefaultHtmlExtension[] =
    FILE_PATH_LITERAL("html");
137
// Creates a SavePackage for |web_contents| when the save type and the
// destination paths are already known. |file_full_path| is where the main
// page is written; |directory_full_path| is the directory that generated
// sub-resource file names are appended to.
SavePackage::SavePackage(WebContents* web_contents,
                         SavePageType save_type,
                         const base::FilePath& file_full_path,
                         const base::FilePath& directory_full_path)
    : WebContentsObserver(web_contents),
      file_manager_(NULL),
      download_manager_(NULL),
      download_(NULL),
      // GetUrlToBeSaved() reads web_contents(), so it relies on the
      // WebContentsObserver base above having been initialized already.
      page_url_(GetUrlToBeSaved()),
      saved_main_file_path_(file_full_path),
      saved_main_directory_path_(directory_full_path),
      title_(web_contents->GetTitle()),
      start_tick_(base::TimeTicks::Now()),
      finished_(false),
      mhtml_finishing_(false),
      user_canceled_(false),
      disk_error_occurred_(false),
      save_type_(save_type),
      all_save_items_count_(0),
      file_name_set_(&base::FilePath::CompareLessIgnoreCase),
      wait_state_(INITIALIZE),
      contents_id_(web_contents->GetRenderProcessHost()->GetID()),
      unique_id_(g_save_package_id++),
      wrote_to_completed_file_(false),
      wrote_to_failed_file_(false) {
  DCHECK(page_url_.is_valid());
  // Only the three concrete save types are accepted by this constructor.
  DCHECK((save_type_ == SAVE_PAGE_TYPE_AS_ONLY_HTML) ||
         (save_type_ == SAVE_PAGE_TYPE_AS_MHTML) ||
         (save_type_ == SAVE_PAGE_TYPE_AS_COMPLETE_HTML));
  // Both destination paths must be non-empty and within the path limit.
  DCHECK(!saved_main_file_path_.empty() &&
         saved_main_file_path_.value().length() <= kMaxFilePathLength);
  DCHECK(!saved_main_directory_path_.empty() &&
         saved_main_directory_path_.value().length() < kMaxFilePathLength);
  InternalInit();
}
173
// Creates a SavePackage for |web_contents| before the save type and the
// destination paths are known: the save type starts as
// SAVE_PAGE_TYPE_UNKNOWN and both destination paths are left empty.
SavePackage::SavePackage(WebContents* web_contents)
    : WebContentsObserver(web_contents),
      file_manager_(NULL),
      download_manager_(NULL),
      download_(NULL),
      // GetUrlToBeSaved() reads web_contents(), so it relies on the
      // WebContentsObserver base above having been initialized already.
      page_url_(GetUrlToBeSaved()),
      title_(web_contents->GetTitle()),
      start_tick_(base::TimeTicks::Now()),
      finished_(false),
      mhtml_finishing_(false),
      user_canceled_(false),
      disk_error_occurred_(false),
      save_type_(SAVE_PAGE_TYPE_UNKNOWN),
      all_save_items_count_(0),
      file_name_set_(&base::FilePath::CompareLessIgnoreCase),
      wait_state_(INITIALIZE),
      contents_id_(web_contents->GetRenderProcessHost()->GetID()),
      unique_id_(g_save_package_id++),
      wrote_to_completed_file_(false),
      wrote_to_failed_file_(false) {
  DCHECK(page_url_.is_valid());
  InternalInit();
}
197
// This constructor is for testing use. |finished_| is set to true because we
// don't want Cancel() to be called from the destructor in test mode.
// We also don't call InternalInit(), so |file_manager_| and
// |download_manager_| stay NULL. |page_url_| and |title_| are left
// default-constructed and |contents_id_| is 0.
SavePackage::SavePackage(WebContents* web_contents,
                         const base::FilePath& file_full_path,
                         const base::FilePath& directory_full_path)
    : WebContentsObserver(web_contents),
      file_manager_(NULL),
      download_manager_(NULL),
      download_(NULL),
      saved_main_file_path_(file_full_path),
      saved_main_directory_path_(directory_full_path),
      start_tick_(base::TimeTicks::Now()),
      finished_(true),
      mhtml_finishing_(false),
      user_canceled_(false),
      disk_error_occurred_(false),
      save_type_(SAVE_PAGE_TYPE_UNKNOWN),
      all_save_items_count_(0),
      file_name_set_(&base::FilePath::CompareLessIgnoreCase),
      wait_state_(INITIALIZE),
      contents_id_(0),
      unique_id_(g_save_package_id++),
      wrote_to_completed_file_(false),
      wrote_to_failed_file_(false) {
}
224
225SavePackage::~SavePackage() {
226  // Stop receiving saving job's updates
227  if (!finished_ && !canceled()) {
228    // Unexpected quit.
229    Cancel(true);
230  }
231
232  // We should no longer be observing the DownloadItem at this point.
233  CHECK(!download_);
234
235  DCHECK(all_save_items_count_ == (waiting_item_queue_.size() +
236                                   completed_count() +
237                                   in_process_count()));
238  // Free all SaveItems.
239  while (!waiting_item_queue_.empty()) {
240    // We still have some items which are waiting for start to save.
241    SaveItem* save_item = waiting_item_queue_.front();
242    waiting_item_queue_.pop();
243    delete save_item;
244  }
245
246  STLDeleteValues(&saved_success_items_);
247  STLDeleteValues(&in_progress_items_);
248  STLDeleteValues(&saved_failed_items_);
249
250  file_manager_ = NULL;
251}
252
253GURL SavePackage::GetUrlToBeSaved() {
254  // Instead of using web_contents_.GetURL here, we use url() (which is the
255  // "real" url of the page) from the NavigationEntry because it reflects its
256  // origin rather than the displayed one (returned by GetURL) which may be
257  // different (like having "view-source:" on the front).
258  NavigationEntry* visible_entry =
259      web_contents()->GetController().GetVisibleEntry();
260  return visible_entry->GetURL();
261}
262
263void SavePackage::Cancel(bool user_action) {
264  if (!canceled()) {
265    if (user_action)
266      user_canceled_ = true;
267    else
268      disk_error_occurred_ = true;
269    Stop();
270  }
271  RecordSavePackageEvent(SAVE_PACKAGE_CANCELLED);
272}
273
274// Init() can be called directly, or indirectly via GetSaveInfo(). In both
275// cases, we need file_manager_ to be initialized, so we do this first.
276void SavePackage::InternalInit() {
277  ResourceDispatcherHostImpl* rdh = ResourceDispatcherHostImpl::Get();
278  if (!rdh) {
279    NOTREACHED();
280    return;
281  }
282
283  file_manager_ = rdh->save_file_manager();
284  DCHECK(file_manager_);
285
286  download_manager_ = static_cast<DownloadManagerImpl*>(
287      BrowserContext::GetDownloadManager(
288          web_contents()->GetBrowserContext()));
289  DCHECK(download_manager_);
290
291  RecordSavePackageEvent(SAVE_PACKAGE_STARTED);
292}
293
294bool SavePackage::Init(
295    const SavePackageDownloadCreatedCallback& download_created_callback) {
296  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
297  // Set proper running state.
298  if (wait_state_ != INITIALIZE)
299    return false;
300
301  wait_state_ = START_PROCESS;
302
303  // Initialize the request context and resource dispatcher.
304  BrowserContext* browser_context = web_contents()->GetBrowserContext();
305  if (!browser_context) {
306    NOTREACHED();
307    return false;
308  }
309
310  scoped_ptr<DownloadRequestHandleInterface> request_handle(
311      new SavePackageRequestHandle(AsWeakPtr()));
312  // The download manager keeps ownership but adds us as an observer.
313  download_manager_->CreateSavePackageDownloadItem(
314      saved_main_file_path_,
315      page_url_,
316      ((save_type_ == SAVE_PAGE_TYPE_AS_MHTML) ?
317       "multipart/related" : "text/html"),
318      request_handle.Pass(),
319      base::Bind(&SavePackage::InitWithDownloadItem, AsWeakPtr(),
320                 download_created_callback));
321  return true;
322}
323
324void SavePackage::InitWithDownloadItem(
325    const SavePackageDownloadCreatedCallback& download_created_callback,
326    DownloadItemImpl* item) {
327  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
328  DCHECK(item);
329  download_ = item;
330  download_->AddObserver(this);
331  // Confirm above didn't delete the tab out from under us.
332  if (!download_created_callback.is_null())
333    download_created_callback.Run(download_);
334
335  // Check save type and process the save page job.
336  if (save_type_ == SAVE_PAGE_TYPE_AS_COMPLETE_HTML) {
337    // Get directory
338    DCHECK(!saved_main_directory_path_.empty());
339    GetAllSavableResourceLinksForCurrentPage();
340  } else if (save_type_ == SAVE_PAGE_TYPE_AS_MHTML) {
341    web_contents()->GenerateMHTML(saved_main_file_path_, base::Bind(
342        &SavePackage::OnMHTMLGenerated, this));
343  } else {
344    DCHECK_EQ(SAVE_PAGE_TYPE_AS_ONLY_HTML, save_type_) << save_type_;
345    wait_state_ = NET_FILES;
346    SaveFileCreateInfo::SaveFileSource save_source = page_url_.SchemeIsFile() ?
347        SaveFileCreateInfo::SAVE_FILE_FROM_FILE :
348        SaveFileCreateInfo::SAVE_FILE_FROM_NET;
349    SaveItem* save_item = new SaveItem(page_url_,
350                                       Referrer(),
351                                       this,
352                                       save_source);
353    // Add this item to waiting list.
354    waiting_item_queue_.push(save_item);
355    all_save_items_count_ = 1;
356    download_->SetTotalBytes(1);
357
358    DoSavingProcess();
359  }
360}
361
362void SavePackage::OnMHTMLGenerated(int64 size) {
363  if (size <= 0) {
364    Cancel(false);
365    return;
366  }
367  wrote_to_completed_file_ = true;
368
369  // Hack to avoid touching download_ after user cancel.
370  // TODO(rdsmith/benjhayden): Integrate canceling on DownloadItem
371  // with SavePackage flow.
372  if (download_->GetState() == DownloadItem::IN_PROGRESS) {
373    download_->SetTotalBytes(size);
374    download_->DestinationUpdate(size, 0, std::string());
375    // Must call OnAllDataSaved here in order for
376    // GDataDownloadObserver::ShouldUpload() to return true.
377    // ShouldCompleteDownload() may depend on the gdata uploader to finish.
378    download_->OnAllDataSaved(DownloadItem::kEmptyFileHash);
379  }
380
381  if (!download_manager_->GetDelegate()) {
382    Finish();
383    return;
384  }
385
386  if (download_manager_->GetDelegate()->ShouldCompleteDownload(
387          download_, base::Bind(&SavePackage::Finish, this))) {
388    Finish();
389  }
390}
391
392// On POSIX, the length of |pure_file_name| + |file_name_ext| is further
393// restricted by NAME_MAX. The maximum allowed path looks like:
394// '/path/to/save_dir' + '/' + NAME_MAX.
395uint32 SavePackage::GetMaxPathLengthForDirectory(
396    const base::FilePath& base_dir) {
397#if defined(OS_POSIX)
398  return std::min(kMaxFilePathLength,
399                  static_cast<uint32>(base_dir.value().length()) +
400                  NAME_MAX + 1);
401#else
402  return kMaxFilePathLength;
403#endif
404}
405
406// File name is considered being consist of pure file name, dot and file
407// extension name. File name might has no dot and file extension, or has
408// multiple dot inside file name. The dot, which separates the pure file
409// name and file extension name, is last dot in the whole file name.
410// This function is for making sure the length of specified file path is not
411// great than the specified maximum length of file path and getting safe pure
412// file name part if the input pure file name is too long.
413// The parameter |dir_path| specifies directory part of the specified
414// file path. The parameter |file_name_ext| specifies file extension
415// name part of the specified file path (including start dot). The parameter
416// |max_file_path_len| specifies maximum length of the specified file path.
417// The parameter |pure_file_name| input pure file name part of the specified
418// file path. If the length of specified file path is great than
419// |max_file_path_len|, the |pure_file_name| will output new pure file name
420// part for making sure the length of specified file path is less than
421// specified maximum length of file path. Return false if the function can
422// not get a safe pure file name, otherwise it returns true.
423bool SavePackage::GetSafePureFileName(
424    const base::FilePath& dir_path,
425    const base::FilePath::StringType& file_name_ext,
426    uint32 max_file_path_len,
427    base::FilePath::StringType* pure_file_name) {
428  DCHECK(!pure_file_name->empty());
429  int available_length = static_cast<int>(max_file_path_len -
430                                          dir_path.value().length() -
431                                          file_name_ext.length());
432  // Need an extra space for the separator.
433  if (!dir_path.EndsWithSeparator())
434    --available_length;
435
436  // Plenty of room.
437  if (static_cast<int>(pure_file_name->length()) <= available_length)
438    return true;
439
440  // Limited room. Truncate |pure_file_name| to fit.
441  if (available_length > 0) {
442    *pure_file_name = pure_file_name->substr(0, available_length);
443    return true;
444  }
445
446  // Not enough room to even use a shortened |pure_file_name|.
447  pure_file_name->clear();
448  return false;
449}
450
// Generates the file name (without directory) under which the resource at
// |url| is saved inside |saved_main_directory_path_|. |disposition| is
// forwarded to net::GenerateFileName(). If |need_html_ext| is true the
// extension is forced to "." + kDefaultHtmlExtension. The name is shortened
// to fit the directory's path limit and, when it collides (ignoring case)
// with a name generated earlier, is made unique with an "(N)" ordinal. On
// success the name is stored in |*generated_name| and remembered in
// |file_name_set_|. Returns false when no safe name can be produced.
bool SavePackage::GenerateFileName(const std::string& disposition,
                                   const GURL& url,
                                   bool need_html_ext,
                                   base::FilePath::StringType* generated_name) {
  // TODO(jungshik): Figure out the referrer charset when having one
  // makes sense and pass it to GenerateFileName.
  // NOTE(review): the three empty strings are presumably the referrer
  // charset, suggested name and MIME type hints; confirm against
  // net/base/filename_util.h.
  base::FilePath file_path = net::GenerateFileName(url,
                                                   disposition,
                                                   std::string(),
                                                   std::string(),
                                                   std::string(),
                                                   kDefaultSaveName);

  DCHECK(!file_path.empty());
  // Split the suggestion into its pure name and its extension.
  base::FilePath::StringType pure_file_name =
      file_path.RemoveExtension().BaseName().value();
  base::FilePath::StringType file_name_ext = file_path.Extension();

  // If it is HTML resource, use ".html" as its extension.
  if (need_html_ext) {
    file_name_ext = FILE_PATH_LITERAL(".");
    file_name_ext.append(kDefaultHtmlExtension);
  }

  // Need to make sure the suggested file name is not too long.
  uint32 max_path = GetMaxPathLengthForDirectory(saved_main_directory_path_);

  // Get safe pure file name.
  if (!GetSafePureFileName(saved_main_directory_path_, file_name_ext,
                           max_path, &pure_file_name))
    return false;

  base::FilePath::StringType file_name = pure_file_name + file_name_ext;

  // Check whether we already have same name in a case insensitive manner.
  FileNameSet::const_iterator iter = file_name_set_.find(file_name);
  if (iter == file_name_set_.end()) {
    file_name_set_.insert(file_name);
  } else {
    // Found same name, increase the ordinal number for the file name.
    // The base name is derived from the entry already in the set (which may
    // differ in case from the new suggestion), minus any existing ordinal.
    pure_file_name =
        base::FilePath(*iter).RemoveExtension().BaseName().value();
    base::FilePath::StringType base_file_name =
        StripOrdinalNumber(pure_file_name);

    // We need to make sure the length of base file name plus maximum ordinal
    // number path will be less than or equal to kMaxFilePathLength.
    if (!GetSafePureFileName(saved_main_directory_path_, file_name_ext,
        max_path - kMaxFileOrdinalNumberPartLength, &base_file_name))
      return false;

    // Prepare the new ordinal number.
    uint32 ordinal_number;
    FileNameCountMap::iterator it = file_name_count_map_.find(base_file_name);
    if (it == file_name_count_map_.end()) {
      // First base-name-conflict resolving, use 1 as initial ordinal number.
      file_name_count_map_[base_file_name] = 1;
      ordinal_number = 1;
    } else {
      // We have met same base-name conflict, use latest ordinal number.
      ordinal_number = it->second;
    }

    if (ordinal_number > (kMaxFileOrdinalNumber - 1)) {
      // Ordinals are exhausted for this base name.
      // Use a random file from temporary file.
      // NOTE(review): the result of CreateTemporaryFile() is not checked and
      // the name taken from it is used without |file_name_ext|.
      base::FilePath temp_file;
      base::CreateTemporaryFile(&temp_file);
      file_name = temp_file.RemoveExtension().BaseName().value();
      // Get safe pure file name.
      if (!GetSafePureFileName(saved_main_directory_path_,
                               base::FilePath::StringType(),
                               max_path, &file_name))
        return false;
    } else {
      // Try ordinals from the remembered one upwards until a free name is
      // found, and remember the next ordinal to try for this base name.
      // NOTE(review): if every remaining ordinal is taken, |file_name| keeps
      // the conflicting name computed above.
      for (int i = ordinal_number; i < kMaxFileOrdinalNumber; ++i) {
        base::FilePath::StringType new_name = base_file_name +
            base::StringPrintf(FILE_PATH_LITERAL("(%d)"), i) + file_name_ext;
        if (file_name_set_.find(new_name) == file_name_set_.end()) {
          // Resolved name conflict.
          file_name = new_name;
          file_name_count_map_[base_file_name] = ++i;
          break;
        }
      }
    }

    file_name_set_.insert(file_name);
  }

  DCHECK(!file_name.empty());
  generated_name->assign(file_name);

  return true;
}
546
547// We have received a message from SaveFileManager about a new saving job. We
548// create a SaveItem and store it in our in_progress list.
549void SavePackage::StartSave(const SaveFileCreateInfo* info) {
550  DCHECK(info && !info->url.is_empty());
551
552  SaveUrlItemMap::iterator it = in_progress_items_.find(info->url.spec());
553  if (it == in_progress_items_.end()) {
554    // If not found, we must have cancel action.
555    DCHECK(canceled());
556    return;
557  }
558  SaveItem* save_item = it->second;
559
560  DCHECK(!saved_main_file_path_.empty());
561
562  save_item->SetSaveId(info->save_id);
563  save_item->SetTotalBytes(info->total_bytes);
564
565  // Determine the proper path for a saving job, by choosing either the default
566  // save directory, or prompting the user.
567  DCHECK(!save_item->has_final_name());
568  if (info->url != page_url_) {
569    base::FilePath::StringType generated_name;
570    // For HTML resource file, make sure it will have .htm as extension name,
571    // otherwise, when you open the saved page in Chrome again, download
572    // file manager will treat it as downloadable resource, and download it
573    // instead of opening it as HTML.
574    bool need_html_ext =
575        info->save_source == SaveFileCreateInfo::SAVE_FILE_FROM_DOM;
576    if (!GenerateFileName(info->content_disposition,
577                          GURL(info->url),
578                          need_html_ext,
579                          &generated_name)) {
580      // We can not generate file name for this SaveItem, so we cancel the
581      // saving page job if the save source is from serialized DOM data.
582      // Otherwise, it means this SaveItem is sub-resource type, we treat it
583      // as an error happened on saving. We can ignore this type error for
584      // sub-resource links which will be resolved as absolute links instead
585      // of local links in final saved contents.
586      if (info->save_source == SaveFileCreateInfo::SAVE_FILE_FROM_DOM)
587        Cancel(true);
588      else
589        SaveFinished(save_item->save_id(), 0, false);
590      return;
591    }
592
593    // When saving page as only-HTML, we only have a SaveItem whose url
594    // must be page_url_.
595    DCHECK(save_type_ == SAVE_PAGE_TYPE_AS_COMPLETE_HTML);
596    DCHECK(!saved_main_directory_path_.empty());
597
598    // Now we get final name retrieved from GenerateFileName, we will use it
599    // rename the SaveItem.
600    base::FilePath final_name =
601        saved_main_directory_path_.Append(generated_name);
602    save_item->Rename(final_name);
603  } else {
604    // It is the main HTML file, use the name chosen by the user.
605    save_item->Rename(saved_main_file_path_);
606  }
607
608  // If the save source is from file system, inform SaveFileManager to copy
609  // corresponding file to the file path which this SaveItem specifies.
610  if (info->save_source == SaveFileCreateInfo::SAVE_FILE_FROM_FILE) {
611    BrowserThread::PostTask(
612        BrowserThread::FILE, FROM_HERE,
613        base::Bind(&SaveFileManager::SaveLocalFile,
614                   file_manager_,
615                   save_item->url(),
616                   save_item->save_id(),
617                   contents_id()));
618    return;
619  }
620
621  // Check whether we begin to require serialized HTML data.
622  if (save_type_ == SAVE_PAGE_TYPE_AS_COMPLETE_HTML &&
623      wait_state_ == HTML_DATA) {
624    // Inform backend to serialize the all frames' DOM and send serialized
625    // HTML data back.
626    GetSerializedHtmlDataForCurrentPageWithLocalLinks();
627  }
628}
629
630SaveItem* SavePackage::LookupItemInProcessBySaveId(int32 save_id) {
631  if (in_process_count()) {
632    for (SaveUrlItemMap::iterator it = in_progress_items_.begin();
633        it != in_progress_items_.end(); ++it) {
634      SaveItem* save_item = it->second;
635      DCHECK(save_item->state() == SaveItem::IN_PROGRESS);
636      if (save_item->save_id() == save_id)
637        return save_item;
638    }
639  }
640  return NULL;
641}
642
643void SavePackage::PutInProgressItemToSavedMap(SaveItem* save_item) {
644  SaveUrlItemMap::iterator it = in_progress_items_.find(
645      save_item->url().spec());
646  DCHECK(it != in_progress_items_.end());
647  DCHECK(save_item == it->second);
648  in_progress_items_.erase(it);
649
650  if (save_item->success()) {
651    // Add it to saved_success_items_.
652    DCHECK(saved_success_items_.find(save_item->save_id()) ==
653           saved_success_items_.end());
654    saved_success_items_[save_item->save_id()] = save_item;
655  } else {
656    // Add it to saved_failed_items_.
657    DCHECK(saved_failed_items_.find(save_item->url().spec()) ==
658           saved_failed_items_.end());
659    saved_failed_items_[save_item->url().spec()] = save_item;
660  }
661}
662
663// Called for updating saving state.
664bool SavePackage::UpdateSaveProgress(int32 save_id,
665                                     int64 size,
666                                     bool write_success) {
667  // Because we might have canceled this saving job before,
668  // so we might not find corresponding SaveItem.
669  SaveItem* save_item = LookupItemInProcessBySaveId(save_id);
670  if (!save_item)
671    return false;
672
673  save_item->Update(size);
674
675  // If we got disk error, cancel whole save page job.
676  if (!write_success) {
677    // Cancel job with reason of disk error.
678    Cancel(false);
679  }
680  return true;
681}
682
683// Stop all page saving jobs that are in progress and instruct the file thread
684// to delete all saved  files.
685void SavePackage::Stop() {
686  // If we haven't moved out of the initial state, there's nothing to cancel and
687  // there won't be valid pointers for file_manager_ or download_.
688  if (wait_state_ == INITIALIZE)
689    return;
690
691  // When stopping, if it still has some items in in_progress, cancel them.
692  DCHECK(canceled());
693  if (in_process_count()) {
694    SaveUrlItemMap::iterator it = in_progress_items_.begin();
695    for (; it != in_progress_items_.end(); ++it) {
696      SaveItem* save_item = it->second;
697      DCHECK(save_item->state() == SaveItem::IN_PROGRESS);
698      save_item->Cancel();
699    }
700    // Remove all in progress item to saved map. For failed items, they will
701    // be put into saved_failed_items_, for successful item, they will be put
702    // into saved_success_items_.
703    while (in_process_count())
704      PutInProgressItemToSavedMap(in_progress_items_.begin()->second);
705  }
706
707  // This vector contains the save ids of the save files which SaveFileManager
708  // needs to remove from its save_file_map_.
709  SaveIDList save_ids;
710  for (SavedItemMap::iterator it = saved_success_items_.begin();
711      it != saved_success_items_.end(); ++it)
712    save_ids.push_back(it->first);
713  for (SaveUrlItemMap::iterator it = saved_failed_items_.begin();
714      it != saved_failed_items_.end(); ++it)
715    save_ids.push_back(it->second->save_id());
716
717  BrowserThread::PostTask(
718      BrowserThread::FILE, FROM_HERE,
719      base::Bind(&SaveFileManager::RemoveSavedFileFromFileMap,
720                 file_manager_,
721                 save_ids));
722
723  finished_ = true;
724  wait_state_ = FAILED;
725
726  // Inform the DownloadItem we have canceled whole save page job.
727  if (download_) {
728    download_->Cancel(false);
729    FinalizeDownloadEntry();
730  }
731}
732
733void SavePackage::CheckFinish() {
734  if (in_process_count() || finished_)
735    return;
736
737  base::FilePath dir = (save_type_ == SAVE_PAGE_TYPE_AS_COMPLETE_HTML &&
738                        saved_success_items_.size() > 1) ?
739                        saved_main_directory_path_ : base::FilePath();
740
741  // This vector contains the final names of all the successfully saved files
742  // along with their save ids. It will be passed to SaveFileManager to do the
743  // renaming job.
744  FinalNameList final_names;
745  for (SavedItemMap::iterator it = saved_success_items_.begin();
746      it != saved_success_items_.end(); ++it)
747    final_names.push_back(std::make_pair(it->first,
748                                         it->second->full_path()));
749
750  BrowserThread::PostTask(
751      BrowserThread::FILE, FROM_HERE,
752      base::Bind(&SaveFileManager::RenameAllFiles,
753                 file_manager_,
754                 final_names,
755                 dir,
756                 web_contents()->GetRenderProcessHost()->GetID(),
757                 web_contents()->GetRenderViewHost()->GetRoutingID(),
758                 id()));
759}
760
761// Successfully finished all items of this SavePackage.
762void SavePackage::Finish() {
763  // User may cancel the job when we're moving files to the final directory.
764  if (canceled())
765    return;
766
767  wait_state_ = SUCCESSFUL;
768  finished_ = true;
769
770  // Record finish.
771  RecordSavePackageEvent(SAVE_PACKAGE_FINISHED);
772
773  // Record any errors that occurred.
774  if (wrote_to_completed_file_) {
775    RecordSavePackageEvent(SAVE_PACKAGE_WRITE_TO_COMPLETED);
776  }
777
778  if (wrote_to_failed_file_) {
779    RecordSavePackageEvent(SAVE_PACKAGE_WRITE_TO_FAILED);
780  }
781
782  // This vector contains the save ids of the save files which SaveFileManager
783  // needs to remove from its save_file_map_.
784  SaveIDList save_ids;
785  for (SaveUrlItemMap::iterator it = saved_failed_items_.begin();
786       it != saved_failed_items_.end(); ++it)
787    save_ids.push_back(it->second->save_id());
788
789  BrowserThread::PostTask(
790      BrowserThread::FILE, FROM_HERE,
791      base::Bind(&SaveFileManager::RemoveSavedFileFromFileMap,
792                 file_manager_,
793                 save_ids));
794
795  if (download_) {
796    // Hack to avoid touching download_ after user cancel.
797    // TODO(rdsmith/benjhayden): Integrate canceling on DownloadItem
798    // with SavePackage flow.
799    if (download_->GetState() == DownloadItem::IN_PROGRESS) {
800      if (save_type_ != SAVE_PAGE_TYPE_AS_MHTML) {
801        download_->DestinationUpdate(
802            all_save_items_count_, CurrentSpeed(), std::string());
803        download_->OnAllDataSaved(DownloadItem::kEmptyFileHash);
804      }
805      download_->MarkAsComplete();
806    }
807    FinalizeDownloadEntry();
808  }
809}
810
811// Called for updating end state.
812void SavePackage::SaveFinished(int32 save_id, int64 size, bool is_success) {
813  // Because we might have canceled this saving job before,
814  // so we might not find corresponding SaveItem. Just ignore it.
815  SaveItem* save_item = LookupItemInProcessBySaveId(save_id);
816  if (!save_item)
817    return;
818
819  // Let SaveItem set end state.
820  save_item->Finish(size, is_success);
821  // Remove the associated save id and SavePackage.
822  file_manager_->RemoveSaveFile(save_id, save_item->url(), this);
823
824  PutInProgressItemToSavedMap(save_item);
825
826  // Inform the DownloadItem to update UI.
827  // We use the received bytes as number of saved files.
828  // Hack to avoid touching download_ after user cancel.
829  // TODO(rdsmith/benjhayden): Integrate canceling on DownloadItem
830  // with SavePackage flow.
831  if (download_ && (download_->GetState() == DownloadItem::IN_PROGRESS)) {
832    download_->DestinationUpdate(
833        completed_count(), CurrentSpeed(), std::string());
834  }
835
836  if (save_item->save_source() == SaveFileCreateInfo::SAVE_FILE_FROM_DOM &&
837      save_item->url() == page_url_ && !save_item->received_bytes()) {
838    // If size of main HTML page is 0, treat it as disk error.
839    Cancel(false);
840    return;
841  }
842
843  if (canceled()) {
844    DCHECK(finished_);
845    return;
846  }
847
848  // Continue processing the save page job.
849  DoSavingProcess();
850
851  // Check whether we can successfully finish whole job.
852  CheckFinish();
853}
854
855// Sometimes, the net io will only call SaveFileManager::SaveFinished with
856// save id -1 when it encounters error. Since in this case, save id will be
857// -1, so we can only use URL to find which SaveItem is associated with
858// this error.
859// Saving an item failed. If it's a sub-resource, ignore it. If the error comes
860// from serializing HTML data, then cancel saving page.
861void SavePackage::SaveFailed(const GURL& save_url) {
862  SaveUrlItemMap::iterator it = in_progress_items_.find(save_url.spec());
863  if (it == in_progress_items_.end()) {
864    NOTREACHED();  // Should not exist!
865    return;
866  }
867  SaveItem* save_item = it->second;
868
869  save_item->Finish(0, false);
870
871  PutInProgressItemToSavedMap(save_item);
872
873  // Inform the DownloadItem to update UI.
874  // We use the received bytes as number of saved files.
875  // Hack to avoid touching download_ after user cancel.
876  // TODO(rdsmith/benjhayden): Integrate canceling on DownloadItem
877  // with SavePackage flow.
878  if (download_ && (download_->GetState() == DownloadItem::IN_PROGRESS)) {
879    download_->DestinationUpdate(
880        completed_count(), CurrentSpeed(), std::string());
881  }
882
883  if ((save_type_ == SAVE_PAGE_TYPE_AS_ONLY_HTML) ||
884      (save_type_ == SAVE_PAGE_TYPE_AS_MHTML) ||
885      (save_item->save_source() == SaveFileCreateInfo::SAVE_FILE_FROM_DOM)) {
886    // We got error when saving page. Treat it as disk error.
887    Cancel(true);
888  }
889
890  if (canceled()) {
891    DCHECK(finished_);
892    return;
893  }
894
895  // Continue processing the save page job.
896  DoSavingProcess();
897
898  CheckFinish();
899}
900
901void SavePackage::SaveCanceled(SaveItem* save_item) {
902  // Call the RemoveSaveFile in UI thread.
903  file_manager_->RemoveSaveFile(save_item->save_id(),
904                                save_item->url(),
905                                this);
906  if (save_item->save_id() != -1)
907    BrowserThread::PostTask(
908        BrowserThread::FILE, FROM_HERE,
909        base::Bind(&SaveFileManager::CancelSave,
910                   file_manager_,
911                   save_item->save_id()));
912}
913
914// Initiate a saving job of a specific URL. We send the request to
915// SaveFileManager, which will dispatch it to different approach according to
916// the save source. Parameter process_all_remaining_items indicates whether
917// we need to save all remaining items.
918void SavePackage::SaveNextFile(bool process_all_remaining_items) {
919  DCHECK(web_contents());
920  DCHECK(waiting_item_queue_.size());
921
922  do {
923    // Pop SaveItem from waiting list.
924    SaveItem* save_item = waiting_item_queue_.front();
925    waiting_item_queue_.pop();
926
927    // Add the item to in_progress_items_.
928    SaveUrlItemMap::iterator it = in_progress_items_.find(
929        save_item->url().spec());
930    DCHECK(it == in_progress_items_.end());
931    in_progress_items_[save_item->url().spec()] = save_item;
932    save_item->Start();
933    file_manager_->SaveURL(save_item->url(),
934                           save_item->referrer(),
935                           web_contents()->GetRenderProcessHost()->GetID(),
936                           routing_id(),
937                           save_item->save_source(),
938                           save_item->full_path(),
939                           web_contents()->
940                               GetBrowserContext()->GetResourceContext(),
941                           this);
942  } while (process_all_remaining_items && waiting_item_queue_.size());
943}
944
945// Calculate the percentage of whole save page job.
946int SavePackage::PercentComplete() {
947  if (!all_save_items_count_)
948    return 0;
949  else if (!in_process_count())
950    return 100;
951  else
952    return completed_count() / all_save_items_count_;
953}
954
955int64 SavePackage::CurrentSpeed() const {
956  base::TimeDelta diff = base::TimeTicks::Now() - start_tick_;
957  int64 diff_ms = diff.InMilliseconds();
958  return diff_ms == 0 ? 0 : completed_count() * 1000 / diff_ms;
959}
960
961// Continue processing the save page job after one SaveItem has been
962// finished.
963void SavePackage::DoSavingProcess() {
964  if (save_type_ == SAVE_PAGE_TYPE_AS_COMPLETE_HTML) {
965    // We guarantee that images and JavaScripts must be downloaded first.
966    // So when finishing all those sub-resources, we will know which
967    // sub-resource's link can be replaced with local file path, which
968    // sub-resource's link need to be replaced with absolute URL which
969    // point to its internet address because it got error when saving its data.
970
971    // Start a new SaveItem job if we still have job in waiting queue.
972    if (waiting_item_queue_.size()) {
973      DCHECK(wait_state_ == NET_FILES);
974      SaveItem* save_item = waiting_item_queue_.front();
975      if (save_item->save_source() != SaveFileCreateInfo::SAVE_FILE_FROM_DOM) {
976        SaveNextFile(false);
977      } else if (!in_process_count()) {
978        // If there is no in-process SaveItem, it means all sub-resources
979        // have been processed. Now we need to start serializing HTML DOM
980        // for the current page to get the generated HTML data.
981        wait_state_ = HTML_DATA;
982        // All non-HTML resources have been finished, start all remaining
983        // HTML files.
984        SaveNextFile(true);
985      }
986    } else if (in_process_count()) {
987      // Continue asking for HTML data.
988      DCHECK(wait_state_ == HTML_DATA);
989    }
990  } else {
991    // Save as HTML only or MHTML.
992    DCHECK(wait_state_ == NET_FILES);
993    DCHECK((save_type_ == SAVE_PAGE_TYPE_AS_ONLY_HTML) ||
994           (save_type_ == SAVE_PAGE_TYPE_AS_MHTML));
995    if (waiting_item_queue_.size()) {
996      DCHECK(all_save_items_count_ == waiting_item_queue_.size());
997      SaveNextFile(false);
998    }
999  }
1000}
1001
1002bool SavePackage::OnMessageReceived(const IPC::Message& message) {
1003  bool handled = true;
1004  IPC_BEGIN_MESSAGE_MAP(SavePackage, message)
1005    IPC_MESSAGE_HANDLER(ViewHostMsg_SendCurrentPageAllSavableResourceLinks,
1006                        OnReceivedSavableResourceLinksForCurrentPage)
1007    IPC_MESSAGE_HANDLER(ViewHostMsg_SendSerializedHtmlData,
1008                        OnReceivedSerializedHtmlData)
1009    IPC_MESSAGE_UNHANDLED(handled = false)
1010  IPC_END_MESSAGE_MAP()
1011  return handled;
1012}
1013
1014// After finishing all SaveItems which need to get data from net.
1015// We collect all URLs which have local storage and send the
1016// map:(originalURL:currentLocalPath) to render process (backend).
1017// Then render process will serialize DOM and send data to us.
1018void SavePackage::GetSerializedHtmlDataForCurrentPageWithLocalLinks() {
1019  if (wait_state_ != HTML_DATA)
1020    return;
1021  std::vector<GURL> saved_links;
1022  std::vector<base::FilePath> saved_file_paths;
1023  int successful_started_items_count = 0;
1024
1025  // Collect all saved items which have local storage.
1026  // First collect the status of all the resource files and check whether they
1027  // have created local files although they have not been completely saved.
1028  // If yes, the file can be saved. Otherwise, there is a disk error, so we
1029  // need to cancel the page saving job.
1030  for (SaveUrlItemMap::iterator it = in_progress_items_.begin();
1031       it != in_progress_items_.end(); ++it) {
1032    DCHECK(it->second->save_source() ==
1033           SaveFileCreateInfo::SAVE_FILE_FROM_DOM);
1034    if (it->second->has_final_name())
1035      successful_started_items_count++;
1036    saved_links.push_back(it->second->url());
1037    saved_file_paths.push_back(it->second->file_name());
1038  }
1039
1040  // If not all file of HTML resource have been started, then wait.
1041  if (successful_started_items_count != in_process_count())
1042    return;
1043
1044  // Collect all saved success items.
1045  for (SavedItemMap::iterator it = saved_success_items_.begin();
1046       it != saved_success_items_.end(); ++it) {
1047    DCHECK(it->second->has_final_name());
1048    saved_links.push_back(it->second->url());
1049    saved_file_paths.push_back(it->second->file_name());
1050  }
1051
1052  // Get the relative directory name.
1053  base::FilePath relative_dir_name = saved_main_directory_path_.BaseName();
1054
1055  Send(new ViewMsg_GetSerializedHtmlDataForCurrentPageWithLocalLinks(
1056      routing_id(), saved_links, saved_file_paths, relative_dir_name));
1057}
1058
1059// Process the serialized HTML content data of a specified web page
1060// retrieved from render process.
1061void SavePackage::OnReceivedSerializedHtmlData(const GURL& frame_url,
1062                                               const std::string& data,
1063                                               int32 status) {
1064  WebPageSerializerClient::PageSerializationStatus flag =
1065      static_cast<WebPageSerializerClient::PageSerializationStatus>(status);
1066  // Check current state.
1067  if (wait_state_ != HTML_DATA)
1068    return;
1069
1070  int id = contents_id();
1071  // If the all frames are finished saving, we need to close the
1072  // remaining SaveItems.
1073  if (flag == WebPageSerializerClient::AllFramesAreFinished) {
1074    for (SaveUrlItemMap::iterator it = in_progress_items_.begin();
1075         it != in_progress_items_.end(); ++it) {
1076      VLOG(20) << " " << __FUNCTION__ << "()"
1077               << " save_id = " << it->second->save_id()
1078               << " url = \"" << it->second->url().spec() << "\"";
1079      BrowserThread::PostTask(
1080          BrowserThread::FILE, FROM_HERE,
1081          base::Bind(&SaveFileManager::SaveFinished,
1082                     file_manager_,
1083                     it->second->save_id(),
1084                     it->second->url(),
1085                     id,
1086                     true));
1087    }
1088    return;
1089  }
1090
1091  SaveUrlItemMap::iterator it = in_progress_items_.find(frame_url.spec());
1092  if (it == in_progress_items_.end()) {
1093    for (SavedItemMap::iterator saved_it = saved_success_items_.begin();
1094      saved_it != saved_success_items_.end(); ++saved_it) {
1095      if (saved_it->second->url() == frame_url) {
1096        wrote_to_completed_file_ = true;
1097        break;
1098      }
1099    }
1100
1101    it = saved_failed_items_.find(frame_url.spec());
1102    if (it != saved_failed_items_.end())
1103      wrote_to_failed_file_ = true;
1104
1105    return;
1106  }
1107
1108  SaveItem* save_item = it->second;
1109  DCHECK(save_item->save_source() == SaveFileCreateInfo::SAVE_FILE_FROM_DOM);
1110
1111  if (!data.empty()) {
1112    // Prepare buffer for saving HTML data.
1113    scoped_refptr<net::IOBuffer> new_data(new net::IOBuffer(data.size()));
1114    memcpy(new_data->data(), data.data(), data.size());
1115
1116    // Call write file functionality in file thread.
1117    BrowserThread::PostTask(
1118        BrowserThread::FILE, FROM_HERE,
1119        base::Bind(&SaveFileManager::UpdateSaveProgress,
1120                   file_manager_,
1121                   save_item->save_id(),
1122                   new_data,
1123                   static_cast<int>(data.size())));
1124  }
1125
1126  // Current frame is completed saving, call finish in file thread.
1127  if (flag == WebPageSerializerClient::CurrentFrameIsFinished) {
1128    VLOG(20) << " " << __FUNCTION__ << "()"
1129             << " save_id = " << save_item->save_id()
1130             << " url = \"" << save_item->url().spec() << "\"";
1131    BrowserThread::PostTask(
1132        BrowserThread::FILE, FROM_HERE,
1133        base::Bind(&SaveFileManager::SaveFinished,
1134                   file_manager_,
1135                   save_item->save_id(),
1136                   save_item->url(),
1137                   id,
1138                   true));
1139  }
1140}
1141
1142// Ask for all savable resource links from backend, include main frame and
1143// sub-frame.
1144void SavePackage::GetAllSavableResourceLinksForCurrentPage() {
1145  if (wait_state_ != START_PROCESS)
1146    return;
1147
1148  wait_state_ = RESOURCES_LIST;
1149  Send(new ViewMsg_GetAllSavableResourceLinksForCurrentPage(routing_id(),
1150                                                            page_url_));
1151}
1152
1153// Give backend the lists which contain all resource links that have local
1154// storage, after which, render process will serialize DOM for generating
1155// HTML data.
1156void SavePackage::OnReceivedSavableResourceLinksForCurrentPage(
1157    const std::vector<GURL>& resources_list,
1158    const std::vector<Referrer>& referrers_list,
1159    const std::vector<GURL>& frames_list) {
1160  if (wait_state_ != RESOURCES_LIST)
1161    return;
1162
1163  if (resources_list.size() != referrers_list.size())
1164    return;
1165
1166  all_save_items_count_ = static_cast<int>(resources_list.size()) +
1167                           static_cast<int>(frames_list.size());
1168
1169  // We use total bytes as the total number of files we want to save.
1170  // Hack to avoid touching download_ after user cancel.
1171  // TODO(rdsmith/benjhayden): Integrate canceling on DownloadItem
1172  // with SavePackage flow.
1173  if (download_ && (download_->GetState() == DownloadItem::IN_PROGRESS))
1174    download_->SetTotalBytes(all_save_items_count_);
1175
1176  if (all_save_items_count_) {
1177    // Put all sub-resources to wait list.
1178    for (int i = 0; i < static_cast<int>(resources_list.size()); ++i) {
1179      const GURL& u = resources_list[i];
1180      DCHECK(u.is_valid());
1181      SaveFileCreateInfo::SaveFileSource save_source = u.SchemeIsFile() ?
1182          SaveFileCreateInfo::SAVE_FILE_FROM_FILE :
1183          SaveFileCreateInfo::SAVE_FILE_FROM_NET;
1184      SaveItem* save_item = new SaveItem(u, referrers_list[i],
1185                                         this, save_source);
1186      waiting_item_queue_.push(save_item);
1187    }
1188    // Put all HTML resources to wait list.
1189    for (int i = 0; i < static_cast<int>(frames_list.size()); ++i) {
1190      const GURL& u = frames_list[i];
1191      DCHECK(u.is_valid());
1192      SaveItem* save_item = new SaveItem(
1193          u, Referrer(), this, SaveFileCreateInfo::SAVE_FILE_FROM_DOM);
1194      waiting_item_queue_.push(save_item);
1195    }
1196    wait_state_ = NET_FILES;
1197    DoSavingProcess();
1198  } else {
1199    // No resource files need to be saved, treat it as user cancel.
1200    Cancel(true);
1201  }
1202}
1203
1204base::FilePath SavePackage::GetSuggestedNameForSaveAs(
1205    bool can_save_as_complete,
1206    const std::string& contents_mime_type,
1207    const std::string& accept_langs) {
1208  base::FilePath name_with_proper_ext = base::FilePath::FromUTF16Unsafe(title_);
1209
1210  // If the page's title matches its URL, use the URL. Try to use the last path
1211  // component or if there is none, the domain as the file name.
1212  // Normally we want to base the filename on the page title, or if it doesn't
1213  // exist, on the URL. It's not easy to tell if the page has no title, because
1214  // if the page has no title, WebContents::GetTitle() will return the page's
1215  // URL (adjusted for display purposes). Therefore, we convert the "title"
1216  // back to a URL, and if it matches the original page URL, we know the page
1217  // had no title (or had a title equal to its URL, which is fine to treat
1218  // similarly).
1219  if (title_ == net::FormatUrl(page_url_, accept_langs)) {
1220    std::string url_path;
1221    if (!page_url_.SchemeIs(url::kDataScheme)) {
1222      std::vector<std::string> url_parts;
1223      base::SplitString(page_url_.path(), '/', &url_parts);
1224      if (!url_parts.empty()) {
1225        for (int i = static_cast<int>(url_parts.size()) - 1; i >= 0; --i) {
1226          url_path = url_parts[i];
1227          if (!url_path.empty())
1228            break;
1229        }
1230      }
1231      if (url_path.empty())
1232        url_path = page_url_.host();
1233    } else {
1234      url_path = "dataurl";
1235    }
1236    name_with_proper_ext = base::FilePath::FromUTF8Unsafe(url_path);
1237  }
1238
1239  // Ask user for getting final saving name.
1240  name_with_proper_ext = EnsureMimeExtension(name_with_proper_ext,
1241                                             contents_mime_type);
1242  // Adjust extension for complete types.
1243  if (can_save_as_complete)
1244    name_with_proper_ext = EnsureHtmlExtension(name_with_proper_ext);
1245
1246  base::FilePath::StringType file_name = name_with_proper_ext.value();
1247  base::i18n::ReplaceIllegalCharactersInPath(&file_name, ' ');
1248  return base::FilePath(file_name);
1249}
1250
1251base::FilePath SavePackage::EnsureHtmlExtension(const base::FilePath& name) {
1252  // If the file name doesn't have an extension suitable for HTML files,
1253  // append one.
1254  base::FilePath::StringType ext = name.Extension();
1255  if (!ext.empty())
1256    ext.erase(ext.begin());  // Erase preceding '.'.
1257  std::string mime_type;
1258  if (!net::GetMimeTypeFromExtension(ext, &mime_type) ||
1259      !CanSaveAsComplete(mime_type)) {
1260    return base::FilePath(name.value() + FILE_PATH_LITERAL(".") +
1261                          kDefaultHtmlExtension);
1262  }
1263  return name;
1264}
1265
1266base::FilePath SavePackage::EnsureMimeExtension(const base::FilePath& name,
1267    const std::string& contents_mime_type) {
1268  // Start extension at 1 to skip over period if non-empty.
1269  base::FilePath::StringType ext = name.Extension().length() ?
1270      name.Extension().substr(1) : name.Extension();
1271  base::FilePath::StringType suggested_extension =
1272      ExtensionForMimeType(contents_mime_type);
1273  std::string mime_type;
1274  if (!suggested_extension.empty() &&
1275      !net::GetMimeTypeFromExtension(ext, &mime_type)) {
1276    // Extension is absent or needs to be updated.
1277    return base::FilePath(name.value() + FILE_PATH_LITERAL(".") +
1278                    suggested_extension);
1279  }
1280  return name;
1281}
1282
1283const base::FilePath::CharType* SavePackage::ExtensionForMimeType(
1284    const std::string& contents_mime_type) {
1285  static const struct {
1286    const base::FilePath::CharType *mime_type;
1287    const base::FilePath::CharType *suggested_extension;
1288  } extensions[] = {
1289    { FILE_PATH_LITERAL("text/html"), kDefaultHtmlExtension },
1290    { FILE_PATH_LITERAL("text/xml"), FILE_PATH_LITERAL("xml") },
1291    { FILE_PATH_LITERAL("application/xhtml+xml"), FILE_PATH_LITERAL("xhtml") },
1292    { FILE_PATH_LITERAL("text/plain"), FILE_PATH_LITERAL("txt") },
1293    { FILE_PATH_LITERAL("text/css"), FILE_PATH_LITERAL("css") },
1294  };
1295#if defined(OS_POSIX)
1296  base::FilePath::StringType mime_type(contents_mime_type);
1297#elif defined(OS_WIN)
1298  base::FilePath::StringType mime_type(base::UTF8ToWide(contents_mime_type));
1299#endif  // OS_WIN
1300  for (uint32 i = 0; i < ARRAYSIZE_UNSAFE(extensions); ++i) {
1301    if (mime_type == extensions[i].mime_type)
1302      return extensions[i].suggested_extension;
1303  }
1304  return FILE_PATH_LITERAL("");
1305}
1306
1307void SavePackage::GetSaveInfo() {
1308  // Can't use web_contents_ in the file thread, so get the data that we need
1309  // before calling to it.
1310  base::FilePath website_save_dir, download_save_dir;
1311  bool skip_dir_check = false;
1312  DCHECK(download_manager_);
1313  if (download_manager_->GetDelegate()) {
1314    download_manager_->GetDelegate()->GetSaveDir(
1315        web_contents()->GetBrowserContext(), &website_save_dir,
1316        &download_save_dir, &skip_dir_check);
1317  }
1318  std::string mime_type = web_contents()->GetContentsMimeType();
1319  std::string accept_languages =
1320      GetContentClient()->browser()->GetAcceptLangs(
1321          web_contents()->GetBrowserContext());
1322
1323  BrowserThread::PostTask(
1324      BrowserThread::FILE, FROM_HERE,
1325      base::Bind(&SavePackage::CreateDirectoryOnFileThread, this,
1326          website_save_dir, download_save_dir, skip_dir_check,
1327          mime_type, accept_languages));
1328}
1329
1330void SavePackage::CreateDirectoryOnFileThread(
1331    const base::FilePath& website_save_dir,
1332    const base::FilePath& download_save_dir,
1333    bool skip_dir_check,
1334    const std::string& mime_type,
1335    const std::string& accept_langs) {
1336  base::FilePath save_dir;
1337  // If the default html/websites save folder doesn't exist...
1338  // We skip the directory check for gdata directories on ChromeOS.
1339  if (!skip_dir_check && !base::DirectoryExists(website_save_dir)) {
1340    // If the default download dir doesn't exist, create it.
1341    if (!base::DirectoryExists(download_save_dir)) {
1342      bool res = base::CreateDirectory(download_save_dir);
1343      DCHECK(res);
1344    }
1345    save_dir = download_save_dir;
1346  } else {
1347    // If it does exist, use the default save dir param.
1348    save_dir = website_save_dir;
1349  }
1350
1351  bool can_save_as_complete = CanSaveAsComplete(mime_type);
1352  base::FilePath suggested_filename = GetSuggestedNameForSaveAs(
1353      can_save_as_complete, mime_type, accept_langs);
1354  base::FilePath::StringType pure_file_name =
1355      suggested_filename.RemoveExtension().BaseName().value();
1356  base::FilePath::StringType file_name_ext = suggested_filename.Extension();
1357
1358  // Need to make sure the suggested file name is not too long.
1359  uint32 max_path = GetMaxPathLengthForDirectory(save_dir);
1360
1361  if (GetSafePureFileName(save_dir, file_name_ext, max_path, &pure_file_name)) {
1362    save_dir = save_dir.Append(pure_file_name + file_name_ext);
1363  } else {
1364    // Cannot create a shorter filename. This will cause the save as operation
1365    // to fail unless the user pick a shorter name. Continuing even though it
1366    // will fail because returning means no save as popup for the user, which
1367    // is even more confusing. This case should be rare though.
1368    save_dir = save_dir.Append(suggested_filename);
1369  }
1370
1371  BrowserThread::PostTask(
1372      BrowserThread::UI, FROM_HERE,
1373      base::Bind(&SavePackage::ContinueGetSaveInfo, this, save_dir,
1374                 can_save_as_complete));
1375}
1376
1377void SavePackage::ContinueGetSaveInfo(const base::FilePath& suggested_path,
1378                                      bool can_save_as_complete) {
1379
1380  // The WebContents which owns this SavePackage may have disappeared during
1381  // the UI->FILE->UI thread hop of
1382  // GetSaveInfo->CreateDirectoryOnFileThread->ContinueGetSaveInfo.
1383  if (!web_contents() || !download_manager_->GetDelegate())
1384    return;
1385
1386  base::FilePath::StringType default_extension;
1387  if (can_save_as_complete)
1388    default_extension = kDefaultHtmlExtension;
1389
1390  download_manager_->GetDelegate()->ChooseSavePath(
1391      web_contents(),
1392      suggested_path,
1393      default_extension,
1394      can_save_as_complete,
1395      base::Bind(&SavePackage::OnPathPicked, AsWeakPtr()));
1396}
1397
1398void SavePackage::OnPathPicked(
1399    const base::FilePath& final_name,
1400    SavePageType type,
1401    const SavePackageDownloadCreatedCallback& download_created_callback) {
1402  DCHECK((type == SAVE_PAGE_TYPE_AS_ONLY_HTML) ||
1403         (type == SAVE_PAGE_TYPE_AS_MHTML) ||
1404         (type == SAVE_PAGE_TYPE_AS_COMPLETE_HTML)) << type;
1405  // Ensure the filename is safe.
1406  saved_main_file_path_ = final_name;
1407  // TODO(asanka): This call may block on IO and shouldn't be made
1408  // from the UI thread.  See http://crbug.com/61827.
1409  net::GenerateSafeFileName(web_contents()->GetContentsMimeType(), false,
1410                            &saved_main_file_path_);
1411
1412  saved_main_directory_path_ = saved_main_file_path_.DirName();
1413  save_type_ = type;
1414  if (save_type_ == SAVE_PAGE_TYPE_AS_COMPLETE_HTML) {
1415    // Make new directory for saving complete file.
1416    saved_main_directory_path_ = saved_main_directory_path_.Append(
1417        saved_main_file_path_.RemoveExtension().BaseName().value() +
1418        FILE_PATH_LITERAL("_files"));
1419  }
1420
1421  Init(download_created_callback);
1422}
1423
// Detaches this SavePackage from its DownloadItem: removes itself from the
// item's observers and clears both |download_| and |download_manager_|.
// Both pointers must be non-NULL when this is called.
void SavePackage::StopObservation() {
  DCHECK(download_);
  DCHECK(download_manager_);

  // Unregister before the pointer is cleared.
  download_->RemoveObserver(this);
  download_ = NULL;
  download_manager_ = NULL;
}
1432
// Stops observing and drops the download pointers via StopObservation().
// |download| itself is not inspected.
// NOTE(review): presumably the DownloadItem observer hook for destruction of
// |download_| -- confirm against the class declaration.
void SavePackage::OnDownloadDestroyed(DownloadItem* download) {
  StopObservation();
}
1436
// Tells the download manager that this save package finished successfully
// for |download_| and then stops observing the download.
// Both |download_| and |download_manager_| must be non-NULL on entry.
void SavePackage::FinalizeDownloadEntry() {
  DCHECK(download_);
  DCHECK(download_manager_);

  // The manager must be notified first: StopObservation() clears both
  // |download_| and |download_manager_|.
  download_manager_->OnSavePackageSuccessfullyFinished(download_);
  StopObservation();
}
1444
1445}  // namespace content
1446