change_list_processor.cc revision 46d4c2bc3267f3f028f39e7e311b0f89aba2e4fd
1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "chrome/browser/chromeos/drive/change_list_processor.h"
6
7#include "base/metrics/histogram.h"
8#include "base/strings/string_number_conversions.h"
9#include "chrome/browser/chromeos/drive/drive.pb.h"
10#include "chrome/browser/chromeos/drive/file_system_util.h"
11#include "chrome/browser/chromeos/drive/resource_entry_conversion.h"
12#include "chrome/browser/chromeos/drive/resource_metadata.h"
13#include "chrome/browser/drive/drive_api_util.h"
14#include "google_apis/drive/drive_api_parser.h"
15#include "google_apis/drive/gdata_wapi_parser.h"
16
17namespace drive {
18namespace internal {
19
20namespace {
21
22class ChangeListToEntryMapUMAStats {
23 public:
24  ChangeListToEntryMapUMAStats()
25    : num_regular_files_(0),
26      num_hosted_documents_(0),
27      num_shared_with_me_entries_(0) {
28  }
29
30  // Increments number of files.
31  void IncrementNumFiles(bool is_hosted_document) {
32    is_hosted_document ? num_hosted_documents_++ : num_regular_files_++;
33  }
34
35  // Increments number of shared-with-me entries.
36  void IncrementNumSharedWithMeEntries() {
37    num_shared_with_me_entries_++;
38  }
39
40  // Updates UMA histograms with file counts.
41  void UpdateFileCountUmaHistograms() {
42    const int num_total_files = num_hosted_documents_ + num_regular_files_;
43    UMA_HISTOGRAM_COUNTS("Drive.NumberOfRegularFiles", num_regular_files_);
44    UMA_HISTOGRAM_COUNTS("Drive.NumberOfHostedDocuments",
45                         num_hosted_documents_);
46    UMA_HISTOGRAM_COUNTS("Drive.NumberOfTotalFiles", num_total_files);
47    UMA_HISTOGRAM_COUNTS("Drive.NumberOfSharedWithMeEntries",
48                         num_shared_with_me_entries_);
49  }
50
51 private:
52  int num_regular_files_;
53  int num_hosted_documents_;
54  int num_shared_with_me_entries_;
55};
56
57// Returns true if it's OK to overwrite the local entry with the remote one.
58bool ShouldApplyChange(const ResourceEntry& local_entry,
59                       const ResourceEntry& remote_entry) {
60  if (local_entry.metadata_edit_state() == ResourceEntry::CLEAN)
61    return true;
62  return base::Time::FromInternalValue(remote_entry.modification_date()) >
63      base::Time::FromInternalValue(local_entry.modification_date());
64}
65
66}  // namespace
67
68std::string DirectoryFetchInfo::ToString() const {
69  return ("local_id: " + local_id_ +
70          ", resource_id: " + resource_id_ +
71          ", changestamp: " + base::Int64ToString(changestamp_));
72}
73
74ChangeList::ChangeList() {}
75
76ChangeList::ChangeList(const google_apis::ChangeList& change_list)
77    : next_url_(change_list.next_link()),
78      largest_changestamp_(change_list.largest_change_id()) {
79  const ScopedVector<google_apis::ChangeResource>& items = change_list.items();
80  entries_.resize(items.size());
81  parent_resource_ids_.resize(items.size());
82  size_t entries_index = 0;
83  for (size_t i = 0; i < items.size(); ++i) {
84    if (ConvertToResourceEntry(
85            *util::ConvertChangeResourceToResourceEntry(*items[i]),
86            &entries_[entries_index],
87            &parent_resource_ids_[entries_index])) {
88      ++entries_index;
89    }
90  }
91  entries_.resize(entries_index);
92  parent_resource_ids_.resize(entries_index);
93}
94
95ChangeList::ChangeList(const google_apis::FileList& file_list)
96    : next_url_(file_list.next_link()),
97      largest_changestamp_(0) {
98  const ScopedVector<google_apis::FileResource>& items = file_list.items();
99  entries_.resize(items.size());
100  parent_resource_ids_.resize(items.size());
101  size_t entries_index = 0;
102  for (size_t i = 0; i < items.size(); ++i) {
103    if (ConvertToResourceEntry(
104            *util::ConvertFileResourceToResourceEntry(*items[i]),
105            &entries_[entries_index],
106            &parent_resource_ids_[entries_index])) {
107      ++entries_index;
108    }
109  }
110  entries_.resize(entries_index);
111  parent_resource_ids_.resize(entries_index);
112}
113
114ChangeList::~ChangeList() {}
115
116ChangeListProcessor::ChangeListProcessor(ResourceMetadata* resource_metadata)
117  : resource_metadata_(resource_metadata) {
118}
119
120ChangeListProcessor::~ChangeListProcessor() {
121}
122
123FileError ChangeListProcessor::Apply(
124    scoped_ptr<google_apis::AboutResource> about_resource,
125    ScopedVector<ChangeList> change_lists,
126    bool is_delta_update) {
127  DCHECK(about_resource);
128
129  int64 largest_changestamp = 0;
130  if (is_delta_update) {
131    if (!change_lists.empty()) {
132      // The changestamp appears in the first page of the change list.
133      // The changestamp does not appear in the full resource list.
134      largest_changestamp = change_lists[0]->largest_changestamp();
135      DCHECK_GE(change_lists[0]->largest_changestamp(), 0);
136    }
137  } else {
138    largest_changestamp = about_resource->largest_change_id();
139
140    DVLOG(1) << "Root folder ID is " << about_resource->root_folder_id();
141    DCHECK(!about_resource->root_folder_id().empty());
142  }
143
144  // Convert ChangeList to map.
145  ChangeListToEntryMapUMAStats uma_stats;
146  for (size_t i = 0; i < change_lists.size(); ++i) {
147    ChangeList* change_list = change_lists[i];
148
149    std::vector<ResourceEntry>* entries = change_list->mutable_entries();
150    for (size_t i = 0; i < entries->size(); ++i) {
151      ResourceEntry* entry = &(*entries)[i];
152
153      // Count the number of files.
154      if (!entry->file_info().is_directory()) {
155        uma_stats.IncrementNumFiles(
156            entry->file_specific_info().is_hosted_document());
157        if (entry->shared_with_me())
158          uma_stats.IncrementNumSharedWithMeEntries();
159      }
160      parent_resource_id_map_[entry->resource_id()] =
161          change_list->parent_resource_ids()[i];
162      entry_map_[entry->resource_id()].Swap(entry);
163      LOG_IF(WARNING, !entry->resource_id().empty())
164          << "Found duplicated file: " << entry->base_name();
165    }
166  }
167
168  // Add the largest changestamp for directories.
169  for (ResourceEntryMap::iterator it = entry_map_.begin();
170       it != entry_map_.end(); ++it) {
171    if (it->second.file_info().is_directory()) {
172      it->second.mutable_directory_specific_info()->set_changestamp(
173          largest_changestamp);
174    }
175  }
176
177  FileError error = ApplyEntryMap(largest_changestamp, about_resource.Pass());
178  if (error != FILE_ERROR_OK) {
179    DLOG(ERROR) << "ApplyEntryMap failed: " << FileErrorToString(error);
180    return error;
181  }
182
183  // Update changestamp.
184  error = resource_metadata_->SetLargestChangestamp(largest_changestamp);
185  if (error != FILE_ERROR_OK) {
186    DLOG(ERROR) << "SetLargestChangeStamp failed: " << FileErrorToString(error);
187    return error;
188  }
189
190  // Shouldn't record histograms when processing delta update.
191  if (!is_delta_update)
192    uma_stats.UpdateFileCountUmaHistograms();
193
194  return FILE_ERROR_OK;
195}
196
197FileError ChangeListProcessor::ApplyEntryMap(
198    int64 changestamp,
199    scoped_ptr<google_apis::AboutResource> about_resource) {
200  DCHECK(about_resource);
201
202  // Create the entry for "My Drive" directory with the latest changestamp.
203  ResourceEntry root;
204  FileError error = resource_metadata_->GetResourceEntryByPath(
205      util::GetDriveMyDriveRootPath(), &root);
206  if (error != FILE_ERROR_OK) {
207    LOG(ERROR) << "Failed to get root entry: " << FileErrorToString(error);
208    return error;
209  }
210
211  root.mutable_directory_specific_info()->set_changestamp(changestamp);
212  root.set_resource_id(about_resource->root_folder_id());
213  error = resource_metadata_->RefreshEntry(root);
214  if (error != FILE_ERROR_OK) {
215    LOG(ERROR) << "Failed to update root entry: " << FileErrorToString(error);
216    return error;
217  }
218
219  // Gather the set of changes in the old path.
220  // Note that we want to notify the change in both old and new paths (suppose
221  // /a/b/c is moved to /x/y/c. We want to notify both "/a/b" and "/x/y".)
222  // The old paths must be calculated before we apply any actual changes.
223  // The new paths are calculated after each change is applied. It correctly
224  // sets the new path because we apply changes in such an order (see below).
225  for (ResourceEntryMap::iterator it = entry_map_.begin();
226       it != entry_map_.end(); ++it) {
227    UpdateChangedDirs(it->second);
228  }
229
230  // Apply all entries except deleted ones to the metadata.
231  std::vector<std::string> deleted_resource_ids;
232  while (!entry_map_.empty()) {
233    ResourceEntryMap::iterator it = entry_map_.begin();
234
235    // Process deleted entries later to avoid deleting moved entries under it.
236    if (it->second.deleted()) {
237      deleted_resource_ids.push_back(it->first);
238      entry_map_.erase(it);
239      continue;
240    }
241
242    // Start from entry_map_.begin() and traverse ancestors using the
243    // parent-child relationships in the result (after this apply) tree.
244    // Then apply the topmost change first.
245    //
246    // By doing this, assuming the result tree does not contain any cycles, we
247    // can guarantee that no cycle is made during this apply (i.e. no entry gets
248    // moved under any of its descendants) because the following conditions are
249    // always satisfied in any move:
250    // - The new parent entry is not a descendant of the moved entry.
251    // - The new parent and its ancestors will no longer move during this apply.
252    std::vector<ResourceEntryMap::iterator> entries;
253    for (ResourceEntryMap::iterator it = entry_map_.begin();
254         it != entry_map_.end();) {
255      entries.push_back(it);
256
257      DCHECK(parent_resource_id_map_.count(it->first)) << it->first;
258      const std::string& parent_resource_id =
259          parent_resource_id_map_[it->first];
260
261      if (parent_resource_id.empty())  // This entry has no parent.
262        break;
263
264      ResourceEntryMap::iterator it_parent =
265          entry_map_.find(parent_resource_id);
266      if (it_parent == entry_map_.end()) {
267        // Current entry's parent is already updated or not going to be updated,
268        // get the parent from the local tree.
269        std::string parent_local_id;
270        FileError error = resource_metadata_->GetIdByResourceId(
271            parent_resource_id, &parent_local_id);
272        if (error != FILE_ERROR_OK) {
273          // See crbug.com/326043. In some complicated situations, parent folder
274          // for shared entries may be accessible (and hence its resource id is
275          // included), but not in the change/file list.
276          // In such a case, clear the parent and move it to drive/other.
277          if (error == FILE_ERROR_NOT_FOUND) {
278            parent_resource_id_map_[it->first] = "";
279          } else {
280            LOG(ERROR) << "Failed to get local ID: " << parent_resource_id
281                       << ", error = " << FileErrorToString(error);
282          }
283          break;
284        }
285        ResourceEntry parent_entry;
286        while (it_parent == entry_map_.end() && !parent_local_id.empty()) {
287          error = resource_metadata_->GetResourceEntryById(
288              parent_local_id, &parent_entry);
289          if (error != FILE_ERROR_OK) {
290            LOG(ERROR) << "Failed to get local entry: "
291                       << FileErrorToString(error);
292            break;
293          }
294          it_parent = entry_map_.find(parent_entry.resource_id());
295          parent_local_id = parent_entry.parent_local_id();
296        }
297      }
298      it = it_parent;
299    }
300
301    // Apply the parent first.
302    std::reverse(entries.begin(), entries.end());
303    for (size_t i = 0; i < entries.size(); ++i) {
304      // Skip root entry in the change list. We don't expect servers to send
305      // root entry, but we should better be defensive (see crbug.com/297259).
306      ResourceEntryMap::iterator it = entries[i];
307      if (it->first != root.resource_id()) {
308        FileError error = ApplyEntry(it->second);
309        if (error != FILE_ERROR_OK) {
310          LOG(ERROR) << "ApplyEntry failed: " << FileErrorToString(error)
311                     << ", title = " << it->second.title();
312          return error;
313        }
314      }
315      entry_map_.erase(it);
316    }
317  }
318
319  // Apply deleted entries.
320  for (size_t i = 0; i < deleted_resource_ids.size(); ++i) {
321    std::string local_id;
322    FileError error = resource_metadata_->GetIdByResourceId(
323        deleted_resource_ids[i], &local_id);
324    switch (error) {
325      case FILE_ERROR_OK:
326        error = resource_metadata_->RemoveEntry(local_id);
327        break;
328      case FILE_ERROR_NOT_FOUND:
329        error = FILE_ERROR_OK;
330        break;
331      default:
332        break;
333    }
334    if (error != FILE_ERROR_OK) {
335      LOG(ERROR) << "Failed to delete: " << FileErrorToString(error)
336                 << ", resource_id = " << deleted_resource_ids[i];
337      return error;
338    }
339  }
340
341  return FILE_ERROR_OK;
342}
343
344FileError ChangeListProcessor::ApplyEntry(const ResourceEntry& entry) {
345  DCHECK(!entry.deleted());
346  DCHECK(parent_resource_id_map_.count(entry.resource_id()));
347  const std::string& parent_resource_id =
348      parent_resource_id_map_[entry.resource_id()];
349
350  ResourceEntry new_entry(entry);
351  FileError error = SetParentLocalIdOfEntry(resource_metadata_, &new_entry,
352                                            parent_resource_id);
353  if (error != FILE_ERROR_OK)
354    return error;
355
356  // Lookup the entry.
357  std::string local_id;
358  error = resource_metadata_->GetIdByResourceId(entry.resource_id(), &local_id);
359
360  ResourceEntry existing_entry;
361  if (error == FILE_ERROR_OK)
362    error = resource_metadata_->GetResourceEntryById(local_id, &existing_entry);
363
364  switch (error) {
365    case FILE_ERROR_OK:
366      if (ShouldApplyChange(existing_entry, new_entry)) {
367        // Entry exists and needs to be refreshed.
368        new_entry.set_local_id(local_id);
369        error = resource_metadata_->RefreshEntry(new_entry);
370      } else {
371        if (entry.file_info().is_directory()) {
372          // No need to refresh, but update the changestamp.
373          new_entry = existing_entry;
374          new_entry.mutable_directory_specific_info()->set_changestamp(
375              new_entry.directory_specific_info().changestamp());
376          error = resource_metadata_->RefreshEntry(new_entry);
377        }
378        DVLOG(1) << "Change was discarded for: " << entry.resource_id();
379      }
380      break;
381    case FILE_ERROR_NOT_FOUND: {  // Adding a new entry.
382      std::string local_id;
383      error = resource_metadata_->AddEntry(new_entry, &local_id);
384      break;
385    }
386    default:
387      return error;
388  }
389  if (error != FILE_ERROR_OK)
390    return error;
391
392  UpdateChangedDirs(entry);
393  return FILE_ERROR_OK;
394}
395
396// static
397FileError ChangeListProcessor::RefreshDirectory(
398    ResourceMetadata* resource_metadata,
399    const DirectoryFetchInfo& directory_fetch_info,
400    scoped_ptr<ChangeList> change_list,
401    std::vector<ResourceEntry>* out_refreshed_entries) {
402  DCHECK(!directory_fetch_info.empty());
403
404  ResourceEntry directory;
405  FileError error = resource_metadata->GetResourceEntryById(
406      directory_fetch_info.local_id(), &directory);
407  if (error != FILE_ERROR_OK)
408    return error;
409
410  if (!directory.file_info().is_directory())
411    return FILE_ERROR_NOT_A_DIRECTORY;
412
413  std::vector<ResourceEntry>* entries = change_list->mutable_entries();
414  for (size_t i = 0; i < entries->size(); ++i) {
415    ResourceEntry* entry = &(*entries)[i];
416    const std::string& parent_resource_id =
417        change_list->parent_resource_ids()[i];
418
419    // Skip if the parent resource ID does not match. This is needed to
420    // handle entries with multiple parents. For such entries, the first
421    // parent is picked and other parents are ignored, hence some entries may
422    // have a parent resource ID which does not match the target directory's.
423    if (parent_resource_id != directory_fetch_info.resource_id()) {
424      DVLOG(1) << "Wrong-parent entry rejected: " << entry->resource_id();
425      continue;
426    }
427
428    entry->set_parent_local_id(directory_fetch_info.local_id());
429
430    std::string local_id;
431    error = resource_metadata->GetIdByResourceId(entry->resource_id(),
432                                                 &local_id);
433    if (error == FILE_ERROR_OK) {
434      entry->set_local_id(local_id);
435      error = resource_metadata->RefreshEntry(*entry);
436    }
437
438    if (error == FILE_ERROR_NOT_FOUND) {  // If refreshing fails, try adding.
439      entry->clear_local_id();
440      error = resource_metadata->AddEntry(*entry, &local_id);
441    }
442
443    if (error != FILE_ERROR_OK)
444      return error;
445
446    ResourceEntry result_entry;
447    error = resource_metadata->GetResourceEntryById(local_id, &result_entry);
448    if (error != FILE_ERROR_OK)
449      return error;
450    out_refreshed_entries->push_back(result_entry);
451  }
452  return FILE_ERROR_OK;
453}
454
455// static
456FileError ChangeListProcessor::SetParentLocalIdOfEntry(
457    ResourceMetadata* resource_metadata,
458    ResourceEntry* entry,
459    const std::string& parent_resource_id) {
460  std::string parent_local_id;
461  if (parent_resource_id.empty()) {
462    // Entries without parents should go under "other" directory.
463    parent_local_id = util::kDriveOtherDirLocalId;
464  } else {
465    FileError error = resource_metadata->GetIdByResourceId(
466        parent_resource_id, &parent_local_id);
467    if (error != FILE_ERROR_OK)
468      return error;
469  }
470  entry->set_parent_local_id(parent_local_id);
471  return FILE_ERROR_OK;
472}
473
474void ChangeListProcessor::UpdateChangedDirs(const ResourceEntry& entry) {
475  DCHECK(!entry.resource_id().empty());
476
477  std::string local_id;
478  base::FilePath file_path;
479  if (resource_metadata_->GetIdByResourceId(
480          entry.resource_id(), &local_id) == FILE_ERROR_OK)
481    resource_metadata_->GetFilePath(local_id, &file_path);
482
483  if (!file_path.empty()) {
484    // Notify parent.
485    changed_dirs_.insert(file_path.DirName());
486
487    if (entry.file_info().is_directory()) {
488      // Notify self if entry is a directory.
489      changed_dirs_.insert(file_path);
490
491      // Notify all descendants if it is a directory deletion.
492      if (entry.deleted()) {
493        std::set<base::FilePath> sub_directories;
494        resource_metadata_->GetSubDirectoriesRecursively(local_id,
495                                                         &sub_directories);
496        changed_dirs_.insert(sub_directories.begin(), sub_directories.end());
497      }
498    }
499  }
500}
501
502}  // namespace internal
503}  // namespace drive
504