change_list_processor.cc revision effb81e5f8246d0db0270817048dc992db66e9fb
1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "chrome/browser/chromeos/drive/change_list_processor.h"
6
7#include "base/metrics/histogram.h"
8#include "base/strings/string_number_conversions.h"
9#include "chrome/browser/chromeos/drive/drive.pb.h"
10#include "chrome/browser/chromeos/drive/file_system_util.h"
11#include "chrome/browser/chromeos/drive/resource_entry_conversion.h"
12#include "chrome/browser/chromeos/drive/resource_metadata.h"
13#include "google_apis/drive/drive_api_parser.h"
14#include "google_apis/drive/gdata_wapi_parser.h"
15
16namespace drive {
17namespace internal {
18
19namespace {
20
21class ChangeListToEntryMapUMAStats {
22 public:
23  ChangeListToEntryMapUMAStats()
24    : num_regular_files_(0),
25      num_hosted_documents_(0),
26      num_shared_with_me_entries_(0) {
27  }
28
29  // Increments number of files.
30  void IncrementNumFiles(bool is_hosted_document) {
31    is_hosted_document ? num_hosted_documents_++ : num_regular_files_++;
32  }
33
34  // Increments number of shared-with-me entries.
35  void IncrementNumSharedWithMeEntries() {
36    num_shared_with_me_entries_++;
37  }
38
39  // Updates UMA histograms with file counts.
40  void UpdateFileCountUmaHistograms() {
41    const int num_total_files = num_hosted_documents_ + num_regular_files_;
42    UMA_HISTOGRAM_COUNTS("Drive.NumberOfRegularFiles", num_regular_files_);
43    UMA_HISTOGRAM_COUNTS("Drive.NumberOfHostedDocuments",
44                         num_hosted_documents_);
45    UMA_HISTOGRAM_COUNTS("Drive.NumberOfTotalFiles", num_total_files);
46    UMA_HISTOGRAM_COUNTS("Drive.NumberOfSharedWithMeEntries",
47                         num_shared_with_me_entries_);
48  }
49
50 private:
51  int num_regular_files_;
52  int num_hosted_documents_;
53  int num_shared_with_me_entries_;
54};
55
56// Returns true if it's OK to overwrite the local entry with the remote one.
57// |modification_date| is the time of the modification whose result is
58// |remote_entry|.
59bool ShouldApplyChange(const ResourceEntry& local_entry,
60                       const ResourceEntry& remote_entry,
61                       const base::Time& modification_date) {
62  // TODO(hashimoto): Implement more sophisticated conflict resolution.
63
64  // If the entry is locally available and is newly created in this change,
65  // No need to overwrite the local entry.
66  base::Time creation_time =
67      base::Time::FromInternalValue(remote_entry.file_info().creation_time());
68  const bool entry_is_new = creation_time == modification_date;
69  return !entry_is_new;
70}
71
72}  // namespace
73
74std::string DirectoryFetchInfo::ToString() const {
75  return ("local_id: " + local_id_ +
76          ", resource_id: " + resource_id_ +
77          ", changestamp: " + base::Int64ToString(changestamp_));
78}
79
80ChangeList::ChangeList() {}
81
82ChangeList::ChangeList(const google_apis::ResourceList& resource_list)
83    : largest_changestamp_(resource_list.largest_changestamp()) {
84  resource_list.GetNextFeedURL(&next_url_);
85
86  entries_.resize(resource_list.entries().size());
87  parent_resource_ids_.resize(resource_list.entries().size());
88  modification_dates_.resize(resource_list.entries().size());
89  size_t entries_index = 0;
90  for (size_t i = 0; i < resource_list.entries().size(); ++i) {
91    if (ConvertToResourceEntry(*resource_list.entries()[i],
92                               &entries_[entries_index],
93                               &parent_resource_ids_[entries_index])) {
94      modification_dates_[entries_index] =
95          resource_list.entries()[i]->modification_date();
96      ++entries_index;
97    }
98  }
99  entries_.resize(entries_index);
100  parent_resource_ids_.resize(entries_index);
101  modification_dates_.resize(entries_index);
102}
103
104ChangeList::~ChangeList() {}
105
106ChangeListProcessor::ChangeListProcessor(ResourceMetadata* resource_metadata)
107  : resource_metadata_(resource_metadata) {
108}
109
110ChangeListProcessor::~ChangeListProcessor() {
111}
112
113FileError ChangeListProcessor::Apply(
114    scoped_ptr<google_apis::AboutResource> about_resource,
115    ScopedVector<ChangeList> change_lists,
116    bool is_delta_update) {
117  DCHECK(about_resource);
118
119  int64 largest_changestamp = 0;
120  if (is_delta_update) {
121    if (!change_lists.empty()) {
122      // The changestamp appears in the first page of the change list.
123      // The changestamp does not appear in the full resource list.
124      largest_changestamp = change_lists[0]->largest_changestamp();
125      DCHECK_GE(change_lists[0]->largest_changestamp(), 0);
126    }
127  } else {
128    largest_changestamp = about_resource->largest_change_id();
129
130    DVLOG(1) << "Root folder ID is " << about_resource->root_folder_id();
131    DCHECK(!about_resource->root_folder_id().empty());
132  }
133
134  // Convert ChangeList to map.
135  ChangeListToEntryMapUMAStats uma_stats;
136  for (size_t i = 0; i < change_lists.size(); ++i) {
137    ChangeList* change_list = change_lists[i];
138
139    std::vector<ResourceEntry>* entries = change_list->mutable_entries();
140    for (size_t i = 0; i < entries->size(); ++i) {
141      ResourceEntry* entry = &(*entries)[i];
142
143      // Count the number of files.
144      if (!entry->file_info().is_directory()) {
145        uma_stats.IncrementNumFiles(
146            entry->file_specific_info().is_hosted_document());
147        if (entry->shared_with_me())
148          uma_stats.IncrementNumSharedWithMeEntries();
149      }
150      modification_date_map_[entry->resource_id()] =
151          change_list->modification_dates()[i];
152      parent_resource_id_map_[entry->resource_id()] =
153          change_list->parent_resource_ids()[i];
154      entry_map_[entry->resource_id()].Swap(entry);
155      LOG_IF(WARNING, !entry->resource_id().empty())
156          << "Found duplicated file: " << entry->base_name();
157    }
158  }
159
160  // Add the largest changestamp for directories.
161  for (ResourceEntryMap::iterator it = entry_map_.begin();
162       it != entry_map_.end(); ++it) {
163    if (it->second.file_info().is_directory()) {
164      it->second.mutable_directory_specific_info()->set_changestamp(
165          largest_changestamp);
166    }
167  }
168
169  FileError error = ApplyEntryMap(largest_changestamp, about_resource.Pass());
170  if (error != FILE_ERROR_OK) {
171    DLOG(ERROR) << "ApplyEntryMap failed: " << FileErrorToString(error);
172    return error;
173  }
174
175  // Update changestamp.
176  error = resource_metadata_->SetLargestChangestamp(largest_changestamp);
177  if (error != FILE_ERROR_OK) {
178    DLOG(ERROR) << "SetLargestChangeStamp failed: " << FileErrorToString(error);
179    return error;
180  }
181
182  // Shouldn't record histograms when processing delta update.
183  if (!is_delta_update)
184    uma_stats.UpdateFileCountUmaHistograms();
185
186  return FILE_ERROR_OK;
187}
188
189FileError ChangeListProcessor::ApplyEntryMap(
190    int64 changestamp,
191    scoped_ptr<google_apis::AboutResource> about_resource) {
192  DCHECK(about_resource);
193
194  // Create the entry for "My Drive" directory with the latest changestamp.
195  ResourceEntry root;
196  FileError error = resource_metadata_->GetResourceEntryByPath(
197      util::GetDriveMyDriveRootPath(), &root);
198  if (error != FILE_ERROR_OK) {
199    LOG(ERROR) << "Failed to get root entry: " << FileErrorToString(error);
200    return error;
201  }
202
203  root.mutable_directory_specific_info()->set_changestamp(changestamp);
204  root.set_resource_id(about_resource->root_folder_id());
205  error = resource_metadata_->RefreshEntry(root);
206  if (error != FILE_ERROR_OK) {
207    LOG(ERROR) << "Failed to update root entry: " << FileErrorToString(error);
208    return error;
209  }
210
211  // Gather the set of changes in the old path.
212  // Note that we want to notify the change in both old and new paths (suppose
213  // /a/b/c is moved to /x/y/c. We want to notify both "/a/b" and "/x/y".)
214  // The old paths must be calculated before we apply any actual changes.
215  // The new paths are calculated after each change is applied. It correctly
216  // sets the new path because we apply changes in such an order (see below).
217  for (ResourceEntryMap::iterator it = entry_map_.begin();
218       it != entry_map_.end(); ++it) {
219    UpdateChangedDirs(it->second);
220  }
221
222  // Apply all entries except deleted ones to the metadata.
223  std::vector<std::string> deleted_resource_ids;
224  while (!entry_map_.empty()) {
225    ResourceEntryMap::iterator it = entry_map_.begin();
226
227    // Process deleted entries later to avoid deleting moved entries under it.
228    if (it->second.deleted()) {
229      deleted_resource_ids.push_back(it->first);
230      entry_map_.erase(it);
231      continue;
232    }
233
234    // Start from entry_map_.begin() and traverse ancestors using the
235    // parent-child relationships in the result (after this apply) tree.
236    // Then apply the topmost change first.
237    //
238    // By doing this, assuming the result tree does not contain any cycles, we
239    // can guarantee that no cycle is made during this apply (i.e. no entry gets
240    // moved under any of its descendants) because the following conditions are
241    // always satisfied in any move:
242    // - The new parent entry is not a descendant of the moved entry.
243    // - The new parent and its ancestors will no longer move during this apply.
244    std::vector<ResourceEntryMap::iterator> entries;
245    for (ResourceEntryMap::iterator it = entry_map_.begin();
246         it != entry_map_.end();) {
247      entries.push_back(it);
248
249      DCHECK(parent_resource_id_map_.count(it->first)) << it->first;
250      const std::string& parent_resource_id =
251          parent_resource_id_map_[it->first];
252
253      if (parent_resource_id.empty())  // This entry has no parent.
254        break;
255
256      ResourceEntryMap::iterator it_parent =
257          entry_map_.find(parent_resource_id);
258      if (it_parent == entry_map_.end()) {
259        // Current entry's parent is already updated or not going to be updated,
260        // get the parent from the local tree.
261        std::string parent_local_id;
262        FileError error = resource_metadata_->GetIdByResourceId(
263            parent_resource_id, &parent_local_id);
264        if (error != FILE_ERROR_OK) {
265          // See crbug.com/326043. In some complicated situations, parent folder
266          // for shared entries may be accessible (and hence its resource id is
267          // included), but not in the change/file list.
268          // In such a case, clear the parent and move it to drive/other.
269          if (error == FILE_ERROR_NOT_FOUND) {
270            parent_resource_id_map_[it->first] = "";
271          } else {
272            LOG(ERROR) << "Failed to get local ID: " << parent_resource_id
273                       << ", error = " << FileErrorToString(error);
274          }
275          break;
276        }
277        ResourceEntry parent_entry;
278        while (it_parent == entry_map_.end() && !parent_local_id.empty()) {
279          error = resource_metadata_->GetResourceEntryById(
280              parent_local_id, &parent_entry);
281          if (error != FILE_ERROR_OK) {
282            LOG(ERROR) << "Failed to get local entry: "
283                       << FileErrorToString(error);
284            break;
285          }
286          it_parent = entry_map_.find(parent_entry.resource_id());
287          parent_local_id = parent_entry.parent_local_id();
288        }
289      }
290      it = it_parent;
291    }
292
293    // Apply the parent first.
294    std::reverse(entries.begin(), entries.end());
295    for (size_t i = 0; i < entries.size(); ++i) {
296      // Skip root entry in the change list. We don't expect servers to send
297      // root entry, but we should better be defensive (see crbug.com/297259).
298      ResourceEntryMap::iterator it = entries[i];
299      if (it->first != root.resource_id()) {
300        // TODO(hashimoto): Handle ApplyEntry errors correctly.
301        FileError error = ApplyEntry(it->second);
302        DLOG_IF(WARNING, error != FILE_ERROR_OK)
303            << "ApplyEntry failed: " << FileErrorToString(error)
304            << ", title = " << it->second.title();
305      }
306      entry_map_.erase(it);
307    }
308  }
309
310  // Apply deleted entries.
311  for (size_t i = 0; i < deleted_resource_ids.size(); ++i) {
312    std::string local_id;
313    FileError error = resource_metadata_->GetIdByResourceId(
314        deleted_resource_ids[i], &local_id);
315    if (error == FILE_ERROR_OK)
316      error = resource_metadata_->RemoveEntry(local_id);
317
318    DLOG_IF(WARNING, error != FILE_ERROR_OK && error != FILE_ERROR_NOT_FOUND)
319        << "Failed to delete: " << FileErrorToString(error)
320        << ", resource_id = " << deleted_resource_ids[i];
321  }
322
323  return FILE_ERROR_OK;
324}
325
326FileError ChangeListProcessor::ApplyEntry(const ResourceEntry& entry) {
327  DCHECK(!entry.deleted());
328  DCHECK(parent_resource_id_map_.count(entry.resource_id()));
329  DCHECK(modification_date_map_.count(entry.resource_id()));
330  const std::string& parent_resource_id =
331      parent_resource_id_map_[entry.resource_id()];
332  const base::Time& modification_date =
333      modification_date_map_[entry.resource_id()];
334
335  ResourceEntry new_entry(entry);
336  FileError error = SetParentLocalIdOfEntry(resource_metadata_, &new_entry,
337                                            parent_resource_id);
338  if (error != FILE_ERROR_OK)
339    return error;
340
341  // Lookup the entry.
342  std::string local_id;
343  error = resource_metadata_->GetIdByResourceId(entry.resource_id(), &local_id);
344
345  ResourceEntry existing_entry;
346  if (error == FILE_ERROR_OK)
347    error = resource_metadata_->GetResourceEntryById(local_id, &existing_entry);
348
349  switch (error) {
350    case FILE_ERROR_OK:
351      if (ShouldApplyChange(existing_entry, new_entry, modification_date)) {
352        // Entry exists and needs to be refreshed.
353        new_entry.set_local_id(local_id);
354        error = resource_metadata_->RefreshEntry(new_entry);
355      } else {
356        if (entry.file_info().is_directory()) {
357          // No need to refresh, but update the changestamp.
358          new_entry = existing_entry;
359          new_entry.mutable_directory_specific_info()->set_changestamp(
360              new_entry.directory_specific_info().changestamp());
361          error = resource_metadata_->RefreshEntry(new_entry);
362        }
363        DVLOG(1) << "Change was discarded for: "
364                 << resource_metadata_->GetFilePath(local_id).value();
365      }
366      break;
367    case FILE_ERROR_NOT_FOUND: {  // Adding a new entry.
368      std::string local_id;
369      error = resource_metadata_->AddEntry(new_entry, &local_id);
370      break;
371    }
372    default:
373      return error;
374  }
375  if (error != FILE_ERROR_OK)
376    return error;
377
378  UpdateChangedDirs(entry);
379  return FILE_ERROR_OK;
380}
381
382// static
383FileError ChangeListProcessor::RefreshDirectory(
384    ResourceMetadata* resource_metadata,
385    const DirectoryFetchInfo& directory_fetch_info,
386    scoped_ptr<ChangeList> change_list,
387    std::vector<ResourceEntry>* out_refreshed_entries) {
388  DCHECK(!directory_fetch_info.empty());
389
390  ResourceEntry directory;
391  FileError error = resource_metadata->GetResourceEntryById(
392      directory_fetch_info.local_id(), &directory);
393  if (error != FILE_ERROR_OK)
394    return error;
395
396  if (!directory.file_info().is_directory())
397    return FILE_ERROR_NOT_A_DIRECTORY;
398
399  std::vector<ResourceEntry>* entries = change_list->mutable_entries();
400  for (size_t i = 0; i < entries->size(); ++i) {
401    ResourceEntry* entry = &(*entries)[i];
402    const std::string& parent_resource_id =
403        change_list->parent_resource_ids()[i];
404
405    // Skip if the parent resource ID does not match. This is needed to
406    // handle entries with multiple parents. For such entries, the first
407    // parent is picked and other parents are ignored, hence some entries may
408    // have a parent resource ID which does not match the target directory's.
409    if (parent_resource_id != directory_fetch_info.resource_id()) {
410      DVLOG(1) << "Wrong-parent entry rejected: " << entry->resource_id();
411      continue;
412    }
413
414    entry->set_parent_local_id(directory_fetch_info.local_id());
415
416    std::string local_id;
417    error = resource_metadata->GetIdByResourceId(entry->resource_id(),
418                                                 &local_id);
419    if (error == FILE_ERROR_OK) {
420      entry->set_local_id(local_id);
421      error = resource_metadata->RefreshEntry(*entry);
422    }
423
424    if (error == FILE_ERROR_NOT_FOUND) {  // If refreshing fails, try adding.
425      entry->clear_local_id();
426      error = resource_metadata->AddEntry(*entry, &local_id);
427    }
428
429    if (error != FILE_ERROR_OK)
430      return error;
431
432    ResourceEntry result_entry;
433    error = resource_metadata->GetResourceEntryById(local_id, &result_entry);
434    if (error != FILE_ERROR_OK)
435      return error;
436    out_refreshed_entries->push_back(result_entry);
437  }
438  return FILE_ERROR_OK;
439}
440
441// static
442FileError ChangeListProcessor::SetParentLocalIdOfEntry(
443    ResourceMetadata* resource_metadata,
444    ResourceEntry* entry,
445    const std::string& parent_resource_id) {
446  std::string parent_local_id;
447  if (parent_resource_id.empty()) {
448    // Entries without parents should go under "other" directory.
449    parent_local_id = util::kDriveOtherDirLocalId;
450  } else {
451    FileError error = resource_metadata->GetIdByResourceId(
452        parent_resource_id, &parent_local_id);
453    if (error != FILE_ERROR_OK)
454      return error;
455  }
456  entry->set_parent_local_id(parent_local_id);
457  return FILE_ERROR_OK;
458}
459
460void ChangeListProcessor::UpdateChangedDirs(const ResourceEntry& entry) {
461  DCHECK(!entry.resource_id().empty());
462
463  std::string local_id;
464  base::FilePath file_path;
465  if (resource_metadata_->GetIdByResourceId(
466          entry.resource_id(), &local_id) == FILE_ERROR_OK)
467    file_path = resource_metadata_->GetFilePath(local_id);
468
469  if (!file_path.empty()) {
470    // Notify parent.
471    changed_dirs_.insert(file_path.DirName());
472
473    if (entry.file_info().is_directory()) {
474      // Notify self if entry is a directory.
475      changed_dirs_.insert(file_path);
476
477      // Notify all descendants if it is a directory deletion.
478      if (entry.deleted()) {
479        std::set<base::FilePath> sub_directories;
480        resource_metadata_->GetSubDirectoriesRecursively(local_id,
481                                                         &sub_directories);
482        changed_dirs_.insert(sub_directories.begin(), sub_directories.end());
483      }
484    }
485  }
486}
487
488}  // namespace internal
489}  // namespace drive
490