search_metadata.cc revision 5d1f7b1de12d16ceb2c938c56701a3e8bfa558f7
1// Copyright (c) 2013 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "chrome/browser/chromeos/drive/search_metadata.h"
6
7#include <algorithm>
8#include <queue>
9
10#include "base/bind.h"
11#include "base/i18n/string_search.h"
12#include "base/metrics/histogram.h"
13#include "base/strings/utf_string_conversions.h"
14#include "base/time/time.h"
15#include "chrome/browser/chromeos/drive/file_system_util.h"
16#include "content/public/browser/browser_thread.h"
17#include "google_apis/drive/gdata_wapi_parser.h"
18#include "net/base/escape.h"
19
20using content::BrowserThread;
21
22namespace drive {
23namespace internal {
24
25namespace {
26
27struct ResultCandidate {
28  ResultCandidate(const std::string& local_id,
29                  const ResourceEntry& entry,
30                  const std::string& highlighted_base_name)
31      : local_id(local_id),
32        entry(entry),
33        highlighted_base_name(highlighted_base_name) {
34  }
35
36  std::string local_id;
37  ResourceEntry entry;
38  std::string highlighted_base_name;
39};
40
41// Used to sort the result candidates per the last accessed/modified time. The
42// recently accessed/modified files come first.
43bool CompareByTimestamp(const ResourceEntry& a, const ResourceEntry& b) {
44  const PlatformFileInfoProto& a_file_info = a.file_info();
45  const PlatformFileInfoProto& b_file_info = b.file_info();
46
47  if (a_file_info.last_accessed() != b_file_info.last_accessed())
48    return a_file_info.last_accessed() > b_file_info.last_accessed();
49
50  // When the entries have the same last access time (which happens quite often
51  // because Drive server doesn't set the field until an entry is viewed via
52  // drive.google.com), we use last modified time as the tie breaker.
53  return a_file_info.last_modified() > b_file_info.last_modified();
54}
55
56struct ResultCandidateComparator {
57  bool operator()(const ResultCandidate* a, const ResultCandidate* b) const {
58    return CompareByTimestamp(a->entry, b->entry);
59  }
60};
61
62// A wrapper of std::priority_queue which deals with pointers of values.
63template<typename T, typename Compare>
64class ScopedPriorityQueue {
65 public:
66  ScopedPriorityQueue() {}
67
68  ~ScopedPriorityQueue() {
69    while (!empty())
70      pop();
71  }
72
73  bool empty() const { return queue_.empty(); }
74
75  size_t size() const { return queue_.size(); }
76
77  const T* top() const { return queue_.top(); }
78
79  void push(T* x) { queue_.push(x); }
80
81  void pop() {
82    // Keep top alive for the pop() call so that debug checks can access
83    // underlying data (e.g. validating heap property of the priority queue
84    // will call the comparator).
85    T* saved_top = queue_.top();
86    queue_.pop();
87    delete saved_top;
88  }
89
90 private:
91  std::priority_queue<T*, std::vector<T*>, Compare> queue_;
92
93  DISALLOW_COPY_AND_ASSIGN(ScopedPriorityQueue);
94};
95
96// Classifies the given entry as trashed if it's placed under the trash.
97class TrashedEntryClassifier {
98 public:
99  explicit TrashedEntryClassifier(ResourceMetadata* metadata)
100      : metadata_(metadata) {
101    trashed_[""] = false;
102    trashed_[util::kDriveTrashDirLocalId] = true;
103  }
104
105  // |result| is set to true if |entry| is under the trash.
106  FileError IsTrashed(const ResourceEntry& entry, bool* result) {
107    // parent_local_id cannot be used to classify the trash itself.
108    if (entry.local_id() == util::kDriveTrashDirLocalId) {
109      *result = true;
110      return FILE_ERROR_OK;
111    }
112
113    // Look up for parents recursively.
114    std::vector<std::string> undetermined_ids;
115    undetermined_ids.push_back(entry.parent_local_id());
116
117    std::map<std::string, bool>::iterator it =
118        trashed_.find(undetermined_ids.back());
119    for (; it == trashed_.end(); it = trashed_.find(undetermined_ids.back())) {
120      ResourceEntry parent;
121      FileError error =
122          metadata_->GetResourceEntryById(undetermined_ids.back(), &parent);
123      if (error != FILE_ERROR_OK)
124        return error;
125      undetermined_ids.push_back(parent.parent_local_id());
126    }
127
128    // Cache the result to |trashed_|.
129    undetermined_ids.pop_back();  // The last one is already in |trashed_|.
130    for (size_t i = 0; i < undetermined_ids.size(); ++i)
131      trashed_[undetermined_ids[i]] = it->second;
132
133    *result = it->second;
134    return FILE_ERROR_OK;
135  }
136
137 private:
138  ResourceMetadata* metadata_;
139  std::map<std::string, bool> trashed_;  // local ID to is_trashed map.
140};
141
142// Returns true if |entry| is eligible for the search |options| and should be
143// tested for the match with the query.  If
144// SEARCH_METADATA_EXCLUDE_HOSTED_DOCUMENTS is requested, the hosted documents
145// are skipped. If SEARCH_METADATA_EXCLUDE_DIRECTORIES is requested, the
146// directories are skipped. If SEARCH_METADATA_SHARED_WITH_ME is requested, only
147// the entries with shared-with-me label will be tested. If
148// SEARCH_METADATA_OFFLINE is requested, only hosted documents and cached files
149// match with the query. This option can not be used with other options.
150bool IsEligibleEntry(const ResourceEntry& entry,
151                     ResourceMetadata::Iterator* it,
152                     int options) {
153  if ((options & SEARCH_METADATA_EXCLUDE_HOSTED_DOCUMENTS) &&
154      entry.file_specific_info().is_hosted_document())
155    return false;
156
157  if ((options & SEARCH_METADATA_EXCLUDE_DIRECTORIES) &&
158      entry.file_info().is_directory())
159    return false;
160
161  if (options & SEARCH_METADATA_SHARED_WITH_ME)
162    return entry.shared_with_me();
163
164  if (options & SEARCH_METADATA_OFFLINE) {
165    if (entry.file_specific_info().is_hosted_document()) {
166      // Not all hosted documents are cached by Drive offline app.
167      // http://support.google.com/drive/bin/answer.py?hl=en&answer=1628467
168      switch (google_apis::ResourceEntry::GetEntryKindFromExtension(
169          entry.file_specific_info().document_extension())) {
170        case google_apis::ENTRY_KIND_DOCUMENT:
171        case google_apis::ENTRY_KIND_SPREADSHEET:
172        case google_apis::ENTRY_KIND_PRESENTATION:
173        case google_apis::ENTRY_KIND_DRAWING:
174          return true;
175        default:
176          return false;
177      }
178    } else {
179      FileCacheEntry cache_entry;
180      it->GetCacheEntry(&cache_entry);
181      return cache_entry.is_present();
182    }
183  }
184
185  // Exclude "drive", "drive/root", and "drive/other".
186  if (it->GetID() == util::kDriveGrandRootLocalId ||
187      entry.parent_local_id() == util::kDriveGrandRootLocalId) {
188    return false;
189  }
190
191  return true;
192}
193
194// Used to implement SearchMetadata.
195// Adds entry to the result when appropriate.
196// In particular, if |query| is non-null, only adds files with the name matching
197// the query.
198FileError MaybeAddEntryToResult(
199    ResourceMetadata* resource_metadata,
200    ResourceMetadata::Iterator* it,
201    base::i18n::FixedPatternStringSearchIgnoringCaseAndAccents* query,
202    int options,
203    size_t at_most_num_matches,
204    TrashedEntryClassifier* trashed_entry_classifier,
205    ScopedPriorityQueue<ResultCandidate,
206                        ResultCandidateComparator>* result_candidates) {
207  DCHECK_GE(at_most_num_matches, result_candidates->size());
208
209  const ResourceEntry& entry = it->GetValue();
210
211  // If the candidate set is already full, and this |entry| is old, do nothing.
212  // We perform this check first in order to avoid the costly find-and-highlight
213  // or FilePath lookup as much as possible.
214  if (result_candidates->size() == at_most_num_matches &&
215      !CompareByTimestamp(entry, result_candidates->top()->entry))
216    return FILE_ERROR_OK;
217
218  // Add |entry| to the result if the entry is eligible for the given
219  // |options| and matches the query. The base name of the entry must
220  // contain |query| to match the query.
221  std::string highlighted;
222  if (!IsEligibleEntry(entry, it, options) ||
223      (query && !FindAndHighlight(entry.base_name(), query, &highlighted)))
224    return FILE_ERROR_OK;
225
226  // Trashed entry should not be returned.
227  bool trashed = false;
228  FileError error = trashed_entry_classifier->IsTrashed(entry, &trashed);
229  if (error != FILE_ERROR_OK || trashed)
230    return error;
231
232  // Make space for |entry| when appropriate.
233  if (result_candidates->size() == at_most_num_matches)
234    result_candidates->pop();
235  result_candidates->push(new ResultCandidate(it->GetID(), entry, highlighted));
236  return FILE_ERROR_OK;
237}
238
239// Implements SearchMetadata().
240FileError SearchMetadataOnBlockingPool(ResourceMetadata* resource_metadata,
241                                       const std::string& query_text,
242                                       int options,
243                                       int at_most_num_matches,
244                                       MetadataSearchResultVector* results) {
245  ScopedPriorityQueue<ResultCandidate,
246                      ResultCandidateComparator> result_candidates;
247
248  // Prepare data structure for searching.
249  base::i18n::FixedPatternStringSearchIgnoringCaseAndAccents query(
250      base::UTF8ToUTF16(query_text));
251
252  // Iterate over entries.
253  TrashedEntryClassifier trashed_entry_classifier(resource_metadata);
254  scoped_ptr<ResourceMetadata::Iterator> it = resource_metadata->GetIterator();
255  for (; !it->IsAtEnd(); it->Advance()) {
256    FileError error = MaybeAddEntryToResult(resource_metadata, it.get(),
257                                            query_text.empty() ? NULL : &query,
258                                            options,
259                                            at_most_num_matches,
260                                            &trashed_entry_classifier,
261                                            &result_candidates);
262    if (error != FILE_ERROR_OK)
263      return error;
264  }
265
266  // Prepare the result.
267  for (; !result_candidates.empty(); result_candidates.pop()) {
268    const ResultCandidate& candidate = *result_candidates.top();
269    // The path field of entries in result_candidates are empty at this point,
270    // because we don't want to run the expensive metadata DB look up except for
271    // the final results. Hence, here we fill the part.
272    base::FilePath path = resource_metadata->GetFilePath(candidate.local_id);
273    if (path.empty())
274      return FILE_ERROR_FAILED;
275    results->push_back(MetadataSearchResult(
276        path, candidate.entry, candidate.highlighted_base_name));
277  }
278
279  // Reverse the order here because |result_candidates| puts the most
280  // uninteresting candidate at the top.
281  std::reverse(results->begin(), results->end());
282
283  return FILE_ERROR_OK;
284}
285
286// Runs the SearchMetadataCallback and updates the histogram.
287void RunSearchMetadataCallback(const SearchMetadataCallback& callback,
288                               const base::TimeTicks& start_time,
289                               scoped_ptr<MetadataSearchResultVector> results,
290                               FileError error) {
291  if (error != FILE_ERROR_OK)
292    results.reset();
293  callback.Run(error, results.Pass());
294
295  UMA_HISTOGRAM_TIMES("Drive.SearchMetadataTime",
296                      base::TimeTicks::Now() - start_time);
297}
298
299}  // namespace
300
301void SearchMetadata(
302    scoped_refptr<base::SequencedTaskRunner> blocking_task_runner,
303    ResourceMetadata* resource_metadata,
304    const std::string& query,
305    int options,
306    int at_most_num_matches,
307    const SearchMetadataCallback& callback) {
308  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
309  DCHECK_LE(0, at_most_num_matches);
310  DCHECK(!callback.is_null());
311
312  const base::TimeTicks start_time = base::TimeTicks::Now();
313
314  scoped_ptr<MetadataSearchResultVector> results(
315      new MetadataSearchResultVector);
316  MetadataSearchResultVector* results_ptr = results.get();
317  base::PostTaskAndReplyWithResult(blocking_task_runner.get(),
318                                   FROM_HERE,
319                                   base::Bind(&SearchMetadataOnBlockingPool,
320                                              resource_metadata,
321                                              query,
322                                              options,
323                                              at_most_num_matches,
324                                              results_ptr),
325                                   base::Bind(&RunSearchMetadataCallback,
326                                              callback,
327                                              start_time,
328                                              base::Passed(&results)));
329}
330
331bool FindAndHighlight(
332    const std::string& text,
333    base::i18n::FixedPatternStringSearchIgnoringCaseAndAccents* query,
334    std::string* highlighted_text) {
335  DCHECK(query);
336  DCHECK(highlighted_text);
337  highlighted_text->clear();
338
339  base::string16 text16 = base::UTF8ToUTF16(text);
340  size_t match_start = 0;
341  size_t match_length = 0;
342  if (!query->Search(text16, &match_start, &match_length))
343    return false;
344
345  base::string16 pre = text16.substr(0, match_start);
346  base::string16 match = text16.substr(match_start, match_length);
347  base::string16 post = text16.substr(match_start + match_length);
348  highlighted_text->append(net::EscapeForHTML(base::UTF16ToUTF8(pre)));
349  highlighted_text->append("<b>");
350  highlighted_text->append(net::EscapeForHTML(base::UTF16ToUTF8(match)));
351  highlighted_text->append("</b>");
352  highlighted_text->append(net::EscapeForHTML(base::UTF16ToUTF8(post)));
353  return true;
354}
355
356}  // namespace internal
357}  // namespace drive
358