1// Copyright (c) 2013 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "chrome/browser/chromeos/drive/search_metadata.h"
6
7#include <algorithm>
8#include <queue>
9
10#include "base/bind.h"
11#include "base/i18n/string_search.h"
12#include "base/metrics/histogram.h"
13#include "base/strings/utf_string_conversions.h"
14#include "base/time/time.h"
15#include "chrome/browser/chromeos/drive/file_system_util.h"
16#include "chrome/browser/drive/drive_api_util.h"
17#include "content/public/browser/browser_thread.h"
18#include "net/base/escape.h"
19
20using content::BrowserThread;
21
22namespace drive {
23namespace internal {
24
25namespace {
26
27struct ResultCandidate {
28  ResultCandidate(const std::string& local_id,
29                  const ResourceEntry& entry,
30                  const std::string& highlighted_base_name)
31      : local_id(local_id),
32        entry(entry),
33        highlighted_base_name(highlighted_base_name) {
34  }
35
36  std::string local_id;
37  ResourceEntry entry;
38  std::string highlighted_base_name;
39};
40
41// Used to sort the result candidates per the last accessed/modified time. The
42// recently accessed/modified files come first.
43bool CompareByTimestamp(const ResourceEntry& a, const ResourceEntry& b) {
44  const PlatformFileInfoProto& a_file_info = a.file_info();
45  const PlatformFileInfoProto& b_file_info = b.file_info();
46
47  if (a_file_info.last_accessed() != b_file_info.last_accessed())
48    return a_file_info.last_accessed() > b_file_info.last_accessed();
49
50  // When the entries have the same last access time (which happens quite often
51  // because Drive server doesn't set the field until an entry is viewed via
52  // drive.google.com), we use last modified time as the tie breaker.
53  return a_file_info.last_modified() > b_file_info.last_modified();
54}
55
56struct ResultCandidateComparator {
57  bool operator()(const ResultCandidate* a, const ResultCandidate* b) const {
58    return CompareByTimestamp(a->entry, b->entry);
59  }
60};
61
62// A wrapper of std::priority_queue which deals with pointers of values.
63template<typename T, typename Compare>
64class ScopedPriorityQueue {
65 public:
66  ScopedPriorityQueue() {}
67
68  ~ScopedPriorityQueue() {
69    while (!empty())
70      pop();
71  }
72
73  bool empty() const { return queue_.empty(); }
74
75  size_t size() const { return queue_.size(); }
76
77  const T* top() const { return queue_.top(); }
78
79  void push(T* x) { queue_.push(x); }
80
81  void pop() {
82    // Keep top alive for the pop() call so that debug checks can access
83    // underlying data (e.g. validating heap property of the priority queue
84    // will call the comparator).
85    T* saved_top = queue_.top();
86    queue_.pop();
87    delete saved_top;
88  }
89
90 private:
91  std::priority_queue<T*, std::vector<T*>, Compare> queue_;
92
93  DISALLOW_COPY_AND_ASSIGN(ScopedPriorityQueue);
94};
95
96// Classifies the given entry as hidden if it's not under specific directories.
97class HiddenEntryClassifier {
98 public:
99  HiddenEntryClassifier(ResourceMetadata* metadata,
100                        const std::string& mydrive_local_id)
101      : metadata_(metadata) {
102    // Only things under My Drive and drive/other are not hidden.
103    is_hiding_child_[mydrive_local_id] = false;
104    is_hiding_child_[util::kDriveOtherDirLocalId] = false;
105
106    // Everything else is hidden, including the directories mentioned above
107    // themselves.
108    is_hiding_child_[""] = true;
109  }
110
111  // |result| is set to true if |entry| is hidden.
112  FileError IsHidden(const ResourceEntry& entry, bool* result) {
113    // Look up for parents recursively.
114    std::vector<std::string> undetermined_ids;
115    undetermined_ids.push_back(entry.parent_local_id());
116
117    std::map<std::string, bool>::iterator it =
118        is_hiding_child_.find(undetermined_ids.back());
119    for (; it == is_hiding_child_.end();
120         it = is_hiding_child_.find(undetermined_ids.back())) {
121      ResourceEntry parent;
122      FileError error =
123          metadata_->GetResourceEntryById(undetermined_ids.back(), &parent);
124      if (error != FILE_ERROR_OK)
125        return error;
126      undetermined_ids.push_back(parent.parent_local_id());
127    }
128
129    // Cache the result.
130    undetermined_ids.pop_back();  // The last one is already in the map.
131    for (size_t i = 0; i < undetermined_ids.size(); ++i)
132      is_hiding_child_[undetermined_ids[i]] = it->second;
133
134    *result = it->second;
135    return FILE_ERROR_OK;
136  }
137
138 private:
139  ResourceMetadata* metadata_;
140
141  // local ID to is_hidden map.
142  std::map<std::string, bool> is_hiding_child_;
143};
144
145// Returns true if |entry| is eligible for the search |options| and should be
146// tested for the match with the query.  If
147// SEARCH_METADATA_EXCLUDE_HOSTED_DOCUMENTS is requested, the hosted documents
148// are skipped. If SEARCH_METADATA_EXCLUDE_DIRECTORIES is requested, the
149// directories are skipped. If SEARCH_METADATA_SHARED_WITH_ME is requested, only
150// the entries with shared-with-me label will be tested. If
151// SEARCH_METADATA_OFFLINE is requested, only hosted documents and cached files
152// match with the query. This option can not be used with other options.
153bool IsEligibleEntry(const ResourceEntry& entry, int options) {
154  if ((options & SEARCH_METADATA_EXCLUDE_HOSTED_DOCUMENTS) &&
155      entry.file_specific_info().is_hosted_document())
156    return false;
157
158  if ((options & SEARCH_METADATA_EXCLUDE_DIRECTORIES) &&
159      entry.file_info().is_directory())
160    return false;
161
162  if (options & SEARCH_METADATA_SHARED_WITH_ME)
163    return entry.shared_with_me();
164
165  if (options & SEARCH_METADATA_OFFLINE) {
166    if (entry.file_specific_info().is_hosted_document()) {
167      // Not all hosted documents are cached by Drive offline app.
168      // http://support.google.com/drive/bin/answer.py?hl=en&answer=1628467
169      std::string mime_type = entry.file_specific_info().content_mime_type();
170      return mime_type == drive::util::kGoogleDocumentMimeType ||
171             mime_type == drive::util::kGoogleSpreadsheetMimeType ||
172             mime_type == drive::util::kGooglePresentationMimeType ||
173             mime_type == drive::util::kGoogleDrawingMimeType;
174    } else {
175      return entry.file_specific_info().cache_state().is_present();
176    }
177  }
178
179  return true;
180}
181
182// Used to implement SearchMetadata.
183// Adds entry to the result when appropriate.
184// In particular, if |query| is non-null, only adds files with the name matching
185// the query.
186FileError MaybeAddEntryToResult(
187    ResourceMetadata* resource_metadata,
188    ResourceMetadata::Iterator* it,
189    base::i18n::FixedPatternStringSearchIgnoringCaseAndAccents* query,
190    int options,
191    size_t at_most_num_matches,
192    HiddenEntryClassifier* hidden_entry_classifier,
193    ScopedPriorityQueue<ResultCandidate,
194                        ResultCandidateComparator>* result_candidates) {
195  DCHECK_GE(at_most_num_matches, result_candidates->size());
196
197  const ResourceEntry& entry = it->GetValue();
198
199  // If the candidate set is already full, and this |entry| is old, do nothing.
200  // We perform this check first in order to avoid the costly find-and-highlight
201  // or FilePath lookup as much as possible.
202  if (result_candidates->size() == at_most_num_matches &&
203      !CompareByTimestamp(entry, result_candidates->top()->entry))
204    return FILE_ERROR_OK;
205
206  // Add |entry| to the result if the entry is eligible for the given
207  // |options| and matches the query. The base name of the entry must
208  // contain |query| to match the query.
209  std::string highlighted;
210  if (!IsEligibleEntry(entry, options) ||
211      (query && !FindAndHighlight(entry.base_name(), query, &highlighted)))
212    return FILE_ERROR_OK;
213
214  // Hidden entry should not be returned.
215  bool hidden = false;
216  FileError error = hidden_entry_classifier->IsHidden(entry, &hidden);
217  if (error != FILE_ERROR_OK || hidden)
218    return error;
219
220  // Make space for |entry| when appropriate.
221  if (result_candidates->size() == at_most_num_matches)
222    result_candidates->pop();
223  result_candidates->push(new ResultCandidate(it->GetID(), entry, highlighted));
224  return FILE_ERROR_OK;
225}
226
227// Implements SearchMetadata().
228FileError SearchMetadataOnBlockingPool(ResourceMetadata* resource_metadata,
229                                       const std::string& query_text,
230                                       int options,
231                                       int at_most_num_matches,
232                                       MetadataSearchResultVector* results) {
233  ScopedPriorityQueue<ResultCandidate,
234                      ResultCandidateComparator> result_candidates;
235
236  // Prepare data structure for searching.
237  base::i18n::FixedPatternStringSearchIgnoringCaseAndAccents query(
238      base::UTF8ToUTF16(query_text));
239
240  // Prepare an object to filter out hidden entries.
241  ResourceEntry mydrive;
242  FileError error = resource_metadata->GetResourceEntryByPath(
243      util::GetDriveMyDriveRootPath(), &mydrive);
244  if (error != FILE_ERROR_OK)
245    return error;
246  HiddenEntryClassifier hidden_entry_classifier(resource_metadata,
247                                                mydrive.local_id());
248
249  // Iterate over entries.
250  scoped_ptr<ResourceMetadata::Iterator> it = resource_metadata->GetIterator();
251  for (; !it->IsAtEnd(); it->Advance()) {
252    FileError error = MaybeAddEntryToResult(resource_metadata, it.get(),
253                                            query_text.empty() ? NULL : &query,
254                                            options,
255                                            at_most_num_matches,
256                                            &hidden_entry_classifier,
257                                            &result_candidates);
258    if (error != FILE_ERROR_OK)
259      return error;
260  }
261
262  // Prepare the result.
263  for (; !result_candidates.empty(); result_candidates.pop()) {
264    const ResultCandidate& candidate = *result_candidates.top();
265    // The path field of entries in result_candidates are empty at this point,
266    // because we don't want to run the expensive metadata DB look up except for
267    // the final results. Hence, here we fill the part.
268    base::FilePath path;
269    error = resource_metadata->GetFilePath(candidate.local_id, &path);
270    if (error != FILE_ERROR_OK)
271      return error;
272    bool is_directory = candidate.entry.file_info().is_directory();
273    results->push_back(MetadataSearchResult(
274        path, is_directory, candidate.highlighted_base_name));
275  }
276
277  // Reverse the order here because |result_candidates| puts the most
278  // uninteresting candidate at the top.
279  std::reverse(results->begin(), results->end());
280
281  return FILE_ERROR_OK;
282}
283
284// Runs the SearchMetadataCallback and updates the histogram.
285void RunSearchMetadataCallback(const SearchMetadataCallback& callback,
286                               const base::TimeTicks& start_time,
287                               scoped_ptr<MetadataSearchResultVector> results,
288                               FileError error) {
289  if (error != FILE_ERROR_OK)
290    results.reset();
291  callback.Run(error, results.Pass());
292
293  UMA_HISTOGRAM_TIMES("Drive.SearchMetadataTime",
294                      base::TimeTicks::Now() - start_time);
295}
296
297}  // namespace
298
299void SearchMetadata(
300    scoped_refptr<base::SequencedTaskRunner> blocking_task_runner,
301    ResourceMetadata* resource_metadata,
302    const std::string& query,
303    int options,
304    int at_most_num_matches,
305    const SearchMetadataCallback& callback) {
306  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
307  DCHECK_LE(0, at_most_num_matches);
308  DCHECK(!callback.is_null());
309
310  const base::TimeTicks start_time = base::TimeTicks::Now();
311
312  scoped_ptr<MetadataSearchResultVector> results(
313      new MetadataSearchResultVector);
314  MetadataSearchResultVector* results_ptr = results.get();
315  base::PostTaskAndReplyWithResult(blocking_task_runner.get(),
316                                   FROM_HERE,
317                                   base::Bind(&SearchMetadataOnBlockingPool,
318                                              resource_metadata,
319                                              query,
320                                              options,
321                                              at_most_num_matches,
322                                              results_ptr),
323                                   base::Bind(&RunSearchMetadataCallback,
324                                              callback,
325                                              start_time,
326                                              base::Passed(&results)));
327}
328
329bool FindAndHighlight(
330    const std::string& text,
331    base::i18n::FixedPatternStringSearchIgnoringCaseAndAccents* query,
332    std::string* highlighted_text) {
333  DCHECK(query);
334  DCHECK(highlighted_text);
335  highlighted_text->clear();
336
337  base::string16 text16 = base::UTF8ToUTF16(text);
338  size_t match_start = 0;
339  size_t match_length = 0;
340  if (!query->Search(text16, &match_start, &match_length))
341    return false;
342
343  base::string16 pre = text16.substr(0, match_start);
344  base::string16 match = text16.substr(match_start, match_length);
345  base::string16 post = text16.substr(match_start + match_length);
346  highlighted_text->append(net::EscapeForHTML(base::UTF16ToUTF8(pre)));
347  highlighted_text->append("<b>");
348  highlighted_text->append(net::EscapeForHTML(base::UTF16ToUTF8(match)));
349  highlighted_text->append("</b>");
350  highlighted_text->append(net::EscapeForHTML(base::UTF16ToUTF8(post)));
351  return true;
352}
353
354}  // namespace internal
355}  // namespace drive
356