search_metadata.cc revision 5d1f7b1de12d16ceb2c938c56701a3e8bfa558f7
1// Copyright (c) 2013 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "chrome/browser/chromeos/drive/search_metadata.h" 6 7#include <algorithm> 8#include <queue> 9 10#include "base/bind.h" 11#include "base/i18n/string_search.h" 12#include "base/metrics/histogram.h" 13#include "base/strings/utf_string_conversions.h" 14#include "base/time/time.h" 15#include "chrome/browser/chromeos/drive/file_system_util.h" 16#include "content/public/browser/browser_thread.h" 17#include "google_apis/drive/gdata_wapi_parser.h" 18#include "net/base/escape.h" 19 20using content::BrowserThread; 21 22namespace drive { 23namespace internal { 24 25namespace { 26 27struct ResultCandidate { 28 ResultCandidate(const std::string& local_id, 29 const ResourceEntry& entry, 30 const std::string& highlighted_base_name) 31 : local_id(local_id), 32 entry(entry), 33 highlighted_base_name(highlighted_base_name) { 34 } 35 36 std::string local_id; 37 ResourceEntry entry; 38 std::string highlighted_base_name; 39}; 40 41// Used to sort the result candidates per the last accessed/modified time. The 42// recently accessed/modified files come first. 43bool CompareByTimestamp(const ResourceEntry& a, const ResourceEntry& b) { 44 const PlatformFileInfoProto& a_file_info = a.file_info(); 45 const PlatformFileInfoProto& b_file_info = b.file_info(); 46 47 if (a_file_info.last_accessed() != b_file_info.last_accessed()) 48 return a_file_info.last_accessed() > b_file_info.last_accessed(); 49 50 // When the entries have the same last access time (which happens quite often 51 // because Drive server doesn't set the field until an entry is viewed via 52 // drive.google.com), we use last modified time as the tie breaker. 53 return a_file_info.last_modified() > b_file_info.last_modified(); 54} 55 56struct ResultCandidateComparator { 57 bool operator()(const ResultCandidate* a, const ResultCandidate* b) const { 58 return CompareByTimestamp(a->entry, b->entry); 59 } 60}; 61 62// A wrapper of std::priority_queue which deals with pointers of values. 63template<typename T, typename Compare> 64class ScopedPriorityQueue { 65 public: 66 ScopedPriorityQueue() {} 67 68 ~ScopedPriorityQueue() { 69 while (!empty()) 70 pop(); 71 } 72 73 bool empty() const { return queue_.empty(); } 74 75 size_t size() const { return queue_.size(); } 76 77 const T* top() const { return queue_.top(); } 78 79 void push(T* x) { queue_.push(x); } 80 81 void pop() { 82 // Keep top alive for the pop() call so that debug checks can access 83 // underlying data (e.g. validating heap property of the priority queue 84 // will call the comparator). 85 T* saved_top = queue_.top(); 86 queue_.pop(); 87 delete saved_top; 88 } 89 90 private: 91 std::priority_queue<T*, std::vector<T*>, Compare> queue_; 92 93 DISALLOW_COPY_AND_ASSIGN(ScopedPriorityQueue); 94}; 95 96// Classifies the given entry as trashed if it's placed under the trash. 97class TrashedEntryClassifier { 98 public: 99 explicit TrashedEntryClassifier(ResourceMetadata* metadata) 100 : metadata_(metadata) { 101 trashed_[""] = false; 102 trashed_[util::kDriveTrashDirLocalId] = true; 103 } 104 105 // |result| is set to true if |entry| is under the trash. 106 FileError IsTrashed(const ResourceEntry& entry, bool* result) { 107 // parent_local_id cannot be used to classify the trash itself. 108 if (entry.local_id() == util::kDriveTrashDirLocalId) { 109 *result = true; 110 return FILE_ERROR_OK; 111 } 112 113 // Look up for parents recursively. 114 std::vector<std::string> undetermined_ids; 115 undetermined_ids.push_back(entry.parent_local_id()); 116 117 std::map<std::string, bool>::iterator it = 118 trashed_.find(undetermined_ids.back()); 119 for (; it == trashed_.end(); it = trashed_.find(undetermined_ids.back())) { 120 ResourceEntry parent; 121 FileError error = 122 metadata_->GetResourceEntryById(undetermined_ids.back(), &parent); 123 if (error != FILE_ERROR_OK) 124 return error; 125 undetermined_ids.push_back(parent.parent_local_id()); 126 } 127 128 // Cache the result to |trashed_|. 129 undetermined_ids.pop_back(); // The last one is already in |trashed_|. 130 for (size_t i = 0; i < undetermined_ids.size(); ++i) 131 trashed_[undetermined_ids[i]] = it->second; 132 133 *result = it->second; 134 return FILE_ERROR_OK; 135 } 136 137 private: 138 ResourceMetadata* metadata_; 139 std::map<std::string, bool> trashed_; // local ID to is_trashed map. 140}; 141 142// Returns true if |entry| is eligible for the search |options| and should be 143// tested for the match with the query. If 144// SEARCH_METADATA_EXCLUDE_HOSTED_DOCUMENTS is requested, the hosted documents 145// are skipped. If SEARCH_METADATA_EXCLUDE_DIRECTORIES is requested, the 146// directories are skipped. If SEARCH_METADATA_SHARED_WITH_ME is requested, only 147// the entries with shared-with-me label will be tested. If 148// SEARCH_METADATA_OFFLINE is requested, only hosted documents and cached files 149// match with the query. This option can not be used with other options. 150bool IsEligibleEntry(const ResourceEntry& entry, 151 ResourceMetadata::Iterator* it, 152 int options) { 153 if ((options & SEARCH_METADATA_EXCLUDE_HOSTED_DOCUMENTS) && 154 entry.file_specific_info().is_hosted_document()) 155 return false; 156 157 if ((options & SEARCH_METADATA_EXCLUDE_DIRECTORIES) && 158 entry.file_info().is_directory()) 159 return false; 160 161 if (options & SEARCH_METADATA_SHARED_WITH_ME) 162 return entry.shared_with_me(); 163 164 if (options & SEARCH_METADATA_OFFLINE) { 165 if (entry.file_specific_info().is_hosted_document()) { 166 // Not all hosted documents are cached by Drive offline app. 167 // http://support.google.com/drive/bin/answer.py?hl=en&answer=1628467 168 switch (google_apis::ResourceEntry::GetEntryKindFromExtension( 169 entry.file_specific_info().document_extension())) { 170 case google_apis::ENTRY_KIND_DOCUMENT: 171 case google_apis::ENTRY_KIND_SPREADSHEET: 172 case google_apis::ENTRY_KIND_PRESENTATION: 173 case google_apis::ENTRY_KIND_DRAWING: 174 return true; 175 default: 176 return false; 177 } 178 } else { 179 FileCacheEntry cache_entry; 180 it->GetCacheEntry(&cache_entry); 181 return cache_entry.is_present(); 182 } 183 } 184 185 // Exclude "drive", "drive/root", and "drive/other". 186 if (it->GetID() == util::kDriveGrandRootLocalId || 187 entry.parent_local_id() == util::kDriveGrandRootLocalId) { 188 return false; 189 } 190 191 return true; 192} 193 194// Used to implement SearchMetadata. 195// Adds entry to the result when appropriate. 196// In particular, if |query| is non-null, only adds files with the name matching 197// the query. 198FileError MaybeAddEntryToResult( 199 ResourceMetadata* resource_metadata, 200 ResourceMetadata::Iterator* it, 201 base::i18n::FixedPatternStringSearchIgnoringCaseAndAccents* query, 202 int options, 203 size_t at_most_num_matches, 204 TrashedEntryClassifier* trashed_entry_classifier, 205 ScopedPriorityQueue<ResultCandidate, 206 ResultCandidateComparator>* result_candidates) { 207 DCHECK_GE(at_most_num_matches, result_candidates->size()); 208 209 const ResourceEntry& entry = it->GetValue(); 210 211 // If the candidate set is already full, and this |entry| is old, do nothing. 212 // We perform this check first in order to avoid the costly find-and-highlight 213 // or FilePath lookup as much as possible. 214 if (result_candidates->size() == at_most_num_matches && 215 !CompareByTimestamp(entry, result_candidates->top()->entry)) 216 return FILE_ERROR_OK; 217 218 // Add |entry| to the result if the entry is eligible for the given 219 // |options| and matches the query. The base name of the entry must 220 // contain |query| to match the query. 221 std::string highlighted; 222 if (!IsEligibleEntry(entry, it, options) || 223 (query && !FindAndHighlight(entry.base_name(), query, &highlighted))) 224 return FILE_ERROR_OK; 225 226 // Trashed entry should not be returned. 227 bool trashed = false; 228 FileError error = trashed_entry_classifier->IsTrashed(entry, &trashed); 229 if (error != FILE_ERROR_OK || trashed) 230 return error; 231 232 // Make space for |entry| when appropriate. 233 if (result_candidates->size() == at_most_num_matches) 234 result_candidates->pop(); 235 result_candidates->push(new ResultCandidate(it->GetID(), entry, highlighted)); 236 return FILE_ERROR_OK; 237} 238 239// Implements SearchMetadata(). 240FileError SearchMetadataOnBlockingPool(ResourceMetadata* resource_metadata, 241 const std::string& query_text, 242 int options, 243 int at_most_num_matches, 244 MetadataSearchResultVector* results) { 245 ScopedPriorityQueue<ResultCandidate, 246 ResultCandidateComparator> result_candidates; 247 248 // Prepare data structure for searching. 249 base::i18n::FixedPatternStringSearchIgnoringCaseAndAccents query( 250 base::UTF8ToUTF16(query_text)); 251 252 // Iterate over entries. 253 TrashedEntryClassifier trashed_entry_classifier(resource_metadata); 254 scoped_ptr<ResourceMetadata::Iterator> it = resource_metadata->GetIterator(); 255 for (; !it->IsAtEnd(); it->Advance()) { 256 FileError error = MaybeAddEntryToResult(resource_metadata, it.get(), 257 query_text.empty() ? NULL : &query, 258 options, 259 at_most_num_matches, 260 &trashed_entry_classifier, 261 &result_candidates); 262 if (error != FILE_ERROR_OK) 263 return error; 264 } 265 266 // Prepare the result. 267 for (; !result_candidates.empty(); result_candidates.pop()) { 268 const ResultCandidate& candidate = *result_candidates.top(); 269 // The path field of entries in result_candidates are empty at this point, 270 // because we don't want to run the expensive metadata DB look up except for 271 // the final results. Hence, here we fill the part. 272 base::FilePath path = resource_metadata->GetFilePath(candidate.local_id); 273 if (path.empty()) 274 return FILE_ERROR_FAILED; 275 results->push_back(MetadataSearchResult( 276 path, candidate.entry, candidate.highlighted_base_name)); 277 } 278 279 // Reverse the order here because |result_candidates| puts the most 280 // uninteresting candidate at the top. 281 std::reverse(results->begin(), results->end()); 282 283 return FILE_ERROR_OK; 284} 285 286// Runs the SearchMetadataCallback and updates the histogram. 287void RunSearchMetadataCallback(const SearchMetadataCallback& callback, 288 const base::TimeTicks& start_time, 289 scoped_ptr<MetadataSearchResultVector> results, 290 FileError error) { 291 if (error != FILE_ERROR_OK) 292 results.reset(); 293 callback.Run(error, results.Pass()); 294 295 UMA_HISTOGRAM_TIMES("Drive.SearchMetadataTime", 296 base::TimeTicks::Now() - start_time); 297} 298 299} // namespace 300 301void SearchMetadata( 302 scoped_refptr<base::SequencedTaskRunner> blocking_task_runner, 303 ResourceMetadata* resource_metadata, 304 const std::string& query, 305 int options, 306 int at_most_num_matches, 307 const SearchMetadataCallback& callback) { 308 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 309 DCHECK_LE(0, at_most_num_matches); 310 DCHECK(!callback.is_null()); 311 312 const base::TimeTicks start_time = base::TimeTicks::Now(); 313 314 scoped_ptr<MetadataSearchResultVector> results( 315 new MetadataSearchResultVector); 316 MetadataSearchResultVector* results_ptr = results.get(); 317 base::PostTaskAndReplyWithResult(blocking_task_runner.get(), 318 FROM_HERE, 319 base::Bind(&SearchMetadataOnBlockingPool, 320 resource_metadata, 321 query, 322 options, 323 at_most_num_matches, 324 results_ptr), 325 base::Bind(&RunSearchMetadataCallback, 326 callback, 327 start_time, 328 base::Passed(&results))); 329} 330 331bool FindAndHighlight( 332 const std::string& text, 333 base::i18n::FixedPatternStringSearchIgnoringCaseAndAccents* query, 334 std::string* highlighted_text) { 335 DCHECK(query); 336 DCHECK(highlighted_text); 337 highlighted_text->clear(); 338 339 base::string16 text16 = base::UTF8ToUTF16(text); 340 size_t match_start = 0; 341 size_t match_length = 0; 342 if (!query->Search(text16, &match_start, &match_length)) 343 return false; 344 345 base::string16 pre = text16.substr(0, match_start); 346 base::string16 match = text16.substr(match_start, match_length); 347 base::string16 post = text16.substr(match_start + match_length); 348 highlighted_text->append(net::EscapeForHTML(base::UTF16ToUTF8(pre))); 349 highlighted_text->append("<b>"); 350 highlighted_text->append(net::EscapeForHTML(base::UTF16ToUTF8(match))); 351 highlighted_text->append("</b>"); 352 highlighted_text->append(net::EscapeForHTML(base::UTF16ToUTF8(post))); 353 return true; 354} 355 356} // namespace internal 357} // namespace drive 358