1// Copyright 2014 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "chrome/browser/predictors/resource_prefetch_predictor_tables.h"
6
7#include <algorithm>
8#include <utility>
9
10#include "base/logging.h"
11#include "base/metrics/histogram.h"
12#include "base/strings/stringprintf.h"
13#include "content/public/browser/browser_thread.h"
14#include "sql/statement.h"
15
16using content::BrowserThread;
17using sql::Statement;
18
19namespace {
20
// Names of the SQL tables used by this file. Data keyed by URL and data keyed
// by host each have one resource table and one metadata table.

// URL-keyed tables.
const char kUrlResourceTableName[] = "resource_prefetch_predictor_url";
const char kUrlMetadataTableName[] =
    "resource_prefetch_predictor_url_metadata";

// Host-keyed tables.
const char kHostResourceTableName[] = "resource_prefetch_predictor_host";
const char kHostMetadataTableName[] =
    "resource_prefetch_predictor_host_metadata";
26
27void BindResourceRowToStatement(
28    const predictors::ResourcePrefetchPredictorTables::ResourceRow& row,
29    const std::string& primary_key,
30    Statement* statement) {
31  statement->BindString(0, primary_key);
32  statement->BindString(1, row.resource_url.spec());
33  statement->BindInt(2, static_cast<int>(row.resource_type));
34  statement->BindInt(3, row.number_of_hits);
35  statement->BindInt(4, row.number_of_misses);
36  statement->BindInt(5, row.consecutive_misses);
37  statement->BindDouble(6, row.average_position);
38}
39
40bool StepAndInitializeResourceRow(
41    Statement* statement,
42    predictors::ResourcePrefetchPredictorTables::ResourceRow* row) {
43  if (!statement->Step())
44    return false;
45
46  row->primary_key = statement->ColumnString(0);
47  row->resource_url = GURL(statement->ColumnString(1));
48  row->resource_type = static_cast<content::ResourceType>(
49      statement->ColumnInt(2));
50  row->number_of_hits = statement->ColumnInt(3);
51  row->number_of_misses = statement->ColumnInt(4);
52  row->consecutive_misses = statement->ColumnInt(5);
53  row->average_position = statement->ColumnDouble(6);
54  return true;
55}
56
57}  // namespace
58
59namespace predictors {
60
61// static
62const size_t ResourcePrefetchPredictorTables::kMaxStringLength = 1024;
63
64ResourcePrefetchPredictorTables::ResourceRow::ResourceRow()
65    : resource_type(content::RESOURCE_TYPE_LAST_TYPE),
66      number_of_hits(0),
67      number_of_misses(0),
68      consecutive_misses(0),
69      average_position(0.0),
70      score(0.0) {
71}
72
73ResourcePrefetchPredictorTables::ResourceRow::ResourceRow(
74    const ResourceRow& other)
75        : primary_key(other.primary_key),
76          resource_url(other.resource_url),
77          resource_type(other.resource_type),
78          number_of_hits(other.number_of_hits),
79          number_of_misses(other.number_of_misses),
80          consecutive_misses(other.consecutive_misses),
81          average_position(other.average_position),
82          score(other.score) {
83}
84
85ResourcePrefetchPredictorTables::ResourceRow::ResourceRow(
86    const std::string& i_primary_key,
87    const std::string& i_resource_url,
88    content::ResourceType i_resource_type,
89    int i_number_of_hits,
90    int i_number_of_misses,
91    int i_consecutive_misses,
92    double i_average_position)
93        : primary_key(i_primary_key),
94          resource_url(i_resource_url),
95          resource_type(i_resource_type),
96          number_of_hits(i_number_of_hits),
97          number_of_misses(i_number_of_misses),
98          consecutive_misses(i_consecutive_misses),
99          average_position(i_average_position) {
100  UpdateScore();
101}
102
103void ResourcePrefetchPredictorTables::ResourceRow::UpdateScore() {
104  // The score is calculated so that when the rows are sorted, the stylesheets
105  // and scripts appear first, sorted by position(ascending) and then the rest
106  // of the resources sorted by position(ascending).
107  static const int kMaxResourcesPerType = 100;
108  switch (resource_type) {
109    case content::RESOURCE_TYPE_STYLESHEET:
110    case content::RESOURCE_TYPE_SCRIPT:
111      score = (2 * kMaxResourcesPerType) - average_position;
112      break;
113
114    case content::RESOURCE_TYPE_IMAGE:
115    default:
116      score = kMaxResourcesPerType - average_position;
117      break;
118  }
119}
120
121bool ResourcePrefetchPredictorTables::ResourceRow::operator==(
122    const ResourceRow& rhs) const {
123  return primary_key == rhs.primary_key &&
124      resource_url == rhs.resource_url &&
125      resource_type == rhs.resource_type &&
126      number_of_hits == rhs.number_of_hits &&
127      number_of_misses == rhs.number_of_misses &&
128      consecutive_misses == rhs.consecutive_misses &&
129      average_position == rhs.average_position &&
130      score == rhs.score;
131}
132
133bool ResourcePrefetchPredictorTables::ResourceRowSorter::operator()(
134    const ResourceRow& x, const ResourceRow& y) const {
135  return x.score > y.score;
136}
137
138ResourcePrefetchPredictorTables::PrefetchData::PrefetchData(
139    PrefetchKeyType i_key_type,
140    const std::string& i_primary_key)
141    : key_type(i_key_type),
142      primary_key(i_primary_key) {
143}
144
145ResourcePrefetchPredictorTables::PrefetchData::PrefetchData(
146    const PrefetchData& other)
147    : key_type(other.key_type),
148      primary_key(other.primary_key),
149      last_visit(other.last_visit),
150      resources(other.resources) {
151}
152
153ResourcePrefetchPredictorTables::PrefetchData::~PrefetchData() {
154}
155
156bool ResourcePrefetchPredictorTables::PrefetchData::operator==(
157    const PrefetchData& rhs) const {
158  return key_type == rhs.key_type && primary_key == rhs.primary_key &&
159      resources == rhs.resources;
160}
161
162void ResourcePrefetchPredictorTables::GetAllData(
163    PrefetchDataMap* url_data_map,
164    PrefetchDataMap* host_data_map) {
165  DCHECK_CURRENTLY_ON(BrowserThread::DB);
166  if (CantAccessDatabase())
167    return;
168
169  DCHECK(url_data_map);
170  DCHECK(host_data_map);
171  url_data_map->clear();
172  host_data_map->clear();
173
174  std::vector<std::string> urls_to_delete, hosts_to_delete;
175  GetAllDataHelper(PREFETCH_KEY_TYPE_URL, url_data_map, &urls_to_delete);
176  GetAllDataHelper(PREFETCH_KEY_TYPE_HOST, host_data_map, &hosts_to_delete);
177
178  if (!urls_to_delete.empty() || !hosts_to_delete.empty())
179    DeleteData(urls_to_delete, hosts_to_delete);
180}
181
182void ResourcePrefetchPredictorTables::UpdateData(
183    const PrefetchData& url_data,
184    const PrefetchData& host_data) {
185  DCHECK_CURRENTLY_ON(BrowserThread::DB);
186  if (CantAccessDatabase())
187    return;
188
189  DCHECK(!url_data.is_host() && host_data.is_host());
190  DCHECK(!url_data.primary_key.empty() || !host_data.primary_key.empty());
191
192  DB()->BeginTransaction();
193
194  bool success = (url_data.primary_key.empty() || UpdateDataHelper(url_data)) &&
195      (host_data.primary_key.empty() || UpdateDataHelper(host_data));
196  if (!success)
197    DB()->RollbackTransaction();
198
199  DB()->CommitTransaction();
200}
201
202void ResourcePrefetchPredictorTables::DeleteData(
203    const std::vector<std::string>& urls,
204    const std::vector<std::string>& hosts) {
205  DCHECK_CURRENTLY_ON(BrowserThread::DB);
206  if (CantAccessDatabase())
207    return;
208
209  DCHECK(!urls.empty() || !hosts.empty());
210
211  if (!urls.empty())
212    DeleteDataHelper(PREFETCH_KEY_TYPE_URL, urls);
213  if (!hosts.empty())
214    DeleteDataHelper(PREFETCH_KEY_TYPE_HOST, hosts);
215}
216
217void ResourcePrefetchPredictorTables::DeleteSingleDataPoint(
218    const std::string& key,
219    PrefetchKeyType key_type) {
220  DCHECK_CURRENTLY_ON(BrowserThread::DB);
221  if (CantAccessDatabase())
222    return;
223
224  DeleteDataHelper(key_type, std::vector<std::string>(1, key));
225}
226
227void ResourcePrefetchPredictorTables::DeleteAllData() {
228  if (CantAccessDatabase())
229    return;
230
231  Statement deleter(DB()->GetUniqueStatement(
232      base::StringPrintf("DELETE FROM %s", kUrlResourceTableName).c_str()));
233  deleter.Run();
234  deleter.Assign(DB()->GetUniqueStatement(
235      base::StringPrintf("DELETE FROM %s", kUrlMetadataTableName).c_str()));
236  deleter.Run();
237  deleter.Assign(DB()->GetUniqueStatement(
238      base::StringPrintf("DELETE FROM %s", kHostResourceTableName).c_str()));
239  deleter.Run();
240  deleter.Assign(DB()->GetUniqueStatement(
241      base::StringPrintf("DELETE FROM %s", kHostMetadataTableName).c_str()));
242  deleter.Run();
243}
244
245ResourcePrefetchPredictorTables::ResourcePrefetchPredictorTables()
246    : PredictorTableBase() {
247}
248
249ResourcePrefetchPredictorTables::~ResourcePrefetchPredictorTables() {
250}
251
252void ResourcePrefetchPredictorTables::GetAllDataHelper(
253    PrefetchKeyType key_type,
254    PrefetchDataMap* data_map,
255    std::vector<std::string>* to_delete) {
256  bool is_host = key_type == PREFETCH_KEY_TYPE_HOST;
257
258  // Read the resources table and organize it per primary key.
259  const char* resource_table_name = is_host ? kHostResourceTableName :
260      kUrlResourceTableName;
261  Statement resource_reader(DB()->GetUniqueStatement(
262      base::StringPrintf("SELECT * FROM %s", resource_table_name).c_str()));
263
264  ResourceRow row;
265  while (StepAndInitializeResourceRow(&resource_reader, &row)) {
266    row.UpdateScore();
267    std::string primary_key = row.primary_key;
268    // Don't need to store primary key since the data is grouped by primary key.
269    row.primary_key.clear();
270
271    PrefetchDataMap::iterator it = data_map->find(primary_key);
272    if (it == data_map->end()) {
273      it = data_map->insert(std::make_pair(
274          primary_key, PrefetchData(key_type, primary_key))).first;
275    }
276    it->second.resources.push_back(row);
277  }
278
279  // Sort each of the resource row vectors by score.
280  for (PrefetchDataMap::iterator it = data_map->begin(); it != data_map->end();
281       ++it) {
282    std::sort(it->second.resources.begin(),
283              it->second.resources.end(),
284              ResourceRowSorter());
285  }
286
287  // Read the metadata and keep track of entries that have metadata, but no
288  // resource entries, so they can be deleted.
289  const char* metadata_table_name = is_host ? kHostMetadataTableName :
290      kUrlMetadataTableName;
291  Statement metadata_reader(DB()->GetUniqueStatement(
292      base::StringPrintf("SELECT * FROM %s", metadata_table_name).c_str()));
293
294  while (metadata_reader.Step()) {
295    std::string primary_key = metadata_reader.ColumnString(0);
296
297    PrefetchDataMap::iterator it = data_map->find(primary_key);
298    if (it != data_map->end()) {
299      int64 last_visit = metadata_reader.ColumnInt64(1);
300      it->second.last_visit = base::Time::FromInternalValue(last_visit);
301    } else {
302      to_delete->push_back(primary_key);
303    }
304  }
305}
306
307bool ResourcePrefetchPredictorTables::UpdateDataHelper(
308    const PrefetchData& data) {
309  DCHECK(!data.primary_key.empty());
310
311  if (!StringsAreSmallerThanDBLimit(data)) {
312    UMA_HISTOGRAM_BOOLEAN("ResourcePrefetchPredictor.DbStringTooLong", true);
313    return false;
314  }
315
316  // Delete the older data from both the tables.
317  scoped_ptr<Statement> deleter(data.is_host() ?
318      GetHostResourceDeleteStatement() : GetUrlResourceDeleteStatement());
319  deleter->BindString(0, data.primary_key);
320  if (!deleter->Run())
321    return false;
322
323  deleter.reset(data.is_host() ? GetHostMetadataDeleteStatement() :
324      GetUrlMetadataDeleteStatement());
325  deleter->BindString(0, data.primary_key);
326  if (!deleter->Run())
327    return false;
328
329  // Add the new data to the tables.
330  const ResourceRows& resources = data.resources;
331  for (ResourceRows::const_iterator it = resources.begin();
332       it != resources.end(); ++it) {
333    scoped_ptr<Statement> resource_inserter(data.is_host() ?
334        GetHostResourceUpdateStatement() : GetUrlResourceUpdateStatement());
335    BindResourceRowToStatement(*it, data.primary_key, resource_inserter.get());
336    if (!resource_inserter->Run())
337      return false;
338  }
339
340  scoped_ptr<Statement> metadata_inserter(data.is_host() ?
341      GetHostMetadataUpdateStatement() : GetUrlMetadataUpdateStatement());
342  metadata_inserter->BindString(0, data.primary_key);
343  metadata_inserter->BindInt64(1, data.last_visit.ToInternalValue());
344  if (!metadata_inserter->Run())
345    return false;
346
347  return true;
348}
349
350void ResourcePrefetchPredictorTables::DeleteDataHelper(
351    PrefetchKeyType key_type,
352    const std::vector<std::string>& keys) {
353  bool is_host = key_type == PREFETCH_KEY_TYPE_HOST;
354
355  for (std::vector<std::string>::const_iterator it = keys.begin();
356       it != keys.end(); ++it) {
357    scoped_ptr<Statement> deleter(is_host ? GetHostResourceDeleteStatement() :
358        GetUrlResourceDeleteStatement());
359    deleter->BindString(0, *it);
360    deleter->Run();
361
362    deleter.reset(is_host ? GetHostMetadataDeleteStatement() :
363        GetUrlMetadataDeleteStatement());
364    deleter->BindString(0, *it);
365    deleter->Run();
366  }
367}
368
369bool ResourcePrefetchPredictorTables::StringsAreSmallerThanDBLimit(
370    const PrefetchData& data) const {
371  if (data.primary_key.length() > kMaxStringLength)
372    return false;
373
374  for (ResourceRows::const_iterator it = data.resources.begin();
375       it != data.resources.end(); ++it) {
376    if (it->resource_url.spec().length() > kMaxStringLength)
377      return false;
378  }
379  return true;
380}
381
382void ResourcePrefetchPredictorTables::CreateTableIfNonExistent() {
383  DCHECK_CURRENTLY_ON(BrowserThread::DB);
384  if (CantAccessDatabase())
385    return;
386
387  const char resource_table_creator[] =
388      "CREATE TABLE %s ( "
389      "main_page_url TEXT, "
390      "resource_url TEXT, "
391      "resource_type INTEGER, "
392      "number_of_hits INTEGER, "
393      "number_of_misses INTEGER, "
394      "consecutive_misses INTEGER, "
395      "average_position DOUBLE, "
396      "PRIMARY KEY(main_page_url, resource_url))";
397  const char* metadata_table_creator =
398      "CREATE TABLE %s ( "
399      "main_page_url TEXT, "
400      "last_visit_time INTEGER, "
401      "PRIMARY KEY(main_page_url))";
402
403  sql::Connection* db = DB();
404  bool success =
405      (db->DoesTableExist(kUrlResourceTableName) ||
406       db->Execute(base::StringPrintf(resource_table_creator,
407                                      kUrlResourceTableName).c_str())) &&
408      (db->DoesTableExist(kUrlMetadataTableName) ||
409       db->Execute(base::StringPrintf(metadata_table_creator,
410                                      kUrlMetadataTableName).c_str())) &&
411      (db->DoesTableExist(kHostResourceTableName) ||
412       db->Execute(base::StringPrintf(resource_table_creator,
413                                      kHostResourceTableName).c_str())) &&
414      (db->DoesTableExist(kHostMetadataTableName) ||
415       db->Execute(base::StringPrintf(metadata_table_creator,
416                                      kHostMetadataTableName).c_str()));
417
418  if (!success)
419    ResetDB();
420}
421
422void ResourcePrefetchPredictorTables::LogDatabaseStats()  {
423  DCHECK_CURRENTLY_ON(BrowserThread::DB);
424  if (CantAccessDatabase())
425    return;
426
427  Statement statement(DB()->GetUniqueStatement(
428      base::StringPrintf("SELECT count(*) FROM %s",
429                         kUrlResourceTableName).c_str()));
430  if (statement.Step())
431    UMA_HISTOGRAM_COUNTS("ResourcePrefetchPredictor.UrlTableRowCount",
432                         statement.ColumnInt(0));
433
434  statement.Assign(DB()->GetUniqueStatement(
435      base::StringPrintf("SELECT count(*) FROM %s",
436                         kHostResourceTableName).c_str()));
437  if (statement.Step())
438    UMA_HISTOGRAM_COUNTS("ResourcePrefetchPredictor.HostTableRowCount",
439                         statement.ColumnInt(0));
440}
441
442Statement*
443    ResourcePrefetchPredictorTables::GetUrlResourceDeleteStatement() {
444  return new Statement(DB()->GetCachedStatement(
445      SQL_FROM_HERE,
446      base::StringPrintf("DELETE FROM %s WHERE main_page_url=?",
447                         kUrlResourceTableName).c_str()));
448}
449
450Statement*
451    ResourcePrefetchPredictorTables::GetUrlResourceUpdateStatement() {
452  return new Statement(DB()->GetCachedStatement(
453      SQL_FROM_HERE,
454      base::StringPrintf(
455          "INSERT INTO %s "
456          "(main_page_url, resource_url, resource_type, number_of_hits, "
457          "number_of_misses, consecutive_misses, average_position) "
458          "VALUES (?,?,?,?,?,?,?)", kUrlResourceTableName).c_str()));
459}
460
461Statement*
462    ResourcePrefetchPredictorTables::GetUrlMetadataDeleteStatement() {
463  return new Statement(DB()->GetCachedStatement(
464      SQL_FROM_HERE,
465      base::StringPrintf("DELETE FROM %s WHERE main_page_url=?",
466                         kUrlMetadataTableName).c_str()));
467}
468
469Statement*
470    ResourcePrefetchPredictorTables::GetUrlMetadataUpdateStatement() {
471  return new Statement(DB()->GetCachedStatement(
472      SQL_FROM_HERE,
473      base::StringPrintf(
474          "INSERT INTO %s (main_page_url, last_visit_time) VALUES (?,?)",
475          kUrlMetadataTableName).c_str()));
476}
477
478Statement*
479    ResourcePrefetchPredictorTables::GetHostResourceDeleteStatement() {
480  return new Statement(DB()->GetCachedStatement(
481      SQL_FROM_HERE,
482      base::StringPrintf("DELETE FROM %s WHERE main_page_url=?",
483                         kHostResourceTableName).c_str()));
484}
485
486Statement*
487    ResourcePrefetchPredictorTables::GetHostResourceUpdateStatement() {
488  return new Statement(DB()->GetCachedStatement(
489      SQL_FROM_HERE,
490      base::StringPrintf(
491          "INSERT INTO %s "
492          "(main_page_url, resource_url, resource_type, number_of_hits, "
493          "number_of_misses, consecutive_misses, average_position) "
494          "VALUES (?,?,?,?,?,?,?)", kHostResourceTableName).c_str()));
495}
496
497Statement*
498    ResourcePrefetchPredictorTables::GetHostMetadataDeleteStatement() {
499  return new Statement(DB()->GetCachedStatement(
500      SQL_FROM_HERE,
501      base::StringPrintf("DELETE FROM %s WHERE main_page_url=?",
502                         kHostMetadataTableName).c_str()));
503}
504
505Statement* ResourcePrefetchPredictorTables::GetHostMetadataUpdateStatement() {
506  return new Statement(DB()->GetCachedStatement(
507      SQL_FROM_HERE,
508      base::StringPrintf(
509          "INSERT INTO %s (main_page_url, last_visit_time) VALUES (?,?)",
510          kHostMetadataTableName).c_str()));
511}
512
513}  // namespace predictors
514