1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "chrome/browser/metrics/metrics_log_serializer.h"
6
7#include "base/base64.h"
8#include "base/md5.h"
9#include "base/metrics/histogram.h"
10#include "base/prefs/pref_service.h"
11#include "chrome/browser/browser_process.h"
12#include "chrome/browser/prefs/scoped_user_pref_update.h"
13#include "chrome/common/pref_names.h"
14
15namespace {
16
// The number of "initial" logs to save, and hope to send during a future Chrome
// session.  Initial logs contain crash stats, and are pretty small.
// SerializeLogs() passes this as the list length limit for INITIAL_LOG; more
// logs than this are kept while the kept logs total fewer than
// kStorageByteLimitPerLogType bytes.
const size_t kInitialLogsPersistLimit = 20;
20
// The number of ongoing logs to save persistently, and hope to send during
// this or a future session.  Note that each log may be pretty large, as
// presumably the related "initial" log wasn't sent (probably nothing was, as
// the user was probably off-line).  As a result, the log probably kept
// accumulating while the "initial" log was stalled, and couldn't be sent.  As a
// result, we don't want to save too many of these mega-logs.
// A "standard shutdown" will create a small log, including just the data that
// has not yet been transmitted, and that is normal (to have exactly one
// ongoing_log_ at startup).
const size_t kOngoingLogsPersistLimit = 8;
31
// The minimum number of bytes of logs that must be stored for each log type
// (initial and ongoing) before older logs may be dropped.  This ensures that a
// reasonable amount of history will be stored even if there is a long series
// of very small logs.
const size_t kStorageByteLimitPerLogType = 300000;
36
// Persisted lists carry two bookkeeping elements in addition to the logs: the
// number of logs (stored as the first element) and an MD5 checksum of the
// encoded logs (stored as the last element).
const size_t kChecksumEntryCount = 2;
40
41MetricsLogSerializer::LogReadStatus MakeRecallStatusHistogram(
42    MetricsLogSerializer::LogReadStatus status) {
43  UMA_HISTOGRAM_ENUMERATION("PrefService.PersistentLogRecallProtobufs",
44                            status, MetricsLogSerializer::END_RECALL_STATUS);
45  return status;
46}
47
48}  // namespace
49
50
// The constructor body is empty; no explicit initialization is performed here.
MetricsLogSerializer::MetricsLogSerializer() {}
52
// The destructor body is empty; no explicit cleanup is performed here.
MetricsLogSerializer::~MetricsLogSerializer() {}
54
55void MetricsLogSerializer::SerializeLogs(const std::vector<std::string>& logs,
56                                         MetricsLogManager::LogType log_type) {
57  PrefService* local_state = g_browser_process->local_state();
58  DCHECK(local_state);
59  const char* pref = NULL;
60  size_t store_length_limit = 0;
61  switch (log_type) {
62    case MetricsLogManager::INITIAL_LOG:
63      pref = prefs::kMetricsInitialLogs;
64      store_length_limit = kInitialLogsPersistLimit;
65      break;
66    case MetricsLogManager::ONGOING_LOG:
67      pref = prefs::kMetricsOngoingLogs;
68      store_length_limit = kOngoingLogsPersistLimit;
69      break;
70    case MetricsLogManager::NO_LOG:
71      NOTREACHED();
72      return;
73  };
74
75  ListPrefUpdate update(local_state, pref);
76  WriteLogsToPrefList(logs, store_length_limit, kStorageByteLimitPerLogType,
77                      update.Get());
78}
79
80void MetricsLogSerializer::DeserializeLogs(MetricsLogManager::LogType log_type,
81                                           std::vector<std::string>* logs) {
82  DCHECK(logs);
83  PrefService* local_state = g_browser_process->local_state();
84  DCHECK(local_state);
85
86  const char* pref;
87  if (log_type == MetricsLogManager::INITIAL_LOG)
88    pref = prefs::kMetricsInitialLogs;
89  else
90    pref = prefs::kMetricsOngoingLogs;
91
92  const ListValue* unsent_logs = local_state->GetList(pref);
93  ReadLogsFromPrefList(*unsent_logs, logs);
94}
95
96// static
97void MetricsLogSerializer::WriteLogsToPrefList(
98    const std::vector<std::string>& local_list,
99    size_t list_length_limit,
100    size_t byte_limit,
101    base::ListValue* list) {
102  // One of the limit arguments must be non-zero.
103  DCHECK(list_length_limit > 0 || byte_limit > 0);
104
105  list->Clear();
106  if (local_list.size() == 0)
107    return;
108
109  size_t start = 0;
110  // If there are too many logs, keep the most recent logs up to the length
111  // limit, and at least to the minimum number of bytes.
112  if (local_list.size() > list_length_limit) {
113    start = local_list.size();
114    size_t bytes_used = 0;
115    for (std::vector<std::string>::const_reverse_iterator
116         it = local_list.rbegin(); it != local_list.rend(); ++it) {
117      size_t log_size = it->length();
118      if (bytes_used >= byte_limit &&
119          (local_list.size() - start) >= list_length_limit)
120        break;
121      bytes_used += log_size;
122      --start;
123    }
124  }
125  DCHECK_LT(start, local_list.size());
126  if (start >= local_list.size())
127    return;
128
129  // Store size at the beginning of the list.
130  list->Append(Value::CreateIntegerValue(local_list.size() - start));
131
132  base::MD5Context ctx;
133  base::MD5Init(&ctx);
134  std::string encoded_log;
135  for (std::vector<std::string>::const_iterator it = local_list.begin() + start;
136       it != local_list.end(); ++it) {
137    // We encode the compressed log as Value::CreateStringValue() expects to
138    // take a valid UTF8 string.
139    if (!base::Base64Encode(*it, &encoded_log)) {
140      list->Clear();
141      return;
142    }
143    base::MD5Update(&ctx, encoded_log);
144    list->Append(Value::CreateStringValue(encoded_log));
145  }
146
147  // Append hash to the end of the list.
148  base::MD5Digest digest;
149  base::MD5Final(&digest, &ctx);
150  list->Append(Value::CreateStringValue(base::MD5DigestToBase16(digest)));
151  DCHECK(list->GetSize() >= 3);  // Minimum of 3 elements (size, data, hash).
152}
153
154// static
155MetricsLogSerializer::LogReadStatus MetricsLogSerializer::ReadLogsFromPrefList(
156    const ListValue& list,
157    std::vector<std::string>* local_list) {
158  if (list.GetSize() == 0)
159    return MakeRecallStatusHistogram(LIST_EMPTY);
160  if (list.GetSize() < 3)
161    return MakeRecallStatusHistogram(LIST_SIZE_TOO_SMALL);
162
163  // The size is stored at the beginning of the list.
164  int size;
165  bool valid = (*list.begin())->GetAsInteger(&size);
166  if (!valid)
167    return MakeRecallStatusHistogram(LIST_SIZE_MISSING);
168  // Account for checksum and size included in the list.
169  if (static_cast<unsigned int>(size) !=
170      list.GetSize() - kChecksumEntryCount) {
171    return MakeRecallStatusHistogram(LIST_SIZE_CORRUPTION);
172  }
173
174  // Allocate strings for all of the logs we are going to read in.
175  // Do this ahead of time so that we can decode the string values directly into
176  // the elements of |local_list|, and thereby avoid making copies of the
177  // serialized logs, which can be fairly large.
178  DCHECK(local_list->empty());
179  local_list->resize(size);
180
181  base::MD5Context ctx;
182  base::MD5Init(&ctx);
183  std::string encoded_log;
184  size_t local_index = 0;
185  for (ListValue::const_iterator it = list.begin() + 1;
186       it != list.end() - 1;  // Last element is the checksum.
187       ++it, ++local_index) {
188    bool valid = (*it)->GetAsString(&encoded_log);
189    if (!valid) {
190      local_list->clear();
191      return MakeRecallStatusHistogram(LOG_STRING_CORRUPTION);
192    }
193
194    base::MD5Update(&ctx, encoded_log);
195
196    DCHECK_LT(local_index, local_list->size());
197    std::string& decoded_log = (*local_list)[local_index];
198    if (!base::Base64Decode(encoded_log, &decoded_log)) {
199      local_list->clear();
200      return MakeRecallStatusHistogram(DECODE_FAIL);
201    }
202  }
203
204  // Verify checksum.
205  base::MD5Digest digest;
206  base::MD5Final(&digest, &ctx);
207  std::string recovered_md5;
208  // We store the hash at the end of the list.
209  valid = (*(list.end() - 1))->GetAsString(&recovered_md5);
210  if (!valid) {
211    local_list->clear();
212    return MakeRecallStatusHistogram(CHECKSUM_STRING_CORRUPTION);
213  }
214  if (recovered_md5 != base::MD5DigestToBase16(digest)) {
215    local_list->clear();
216    return MakeRecallStatusHistogram(CHECKSUM_CORRUPTION);
217  }
218  return MakeRecallStatusHistogram(RECALL_SUCCESS);
219}
220