external_metrics.cc revision a36e5920737c6adbddd3e43b760e5de8431db6e0
1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "chrome/browser/chromeos/external_metrics.h"
6
7#include <fcntl.h>
8#include <stdio.h>
9#include <stdlib.h>
10#include <string.h>
11#include <sys/file.h>
12#include <sys/stat.h>
13#include <sys/types.h>
14#include <unistd.h>
15
16#include <map>
17#include <string>
18
19#include "base/basictypes.h"
20#include "base/bind.h"
21#include "base/file_util.h"
22#include "base/files/file_path.h"
23#include "base/metrics/histogram.h"
24#include "base/metrics/sparse_histogram.h"
25#include "base/metrics/statistics_recorder.h"
26#include "base/perftimer.h"
27#include "base/posix/eintr_wrapper.h"
28#include "base/sys_info.h"
29#include "base/time/time.h"
30#include "chrome/browser/browser_process.h"
31#include "chrome/browser/metrics/metrics_service.h"
32#include "content/public/browser/browser_thread.h"
33#include "content/public/browser/user_metrics.h"
34
35using content::BrowserThread;
36using content::UserMetricsAction;
37
38namespace chromeos {
39
40namespace {
41
42bool CheckValues(const std::string& name,
43                 int minimum,
44                 int maximum,
45                 size_t bucket_count) {
46  if (!base::Histogram::InspectConstructionArguments(
47      name, &minimum, &maximum, &bucket_count))
48    return false;
49  base::HistogramBase* histogram =
50      base::StatisticsRecorder::FindHistogram(name);
51  if (!histogram)
52    return true;
53  return histogram->HasConstructionArguments(minimum, maximum, bucket_count);
54}
55
56bool CheckLinearValues(const std::string& name, int maximum) {
57  return CheckValues(name, 1, maximum, maximum + 1);
58}
59
60// Establishes field trial for wifi scanning in chromeos.  crbug.com/242733.
61void SetupProgressiveScanFieldTrial() {
62  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
63  const char name_of_experiment[] = "ProgressiveScan";
64  const char path_to_group_file[] = "/home/chronos/.progressive_scan_variation";
65  const base::FieldTrial::Probability kDivisor = 1000;
66  scoped_refptr<base::FieldTrial> trial =
67      base::FieldTrialList::FactoryGetFieldTrial(
68          name_of_experiment, kDivisor, "Default", 2013, 12, 31,
69          base::FieldTrial::SESSION_RANDOMIZED, NULL);
70
71  // Announce the groups with 0 percentage; the actual percentages come from
72  // the server configuration.
73  std::map<int, std::string> group_to_char;
74  group_to_char[trial->AppendGroup("FullScan", 0)] = "c";
75  group_to_char[trial->AppendGroup("33Percent_4MinMax", 0)] = "1";
76  group_to_char[trial->AppendGroup("50Percent_4MinMax", 0)] = "2";
77  group_to_char[trial->AppendGroup("50Percent_8MinMax", 0)] = "3";
78  group_to_char[trial->AppendGroup("100Percent_8MinMax", 0)] = "4";
79
80  // Announce the experiment to any listeners (especially important is the UMA
81  // software, which will append the group names to UMA statistics).
82  const int group_num = trial->group();
83  std::string group_char = "x";
84  if (ContainsKey(group_to_char, group_num))
85    group_char = group_to_char[group_num];
86
87  // Write the group to the file to be read by ChromeOS.
88  const base::FilePath kPathToGroupFile(path_to_group_file);
89
90  if (file_util::WriteFile(kPathToGroupFile, group_char.c_str(),
91                           group_char.length())) {
92    LOG(INFO) << "Configured in group '" << trial->group_name()
93              << "' ('" << group_char << "') for "
94              << name_of_experiment << " field trial";
95  } else {
96    LOG(ERROR) << "Couldn't write to " << path_to_group_file;
97  }
98}
99
100// Finds out if we're on a 2GB Parrot.
101//
102// This code reads and parses /etc/lsb-release. There are at least four other
103// places that open and parse /etc/lsb-release, and I wish I could fix the
104// mess.  At least this code is temporary.
105
106bool Is2GBParrot() {
107  base::FilePath path("/etc/lsb-release");
108  std::string contents;
109  if (!file_util::ReadFileToString(path, &contents))
110    return false;
111  if (contents.find("CHROMEOS_RELEASE_BOARD=parrot") == std::string::npos)
112    return false;
113  // There are 2GB and 4GB models.
114  return base::SysInfo::AmountOfPhysicalMemory() <= 2LL * 1024 * 1024 * 1024;
115}
116
117// Sets up field trial for measuring swap and CPU metrics after tab switch
118// and scroll events. crbug.com/253994
119void SetupSwapJankFieldTrial() {
120  const char name_of_experiment[] = "SwapJank64vs32Parrot";
121
122  // Determine if this is a 32 or 64 bit build of Chrome.
123  bool is_chrome_64 = sizeof(void*) == 8;
124
125  // Determine if this is a 32 or 64 bit kernel.
126  bool is_kernel_64 = base::SysInfo::OperatingSystemArchitecture() == "x86_64";
127
128  // A 32 bit kernel requires 32 bit Chrome.
129  DCHECK(is_kernel_64 || !is_chrome_64);
130
131  // Find out if we're on a 2GB Parrot.
132  bool is_parrot = Is2GBParrot();
133
134  // All groups are either on or off.
135  const base::FieldTrial::Probability kTotalProbability = 1;
136  scoped_refptr<base::FieldTrial> trial =
137      base::FieldTrialList::FactoryGetFieldTrial(
138          name_of_experiment, kTotalProbability, "default", 2013, 12, 31,
139          base::FieldTrial::SESSION_RANDOMIZED, NULL);
140
141  // Assign probability of 1 to this Chrome's group.  Assign 0 to all other
142  // choices.
143  trial->AppendGroup("kernel_64_chrome_64",
144                     is_parrot && is_kernel_64 && is_chrome_64 ?
145                     kTotalProbability : 0);
146  trial->AppendGroup("kernel_64_chrome_32",
147                     is_parrot && is_kernel_64 && !is_chrome_64 ?
148                     kTotalProbability : 0);
149  trial->AppendGroup("kernel_32_chrome_32",
150                     is_parrot && !is_kernel_64 && !is_chrome_64 ?
151                     kTotalProbability : 0);
152  trial->AppendGroup("not_parrot",
153                     !is_parrot ? kTotalProbability : 0);
154
155  // Announce the experiment to any listeners (especially important is the UMA
156  // software, which will append the group names to UMA statistics).
157  trial->group();
158  DVLOG(1) << "Configured in group '" << trial->group_name() << "' for "
159           << name_of_experiment << " field trial";
160}
161
162}  // namespace
163
// Delay, in seconds, between two consecutive collections of external metrics.
// (A namespace-scope const object already has internal linkage.)
const int kExternalMetricsCollectionIntervalSeconds = 30;
166
167ExternalMetrics::ExternalMetrics() : test_recorder_(NULL) {}
168
169ExternalMetrics::~ExternalMetrics() {}
170
171void ExternalMetrics::Start() {
172  // Register user actions external to the browser.
173  // chrome/tools/extract_actions.py won't understand these lines, so all of
174  // these are explicitly added in that script.
175  // TODO(derat): We shouldn't need to verify actions before reporting them;
176  // remove all of this once http://crosbug.com/11125 is fixed.
177  valid_user_actions_.insert("Cryptohome.PKCS11InitFail");
178  valid_user_actions_.insert("Updater.ServerCertificateChanged");
179  valid_user_actions_.insert("Updater.ServerCertificateFailed");
180
181  // Initialize here field trials that don't need to read from files.
182  // (None for the moment.)
183
184  // Initialize any chromeos field trials that need to read from a file (e.g.,
185  // those that have an upstart script determine their experimental group for
186  // them) then schedule the data collection.  All of this is done on the file
187  // thread.
188  bool task_posted = BrowserThread::PostTask(
189      BrowserThread::FILE,
190      FROM_HERE,
191      base::Bind(&chromeos::ExternalMetrics::SetupFieldTrialsOnFileThread,
192                 this));
193  DCHECK(task_posted);
194}
195
196void ExternalMetrics::RecordActionUI(std::string action_string) {
197  if (valid_user_actions_.count(action_string)) {
198    content::RecordComputedAction(action_string);
199  } else {
200    DLOG(ERROR) << "undefined UMA action: " << action_string;
201  }
202}
203
204void ExternalMetrics::RecordAction(const char* action) {
205  std::string action_string(action);
206  BrowserThread::PostTask(
207      BrowserThread::UI, FROM_HERE,
208      base::Bind(&ExternalMetrics::RecordActionUI, this, action_string));
209}
210
211void ExternalMetrics::RecordCrashUI(const std::string& crash_kind) {
212  if (g_browser_process && g_browser_process->metrics_service()) {
213    g_browser_process->metrics_service()->LogChromeOSCrash(crash_kind);
214  }
215}
216
217void ExternalMetrics::RecordCrash(const std::string& crash_kind) {
218  BrowserThread::PostTask(
219      BrowserThread::UI, FROM_HERE,
220      base::Bind(&ExternalMetrics::RecordCrashUI, this, crash_kind));
221}
222
223void ExternalMetrics::RecordHistogram(const char* histogram_data) {
224  int sample, min, max, nbuckets;
225  char name[128];   // length must be consistent with sscanf format below.
226  int n = sscanf(histogram_data, "%127s %d %d %d %d",
227                 name, &sample, &min, &max, &nbuckets);
228  if (n != 5) {
229    DLOG(ERROR) << "bad histogram request: " << histogram_data;
230    return;
231  }
232
233  if (!CheckValues(name, min, max, nbuckets)) {
234    DLOG(ERROR) << "Invalid histogram " << name
235                << ", min=" << min
236                << ", max=" << max
237                << ", nbuckets=" << nbuckets;
238    return;
239  }
240  // Do not use the UMA_HISTOGRAM_... macros here.  They cache the Histogram
241  // instance and thus only work if |name| is constant.
242  base::HistogramBase* counter = base::Histogram::FactoryGet(
243      name, min, max, nbuckets, base::Histogram::kUmaTargetedHistogramFlag);
244  counter->Add(sample);
245}
246
247void ExternalMetrics::RecordLinearHistogram(const char* histogram_data) {
248  int sample, max;
249  char name[128];   // length must be consistent with sscanf format below.
250  int n = sscanf(histogram_data, "%127s %d %d", name, &sample, &max);
251  if (n != 3) {
252    DLOG(ERROR) << "bad linear histogram request: " << histogram_data;
253    return;
254  }
255
256  if (!CheckLinearValues(name, max)) {
257    DLOG(ERROR) << "Invalid linear histogram " << name
258                << ", max=" << max;
259    return;
260  }
261  // Do not use the UMA_HISTOGRAM_... macros here.  They cache the Histogram
262  // instance and thus only work if |name| is constant.
263  base::HistogramBase* counter = base::LinearHistogram::FactoryGet(
264      name, 1, max, max + 1, base::Histogram::kUmaTargetedHistogramFlag);
265  counter->Add(sample);
266}
267
268void ExternalMetrics::RecordSparseHistogram(const char* histogram_data) {
269  int sample;
270  char name[128];   // length must be consistent with sscanf format below.
271  int n = sscanf(histogram_data, "%127s %d", name, &sample);
272  if (n != 2) {
273    DLOG(ERROR) << "bad sparse histogram request: " << histogram_data;
274    return;
275  }
276
277  // Do not use the UMA_HISTOGRAM_... macros here.  They cache the Histogram
278  // instance and thus only work if |name| is constant.
279  base::HistogramBase* counter = base::SparseHistogram::FactoryGet(
280      name, base::HistogramBase::kUmaTargetedHistogramFlag);
281  counter->Add(sample);
282}
283
284void ExternalMetrics::CollectEvents() {
285  const char* event_file_path = "/var/log/metrics/uma-events";
286  struct stat stat_buf;
287  int result;
288  if (!test_path_.empty()) {
289    event_file_path = test_path_.value().c_str();
290  }
291  result = stat(event_file_path, &stat_buf);
292  if (result < 0) {
293    if (errno != ENOENT) {
294      DPLOG(ERROR) << event_file_path << ": bad metrics file stat";
295    }
296    // Nothing to collect---try later.
297    return;
298  }
299  if (stat_buf.st_size == 0) {
300    // Also nothing to collect.
301    return;
302  }
303  int fd = open(event_file_path, O_RDWR);
304  if (fd < 0) {
305    DPLOG(ERROR) << event_file_path << ": cannot open";
306    return;
307  }
308  result = flock(fd, LOCK_EX);
309  if (result < 0) {
310    DPLOG(ERROR) << event_file_path << ": cannot lock";
311    close(fd);
312    return;
313  }
314  // This processes all messages in the log.  Each message starts with a 4-byte
315  // field containing the length of the entire message.  The length is followed
316  // by a name-value pair of null-terminated strings.  When all messages are
317  // read and processed, or an error occurs, truncate the file to zero size.
318  for (;;) {
319    int32 message_size;
320    result = HANDLE_EINTR(read(fd, &message_size, sizeof(message_size)));
321    if (result < 0) {
322      DPLOG(ERROR) << "reading metrics message header";
323      break;
324    }
325    if (result == 0) {  // This indicates a normal EOF.
326      break;
327    }
328    if (result < static_cast<int>(sizeof(message_size))) {
329      DLOG(ERROR) << "bad read size " << result <<
330                     ", expecting " << sizeof(message_size);
331      break;
332    }
333    // kMetricsMessageMaxLength applies to the entire message: the 4-byte
334    // length field and the two null-terminated strings.
335    if (message_size < 2 + static_cast<int>(sizeof(message_size)) ||
336        message_size > static_cast<int>(kMetricsMessageMaxLength)) {
337      DLOG(ERROR) << "bad message size " << message_size;
338      break;
339    }
340    message_size -= sizeof(message_size);  // The message size includes itself.
341    uint8 buffer[kMetricsMessageMaxLength];
342    result = HANDLE_EINTR(read(fd, buffer, message_size));
343    if (result < 0) {
344      DPLOG(ERROR) << "reading metrics message body";
345      break;
346    }
347    if (result < message_size) {
348      DLOG(ERROR) << "message too short: length " << result <<
349                     ", expected " << message_size;
350      break;
351    }
352    // The buffer should now contain a pair of null-terminated strings.
353    uint8* p = reinterpret_cast<uint8*>(memchr(buffer, '\0', message_size));
354    uint8* q = NULL;
355    if (p != NULL) {
356      q = reinterpret_cast<uint8*>(
357          memchr(p + 1, '\0', message_size - (p + 1 - buffer)));
358    }
359    if (q == NULL) {
360      DLOG(ERROR) << "bad name-value pair for metrics";
361      break;
362    }
363    char* name = reinterpret_cast<char*>(buffer);
364    char* value = reinterpret_cast<char*>(p + 1);
365    if (test_recorder_ != NULL) {
366      test_recorder_(name, value);
367    } else if (strcmp(name, "crash") == 0) {
368      RecordCrash(value);
369    } else if (strcmp(name, "histogram") == 0) {
370      RecordHistogram(value);
371    } else if (strcmp(name, "linearhistogram") == 0) {
372      RecordLinearHistogram(value);
373    } else if (strcmp(name, "sparsehistogram") == 0) {
374      RecordSparseHistogram(value);
375    } else if (strcmp(name, "useraction") == 0) {
376      RecordAction(value);
377    } else {
378      DLOG(ERROR) << "invalid event type: " << name;
379    }
380  }
381
382  result = ftruncate(fd, 0);
383  if (result < 0) {
384    DPLOG(ERROR) << "truncate metrics log";
385  }
386  result = flock(fd, LOCK_UN);
387  if (result < 0) {
388    DPLOG(ERROR) << "unlock metrics log";
389  }
390  result = close(fd);
391  if (result < 0) {
392    DPLOG(ERROR) << "close metrics log";
393  }
394}
395
396void ExternalMetrics::CollectEventsAndReschedule() {
397  PerfTimer timer;
398  CollectEvents();
399  UMA_HISTOGRAM_TIMES("UMA.CollectExternalEventsTime", timer.Elapsed());
400  ScheduleCollector();
401}
402
403void ExternalMetrics::ScheduleCollector() {
404  bool result;
405  result = BrowserThread::PostDelayedTask(
406      BrowserThread::FILE, FROM_HERE,
407      base::Bind(&chromeos::ExternalMetrics::CollectEventsAndReschedule, this),
408      base::TimeDelta::FromSeconds(kExternalMetricsCollectionIntervalSeconds));
409  DCHECK(result);
410}
411
412void ExternalMetrics::SetupFieldTrialsOnFileThread() {
413  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
414  // Field trials that do not read from files can be initialized in
415  // ExternalMetrics::Start() above.
416  SetupProgressiveScanFieldTrial();
417  SetupSwapJankFieldTrial();
418
419  ScheduleCollector();
420}
421
422}  // namespace chromeos
423