metrics_helpers.cc revision 731df977c0511bca2206b5f333555b1205ff1f43
1// Copyright (c) 2010 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "chrome/common/metrics_helpers.h"
6
7#if defined(USE_SYSTEM_LIBBZ2)
8#include <bzlib.h>
9#else
10#include "third_party/bzip2/bzlib.h"
11#endif
12
13#include "base/base64.h"
14#include "base/time.h"
15#include "base/basictypes.h"
16#include "base/file_util.h"
17#include "base/md5.h"
18#include "base/perftimer.h"
19#include "base/scoped_ptr.h"
20#include "base/string_number_conversions.h"
21#include "base/sys_info.h"
22#include "base/utf_string_conversions.h"
23#include "base/third_party/nspr/prtime.h"
24#include "chrome/common/logging_chrome.h"
25#include "googleurl/src/gurl.h"
26#include "libxml/xmlwriter.h"
27
28#define OPEN_ELEMENT_FOR_SCOPE(name) ScopedElement scoped_element(this, name)
29
30using base::Histogram;
31using base::StatisticsRecorder;
32using base::Time;
33using base::TimeDelta;
34
35// http://blogs.msdn.com/oldnewthing/archive/2004/10/25/247180.aspx
36#if defined(OS_WIN)
37extern "C" IMAGE_DOS_HEADER __ImageBase;
38#endif
39
40namespace {
41
42// libxml take xmlChar*, which is unsigned char*
43inline const unsigned char* UnsignedChar(const char* input) {
44  return reinterpret_cast<const unsigned char*>(input);
45}
46
47}  // namespace
48
49class MetricsLogBase::XmlWrapper {
50 public:
51  XmlWrapper()
52      : doc_(NULL),
53        buffer_(NULL),
54        writer_(NULL) {
55    buffer_ = xmlBufferCreate();
56    DCHECK(buffer_);
57
58    #if defined(OS_CHROMEOS)
59      writer_ = xmlNewTextWriterDoc(&doc_, /* compression */ 0);
60    #else
61      writer_ = xmlNewTextWriterMemory(buffer_, /* compression */ 0);
62    #endif  // OS_CHROMEOS
63    DCHECK(writer_);
64
65    int result = xmlTextWriterSetIndent(writer_, 2);
66    DCHECK_EQ(0, result);
67  }
68
69  ~XmlWrapper() {
70    FreeDocWriter();
71    if (buffer_) {
72      xmlBufferFree(buffer_);
73      buffer_ = NULL;
74    }
75  }
76
77  void FreeDocWriter() {
78    if (writer_) {
79      xmlFreeTextWriter(writer_);
80      writer_ = NULL;
81    }
82    if (doc_) {
83      xmlFreeDoc(doc_);
84      doc_ = NULL;
85    }
86  }
87
88  xmlDocPtr doc() const { return doc_; }
89  xmlTextWriterPtr writer() const { return writer_; }
90  xmlBufferPtr buffer() const { return buffer_; }
91
92 private:
93  xmlDocPtr doc_;
94  xmlBufferPtr buffer_;
95  xmlTextWriterPtr writer_;
96};
97
98// static
99std::string MetricsLogBase::version_extension_;
100
101MetricsLogBase::MetricsLogBase(const std::string& client_id, int session_id,
102                               const std::string& version_string)
103    : start_time_(Time::Now()),
104      client_id_(client_id),
105      session_id_(base::IntToString(session_id)),
106      locked_(false),
107      xml_wrapper_(new XmlWrapper),
108      num_events_(0) {
109
110  StartElement("log");
111  WriteAttribute("clientid", client_id_);
112  WriteInt64Attribute("buildtime", GetBuildTime());
113  WriteAttribute("appversion", version_string);
114}
115
116MetricsLogBase::~MetricsLogBase() {
117  delete xml_wrapper_;
118}
119
120void MetricsLogBase::CloseLog() {
121  DCHECK(!locked_);
122  locked_ = true;
123
124  int result = xmlTextWriterEndDocument(xml_wrapper_->writer());
125  DCHECK_GE(result, 0);
126
127  result = xmlTextWriterFlush(xml_wrapper_->writer());
128  DCHECK_GE(result, 0);
129
130#if defined(OS_CHROMEOS)
131  xmlNodePtr root = xmlDocGetRootElement(xml_wrapper_->doc());
132  if (!hardware_class_.empty()) {
133    // The hardware class is determined after the first ongoing log is
134    // constructed, so this adds the root element's "hardwareclass"
135    // attribute when the log is closed instead.
136    xmlNewProp(root, UnsignedChar("hardwareclass"),
137               UnsignedChar(hardware_class_.c_str()));
138  }
139
140  // Flattens the XML tree into a character buffer.
141  PerfTimer dump_timer;
142  result = xmlNodeDump(xml_wrapper_->buffer(), xml_wrapper_->doc(),
143                       root, /* level */ 0, /* format */ 1);
144  DCHECK_GE(result, 0);
145  UMA_HISTOGRAM_TIMES("UMA.XMLNodeDumpTime", dump_timer.Elapsed());
146
147  PerfTimer free_timer;
148  xml_wrapper_->FreeDocWriter();
149  UMA_HISTOGRAM_TIMES("UMA.XMLWriterDestructionTime", free_timer.Elapsed());
150#endif  // OS_CHROMEOS
151}
152
153int MetricsLogBase::GetEncodedLogSize() {
154  DCHECK(locked_);
155  return xml_wrapper_->buffer()->use;
156}
157
158bool MetricsLogBase::GetEncodedLog(char* buffer, int buffer_size) {
159  DCHECK(locked_);
160  if (buffer_size < GetEncodedLogSize())
161    return false;
162
163  memcpy(buffer, xml_wrapper_->buffer()->content, GetEncodedLogSize());
164  return true;
165}
166
167std::string MetricsLogBase::GetEncodedLogString() {
168  DCHECK(locked_);
169  return std::string(reinterpret_cast<char*>(xml_wrapper_->buffer()->content));
170}
171
172int MetricsLogBase::GetElapsedSeconds() {
173  return static_cast<int>((Time::Now() - start_time_).InSeconds());
174}
175
176std::string MetricsLogBase::CreateHash(const std::string& value) {
177  MD5Context ctx;
178  MD5Init(&ctx);
179  MD5Update(&ctx, value.data(), value.length());
180
181  MD5Digest digest;
182  MD5Final(&digest, &ctx);
183
184  uint64 reverse_uint64;
185  // UMA only uses first 8 chars of hash. We use the above uint64 instead
186  // of a unsigned char[8] so that we don't run into strict aliasing issues
187  // in the LOG statement below when trying to interpret reverse as a uint64.
188  unsigned char* reverse = reinterpret_cast<unsigned char *>(&reverse_uint64);
189  DCHECK(arraysize(digest.a) >= sizeof(reverse_uint64));
190  for (size_t i = 0; i < sizeof(reverse_uint64); ++i)
191    reverse[i] = digest.a[sizeof(reverse_uint64) - i - 1];
192  // The following log is VERY helpful when folks add some named histogram into
193  // the code, but forgot to update the descriptive list of histograms.  When
194  // that happens, all we get to see (server side) is a hash of the histogram
195  // name.  We can then use this logging to find out what histogram name was
196  // being hashed to a given MD5 value by just running the version of Chromium
197  // in question with --enable-logging.
198  VLOG(1) << "Metrics: Hash numeric [" << value
199          << "]=[" << reverse_uint64 << "]";
200  return std::string(reinterpret_cast<char*>(digest.a), arraysize(digest.a));
201}
202
203std::string MetricsLogBase::CreateBase64Hash(const std::string& string) {
204  std::string encoded_digest;
205  if (base::Base64Encode(CreateHash(string), &encoded_digest)) {
206    DVLOG(1) << "Metrics: Hash [" << encoded_digest << "]=[" << string << "]";
207    return encoded_digest;
208  }
209  return std::string();
210}
211
212void MetricsLogBase::RecordUserAction(const char* key) {
213  DCHECK(!locked_);
214
215  std::string command_hash = CreateBase64Hash(key);
216  if (command_hash.empty()) {
217    NOTREACHED() << "Unable generate encoded hash of command: " << key;
218    return;
219  }
220
221  OPEN_ELEMENT_FOR_SCOPE("uielement");
222  WriteAttribute("action", "command");
223  WriteAttribute("targetidhash", command_hash);
224
225  // TODO(jhughes): Properly track windows.
226  WriteIntAttribute("window", 0);
227  WriteCommonEventAttributes();
228
229  ++num_events_;
230}
231
232void MetricsLogBase::RecordLoadEvent(int window_id,
233                                 const GURL& url,
234                                 PageTransition::Type origin,
235                                 int session_index,
236                                 TimeDelta load_time) {
237  DCHECK(!locked_);
238
239  OPEN_ELEMENT_FOR_SCOPE("document");
240  WriteAttribute("action", "load");
241  WriteIntAttribute("docid", session_index);
242  WriteIntAttribute("window", window_id);
243  WriteAttribute("loadtime", base::Int64ToString(load_time.InMilliseconds()));
244
245  std::string origin_string;
246
247  switch (PageTransition::StripQualifier(origin)) {
248    // TODO(jhughes): Some of these mappings aren't right... we need to add
249    // some values to the server's enum.
250    case PageTransition::LINK:
251    case PageTransition::MANUAL_SUBFRAME:
252      origin_string = "link";
253      break;
254
255    case PageTransition::TYPED:
256      origin_string = "typed";
257      break;
258
259    case PageTransition::AUTO_BOOKMARK:
260      origin_string = "bookmark";
261      break;
262
263    case PageTransition::AUTO_SUBFRAME:
264    case PageTransition::RELOAD:
265      origin_string = "refresh";
266      break;
267
268    case PageTransition::GENERATED:
269    case PageTransition::KEYWORD:
270      origin_string = "global-history";
271      break;
272
273    case PageTransition::START_PAGE:
274      origin_string = "start-page";
275      break;
276
277    case PageTransition::FORM_SUBMIT:
278      origin_string = "form-submit";
279      break;
280
281    default:
282      NOTREACHED() << "Received an unknown page transition type: " <<
283                       PageTransition::StripQualifier(origin);
284  }
285  if (!origin_string.empty())
286    WriteAttribute("origin", origin_string);
287
288  WriteCommonEventAttributes();
289
290  ++num_events_;
291}
292
293void MetricsLogBase::RecordWindowEvent(WindowEventType type,
294                                   int window_id,
295                                   int parent_id) {
296  DCHECK(!locked_);
297
298  OPEN_ELEMENT_FOR_SCOPE("window");
299  WriteAttribute("action", WindowEventTypeToString(type));
300  WriteAttribute("windowid", base::IntToString(window_id));
301  if (parent_id >= 0)
302    WriteAttribute("parent", base::IntToString(parent_id));
303  WriteCommonEventAttributes();
304
305  ++num_events_;
306}
307
308std::string MetricsLogBase::GetCurrentTimeString() {
309  return base::Uint64ToString(Time::Now().ToTimeT());
310}
311
312// These are the attributes that are common to every event.
313void MetricsLogBase::WriteCommonEventAttributes() {
314  WriteAttribute("session", session_id_);
315  WriteAttribute("time", GetCurrentTimeString());
316}
317
318void MetricsLogBase::WriteAttribute(const std::string& name,
319                                const std::string& value) {
320  DCHECK(!locked_);
321  DCHECK(!name.empty());
322
323  int result = xmlTextWriterWriteAttribute(xml_wrapper_->writer(),
324                                           UnsignedChar(name.c_str()),
325                                           UnsignedChar(value.c_str()));
326  DCHECK_GE(result, 0);
327}
328
329void MetricsLogBase::WriteIntAttribute(const std::string& name, int value) {
330  WriteAttribute(name, base::IntToString(value));
331}
332
333void MetricsLogBase::WriteInt64Attribute(const std::string& name, int64 value) {
334  WriteAttribute(name, base::Int64ToString(value));
335}
336
337// static
338const char* MetricsLogBase::WindowEventTypeToString(WindowEventType type) {
339  switch (type) {
340    case WINDOW_CREATE:  return "create";
341    case WINDOW_OPEN:    return "open";
342    case WINDOW_CLOSE:   return "close";
343    case WINDOW_DESTROY: return "destroy";
344
345    default:
346      NOTREACHED();
347      return "unknown";  // Can't return NULL as this is used in a required
348                         // attribute.
349  }
350}
351
352void MetricsLogBase::StartElement(const char* name) {
353  DCHECK(!locked_);
354  DCHECK(name);
355
356  int result = xmlTextWriterStartElement(xml_wrapper_->writer(),
357                                         UnsignedChar(name));
358  DCHECK_GE(result, 0);
359}
360
361void MetricsLogBase::EndElement() {
362  DCHECK(!locked_);
363
364  int result = xmlTextWriterEndElement(xml_wrapper_->writer());
365  DCHECK_GE(result, 0);
366}
367
368// static
369int64 MetricsLogBase::GetBuildTime() {
370  static int64 integral_build_time = 0;
371  if (!integral_build_time) {
372    Time time;
373    const char* kDateTime = __DATE__ " " __TIME__ " GMT";
374    bool result = Time::FromString(ASCIIToWide(kDateTime).c_str(), &time);
375    DCHECK(result);
376    integral_build_time = static_cast<int64>(time.ToTimeT());
377  }
378  return integral_build_time;
379}
380
381// TODO(JAR): A The following should really be part of the histogram class.
382// Internal state is being needlessly exposed, and it would be hard to reuse
383// this code. If we moved this into the Histogram class, then we could use
384// the same infrastructure for logging StatsCounters, RatesCounters, etc.
385void MetricsLogBase::RecordHistogramDelta(
386    const Histogram& histogram,
387    const Histogram::SampleSet& snapshot) {
388  DCHECK(!locked_);
389  DCHECK_NE(0, snapshot.TotalCount());
390  snapshot.CheckSize(histogram);
391
392  // We will ignore the MAX_INT/infinite value in the last element of range[].
393
394  OPEN_ELEMENT_FOR_SCOPE("histogram");
395
396  WriteAttribute("name", CreateBase64Hash(histogram.histogram_name()));
397
398  WriteInt64Attribute("sum", snapshot.sum());
399  WriteInt64Attribute("sumsquares", snapshot.square_sum());
400
401  for (size_t i = 0; i < histogram.bucket_count(); i++) {
402    if (snapshot.counts(i)) {
403      OPEN_ELEMENT_FOR_SCOPE("histogrambucket");
404      WriteIntAttribute("min", histogram.ranges(i));
405      WriteIntAttribute("max", histogram.ranges(i + 1));
406      WriteIntAttribute("count", snapshot.counts(i));
407    }
408  }
409}
410
411
412// MetricsServiceBase
413MetricsServiceBase::MetricsServiceBase()
414    : pending_log_(NULL),
415      compressed_log_(),
416      current_log_(NULL),
417      logged_samples_() {
418}
419
420MetricsServiceBase::~MetricsServiceBase() {
421  if (pending_log_) {
422    delete pending_log_;
423    pending_log_ = NULL;
424  }
425  if (current_log_) {
426    delete current_log_;
427    current_log_ = NULL;
428  }
429}
430
431// This implementation is based on the Firefox MetricsService implementation.
432bool MetricsServiceBase::Bzip2Compress(const std::string& input,
433                                       std::string* output) {
434  bz_stream stream = {0};
435  // As long as our input is smaller than the bzip2 block size, we should get
436  // the best compression.  For example, if your input was 250k, using a block
437  // size of 300k or 500k should result in the same compression ratio.  Since
438  // our data should be under 100k, using the minimum block size of 100k should
439  // allocate less temporary memory, but result in the same compression ratio.
440  int result = BZ2_bzCompressInit(&stream,
441                                  1,   // 100k (min) block size
442                                  0,   // quiet
443                                  0);  // default "work factor"
444  if (result != BZ_OK) {  // out of memory?
445    return false;
446  }
447
448  output->clear();
449
450  stream.next_in = const_cast<char*>(input.data());
451  stream.avail_in = static_cast<int>(input.size());
452  // NOTE: we don't need a BZ_RUN phase since our input buffer contains
453  //       the entire input
454  do {
455    output->resize(output->size() + 1024);
456    stream.next_out = &((*output)[stream.total_out_lo32]);
457    stream.avail_out = static_cast<int>(output->size()) - stream.total_out_lo32;
458    result = BZ2_bzCompress(&stream, BZ_FINISH);
459  } while (result == BZ_FINISH_OK);
460  if (result != BZ_STREAM_END)  // unknown failure?
461    return false;
462  result = BZ2_bzCompressEnd(&stream);
463  DCHECK(result == BZ_OK);
464
465  output->resize(stream.total_out_lo32);
466
467  return true;
468}
469
470void MetricsServiceBase::RecordCurrentHistograms() {
471  DCHECK(current_log_);
472
473  StatisticsRecorder::Histograms histograms;
474  StatisticsRecorder::GetHistograms(&histograms);
475  for (StatisticsRecorder::Histograms::const_iterator it = histograms.begin();
476       histograms.end() != it;
477       ++it) {
478    if ((*it)->flags() & Histogram::kUmaTargetedHistogramFlag)
479      RecordHistogram(**it);
480  }
481}
482
483void MetricsServiceBase::RecordHistogram(const Histogram& histogram) {
484  // Get up-to-date snapshot of sample stats.
485  Histogram::SampleSet snapshot;
486  histogram.SnapshotSample(&snapshot);
487
488  const std::string& histogram_name = histogram.histogram_name();
489
490  // Find the already sent stats, or create an empty set.
491  LoggedSampleMap::iterator it = logged_samples_.find(histogram_name);
492  Histogram::SampleSet* already_logged;
493  if (logged_samples_.end() == it) {
494    // Add new entry
495    already_logged = &logged_samples_[histogram.histogram_name()];
496    already_logged->Resize(histogram);  // Complete initialization.
497  } else {
498    already_logged = &(it->second);
499    // Deduct any stats we've already logged from our snapshot.
500    snapshot.Subtract(*already_logged);
501  }
502
503  // Snapshot now contains only a delta to what we've already_logged.
504
505  if (snapshot.TotalCount() > 0) {
506    current_log_->RecordHistogramDelta(histogram, snapshot);
507    // Add new data into our running total.
508    already_logged->Add(snapshot);
509  }
510}
511
512void MetricsServiceBase::DiscardPendingLog() {
513  if (pending_log_) {  // Shutdown might have deleted it!
514    delete pending_log_;
515    pending_log_ = NULL;
516  }
517  compressed_log_.clear();
518}
519