metrics_helpers.cc revision 513209b27ff55e2841eac0e4120199c23acce758
1// Copyright (c) 2010 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "chrome/common/metrics_helpers.h" 6 7#if defined(USE_SYSTEM_LIBBZ2) 8#include <bzlib.h> 9#else 10#include "third_party/bzip2/bzlib.h" 11#endif 12 13#include "base/base64.h" 14#include "base/time.h" 15#include "base/basictypes.h" 16#include "base/file_util.h" 17#include "base/md5.h" 18#include "base/perftimer.h" 19#include "base/scoped_ptr.h" 20#include "base/string_number_conversions.h" 21#include "base/sys_info.h" 22#include "base/utf_string_conversions.h" 23#include "base/third_party/nspr/prtime.h" 24#include "chrome/common/logging_chrome.h" 25#include "googleurl/src/gurl.h" 26#include "libxml/xmlwriter.h" 27 28#define OPEN_ELEMENT_FOR_SCOPE(name) ScopedElement scoped_element(this, name) 29 30using base::Histogram; 31using base::StatisticsRecorder; 32using base::Time; 33using base::TimeDelta; 34 35// http://blogs.msdn.com/oldnewthing/archive/2004/10/25/247180.aspx 36#if defined(OS_WIN) 37extern "C" IMAGE_DOS_HEADER __ImageBase; 38#endif 39 40namespace { 41 42// libxml take xmlChar*, which is unsigned char* 43inline const unsigned char* UnsignedChar(const char* input) { 44 return reinterpret_cast<const unsigned char*>(input); 45} 46 47} // namespace 48 49class MetricsLogBase::XmlWrapper { 50 public: 51 XmlWrapper() 52 : doc_(NULL), 53 buffer_(NULL), 54 writer_(NULL) { 55 buffer_ = xmlBufferCreate(); 56 DCHECK(buffer_); 57 58 #if defined(OS_CHROMEOS) 59 writer_ = xmlNewTextWriterDoc(&doc_, /* compression */ 0); 60 #else 61 writer_ = xmlNewTextWriterMemory(buffer_, /* compression */ 0); 62 #endif // OS_CHROMEOS 63 DCHECK(writer_); 64 65 int result = xmlTextWriterSetIndent(writer_, 2); 66 DCHECK_EQ(0, result); 67 } 68 69 ~XmlWrapper() { 70 FreeDocWriter(); 71 if (buffer_) { 72 xmlBufferFree(buffer_); 73 buffer_ = NULL; 74 } 75 } 76 77 void FreeDocWriter() { 78 if (writer_) { 79 xmlFreeTextWriter(writer_); 80 writer_ = NULL; 81 } 82 if (doc_) { 83 xmlFreeDoc(doc_); 84 doc_ = NULL; 85 } 86 } 87 88 xmlDocPtr doc() const { return doc_; } 89 xmlTextWriterPtr writer() const { return writer_; } 90 xmlBufferPtr buffer() const { return buffer_; } 91 92 private: 93 xmlDocPtr doc_; 94 xmlBufferPtr buffer_; 95 xmlTextWriterPtr writer_; 96}; 97 98// static 99std::string MetricsLogBase::version_extension_; 100 101MetricsLogBase::MetricsLogBase(const std::string& client_id, int session_id, 102 const std::string& version_string) 103 : start_time_(Time::Now()), 104 client_id_(client_id), 105 session_id_(base::IntToString(session_id)), 106 locked_(false), 107 xml_wrapper_(new XmlWrapper), 108 num_events_(0) { 109 110 StartElement("log"); 111 WriteAttribute("clientid", client_id_); 112 WriteInt64Attribute("buildtime", GetBuildTime()); 113 WriteAttribute("appversion", version_string); 114} 115 116MetricsLogBase::~MetricsLogBase() { 117 delete xml_wrapper_; 118} 119 120void MetricsLogBase::CloseLog() { 121 DCHECK(!locked_); 122 locked_ = true; 123 124 int result = xmlTextWriterEndDocument(xml_wrapper_->writer()); 125 DCHECK_GE(result, 0); 126 127 result = xmlTextWriterFlush(xml_wrapper_->writer()); 128 DCHECK_GE(result, 0); 129 130#if defined(OS_CHROMEOS) 131 xmlNodePtr root = xmlDocGetRootElement(xml_wrapper_->doc()); 132 if (!hardware_class_.empty()) { 133 // The hardware class is determined after the first ongoing log is 134 // constructed, so this adds the root element's "hardwareclass" 135 // attribute when the log is closed instead. 136 xmlNewProp(root, UnsignedChar("hardwareclass"), 137 UnsignedChar(hardware_class_.c_str())); 138 } 139 140 // Flattens the XML tree into a character buffer. 141 PerfTimer dump_timer; 142 result = xmlNodeDump(xml_wrapper_->buffer(), xml_wrapper_->doc(), 143 root, /* level */ 0, /* format */ 1); 144 DCHECK_GE(result, 0); 145 UMA_HISTOGRAM_TIMES("UMA.XMLNodeDumpTime", dump_timer.Elapsed()); 146 147 PerfTimer free_timer; 148 xml_wrapper_->FreeDocWriter(); 149 UMA_HISTOGRAM_TIMES("UMA.XMLWriterDestructionTime", free_timer.Elapsed()); 150#endif // OS_CHROMEOS 151} 152 153int MetricsLogBase::GetEncodedLogSize() { 154 DCHECK(locked_); 155 return xml_wrapper_->buffer()->use; 156} 157 158bool MetricsLogBase::GetEncodedLog(char* buffer, int buffer_size) { 159 DCHECK(locked_); 160 if (buffer_size < GetEncodedLogSize()) 161 return false; 162 163 memcpy(buffer, xml_wrapper_->buffer()->content, GetEncodedLogSize()); 164 return true; 165} 166 167std::string MetricsLogBase::GetEncodedLogString() { 168 DCHECK(locked_); 169 return std::string(reinterpret_cast<char*>(xml_wrapper_->buffer()->content)); 170} 171 172int MetricsLogBase::GetElapsedSeconds() { 173 return static_cast<int>((Time::Now() - start_time_).InSeconds()); 174} 175 176std::string MetricsLogBase::CreateHash(const std::string& value) { 177 MD5Context ctx; 178 MD5Init(&ctx); 179 MD5Update(&ctx, value.data(), value.length()); 180 181 MD5Digest digest; 182 MD5Final(&digest, &ctx); 183 184 uint64 reverse_uint64; 185 // UMA only uses first 8 chars of hash. We use the above uint64 instead 186 // of a unsigned char[8] so that we don't run into strict aliasing issues 187 // in the LOG statement below when trying to interpret reverse as a uint64. 188 unsigned char* reverse = reinterpret_cast<unsigned char *>(&reverse_uint64); 189 DCHECK(arraysize(digest.a) >= sizeof(reverse_uint64)); 190 for (size_t i = 0; i < sizeof(reverse_uint64); ++i) 191 reverse[i] = digest.a[sizeof(reverse_uint64) - i - 1]; 192 // The following log is VERY helpful when folks add some named histogram into 193 // the code, but forgot to update the descriptive list of histograms. When 194 // that happens, all we get to see (server side) is a hash of the histogram 195 // name. We can then use this logging to find out what histogram name was 196 // being hashed to a given MD5 value by just running the version of Chromium 197 // in question with --enable-logging. 198 VLOG(1) << "Metrics: Hash numeric [" << value 199 << "]=[" << reverse_uint64 << "]"; 200 return std::string(reinterpret_cast<char*>(digest.a), arraysize(digest.a)); 201} 202 203std::string MetricsLogBase::CreateBase64Hash(const std::string& string) { 204 std::string encoded_digest; 205 if (base::Base64Encode(CreateHash(string), &encoded_digest)) { 206 DVLOG(1) << "Metrics: Hash [" << encoded_digest << "]=[" << string << "]"; 207 return encoded_digest; 208 } 209 return std::string(); 210} 211 212void MetricsLogBase::RecordUserAction(const char* key) { 213 DCHECK(!locked_); 214 215 std::string command_hash = CreateBase64Hash(key); 216 if (command_hash.empty()) { 217 NOTREACHED() << "Unable generate encoded hash of command: " << key; 218 return; 219 } 220 221 OPEN_ELEMENT_FOR_SCOPE("uielement"); 222 WriteAttribute("action", "command"); 223 WriteAttribute("targetidhash", command_hash); 224 225 // TODO(jhughes): Properly track windows. 226 WriteIntAttribute("window", 0); 227 WriteCommonEventAttributes(); 228 229 ++num_events_; 230} 231 232void MetricsLogBase::RecordLoadEvent(int window_id, 233 const GURL& url, 234 PageTransition::Type origin, 235 int session_index, 236 TimeDelta load_time) { 237 DCHECK(!locked_); 238 239 OPEN_ELEMENT_FOR_SCOPE("document"); 240 WriteAttribute("action", "load"); 241 WriteIntAttribute("docid", session_index); 242 WriteIntAttribute("window", window_id); 243 WriteAttribute("loadtime", base::Int64ToString(load_time.InMilliseconds())); 244 245 std::string origin_string; 246 247 switch (PageTransition::StripQualifier(origin)) { 248 // TODO(jhughes): Some of these mappings aren't right... we need to add 249 // some values to the server's enum. 250 case PageTransition::LINK: 251 case PageTransition::MANUAL_SUBFRAME: 252 origin_string = "link"; 253 break; 254 255 case PageTransition::TYPED: 256 origin_string = "typed"; 257 break; 258 259 case PageTransition::AUTO_BOOKMARK: 260 origin_string = "bookmark"; 261 break; 262 263 case PageTransition::AUTO_SUBFRAME: 264 case PageTransition::RELOAD: 265 origin_string = "refresh"; 266 break; 267 268 case PageTransition::GENERATED: 269 case PageTransition::KEYWORD: 270 origin_string = "global-history"; 271 break; 272 273 case PageTransition::START_PAGE: 274 origin_string = "start-page"; 275 break; 276 277 case PageTransition::FORM_SUBMIT: 278 origin_string = "form-submit"; 279 break; 280 281 default: 282 NOTREACHED() << "Received an unknown page transition type: " << 283 PageTransition::StripQualifier(origin); 284 } 285 if (!origin_string.empty()) 286 WriteAttribute("origin", origin_string); 287 288 WriteCommonEventAttributes(); 289 290 ++num_events_; 291} 292 293void MetricsLogBase::RecordWindowEvent(WindowEventType type, 294 int window_id, 295 int parent_id) { 296 DCHECK(!locked_); 297 298 OPEN_ELEMENT_FOR_SCOPE("window"); 299 WriteAttribute("action", WindowEventTypeToString(type)); 300 WriteAttribute("windowid", base::IntToString(window_id)); 301 if (parent_id >= 0) 302 WriteAttribute("parent", base::IntToString(parent_id)); 303 WriteCommonEventAttributes(); 304 305 ++num_events_; 306} 307 308std::string MetricsLogBase::GetCurrentTimeString() { 309 return base::Uint64ToString(Time::Now().ToTimeT()); 310} 311 312// These are the attributes that are common to every event. 313void MetricsLogBase::WriteCommonEventAttributes() { 314 WriteAttribute("session", session_id_); 315 WriteAttribute("time", GetCurrentTimeString()); 316} 317 318void MetricsLogBase::WriteAttribute(const std::string& name, 319 const std::string& value) { 320 DCHECK(!locked_); 321 DCHECK(!name.empty()); 322 323 int result = xmlTextWriterWriteAttribute(xml_wrapper_->writer(), 324 UnsignedChar(name.c_str()), 325 UnsignedChar(value.c_str())); 326 DCHECK_GE(result, 0); 327} 328 329void MetricsLogBase::WriteIntAttribute(const std::string& name, int value) { 330 WriteAttribute(name, base::IntToString(value)); 331} 332 333void MetricsLogBase::WriteInt64Attribute(const std::string& name, int64 value) { 334 WriteAttribute(name, base::Int64ToString(value)); 335} 336 337// static 338const char* MetricsLogBase::WindowEventTypeToString(WindowEventType type) { 339 switch (type) { 340 case WINDOW_CREATE: return "create"; 341 case WINDOW_OPEN: return "open"; 342 case WINDOW_CLOSE: return "close"; 343 case WINDOW_DESTROY: return "destroy"; 344 345 default: 346 NOTREACHED(); 347 return "unknown"; // Can't return NULL as this is used in a required 348 // attribute. 349 } 350} 351 352void MetricsLogBase::StartElement(const char* name) { 353 DCHECK(!locked_); 354 DCHECK(name); 355 356 int result = xmlTextWriterStartElement(xml_wrapper_->writer(), 357 UnsignedChar(name)); 358 DCHECK_GE(result, 0); 359} 360 361void MetricsLogBase::EndElement() { 362 DCHECK(!locked_); 363 364 int result = xmlTextWriterEndElement(xml_wrapper_->writer()); 365 DCHECK_GE(result, 0); 366} 367 368// static 369int64 MetricsLogBase::GetBuildTime() { 370 static int64 integral_build_time = 0; 371 if (!integral_build_time) { 372 Time time; 373 const char* kDateTime = __DATE__ " " __TIME__ " GMT"; 374 bool result = Time::FromString(ASCIIToWide(kDateTime).c_str(), &time); 375 DCHECK(result); 376 integral_build_time = static_cast<int64>(time.ToTimeT()); 377 } 378 return integral_build_time; 379} 380 381// TODO(JAR): A The following should really be part of the histogram class. 382// Internal state is being needlessly exposed, and it would be hard to reuse 383// this code. If we moved this into the Histogram class, then we could use 384// the same infrastructure for logging StatsCounters, RatesCounters, etc. 385void MetricsLogBase::RecordHistogramDelta( 386 const Histogram& histogram, 387 const Histogram::SampleSet& snapshot) { 388 DCHECK(!locked_); 389 DCHECK_NE(0, snapshot.TotalCount()); 390 snapshot.CheckSize(histogram); 391 392 // We will ignore the MAX_INT/infinite value in the last element of range[]. 393 394 OPEN_ELEMENT_FOR_SCOPE("histogram"); 395 396 WriteAttribute("name", CreateBase64Hash(histogram.histogram_name())); 397 398 WriteInt64Attribute("sum", snapshot.sum()); 399 WriteInt64Attribute("sumsquares", snapshot.square_sum()); 400 401 for (size_t i = 0; i < histogram.bucket_count(); i++) { 402 if (snapshot.counts(i)) { 403 OPEN_ELEMENT_FOR_SCOPE("histogrambucket"); 404 WriteIntAttribute("min", histogram.ranges(i)); 405 WriteIntAttribute("max", histogram.ranges(i + 1)); 406 WriteIntAttribute("count", snapshot.counts(i)); 407 } 408 } 409} 410 411 412// MetricsServiceBase 413MetricsServiceBase::MetricsServiceBase() 414 : pending_log_(NULL), 415 compressed_log_(), 416 current_log_(NULL), 417 logged_samples_() { 418} 419 420MetricsServiceBase::~MetricsServiceBase() { 421 if (pending_log_) { 422 delete pending_log_; 423 pending_log_ = NULL; 424 } 425 if (current_log_) { 426 delete current_log_; 427 current_log_ = NULL; 428 } 429} 430 431// This implementation is based on the Firefox MetricsService implementation. 432bool MetricsServiceBase::Bzip2Compress(const std::string& input, 433 std::string* output) { 434 bz_stream stream = {0}; 435 // As long as our input is smaller than the bzip2 block size, we should get 436 // the best compression. For example, if your input was 250k, using a block 437 // size of 300k or 500k should result in the same compression ratio. Since 438 // our data should be under 100k, using the minimum block size of 100k should 439 // allocate less temporary memory, but result in the same compression ratio. 440 int result = BZ2_bzCompressInit(&stream, 441 1, // 100k (min) block size 442 0, // quiet 443 0); // default "work factor" 444 if (result != BZ_OK) { // out of memory? 445 return false; 446 } 447 448 output->clear(); 449 450 stream.next_in = const_cast<char*>(input.data()); 451 stream.avail_in = static_cast<int>(input.size()); 452 // NOTE: we don't need a BZ_RUN phase since our input buffer contains 453 // the entire input 454 do { 455 output->resize(output->size() + 1024); 456 stream.next_out = &((*output)[stream.total_out_lo32]); 457 stream.avail_out = static_cast<int>(output->size()) - stream.total_out_lo32; 458 result = BZ2_bzCompress(&stream, BZ_FINISH); 459 } while (result == BZ_FINISH_OK); 460 if (result != BZ_STREAM_END) // unknown failure? 461 return false; 462 result = BZ2_bzCompressEnd(&stream); 463 DCHECK(result == BZ_OK); 464 465 output->resize(stream.total_out_lo32); 466 467 return true; 468} 469 470void MetricsServiceBase::RecordCurrentHistograms() { 471 DCHECK(current_log_); 472 473 StatisticsRecorder::Histograms histograms; 474 StatisticsRecorder::GetHistograms(&histograms); 475 for (StatisticsRecorder::Histograms::const_iterator it = histograms.begin(); 476 histograms.end() != it; 477 ++it) { 478 if ((*it)->flags() & Histogram::kUmaTargetedHistogramFlag) 479 RecordHistogram(**it); 480 } 481} 482 483void MetricsServiceBase::RecordHistogram(const Histogram& histogram) { 484 // Get up-to-date snapshot of sample stats. 485 Histogram::SampleSet snapshot; 486 histogram.SnapshotSample(&snapshot); 487 const std::string& histogram_name = histogram.histogram_name(); 488 489 int corruption = histogram.FindCorruption(snapshot); 490 if (corruption) { 491 NOTREACHED(); 492 // Don't send corrupt data to metrics survices. 493 UMA_HISTOGRAM_ENUMERATION("Histogram.InconsistenciesBrowser", 494 corruption, Histogram::NEVER_EXCEEDED_VALUE); 495 typedef std::map<std::string, int> ProblemMap; 496 static ProblemMap* inconsistencies = new ProblemMap; 497 int old_corruption = (*inconsistencies)[histogram_name]; 498 if (old_corruption == (corruption | old_corruption)) 499 return; // We've already seen this corruption for this histogram. 500 (*inconsistencies)[histogram_name] |= corruption; 501 UMA_HISTOGRAM_ENUMERATION("Histogram.InconsistenciesBrowserUnique", 502 corruption, Histogram::NEVER_EXCEEDED_VALUE); 503 return; 504 } 505 506 // Find the already sent stats, or create an empty set. 507 LoggedSampleMap::iterator it = logged_samples_.find(histogram_name); 508 Histogram::SampleSet* already_logged; 509 if (logged_samples_.end() == it) { 510 // Add new entry 511 already_logged = &logged_samples_[histogram.histogram_name()]; 512 already_logged->Resize(histogram); // Complete initialization. 513 } else { 514 already_logged = &(it->second); 515 // Deduct any stats we've already logged from our snapshot. 516 snapshot.Subtract(*already_logged); 517 } 518 519 // Snapshot now contains only a delta to what we've already_logged. 520 521 if (snapshot.TotalCount() > 0) { 522 current_log_->RecordHistogramDelta(histogram, snapshot); 523 // Add new data into our running total. 524 already_logged->Add(snapshot); 525 } 526} 527 528void MetricsServiceBase::DiscardPendingLog() { 529 if (pending_log_) { // Shutdown might have deleted it! 530 delete pending_log_; 531 pending_log_ = NULL; 532 } 533 compressed_log_.clear(); 534} 535