histogram.cc revision dc0f95d653279beabeb9817299e2902918ba123e
1// Copyright (c) 2010 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5// Histogram is an object that aggregates statistics, and can summarize them in
6// various forms, including ASCII graphical, HTML, and numerically (as a
7// vector of numbers corresponding to each of the aggregating buckets).
8// See header file for details and examples.
9
10#include "base/metrics/histogram.h"
11
12#include <math.h>
13
14#include <algorithm>
15#include <string>
16
17#include "base/logging.h"
18#include "base/pickle.h"
19#include "base/stringprintf.h"
20#include "base/synchronization/lock.h"
21
22namespace base {
23
24// Static table of checksums for all possible 8 bit bytes.
25const uint32 Histogram::kCrcTable[256] = {0x0, 0x77073096L, 0xee0e612cL,
260x990951baL, 0x76dc419L, 0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0xedb8832L,
270x79dcb8a4L, 0xe0d5e91eL, 0x97d2d988L, 0x9b64c2bL, 0x7eb17cbdL, 0xe7b82d07L,
280x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL, 0x1adad47dL,
290x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L, 0x646ba8c0L, 0xfd62f97aL,
300x8a65c9ecL, 0x14015c4fL, 0x63066cd9L, 0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L,
310x4c69105eL, 0xd56041e4L, 0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL,
320xa50ab56bL, 0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L,
330x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL, 0xc8d75180L,
340xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L, 0xb8bda50fL, 0x2802b89eL,
350x5f058808L, 0xc60cd9b2L, 0xb10be924L, 0x2f6f7c87L, 0x58684c11L, 0xc1611dabL,
360xb6662d3dL, 0x76dc4190L, 0x1db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L,
370x6b6b51fL, 0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0xf00f934L, 0x9609a88eL,
380xe10e9818L, 0x7f6a0dbbL, 0x86d3d2dL, 0x91646c97L, 0xe6635c01L, 0x6b6b51f4L,
390x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL, 0x1b01a57bL, 0x8208f4c1L,
400xf50fc457L, 0x65b0d9c6L, 0x12b7e950L, 0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL,
410x15da2d49L, 0x8cd37cf3L, 0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L,
420xd4bb30e2L, 0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL,
430x346ed9fcL, 0xad678846L, 0xda60b8d0L, 0x44042d73L, 0x33031de5L, 0xaa0a4c5fL,
440xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L, 0xc90c2086L, 0x5768b525L,
450x206f85b3L, 0xb966d409L, 0xce61e49fL, 0x5edef90eL, 0x29d9c998L, 0xb0d09822L,
460xc7d7a8b4L, 0x59b33d17L, 0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L,
470x9abfb3b6L, 0x3b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x4db2615L,
480x73dc1683L, 0xe3630b12L, 0x94643b84L, 0xd6d6a3eL, 0x7a6a5aa8L, 0xe40ecf0bL,
490x9309ff9dL, 0xa00ae27L, 0x7d079eb1L, 0xf00f9344L, 0x8708a3d2L, 0x1e01f268L,
500x6906c2feL, 0xf762575dL, 0x806567cbL, 0x196c3671L, 0x6e6b06e7L, 0xfed41b76L,
510x89d32be0L, 0x10da7a5aL, 0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L,
520x60b08ed5L, 0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L,
530xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL, 0x36034af6L,
540x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL, 0x4669be79L, 0xcb61b38cL,
550xbc66831aL, 0x256fd2a0L, 0x5268e236L, 0xcc0c7795L, 0xbb0b4703L, 0x220216b9L,
560x5505262fL, 0xc5ba3bbeL, 0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L,
570xb5d0cf31L, 0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL,
580x26d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x5005713L, 0x95bf4a82L,
590xe2b87a14L, 0x7bb12baeL, 0xcb61b38L, 0x92d28e9bL, 0xe5d5be0dL, 0x7cdcefb7L,
600xbdbdf21L, 0x86d3d2d4L, 0xf1d4e242L, 0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL,
610xf6b9265bL, 0x6fb077e1L, 0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 0x66063bcaL,
620x11010b5cL, 0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L,
630xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L, 0x4969474dL,
640x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L, 0x37d83bf0L, 0xa9bcae53L,
650xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L, 0xbdbdf21cL, 0xcabac28aL, 0x53b39330L,
660x24b4a3a6L, 0xbad03605L, 0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL,
670xc4614ab8L, 0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL,
680x2d02ef8dL,
69};
70
71typedef Histogram::Count Count;
72
73// static
74const size_t Histogram::kBucketCount_MAX = 16384u;
75
76scoped_refptr<Histogram> Histogram::FactoryGet(const std::string& name,
77    Sample minimum, Sample maximum, size_t bucket_count, Flags flags) {
78  scoped_refptr<Histogram> histogram(NULL);
79
80  // Defensive code.
81  if (minimum < 1)
82    minimum = 1;
83  if (maximum > kSampleType_MAX - 1)
84    maximum = kSampleType_MAX - 1;
85
86  if (!StatisticsRecorder::FindHistogram(name, &histogram)) {
87    histogram = new Histogram(name, minimum, maximum, bucket_count);
88    histogram->InitializeBucketRange();
89    StatisticsRecorder::RegisterOrDiscardDuplicate(&histogram);
90  }
91
92  DCHECK_EQ(HISTOGRAM, histogram->histogram_type());
93  DCHECK(histogram->HasConstructorArguments(minimum, maximum, bucket_count));
94  histogram->SetFlags(flags);
95  return histogram;
96}
97
98scoped_refptr<Histogram> Histogram::FactoryTimeGet(const std::string& name,
99                                                   TimeDelta minimum,
100                                                   TimeDelta maximum,
101                                                   size_t bucket_count,
102                                                   Flags flags) {
103  return FactoryGet(name, minimum.InMilliseconds(), maximum.InMilliseconds(),
104                    bucket_count, flags);
105}
106
107void Histogram::Add(int value) {
108  if (value > kSampleType_MAX - 1)
109    value = kSampleType_MAX - 1;
110  if (value < 0)
111    value = 0;
112  size_t index = BucketIndex(value);
113  DCHECK_GE(value, ranges(index));
114  DCHECK_LT(value, ranges(index + 1));
115  Accumulate(value, 1, index);
116}
117
118void Histogram::AddBoolean(bool value) {
119  DCHECK(false);
120}
121
122void Histogram::AddSampleSet(const SampleSet& sample) {
123  sample_.Add(sample);
124}
125
126void Histogram::SetRangeDescriptions(const DescriptionPair descriptions[]) {
127  DCHECK(false);
128}
129
130// The following methods provide a graphical histogram display.
131void Histogram::WriteHTMLGraph(std::string* output) const {
132  // TBD(jar) Write a nice HTML bar chart, with divs an mouse-overs etc.
133  output->append("<PRE>");
134  WriteAscii(true, "<br>", output);
135  output->append("</PRE>");
136}
137
138void Histogram::WriteAscii(bool graph_it, const std::string& newline,
139                           std::string* output) const {
140  // Get local (stack) copies of all effectively volatile class data so that we
141  // are consistent across our output activities.
142  SampleSet snapshot;
143  SnapshotSample(&snapshot);
144  Count sample_count = snapshot.TotalCount();
145
146  WriteAsciiHeader(snapshot, sample_count, output);
147  output->append(newline);
148
149  // Prepare to normalize graphical rendering of bucket contents.
150  double max_size = 0;
151  if (graph_it)
152    max_size = GetPeakBucketSize(snapshot);
153
154  // Calculate space needed to print bucket range numbers.  Leave room to print
155  // nearly the largest bucket range without sliding over the histogram.
156  size_t largest_non_empty_bucket = bucket_count() - 1;
157  while (0 == snapshot.counts(largest_non_empty_bucket)) {
158    if (0 == largest_non_empty_bucket)
159      break;  // All buckets are empty.
160    --largest_non_empty_bucket;
161  }
162
163  // Calculate largest print width needed for any of our bucket range displays.
164  size_t print_width = 1;
165  for (size_t i = 0; i < bucket_count(); ++i) {
166    if (snapshot.counts(i)) {
167      size_t width = GetAsciiBucketRange(i).size() + 1;
168      if (width > print_width)
169        print_width = width;
170    }
171  }
172
173  int64 remaining = sample_count;
174  int64 past = 0;
175  // Output the actual histogram graph.
176  for (size_t i = 0; i < bucket_count(); ++i) {
177    Count current = snapshot.counts(i);
178    if (!current && !PrintEmptyBucket(i))
179      continue;
180    remaining -= current;
181    std::string range = GetAsciiBucketRange(i);
182    output->append(range);
183    for (size_t j = 0; range.size() + j < print_width + 1; ++j)
184      output->push_back(' ');
185    if (0 == current && i < bucket_count() - 1 && 0 == snapshot.counts(i + 1)) {
186      while (i < bucket_count() - 1 && 0 == snapshot.counts(i + 1))
187        ++i;
188      output->append("... ");
189      output->append(newline);
190      continue;  // No reason to plot emptiness.
191    }
192    double current_size = GetBucketSize(current, i);
193    if (graph_it)
194      WriteAsciiBucketGraph(current_size, max_size, output);
195    WriteAsciiBucketContext(past, current, remaining, i, output);
196    output->append(newline);
197    past += current;
198  }
199  DCHECK_EQ(sample_count, past);
200}
201
202// static
203std::string Histogram::SerializeHistogramInfo(const Histogram& histogram,
204                                              const SampleSet& snapshot) {
205  DCHECK_NE(NOT_VALID_IN_RENDERER, histogram.histogram_type());
206
207  Pickle pickle;
208  pickle.WriteString(histogram.histogram_name());
209  pickle.WriteInt(histogram.declared_min());
210  pickle.WriteInt(histogram.declared_max());
211  pickle.WriteSize(histogram.bucket_count());
212  pickle.WriteUInt32(histogram.range_checksum());
213  pickle.WriteInt(histogram.histogram_type());
214  pickle.WriteInt(histogram.flags());
215
216  snapshot.Serialize(&pickle);
217  return std::string(static_cast<const char*>(pickle.data()), pickle.size());
218}
219
220// static
221bool Histogram::DeserializeHistogramInfo(const std::string& histogram_info) {
222  if (histogram_info.empty()) {
223      return false;
224  }
225
226  Pickle pickle(histogram_info.data(),
227                static_cast<int>(histogram_info.size()));
228  std::string histogram_name;
229  int declared_min;
230  int declared_max;
231  size_t bucket_count;
232  uint32 range_checksum;
233  int histogram_type;
234  int pickle_flags;
235  SampleSet sample;
236
237  void* iter = NULL;
238  if (!pickle.ReadString(&iter, &histogram_name) ||
239      !pickle.ReadInt(&iter, &declared_min) ||
240      !pickle.ReadInt(&iter, &declared_max) ||
241      !pickle.ReadSize(&iter, &bucket_count) ||
242      !pickle.ReadUInt32(&iter, &range_checksum) ||
243      !pickle.ReadInt(&iter, &histogram_type) ||
244      !pickle.ReadInt(&iter, &pickle_flags) ||
245      !sample.Histogram::SampleSet::Deserialize(&iter, pickle)) {
246    LOG(ERROR) << "Pickle error decoding Histogram: " << histogram_name;
247    return false;
248  }
249  DCHECK(pickle_flags & kIPCSerializationSourceFlag);
250  // Since these fields may have come from an untrusted renderer, do additional
251  // checks above and beyond those in Histogram::Initialize()
252  if (declared_max <= 0 || declared_min <= 0 || declared_max < declared_min ||
253      INT_MAX / sizeof(Count) <= bucket_count || bucket_count < 2) {
254    LOG(ERROR) << "Values error decoding Histogram: " << histogram_name;
255    return false;
256  }
257
258  Flags flags = static_cast<Flags>(pickle_flags & ~kIPCSerializationSourceFlag);
259
260  DCHECK_NE(NOT_VALID_IN_RENDERER, histogram_type);
261
262  scoped_refptr<Histogram> render_histogram(NULL);
263
264  if (histogram_type == HISTOGRAM) {
265    render_histogram = Histogram::FactoryGet(
266        histogram_name, declared_min, declared_max, bucket_count, flags);
267  } else if (histogram_type == LINEAR_HISTOGRAM) {
268    render_histogram = LinearHistogram::FactoryGet(
269        histogram_name, declared_min, declared_max, bucket_count, flags);
270  } else if (histogram_type == BOOLEAN_HISTOGRAM) {
271    render_histogram = BooleanHistogram::FactoryGet(histogram_name, flags);
272  } else {
273    LOG(ERROR) << "Error Deserializing Histogram Unknown histogram_type: "
274               << histogram_type;
275    return false;
276  }
277
278  DCHECK_EQ(render_histogram->declared_min(), declared_min);
279  DCHECK_EQ(render_histogram->declared_max(), declared_max);
280  DCHECK_EQ(render_histogram->bucket_count(), bucket_count);
281  DCHECK_EQ(render_histogram->range_checksum(), range_checksum);
282  DCHECK_EQ(render_histogram->histogram_type(), histogram_type);
283
284  if (render_histogram->flags() & kIPCSerializationSourceFlag) {
285    DVLOG(1) << "Single process mode, histogram observed and not copied: "
286             << histogram_name;
287  } else {
288    DCHECK_EQ(flags & render_histogram->flags(), flags);
289    render_histogram->AddSampleSet(sample);
290  }
291
292  return true;
293}
294
295//------------------------------------------------------------------------------
296// Methods for the validating a sample and a related histogram.
297//------------------------------------------------------------------------------
298
299Histogram::Inconsistencies Histogram::FindCorruption(
300    const SampleSet& snapshot) const {
301  int inconsistencies = NO_INCONSISTENCIES;
302  Sample previous_range = -1;  // Bottom range is always 0.
303  int64 count = 0;
304  for (size_t index = 0; index < bucket_count(); ++index) {
305    count += snapshot.counts(index);
306    int new_range = ranges(index);
307    if (previous_range >= new_range)
308      inconsistencies |= BUCKET_ORDER_ERROR;
309    previous_range = new_range;
310  }
311
312  if (!HasValidRangeChecksum())
313    inconsistencies |= RANGE_CHECKSUM_ERROR;
314
315  int64 delta64 = snapshot.redundant_count() - count;
316  if (delta64 != 0) {
317    int delta = static_cast<int>(delta64);
318    if (delta != delta64)
319      delta = INT_MAX;  // Flag all giant errors as INT_MAX.
320    // Since snapshots of histograms are taken asynchronously relative to
321    // sampling (and snapped from different threads), it is pretty likely that
322    // we'll catch a redundant count that doesn't match the sample count.  We
323    // allow for a certain amount of slop before flagging this as an
324    // inconsistency.  Even with an inconsistency, we'll snapshot it again (for
325    // UMA in about a half hour, so we'll eventually get the data, if it was
326    // not the result of a corruption.  If histograms show that 1 is "too tight"
327    // then we may try to use 2 or 3 for this slop value.
328    const int kCommonRaceBasedCountMismatch = 1;
329    if (delta > 0) {
330      UMA_HISTOGRAM_COUNTS("Histogram.InconsistentCountHigh", delta);
331      if (delta > kCommonRaceBasedCountMismatch)
332        inconsistencies |= COUNT_HIGH_ERROR;
333    } else {
334      DCHECK_GT(0, delta);
335      UMA_HISTOGRAM_COUNTS("Histogram.InconsistentCountLow", -delta);
336      if (-delta > kCommonRaceBasedCountMismatch)
337        inconsistencies |= COUNT_LOW_ERROR;
338    }
339  }
340  return static_cast<Inconsistencies>(inconsistencies);
341}
342
343Histogram::ClassType Histogram::histogram_type() const {
344  return HISTOGRAM;
345}
346
347Histogram::Sample Histogram::ranges(size_t i) const {
348  return ranges_[i];
349}
350
351size_t Histogram::bucket_count() const {
352  return bucket_count_;
353}
354
355// Do a safe atomic snapshot of sample data.
356// This implementation assumes we are on a safe single thread.
357void Histogram::SnapshotSample(SampleSet* sample) const {
358  // Note locking not done in this version!!!
359  *sample = sample_;
360}
361
362bool Histogram::HasConstructorArguments(Sample minimum,
363                                        Sample maximum,
364                                        size_t bucket_count) {
365  return ((minimum == declared_min_) && (maximum == declared_max_) &&
366          (bucket_count == bucket_count_));
367}
368
369bool Histogram::HasConstructorTimeDeltaArguments(TimeDelta minimum,
370                                                 TimeDelta maximum,
371                                                 size_t bucket_count) {
372  return ((minimum.InMilliseconds() == declared_min_) &&
373          (maximum.InMilliseconds() == declared_max_) &&
374          (bucket_count == bucket_count_));
375}
376
377bool Histogram::HasValidRangeChecksum() const {
378  return CalculateRangeChecksum() == range_checksum_;
379}
380
381Histogram::Histogram(const std::string& name, Sample minimum,
382                     Sample maximum, size_t bucket_count)
383  : histogram_name_(name),
384    declared_min_(minimum),
385    declared_max_(maximum),
386    bucket_count_(bucket_count),
387    flags_(kNoFlags),
388    ranges_(bucket_count + 1, 0),
389    range_checksum_(0),
390    sample_() {
391  Initialize();
392}
393
394Histogram::Histogram(const std::string& name, TimeDelta minimum,
395                     TimeDelta maximum, size_t bucket_count)
396  : histogram_name_(name),
397    declared_min_(static_cast<int> (minimum.InMilliseconds())),
398    declared_max_(static_cast<int> (maximum.InMilliseconds())),
399    bucket_count_(bucket_count),
400    flags_(kNoFlags),
401    ranges_(bucket_count + 1, 0),
402    range_checksum_(0),
403    sample_() {
404  Initialize();
405}
406
407Histogram::~Histogram() {
408  if (StatisticsRecorder::dump_on_exit()) {
409    std::string output;
410    WriteAscii(true, "\n", &output);
411    LOG(INFO) << output;
412  }
413
414  // Just to make sure most derived class did this properly...
415  DCHECK(ValidateBucketRanges());
416}
417
418// Calculate what range of values are held in each bucket.
419// We have to be careful that we don't pick a ratio between starting points in
420// consecutive buckets that is sooo small, that the integer bounds are the same
421// (effectively making one bucket get no values).  We need to avoid:
422//   ranges_[i] == ranges_[i + 1]
423// To avoid that, we just do a fine-grained bucket width as far as we need to
424// until we get a ratio that moves us along at least 2 units at a time.  From
425// that bucket onward we do use the exponential growth of buckets.
426void Histogram::InitializeBucketRange() {
427  double log_max = log(static_cast<double>(declared_max()));
428  double log_ratio;
429  double log_next;
430  size_t bucket_index = 1;
431  Sample current = declared_min();
432  SetBucketRange(bucket_index, current);
433  while (bucket_count() > ++bucket_index) {
434    double log_current;
435    log_current = log(static_cast<double>(current));
436    // Calculate the count'th root of the range.
437    log_ratio = (log_max - log_current) / (bucket_count() - bucket_index);
438    // See where the next bucket would start.
439    log_next = log_current + log_ratio;
440    int next;
441    next = static_cast<int>(floor(exp(log_next) + 0.5));
442    if (next > current)
443      current = next;
444    else
445      ++current;  // Just do a narrow bucket, and keep trying.
446    SetBucketRange(bucket_index, current);
447  }
448  ResetRangeChecksum();
449
450  DCHECK_EQ(bucket_count(), bucket_index);
451}
452
453bool Histogram::PrintEmptyBucket(size_t index) const {
454  return true;
455}
456
457size_t Histogram::BucketIndex(Sample value) const {
458  // Use simple binary search.  This is very general, but there are better
459  // approaches if we knew that the buckets were linearly distributed.
460  DCHECK_LE(ranges(0), value);
461  DCHECK_GT(ranges(bucket_count()), value);
462  size_t under = 0;
463  size_t over = bucket_count();
464  size_t mid;
465
466  do {
467    DCHECK_GE(over, under);
468    mid = under + (over - under)/2;
469    if (mid == under)
470      break;
471    if (ranges(mid) <= value)
472      under = mid;
473    else
474      over = mid;
475  } while (true);
476
477  DCHECK_LE(ranges(mid), value);
478  CHECK_GT(ranges(mid+1), value);
479  return mid;
480}
481
482// Use the actual bucket widths (like a linear histogram) until the widths get
483// over some transition value, and then use that transition width.  Exponentials
484// get so big so fast (and we don't expect to see a lot of entries in the large
485// buckets), so we need this to make it possible to see what is going on and
486// not have 0-graphical-height buckets.
487double Histogram::GetBucketSize(Count current, size_t i) const {
488  DCHECK_GT(ranges(i + 1), ranges(i));
489  static const double kTransitionWidth = 5;
490  double denominator = ranges(i + 1) - ranges(i);
491  if (denominator > kTransitionWidth)
492    denominator = kTransitionWidth;  // Stop trying to normalize.
493  return current/denominator;
494}
495
496void Histogram::ResetRangeChecksum() {
497  range_checksum_ = CalculateRangeChecksum();
498}
499
500const std::string Histogram::GetAsciiBucketRange(size_t i) const {
501  std::string result;
502  if (kHexRangePrintingFlag & flags_)
503    StringAppendF(&result, "%#x", ranges(i));
504  else
505    StringAppendF(&result, "%d", ranges(i));
506  return result;
507}
508
509// Update histogram data with new sample.
510void Histogram::Accumulate(Sample value, Count count, size_t index) {
511  // Note locking not done in this version!!!
512  sample_.Accumulate(value, count, index);
513}
514
515void Histogram::SetBucketRange(size_t i, Sample value) {
516  DCHECK_GT(bucket_count_, i);
517  ranges_[i] = value;
518}
519
520bool Histogram::ValidateBucketRanges() const {
521  // Standard assertions that all bucket ranges should satisfy.
522  DCHECK_EQ(bucket_count_ + 1, ranges_.size());
523  DCHECK_EQ(0, ranges_[0]);
524  DCHECK_EQ(declared_min(), ranges_[1]);
525  DCHECK_EQ(declared_max(), ranges_[bucket_count_ - 1]);
526  DCHECK_EQ(kSampleType_MAX, ranges_[bucket_count_]);
527  return true;
528}
529
530uint32 Histogram::CalculateRangeChecksum() const {
531  DCHECK_EQ(ranges_.size(), bucket_count() + 1);
532  uint32 checksum = static_cast<uint32>(ranges_.size());  // Seed checksum.
533  for (size_t index = 0; index < bucket_count(); ++index)
534    checksum = Crc32(checksum, ranges(index));
535  return checksum;
536}
537
538void Histogram::Initialize() {
539  sample_.Resize(*this);
540  if (declared_min_ < 1)
541    declared_min_ = 1;
542  if (declared_max_ > kSampleType_MAX - 1)
543    declared_max_ = kSampleType_MAX - 1;
544  DCHECK_LE(declared_min_, declared_max_);
545  DCHECK_GT(bucket_count_, 1u);
546  CHECK_LT(bucket_count_, kBucketCount_MAX);
547  size_t maximal_bucket_count = declared_max_ - declared_min_ + 2;
548  DCHECK_LE(bucket_count_, maximal_bucket_count);
549  DCHECK_EQ(0, ranges_[0]);
550  ranges_[bucket_count_] = kSampleType_MAX;
551}
552
553// We generate the CRC-32 using the low order bits to select whether to XOR in
554// the reversed polynomial 0xedb88320L.  This is nice and simple, and allows us
555// to keep the quotient in a uint32.  Since we're not concerned about the nature
556// of corruptions (i.e., we don't care about bit sequencing, since we are
557// handling memory changes, which are more grotesque) so we don't bother to
558// get the CRC correct for big-endian vs little-ending calculations.  All we
559// need is a nice hash, that tends to depend on all the bits of the sample, with
560// very little chance of changes in one place impacting changes in another
561// place.
562uint32 Histogram::Crc32(uint32 sum, Histogram::Sample range) {
563  const bool kUseRealCrc = true;  // TODO(jar): Switch to false and watch stats.
564  if (kUseRealCrc) {
565    union {
566      Histogram::Sample range;
567      unsigned char bytes[sizeof(Histogram::Sample)];
568    } converter;
569    converter.range = range;
570    for (size_t i = 0; i < sizeof(converter); ++i)
571      sum = kCrcTable[(sum & 0xff) ^ converter.bytes[i]] ^ (sum >> 8);
572  } else {
573    // Use hash techniques provided in ReallyFastHash, except we don't care
574    // about "avalanching" (which would worsten the hash, and add collisions),
575    // and we don't care about edge cases since we have an even number of bytes.
576    union {
577      Histogram::Sample range;
578      uint16 ints[sizeof(Histogram::Sample) / 2];
579    } converter;
580    DCHECK_EQ(sizeof(Histogram::Sample), sizeof(converter));
581    converter.range = range;
582    sum += converter.ints[0];
583    sum = (sum << 16) ^ sum ^ (static_cast<uint32>(converter.ints[1]) << 11);
584    sum += sum >> 11;
585  }
586  return sum;
587}
588
589//------------------------------------------------------------------------------
590// Private methods
591
592double Histogram::GetPeakBucketSize(const SampleSet& snapshot) const {
593  double max = 0;
594  for (size_t i = 0; i < bucket_count() ; ++i) {
595    double current_size = GetBucketSize(snapshot.counts(i), i);
596    if (current_size > max)
597      max = current_size;
598  }
599  return max;
600}
601
602void Histogram::WriteAsciiHeader(const SampleSet& snapshot,
603                                 Count sample_count,
604                                 std::string* output) const {
605  StringAppendF(output,
606                "Histogram: %s recorded %d samples",
607                histogram_name().c_str(),
608                sample_count);
609  if (0 == sample_count) {
610    DCHECK_EQ(snapshot.sum(), 0);
611  } else {
612    double average = static_cast<float>(snapshot.sum()) / sample_count;
613    double variance = static_cast<float>(snapshot.square_sum())/sample_count
614                      - average * average;
615    double standard_deviation = sqrt(variance);
616
617    StringAppendF(output,
618                  ", average = %.1f, standard deviation = %.1f",
619                  average, standard_deviation);
620  }
621  if (flags_ & ~kHexRangePrintingFlag)
622    StringAppendF(output, " (flags = 0x%x)", flags_ & ~kHexRangePrintingFlag);
623}
624
625void Histogram::WriteAsciiBucketContext(const int64 past,
626                                        const Count current,
627                                        const int64 remaining,
628                                        const size_t i,
629                                        std::string* output) const {
630  double scaled_sum = (past + current + remaining) / 100.0;
631  WriteAsciiBucketValue(current, scaled_sum, output);
632  if (0 < i) {
633    double percentage = past / scaled_sum;
634    StringAppendF(output, " {%3.1f%%}", percentage);
635  }
636}
637
638void Histogram::WriteAsciiBucketValue(Count current, double scaled_sum,
639                                      std::string* output) const {
640  StringAppendF(output, " (%d = %3.1f%%)", current, current/scaled_sum);
641}
642
643void Histogram::WriteAsciiBucketGraph(double current_size, double max_size,
644                                      std::string* output) const {
645  const int k_line_length = 72;  // Maximal horizontal width of graph.
646  int x_count = static_cast<int>(k_line_length * (current_size / max_size)
647                                 + 0.5);
648  int x_remainder = k_line_length - x_count;
649
650  while (0 < x_count--)
651    output->append("-");
652  output->append("O");
653  while (0 < x_remainder--)
654    output->append(" ");
655}
656
657//------------------------------------------------------------------------------
658// Methods for the Histogram::SampleSet class
659//------------------------------------------------------------------------------
660
661Histogram::SampleSet::SampleSet()
662    : counts_(),
663      sum_(0),
664      square_sum_(0),
665      redundant_count_(0) {
666}
667
668Histogram::SampleSet::~SampleSet() {
669}
670
671void Histogram::SampleSet::Resize(const Histogram& histogram) {
672  counts_.resize(histogram.bucket_count(), 0);
673}
674
675void Histogram::SampleSet::CheckSize(const Histogram& histogram) const {
676  DCHECK_EQ(histogram.bucket_count(), counts_.size());
677}
678
679
680void Histogram::SampleSet::Accumulate(Sample value,  Count count,
681                                      size_t index) {
682  DCHECK(count == 1 || count == -1);
683  counts_[index] += count;
684  sum_ += count * value;
685  square_sum_ += (count * value) * static_cast<int64>(value);
686  redundant_count_ += count;
687  DCHECK_GE(counts_[index], 0);
688  DCHECK_GE(sum_, 0);
689  DCHECK_GE(square_sum_, 0);
690  DCHECK_GE(redundant_count_, 0);
691}
692
693Count Histogram::SampleSet::TotalCount() const {
694  Count total = 0;
695  for (Counts::const_iterator it = counts_.begin();
696       it != counts_.end();
697       ++it) {
698    total += *it;
699  }
700  return total;
701}
702
703void Histogram::SampleSet::Add(const SampleSet& other) {
704  DCHECK_EQ(counts_.size(), other.counts_.size());
705  sum_ += other.sum_;
706  square_sum_ += other.square_sum_;
707  redundant_count_ += other.redundant_count_;
708  for (size_t index = 0; index < counts_.size(); ++index)
709    counts_[index] += other.counts_[index];
710}
711
712void Histogram::SampleSet::Subtract(const SampleSet& other) {
713  DCHECK_EQ(counts_.size(), other.counts_.size());
714  // Note: Race conditions in snapshotting a sum or square_sum may lead to
715  // (temporary) negative values when snapshots are later combined (and deltas
716  // calculated).  As a result, we don't currently CHCEK() for positive values.
717  sum_ -= other.sum_;
718  square_sum_ -= other.square_sum_;
719  redundant_count_ -= other.redundant_count_;
720  for (size_t index = 0; index < counts_.size(); ++index) {
721    counts_[index] -= other.counts_[index];
722    DCHECK_GE(counts_[index], 0);
723  }
724}
725
726bool Histogram::SampleSet::Serialize(Pickle* pickle) const {
727  pickle->WriteInt64(sum_);
728  pickle->WriteInt64(square_sum_);
729  pickle->WriteInt64(redundant_count_);
730  pickle->WriteSize(counts_.size());
731
732  for (size_t index = 0; index < counts_.size(); ++index) {
733    pickle->WriteInt(counts_[index]);
734  }
735
736  return true;
737}
738
739bool Histogram::SampleSet::Deserialize(void** iter, const Pickle& pickle) {
740  DCHECK_EQ(counts_.size(), 0u);
741  DCHECK_EQ(sum_, 0);
742  DCHECK_EQ(square_sum_, 0);
743  DCHECK_EQ(redundant_count_, 0);
744
745  size_t counts_size;
746
747  if (!pickle.ReadInt64(iter, &sum_) ||
748      !pickle.ReadInt64(iter, &square_sum_) ||
749      !pickle.ReadInt64(iter, &redundant_count_) ||
750      !pickle.ReadSize(iter, &counts_size)) {
751    return false;
752  }
753
754  if (counts_size == 0)
755    return false;
756
757  int count = 0;
758  for (size_t index = 0; index < counts_size; ++index) {
759    int i;
760    if (!pickle.ReadInt(iter, &i))
761      return false;
762    counts_.push_back(i);
763    count += i;
764  }
765  DCHECK_EQ(count, redundant_count_);
766  return count == redundant_count_;
767}
768
769//------------------------------------------------------------------------------
770// LinearHistogram: This histogram uses a traditional set of evenly spaced
771// buckets.
772//------------------------------------------------------------------------------
773
774LinearHistogram::~LinearHistogram() {
775}
776
777scoped_refptr<Histogram> LinearHistogram::FactoryGet(const std::string& name,
778                                                     Sample minimum,
779                                                     Sample maximum,
780                                                     size_t bucket_count,
781                                                     Flags flags) {
782  scoped_refptr<Histogram> histogram(NULL);
783
784  if (minimum < 1)
785    minimum = 1;
786  if (maximum > kSampleType_MAX - 1)
787    maximum = kSampleType_MAX - 1;
788
789  if (!StatisticsRecorder::FindHistogram(name, &histogram)) {
790    LinearHistogram* linear_histogram =
791        new LinearHistogram(name, minimum, maximum, bucket_count);
792    linear_histogram->InitializeBucketRange();
793    histogram = linear_histogram;
794    StatisticsRecorder::RegisterOrDiscardDuplicate(&histogram);
795  }
796
797  DCHECK_EQ(LINEAR_HISTOGRAM, histogram->histogram_type());
798  DCHECK(histogram->HasConstructorArguments(minimum, maximum, bucket_count));
799  histogram->SetFlags(flags);
800  return histogram;
801}
802
803scoped_refptr<Histogram> LinearHistogram::FactoryTimeGet(
804    const std::string& name,
805    TimeDelta minimum,
806    TimeDelta maximum,
807    size_t bucket_count,
808    Flags flags) {
809  return FactoryGet(name, minimum.InMilliseconds(), maximum.InMilliseconds(),
810                    bucket_count, flags);
811}
812
813Histogram::ClassType LinearHistogram::histogram_type() const {
814  return LINEAR_HISTOGRAM;
815}
816
817void LinearHistogram::SetRangeDescriptions(
818    const DescriptionPair descriptions[]) {
819  for (int i =0; descriptions[i].description; ++i) {
820    bucket_description_[descriptions[i].sample] = descriptions[i].description;
821  }
822}
823
824LinearHistogram::LinearHistogram(const std::string& name,
825                                 Sample minimum,
826                                 Sample maximum,
827                                 size_t bucket_count)
828    : Histogram(name, minimum >= 1 ? minimum : 1, maximum, bucket_count) {
829}
830
831LinearHistogram::LinearHistogram(const std::string& name,
832                                 TimeDelta minimum,
833                                 TimeDelta maximum,
834                                 size_t bucket_count)
835    : Histogram(name, minimum >= TimeDelta::FromMilliseconds(1) ?
836                                 minimum : TimeDelta::FromMilliseconds(1),
837                maximum, bucket_count) {
838}
839
840void LinearHistogram::InitializeBucketRange() {
841  DCHECK_GT(declared_min(), 0);  // 0 is the underflow bucket here.
842  double min = declared_min();
843  double max = declared_max();
844  size_t i;
845  for (i = 1; i < bucket_count(); ++i) {
846    double linear_range = (min * (bucket_count() -1 - i) + max * (i - 1)) /
847                          (bucket_count() - 2);
848    SetBucketRange(i, static_cast<int> (linear_range + 0.5));
849  }
850  ResetRangeChecksum();
851}
852
853double LinearHistogram::GetBucketSize(Count current, size_t i) const {
854  DCHECK_GT(ranges(i + 1), ranges(i));
855  // Adjacent buckets with different widths would have "surprisingly" many (few)
856  // samples in a histogram if we didn't normalize this way.
857  double denominator = ranges(i + 1) - ranges(i);
858  return current/denominator;
859}
860
861const std::string LinearHistogram::GetAsciiBucketRange(size_t i) const {
862  int range = ranges(i);
863  BucketDescriptionMap::const_iterator it = bucket_description_.find(range);
864  if (it == bucket_description_.end())
865    return Histogram::GetAsciiBucketRange(i);
866  return it->second;
867}
868
869bool LinearHistogram::PrintEmptyBucket(size_t index) const {
870  return bucket_description_.find(ranges(index)) == bucket_description_.end();
871}
872
873
874//------------------------------------------------------------------------------
875// This section provides implementation for BooleanHistogram.
876//------------------------------------------------------------------------------
877
878scoped_refptr<Histogram> BooleanHistogram::FactoryGet(const std::string& name,
879                                                      Flags flags) {
880  scoped_refptr<Histogram> histogram(NULL);
881
882  if (!StatisticsRecorder::FindHistogram(name, &histogram)) {
883    BooleanHistogram* boolean_histogram = new BooleanHistogram(name);
884    boolean_histogram->InitializeBucketRange();
885    histogram = boolean_histogram;
886    StatisticsRecorder::RegisterOrDiscardDuplicate(&histogram);
887  }
888
889  DCHECK_EQ(BOOLEAN_HISTOGRAM, histogram->histogram_type());
890  histogram->SetFlags(flags);
891  return histogram;
892}
893
894Histogram::ClassType BooleanHistogram::histogram_type() const {
895  return BOOLEAN_HISTOGRAM;
896}
897
898void BooleanHistogram::AddBoolean(bool value) {
899  Add(value ? 1 : 0);
900}
901
902BooleanHistogram::BooleanHistogram(const std::string& name)
903    : LinearHistogram(name, 1, 2, 3) {
904}
905
906//------------------------------------------------------------------------------
907// CustomHistogram:
908//------------------------------------------------------------------------------
909
910scoped_refptr<Histogram> CustomHistogram::FactoryGet(
911    const std::string& name,
912    const std::vector<Sample>& custom_ranges,
913    Flags flags) {
914  scoped_refptr<Histogram> histogram(NULL);
915
916  // Remove the duplicates in the custom ranges array.
917  std::vector<int> ranges = custom_ranges;
918  ranges.push_back(0);  // Ensure we have a zero value.
919  std::sort(ranges.begin(), ranges.end());
920  ranges.erase(std::unique(ranges.begin(), ranges.end()), ranges.end());
921  if (ranges.size() <= 1) {
922    DCHECK(false);
923    // Note that we pushed a 0 in above, so for defensive code....
924    ranges.push_back(1);  // Put in some data so we can index to [1].
925  }
926
927  DCHECK_LT(ranges.back(), kSampleType_MAX);
928
929  if (!StatisticsRecorder::FindHistogram(name, &histogram)) {
930    CustomHistogram* custom_histogram = new CustomHistogram(name, ranges);
931    custom_histogram->InitializedCustomBucketRange(ranges);
932    histogram = custom_histogram;
933    StatisticsRecorder::RegisterOrDiscardDuplicate(&histogram);
934  }
935
936  DCHECK_EQ(histogram->histogram_type(), CUSTOM_HISTOGRAM);
937  DCHECK(histogram->HasConstructorArguments(ranges[1], ranges.back(),
938                                            ranges.size()));
939  histogram->SetFlags(flags);
940  return histogram;
941}
942
943Histogram::ClassType CustomHistogram::histogram_type() const {
944  return CUSTOM_HISTOGRAM;
945}
946
947CustomHistogram::CustomHistogram(const std::string& name,
948                                 const std::vector<Sample>& custom_ranges)
949    : Histogram(name, custom_ranges[1], custom_ranges.back(),
950                custom_ranges.size()) {
951  DCHECK_GT(custom_ranges.size(), 1u);
952  DCHECK_EQ(custom_ranges[0], 0);
953}
954
955void CustomHistogram::InitializedCustomBucketRange(
956    const std::vector<Sample>& custom_ranges) {
957  DCHECK_GT(custom_ranges.size(), 1u);
958  DCHECK_EQ(custom_ranges[0], 0);
959  DCHECK_LE(custom_ranges.size(), bucket_count());
960  for (size_t index = 0; index < custom_ranges.size(); ++index)
961    SetBucketRange(index, custom_ranges[index]);
962  ResetRangeChecksum();
963}
964
965double CustomHistogram::GetBucketSize(Count current, size_t i) const {
966  return 1;
967}
968
969//------------------------------------------------------------------------------
970// The next section handles global (central) support for all histograms, as well
971// as startup/teardown of this service.
972//------------------------------------------------------------------------------
973
974// This singleton instance should be started during the single threaded portion
975// of main(), and hence it is not thread safe.  It initializes globals to
976// provide support for all future calls.
977StatisticsRecorder::StatisticsRecorder() {
978  DCHECK(!histograms_);
979  if (lock_ == NULL) {
980    // This will leak on purpose. It's the only way to make sure we won't race
981    // against the static uninitialization of the module while one of our
982    // static methods relying on the lock get called at an inappropriate time
983    // during the termination phase. Since it's a static data member, we will
984    // leak one per process, which would be similar to the instance allocated
985    // during static initialization and released only on  process termination.
986    lock_ = new base::Lock;
987  }
988  base::AutoLock auto_lock(*lock_);
989  histograms_ = new HistogramMap;
990}
991
992StatisticsRecorder::~StatisticsRecorder() {
993  DCHECK(histograms_ && lock_);
994
995  if (dump_on_exit_) {
996    std::string output;
997    WriteGraph("", &output);
998    LOG(INFO) << output;
999  }
1000  // Clean up.
1001  HistogramMap* histograms = NULL;
1002  {
1003    base::AutoLock auto_lock(*lock_);
1004    histograms = histograms_;
1005    histograms_ = NULL;
1006  }
1007  delete histograms;
1008  // We don't delete lock_ on purpose to avoid having to properly protect
1009  // against it going away after we checked for NULL in the static methods.
1010}
1011
1012// static
1013bool StatisticsRecorder::IsActive() {
1014  if (lock_ == NULL)
1015    return false;
1016  base::AutoLock auto_lock(*lock_);
1017  return NULL != histograms_;
1018}
1019
1020// Note: We can't accept a ref_ptr to |histogram| because we *might* not keep a
1021// reference, and we are called while in the Histogram constructor. In that
1022// scenario, a ref_ptr would have incremented the ref count when the histogram
1023// was passed to us, decremented it when we returned, and the instance would be
1024// destroyed before assignment (when value was returned by new).
1025// static
1026void StatisticsRecorder::RegisterOrDiscardDuplicate(
1027    scoped_refptr<Histogram>* histogram) {
1028  DCHECK((*histogram)->HasValidRangeChecksum());
1029  if (lock_ == NULL)
1030    return;
1031  base::AutoLock auto_lock(*lock_);
1032  if (!histograms_)
1033    return;
1034  const std::string name = (*histogram)->histogram_name();
1035  HistogramMap::iterator it = histograms_->find(name);
1036  // Avoid overwriting a previous registration.
1037  if (histograms_->end() == it)
1038    (*histograms_)[name] = *histogram;
1039  else
1040    *histogram = it->second;
1041}
1042
1043// static
1044void StatisticsRecorder::WriteHTMLGraph(const std::string& query,
1045                                        std::string* output) {
1046  if (!IsActive())
1047    return;
1048  output->append("<html><head><title>About Histograms");
1049  if (!query.empty())
1050    output->append(" - " + query);
1051  output->append("</title>"
1052                 // We'd like the following no-cache... but it doesn't work.
1053                 // "<META HTTP-EQUIV=\"Pragma\" CONTENT=\"no-cache\">"
1054                 "</head><body>");
1055
1056  Histograms snapshot;
1057  GetSnapshot(query, &snapshot);
1058  for (Histograms::iterator it = snapshot.begin();
1059       it != snapshot.end();
1060       ++it) {
1061    (*it)->WriteHTMLGraph(output);
1062    output->append("<br><hr><br>");
1063  }
1064  output->append("</body></html>");
1065}
1066
1067// static
1068void StatisticsRecorder::WriteGraph(const std::string& query,
1069                                    std::string* output) {
1070  if (!IsActive())
1071    return;
1072  if (query.length())
1073    StringAppendF(output, "Collections of histograms for %s\n", query.c_str());
1074  else
1075    output->append("Collections of all histograms\n");
1076
1077  Histograms snapshot;
1078  GetSnapshot(query, &snapshot);
1079  for (Histograms::iterator it = snapshot.begin();
1080       it != snapshot.end();
1081       ++it) {
1082    (*it)->WriteAscii(true, "\n", output);
1083    output->append("\n");
1084  }
1085}
1086
1087// static
1088void StatisticsRecorder::GetHistograms(Histograms* output) {
1089  if (lock_ == NULL)
1090    return;
1091  base::AutoLock auto_lock(*lock_);
1092  if (!histograms_)
1093    return;
1094  for (HistogramMap::iterator it = histograms_->begin();
1095       histograms_->end() != it;
1096       ++it) {
1097    DCHECK_EQ(it->first, it->second->histogram_name());
1098    output->push_back(it->second);
1099  }
1100}
1101
1102bool StatisticsRecorder::FindHistogram(const std::string& name,
1103                                       scoped_refptr<Histogram>* histogram) {
1104  if (lock_ == NULL)
1105    return false;
1106  base::AutoLock auto_lock(*lock_);
1107  if (!histograms_)
1108    return false;
1109  HistogramMap::iterator it = histograms_->find(name);
1110  if (histograms_->end() == it)
1111    return false;
1112  *histogram = it->second;
1113  return true;
1114}
1115
1116// private static
1117void StatisticsRecorder::GetSnapshot(const std::string& query,
1118                                     Histograms* snapshot) {
1119  if (lock_ == NULL)
1120    return;
1121  base::AutoLock auto_lock(*lock_);
1122  if (!histograms_)
1123    return;
1124  for (HistogramMap::iterator it = histograms_->begin();
1125       histograms_->end() != it;
1126       ++it) {
1127    if (it->first.find(query) != std::string::npos)
1128      snapshot->push_back(it->second);
1129  }
1130}
1131
1132// static
1133StatisticsRecorder::HistogramMap* StatisticsRecorder::histograms_ = NULL;
1134// static
1135base::Lock* StatisticsRecorder::lock_ = NULL;
1136// static
1137bool StatisticsRecorder::dump_on_exit_ = false;
1138
1139}  // namespace base
1140