1# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5"""Histogram generation tools."""
6
7from collections import defaultdict
8
9import format_utils
10
11
class Histogram(object):
  """A histogram generating object.

  This object serves the sole purpose of formatting (key, val) pairs as an
  ASCII histogram, including bars and percentage markers, and taking care of
  label alignment, scaling, etc. In addition to the standard __init__
  interface, two static methods are provided for conveniently converting data
  in different formats into a histogram. Histogram generation is exported via
  its __str__ method, and looks as follows:

    Yes |################    | 5 (83.3%)
    No  |###                 | 1 (16.6%)

  Attributes:
    data: the (key, count) pairs, in display order
    scale: number of bar characters that represent 100%
    formatter: callable applied to each count before it is printed
    max_key_len: width of the longest key label (0 when there is no data)
    total: sum of all counts; bars and percentages are relative to it

  TODO(garnold) we may want to add actual methods for adding data or tweaking
  the output layout and formatting. For now, though, this is fine.

  """

  def __init__(self, data, scale=20, formatter=None):
    """Initialize a histogram object.

    Args:
      data: list of (key, count) pairs constituting the histogram
      scale: number of characters used to indicate 100%
      formatter: function used for formatting raw histogram values; defaults
        to str when omitted

    """
    self.data = data
    self.scale = scale
    self.formatter = formatter or str
    # max() raises ValueError on an empty sequence, so an empty histogram
    # falls back to a zero-width label column instead of failing here.
    key_lens = [len(str(key)) for key, _ in self.data]
    self.max_key_len = max(key_lens) if key_lens else 0
    self.total = sum(count for _, count in self.data)

  @staticmethod
  def FromCountDict(count_dict, scale=20, formatter=None, key_names=None):
    """Takes a dictionary of counts and returns a histogram object.

    This simply converts a mapping from names to counts into a list of (key,
    count) pairs, optionally translating keys into name strings, then
    generating and returning a histogram for them. This is a useful convenience
    call for clients that update a dictionary of counters as they (say) scan a
    data stream.

    Args:
      count_dict: dictionary mapping keys to occurrence counts
      scale: number of characters used to indicate 100%
      formatter: function used for formatting raw histogram values
      key_names: dictionary mapping keys to name strings; when given (and
        non-empty) every key of count_dict must be present in it
    Returns:
      A histogram object based on the given data.
    Raises:
      KeyError: if key_names is given but lacks one of count_dict's keys.

    """
    if key_names:
      hist = [(key_names[key], count) for key, count in count_dict.items()]
    else:
      hist = list(count_dict.items())
    return Histogram(hist, scale, formatter)

  @staticmethod
  def FromKeyList(key_list, scale=20, formatter=None, key_names=None):
    """Takes a list of (possibly recurring) keys and returns a histogram object.

    This converts the list into a dictionary of counters, then uses
    FromCountDict() to generate the actual histogram. For example:

      ['a', 'a', 'b', 'a', 'b'] --> {'a': 3, 'b': 2} --> ...

    Args:
      key_list: list of (possibly recurring) keys
      scale: number of characters used to indicate 100%
      formatter: function used for formatting raw histogram values
      key_names: dictionary mapping keys to name strings
    Returns:
      A histogram object based on the given data.

    """
    count_dict = defaultdict(int)  # Unset items default to zero
    for key in key_list:
      count_dict[key] += 1
    return Histogram.FromCountDict(count_dict, scale, formatter, key_names)

  def _FormatBar(self, count):
    """Returns the bar string for a single count.

    Args:
      count: the value whose share of self.total is to be drawn
    Returns:
      A '|'-delimited bar padded to self.scale characters, or a bare '|' when
      the total is zero and no proportion can be computed.

    """
    if not self.total:
      return '|'
    # Floor division keeps the bar length an integer regardless of the
    # interpreter's '/' semantics; a float here would make '#' * bar_len fail.
    bar_len = int(count * self.scale // self.total)
    return '|%s|' % ('#' * bar_len).ljust(self.scale)

  def __str__(self):
    """Returns the histogram as newline-separated ASCII lines.

    Each line holds the left-aligned key, the bar, the formatted count and,
    when format_utils.NumToPercent() yields a non-empty string, that
    percentage in parentheses. An empty histogram yields an empty string.

    """
    hist_lines = []
    for key, count in self.data:
      line = '%s %s %s' % (
          str(key).ljust(self.max_key_len),
          self._FormatBar(count),
          self.formatter(count))
      percent_str = format_utils.NumToPercent(count, self.total)
      if percent_str:
        line += ' (%s)' % percent_str
      hist_lines.append(line)

    return '\n'.join(hist_lines)

  def GetKeys(self):
    """Returns the keys of the histogram."""
    return [key for key, _ in self.data]
118