commits.py revision 8d2b206a675ec20ea07100c35df34e65ee1e45e8
1#!/usr/bin/env python
2# Copyright 2015 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""Print statistics about the rate of commits to a repository."""
7
8import datetime
9import itertools
10import json
11import math
12import urllib
13import urllib2
14
15
# Root URL of the host serving the repositories. CommitTimes() joins it with
# a repository path to form that repository's '+log' URL.
_BASE_URL = 'https://chromium.googlesource.com/'
# Number of revisions requested from each repository's log (sent as the 'n'
# query parameter by CommitTimes()). Can be up to 10,000.
_REVISION_COUNT = 1000

# Repository paths, relative to _BASE_URL, that main() reports on in order.
_REPOSITORIES = [
    'chromium/src',
    'angle/angle',
    'skia',
    'v8/v8',
]
26
27
def Pairwise(iterable):
  """Pair every item of an iterable with the item that follows it.

  s -> (s0, s1), (s1, s2), (s2, s3), ...

  Parameters:
    iterable: Any iterable. Inputs with fewer than two items produce no pairs.

  Returns:
    A lazy iterator of 2-tuples.
  """
  current, following = itertools.tee(iterable)
  # Advance the second copy by one item so that zipping the two copies lines
  # each item up with its successor. The None default keeps an empty input
  # from raising StopIteration here.
  next(following, None)
  # itertools.izip only exists on Python 2. Where it is missing, the builtin
  # zip is already lazy, so it is a drop-in replacement.
  lazy_zip = getattr(itertools, 'izip', zip)
  return lazy_zip(current, following)
33
34
def Percentile(data, percentile):
  """Find a percentile of a list of values.

  The result is linearly interpolated between the two closest elements when
  the requested percentile falls between two positions of the list.

  Parameters:
    data: A sorted list of values.
    percentile: The percentile to look up, from 0.0 to 1.0.

  Returns:
    The percentile. This is an element of data when the percentile lands
    exactly on a position, otherwise the interpolated value.

  Raises:
    ValueError: If data is empty, or if percentile is outside 0.0 to 1.0
        (including NaN).
  """
  if not data:
    raise ValueError('data must not be empty')
  # Written as a negated chained comparison so that NaN is rejected too.
  # Without this check a negative percentile would silently index from the
  # end of the list and a value above 1.0 would raise IndexError.
  if not 0.0 <= percentile <= 1.0:
    raise ValueError(
        'percentile must be between 0.0 and 1.0, got %r' % (percentile,))

  # Fractional position of the percentile within the list.
  k = (len(data) - 1) * percentile
  f = math.floor(k)
  c = math.ceil(k)

  if f == c:
    # The position is a whole number: return that element unchanged.
    return data[int(k)]
  # Weight each neighbour by its closeness to the fractional position.
  return data[int(f)] * (c - k) + data[int(c)] * (k - f)
58
59
def CommitTimes(repository, revision_count):
  """Fetch the committer timestamps from a repository's log.

  Parameters:
    repository: Repository path relative to _BASE_URL, e.g. 'chromium/src'.
    revision_count: Number of log entries to request (sent as the 'n' query
        parameter).

  Returns:
    A list of naive datetime.datetime objects, one per entry of the 'log'
    array in the response, in the order the server returned them.

  Raises:
    urllib2.URLError: If the request fails.
    ValueError: If the response is not valid JSON once its first line is
        dropped, or if a time string does not match the expected format.
  """
  parameters = urllib.urlencode((('n', revision_count), ('format', 'JSON')))
  # _BASE_URL ends in '/', so strip it before joining; otherwise the URL
  # contains an empty path segment ('host//repository').
  url = '%s/%s/+log?%s' % (
      _BASE_URL.rstrip('/'), urllib.quote(repository), parameters)

  response = urllib2.urlopen(url)
  try:
    body = response.read()
  finally:
    # Release the connection even when reading the body fails.
    response.close()

  # The first line of the response is discarded before parsing; presumably it
  # is a non-JSON guard prefix emitted by the server -- TODO confirm.
  data = json.loads(''.join(body.splitlines()[1:]))

  return [
      datetime.datetime.strptime(
          revision['committer']['time'], '%a %b %d %H:%M:%S %Y')
      for revision in data['log']]
73
74
75def main():
76  for repository in _REPOSITORIES:
77    commit_times = CommitTimes(repository, _REVISION_COUNT)
78
79    commit_durations = []
80    for time1, time2 in Pairwise(commit_times):
81      commit_durations.append((time1 - time2).total_seconds())
82    commit_durations.sort()
83
84    print 'REPOSITORY:', repository
85    print 'Start Date:', min(commit_times)
86    print '  End Date:', max(commit_times)
87    print '  Duration:', max(commit_times) - min(commit_times)
88    print '         n:', len(commit_times)
89
90    for p in (0.00, 0.05, 0.25, 0.50, 0.75, 0.95, 1.00):
91      percentile = Percentile(commit_durations, p)
92      print '%3d%% commit duration:' % (p * 100), '%6ds' % percentile
93    mean = math.fsum(commit_durations) / len(commit_durations)
94    print ' Min commit duration:', '%6ds' % min(commit_durations)
95    print 'Mean commit duration:', '%6ds' % mean
96    print ' Max commit duration:', '%6ds' % max(commit_durations)
97    print
98
99
# Run the report only when executed as a script, not when imported.
if __name__ == '__main__':
  main()
102