1#!/usr/bin/env python
2
3# Copyright (c) 2014 The Chromium OS Authors. All rights reserved.
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6
7# Script to check the history of stage calls made to devserver.
8# Following are some sample use cases:
9#
# 1. Find all stage requests for autotest and image
#    nyan_big-release/R38-6055.0.0 in the last 10 days across all devservers.
12# ./devserver_history.py --image_filters nyan_big 38 6055.0.0 -l 240 \
13#                        --artifact_filters autotest -v
14# output:
15# ==============================================================================
16# 170.21.64.22
17# ==============================================================================
18# Number of calls:         1
19# Number of unique images: 1
# 2014-08-23 12:45:00 nyan_big-release/R38-6055.0.0    autotest
21# ==============================================================================
22# 170.21.64.23
23# ==============================================================================
24# Number of calls:         2
25# Number of unique images: 1
# 2014-08-23 12:45:00 nyan_big-release/R38-6055.0.0    autotest, test_suites
# 2014-08-23 12:55:00 nyan_big-release/R38-6055.0.0    autotest, test_suites
28#
# 2. Find all duplicated stage requests for the last 10 days.
30# ./devserver_history.py -d -l 240
31# output:
32# Detecting artifacts staged in multiple devservers.
33# ==============================================================================
34# nyan_big-release/R38-6055.0.0
35# ==============================================================================
36# 170.21.64.22: 23  requests 2014-09-04 22:44:28 -- 2014-09-05 00:03:23
37# 170.21.64.23: 6   requests 2014-09-04 22:48:58 -- 2014-09-04 22:49:42
38#
39# Count of images with duplicated stages on each devserver:
40# 170.21.64.22   : 22
41# 170.21.64.23   : 11
42
43
44import argparse
45import datetime
46import logging
47import operator
48import re
49import time
50from itertools import groupby
51
52import common
53from autotest_lib.client.common_lib import global_config
54from autotest_lib.client.common_lib import time_utils
55from autotest_lib.client.common_lib.cros.graphite import autotest_es
56
57
class devserver_call(object):
    """Plain record holding the details of one devserver stage call.
    """

    def __init__(self, hit):
        """Fill in the record from a single ES query hit.

        @param hit: Mapping for one ES hit. The keys 'devserver', 'subname',
                    'artifacts', 'image', 'value' and 'time_recorded' are
                    read, in that order.
        """
        self.devserver = hit['devserver']
        self.subname = hit['subname']
        # Artifacts arrive as one space-separated string; keep them as a list.
        artifact_names = hit['artifacts']
        self.artifacts = artifact_names.split(' ')
        self.image = hit['image']
        self.value = hit['value']
        recorded_epoch = hit['time_recorded']
        self.time_recorded = time_utils.epoch_time_to_date_string(
                recorded_epoch)


    def __str__(self):
        """Render every non-dunder, non-callable attribute as 'name: value'.

        @returns: One line per attribute, in the order given by dir().
        """
        lines = []
        for name in dir(self):
            if name.startswith('__'):
                continue
            value = getattr(self, name)
            if callable(value):
                continue
            lines.append('%-20s: %s' % (name, value))
        return '\n'.join(lines)
79
80
def get_calls(time_start, time_end, artifact_filters=None,
              regex_constraints=None, devserver=None, size=1e7):
    """Query the es db for devserver calls matching the given constraints.

    @param time_start: Earliest time entry was recorded.
    @param time_end: Latest time entry was recorded.
    @param artifact_filters: A list of names to match artifacts. Each name is
                             added as its own equality constraint.
    @param regex_constraints: A list of regex constraints, passed to the ES
                              query unchanged.
    @param devserver: Name of devserver to query for. When left unset (None,
                      the default) no devserver constraint is added.
    @param size: Max number of entries to return, defaults to 1e7
                 (ten million).

    @returns: A list of devserver_call objects, one per hit. The query asks
              for hits sorted by time_recorded in descending order.
    """
    constraints = [('_type', 'devserver')]
    if devserver:
        constraints.append(('devserver', devserver))
    if artifact_filters:
        constraints.extend(('artifacts', name) for name in artifact_filters)

    epoch_range = ('time_recorded',
                   time_utils.to_epoch_time(time_start),
                   time_utils.to_epoch_time(time_end))
    results = autotest_es.query(
            fields_returned=None,
            equality_constraints=constraints,
            range_constraints=[epoch_range],
            size=size,
            sort_specs=[{'time_recorded': 'desc'}],
            regex_constraints=regex_constraints)

    found = [devserver_call(hit) for hit in results.hits]
    logging.info('Found %d calls.', len(found))
    return found
116
117
def print_call_details(calls, verbose):
    """Print details of each call to devserver to stage artifacts.

    Calls are grouped by devserver (in sorted devserver order). For each
    devserver a banner, the number of calls and the number of distinct images
    are printed; with `verbose` every call is listed, ordered by its
    time_recorded value.

    @param calls: A list of devserver stage requests. Each item must expose
                  `devserver`, `image`, `time_recorded` and `artifacts`.
    @param verbose: Set to True to print out all devserver calls.
    """
    separator = '=' * 80
    by_devserver = operator.attrgetter('devserver')
    # groupby only merges adjacent items, so sort on the grouping key first.
    ordered = sorted(calls, key=by_devserver)
    for devserver, group in groupby(ordered, by_devserver):
        group = list(group)
        # Single-argument print(...) prints the same text as a Python 2 print
        # statement and is also valid syntax on Python 3.
        print(separator)
        print(devserver)
        print(separator)
        print('Number of calls:         %d' % len(group))
        print('Number of unique images: %d' %
              len({call.image for call in group}))
        if not verbose:
            continue
        for call in sorted(group, key=operator.attrgetter('time_recorded')):
            print('%s %s    %s' % (call.time_recorded, call.image,
                                   ', '.join(call.artifacts)))
138
139
def detect_duplicated_stage(calls):
    """Detect any artifact for same build was staged in multiple devservers.

    For every image requested on more than one devserver, prints the image
    name followed by one line per devserver with its request count and the
    smallest and largest time_recorded values. Finally prints, per devserver,
    the number of images it shared with at least one other devserver, highest
    count first (ties ordered by devserver name so the output is stable).

    @param calls: A list of devserver stage requests. Each item must expose
                  `devserver`, `image` and `time_recorded`.
    """
    separator = '=' * 80
    by_image = operator.attrgetter('image')
    by_devserver = operator.attrgetter('devserver')

    # Single-argument print(...) prints the same text as a Python 2 print
    # statement and is also valid syntax on Python 3.
    print('\nDetecting artifacts staged in multiple devservers.')
    # Count how many times a devserver staged duplicated artifacts. A number
    # significantly larger than others can indicate that the devserver failed
    # check_health too often and needs to be removed from production.
    duplicated_stage_count = {}
    # groupby only merges adjacent items, so sort on the grouping key first.
    for image, calls_for_image in groupby(sorted(calls, key=by_image),
                                          by_image):
        calls_for_image = sorted(calls_for_image, key=by_devserver)
        devservers = {call.devserver for call in calls_for_image}
        if len(devservers) < 2:
            # Staged on a single devserver only: not a duplicate.
            continue
        print(separator)
        print(image)
        print(separator)
        for devserver, calls_for_devserver in groupby(calls_for_image,
                                                      by_devserver):
            timestamps = [c.time_recorded for c in calls_for_devserver]
            print('%s: %-3d requests %s -- %s' %
                  (devserver, len(timestamps), min(timestamps),
                   max(timestamps)))
            duplicated_stage_count[devserver] = (
                    duplicated_stage_count.get(devserver, 0) + 1)

    print('\nCount of images with duplicated stages on each devserver:')
    # items() works on both Python 2 and 3; sorting by (-count, name) keeps
    # the largest counts first and makes ties deterministic.
    counts = sorted(duplicated_stage_count.items(),
                    key=lambda item: (-item[1], item[0]))
    for devserver, count in counts:
        print('%-15s: %d' % (devserver, count))
173
def main():
    """Parse the command line, query devserver stage calls and report them.

    The time window is either the last `-l` hours or the --start/--end pair
    (defaulting to the last 24 hours). Calls are fetched per devserver via
    get_calls(), summarized with print_call_details() and, when -d is given,
    checked with detect_duplicated_stage().
    """
    t_now = time.time()
    t_now_minus_one_day = t_now - 3600 * 24
    parser = argparse.ArgumentParser()
    parser.add_argument('-l', type=float, dest='last',
                        help='last hours to search results across',
                        default=None)
    parser.add_argument('--start', type=str, dest='start',
                        help=('Enter start time as: yyyy-mm-dd hh-mm-ss, '
                              'defaults to 24h ago. This option is ignored '
                              'when -l is used.'),
                        default=time_utils.epoch_time_to_date_string(
                                t_now_minus_one_day))
    parser.add_argument('--end', type=str, dest='end',
                        help=('Enter end time as: yyyy-mm-dd hh-mm-ss, '
                              'defaults to current time. This option is '
                              'ignored when -l is used.'),
                        default=time_utils.epoch_time_to_date_string(t_now))
    parser.add_argument('--devservers', nargs='+', dest='devservers',
                        help=('Enter space delimited devservers. Default are'
                              ' all devservers specified in global config.'),
                        default=[])
    parser.add_argument('--artifact_filters', nargs='+',
                        dest='artifact_filters',
                        help=('Enter space delimited filters on artifact '
                              'name. For example "autotest test_suites". The '
                              'filter does not support regex.'),
                        default=[])
    parser.add_argument('--image_filters', nargs='+', dest='image_filters',
                        help=('Enter space delimited filters on image name. '
                              'For example "nyan 38 6566", search will use '
                              'regex to match each filter. Do not use filters '
                              'with mixed letter and number, e.g., R38.'),
                        default=[])
    parser.add_argument('-d', '--detect_duplicated_stage', action='store_true',
                        dest='detect_duplicated_stage',
                        help=('Detect if artifacts for the same build were '
                              'staged in multiple devservers. '
                              'Default is False.'),
                        default=False)
    parser.add_argument('-v', action='store_true', dest='verbose',
                        default=False,
                        help='-v to print out ALL entries.')
    options = parser.parse_args()
    if options.verbose:
        logging.getLogger().setLevel(logging.INFO)

    # Compare against None so that an explicit `-l 0` is honored instead of
    # silently falling back to --start/--end.
    if options.last is not None:
        end_time = datetime.datetime.now()
        start_time = end_time - datetime.timedelta(seconds=3600 * options.last)
    else:
        try:
            start_time = datetime.datetime.strptime(options.start,
                                                    time_utils.TIME_FMT)
            end_time = datetime.datetime.strptime(options.end,
                                                  time_utils.TIME_FMT)
        except ValueError as e:
            # Report an unparsable time as a usage error (exit code 2) rather
            # than a traceback; strptime's message names the expected format.
            parser.error('Invalid --start/--end value: %s' % e)
    logging.info('Searching devserver calls from %s to %s', start_time,
                 end_time)

    devservers = options.devservers
    if not devservers:
        devserver_urls = global_config.global_config.get_config_value(
                'CROS', 'dev_server', type=list, default=[])
        # Reduce 'http://host:port' to 'host'; entries that do not start with
        # 'http://' are kept unchanged.
        host_pattern = re.compile(r'http://([^:]*):*\d*')
        devservers = []
        for url in devserver_urls:
            match = host_pattern.match(url)
            devservers.append(match.group(1) if match else url)
    logging.info('Found devservers: %s', devservers)

    # Each image filter becomes an unanchored regex constraint on 'image'.
    regex_constraints = [('image', '.*%s.*' % image_filter)
                         for image_filter in options.image_filters]
    calls = []
    for devserver in devservers:
        calls.extend(get_calls(start_time, end_time, options.artifact_filters,
                               regex_constraints, devserver=devserver))

    print_call_details(calls, options.verbose)

    if options.detect_duplicated_stage:
        detect_duplicated_stage(calls)
254
255
256if __name__ == '__main__':
257    main()
258