1#!/usr/bin/env python
2# Copyright 2014 The Chromium OS Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""Report whether DUTs are working or broken.
7
8usage: dut_status [ <options> ] [hostname ...]
9
10Reports on the history and status of selected DUT hosts, to
11determine whether they're "working" or "broken".  For purposes of
12the script, "broken" means "the DUT requires manual intervention
13before it can be used for further testing", and "working" means "not
14broken".  The status determination is based on the history of
15completed jobs for the DUT in a given time interval; still-running
16jobs are not considered.
17
18Time Interval Selection
19~~~~~~~~~~~~~~~~~~~~~~~
20A DUT's reported status is based on the DUT's job history in a time
21interval determined by command line options.  The interval is
22specified with up to two of three options:
23  --until/-u DATE/TIME - Specifies an end time for the search
24      range.  (default: now)
25  --since/-s DATE/TIME - Specifies a start time for the search
26      range. (no default)
27  --duration/-d HOURS - Specifies the length of the search interval
28      in hours. (default: 24 hours)
29
30Any two time options completely specify the time interval.  If only
31one option is provided, these defaults are used:
32  --until - Use the given end time with the default duration.
33  --since - Use the given start time with the default end time.
34  --duration - Use the given duration with the default end time.
35
36If no time options are given, use the default end time and duration.
37
38DATE/TIME values are of the form '2014-11-06 17:21:34'.
39
40DUT Selection
41~~~~~~~~~~~~~
42By default, information is reported for DUTs named as command-line
43arguments.  Options are also available for selecting groups of
44hosts:
45  --board/-b BOARD - Only include hosts with the given board.
46  --pool/-p POOL - Only include hosts in the given pool.
47
48The selected hosts may also be filtered based on status:
49  -w/--working - Only include hosts in a working state.
50  -n/--broken - Only include hosts in a non-working state.  Hosts
51      with no job history are considered non-working.
52
53Output Formats
54~~~~~~~~~~~~~~
55There are four available output formats:
56  * A simple list of host names.
57  * A status summary showing one line per host.
58  * A detailed job history for all selected DUTs, sorted by
59    time of execution.
60  * A job history for all selected DUTs showing only the history
61    surrounding the DUT's last change from working to broken,
62    or vice versa.
63
64The default format depends on whether hosts are filtered by
65status:
66  * With the --working or --broken options, the list of host names
67    is the default format.
68  * Without those options, the default format is the one-line status
69    summary.
70
71These options override the default formats:
72  -o/--oneline - Use the one-line summary with the --working or
73      --broken options.
74  -f/--full_history - Print detailed per-host job history.
75  -g/--diagnosis - Print the job history surrounding a status
76      change.
77
78Examples
79~~~~~~~~
80    $ dut_status chromeos2-row4-rack2-host12
81    hostname                     S   last checked         URL
82    chromeos2-row4-rack2-host12  NO  2014-11-06 15:25:29  http://...
83
84'NO' means the DUT is broken.  That diagnosis is based on a job that
85failed:  'last checked' is the time of the failed job, and the URL
86points to the job's logs.
87
    $ dut_status -u '2014-11-06 15:30:00' -d 1 -f chromeos2-row4-rack2-host12
89    chromeos2-row4-rack2-host12
90        2014-11-06 15:25:29  NO http://...
91        2014-11-06 14:44:07  -- http://...
92        2014-11-06 14:42:56  OK http://...
93
94The times are the start times of the jobs; the URL points to the
95job's logs.  The status indicates the working or broken status after
96the job:
97  'NO' Indicates that the DUT was believed broken after the job.
98  'OK' Indicates that the DUT was believed working after the job.
99  '--' Indicates that the job probably didn't change the DUT's
100       status.
101Typically, logs of the actual failure will be found at the last job
102to report 'OK', or the first job to report '--'.
103
104"""
105
106import argparse
107import sys
108import time
109
110import common
111from autotest_lib.client.common_lib import time_utils
112from autotest_lib.server import frontend
113from autotest_lib.site_utils import status_history
114
115
# The fully qualified name makes for lines that are too long, so
# shorten it locally.
HostJobHistory = status_history.HostJobHistory

# _DIAGNOSIS_IDS -
#     Dictionary to map the known diagnosis codes to string values.
#     The values are the two-character status markers printed in the
#     one-line host summaries and in the per-event history lines.

_DIAGNOSIS_IDS = {
    status_history.UNUSED: '??',
    status_history.UNKNOWN: '--',
    status_history.WORKING: 'OK',
    status_history.BROKEN: 'NO'
}


# Default time interval for the --duration option when a value isn't
# specified on the command line.  The value is a number of hours.
_DEFAULT_DURATION = 24
134
135
def _include_status(status, arguments):
    """Decide whether a host with the given status should be printed.

    A host diagnosed as working is governed by the --working
    selection in `arguments`; a host with any other diagnosis is
    governed by the --broken selection.

    @param status    Diagnosis of the host being considered.
    @param arguments Parsed arguments object as returned by
                     ArgumentParser.parse_args().

    @return The value of the governing option:  true if hosts with
            this status are to be printed, false if they are to be
            skipped.

    """
    is_working = status == status_history.WORKING
    return arguments.working if is_working else arguments.broken
155
156
def _print_host_summaries(history_list, arguments):
    """Print a table with one status line per selected host.

    This is the output format of the --oneline option.  A header
    line is always printed, followed by one line for every host
    whose last diagnosis passes the --working/--broken filter.
    Hosts with no diagnosing event show '---' for both the time
    and the URL.

    @param history_list A list of HostJobHistory objects to be
                        printed.
    @param arguments    Parsed arguments object as returned by
                        ArgumentParser.parse_args().

    """
    row_format = '%-30s %-2s  %-19s  %s'
    print(row_format % ('hostname', 'S', 'last checked', 'URL'))
    for host_history in history_list:
        status, last_event = host_history.last_diagnosis()
        if not _include_status(status, arguments):
            continue
        if last_event is None:
            # No event backs up the diagnosis; print placeholders.
            checked = '---'
            log_url = '---'
        else:
            checked = time_utils.epoch_time_to_date_string(
                    last_event.start_time)
            log_url = last_event.job_url
        columns = (host_history.hostname,
                   _DIAGNOSIS_IDS[status],
                   checked,
                   log_url)
        print(row_format % columns)
184
185
def _print_event_summary(event):
    """Print an indented one-line summary of a job or special task.

    The line shows the event's start time, the two-character marker
    for the event's diagnosis, and the URL of the event's logs.

    @param event The event to be summarized.

    """
    started = time_utils.epoch_time_to_date_string(event.start_time)
    marker = _DIAGNOSIS_IDS[event.diagnosis]
    print('    %s  %s %s' % (started, marker, event.job_url))
194
195
def _print_hosts(history_list, arguments):
    """Print host names, each optionally followed by job history.

    This produces the plain list of names that is the default for
    the --working and --broken options, and also the output of the
    --full_history and --diagnosis options.  Which events (if any)
    follow each host name is selected by `arguments`.

    @param history_list A list of HostJobHistory objects to be
                        printed.
    @param arguments    Parsed arguments object as returned by
                        ArgumentParser.parse_args().

    """
    for host_history in history_list:
        status, _unused_event = host_history.last_diagnosis()
        if not _include_status(status, arguments):
            continue
        print(host_history.hostname)
        # Choose the events to list under the host name; with
        # neither history option there are none.
        if arguments.full_history:
            events = host_history
        elif arguments.diagnosis:
            events = host_history.diagnosis_interval()
        else:
            events = ()
        for event in events:
            _print_event_summary(event)
220
221
def _validate_time_range(arguments):
    """Validate and complete the requested time range.

    Enforces the rules for the --until, --since, and --duration
    options, and fills in whatever the command line left out:
      * Supplying all three options is an error; a message is
        written to stderr and the command exits with status 1.
      * Two options completely determine the interval.
      * With one option or none, the missing end time defaults to
        the current time and the missing duration defaults to
        `_DEFAULT_DURATION` hours.

    On return, both `arguments.since` and `arguments.until` are
    set.  `arguments.duration` is left as `None` when it wasn't
    needed to compute the interval.

    @param arguments Parsed arguments object as returned by
                     ArgumentParser.parse_args().

    """
    have_since = arguments.since is not None
    have_until = arguments.until is not None
    have_duration = arguments.duration is not None

    if have_since and have_until and have_duration:
        sys.stderr.write('FATAL: Can specify at most two of '
                         '--since, --until, and --duration' + '\n')
        sys.exit(1)

    if have_since and have_until:
        # Both ends were given explicitly; nothing to calculate.
        return
    if have_since and have_duration:
        arguments.until = (arguments.since +
                           arguments.duration * 60 * 60)
        return

    # From here on the interval is anchored at its end time.
    if not have_until:
        arguments.until = int(time.time())
    if not have_since:
        if not have_duration:
            arguments.duration = _DEFAULT_DURATION
        arguments.since = (arguments.until -
                           arguments.duration * 60 * 60)
253
254
def _get_host_histories(afe, arguments):
    """Return HostJobHistory objects for the requested hosts.

    Looks up each host named on the command line.  A host whose
    lookup raises an error generates a warning message on stderr,
    and is omitted from further processing.  Only ordinary errors
    (subclasses of `Exception`) are treated this way; requests to
    terminate the command, such as `KeyboardInterrupt` or
    `SystemExit`, are allowed to propagate rather than being
    reported as an unknown host.

    The return value is a list of HostJobHistory objects for the
    valid requested hostnames, using the time range supplied on the
    command line.

    @param afe       Autotest frontend
    @param arguments Parsed arguments object as returned by
                     ArgumentParser.parse_args().
    @return List of HostJobHistory objects for the hosts requested
            on the command line, in command line order.

    """
    histories = []
    saw_error = False
    for hostname in arguments.hostnames:
        try:
            h = HostJobHistory.get_host_history(
                    afe, hostname, arguments.since, arguments.until)
        except Exception:
            sys.stderr.write(
                    ('WARNING: Ignoring unknown host %s' % hostname) +
                    '\n')
            saw_error = True
        else:
            histories.append(h)
    if saw_error:
        # Create separation from the output that follows
        sys.stderr.write('\n')
    return histories
288
289
def _validate_host_list(afe, arguments):
    """Validate the user-specified list of hosts.

    Hosts may be specified implicitly with --board or --pool, or
    explicitly as command line arguments.  These rules are
    enforced:
      * If --board or --pool, or both are specified, individual
        hosts may not be specified.
      * However specified, there must be at least one host.

    When a rule is violated, a message is written to stderr and the
    command exits with status 1.

    @param afe       Autotest frontend
    @param arguments Parsed arguments object as returned by
                     ArgumentParser.parse_args().
    @return List of HostJobHistory objects for the hosts requested
            on the command line, using the time range supplied on
            the command line.

    """
    select_by_label = arguments.board or arguments.pool
    if select_by_label and arguments.hostnames:
        sys.stderr.write('FATAL: Hostname arguments provided '
                         'with --board or --pool' + '\n')
        sys.exit(1)

    if select_by_label:
        histories = HostJobHistory.get_multiple_histories(
                afe, arguments.since, arguments.until,
                board=arguments.board, pool=arguments.pool)
    else:
        histories = _get_host_histories(afe, arguments)

    if not histories:
        sys.stderr.write('FATAL: no valid hosts found' + '\n')
        sys.exit(1)
    return histories
325
326
def _validate_format_options(arguments):
    """Apply defaults for the output format and status filter.

    Enforce these rules:
      * If none of --oneline, --diagnosis, or --full_history was
        used, then --oneline becomes the selected format only when
        neither --broken nor --working was used.
      * If neither --broken nor --working was used, include both
        working and broken DUTs.

    @param arguments Parsed arguments object as returned by
                     ArgumentParser.parse_args().

    """
    # Both values must be captured before `arguments` is changed.
    unfiltered = not (arguments.working or arguments.broken)
    format_given = (arguments.oneline or arguments.diagnosis or
                    arguments.full_history)
    if not format_given:
        arguments.oneline = unfiltered
    if unfiltered:
        arguments.working = True
        arguments.broken = True
347
348
def _validate_command(afe, arguments):
    """Check the command's arguments, and fill in computed defaults.

    This enforces the command line rules that ArgumentParser can't
    handle, and calculates defaults for options where a simple
    constant default won't do.

    Steps performed, in order:
      * Check the time range options, supplying defaults as
        necessary.
      * Settle the output format and status filter options.
      * Look up the requested hosts, identifying invalid host
        names.

    @param afe       Autotest frontend
    @param arguments Parsed arguments object as returned by
                     ArgumentParser.parse_args().
    @return List of HostJobHistory objects for the hosts requested
            on the command line.

    """
    # The host lookup uses the time range, so the option fix-ups
    # run before it.
    for fix_options in (_validate_time_range, _validate_format_options):
        fix_options(arguments)
    return _validate_host_list(afe, arguments)
372
373
def _parse_command(argv):
    """Parse the command line arguments.

    Builds the ArgumentParser describing this command's syntax, and
    uses it to parse everything in `argv` after the command name.

    @param argv Standard command line argument vector; argv[0] is
                assumed to be the command name.
    @return Result returned by ArgumentParser.parse_args().

    """
    time_metavar = 'DATE/TIME'
    parser = argparse.ArgumentParser(
        prog=argv[0],
        description='Report DUT status and execution history',
        epilog=('You can specify one or two of --since, --until, '
                'and --duration, but not all three.\n'
                'The date/time format is "YYYY-MM-DD HH:MM:SS".'))

    # Options selecting the time interval.
    parser.add_argument(
        '-s', '--since',
        type=status_history.parse_time,
        metavar=time_metavar,
        help='starting time for history display')
    parser.add_argument(
        '-u', '--until',
        type=status_history.parse_time,
        metavar=time_metavar,
        help=('ending time for history display'
              ' (default: now)'))
    parser.add_argument(
        '-d', '--duration',
        type=int,
        metavar='HOURS',
        help=('number of hours of history to display'
              ' (default: %d)' % _DEFAULT_DURATION))

    # Options selecting the output format; at most one may be used.
    formats = parser.add_mutually_exclusive_group()
    formats.add_argument(
        '-f', '--full_history',
        action='store_true',
        help=('Display host history from most '
              'to least recent for each DUT'))
    formats.add_argument(
        '-g', '--diagnosis',
        action='store_true',
        help=('Display host history for the '
              'most recent DUT status change'))
    formats.add_argument(
        '-o', '--oneline',
        action='store_true',
        help='Display host status summary')

    # Options filtering hosts by status.
    parser.add_argument(
        '-w', '--working',
        action='store_true',
        help='List working devices by name only')
    parser.add_argument(
        '-n', '--broken',
        action='store_true',
        help='List non-working devices by name only')

    # Options and arguments selecting the hosts.
    parser.add_argument(
        '-b', '--board',
        help=('Display history for all DUTs '
              'of the given board'))
    parser.add_argument(
        '-p', '--pool',
        help=('Display history for all DUTs '
              'in the given pool'))
    parser.add_argument(
        'hostnames',
        nargs='*',
        help='host names of DUTs to report on')

    parser.add_argument(
        '--web',
        default=None,
        help=('Master autotest frontend hostname. If no value '
              'is given, the one in global config will be used.'))

    return parser.parse_args(argv[1:])
434
435
def main(argv):
    """Standard main() for command line processing.

    Parses the command line, validates it against the Autotest
    frontend selected by --web, and prints the report in the
    selected format.

    @param argv Command line arguments (normally sys.argv).

    """
    arguments = _parse_command(argv)
    afe = frontend.AFE(server=arguments.web)
    history_list = _validate_command(afe, arguments)
    # Validation settles `arguments.oneline`, so the output format
    # can only be chosen afterwards.
    report = (_print_host_summaries if arguments.oneline
              else _print_hosts)
    report(history_list, arguments)
449
450
# When run as a script, hand the complete argument vector to main();
# argv[0] is used as the command name in the parser's messages.
if __name__ == '__main__':
    main(sys.argv)
453