status_history.py revision bc9a79521993c4b475bedfa4ac4478d6b59f7be9
1# Copyright 2015 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import common
6from autotest_lib.frontend import setup_django_environment
7from django.db import models as django_models
8
9from autotest_lib.client.common_lib import global_config
10from autotest_lib.client.common_lib import time_utils
11from autotest_lib.frontend.afe import models as afe_models
12from autotest_lib.site_utils.suite_scheduler import constants
13
14
# Diagnosis values for a DUT.  The same set of values describes
# both the DUT's status after a single job or task, and whether
# the DUT was working at the end of a given time interval.
#
# UNUSED (0):  No events were recorded in the given time
#     interval.
# UNKNOWN (1):  For a single event, the DUT status is unchanged
#     from the previous event.  For a time interval, the DUT's
#     status can't be determined from the DUT's history.
# WORKING (2):  The DUT was working normally after the event, or
#     at the end of the time interval.
# BROKEN (3):  The DUT needed manual repair after the event, or
#     at the end of the time interval.
#
UNUSED, UNKNOWN, WORKING, BROKEN = range(4)
35
36
def parse_time(time_string):
    """Convert a canonical date/time string to an integer epoch time.

    The "canonical" form is the form in which date/time values are
    stored in the database.

    @param time_string Date/time string to be converted.

    @return The result of `time_utils.to_epoch_time()` for the
            string, truncated to an integer.
    """
    epoch_time = time_utils.to_epoch_time(time_string)
    return int(epoch_time)
46
47
class _JobEvent(object):
    """Information about an event in host history.

    This remembers the relevant data from a single event in host
    history.  An event is any change in DUT state caused by a job
    or special task.  The data captured are the start and end times
    of the event, the URL of logs to the job or task causing the
    event, and a diagnosis of whether the DUT was working or failed
    afterwards.

    This class is an adapter around the database model objects
    describing jobs and special tasks.  This is an abstract
    superclass, with concrete subclasses for `HostQueueEntry` and
    `SpecialTask` objects.  Subclasses must override the `job_url`
    and `diagnosis` properties.

    @property start_time  Time the job or task began execution, as
                          returned by `parse_time()`.
    @property end_time    Time the job or task finished execution,
                          as returned by `parse_time()`, or `None`
                          if no end time was supplied.
    @property job_url     URL to the logs for the event's job.
    @property diagnosis   Working status of the DUT after the event.

    """

    get_config_value = global_config.global_config.get_config_value
    # Format string taking two values: the AFE hostname and the
    # relative path of the log directory (see `get_log_url()`).
    _LOG_URL_PATTERN = get_config_value('CROS', 'log_url_pattern')

    @classmethod
    def get_log_url(cls, afe_hostname, logdir):
        """Return a URL to job results.

        The URL is constructed from a base URL determined by the
        global config, plus the relative path of the job's log
        directory.

        @param afe_hostname Hostname for autotest frontend
        @param logdir Relative path of the results log directory.

        @return A URL to the requested results log.

        """
        return cls._LOG_URL_PATTERN % (afe_hostname, logdir)


    def __init__(self, start_time, end_time):
        """Record the start and end times of the event.

        @param start_time  Start time of the event, in the form
                           accepted by `parse_time()`.
        @param end_time    End time of the event, in the same form;
                           a false value (e.g. `None`) is recorded
                           as `None`.

        """
        self.start_time = parse_time(start_time)
        if end_time:
            self.end_time = parse_time(end_time)
        else:
            self.end_time = None


    def __cmp__(self, other):
        """Compare two jobs by their start time.

        This is a standard Python `__cmp__` method to allow sorting
        `_JobEvent` objects by their times.

        @param other The `_JobEvent` object to compare to `self`.

        @return A negative, zero, or positive number if `self`
                started before, together with, or after `other`.

        """
        return self.start_time - other.start_time


    def __lt__(self, other):
        """Return whether this event started before `other`.

        `__cmp__` is only consulted by Python 2; this provides the
        same ordering through the rich comparison protocol, so that
        sorting doesn't depend on `__cmp__` alone.

        @param other The `_JobEvent` object to compare to `self`.

        """
        return self.start_time < other.start_time


    @property
    def job_url(self):
        """Return the URL for this event's job logs.

        Abstract; must be overridden by subclasses.

        """
        # `NotImplemented` is a non-callable constant used for rich
        # comparisons; the exception for an abstract method is
        # `NotImplementedError`.
        raise NotImplementedError()


    @property
    def diagnosis(self):
        """Return the status of the DUT after this event.

        Abstract; must be overridden by subclasses.

        The diagnosis is interpreted as follows:
          UNKNOWN - The DUT status was the same before and after
              the event.
          WORKING - The DUT appeared to be working after the event.
          BROKEN - The DUT likely required manual intervention
              after the event.

        @return A valid diagnosis value.

        """
        raise NotImplementedError()
131
132
class _SpecialTaskEvent(_JobEvent):
    """`_JobEvent` adapter for special tasks.

    Each instance presents the `_JobEvent` interface for one row
    of the `afe_special_tasks` table.

    """

    @classmethod
    def get_tasks(cls, afe, host_id, start_time, end_time):
        """Return special tasks for a host in a given time range.

        Queries for completed special tasks on the given host that
        started at or after `start_time` and finished at or before
        `end_time`, and wraps each one in a `_SpecialTaskEvent`.
        The list keeps the order returned by the query (i.e. it is
        unordered).

        @param afe         Autotest frontend
        @param host_id     Database host id of the desired host.
        @param start_time  Start time of the range of interest.
        @param end_time    End time of the range of interest.

        @return A list of `_SpecialTaskEvent` objects.

        """
        query_args = {
            'time_started__gte': start_time,
            'time_finished__lte': end_time,
            'is_complete': 1,
        }
        events = []
        for afetask in afe.get_host_special_tasks(host_id, **query_args):
            events.append(cls(afe.server, afetask))
        return events


    @classmethod
    def get_status_task(cls, afe, host_id, end_time):
        """Return the task indicating a host's status at a given time.

        The diagnosis of the returned task is the diagnosis for
        the DUT as of the given `end_time`.

        @param afe         Autotest frontend
        @param host_id     Database host id of the desired host.
        @param end_time    Find status as of this time.

        @return A `_SpecialTaskEvent` object for the requested task,
                or `None` if no task was found.

        """
        afetask = afe.get_status_task(host_id, end_time)
        if not afetask:
            return None
        return cls(afe.server, afetask)


    def __init__(self, afe_hostname, afetask):
        """Wrap a special task object.

        @param afe_hostname  Hostname of the autotest frontend, used
                             to construct the log URL.
        @param afetask       Special task object to be wrapped.

        """
        self._afe_hostname = afe_hostname
        self._afetask = afetask
        started, finished = afetask.time_started, afetask.time_finished
        super(_SpecialTaskEvent, self).__init__(started, finished)


    @property
    def job_url(self):
        """Return the URL for this task's logs."""
        afetask = self._afetask
        logdir = 'hosts/%s/%s-%s' % (
                afetask.host.hostname, afetask.id, afetask.task.lower())
        return _SpecialTaskEvent.get_log_url(self._afe_hostname, logdir)


    @property
    def diagnosis(self):
        """Return the status of the DUT after this task.

        A successful task of any type means `WORKING`.  A failed
        Repair task means `BROKEN`.  Any other failed task is
        `UNKNOWN`.

        """
        if self._afetask.success:
            return WORKING
        if self._afetask.task == 'Repair':
            return BROKEN
        return UNKNOWN
209
210
class _TestJobEvent(_JobEvent):
    """`_JobEvent` adapter for regular test jobs.

    Each instance presents the `_JobEvent` interface for one row
    of the `afe_host_queue_entries` table.

    """

    @classmethod
    def get_hqes(cls, afe, host_id, start_time, end_time):
        """Return HQEs for a host in a given time range.

        Queries for the completed HQEs of the given host in the
        given time range, and wraps each one in a `_TestJobEvent`.
        The list keeps the order returned by the query (i.e. it is
        unordered).

        @param afe         Autotest frontend
        @param host_id     Database host id of the desired host.
        @param start_time  Start time of the range of interest.
        @param end_time    End time of the range of interest.

        @return A list of `_TestJobEvent` objects.

        """
        query_args = {
            'host_id': host_id,
            'start_time': start_time,
            'end_time': end_time,
            'complete': 1,
        }
        events = []
        for hqe in afe.get_host_queue_entries(**query_args):
            events.append(cls(afe.server, hqe))
        return events


    def __init__(self, afe_hostname, hqe):
        """Wrap a host queue entry object.

        @param afe_hostname  Hostname of the autotest frontend, used
                             to construct the log URL.
        @param hqe           Host queue entry object to be wrapped.

        """
        self._afe_hostname = afe_hostname
        self._hqe = hqe
        started, finished = hqe.started_on, hqe.finished_on
        super(_TestJobEvent, self).__init__(started, finished)


    @property
    def job_url(self):
        """Return the URL for this job's logs."""
        job = self._hqe.job
        logdir = '%s-%s' % (job.id, job.owner)
        return _TestJobEvent.get_log_url(self._afe_hostname, logdir)


    @property
    def diagnosis(self):
        """Return `UNKNOWN`; test jobs never yield another diagnosis."""
        return UNKNOWN
260
261
class HostJobHistory(object):
    """Class to query and remember DUT execution history.

    This class is responsible for querying the database to determine
    the history of a single DUT in a time interval of interest, and
    for remembering the query results for reporting.

    Iterating over an instance yields the jobs and special tasks
    that ran on the DUT in the requested time interval, ordered in
    reverse, from latest to earliest start time.  Database queries
    are deferred until the results are first needed, and are then
    cached on the instance.

    @property hostname    Host name of the DUT.
    @property start_time  Start of the requested time interval.
    @property end_time    End of the requested time interval.

    """

    @classmethod
    def get_host_history(cls, afe, hostname, start_time, end_time):
        """Create a HostJobHistory instance for a single host.

        Simple factory method to construct host history from a
        hostname.  Simply looks up the host in the AFE database, and
        passes it to the class constructor.

        @param afe         Autotest frontend
        @param hostname    Name of the host.
        @param start_time  Start time for the history's time
                           interval.
        @param end_time    End time for the history's time interval.

        @return A new HostJobHistory instance.

        """
        # The first match is used; indexing fails if the lookup
        # returns no hosts.
        afehost = afe.get_hosts(hostname=hostname)[0]
        return cls(afe, afehost, start_time, end_time)


    @classmethod
    def get_multiple_histories(cls, afe, start_time, end_time,
                               board=None, pool=None):
        """Create HostJobHistory instances for a set of hosts.

        The set of hosts can be specified as "all hosts of a given
        board type", "all hosts in a given pool", or "all hosts
        of a given board and pool".

        @param afe         Autotest frontend
        @param start_time  Start time for the history's time
                           interval.
        @param end_time    End time for the history's time interval.
        @param board       All hosts must have this board type; if
                           `None`, all boards are allowed.
        @param pool        All hosts must be in this pool; if
                           `None`, all pools are allowed.

        @return A list of new HostJobHistory instances.

        """
        # If `board` and `pool` are both `None`, we could search the
        # entire database, which is more expensive than we want.
        # Our caller currently won't (can't) do this, but assert to
        # be safe.
        assert board is not None or pool is not None
        labels = []
        if board is not None:
            labels.append(constants.Labels.BOARD_PREFIX + board)
        if pool is not None:
            labels.append(constants.Labels.POOL_PREFIX + pool)
        hosts = afe.get_hosts(multiple_labels=labels)
        return [cls(afe, h, start_time, end_time) for h in hosts]


    def __init__(self, afe, afehost, start_time, end_time):
        """Remember the host and time interval; no queries are made.

        @param afe         Autotest frontend
        @param afehost     Host object for the DUT, as returned by
                           `afe.get_hosts()`.
        @param start_time  Start time for the history's time
                           interval; converted with
                           `time_utils.epoch_time_to_date_string()`
                           when querying.
        @param end_time    End time for the history's time interval,
                           in the same form as `start_time`.

        """
        self._afe = afe
        self.hostname = afehost.hostname
        self.start_time = start_time
        self.end_time = end_time
        self._host = afehost
        # Don't spend time on queries until they're needed.
        self._history = None
        self._status_diagnosis = None
        self._status_task = None


    def _get_history(self):
        """Fill in `self._history`.

        Queries both special tasks and HQEs for the time interval,
        and stores them as one list sorted from latest to earliest
        start time.  Does nothing if the history was already
        fetched.

        """
        if self._history is not None:
            return
        start_time = time_utils.epoch_time_to_date_string(self.start_time)
        end_time = time_utils.epoch_time_to_date_string(self.end_time)
        newtasks = _SpecialTaskEvent.get_tasks(
                self._afe, self._host.id, start_time, end_time)
        newhqes = _TestJobEvent.get_hqes(
                self._afe, self._host.id, start_time, end_time)
        newhistory = newtasks + newhqes
        # Sort on an explicit key rather than relying on the events'
        # `__cmp__` method, which only Python 2 honors.  The result
        # is the same order: by start time, latest first.
        newhistory.sort(key=lambda event: event.start_time, reverse=True)
        self._history = newhistory


    def __iter__(self):
        """Iterate over the history, from latest to earliest event."""
        self._get_history()
        return iter(self._history)


    def _extract_prefixed_label(self, prefix):
        """Return the host's first label with `prefix`, minus the prefix.

        @param prefix  Label prefix to search for.

        @return The remainder of the first label of the host that
                starts with `prefix`, or `None` if the host has no
                such label.

        """
        for label in self._host.labels:
            if label.startswith(prefix):
                return label[len(prefix):]
        # A host isn't guaranteed to carry every kind of label;
        # report that as `None` instead of failing with IndexError.
        return None


    def get_host_board(self):
        """Return the board name for this history's DUT.

        @return The board name, or `None` if the DUT has no board
                label.

        """
        prefix = constants.Labels.BOARD_PREFIX
        return self._extract_prefixed_label(prefix)


    def get_host_pool(self):
        """Return the pool name for this history's DUT.

        @return The pool name, or `None` if the DUT has no pool
                label.

        """
        prefix = constants.Labels.POOL_PREFIX
        return self._extract_prefixed_label(prefix)


    def _get_status_task(self):
        """Fill in `self._status_diagnosis` and `_status_task`.

        Does nothing if the diagnosis was already determined.  If
        no status task is found, the diagnosis is `UNKNOWN` and the
        task is `None`.

        """
        if self._status_diagnosis is not None:
            return
        end_time = time_utils.epoch_time_to_date_string(self.end_time)
        self._status_task = _SpecialTaskEvent.get_status_task(
                self._afe, self._host.id, end_time)
        if self._status_task is not None:
            self._status_diagnosis = self._status_task.diagnosis
        else:
            self._status_diagnosis = UNKNOWN


    def last_diagnosis(self):
        """Return the diagnosis of whether the DUT is working.

        This looks up the status task for the DUT as of the end of
        the time interval.  Return a tuple of `(diagnosis, task)`.

        The `diagnosis` entry in the tuple is one of these values:
          * WORKING - The DUT is working.
          * BROKEN - The DUT likely requires manual intervention.
          * UNKNOWN - No task could be found indicating status for
              the DUT.

        The `task` entry in the tuple is the task that led to the
        diagnosis.  The task will be `None` if no status task was
        found.

        @return A tuple with the DUT's diagnosis and the task that
                determined it.

        """
        self._get_status_task()
        return self._status_diagnosis, self._status_task
423
424
def get_status_task(host_id, end_time):
    """Get the task indicating a host's status at a given time.

    This is the RPC endpoint for `_SpecialTaskEvent.get_status_task()`.
    This performs a database query to find the status task for the
    given host at the given time.

    The status task is the most recently started diagnostic task
    that completed at or before `end_time`.  A "diagnostic task"
    is any Repair task or a successful special task of any type.
    The status of the last diagnostic task (`WORKING` or `BROKEN`)
    determines whether the host is working or broken.

    @param host_id     Database host id of the desired host.
    @param end_time    End time of the range of interest.

    @return A Django query-set selecting the single special task of
            interest.

    """
    # A diagnostic task is any Repair task, or any successful task.
    is_repair = django_models.Q(task='Repair')
    is_success = django_models.Q(success=True)
    candidates = afe_models.SpecialTask.objects.filter(
            is_repair | is_success,
            host_id=host_id,
            time_finished__lte=end_time,
            is_complete=True)
    latest_first = candidates.order_by('time_started').reverse()
    # The caller serializes a Django query set, so the query isn't
    # resolved here; the result is a slice with at most one element.
    return latest_first[0:1]
456