# status_history.py revision bc9a79521993c4b475bedfa4ac4478d6b59f7be9
# Copyright 2015 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import common
from autotest_lib.frontend import setup_django_environment
from django.db import models as django_models

from autotest_lib.client.common_lib import global_config
from autotest_lib.client.common_lib import time_utils
from autotest_lib.frontend.afe import models as afe_models
from autotest_lib.site_utils.suite_scheduler import constants


# Values used to describe the diagnosis of a DUT.  These values are
# used to indicate both DUT status after a job or task, and also
# diagnosis of whether the DUT was working at the end of a given
# time interval.
#
# UNUSED:  Used when there are no events recorded in a given
#     time interval.
# UNKNOWN:  For an individual event, indicates that the DUT status
#     is unchanged from the previous event.  For a time interval,
#     indicates that the DUT's status can't be determined from the
#     DUT's history.
# WORKING:  Indicates that the DUT was working normally after the
#     event, or at the end of the time interval.
# BROKEN:  Indicates that the DUT needed manual repair after the
#     event, or at the end of the time interval.
#
UNUSED = 0
UNKNOWN = 1
WORKING = 2
BROKEN = 3


def parse_time(time_string):
    """Parse time according to a canonical form.

    The "canonical" form is the form in which date/time
    values are stored in the database.

    @param time_string Time to be parsed.

    @return The result of `time_utils.to_epoch_time()` for
            `time_string`, truncated to an integer.

    """
    return int(time_utils.to_epoch_time(time_string))


class _JobEvent(object):
    """Information about an event in host history.

    This remembers the relevant data from a single event in host
    history.  An event is any change in DUT state caused by a job
    or special task.  The data captured are the start and end times
    of the event, the URL of logs to the job or task causing the
    event, and a diagnosis of whether the DUT was working or failed
    afterwards.

    This class is an adapter around the database model objects
    describing jobs and special tasks.  This is an abstract
    superclass, with concrete subclasses for `HostQueueEntry` and
    `SpecialTask` objects.

    @property start_time  Time the job or task began execution.
    @property end_time    Time the job or task finished execution.
    @property job_url     URL to the logs for the event's job.
    @property diagnosis   Working status of the DUT after the event.

    """

    get_config_value = global_config.global_config.get_config_value
    _LOG_URL_PATTERN = get_config_value('CROS', 'log_url_pattern')

    @classmethod
    def get_log_url(cls, afe_hostname, logdir):
        """Return a URL to job results.

        The URL is constructed from a base URL determined by the
        global config, plus the relative path of the job's log
        directory.

        @param afe_hostname Hostname for autotest frontend
        @param logdir Relative path of the results log directory.

        @return A URL to the requested results log.

        """
        return cls._LOG_URL_PATTERN % (afe_hostname, logdir)


    def __init__(self, start_time, end_time):
        """Record the start and end times of the event.

        @param start_time Time the event started, in a form accepted
                          by `parse_time()`.
        @param end_time   Time the event finished, in a form accepted
                          by `parse_time()`; if false (e.g. `None`),
                          `self.end_time` is set to `None`.

        """
        self.start_time = parse_time(start_time)
        if end_time:
            self.end_time = parse_time(end_time)
        else:
            self.end_time = None


    def __cmp__(self, other):
        """Compare two jobs by their start time.

        This is a standard Python `__cmp__` method to allow sorting
        `_JobEvent` objects by their times.

        @param other The `_JobEvent` object to compare to `self`.

        """
        return self.start_time - other.start_time


    def __lt__(self, other):
        """Return whether this event started before `other`.

        Orders events exactly as `__cmp__` does.  Python 3 ignores
        `__cmp__`, so this keeps lists of events sortable there.

        @param other The `_JobEvent` object to compare to `self`.

        """
        return self.start_time < other.start_time


    @property
    def job_url(self):
        """Return the URL for this event's job logs."""
        # `NotImplemented` is a non-callable constant meant for rich
        # comparisons; `NotImplementedError` is the exception that
        # marks an abstract method.
        raise NotImplementedError()


    @property
    def diagnosis(self):
        """Return the status of the DUT after this event.

        The diagnosis is interpreted as follows:
          UNKNOWN - The DUT status was the same before and after
              the event.
          WORKING - The DUT appeared to be working after the event.
          BROKEN - The DUT likely required manual intervention
              after the event.

        @return A valid diagnosis value.

        """
        raise NotImplementedError()


class _SpecialTaskEvent(_JobEvent):
    """`_JobEvent` adapter for special tasks.

    This class wraps the standard `_JobEvent` interface around a row
    in the `afe_special_tasks` table.

    """

    @classmethod
    def get_tasks(cls, afe, host_id, start_time, end_time):
        """Return special tasks for a host in a given time range.

        Return a list of `_SpecialTaskEvent` objects representing all
        special tasks that ran on the given host in the given time
        range.  The list is ordered as it was returned by the query
        (i.e. unordered).

        @param afe         Autotest frontend
        @param host_id     Database host id of the desired host.
        @param start_time  Start time of the range of interest.
        @param end_time    End time of the range of interest.

        @return A list of `_SpecialTaskEvent` objects.

        """
        tasks = afe.get_host_special_tasks(
                host_id,
                time_started__gte=start_time,
                time_finished__lte=end_time,
                is_complete=1)
        return [cls(afe.server, t) for t in tasks]


    @classmethod
    def get_status_task(cls, afe, host_id, end_time):
        """Return the task indicating a host's status at a given time.

        The task returned determines the status of the DUT; the
        diagnosis on the task indicates the diagnosis for the DUT at
        the given `end_time`.

        @param afe         Autotest frontend
        @param host_id     Database host id of the desired host.
        @param end_time    Find status as of this time.

        @return A `_SpecialTaskEvent` object for the requested task,
                or `None` if no task was found.

        """
        task = afe.get_status_task(host_id, end_time)
        return cls(afe.server, task) if task else None


    def __init__(self, afe_hostname, afetask):
        """Wrap a special task object as an event.

        @param afe_hostname Hostname for autotest frontend; used to
                            build the log URL.
        @param afetask      Special task object supplying the
                            `time_started` and `time_finished` values
                            for the event.

        """
        self._afe_hostname = afe_hostname
        self._afetask = afetask
        super(_SpecialTaskEvent, self).__init__(
                afetask.time_started, afetask.time_finished)


    @property
    def job_url(self):
        """Return the URL for this special task's logs."""
        logdir = ('hosts/%s/%s-%s' %
                  (self._afetask.host.hostname, self._afetask.id,
                   self._afetask.task.lower()))
        return _SpecialTaskEvent.get_log_url(self._afe_hostname, logdir)


    @property
    def diagnosis(self):
        """Return the status of the DUT after this special task.

        A successful task of any type means `WORKING`; a failed
        'Repair' task means `BROKEN`; any other failed task yields
        `UNKNOWN`.

        @return A valid diagnosis value.

        """
        if self._afetask.success:
            return WORKING
        elif self._afetask.task == 'Repair':
            return BROKEN
        else:
            return UNKNOWN


class _TestJobEvent(_JobEvent):
    """`_JobEvent` adapter for regular test jobs.

    This class wraps the standard `_JobEvent` interface around a row
    in the `afe_host_queue_entries` table.

    """

    @classmethod
    def get_hqes(cls, afe, host_id, start_time, end_time):
        """Return HQEs for a host in a given time range.

        Return a list of `_TestJobEvent` objects representing all the
        HQEs of all the jobs that ran on the given host in the given
        time range.  The list is ordered as it was returned by the
        query (i.e. unordered).

        @param afe         Autotest frontend
        @param host_id     Database host id of the desired host.
        @param start_time  Start time of the range of interest.
        @param end_time    End time of the range of interest.

        @return A list of `_TestJobEvent` objects.

        """
        hqelist = afe.get_host_queue_entries(
                host_id=host_id,
                start_time=start_time,
                end_time=end_time,
                complete=1)
        return [cls(afe.server, hqe) for hqe in hqelist]


    def __init__(self, afe_hostname, hqe):
        """Wrap a host queue entry object as an event.

        @param afe_hostname Hostname for autotest frontend; used to
                            build the log URL.
        @param hqe          Host queue entry object supplying the
                            `started_on` and `finished_on` values for
                            the event.

        """
        self._afe_hostname = afe_hostname
        self._hqe = hqe
        super(_TestJobEvent, self).__init__(
                hqe.started_on, hqe.finished_on)


    @property
    def job_url(self):
        """Return the URL for this test job's logs."""
        logdir = '%s-%s' % (self._hqe.job.id, self._hqe.job.owner)
        return _TestJobEvent.get_log_url(self._afe_hostname, logdir)


    @property
    def diagnosis(self):
        """Return `UNKNOWN`; a test job never changes the diagnosis."""
        return UNKNOWN


class HostJobHistory(object):
    """Class to query and remember DUT execution history.

    This class is responsible for querying the database to determine
    the history of a single DUT in a time interval of interest, and
    for remembering the query results for reporting.

    @property hostname    Host name of the DUT.
    @property start_time  Start of the requested time interval.
    @property end_time    End of the requested time interval.
    @property host        Database host object for the DUT.
    @property history     A list of jobs and special tasks that
                          ran on the DUT in the requested time
                          interval, ordered in reverse, from latest
                          to earliest.

    """

    @classmethod
    def get_host_history(cls, afe, hostname, start_time, end_time):
        """Create a HostJobHistory instance for a single host.

        Simple factory method to construct host history from a
        hostname.  Simply looks up the host in the AFE database, and
        passes it to the class constructor.

        @param afe         Autotest frontend
        @param hostname    Name of the host.
        @param start_time  Start time for the history's time
                           interval.
        @param end_time    End time for the history's time interval.

        @return A new HostJobHistory instance.

        """
        afehost = afe.get_hosts(hostname=hostname)[0]
        return cls(afe, afehost, start_time, end_time)


    @classmethod
    def get_multiple_histories(cls, afe, start_time, end_time,
                               board=None, pool=None):
        """Create HostJobHistory instances for a set of hosts.

        The set of hosts can be specified as "all hosts of a given
        board type", "all hosts in a given pool", or "all hosts
        of a given board and pool".

        @param afe         Autotest frontend
        @param start_time  Start time for the history's time
                           interval.
        @param end_time    End time for the history's time interval.
        @param board       All hosts must have this board type; if
                           `None`, all boards are allowed.
        @param pool        All hosts must be in this pool; if
                           `None`, all pools are allowed.

        @return A list of new HostJobHistory instances.

        """
        # If `board` and `pool` are both `None`, we could search the
        # entire database, which is more expensive than we want.
        # Our caller currently won't (can't) do this, but assert to
        # be safe.
        assert board is not None or pool is not None
        labels = []
        if board is not None:
            labels.append(constants.Labels.BOARD_PREFIX + board)
        if pool is not None:
            labels.append(constants.Labels.POOL_PREFIX + pool)
        kwargs = {'multiple_labels': labels}
        hosts = afe.get_hosts(**kwargs)
        return [cls(afe, h, start_time, end_time) for h in hosts]


    def __init__(self, afe, afehost, start_time, end_time):
        """Remember the host and time interval; run no queries yet.

        @param afe         Autotest frontend
        @param afehost     Host object for the DUT; its `hostname`,
                           `id` and `labels` attributes are used.
        @param start_time  Start time for the history's time
                           interval.
        @param end_time    End time for the history's time interval.

        """
        self._afe = afe
        self.hostname = afehost.hostname
        self.start_time = start_time
        self.end_time = end_time
        self._host = afehost
        # Don't spend time on queries until they're needed.
        self._history = None
        self._status_diagnosis = None
        self._status_task = None


    def _get_history(self):
        """Fill in `self._history`."""
        if self._history is not None:
            return
        start_time = time_utils.epoch_time_to_date_string(self.start_time)
        end_time = time_utils.epoch_time_to_date_string(self.end_time)
        newtasks = _SpecialTaskEvent.get_tasks(
                self._afe, self._host.id, start_time, end_time)
        newhqes = _TestJobEvent.get_hqes(
                self._afe, self._host.id, start_time, end_time)
        newhistory = newtasks + newhqes
        # Events compare by start time, so this puts the latest first.
        newhistory.sort(reverse=True)
        self._history = newhistory


    def __iter__(self):
        """Iterate over the history's events, from latest to earliest."""
        self._get_history()
        return self._history.__iter__()


    def _extract_prefixed_label(self, prefix):
        """Return the remainder of the first host label with `prefix`.

        @param prefix Label prefix to search for.

        @return The text following `prefix` in the first matching
                label, or `None` if the host has no such label.

        """
        matches = [l for l in self._host.labels
                   if l.startswith(prefix)]
        # A host without the label should not fail with a bare
        # IndexError; report "no such label" as `None` instead.
        if not matches:
            return None
        return matches[0][len(prefix):]


    def get_host_board(self):
        """Return the board name for this history's DUT.

        @return The board name, or `None` if the host has no board
                label.

        """
        prefix = constants.Labels.BOARD_PREFIX
        return self._extract_prefixed_label(prefix)


    def get_host_pool(self):
        """Return the pool name for this history's DUT.

        @return The pool name, or `None` if the host has no pool
                label.

        """
        prefix = constants.Labels.POOL_PREFIX
        return self._extract_prefixed_label(prefix)


    def _get_status_task(self):
        """Fill in `self._status_diagnosis` and `_status_task`."""
        if self._status_diagnosis is not None:
            return
        end_time = time_utils.epoch_time_to_date_string(self.end_time)
        self._status_task = _SpecialTaskEvent.get_status_task(
                self._afe, self._host.id, end_time)
        if self._status_task is not None:
            self._status_diagnosis = self._status_task.diagnosis
        else:
            self._status_diagnosis = UNKNOWN


    def last_diagnosis(self):
        """Return the diagnosis of whether the DUT is working.

        This searches the DUT's job history from most to least
        recent, looking for jobs that indicate whether the DUT
        was working.  Return a tuple of `(diagnosis, task)`.

        The `diagnosis` entry in the tuple is one of these values:
          * WORKING - The DUT is working.
          * BROKEN - The DUT likely requires manual intervention.
          * UNKNOWN - No task could be found indicating status for
              the DUT.

        The `task` entry in the tuple is the task that led to the
        diagnosis.  The task will be `None` if the diagnosis is
        `UNKNOWN`.

        @return A tuple with the DUT's diagnosis and the task that
                determined it.

        """
        self._get_status_task()
        return self._status_diagnosis, self._status_task


def get_status_task(host_id, end_time):
    """Get the task indicating a host's status at a given time.

    This is the RPC endpoint for `_SpecialTaskEvent.get_status_task()`.
    This performs a database query to find the status task for the
    given host at the given time.

    The status task is the last diagnostic task before `end_time`.
    A "diagnostic task" is any Repair task or a successful special
    task of any type.  The status of the last diagnostic task
    (`WORKING` or `BROKEN`) determines whether the host is working
    or broken.

    @param host_id     Database host id of the desired host.
    @param end_time    End time of the range of interest.

    @return A Django query-set selecting the single special task of
            interest.

    """
    # Selects diag tasks:  any Repair task, or any successful task.
    diag_tasks = (django_models.Q(task='Repair') |
                  django_models.Q(success=True))
    # Our caller needs a Django query set in order to serialize the
    # result, so we don't resolve the query here; we just return a
    # slice with at most one element.
    return afe_models.SpecialTask.objects.filter(
            diag_tasks,
            host_id=host_id,
            time_finished__lte=end_time,
            is_complete=True).order_by('time_started').reverse()[0:1]