# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

#pylint: disable-msg=C0111

import os
import logging
import time

from autotest_lib.client.common_lib import global_config
from autotest_lib.frontend.afe import models
from autotest_lib.scheduler import email_manager
from autotest_lib.scheduler import scheduler_config, scheduler_models


# Override default parser with our site parser.
def parser_path(install_dir):
    """Return site implementation of parser.

    @param install_dir: installation directory.

    @return: path of 'tko/site_parse' underneath install_dir.
    """
    return os.path.join(install_dir, 'tko', 'site_parse')


def _notify_and_abort(entry, subject, error_message):
    """Log an error, enqueue a notification email and abort the entry's job.

    @param entry: queue entry whose job is asked to abort.
    @param subject: subject passed to the notification email.
    @param error_message: message that is logged and passed to the email.
    """
    logging.error(error_message)
    email_manager.manager.enqueue_notify_email(subject, error_message)
    entry.job.request_abort()


def _get_system_user(fallback_user_id):
    """Return the user that site-created special tasks are requested by.

    @param fallback_user_id: id of the user to fall back to when no user with
                             login 'autotest_system' exists.

    @return: the 'autotest_system' user if present, otherwise the user with
             id fallback_user_id.
    """
    try:
        return models.User.objects.get(login='autotest_system')
    except models.User.DoesNotExist:
        return models.User.objects.get(id=fallback_user_id)


class SiteAgentTask(object):
    """
    SiteAgentTask subclasses BaseAgentTask in monitor_db.

    The methods below call self._final_status(), which is not defined in
    this class; it has to be supplied by the class this one is combined with.
    """


    def _archive_results(self, queue_entries):
        """
        Set the status of queue_entries to ARCHIVING.

        This method sets the status of the queue_entries to ARCHIVING
        if the enable_archiving flag is true in global_config.ini.
        Otherwise, it bypasses the archiving step and sets the queue entries
        to the final status of current step.

        @param queue_entries: queue entries whose status is updated.
        """
        enable_archiving = global_config.global_config.get_config_value(
                scheduler_config.CONFIG_SECTION, 'enable_archiving', type=bool)
        # Set the status of the queue entries to archiving or self final status
        if enable_archiving:
            status = models.HostQueueEntry.Status.ARCHIVING
        else:
            status = self._final_status()

        # Operate on the entries handed in by the caller rather than on
        # self.queue_entries, so the argument is actually honored.
        for queue_entry in queue_entries:
            queue_entry.set_status(status)


    def _check_queue_entry_statuses(self, queue_entries, allowed_hqe_statuses,
                                    allowed_host_statuses=None):
        """
        Forked from monitor_db.py

        Instead of raising on an unexpected status, the affected job is
        aborted and a notification email is enqueued.

        @param queue_entries: queue entries to check.
        @param allowed_hqe_statuses: statuses a queue entry may have.
        @param allowed_host_statuses: statuses the entry's host may have, or
                                      None to skip the host status check.
        """
        class_name = self.__class__.__name__
        for entry in queue_entries:
            if entry.status not in allowed_hqe_statuses:
                # In the original code, here we raise an exception. In an
                # effort to prevent downtime we will instead abort the job and
                # send out an email notifying us this has occurred.
                error_message = ('%s attempting to start entry with invalid '
                                 'status %s: %s. Aborting Job: %s.'
                                 % (class_name, entry.status, entry,
                                    entry.job))
                _notify_and_abort(
                        entry,
                        'Job Aborted - Invalid Host Queue Entry Status',
                        error_message)
            invalid_host_status = (
                    allowed_host_statuses is not None
                    and entry.host.status not in allowed_host_statuses)
            if invalid_host_status:
                # In the original code, here we raise an exception. In an
                # effort to prevent downtime we will instead abort the job and
                # send out an email notifying us this has occurred.
                error_message = ('%s attempting to start on queue entry with '
                                 'invalid host status %s: %s. Aborting Job: %s'
                                 % (class_name, entry.host.status, entry,
                                    entry.job))
                _notify_and_abort(entry, 'Job Aborted - Invalid Host Status',
                                  error_message)


class SiteDispatcher(object):
    """
    SiteDispatcher subclasses BaseDispatcher in monitor_db.

    The methods below call self.host_has_agent() and
    self._host_has_scheduled_special_task(), which are not defined in this
    class; they have to be supplied by the class this one is combined with.
    """
    # Id of the user used as requested_by when the 'autotest_system' user
    # does not exist.
    DEFAULT_REQUESTED_BY_USER_ID = 1


    def _reverify_hosts_where(self, where,
                              print_message='Reverifying host %s'):
        """
        This is an altered version of _reverify_hosts_where: the call to
        models.SpecialTask.objects.create passes in an argument for
        requested_by, in order to allow the Reset task to be created
        properly.

        @param where: SQL condition appended to
                      'locked = 0 AND invalid = 0 AND ' to select hosts.
        @param print_message: logging format string taking the hostname; a
                              false value disables the log message.
        """
        full_where = 'locked = 0 AND invalid = 0 AND ' + where
        # Looked up lazily and only once: the requesting user is the same
        # for every host, so there is no need to query it per iteration.
        requested_by = None
        for host in scheduler_models.Host.fetch(where=full_where):
            if self.host_has_agent(host):
                # host has already been recovered in some way
                continue
            if self._host_has_scheduled_special_task(host):
                # host will have a special task scheduled on the next cycle
                continue
            if print_message:
                logging.error(print_message, host.hostname)
            if requested_by is None:
                requested_by = _get_system_user(
                        SiteDispatcher.DEFAULT_REQUESTED_BY_USER_ID)
            models.SpecialTask.objects.create(
                    task=models.SpecialTask.Task.RESET,
                    host=models.Host.objects.get(id=host.id),
                    requested_by=requested_by)


    def _check_for_unrecovered_verifying_entries(self):
        """
        Requeue Resetting queue entries that have no pending special task.

        Every host queue entry in status RESETTING that has no incomplete
        CLEANUP, VERIFY or RESET special task attached is set to 'Queued'.
        """
        # Verify is replaced by Reset.
        queue_entries = scheduler_models.HostQueueEntry.fetch(
                where='status = "%s"' % models.HostQueueEntry.Status.RESETTING)
        for queue_entry in queue_entries:
            special_tasks = models.SpecialTask.objects.filter(
                    task__in=(models.SpecialTask.Task.CLEANUP,
                              models.SpecialTask.Task.VERIFY,
                              models.SpecialTask.Task.RESET),
                    queue_entry__id=queue_entry.id,
                    is_complete=False)
            if special_tasks.count() == 0:
                logging.error('Unrecovered Resetting host queue entry: %s. '
                              'Setting status to Queued.', queue_entry)
                # Essentially this host queue entry was set to be Resetting
                # however no special task exists for entry. This occurs if the
                # scheduler dies between changing the status and creating the
                # special task. By setting it to queued, the job can restart
                # from the beginning and proceed correctly. This is much more
                # preferable than having monitor_db not launching.
                queue_entry.set_status('Queued')