15e2bb4aa28611aaacaa8798fd07943ede1df46c6beeps#pylint: disable-msg=C0111
25e2bb4aa28611aaacaa8798fd07943ede1df46c6beeps
35e2bb4aa28611aaacaa8798fd07943ede1df46c6beeps"""
45e2bb4aa28611aaacaa8798fd07943ede1df46c6beepsPrejob tasks.
55e2bb4aa28611aaacaa8798fd07943ede1df46c6beeps
65e2bb4aa28611aaacaa8798fd07943ede1df46c6beepsPrejob tasks _usually_ run before a job and verify the state of a machine.
75e2bb4aa28611aaacaa8798fd07943ede1df46c6beepsCleanup and repair are exceptions, cleanup can run after a job too, while
85e2bb4aa28611aaacaa8798fd07943ede1df46c6beepsrepair will run anytime the host needs a repair, which could be pre or post
95e2bb4aa28611aaacaa8798fd07943ede1df46c6beepsjob. Most of the work specific to this module is achieved through the prolog
105e2bb4aa28611aaacaa8798fd07943ede1df46c6beepsand epilog of each task.
115e2bb4aa28611aaacaa8798fd07943ede1df46c6beeps
125e2bb4aa28611aaacaa8798fd07943ede1df46c6beepsAll prejob tasks must have a host, though they may not have an HQE. If a
135e2bb4aa28611aaacaa8798fd07943ede1df46c6beepsprejob task has a hqe, it will activate the hqe through its on_pending
14ec1c4b22229677d8654159a6f40bbb9d80199278beepsmethod on successful completion. A row in afe_special_tasks with values:
155e2bb4aa28611aaacaa8798fd07943ede1df46c6beeps    host=C1, unlocked, is_active=0, is_complete=0, type=Verify
165e2bb4aa28611aaacaa8798fd07943ede1df46c6beepswill indicate to the scheduler that it needs to schedule a new special task
175e2bb4aa28611aaacaa8798fd07943ede1df46c6beepsof type=Verify, against the C1 host. While the special task is running
185e2bb4aa28611aaacaa8798fd07943ede1df46c6beepsthe scheduler only monitors it through the Agent, and its is_active bit=1.
195e2bb4aa28611aaacaa8798fd07943ede1df46c6beepsOnce a special task finishes, we set its is_active=0, is_complete=1 and
205e2bb4aa28611aaacaa8798fd07943ede1df46c6beepssuccess bits, so the scheduler ignores it.
215e2bb4aa28611aaacaa8798fd07943ede1df46c6beepsHQE.on_pending:
225e2bb4aa28611aaacaa8798fd07943ede1df46c6beeps    Host, HQE -> Pending, Starting
235e2bb4aa28611aaacaa8798fd07943ede1df46c6beeps    This status is acted upon in the scheduler, to assign an AgentTask.
245e2bb4aa28611aaacaa8798fd07943ede1df46c6beepsPreJobTask:
255e2bb4aa28611aaacaa8798fd07943ede1df46c6beeps    epilog:
265e2bb4aa28611aaacaa8798fd07943ede1df46c6beeps        failure:
275e2bb4aa28611aaacaa8798fd07943ede1df46c6beeps            requeue hqe
285e2bb4aa28611aaacaa8798fd07943ede1df46c6beeps            repair the host
295e2bb4aa28611aaacaa8798fd07943ede1df46c6beepsChildren PreJobTasks:
305e2bb4aa28611aaacaa8798fd07943ede1df46c6beeps    prolog:
315e2bb4aa28611aaacaa8798fd07943ede1df46c6beeps        set Host, HQE status
325e2bb4aa28611aaacaa8798fd07943ede1df46c6beeps    epilog:
335e2bb4aa28611aaacaa8798fd07943ede1df46c6beeps        success:
345e2bb4aa28611aaacaa8798fd07943ede1df46c6beeps            on_pending
355e2bb4aa28611aaacaa8798fd07943ede1df46c6beeps        failure:
            repair through PreJobTask
375e2bb4aa28611aaacaa8798fd07943ede1df46c6beeps            set Host, HQE status
38ec1c4b22229677d8654159a6f40bbb9d80199278beeps
Failing a prejob task affects both the Host and the HQE, as follows:
40ec1c4b22229677d8654159a6f40bbb9d80199278beeps
41ec1c4b22229677d8654159a6f40bbb9d80199278beeps- Host: PreJob failure will result in a Repair job getting queued against
the host, if we haven't already tried repairing it more than the
43ec1c4b22229677d8654159a6f40bbb9d80199278beepsmax_repair_limit. When this happens, the host will remain in whatever status
44ec1c4b22229677d8654159a6f40bbb9d80199278beepsthe prejob task left it in, till the Repair job puts it into 'Repairing'. This
45ec1c4b22229677d8654159a6f40bbb9d80199278beepsway the host_scheduler won't pick bad hosts and assign them to jobs.
46ec1c4b22229677d8654159a6f40bbb9d80199278beeps
47ec1c4b22229677d8654159a6f40bbb9d80199278beepsIf we have already tried repairing the host too many times, the PreJobTask
48ec1c4b22229677d8654159a6f40bbb9d80199278beepswill flip the host to 'RepairFailed' in its epilog, and it will remain in this
49ec1c4b22229677d8654159a6f40bbb9d80199278beepsstate till it is recovered and reverified.
50ec1c4b22229677d8654159a6f40bbb9d80199278beeps
51ec1c4b22229677d8654159a6f40bbb9d80199278beeps- HQE: Is either requeued or failed. Requeuing the HQE involves putting it
52ec1c4b22229677d8654159a6f40bbb9d80199278beepsin the Queued state and setting its host_id to None, so it gets a new host
53ec1c4b22229677d8654159a6f40bbb9d80199278beepsin the next scheduler tick. Failing the HQE results in either a Parsing
54ec1c4b22229677d8654159a6f40bbb9d80199278beepsor Archiving postjob task, and an eventual Failed status for the HQE.
555e2bb4aa28611aaacaa8798fd07943ede1df46c6beeps"""
56ec1c4b22229677d8654159a6f40bbb9d80199278beeps
575e2bb4aa28611aaacaa8798fd07943ede1df46c6beepsimport logging
585e2bb4aa28611aaacaa8798fd07943ede1df46c6beepsimport os
595e2bb4aa28611aaacaa8798fd07943ede1df46c6beeps
605e2bb4aa28611aaacaa8798fd07943ede1df46c6beepsfrom autotest_lib.client.common_lib import host_protections
615e2bb4aa28611aaacaa8798fd07943ede1df46c6beepsfrom autotest_lib.frontend.afe import models
625e2bb4aa28611aaacaa8798fd07943ede1df46c6beepsfrom autotest_lib.scheduler import agent_task, scheduler_config
635e2bb4aa28611aaacaa8798fd07943ede1df46c6beepsfrom autotest_lib.server import autoserv_utils
645e2bb4aa28611aaacaa8798fd07943ede1df46c6beepsfrom autotest_lib.server.cros import provision
655e2bb4aa28611aaacaa8798fd07943ede1df46c6beeps
665e2bb4aa28611aaacaa8798fd07943ede1df46c6beeps
class PreJobTask(agent_task.SpecialAgentTask):
    """Shared base for the special tasks defined in this module.

    Provides the common failure handling in epilog() (requeue or fail the
    host queue entry, then queue a repair for the host) and the
    _should_pending() check used by subclasses.
    """

    def _copy_to_results_repository(self):
        """Copy this task's autoserv debug log next to the HQE's results.

        Does nothing when there is no queue entry, or when the queue entry
        has meta_host set.
        """
        entry = self.queue_entry
        if not entry or entry.meta_host:
            return

        entry.set_execution_subdir()

        # The copied log is named after the task's execution directory.
        log_name = os.path.basename(self.task.execution_path())
        debug_log = os.path.join(
                self.task.execution_path(), 'debug', 'autoserv.DEBUG')
        target = os.path.join(entry.execution_path(), log_name)

        self.monitor.try_copy_to_results_repository(
                debug_log, destination_path=target)


    def epilog(self):
        """Run the base epilog, then deal with a failed task.

        On failure (unless the host is protected with DO_NOT_VERIFY):
          - with a queue entry: mark its incomplete special tasks on this host
            as complete and unsuccessful, then either fail the entry (too many
            provision attempts) or requeue it; if the repair limit for this
            entry and host has been reached, set the host to REPAIR_FAILED,
            fail the entry and stop.
          - in all remaining cases: create a REPAIR special task for the host.
        """
        super(PreJobTask, self).epilog()

        if self.success:
            return

        if self.host.protection == host_protections.Protection.DO_NOT_VERIFY:
            # Failures on these hosts are deliberately treated as successes.
            self.success = True
            return

        repair_hqe = None
        if self.queue_entry:
            # A requeued HQE must not keep pending pre-job tasks against this
            # host; otherwise a queued HQE would still have special tasks
            # waiting to run against a host.
            unfinished = models.SpecialTask.objects.filter(
                    queue_entry__id=self.queue_entry.id,
                    host__id=self.host.id,
                    is_complete=0)
            unfinished.update(is_complete=1, success=0)

            provision_attempts = models.SpecialTask.objects.filter(
                    task=models.SpecialTask.Task.PROVISION,
                    queue_entry_id=self.queue_entry.id).count()
            retry_cap = scheduler_config.config.max_provision_retries
            if provision_attempts > retry_cap:
                self._actually_fail_queue_entry()
                # Aborting here would set the aborted bit on the HQE (and
                # recursively abort child jobs, which is harmless since only
                # suites have children and those are hostless, so they never
                # provision).
                # TODO(milleral) http://crbug.com/188217
                # We cannot do that yet: with the abort bit set, the
                # FinalReparseTask would mark the HQE ABORTED and run_suite
                # would no longer show its status. Until then the HQE is not
                # marked as aborted.
                # queue_entry.abort()
            else:
                # requeue() has to follow the provision-retry handling because
                # _actually_fail_queue_entry needs an execution subdir. It is
                # also skipped once the retry limit is hit, since requeuing
                # would overwrite the PARSING state of the HQE.
                self.queue_entry.requeue()

            # Cap the repairs triggered by failing prejob tasks (reset, verify,
            # ...). The count is specific to this HQE and host.
            repair_attempts = models.SpecialTask.objects.filter(
                    task=models.SpecialTask.Task.REPAIR,
                    queue_entry_id=self.queue_entry.id,
                    host_id=self.queue_entry.host_id).count()
            if repair_attempts >= scheduler_config.config.max_repair_limit:
                self.host.set_status(models.Host.Status.REPAIR_FAILED)
                self._fail_queue_entry()
                return

            repair_hqe = models.HostQueueEntry.objects.get(
                    id=self.queue_entry.id)

        models.SpecialTask.objects.create(
                host=models.Host.objects.get(id=self.host.id),
                task=models.SpecialTask.Task.REPAIR,
                queue_entry=repair_hqe,
                requested_by=self.task.requested_by)


    def _should_pending(self):
        """
        Tell whether the host queue entry's on_pending method should be called.

        That is the case when all of the following hold:
        1) This task has an associated host queue entry.
        2) This special task succeeded.
        3) No other special task for this host and queue entry is still
           waiting to run (neither active nor complete).

        @returns: True if on_pending should be called, False otherwise.

        """
        if not (self.queue_entry and self.success):
            return False

        # Whether a task is the last one is known when it is created, so this
        # could be stored in an extra database column; the query here is
        # expected to be cheap enough instead.
        hqe_model = models.HostQueueEntry.objects.get(id=self.queue_entry.id)
        still_waiting = models.SpecialTask.objects.filter(
                host__id=self.host.id, is_active=False,
                is_complete=False, queue_entry=hqe_model
        ).exclude(id=self.task.id)
        return still_waiting.count() == 0
1765e2bb4aa28611aaacaa8798fd07943ede1df46c6beeps
1775e2bb4aa28611aaacaa8798fd07943ede1df46c6beeps
class VerifyTask(PreJobTask):
    """Prejob task of type VERIFY; invokes autoserv with '-v'."""

    TASK_TYPE = models.SpecialTask.Task.VERIFY


    def __init__(self, task):
        """
        @param task: The special task to run. When it has a queue entry, the
                     autoserv label args generated for it are appended.
        """
        autoserv_args = ['-v']
        if task.queue_entry:
            autoserv_args += self._generate_autoserv_label_args(task)
        super(VerifyTask, self).__init__(task, autoserv_args)
        self._set_ids(host=self.host, queue_entries=[self.queue_entry])


    def prolog(self):
        """Move the HQE (if any) and the host to VERIFYING."""
        super(VerifyTask, self).prolog()

        logging.info("starting verify on %s", self.host.hostname)
        if self.queue_entry:
            self.queue_entry.set_status(models.HostQueueEntry.Status.VERIFYING)
        self.host.set_status(models.Host.Status.VERIFYING)

        # A single verify is enough: drop the other queued manual reverifies
        # for this host, there is no need to keep records of those requests.
        self.remove_special_tasks(models.SpecialTask.Task.VERIFY,
                                  keep_last_one=True)


    def epilog(self):
        """On success, activate the HQE or mark the host READY."""
        super(VerifyTask, self).epilog()
        if not self.success:
            return

        if self._should_pending():
            self.queue_entry.on_pending()
        else:
            self.host.set_status(models.Host.Status.READY)
2115e2bb4aa28611aaacaa8798fd07943ede1df46c6beeps
2125e2bb4aa28611aaacaa8798fd07943ede1df46c6beeps
class CleanupTask(PreJobTask):
    """Special task of type CLEANUP; invokes autoserv with '--cleanup'."""
    # Cleanup may also run after a job. In that case it runs standalone
    # against the host (unrelated to the job), which is why it is not treated
    # as a PostJobTask.

    TASK_TYPE = models.SpecialTask.Task.CLEANUP


    def __init__(self, task, recover_run_monitor=None):
        """
        @param task: The special task to run. When it has a queue entry, the
                     autoserv label args generated for it are appended.
        @param recover_run_monitor: Accepted but not used by this constructor.
        """
        autoserv_args = ['--cleanup']
        if task.queue_entry:
            autoserv_args += self._generate_autoserv_label_args(task)
        super(CleanupTask, self).__init__(task, autoserv_args)
        self._set_ids(host=self.host, queue_entries=[self.queue_entry])


    def prolog(self):
        """Move the host and the HQE (if any) to CLEANING."""
        super(CleanupTask, self).prolog()
        logging.info("starting cleanup task for host: %s", self.host.hostname)
        self.host.set_status(models.Host.Status.CLEANING)
        if self.queue_entry:
            self.queue_entry.set_status(models.HostQueueEntry.Status.CLEANING)


    def _finish_epilog(self):
        """After a successful cleanup for an HQE, queue a verify or go pending.

        A VERIFY special task is created when the job asks for verification
        and the host is not protected with DO_NOT_VERIFY; otherwise the HQE is
        activated if _should_pending() allows it.
        """
        if not (self.queue_entry and self.success):
            return

        do_not_verify = host_protections.Protection.DO_NOT_VERIFY
        verify_wanted = (self.queue_entry.job.run_verify
                         and self.host.protection != do_not_verify)
        if verify_wanted:
            hqe_model = models.HostQueueEntry.objects.get(
                    id=self.queue_entry.id)
            models.SpecialTask.objects.create(
                    host=models.Host.objects.get(id=self.host.id),
                    queue_entry=hqe_model,
                    task=models.SpecialTask.Task.VERIFY)
        elif self._should_pending():
            self.queue_entry.on_pending()


    def epilog(self):
        """On success, clear the host's dirty flag and mark it READY."""
        super(CleanupTask, self).epilog()

        if self.success:
            self.host.update_field('dirty', 0)
            self.host.set_status(models.Host.Status.READY)

        self._finish_epilog()
2645e2bb4aa28611aaacaa8798fd07943ede1df46c6beeps
2655e2bb4aa28611aaacaa8798fd07943ede1df46c6beeps
class ResetTask(PreJobTask):
    """Special task of type RESET for a DUT (covers cleanup and verify)."""
    # Reset may also run after a job. In that case it runs standalone against
    # the host (unrelated to the job), which is why it is not treated as a
    # PostJobTask.

    TASK_TYPE = models.SpecialTask.Task.RESET


    def __init__(self, task, recover_run_monitor=None):
        """
        @param task: The special task to run. When it has a queue entry, the
                     autoserv label args generated for it are appended.
        @param recover_run_monitor: Accepted but not used by this constructor.
        """
        autoserv_args = ['--reset']
        if task.queue_entry:
            autoserv_args += self._generate_autoserv_label_args(task)
        super(ResetTask, self).__init__(task, autoserv_args)
        self._set_ids(host=self.host, queue_entries=[self.queue_entry])


    def prolog(self):
        """Move host and HQE (if any) to RESETTING and prune queued tasks."""
        super(ResetTask, self).prolog()
        logging.info('starting reset task for host: %s', self.host.hostname)
        self.host.set_status(models.Host.Status.RESETTING)
        if self.queue_entry:
            self.queue_entry.set_status(models.HostQueueEntry.Status.RESETTING)

        task_types = models.SpecialTask.Task

        # Queued cleanups for this host are no longer needed.
        self.remove_special_tasks(task_types.CLEANUP, keep_last_one=False)

        # Neither are queued reverifies.
        self.remove_special_tasks(task_types.VERIFY, keep_last_one=False)

        # One reset is sufficient.
        self.remove_special_tasks(task_types.RESET, keep_last_one=True)


    def epilog(self):
        """On success, clear the dirty flag, then go pending or READY."""
        super(ResetTask, self).epilog()

        if not self.success:
            return

        self.host.update_field('dirty', 0)

        if self._should_pending():
            self.queue_entry.on_pending()
        else:
            self.host.set_status(models.Host.Status.READY)
3145e2bb4aa28611aaacaa8798fd07943ede1df46c6beeps
3155e2bb4aa28611aaacaa8798fd07943ede1df46c6beeps
class ProvisionTask(PreJobTask):
    """Prejob task of type PROVISION; always tied to a host queue entry."""

    TASK_TYPE = models.SpecialTask.Task.PROVISION

    def __init__(self, task):
        """
        @param task: The special task to run; it must have a queue entry.
        """
        # A provision task only makes sense for a job/queue entry.
        assert task.queue_entry, "No HQE associated with provision task!"
        # task.queue_entry is the afe model HostQueueEntry. The scheduler's
        # own HostQueueEntry (self.queue_entry) is only built and assigned
        # inside __init__, so it does not exist yet. The labels therefore have
        # to come from the afe model in order to hand the --provision args to
        # the parent constructor.
        job_labels = set(label.name for label in task.queue_entry.job.labels)
        _, provisionable = provision.filter_labels(job_labels)
        provision_args = ['--provision',
                          '--job-labels', ','.join(provisionable)]
        super(ProvisionTask, self).__init__(task, provision_args)
        self._set_ids(host=self.host, queue_entries=[self.queue_entry])


    def _command_line(self):
        """Build the autoserv command line without passing the queue entry."""
        # Handing the queue_entry to _autoserv_command_line would make it add
        # -c for client side tests, which interferes with provisioning, so the
        # queue entry is simply left out of the arguments.
        # job_repo_url is not verified either: provisioning tasks must stage
        # whatever content is needed, and the job itself forces autotest to be
        # staged if it is not there already.
        hostname = self.host.hostname
        return autoserv_utils._autoserv_command_line(
                hostname, self._extra_command_args, in_lab=True)


    def prolog(self):
        """Move the HQE and the host to PROVISIONING."""
        super(ProvisionTask, self).prolog()
        # add check for previous provision task and abort if exist.
        logging.info("starting provision task for host: %s", self.host.hostname)
        provisioning = models.HostQueueEntry.Status.PROVISIONING
        self.queue_entry.set_status(provisioning)
        self.host.set_status(models.Host.Status.PROVISIONING)


    def epilog(self):
        """Activate the HQE when _should_pending() allows it."""
        super(ProvisionTask, self).epilog()

        # After a failed provision the DUT stays in whatever status the
        # PreJobTask epilog left it in. After a successful one the host status
        # follows from the hqe's on_pending. When on_pending is not called,
        # the reason is one of:
        #   1. The task failed:
        #       a. Another repair is queued: that repair job sets the host
        #       status, which stays 'Provisioning' until then.
        #       b. max_repair_limit was reached: the host status was set to
        #       'RepairFailed' in the PreJobTask epilog.
        #   2. The task succeeded but other special tasks remain:
        #      those tasks will set the host status appropriately.
        if self._should_pending():
            self.queue_entry.on_pending()
3745e2bb4aa28611aaacaa8798fd07943ede1df46c6beeps
3755e2bb4aa28611aaacaa8798fd07943ede1df46c6beeps
class RepairTask(agent_task.SpecialAgentTask):
    """Special agent task of type REPAIR, run against a single host."""
    TASK_TYPE = models.SpecialTask.Task.REPAIR


    def __init__(self, task):
        """
        Assemble the autoserv repair arguments and register the host id.

        @param task: Special task to run. Its host's protection level is
                passed to autoserv; label arguments are appended only when
                the task has a queue entry. That queue entry (if any) is
                the one failed by epilog when the repair does not succeed
                (presumably exposed as self.queue_entry by the parent
                class -- confirm there).
        """
        # Convert the stored protection value to its string form, then
        # normalize that string to the attribute name.
        protection_name = host_protections.Protection.get_attr_name(
                host_protections.Protection.get_string(task.host.protection))

        autoserv_args = ['-R', '--host-protection', protection_name]
        if task.queue_entry:
            label_args = self._generate_autoserv_label_args(task)
            autoserv_args.extend(label_args)

        super(RepairTask, self).__init__(task, autoserv_args)

        # The queue entry is intentionally left out of the ids: aborting
        # the queue entry must not stop a repair that is already running.
        self._set_ids(host=self.host)


    def prolog(self):
        """Run the parent prolog, log the start and mark the host Repairing."""
        super(RepairTask, self).prolog()
        logging.info("repair_task starting")
        repairing = models.Host.Status.REPAIRING
        self.host.set_status(repairing)


    def epilog(self):
        """
        Record the repair outcome on the host.

        A failed repair marks the host Repair Failed and, when a queue
        entry is attached, fails that entry too. A successful repair
        marks the host Ready.
        """
        super(RepairTask, self).epilog()

        if not self.success:
            self.host.set_status(models.Host.Status.REPAIR_FAILED)
            if self.queue_entry:
                self._fail_queue_entry()
            return

        self.host.set_status(models.Host.Status.READY)
415