# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

#pylint: disable-msg=C0111

import logging
import os
import time

from autotest_lib.client.common_lib import global_config
from autotest_lib.frontend.afe import models
from autotest_lib.scheduler import email_manager
from autotest_lib.scheduler import scheduler_config, scheduler_models


# Override default parser with our site parser.
def parser_path(install_dir):
    """Return site implementation of parser.

    @param install_dir: installation directory.

    @return: path of the site parser, i.e. install_dir/tko/site_parse.
    """
    return os.path.join(install_dir, 'tko', 'site_parse')


class SiteAgentTask(object):
    """
    Site-specific agent task behaviour.

    According to the original author this is combined with BaseAgentTask in
    monitor_db. It relies on members that are not defined in this class
    (e.g. self._final_status()), so it is not usable on its own.
    """


    def _archive_results(self, queue_entries):
        """
        Set the status of queue_entries to ARCHIVING.

        This method sets the status of the queue_entries to ARCHIVING
        if the enable_archiving flag is true in global_config.ini.
        Otherwise, it bypasses the archiving step and sets the queue entries
        to the final status of current step.

        @param queue_entries: host queue entries whose status is updated.
        """
        enable_archiving = global_config.global_config.get_config_value(
            scheduler_config.CONFIG_SECTION, 'enable_archiving', type=bool)
        # Set the status of the queue entries to archiving or self final status
        if enable_archiving:
            status = models.HostQueueEntry.Status.ARCHIVING
        else:
            status = self._final_status()

        # Act on the entries the caller handed in rather than silently
        # ignoring the argument in favour of self.queue_entries.
        for queue_entry in queue_entries:
            queue_entry.set_status(status)


    @staticmethod
    def _abort_job_and_notify(entry, subject, error_message):
        """
        Log the error, enqueue a notification email and abort the entry's job.

        @param entry: host queue entry whose job is aborted.
        @param subject: subject line of the notification email.
        @param error_message: text that is logged and used as the email body.
        """
        logging.error(error_message)
        email_manager.manager.enqueue_notify_email(subject, error_message)
        entry.job.request_abort()


    def _check_queue_entry_statuses(self, queue_entries, allowed_hqe_statuses,
                                    allowed_host_statuses=None):
        """
        Abort the job of every queue entry that is in an unexpected state.

        Forked from monitor_db.py. In the original code an exception was
        raised on the first invalid status. In an effort to prevent downtime
        we instead abort the job and send out an email notifying us that this
        has occurred.

        @param queue_entries: host queue entries to validate.
        @param allowed_hqe_statuses: statuses a queue entry may have.
        @param allowed_host_statuses: statuses the host of a queue entry may
                have, or None to skip the host status check.
        """
        class_name = self.__class__.__name__
        for entry in queue_entries:
            if entry.status not in allowed_hqe_statuses:
                error_message = ('%s attempting to start entry with invalid '
                                 'status %s: %s. Aborting Job: %s.'
                                 % (class_name, entry.status, entry,
                                    entry.job))
                self._abort_job_and_notify(
                    entry, 'Job Aborted - Invalid Host Queue Entry Status',
                    error_message)
                # The original raise stopped processing here; the job is
                # already being aborted, so do not report and abort it a
                # second time because of the host status.
                continue
            invalid_host_status = (
                    allowed_host_statuses is not None
                    and entry.host.status not in allowed_host_statuses)
            if invalid_host_status:
                error_message = ('%s attempting to start on queue entry with '
                                 'invalid host status %s: %s. Aborting Job: %s'
                                 % (class_name, entry.host.status, entry,
                                    entry.job))
                self._abort_job_and_notify(
                    entry, 'Job Aborted - Invalid Host Status', error_message)


class SiteDispatcher(object):
    """
    Site-specific dispatcher behaviour.

    According to the original author this is combined with BaseDispatcher in
    monitor_db. It relies on members that are not defined in this class
    (self.host_has_agent, self._host_has_scheduled_special_task).
    """
    # Id of the user recorded as requester when the 'autotest_system' login
    # does not exist.
    DEFAULT_REQUESTED_BY_USER_ID = 1


    @staticmethod
    def _get_reset_requester():
        """
        Return the user recorded as requester of scheduler-created tasks.

        @return: the 'autotest_system' user if it exists, otherwise the user
                with id DEFAULT_REQUESTED_BY_USER_ID.
        """
        try:
            return models.User.objects.get(login='autotest_system')
        except models.User.DoesNotExist:
            return models.User.objects.get(
                    id=SiteDispatcher.DEFAULT_REQUESTED_BY_USER_ID)


    def _reverify_hosts_where(self, where,
                              print_message='Reverifying host %s'):
        """
        Create a Reset special task for every matching host that needs one.

        This is an altered version of _reverify_hosts_where: the call to
        models.SpecialTask.objects.create passes in an argument for
        requested_by, in order to allow the Reset task to be created
        properly.

        @param where: SQL condition selecting hosts; it is ANDed with
                'locked = 0 AND invalid = 0'.
        @param print_message: log format taking the hostname; nothing is
                logged when it is empty or None.
        """
        full_where = 'locked = 0 AND invalid = 0 AND ' + where
        # Resolved on first use only: the requester is the same for every
        # host, and no lookup is needed when no host requires a task.
        requester = None
        for host in scheduler_models.Host.fetch(where=full_where):
            if self.host_has_agent(host):
                # host has already been recovered in some way
                continue
            if self._host_has_scheduled_special_task(host):
                # host will have a special task scheduled on the next cycle
                continue
            if print_message:
                logging.error(print_message, host.hostname)
            if requester is None:
                requester = self._get_reset_requester()
            models.SpecialTask.objects.create(
                    task=models.SpecialTask.Task.RESET,
                    host=models.Host.objects.get(id=host.id),
                    requested_by=requester)


    def _check_for_unrecovered_verifying_entries(self):
        """
        Requeue Resetting queue entries that have no pending special task.

        Verify is replaced by Reset, so entries in RESETTING status are
        examined. An entry with no incomplete Cleanup, Verify or Reset
        special task is set back to Queued.
        """
        queue_entries = scheduler_models.HostQueueEntry.fetch(
                where='status = "%s"' % models.HostQueueEntry.Status.RESETTING)
        for queue_entry in queue_entries:
            special_tasks = models.SpecialTask.objects.filter(
                    task__in=(models.SpecialTask.Task.CLEANUP,
                              models.SpecialTask.Task.VERIFY,
                              models.SpecialTask.Task.RESET),
                    queue_entry__id=queue_entry.id,
                    is_complete=False)
            if special_tasks.count() == 0:
                logging.error('Unrecovered Resetting host queue entry: %s. '
                              'Setting status to Queued.', queue_entry)
                # Essentially this host queue entry was set to be Resetting
                # however no special task exists for entry. This occurs if the
                # scheduler dies between changing the status and creating the
                # special task. By setting it to queued, the job can restart
                # from the beginning and proceed correctly. This is much more
                # preferable than having monitor_db not launching.
                queue_entry.set_status('Queued')
147