#pylint: disable-msg=C0111

# Copyright (c) 2014 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Scheduler library classes.
"""

import collections
import logging

import common

from autotest_lib.client.common_lib.cros.graphite import autotest_stats
from autotest_lib.frontend import setup_django_environment
from autotest_lib.frontend.afe import models
from autotest_lib.server.cros.dynamic_suite import constants
from autotest_lib.scheduler import scheduler_models
from autotest_lib.scheduler import scheduler_lib


_job_timer = autotest_stats.Timer('scheduler.job_query_manager')
class AFEJobQueryManager(object):
    """Query manager for AFE Jobs."""

    # A subquery that matches only hostless entries, i.e. entries with
    # neither a host nor a metahost.
    hostless_query = 'host_id IS NULL AND meta_host IS NULL'


    @_job_timer.decorate
    def get_pending_queue_entries(self, only_hostless=False):
        """
        Fetch a list of new host queue entries.

        The ordering of this list is important, as every new agent
        we schedule can potentially contribute to the process count
        on the drone, which has a static limit. The sort order
        prioritizes jobs as follows:
        1. High priority jobs: Based on the afe_job's priority
        2. With hosts and metahosts: This will only happen if we don't
            activate the hqe after assigning a host to it in
            schedule_new_jobs.
        3. With hosts but without metahosts: When tests are scheduled
            through the frontend the owner of the job would have chosen
            a host for it.
        4. Without hosts but with metahosts: This is the common case of
            a new test that needs a DUT. We assign a host and set it to
            active so it shouldn't show up in case 2 on the next tick.
        5. Without hosts and without metahosts: Hostless suite jobs, that
            will result in new jobs that fall under category 4.

        A note about the ordering of cases 3 and 4:
        Prioritizing one case above the other leads to earlier acquisition
        of the following resources: 1. process slots on the drone 2. machines.
        - When a user schedules a job through the afe they choose a specific
          host for it. Jobs with metahost can utilize any host that satisfies
          the metahost criterion. This means that if we had scheduled 4 before
          3 there is a good chance that a job which could've used another host,
          will now use the host assigned to a metahost-less job. Given the
          availability of machines in pool:suites, this almost guarantees
          starvation for jobs scheduled through the frontend.
        - Scheduling 4 before 3 also has its pros however, since a suite
          has the concept of a time out, whereas users can wait. If we hit the
          process count on the drone a suite can timeout waiting on the test,
          but a user job generally has a much longer timeout, and relatively
          harmless consequences.
        The current ordering was chosen because it is more likely that we will
        run out of machines in pool:suites than processes on the drone.

        @param only_hostless: If True, restrict the result to entries that
            match hostless_query (no host and no metahost).

        @returns A list of HQEs ordered according to sort_order.
        """
        sort_order = ('afe_jobs.priority DESC, '
                      'ISNULL(host_id), '
                      'ISNULL(meta_host), '
                      'parent_job_id, '
                      'job_id')
        # Don't execute jobs that should be executed by a shard in the global
        # scheduler.
        # This won't prevent the shard scheduler from running this, as the
        # shard db doesn't have an entry in afe_shards_labels.
        #
        # Note the trailing space on the first fragment: the two literals are
        # concatenated, and without it the SQL would read
        # 'status="Queued"AND NOT aborted'.
        query = ('NOT complete AND NOT active AND status="Queued" '
                 'AND NOT aborted AND afe_shards_labels.id IS NULL')

        # TODO(jakobjuelich, beeps): Optimize this query. Details:
        # Compressed output of EXPLAIN <query>:
        # +------------------------+--------+-------------------------+-------+
        # | table                  | type   | key                     | rows  |
        # +------------------------+--------+-------------------------+-------+
        # | afe_host_queue_entries | ref    | host_queue_entry_status | 30536 |
        # | afe_shards_labels      | ref    | shard_label_id_fk       | 1     |
        # | afe_jobs               | eq_ref | PRIMARY                 | 1     |
        # +------------------------+--------+-------------------------+-------+
        # This shows the first part of the query fetches a lot of objects, that
        # are then filtered. The joins are comparably fast: There's usually just
        # one or none shard mapping that can be answered fully using an index
        # (shard_label_id_fk), similar thing applies to the job.
        #
        # This works for now, but once O(#Jobs in shard) << O(#Jobs in Queued),
        # it might be more efficient to filter on the meta_host first, instead
        # of the status.
        if only_hostless:
            query = '%s AND (%s)' % (query, self.hostless_query)
        return list(scheduler_models.HostQueueEntry.fetch(
            joins=('INNER JOIN afe_jobs ON (job_id=afe_jobs.id) '
                   'LEFT JOIN afe_shards_labels ON ('
                   'meta_host=afe_shards_labels.label_id)'),
            where=query, order_by=sort_order))


    @_job_timer.decorate
    def get_prioritized_special_tasks(self, only_tasks_with_leased_hosts=False):
        """
        Returns all queued SpecialTasks prioritized for repair first, then
        cleanup, then verify, then reset, then provision.

        @param only_tasks_with_leased_hosts: If true, this method only returns
            tasks with leased hosts.

        @return: list of afe.models.SpecialTasks sorted according to priority.
        """
        queued_tasks = models.SpecialTask.objects.filter(is_active=False,
                                                         is_complete=False,
                                                         host__locked=False)
        # exclude hosts with active queue entries unless the SpecialTask is for
        # that queue entry
        queued_tasks = models.SpecialTask.objects.add_join(
                queued_tasks, 'afe_host_queue_entries', 'host_id',
                join_condition='afe_host_queue_entries.active',
                join_from_key='host_id', force_left_join=True)
        queued_tasks = queued_tasks.extra(
                where=['(afe_host_queue_entries.id IS NULL OR '
                       'afe_host_queue_entries.id = '
                       'afe_special_tasks.queue_entry_id)'])
        if only_tasks_with_leased_hosts:
            queued_tasks = queued_tasks.filter(host__leased=True)

        # reorder tasks by priority; earlier entries are scheduled first.
        task_priority_order = [models.SpecialTask.Task.REPAIR,
                               models.SpecialTask.Task.CLEANUP,
                               models.SpecialTask.Task.VERIFY,
                               models.SpecialTask.Task.RESET,
                               models.SpecialTask.Task.PROVISION]
        def task_priority_key(task):
            # list.index raises ValueError for a task type that is not
            # listed above.
            return task_priority_order.index(task.task)
        return sorted(queued_tasks, key=task_priority_key)


    @classmethod
    def get_overlapping_jobs(cls):
        """A helper method to get all active jobs using the same host.

        @return: A list of dictionaries with the hqe id, job_id and host_id
            of the currently overlapping jobs.
        """
        # Filter all active hqes and stand alone special tasks to make sure
        # a host isn't being used by two jobs at the same time. An incomplete
        # stand alone special task can share a host with an active hqe, an
        # example of this is the cleanup scheduled in gathering.
        hqe_hosts = list(models.HostQueueEntry.objects.filter(
                active=1, complete=0, host_id__isnull=False).values_list(
                'host_id', flat=True))
        special_task_hosts = list(models.SpecialTask.objects.filter(
                is_active=1, is_complete=0, host_id__isnull=False,
                queue_entry_id__isnull=True).values_list('host_id', flat=True))
        # A host that appears more than once across both lists is in use by
        # more than one active hqe/stand alone special task.
        host_counts = collections.Counter(hqe_hosts + special_task_hosts)
        multiple_hosts = [host_id for host_id, count in host_counts.items()
                          if count > 1]
        return list(models.HostQueueEntry.objects.filter(
                host_id__in=multiple_hosts, active=True).values(
                        'id', 'job_id', 'host_id'))


    @_job_timer.decorate
    def get_suite_host_assignment(self):
        """A helper method to get how many hosts each suite is holding.

        @return: Two dictionaries (suite_host_num, hosts_to_suites)
                 suite_host_num maps suite job id to number of hosts
                 holding by its child jobs.
                 hosts_to_suites contains current hosts held by
                 any suites, and maps the host id to its parent_job_id.
        """
        # Active, incomplete hqes that have a host and whose job has a parent
        # job.
        query = models.HostQueueEntry.objects.filter(
                host_id__isnull=False, complete=0, active=1,
                job__parent_job_id__isnull=False)
        suite_host_num = {}
        hosts_to_suites = {}
        for hqe in query:
            host_id = hqe.host_id
            parent_job_id = hqe.job.parent_job_id
            count = suite_host_num.get(parent_job_id, 0)
            suite_host_num[parent_job_id] = count + 1
            hosts_to_suites[host_id] = parent_job_id
        return suite_host_num, hosts_to_suites


    @_job_timer.decorate
    def get_min_duts_of_suites(self, suite_job_ids):
        """Load suite_min_duts job keyval for a set of suites.

        @param suite_job_ids: A set of suite job ids.

        @return: A dictionary where the key is a suite job id,
                 the value is the value of 'suite_min_duts'.
        """
        query = models.JobKeyval.objects.filter(
                job_id__in=suite_job_ids,
                key=constants.SUITE_MIN_DUTS_KEY, value__isnull=False)
        # Keyval values are converted with int(); a non-numeric value raises
        # ValueError.
        return dict((keyval.job_id, int(keyval.value)) for keyval in query)


_host_timer = autotest_stats.Timer('scheduler.host_query_manager')
class AFEHostQueryManager(object):
    """Query manager for AFE Hosts."""

    def __init__(self):
        """Create an AFEHostQueryManager.

        The database connection is not passed in; it is obtained from
        scheduler_lib.ConnectionManager.
        """
        self._db = scheduler_lib.ConnectionManager().get_connection()


    def _process_many2many_dict(self, rows, flip=False):
        """Group two-column rows into a dict of sets.

        @param rows: An iterable of rows whose first two items can be
            converted with int().
        @param flip: If True, key on the second column instead of the first.

        @return: A dict mapping each key id to the set of ids paired with it.
        """
        result = {}
        for row in rows:
            left_id, right_id = int(row[0]), int(row[1])
            if flip:
                left_id, right_id = right_id, left_id
            result.setdefault(left_id, set()).add(right_id)
        return result


    def _get_sql_id_list(self, id_list):
        """Render ids as a comma separated string for use in an SQL IN ().

        @param id_list: An iterable of ids.

        @return: The str() of each id joined by commas.
        """
        return ','.join(str(item_id) for item_id in id_list)


    def _get_many2many_dict(self, query, id_list, flip=False):
        """Run a two-column query restricted to id_list and group the rows.

        @param query: A query string containing a single %s placeholder that
            is replaced by the comma separated ids.
        @param id_list: The ids to substitute into the query.
        @param flip: Passed through to _process_many2many_dict.

        @return: A dict of sets, or an empty dict if id_list is empty.
        """
        # An empty id list would render as 'IN ()', so skip the query.
        if not id_list:
            return {}
        query %= self._get_sql_id_list(id_list)
        rows = self._db.execute(query)
        return self._process_many2many_dict(rows, flip)
246f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B 247efa95313e9886896e3fcf43f2ae0cd939ec3562cJakob Juelich @_host_timer.decorate 248f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B def _get_ready_hosts(self): 249f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B # We don't lose anything by re-doing these checks 250f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B # even though we release hosts on the same conditions. 251f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B # In the future we might have multiple clients that 252f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B # release_hosts and/or lock them independent of the 253f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B # scheduler tick. 254f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B hosts = scheduler_models.Host.fetch( 255f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B where="NOT afe_hosts.leased " 256f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B "AND NOT afe_hosts.locked " 257f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B "AND (afe_hosts.status IS NULL " 258f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B "OR afe_hosts.status = 'Ready')") 259f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B return dict((host.id, host) for host in hosts) 260f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B 261f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B 262efa95313e9886896e3fcf43f2ae0cd939ec3562cJakob Juelich @_host_timer.decorate 263f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B def _get_job_acl_groups(self, job_ids): 264f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B query = """ 265f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B SELECT afe_jobs.id, afe_acl_groups_users.aclgroup_id 266f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B FROM afe_jobs 267f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B INNER JOIN afe_users ON afe_users.login = afe_jobs.owner 268f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B INNER JOIN afe_acl_groups_users ON 
269f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B afe_acl_groups_users.user_id = afe_users.id 270f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B WHERE afe_jobs.id IN (%s) 271f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B """ 272f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B return self._get_many2many_dict(query, job_ids) 273f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B 274f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B 275efa95313e9886896e3fcf43f2ae0cd939ec3562cJakob Juelich @_host_timer.decorate 276f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B def _get_job_ineligible_hosts(self, job_ids): 277f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B query = """ 278f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B SELECT job_id, host_id 279f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B FROM afe_ineligible_host_queues 280f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B WHERE job_id IN (%s) 281f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B """ 282f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B return self._get_many2many_dict(query, job_ids) 283f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B 284f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B 285efa95313e9886896e3fcf43f2ae0cd939ec3562cJakob Juelich @_host_timer.decorate 286f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B def _get_job_dependencies(self, job_ids): 287f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B query = """ 288f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B SELECT job_id, label_id 289f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B FROM afe_jobs_dependency_labels 290f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B WHERE job_id IN (%s) 291f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B """ 292f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B return self._get_many2many_dict(query, job_ids) 293f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B 294efa95313e9886896e3fcf43f2ae0cd939ec3562cJakob Juelich @_host_timer.decorate 
295f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B def _get_host_acls(self, host_ids): 296f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B query = """ 297f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B SELECT host_id, aclgroup_id 298f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B FROM afe_acl_groups_hosts 299f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B WHERE host_id IN (%s) 300f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B """ 301f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B return self._get_many2many_dict(query, host_ids) 302f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B 303f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B 304efa95313e9886896e3fcf43f2ae0cd939ec3562cJakob Juelich @_host_timer.decorate 305f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B def _get_label_hosts(self, host_ids): 306f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B if not host_ids: 307f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B return {}, {} 308f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B query = """ 309f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B SELECT label_id, host_id 310f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B FROM afe_hosts_labels 311f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B WHERE host_id IN (%s) 312f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B """ % self._get_sql_id_list(host_ids) 313f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B rows = self._db.execute(query) 314f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B labels_to_hosts = self._process_many2many_dict(rows) 315f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B hosts_to_labels = self._process_many2many_dict(rows, flip=True) 316f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B return labels_to_hosts, hosts_to_labels 317f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B 318f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B 319f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B @classmethod 
320f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B def find_unused_healty_hosts(cls): 321f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B """Get hosts that are currently unused and in the READY state. 322f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B 323f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B @return: A list of host objects, one for each unused healthy host. 324f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B """ 325f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B # Avoid any host with a currently active queue entry against it. 326f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B hqe_join = ('LEFT JOIN afe_host_queue_entries AS active_hqe ' 327f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B 'ON (afe_hosts.id = active_hqe.host_id AND ' 328f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B 'active_hqe.active)') 329f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B 330f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B # Avoid any host with a new special task against it. There are 2 cases 331f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B # when an inactive but incomplete special task will not use the host 332f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B # this tick: 1. When the host is locked 2. When an active hqe already 333f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B # has special tasks for the same host. In both these cases this host 334f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B # will not be in the ready hosts list anyway. In all other cases, 335f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B # an incomplete special task will grab the host before a new job does 336f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B # by assigning an agent to it. 
337f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B special_task_join = ('LEFT JOIN afe_special_tasks as new_tasks ' 338f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B 'ON (afe_hosts.id = new_tasks.host_id AND ' 339f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B 'new_tasks.is_complete=0)') 340f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B 341f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B return scheduler_models.Host.fetch( 342f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B joins='%s %s' % (hqe_join, special_task_join), 343f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B where="active_hqe.host_id IS NULL AND new_tasks.host_id IS NULL " 344f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B "AND afe_hosts.leased " 345f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B "AND NOT afe_hosts.locked " 346f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B "AND (afe_hosts.status IS NULL " 347f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B "OR afe_hosts.status = 'Ready')") 348f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B 349f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B 350efa95313e9886896e3fcf43f2ae0cd939ec3562cJakob Juelich @_host_timer.decorate 351f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B def set_leased(self, leased_value, **kwargs): 352f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B """Modify the leased bit on the hosts with ids in host_ids. 353f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B 354f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B @param leased_value: The True/False value of the leased column for 355f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B the hosts with ids in host_ids. 356f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B @param kwargs: The args to use in finding matching hosts. 
357f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B """ 358f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B logging.info('Setting leased = %s for the hosts that match %s', 359f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B leased_value, kwargs) 360f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B models.Host.objects.filter(**kwargs).update(leased=leased_value) 361f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B 362f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B 363efa95313e9886896e3fcf43f2ae0cd939ec3562cJakob Juelich @_host_timer.decorate 364f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B def _get_labels(self, job_dependencies): 365f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B """ 366f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B Calculate a dict mapping label id to label object so that we don't 367f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B frequently round trip to the database every time we need a label. 368f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B 369f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B @param job_dependencies: A dict mapping an integer job id to a list of 370f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B integer label id's. ie. {job_id: [label_id]} 371f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B @return: A dict mapping an integer label id to a scheduler model label 372f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B object. ie. 
{label_id: label_object} 373f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B 374f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B """ 375f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B id_to_label = dict() 376f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B # Pull all the labels on hosts we might look at 377f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B host_labels = scheduler_models.Label.fetch( 378f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B where="id IN (SELECT label_id FROM afe_hosts_labels)") 379f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B id_to_label.update([(label.id, label) for label in host_labels]) 380f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B # and pull all the labels on jobs we might look at. 381f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B job_label_set = set() 382f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B for job_deps in job_dependencies.values(): 383f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B job_label_set.update(job_deps) 384f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B # On the rare/impossible chance that no jobs have any labels, we 385f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B # can skip this. 
386f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B if job_label_set: 387f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B job_string_label_list = ','.join([str(x) for x in job_label_set]) 388f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B job_labels = scheduler_models.Label.fetch( 389f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B where="id IN (%s)" % job_string_label_list) 390f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B id_to_label.update([(label.id, label) for label in job_labels]) 391f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B return id_to_label 392f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B 393f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B 394efa95313e9886896e3fcf43f2ae0cd939ec3562cJakob Juelich @_host_timer.decorate 395f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B def refresh(self, pending_queue_entries): 396f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B """Update the query manager. 397f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B 398f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B Cache information about a list of queue entries and eligible hosts 399f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B from the database so clients can avoid expensive round trips during 400f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B host acquisition. 401f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B 402f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B @param pending_queue_entries: A list of queue entries about which we 403f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B need information. 
404f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B """ 405f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B self._hosts_available = self._get_ready_hosts() 406f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B relevant_jobs = [queue_entry.job_id 407f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B for queue_entry in pending_queue_entries] 408f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B self._job_acls = self._get_job_acl_groups(relevant_jobs) 409f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B self._ineligible_hosts = (self._get_job_ineligible_hosts(relevant_jobs)) 410f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B self._job_dependencies = (self._get_job_dependencies(relevant_jobs)) 411f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B host_ids = self._hosts_available.keys() 412f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B self._host_acls = self._get_host_acls(host_ids) 413f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B self._label_hosts, self._host_labels = ( 414f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B self._get_label_hosts(host_ids)) 415f66d51b5caa96995b91e7c155ff4378cdef4baafPrashanth B self._labels = self._get_labels(self._job_dependencies) 416