shard_client.py revision b7620760be69fb6a3aeb4efda835383fb1186992
13b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelich#!/usr/bin/python
23b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelich#pylint: disable-msg=C0111
33b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelich
43b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelich# Copyright (c) 2014 The Chromium OS Authors. All rights reserved.
53b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelich# Use of this source code is governed by a BSD-style license that can be
63b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelich# found in the LICENSE file.
73b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelich
83b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelichimport argparse
93b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelichimport datetime
103b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelichimport logging
113b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelichimport os
123b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelichimport signal
13f960d89a7b197fe3b3bd28546c6c89c2331b9f14Jakob Juelichimport socket
143b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelichimport time
153b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelich
163b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelichimport common
173b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelichfrom autotest_lib.frontend import setup_django_environment
183b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelichfrom autotest_lib.client.common_lib import error
193b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelichfrom autotest_lib.client.common_lib import global_config
203b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelichfrom autotest_lib.client.common_lib import logging_manager
21f960d89a7b197fe3b3bd28546c6c89c2331b9f14Jakob Juelichfrom autotest_lib.client.common_lib.cros.graphite import stats
223b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelichfrom autotest_lib.frontend.afe import models, rpc_utils
233b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelichfrom autotest_lib.scheduler import email_manager
248421d5905ab0aed8689c2eea6be8d9c4042ce618Jakob Juelichfrom autotest_lib.server.cros.dynamic_suite import frontend_wrappers
25b7620760be69fb6a3aeb4efda835383fb1186992Jakob Juelichfrom autotest_lib.scheduler.shard import shard_logging_config
263b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelich
273b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelich
283b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelich
293b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelich"""
303b27dbc2358aef655e050a92510ff8e9e080bf81Jakob JuelichAutotest shard client
313b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelich
323b27dbc2358aef655e050a92510ff8e9e080bf81Jakob JuelichThe shard client can be run as standalone service. It periodically polls the
333b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelichmaster in a heartbeat, retrieves new jobs and hosts and inserts them into the
343b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelichlocal database.
353b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelich
363b27dbc2358aef655e050a92510ff8e9e080bf81Jakob JuelichA shard is set up (by a human) and pointed to the global AFE (cautotest).
373b27dbc2358aef655e050a92510ff8e9e080bf81Jakob JuelichOn the shard, this script periodically makes so called heartbeat requests to the
383b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelichglobal AFE, which will then complete the following actions:
393b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelich
403b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelich1. Find the previously created (with atest) record for the shard. Shards are
413b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelich   identified by their hostnames, specified in the shadow_config.
423b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelich2. Take the records that were sent in the heartbeat and insert them into the
433b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelich   global database.
443b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelich   - This is to set the status of jobs to completed in the master database after
453b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelich     they were run by a slave. This is necessary so one can just look at the
463b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelich     master's afe to see the statuses of all jobs. Otherwise one would have to
473b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelich     check the tko tables or the individual slave AFEs.
483b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelich3. Find labels that have been assigned to this shard.
493b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelich4. Assign hosts
503b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelich   - All hosts that have the specified label and aren't leased will be assigned
513b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelich5. Assign jobs, that:
523b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelich   - depend on the specified label
533b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelich   - haven't been assigned before
543b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelich   - aren't started yet
553b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelich   - aren't completed yet
563b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelich6. Serialize the chosen jobs and hosts:
573b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelich   - Find objects that the Host/Job objects depend on: Labels, AclGroups, Users,
583b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelich     and many more. Details about this can be found around
593b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelich     model_logic.serialize()
603b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelich7. Send these objects to the slave.
613b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelich
623b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelich
633b27dbc2358aef655e050a92510ff8e9e080bf81Jakob JuelichOn the client side, this will happen:
643b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelich1. Deserialize the objects sent from the master and persist them to the local
653b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelich   database.
663b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelich2. monitor_db on the shard will pick up these jobs and schedule them on the
673b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelich   available hosts (which were retrieved from a heartbeat).
3. Once a job is finished, its shard_id is set to NULL
693b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelich4. The shard_client will pick up all jobs where shard_id=NULL and will
703b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelich   send them to the master in the request of the next heartbeat.
713b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelich   - The master will persist them as described earlier.
723b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelich   - the shard_id will be set back to the shard's id, so the record won't be
733b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelich     uploaded again.
743b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelich"""
753b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelich
763b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelich
# Name of the RPC that is invoked on the global AFE for every heartbeat.
HEARTBEAT_AFE_ENDPOINT = 'shard_heartbeat'

# Passed to RetryingAFE as timeout_min and delay_sec respectively.
RPC_TIMEOUT_MIN = 5
RPC_DELAY_SEC = 5

# Stats are keyed per shard host. Dots separate path components in stat names,
# so a dotted hostname would be split into several nested levels; replace them
# to keep the whole hostname as a single component.
STATS_KEY = 'shard_client.%s' % socket.gethostname().replace('.', '_')
timer = stats.Timer(STATS_KEY)
843b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelich
853b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelich
class ShardClient(object):
    """Performs client side tasks of sharding, i.e. the heartbeat.

    This class contains the logic to do periodic heartbeats to a global AFE,
    to retrieve new jobs from it and to report completed jobs back.
    """

    def __init__(self, global_afe_hostname, shard_hostname, tick_pause_sec):
        """Create the RPC client and initialize the loop state.

        @param global_afe_hostname: Hostname of the global AFE the heartbeat
                                    RPCs are sent to.
        @param shard_hostname: Hostname of this shard; it is sent with every
                               heartbeat and used to look up the local shard
                               object.
        @param tick_pause_sec: Seconds to sleep between two ticks in loop().
        """
        self.afe = frontend_wrappers.RetryingAFE(server=global_afe_hostname,
                                                 timeout_min=RPC_TIMEOUT_MIN,
                                                 delay_sec=RPC_DELAY_SEC)
        self.hostname = shard_hostname
        self.tick_pause_sec = tick_pause_sec
        self._shutdown = False
        # Lazily filled cache for the `shard` property.
        self._shard = None


    @timer.decorate
    def process_heartbeat_response(self, heartbeat_response):
        """Save objects returned by a heartbeat to the local database.

        This deserializes hosts and jobs including their dependencies and saves
        them to the local database.

        @param heartbeat_response: A dictionary with keys 'hosts' and 'jobs',
                                   as returned by the `shard_heartbeat` rpc
                                   call.
        """
        hosts_serialized = heartbeat_response['hosts']
        jobs_serialized = heartbeat_response['jobs']

        stats.Gauge(STATS_KEY).send(
            'hosts_received', len(hosts_serialized))
        stats.Gauge(STATS_KEY).send(
            'jobs_received', len(jobs_serialized))

        # Persisting is automatically done inside deserialize
        for host in hosts_serialized:
            models.Host.deserialize(host)
        for job in jobs_serialized:
            models.Job.deserialize(job)


    @property
    def shard(self):
        """Return this shard's own shard object, fetched from the database.

        A shard's object is fetched from the master with the first jobs. It will
        not exist before that time. Once found, the object is cached on the
        instance.

        @returns: The shard object if it already exists, otherwise None
        """
        if self._shard is None:
            try:
                self._shard = models.Shard.smart_get(self.hostname)
            except models.Shard.DoesNotExist:
                # This might happen before any jobs are assigned to this shard.
                # This is okay because then there is nothing to offload anyway.
                pass
        return self._shard


    def _get_jobs_to_upload(self):
        """Collect the local jobs that have to be reported to the master.

        @returns: A list of Job objects whose shard field is None.
        """
        # The scheduler sets shard to None upon completion of the job.
        # For more information on the shard field's semantic see
        # models.Job.shard.
        job_ids = list(models.Job.objects.filter(
            shard=None).values_list('pk', flat=True))
        return list(models.Job.objects.filter(pk__in=job_ids))


    def _mark_jobs_as_uploaded(self, jobs):
        """Set the shard field of the given jobs back to this shard.

        Jobs with a shard set are not selected by _get_jobs_to_upload() again.

        @param jobs: List of Job objects that were sent in a heartbeat.
        """
        if not jobs:
            # Nothing to mark; skip the shard lookup and the UPDATE.
            return
        job_ids = [job.id for job in jobs]
        # self.shard might be None if no jobs were downloaded yet.
        # Then the update leaves the jobs unmarked and in the worst case they
        # are uploaded twice.
        models.Job.objects.filter(pk__in=job_ids).update(shard=self.shard)


    def _get_hqes_for_jobs(self, jobs):
        """Collect the host queue entries that belong to the given jobs.

        @param jobs: Iterable of Job objects.

        @returns: A flat list with all host queue entries of all given jobs.
        """
        hqes = []
        for job in jobs:
            hqes.extend(job.hostqueueentry_set.all())
        return hqes


    @timer.decorate
    def do_heartbeat(self):
        """Perform a heartbeat: Upload finished jobs and retrieve new ones.

        This function executes a `shard_heartbeat` RPC, sending the jobs
        returned by _get_jobs_to_upload() together with their host queue
        entries. It retrieves the response of this call and processes the
        response by storing the returned objects in the local database.
        """
        logging.info("Performing heartbeat.")

        jobs = self._get_jobs_to_upload()
        hqes = self._get_hqes_for_jobs(jobs)

        response = self.afe.run(
            HEARTBEAT_AFE_ENDPOINT, shard_hostname=self.hostname,
            jobs=[job.serialize(include_dependencies=False) for job in jobs],
            hqes=[hqe.serialize(include_dependencies=False) for hqe in hqes])

        # Only mark the jobs once the RPC has returned, so that a failed
        # heartbeat leaves them selected for the next attempt.
        self._mark_jobs_as_uploaded(jobs)

        self.process_heartbeat_response(response)
        logging.info("Heartbeat completed.")


    def tick(self):
        """Performs all tasks the shard clients needs to do periodically."""
        self.do_heartbeat()


    def loop(self):
        """Calls tick() until shutdown() is called."""
        while not self._shutdown:
            self.tick()
            if self._shutdown:
                # Shutdown was requested during the tick; do not delay the
                # exit by another pause.
                break
            time.sleep(self.tick_pause_sec)


    def shutdown(self):
        """Stops the shard client after the current tick."""
        logging.info("Shutdown request received.")
        self._shutdown = True
2163b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelich
2173b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelich
def handle_signal(signum, frame):
    """Signal handler so we don't crash mid-tick.

    Asks the running shard client to stop after its current tick. If the
    signal arrives before a shard client has been created there is no tick to
    protect, so the process is terminated right away.

    @param signum: Number of the received signal.
    @param frame: Stack frame that was interrupted by the signal (unused).

    @raises SystemExit with code 128 + signum if no shard client exists yet.
    """
    # The module-level name is only bound once the client has been created,
    # so look it up without assuming that it exists.
    client = globals().get('_heartbeat_client')
    if client is None:
        logging.info("Received signal %s before the shard client was "
                     "started; exiting.", signum)
        raise SystemExit(128 + signum)
    client.shutdown()
2223b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelich
2233b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelich
def _get_global_afe_hostname():
    """Return the 'global_afe_hostname' setting of the SHARD config section."""
    config = global_config.global_config
    return config.get_config_value('SHARD', 'global_afe_hostname')
2283b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelich
2293b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelich
def _get_shard_hostname_and_ensure_running_on_shard():
    """Return the hostname of the local shard from the global configuration.

    The value is read from 'shard_hostname' in the SHARD section. A missing or
    empty value means this is not running on a shard.

    @raises error.HeartbeatOnlyAllowedInShardModeException if run from
            elsewhere than from a shard.
    """
    config = global_config.global_config
    shard_hostname = config.get_config_value(
        'SHARD', 'shard_hostname', default=None)
    if shard_hostname:
        return shard_hostname
    raise error.HeartbeatOnlyAllowedInShardModeException(
        'To run the shard client, shard_hostname must neither be None nor '
        'empty.')
2453b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelich
2463b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelich
def _get_tick_pause_sec():
    """Return the pause between two ticks, in seconds, as a float.

    The value is read from 'heartbeat_pause_sec' in the SHARD config section.
    """
    config = global_config.global_config
    return config.get_config_value('SHARD', 'heartbeat_pause_sec', type=float)
2513b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelich
2523b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelich
def get_shard_client():
    """Build a ShardClient from the values in the global configuration.

    @returns A shard client instance.
    """
    # Keyword arguments are evaluated left to right, i.e. the settings are
    # read in the order AFE hostname, shard hostname, tick pause.
    return ShardClient(
        global_afe_hostname=_get_global_afe_hostname(),
        shard_hostname=_get_shard_hostname_and_ensure_running_on_shard(),
        tick_pause_sec=_get_tick_pause_sec())
2643b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelich
2653b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelich
def main():
    """Run the shard client and report starts and fatal errors.

    Increments the 'starts' counter and runs
    main_without_exception_handling(). Any uncaught exception is passed to the
    email manager and the log, counted under 'uncaught_exceptions', and then
    re-raised. Queued emails are sent in every case.
    """
    try:
        # The separating dot keeps the counter name from being fused with the
        # last component of STATS_KEY.
        stats.Counter(STATS_KEY + '.starts').increment()
        main_without_exception_handling()
    except Exception:
        message = 'Uncaught exception; terminating shard_client.'
        email_manager.manager.log_stacktrace(message)
        logging.exception(message)
        stats.Counter(STATS_KEY + '.uncaught_exceptions').increment()
        raise
    finally:
        email_manager.manager.send_queued_emails()
2783b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelich
2793b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelich
def main_without_exception_handling():
    """Parse arguments, set up logging and signals, and run the client loop.

    Returns once the shard client's loop has ended, i.e. after a shutdown was
    requested through one of the installed signal handlers.
    """
    global _heartbeat_client

    parser = argparse.ArgumentParser(description='Shard client.')
    # No options are defined; parsing still provides --help and rejects
    # unexpected arguments.
    parser.parse_args()

    logging_manager.configure_logging(
        shard_logging_config.ShardLoggingConfig())

    # Create the client before installing the handlers: handle_signal uses
    # _heartbeat_client, so it must be bound before a signal can be handled.
    _heartbeat_client = get_shard_client()

    logging.info("Setting signal handler.")
    signal.signal(signal.SIGINT, handle_signal)
    signal.signal(signal.SIGTERM, handle_signal)

    logging.info("Starting shard client.")
    _heartbeat_client.loop()
2953b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelich
2963b27dbc2358aef655e050a92510ff8e9e080bf81Jakob Juelich
# Start the shard client only when this file is executed as a script, not
# when it is imported as a module.
if __name__ == '__main__':
    main()
299