1#!/usr/bin/python
2# Copyright 2016 The Chromium OS Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""Kill slow queries in local autotest database."""
7
8import logging
9import optparse
10import sys
11import time
12
13import common
14from autotest_lib.client.common_lib import global_config
15from autotest_lib.site_utils import gmail_lib
16from autotest_lib.client.common_lib import utils
17from autotest_lib.site_utils.stats import mysql_stats
18
19try:
20    from chromite.lib import metrics
21    from chromite.lib import ts_mon_config
22except ImportError:
23    metrics = utils.metrics_mock
24    ts_mon_config = utils.metrics_mock
25
AT_DIR = '/usr/local/autotest'

# Defaults for the command line options are read from the global config.
# Every lookup passes '' as its default value.
_CONFIG = global_config.global_config
DEFAULT_USER = _CONFIG.get_config_value(
        'CROS', 'db_backup_user', type=str, default='')
DEFAULT_PASSWD = _CONFIG.get_config_value(
        'CROS', 'db_backup_password', type=str, default='')
DEFAULT_MAIL = _CONFIG.get_config_value(
        'SCHEDULER', 'notify_email', type=str, default='')
33
34
def parse_options():
    """Parse the command line arguments.

    Options default to the module-level DEFAULT_USER, DEFAULT_PASSWD and
    DEFAULT_MAIL values; the timeout defaults to 300 seconds.

    @returns: A tuple (parser, options, args): the OptionParser that was
              used, the parsed option values and the leftover positional
              arguments.
    """
    usage = 'usage: %prog [options]'
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('-u', '--user', default=DEFAULT_USER,
                      help='User to login to the Autotest DB. Default is the '
                           'one defined in config file.')
    parser.add_option('-p', '--password', default=DEFAULT_PASSWD,
                      help='Password to login to the Autotest DB. Default is '
                           'the one defined in config file.')
    # 'int' is the documented optparse spelling of the integer option type.
    parser.add_option('-t', '--timeout', type='int', default=300,
                      help='Timeout boundary of the slow database query. '
                           'Default is 300s')
    parser.add_option('-m', '--mail', default=DEFAULT_MAIL,
                      help='Mail address to send the summary to. Default is '
                           'ChromeOS infra Deputy')
    options, args = parser.parse_args()
    return parser, options, args
53
54
def verify_options_and_args(options, args):
    """Verify the validity of options and args.

    Verification fails when any positional argument is left over, or when
    either the user or the password option is empty. The reason is logged
    as an error in both cases.

    @param options: The parsed options to verify. Must provide `user` and
                    `password` attributes.
    @param args: The parsed (leftover positional) args to verify.

    @returns: True if verification passes, False otherwise.
    """
    if args:
        logging.error('Unknown arguments: %s', args)
        return False

    if not (options.user and options.password):
        logging.error('Failed to get the default user or password for Autotest'
                      ' DB. Please specify them through the command line.')
        return False
    return True
72
73
def format_the_output(slow_queries):
    """Convert a list of slow queries into a readable string format.

    e.g. [(a, b, c...)]  -->
         "Id: a
          User: b
          Host: c
          ...
         "
    @param slow_queries: A list of rows (tuples or lists), one row contains
                         all the info about one single slow query. The first
                         eight fields are reported, in the order Id, User,
                         Host, db, Command, Time, State, Info. Any further
                         fields are ignored.

    @returns: one clean string representation of all the slow queries, with
              a blank line between queries; '' for an empty list.

    @raises TypeError: if a row has fewer than eight fields.
    """
    template = ('Id: %s\nUser: %s\nHost: %s\ndb: %s\nCommand: %s\n'
                'Time: %s\nState: %s\nInfo: %s\n')
    # Normalize each row to a tuple of its first eight fields so that list
    # rows and rows with additional trailing columns can still be formatted.
    return '\n'.join(template % tuple(q)[:8] for q in slow_queries)
92
93
def kill_slow_queries(user, password, timeout):
    """Kill the slow database queries running beyond the timeout limit.

    Connects to the database on localhost, reads the full process list and
    issues a KILL for every entry whose command is 'Query' and whose time is
    at least `timeout`.

    @param user: User to login to the Autotest DB.
    @param password: Password to login to the Autotest DB.
    @param timeout: Timeout limit to kill the slow queries.

    @returns: a tuple, first element is the string representation of all the
              killed slow queries ('' if there were none), second element is
              the total number of them.
    """
    # NOTE(review): a new connection is opened on every call and is never
    # explicitly closed here -- confirm whether RetryingConnection needs it.
    conn = mysql_stats.RetryingConnection('localhost', user, password)
    conn.Connect()

    conn.Execute('SHOW FULL PROCESSLIST')
    # Column 4 is the command and column 5 the running time, matching the
    # field order used by format_the_output().
    offenders = []
    for row in conn.Fetchall():
        if row[4] == 'Query' and row[5] >= timeout:
            offenders.append(row)

    if not offenders:
        logging.info('No slow queries over %ds!', timeout)
        return ('', 0)

    summary = format_the_output(offenders)
    thread_ids = [row[0] for row in offenders]
    logging.info('Start killing following slow queries\n%s', summary)
    killed = 0
    for thread_id in thread_ids:
        logging.info('Killing %s...', thread_id)
        conn.Execute('KILL %d' % thread_id)
        logging.info('Done!')
        killed += 1
    return (summary, killed)
126
127
def main():
    """Main entry.

    Validates the command line, then loops forever: kills the slow queries,
    mails a summary and bumps a counter when any were killed, and sleeps for
    the timeout before the next round.

    @returns: 1 if the command line options fail verification. Otherwise the
              loop only ends by an exception, which is logged, counted and
              re-raised.
    """
    # Reset the logging module first so that the basicConfig call below is
    # the one that takes effect.
    logging.shutdown()
    reload(logging)
    logging.basicConfig(level=logging.DEBUG,
                        format='%(asctime)s %(message)s',
                        datefmt='%m/%d/%Y %H:%M:%S')

    killed_metric = 'chromeos/autotest/afe_db/killed_slow_queries'
    failed_metric = 'chromeos/autotest/afe_db/failed_to_kill_query'

    with ts_mon_config.SetupTsMonGlobalState(service_name='kill_slow_queries',
                                             indirect=True):
        parser, options, args = parse_options()
        if not verify_options_and_args(options, args):
            parser.print_help()
            return 1

        try:
            while True:
                killed_summary, killed_count = kill_slow_queries(
                    options.user, options.password, options.timeout)
                # An empty summary means nothing was killed this round.
                if killed_summary:
                    mail_body = ('Below are killed queries:\n%s' %
                                 killed_summary)
                    gmail_lib.send_email(
                        options.mail,
                        'Successfully killed slow autotest db queries',
                        mail_body)
                    metrics.Counter(killed_metric).increment_by(killed_count)
                time.sleep(options.timeout)
        except Exception as err:
            # Record the failure, then let the exception propagate.
            metrics.Counter(failed_metric).increment()
            logging.error('Failed to kill slow db queries.\n%s', err)
            raise
160
161
# Run main() only when executed as a script; its return value is passed to
# sys.exit() as the exit status.
if __name__ == '__main__':
    sys.exit(main())
164
165