1#!/usr/bin/python
2
3# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6
7"""Module used to back up the mysql db and upload to Google Storage.
8
9Usage:
10  backup_mysql_db.py --type=weekly --gs_bucket=gs://my_bucket --keep 10
11
12  gs_bucket may refer to a local location by omitting gs:// and giving a local
13  path if desired for testing. The example usage above creates a dump
14  of the autotest db, uploads it to gs://my_bucket/weekly/dump_file.date and
15  cleans up older dumps if there are more than 10 in that directory.
16"""
17
18import datetime
19from distutils import version
20import logging
21import optparse
22import os
23import tempfile
24
25import common
26
27from autotest_lib.client.common_lib import error
28from autotest_lib.client.common_lib import global_config, logging_manager, utils
29from autotest_lib.utils import test_importer
30
31
32_ATTEMPTS = 3
33_GSUTIL_BIN = 'gsutil'
34_GS_BUCKET = 'gs://chromeos-lab/backup/database'
35# TODO(scottz): Should we need to ignore more than one database a general
36# function should be designed that lists tables in the database and properly
37# creates the --ignore-table= args to be passed to mysqldump.
38# Tables to ignore when dumping all databases.
39# performance_schema is an internal database that cannot be dumped
40IGNORE_TABLES = ['performance_schema.cond_instances',
41                 'performance_schema.events_waits_current',
42                 'performance_schema.cond_instances',
43                 'performance_schema.events_waits_history',
44                 'performance_schema.events_waits_history_long',
45                 'performance_schema.events_waits_summary_by_instance',
46                 ('performance_schema.'
47                  'events_waits_summary_by_thread_by_event_name'),
48                 'performance_schema.events_waits_summary_global_by_event_name',
49                 'performance_schema.file_instances',
50                 'performance_schema.file_summary_by_event_name',
51                 'performance_schema.file_summary_by_instance',
52                 'performance_schema.mutex_instances',
53                 'performance_schema.performance_timers',
54                 'performance_schema.rwlock_instances',
55                 'performance_schema.setup_consumers',
56                 'performance_schema.setup_instruments',
57                 'performance_schema.setup_timers',
58                 'performance_schema.threads']
59
60# Conventional mysqldump schedules.
61_DAILY = 'daily'
62_WEEKLY = 'weekly'
63_MONTHLY = 'monthly'
64
65# Back up server db
66_SERVER_DB = 'server_db'
67
68# Contrary to a conventional mysql dump which takes O(hours) on large databases,
69# a host dump is the cheapest form of backup possible. We dump the output of a
70# of a mysql command showing all hosts and their pool labels to a text file that
71# is backed up to google storage.
72_ONLY_HOSTS = 'only_hosts'
73_ONLY_SHARDS = 'only_shards'
74_SCHEDULER_TYPES = [_SERVER_DB, _ONLY_HOSTS, _ONLY_SHARDS, _DAILY, _WEEKLY, _MONTHLY]
75
class BackupError(Exception):
    """Raised when an error occurs while backing up the database."""
78
79
class MySqlArchiver(object):
    """Class that archives the Autotest MySQL DB to Google Storage.

    Creates a dump (a gzipped mysqldump for the conventional schedules, or a
    plain-text query result for the cheap host/shard backups), uploads it to
    <gs_bucket>/<scheduled_type>/ and prunes older dumps so that at most
    |number_to_keep| remain.

    Vars:
      gs_dir:  The path to the directory in Google Storage that this dump file
               will be uploaded to.
      number_to_keep:  The number of dumps we should store.
    """
    # Database holding the main autotest tables (afe_hosts, afe_labels, ...).
    _AUTOTEST_DB = "chromeos_autotest_db"
    # Database holding the lab server inventory.
    _SERVER_DB = "chromeos_lab_servers"


    def __init__(self, scheduled_type, number_to_keep, gs_bucket):
        """Initializes the archiver.

        @param scheduled_type: One of _SCHEDULER_TYPES; selects what gets
                dumped and which sub-directory of |gs_bucket| receives it.
        @param number_to_keep: Number of dumps retained by cleanup().
        @param gs_bucket: gs:// bucket URI, or a plain local path for testing.
        """
        # For conventional scheduled type, we back up all databases.
        # self._db is only used when scheduled_type is not
        # conventional scheduled type.
        self._db = self._get_db_name(scheduled_type)
        self._gs_dir = '/'.join([gs_bucket, scheduled_type])
        self._number_to_keep = number_to_keep
        self._type = scheduled_type


    @classmethod
    def _get_db_name(cls, scheduled_type):
        """Get the db name to backup.

        @param scheduled_type: one of _SCHEDULER_TYPES.

        @returns: The name of the db to backup.
                  Or None for backup all dbs.
        """
        if scheduled_type == _SERVER_DB:
            return cls._SERVER_DB
        elif scheduled_type in [_ONLY_HOSTS, _ONLY_SHARDS]:
            return cls._AUTOTEST_DB
        else:
            # Conventional daily/weekly/monthly schedule: dump everything.
            return None

    @staticmethod
    def _get_user_pass():
        """Returns a tuple containing the user/pass to use to access the DB."""
        user = global_config.global_config.get_config_value(
                'CROS', 'db_backup_user')
        password = global_config.global_config.get_config_value(
                'CROS', 'db_backup_password')
        return user, password


    def create_mysql_dump(self):
        """Returns the path to a mysql dump of the current autotest DB.

        The dump is gzipped into a fresh temp file; the caller owns the file.
        """
        user, password = self._get_user_pass()
        _, filename = tempfile.mkstemp('autotest_db_dump')
        logging.debug('Dumping mysql database to file %s', filename)
        extra_dump_args = ''
        # The ignore-table args are harmless when dumping a single db; they
        # only matter together with --all-databases below.
        for entry in IGNORE_TABLES:
            extra_dump_args += '--ignore-table=%s ' % entry

        if not self._db:
            # No specific db selected: conventional schedule, dump everything.
            extra_dump_args += "--all-databases"
        db_name = self._db or ''
        # pipefail makes the shell report a mysqldump failure even though the
        # output is piped through gzip.
        # NOTE(review): the db password is interpolated into the command line
        # and is therefore visible in the process list while the dump runs.
        utils.system('set -o pipefail; mysqldump --user=%s '
                     '--password=%s %s %s| gzip - > %s' % (
                     user, password, extra_dump_args, db_name, filename))
        return filename


    def _create_dump_from_query(self, query):
        """Dumps result of a query into a text file.

        @param query: Query to execute.

        @return: The path to a tempfile containing the response of the query.

        @raises BackupError: If no specific db was selected for this
                scheduled type (self._db is None).
        """
        if not self._db:
            raise BackupError("_create_dump_from_query requires a specific db.")
        parameters = {'db': self._db, 'query': query}
        parameters['user'], parameters['password'] = self._get_user_pass()
        _, parameters['filename'] = tempfile.mkstemp('autotest_db_dump')
        utils.system(
                'set -o pipefail; mysql -u %(user)s -p%(password)s '
                '%(db)s -e "%(query)s" > %(filename)s' %
                parameters)
        return parameters['filename']


    def create_host_dump(self):
        """Dumps hosts and their labels into a text file.

        @return: The path to a tempfile containing a dump of
                 hosts and their pool labels.
        """
        # Only pool labels are captured; '%%' survives the dict-interpolation
        # in _create_dump_from_query as a literal SQL '%' wildcard.
        query = ('SELECT hostname, labels.name FROM afe_hosts AS hosts '
                 'JOIN afe_hosts_labels ON hosts.id = afe_hosts_labels.host_id '
                 'JOIN afe_labels AS labels '
                 'ON labels.id = afe_hosts_labels.label_id '
                 'WHERE labels.name LIKE \'%%pool%%\';')
        return self._create_dump_from_query(query)


    def create_shards_dump(self):
        """Dumps shards and their labels into a text file.

        @return: The path to a tempfile containing a dump of
                 shards and their labels.
        """
        query = ('SELECT hostname, labels.name FROM afe_shards AS shards '
                 'JOIN afe_shards_labels '
                 'ON shards.id = afe_shards_labels.shard_id '
                 'JOIN afe_labels AS labels '
                 'ON labels.id = afe_shards_labels.label_id;')
        return self._create_dump_from_query(query)


    def dump(self):
        """Creates a data dump based on the type of schedule.

        @return: The path to a file containing the dump.
        """
        if self._type == _ONLY_HOSTS:
            return self.create_host_dump()
        if self._type == _ONLY_SHARDS:
            return self.create_shards_dump()
        return self.create_mysql_dump()


    def _get_name(self):
        """Returns the name of the dump as presented to google storage."""
        # Query dumps are plain text; mysqldump output is gzipped.
        if self._type in [_ONLY_HOSTS, _ONLY_SHARDS]:
            file_type = 'txt'
        else:
            file_type = 'gz'
        # The yy.mm.dd date keeps names ordered for the LooseVersion sort
        # performed in cleanup().
        return 'autotest-dump.%s.%s' % (
                datetime.datetime.now().strftime('%y.%m.%d'), file_type)


    @staticmethod
    def _retry_run(cmd):
        """Run the specified |cmd| string, retrying if necessary.

        Retries up to _ATTEMPTS times; the final failure's CmdError is
        re-raised to the caller.

        Args:
          cmd: The command to run.
        """
        for attempt in range(_ATTEMPTS):
            try:
                return utils.system_output(cmd)
            except error.CmdError:
                if attempt == _ATTEMPTS - 1:
                    raise
                else:
                    logging.error('Failed to run %r', cmd)


    def upload_to_google_storage(self, dump_file):
        """Uploads the given |dump_file| to Google Storage.

        The local |dump_file| is deleted after a successful upload.

        @param dump_file: The path to the file containing the dump.
        """
        cmd = '%(gs_util)s cp %(dump_file)s %(gs_dir)s/%(name)s'
        input_dict = dict(gs_util=_GSUTIL_BIN, dump_file=dump_file,
                          name=self._get_name(), gs_dir=self._gs_dir)
        cmd = cmd % input_dict
        logging.debug('Uploading mysql dump to google storage')
        self._retry_run(cmd)
        os.remove(dump_file)


    def _get_gs_command(self, cmd):
        """Returns an array representing the command for rm or ls."""
        # Helpful code to allow us to test without gs.
        # A plain local path (no gs:// prefix) uses the shell's own rm/ls.
        assert cmd in ['rm', 'ls']
        gs_bin = _GSUTIL_BIN
        if self._gs_dir.startswith('gs://'):
            cmd_array = [gs_bin, cmd]
        else:
            cmd_array = [cmd]

        return cmd_array


    def _do_ls(self):
        """Returns the output of running ls on the gs bucket."""
        cmd = self._get_gs_command('ls') + [self._gs_dir]
        return self._retry_run(' '.join(cmd))


    def cleanup(self):
        """Cleans up the gs bucket to ensure we don't over archive."""
        logging.debug('Cleaning up previously archived dump files.')
        listing = self._do_ls()
        # LooseVersion orders the date-stamped dump names oldest-first.
        ordered_listing = sorted(listing.splitlines(), key=version.LooseVersion)
        if len(ordered_listing) < self._number_to_keep:
            logging.debug('Cleanup found nothing to do.')
            return

        # Everything except the newest _number_to_keep entries is removed.
        to_remove = ordered_listing[:-self._number_to_keep]
        rm_cmd = self._get_gs_command('rm')
        for artifact in to_remove:
            # NOTE(review): when _gs_dir is a gs:// URI, gsutil ls may already
            # return full gs:// paths, which would make this concatenation
            # double-prefix the target — confirm against the gsutil version
            # in use.
            cmd = ' '.join(rm_cmd + [self._gs_dir + '/' + artifact])
            self._retry_run(cmd)
279
280
def parse_options():
    """Parses command line options.

    @return: An optparse.Values instance with gs_bucket, keep, type and
             verbose attributes.

    Exits via parser.error() if --type is not one of _SCHEDULER_TYPES.
    """
    parser = optparse.OptionParser()
    parser.add_option('--gs_bucket', default=_GS_BUCKET,
                      help='Google storage bucket to store mysql db dumps.')
    parser.add_option('--keep', default=10, type=int,
                      help='Number of dumps to keep of specified type.')
    parser.add_option('--type', default=_DAILY,
                      help='The type of mysql dump to store.')
    # Fixed: the help text here was a copy-paste of the --gs_bucket help.
    parser.add_option('--verbose', default=False, action='store_true',
                      help='Print verbose debug messages while running.')
    options = parser.parse_args()[0]
    if options.type not in _SCHEDULER_TYPES:
        parser.error('Type must be either: %s.' % ', '.join(_SCHEDULER_TYPES))

    return options
297
298
def main():
    """Entry point: dump the db, upload the dump, then prune old backups."""
    options = parse_options()
    logging_manager.configure_logging(
            test_importer.TestImporterLoggingConfig(),
            verbose=options.verbose)
    logging.debug('Start db backup: %s', options.type)
    # The archiver decides what to dump (full db vs. hosts/shards query)
    # based on the requested schedule type.
    backup = MySqlArchiver(options.type, options.keep, options.gs_bucket)
    dump_path = backup.dump()
    logging.debug('Uploading backup: %s', options.type)
    backup.upload_to_google_storage(dump_path)
    backup.cleanup()
    logging.debug('Db backup completed: %s', options.type)
311
312
# Script entry point when invoked directly (e.g. from a cron job).
if __name__ == '__main__':
    main()
315