1#!/usr/bin/python
2# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6import argparse
7import datetime
8import logging
9import os
10import re
11import sys
12import time
13
14os.environ['DJANGO_SETTINGS_MODULE'] = 'frontend.settings'
15
16import common
17from autotest_lib.server import utils
18from django.db import connections, transaction
19
20
21# Format Appears as: [Date] [Time] - [Msg Level] - [Message]
22LOGGING_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'
23# This regex makes sure the input is in the format of YYYY-MM-DD (2012-02-01)
24DATE_FORMAT_REGEX = ('^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]'
25                     '|3[01])$')
26SELECT_CMD_FORMAT = """
27SELECT %(table)s.%(primary_key)s FROM %(table)s
28WHERE %(table)s.%(time_column)s <= "%(date)s"
29"""
30SELECT_JOIN_CMD_FORMAT = """
31SELECT %(table)s.%(primary_key)s FROM %(table)s
32INNER JOIN %(related_table)s
33  ON %(table)s.%(foreign_key)s=%(related_table)s.%(related_primary_key)s
34WHERE %(related_table)s.%(time_column)s <= "%(date)s"
35"""
36SELECT_WITH_INDIRECTION_FORMAT = """
37SELECT %(table)s.%(primary_key)s FROM %(table)s
38INNER JOIN %(indirection_table)s
39  ON %(table)s.%(foreign_key)s =
40     %(indirection_table)s.%(indirection_primary_key)s
41INNER JOIN %(related_table)s
42  ON %(indirection_table)s.%(indirection_foreign_key)s =
43  %(related_table)s.%(related_primary_key)s
44WHERE %(related_table)s.%(time_column)s <= "%(date)s"
45"""
46DELETE_ROWS_FORMAT = """
47DELETE FROM %(table)s
48WHERE %(table)s.%(primary_key)s IN (%(rows)s)
49"""
50
51
52AFE_JOB_ID = 'afe_job_id'
53JOB_ID = 'job_id'
54JOB_IDX = 'job_idx'
55TEST_IDX = 'test_idx'
56
57# CAUTION: Make sure only the 'default' connection is used. Otherwise
58# db_cleanup may delete stuff from the global database, which is generally not
59# intended.
60cursor = connections['default'].cursor()
61
62# Globals for command line flag constants, for convenience.
63DRY_RUN = False
64STEP_SIZE = None
65LOAD_RATIO = 1.0
66
67class ProgressBar(object):
68    TEXT = "{:<40s} [{:<20s}] ({:>9d}/{:>9d})"
69
70    def __init__(self, name, amount):
71        self._name = name
72        self._amount = amount
73        self._cur = 0
74
75    def __enter__(self):
76        return self
77
78    def __exit__(self, a, b, c):
79        sys.stdout.write('\n')
80        sys.stdout.flush()
81
82    def update(self, x):
83        """
84        Advance the counter by `x`.
85
86        @param x: An integer of how many more elements were processed.
87        """
88        self._cur += x
89
90    def show(self):
91        """
92        Display the progress bar on the current line.  Repeated invocations
93        "update" the display.
94        """
95        if self._amount == 0:
96            barlen = 20
97        else:
98            barlen = int(20 * self._cur / float(self._amount))
99        if barlen:
100            bartext = '=' * (barlen-1) + '>'
101        else:
102            bartext = ''
103        text = self.TEXT.format(self._name, bartext, self._cur, self._amount)
104        sys.stdout.write('\r')
105        sys.stdout.write(text)
106        sys.stdout.flush()
107
108
109def grouper(iterable, n):
110    """
111    Group the elements of `iterable` into groups of maximum size `n`.
112
113    @param iterable: An iterable.
114    @param n: Max size of returned groups.
115    @returns: Yields iterables of size <= n.
116
117    >>> grouper('ABCDEFG', 3)
118    [['A', 'B', C'], ['D', 'E', 'F'], ['G']]
119    """
120    args = [iter(iterable)] * n
121    while True:
122        lst = []
123        try:
124            for itr in args:
125                lst.append(next(itr))
126            yield lst
127        except StopIteration:
128            if lst:
129                yield lst
130            break
131
132
133def _delete_table_data_before_date(table_to_delete_from, primary_key,
134                                   related_table, related_primary_key,
135                                   date, foreign_key=None,
136                                   time_column="started_time",
137                                   indirection_table=None,
138                                   indirection_primary_key=None,
139                                   indirection_foreign_key=None):
140    """
141    We want a delete statement that will only delete from one table while
142    using a related table to find the rows to delete.
143
144    An example mysql command:
145    DELETE FROM tko_iteration_result USING tko_iteration_result INNER JOIN
146    tko_tests WHERE tko_iteration_result.test_idx=tko_tests.test_idx AND
147    tko_tests.started_time <= '2012-02-01';
148
149    There are also tables that require 2 joins to determine which rows we want
150    to delete and we determine these rows by joining the table we want to
151    delete from with an indirection table to the actual jobs table.
152
153    @param table_to_delete_from: Table whose rows we want to delete.
154    @param related_table: Table with the date information we are selecting by.
155    @param foreign_key: Foreign key used in table_to_delete_from to reference
156                        the related table. If None, the primary_key is used.
157    @param primary_key: Primary key in the related table.
158    @param date: End date of the information we are trying to delete.
159    @param time_column: Column that we want to use to compare the date to.
160    @param indirection_table: Table we use to link the data we are trying to
161                              delete with the table with the date information.
162    @param indirection_primary_key: Key we use to connect the indirection table
163                                    to the table we are trying to delete rows
164                                    from.
165    @param indirection_foreign_key: Key we use to connect the indirection table
166                                    to the table with the date information.
167    """
168    if not foreign_key:
169        foreign_key = primary_key
170
171    if not related_table:
172        # Deleting from a table directly.
173        variables = dict(table=table_to_delete_from, primary_key=primary_key,
174                         time_column=time_column, date=date)
175        sql = SELECT_CMD_FORMAT % variables
176    elif not indirection_table:
177        # Deleting using a single JOIN to get the date information.
178        variables = dict(primary_key=primary_key, table=table_to_delete_from,
179                         foreign_key=foreign_key, related_table=related_table,
180                         related_primary_key=related_primary_key,
181                         time_column=time_column, date=date)
182        sql = SELECT_JOIN_CMD_FORMAT % variables
183    else:
184        # There are cases where we need to JOIN 3 TABLES to determine the rows
185        # we want to delete.
186        variables = dict(primary_key=primary_key, table=table_to_delete_from,
187                         indirection_table=indirection_table,
188                         foreign_key=foreign_key,
189                         indirection_primary_key=indirection_primary_key,
190                         related_table=related_table,
191                         related_primary_key=related_primary_key,
192                         indirection_foreign_key=indirection_foreign_key,
193                         time_column=time_column, date=date)
194        sql = SELECT_WITH_INDIRECTION_FORMAT % variables
195
196    logging.debug('SQL: %s', sql)
197    cursor.execute(sql, [])
198    rows = [x[0] for x in cursor.fetchall()]
199    logging.debug(rows)
200
201    if not rows or rows == [None]:
202        with ProgressBar(table_to_delete_from, 0) as pb:
203            pb.show()
204        logging.debug('Nothing to delete for %s', table_to_delete_from)
205        return
206
207    with ProgressBar(table_to_delete_from, len(rows)) as pb:
208        for row_keys in grouper(rows, STEP_SIZE):
209            variables['rows'] = ','.join([str(x) for x in row_keys])
210            sql = DELETE_ROWS_FORMAT % variables
211
212            start = time.time()
213            logging.debug('SQL: %s', sql)
214            if not DRY_RUN:
215                cursor.execute(sql, [])
216                transaction.commit_unless_managed(using='default')
217            end = time.time()
218
219            pb.update(len(row_keys))
220            pb.show()
221
222            if LOAD_RATIO != 1.0:
223                assert 0 < LOAD_RATIO <= 1, (
224                        'Load ratio must be a fraction between 0 and 1.')
225                time.sleep((end - start) / LOAD_RATIO)
226
227
228def _subtract_days(date, days_to_subtract):
229    """
230    Return a date (string) that is 'days' before 'date'
231
232    @param date: date (string) we are subtracting from.
233    @param days_to_subtract: days (int) we are subtracting.
234    """
235    date_obj = datetime.datetime.strptime(date, '%Y-%m-%d')
236    difference = date_obj - datetime.timedelta(days=days_to_subtract)
237    return difference.strftime('%Y-%m-%d')
238
239
240def _delete_all_data_before_date(date):
241    """
242    Delete all the database data before a given date.
243
244    This function focuses predominately on the data for jobs in tko_jobs.
245    However not all jobs in afe_jobs are also in tko_jobs.
246
247    Therefore we delete all the afe_job and foreign key relations prior to two
248    days before date. Then we do the queries using tko_jobs and these
249    tables to ensure all the related information is gone. Even though we are
250    repeating deletes on these tables, the second delete will be quick and
251    completely thorough in ensuring we clean up all the foreign key
252    dependencies correctly.
253
254    @param date: End date of the information we are trying to delete.
255    @param step: Rows to delete per SQL query.
256    """
257    # First cleanup all afe_job related data (prior to 2 days before date).
258    # The reason for this is not all afe_jobs may be in tko_jobs.
259    afe_date = _subtract_days(date, 2)
260    logging.info('Cleaning up all afe_job data prior to %s.', afe_date)
261    _delete_table_data_before_date('afe_aborted_host_queue_entries',
262                                   'queue_entry_id',
263                                   'afe_jobs', 'id', afe_date,
264                                   time_column= 'created_on',
265                                   foreign_key='queue_entry_id',
266                                   indirection_table='afe_host_queue_entries',
267                                   indirection_primary_key='id',
268                                   indirection_foreign_key='job_id')
269    _delete_table_data_before_date('afe_special_tasks', 'id',
270                                   'afe_jobs', 'id',
271                                   afe_date, time_column='created_on',
272                                   foreign_key='queue_entry_id',
273                                   indirection_table='afe_host_queue_entries',
274                                   indirection_primary_key='id',
275                                   indirection_foreign_key='job_id')
276    _delete_table_data_before_date('afe_host_queue_entries', 'id',
277                                   'afe_jobs', 'id',
278                                   afe_date, time_column='created_on',
279                                   foreign_key=JOB_ID)
280    _delete_table_data_before_date('afe_job_keyvals', 'id',
281                                   'afe_jobs', 'id',
282                                   afe_date, time_column='created_on',
283                                   foreign_key=JOB_ID)
284    _delete_table_data_before_date('afe_jobs_dependency_labels', 'id',
285                                   'afe_jobs', 'id',
286                                   afe_date, time_column='created_on',
287                                   foreign_key=JOB_ID)
288    _delete_table_data_before_date('afe_jobs', 'id',
289                                   None, None,
290                                   afe_date, time_column='created_on')
291    # Special tasks that aren't associated with an HQE
292    # Since we don't do the queue_entry_id=NULL check, we might wipe out a bit
293    # more than we should, but I doubt anyone will notice or care.
294    _delete_table_data_before_date('afe_special_tasks', 'id',
295                                   None, None,
296                                   afe_date, time_column='time_requested')
297
298    # Now go through and clean up all the rows related to tko_jobs prior to
299    # date.
300    logging.info('Cleaning up all data related to tko_jobs prior to %s.',
301                  date)
302    _delete_table_data_before_date('tko_test_attributes', 'id',
303                                   'tko_tests', TEST_IDX,
304                                   date, foreign_key=TEST_IDX)
305    _delete_table_data_before_date('tko_test_labels_tests', 'id',
306                                   'tko_tests', TEST_IDX,
307                                   date, foreign_key= 'test_id')
308    _delete_table_data_before_date('tko_iteration_result', TEST_IDX,
309                                   'tko_tests', TEST_IDX,
310                                   date)
311    _delete_table_data_before_date('tko_iteration_perf_value', TEST_IDX,
312                                   'tko_tests', TEST_IDX,
313                                   date)
314    _delete_table_data_before_date('tko_iteration_attributes', TEST_IDX,
315                                   'tko_tests', TEST_IDX,
316                                   date)
317    _delete_table_data_before_date('tko_job_keyvals', 'id',
318                                   'tko_jobs', JOB_IDX,
319                                   date, foreign_key='job_id')
320    _delete_table_data_before_date('afe_aborted_host_queue_entries',
321                                   'queue_entry_id',
322                                   'tko_jobs', AFE_JOB_ID, date,
323                                   foreign_key='queue_entry_id',
324                                   indirection_table='afe_host_queue_entries',
325                                   indirection_primary_key='id',
326                                   indirection_foreign_key='job_id')
327    _delete_table_data_before_date('afe_special_tasks', 'id',
328                                   'tko_jobs', AFE_JOB_ID,
329                                   date, foreign_key='queue_entry_id',
330                                   indirection_table='afe_host_queue_entries',
331                                   indirection_primary_key='id',
332                                   indirection_foreign_key='job_id')
333    _delete_table_data_before_date('afe_host_queue_entries', 'id',
334                                   'tko_jobs', AFE_JOB_ID,
335                                   date, foreign_key='job_id')
336    _delete_table_data_before_date('afe_job_keyvals', 'id',
337                                   'tko_jobs', AFE_JOB_ID,
338                                   date, foreign_key='job_id')
339    _delete_table_data_before_date('afe_jobs_dependency_labels', 'id',
340                                   'tko_jobs', AFE_JOB_ID,
341                                   date, foreign_key='job_id')
342    _delete_table_data_before_date('afe_jobs', 'id',
343                                   'tko_jobs', AFE_JOB_ID,
344                                   date, foreign_key='id')
345    _delete_table_data_before_date('tko_tests', TEST_IDX,
346                                   'tko_jobs', JOB_IDX,
347                                   date, foreign_key=JOB_IDX)
348    _delete_table_data_before_date('tko_jobs', JOB_IDX,
349                                   None, None, date)
350
351
352def parse_args():
353    """Parse command line arguments"""
354    parser = argparse.ArgumentParser()
355    parser.add_argument('-v', '--verbose', action='store_true',
356                        help='Print SQL commands and results')
357    parser.add_argument('--step', type=int, action='store',
358                        default=1000,
359                        help='Number of rows to delete at once')
360    parser.add_argument('--dry_run', action='store_true',
361                        help='Print SQL queries instead of executing them.')
362    parser.add_argument('--load_ratio', type=float, action='store', default=0.2,
363                        help=('The fraction of time the script should be '
364                              'performing deletes. For example --load_ratio=.2 '
365                              'will cause the script to sleep 80% of the time, '
366                              'and perform work for the other 20%.'))
367    parser.add_argument('date', help='Keep results newer than')
368    return parser.parse_args()
369
370
371def main():
372    args = parse_args()
373
374    verbose = args.verbose or args.dry_run
375    level = logging.DEBUG if verbose else logging.INFO
376    logging.basicConfig(level=level, format=LOGGING_FORMAT)
377    logging.info('Calling: %s', sys.argv)
378
379    if not re.match(DATE_FORMAT_REGEX, args.date):
380        print 'DATE must be in yyyy-mm-dd format!'
381        return
382
383    global STEP_SIZE, DRY_RUN, LOAD_RATIO
384    STEP_SIZE = args.step
385    DRY_RUN = args.dry_run
386    LOAD_RATIO = args.load_ratio
387
388    _delete_all_data_before_date(args.date)
389
390
391if __name__ == '__main__':
392    main()
393