# retry.py revision 445cb74570782a6d80c1e00dc02eb95fcae7f178
# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import logging, random, signal, sys, time

from autotest_lib.client.common_lib import error
from autotest_lib.frontend.afe.json_rpc import proxy


class TimeoutException(Exception):
    """
    Exception to be raised for when alarm is triggered.
    """
    pass


def handler(signum, frame):
    """
    SIGALRM handler that converts the alarm into a TimeoutException.

    @param signum: number of the delivered signal (unused).
    @param frame: stack frame that was interrupted (unused).
    @raises TimeoutException: always.
    """
    raise TimeoutException('Call is timed out.')


def install_sigalarm_handler(new_handler):
    """
    Try installing a sigalarm handler.

    In order to protect apache, wsgi intercepts any attempt to install a
    sigalarm handler, so our function will feel the full force of a sigalarm
    even if we try to install a pacifying signal handler. To avoid this we
    need to confirm that the handler we tried to install really was installed.

    @param new_handler: The new handler to install. This must be a callable
                        object, or signal.SIG_IGN/SIG_DFL which correspond to
                        the numbers 1,0 respectively.
    @return: True if the installation of new_handler succeeded, False otherwise.
    """
    if (new_handler is None or
        (not callable(new_handler) and
         new_handler != signal.SIG_IGN and
         new_handler != signal.SIG_DFL)):
        logging.warning('Trying to install an invalid sigalarm handler.')
        return False

    signal.signal(signal.SIGALRM, new_handler)
    # Read the handler back instead of trusting signal.signal(): the install
    # may have been intercepted (see docstring).
    installed_handler = signal.getsignal(signal.SIGALRM)
    return installed_handler == new_handler


def set_sigalarm_timeout(timeout_secs, default_timeout=60):
    """
    Set the sigalarm timeout.

    This method treats any timeout <= 0 as a possible error and falls back to
    using its default timeout, since negative timeouts can have 'alarming'
    effects. Though 0 is a valid timeout, it is often used to cancel signals;
    in order to set a sigalarm of 0 please call signal.alarm directly as there
    are many situations where a 0 timeout is considered invalid.

    signal.alarm only accepts whole seconds. A positive timeout shorter than
    one second is rounded up to 1 rather than being truncated to 0 and then
    mistaken for an invalid value; larger fractional timeouts are truncated.

    @param timeout_secs: The new timeout, in seconds.
    @param default_timeout: The default timeout to use, if timeout <= 0.
    @return: The old sigalarm timeout
    """
    timeout_sec_n = int(timeout_secs)
    if timeout_sec_n <= 0:
        if float(timeout_secs) > 0:
            # 0 < timeout < 1: the caller asked for a real, short timeout.
            timeout_sec_n = 1
        else:
            timeout_sec_n = int(default_timeout)
    return signal.alarm(timeout_sec_n)


def timeout(func, args=(), kwargs=None, timeout_sec=60.0,
            default_result=None):
    """
    This function runs the given function using the args, kwargs and
    returns the given default value if the timeout_sec is exceeded.

    Exceptions raised by func, other than TimeoutException, propagate to the
    caller after the previous SIGALRM handler and alarm have been restored.

    @param func: function to be called.
    @param args: arguments for function to be called.
    @param kwargs: keyword arguments for function to be called. None (the
                   default) means no keyword arguments.
    @param timeout_sec: timeout setting for call to exit, in seconds.
    @param default_result: default return value for the function call.

    @return 1: is_timeout 2: result of the function call. If
            is_timeout is True, the call is timed out. If the
            value is False, the call is finished on time.
    """
    if kwargs is None:
        kwargs = {}

    old_alarm_sec = 0
    old_handler = signal.getsignal(signal.SIGALRM)
    installed_handler = install_sigalarm_handler(handler)
    if installed_handler:
        old_alarm_sec = set_sigalarm_timeout(timeout_sec, default_timeout=60)

    # If old_timeout_time = 0 we either didn't install a handler, or sigalrm
    # had a signal.SIG_DFL handler with 0 timeout. In the latter case we still
    # need to restore the handler/timeout.
    old_timeout_time = (time.time() + old_alarm_sec) if old_alarm_sec > 0 else 0

    try:
        default_result = func(*args, **kwargs)
        return False, default_result
    except TimeoutException:
        return True, default_result
    finally:
        # If we installed a sigalarm handler, cancel it since our function
        # returned on time. If we can successfully restore the old handler,
        # reset the old timeout, or, if the old timeout's deadline has passed,
        # set the sigalarm to fire in one second. If the old_timeout_time is 0
        # we don't need to set the sigalarm timeout since we have already set it
        # as a byproduct of cancelling the current signal.
        if installed_handler:
            signal.alarm(0)
            if install_sigalarm_handler(old_handler) and old_timeout_time:
                set_sigalarm_timeout(int(old_timeout_time - time.time()),
                                     default_timeout=1)


if sys.version_info[0] >= 3:
    def _reraise(exc_info):
        """
        Re-raise a cached exception with its original traceback.

        @param exc_info: (type, value, traceback) tuple from sys.exc_info().
        """
        raise exc_info[1].with_traceback(exc_info[2])
else:
    # The three-expression raise statement is a syntax error on Python 3, so
    # it is kept inside a string that only a Python 2 interpreter compiles.
    exec('def _reraise(exc_info):\n'
         '    raise exc_info[0], exc_info[1], exc_info[2]\n')


def retry(ExceptionToCheck, timeout_min=1.0, delay_sec=3, blacklist=None):
    """Retry calling the decorated function using a delay with jitter.

    Will raise RPC ValidationError exceptions from the decorated
    function without retrying; a malformed RPC isn't going to
    magically become good. Will raise exceptions in blacklist as well.

    Once the time budget is used up, the exception caught on the last attempt
    is re-raised with its original traceback; if the last attempt timed out
    instead, TimeoutException is raised.

    original from:
      http://www.saltycrane.com/blog/2009/11/trying-out-retry-decorator-python/

    @param ExceptionToCheck: the exception to check.  May be a tuple of
                             exceptions to check.
    @param timeout_min: timeout in minutes until giving up.
    @param delay_sec: pre-jittered delay between retries in seconds.  Actual
                      delays will be centered around this value, ranging up to
                      50% off this midpoint.
    @param blacklist: a list of exceptions that will be raised without retrying
    """
    def deco_retry(func):
        random.seed()


        def delay():
            """
            'Jitter' the delay, up to 50% in either direction.
            """
            random_delay = random.uniform(.5 * delay_sec, 1.5 * delay_sec)
            logging.warning('Retrying in %f seconds...', random_delay)
            time.sleep(random_delay)


        def func_retry(*args, **kwargs):
            # Used to cache exception to be raised later.
            exc_info = None
            delayed_enabled = False
            exception_tuple = () if blacklist is None else tuple(blacklist)
            start_time = time.time()
            remaining_time = timeout_min * 60

            while remaining_time > 0:
                # Sleep before every attempt except the first one.
                # NOTE: remaining_time was computed before this sleep, so the
                # overall run time can exceed timeout_min by up to one delay.
                if delayed_enabled:
                    delay()
                else:
                    delayed_enabled = True
                try:
                    # Clear the cache
                    exc_info = None
                    is_timeout, result = timeout(func, args, kwargs,
                                                 remaining_time)
                    if not is_timeout:
                        return result
                except exception_tuple:
                    raise
                except (error.CrosDynamicSuiteException,
                        proxy.ValidationError):
                    raise
                except ExceptionToCheck as e:
                    logging.warning('%s(%s)', e.__class__, e)
                    # Cache the exception to be raised later.
                    exc_info = sys.exc_info()

                remaining_time = int(timeout_min*60 -
                                     (time.time() - start_time))

            # The call must have timed out or raised ExceptionToCheck.
            if not exc_info:
                raise TimeoutException('Call is timed out.')
            # Raise the cached exception with original backtrace.
            _reraise(exc_info)


        return func_retry  # true decorator
    return deco_retry