#!/usr/bin/python -u
# Copyright 2007-2008 Martin J. Bligh <mbligh@google.com>, Google Inc.
# Released under the GPL v2
#
# autoserv.py revision a1ecd5c903928f359cd6cbcff5c986652e109599

"""
Run a control file through the server side engine
"""

import sys, os, re, traceback, signal, time, logging, getpass

import common

from autotest_lib.client.common_lib import control_data
from autotest_lib.client.common_lib import global_config
# NOTE(review): require_atfork is not referenced anywhere else in this file;
# the ImportError handler below re-reads the same option with default=False.
require_atfork = global_config.global_config.get_config_value(
        'AUTOSERV', 'require_atfork_module', type=bool, default=True)


try:
    import atfork
    atfork.monkeypatch_os_fork_functions()
    import atfork.stdlib_fixer
    # Fix the Python standard library for threading+fork safety with its
    # internal locks. http://code.google.com/p/python-atfork/
    import warnings
    warnings.filterwarnings('ignore', 'logging module already imported')
    atfork.stdlib_fixer.fix_logging_module()
except ImportError as e:
    from autotest_lib.client.common_lib import global_config
    # Only fatal when the site configuration explicitly requires atfork.
    if global_config.global_config.get_config_value(
            'AUTOSERV', 'require_atfork_module', type=bool, default=False):
        sys.stderr.write('Please run utils/build_externals.py\n')
        print(e)
        sys.exit(1)

from autotest_lib.server import frontend
from autotest_lib.server.hosts import site_host
from autotest_lib.server import server_logging_config
from autotest_lib.server import server_job, utils, autoserv_parser, autotest
from autotest_lib.server import utils as server_utils

from autotest_lib.client.common_lib import pidfile, logging_manager
from autotest_lib.site_utils.graphite import stats


def log_alarm(signum, frame):
    """Signal handler for SIGALRM: log an error and exit with status 1.

    @param signum: Signal number delivered to the handler (unused).
    @param frame: Interrupted stack frame (unused).
    """
    # NOTE(review): the message says the signal is ignored, but the handler
    # exits right after logging it. Message and behavior are left as found.
    logging.error("Received SIGALARM. Ignoring and continuing on.")
    sys.exit(1)


def run_autoserv(pid_file_manager, results, parser):
    """Set up the process environment, build a server_job and run it.

    Always finishes by calling sys.exit() with 0 on success or 1 if the
    job raised, so callers observe the outcome as SystemExit.

    @param pid_file_manager: Object whose close_file() is called with the
            exit code (and whose num_tests_failed is set), or a false value
            when no pidfile is being written.
    @param results: Results directory passed through to server_job (None
            when logging is disabled by the caller).
    @param parser: The autoserv parser; its `options`, `args` and `parser`
            attributes are read here.
    """
    # send stdin to /dev/null
    dev_null = os.open(os.devnull, os.O_RDONLY)
    os.dup2(dev_null, sys.stdin.fileno())
    os.close(dev_null)

    # Create separate process group
    os.setpgrp()

    # Implement SIGTERM handler
    def handle_sigterm(signum, frame):
        # Record the termination in the pidfile first, then kill the whole
        # process group (this process included).
        if pid_file_manager:
            pid_file_manager.close_file(1, signal.SIGTERM)
        os.killpg(os.getpgrp(), signal.SIGKILL)

    # Set signal handler
    signal.signal(signal.SIGTERM, handle_sigterm)

    # Ignore SIGTTOU's generated by output from forked children.
    signal.signal(signal.SIGTTOU, signal.SIG_IGN)

    # If we received a SIGALARM, let's be loud about it.
    signal.signal(signal.SIGALRM, log_alarm)

    # Server side tests that call shell scripts often depend on $USER being set
    # but depending on how you launch your autotest scheduler it may not be set.
    os.environ['USER'] = getpass.getuser()

    options = parser.options
    if options.machines:
        machines = options.machines.replace(',', ' ').strip().split()
    else:
        machines = []
    machines_file = options.machines_file
    label = options.label
    group_name = options.group_name
    user = options.user
    client = options.client
    server = options.server
    install_before = options.install_before
    install_after = options.install_after
    verify = options.verify
    repair = options.repair
    cleanup = options.cleanup
    provision = options.provision
    reset = options.reset
    parse_job = options.parse_job
    execution_tag = options.execution_tag
    if not execution_tag:
        execution_tag = parse_job
    host_protection = options.host_protection
    ssh_user = options.ssh_user
    ssh_port = options.ssh_port
    ssh_pass = options.ssh_pass
    collect_crashinfo = options.collect_crashinfo
    control_filename = options.control_filename
    test_retry = options.test_retry
    verify_job_repo_url = options.verify_job_repo_url
    skip_crash_collection = options.skip_crash_collection

    # can't be both a client and a server side test
    if client and server:
        parser.parser.error("Can not specify a test as both server and client!")

    if provision and client:
        parser.parser.error("Cannot specify provisioning and client!")

    is_special_task = (verify or repair or cleanup or collect_crashinfo or
                       provision or reset)
    if not parser.args and not is_special_task:
        parser.parser.error("Missing argument: control file")

    # We have a control file unless it's just a verify/repair/cleanup job
    control = parser.args[0] if parser.args else None

    if machines_file:
        # A machines file replaces whatever was given on the command line.
        machines = []
        with open(machines_file, 'r') as machines_fh:
            for line in machines_fh:
                # remove comments, spaces
                name = re.sub('#.*', '', line).strip()
                if name:
                    machines.append(name)
        print("Read list of machines from file: %s" % machines_file)
        print(','.join(machines))

    if machines:
        for machine in machines:
            if not machine or re.search(r'\s', machine):
                parser.parser.error("Invalid machine: %s" % str(machine))
        # De-duplicate and order the host list.
        machines = sorted(set(machines))

    if group_name and len(machines) < 2:
        parser.parser.error("-G %r may only be supplied with more than one machine."
                            % group_name)

    kwargs = {'group_name': group_name, 'tag': execution_tag}
    if control_filename:
        kwargs['control_filename'] = control_filename
    job = server_job.server_job(control, parser.args[1:], results, label,
                                user, machines, client, parse_job,
                                ssh_user, ssh_port, ssh_pass, test_retry,
                                **kwargs)
    job.logging.start_logging()
    job.init_parser()

    # perform checks
    job.precheck()

    # run the job
    exit_code = 0
    try:
        try:
            if repair:
                job.repair(host_protection)
            elif verify:
                job.verify()
            elif provision:
                job.provision(provision)
            elif reset:
                job.reset()
            else:
                job.run(cleanup, install_before, install_after,
                        verify_job_repo_url=verify_job_repo_url,
                        only_collect_crashinfo=collect_crashinfo,
                        skip_crash_collection=skip_crash_collection)
        finally:
            # Close every host the job opened, even when the job failed.
            while job.hosts:
                host = job.hosts.pop()
                host.close()
    except:
        # Catch-all kept as found: any failure, including one raised while
        # closing hosts, is reported as exit code 1 with a traceback.
        exit_code = 1
        traceback.print_exc()

    if pid_file_manager:
        pid_file_manager.num_tests_failed = job.num_tests_failed
        pid_file_manager.close_file(exit_code)
    job.cleanup_parser()

    sys.exit(exit_code)


def main():
    """Command-line entry point.

    Parses arguments, prepares the results directory and logging, optionally
    opens a pidfile and starts a run-time stats timer, then delegates to
    run_autoserv() and exits with its exit code.
    """
    # White list of tests with run time measurement enabled.
    measure_run_time_tests_names = global_config.global_config.get_config_value(
            'AUTOSERV', 'measure_run_time_tests', type=str)
    if measure_run_time_tests_names:
        measure_run_time_tests = [t.strip() for t in
                                  measure_run_time_tests_names.split(',')]
    else:
        measure_run_time_tests = []
    # grab the parser
    parser = autoserv_parser.autoserv_parser
    parser.parse_args()

    if len(sys.argv) == 1:
        parser.parser.print_help()
        sys.exit(1)

    # Defined up front so the later use_existing_results check can never see
    # an unbound name when logging (and so the results dir) is disabled.
    resultdir_exists = False
    if parser.options.no_logging:
        results = None
    else:
        results = parser.options.results
        if not results:
            results = 'results.' + time.strftime('%Y-%m-%d-%H.%M.%S')
        results = os.path.abspath(results)
        # Any of these files marks the directory as holding a previous run.
        resultdir_exists = any(
                os.path.exists(os.path.join(results, filename))
                for filename in ('control.srv', 'status.log',
                                 '.autoserv_execute'))
        if not parser.options.use_existing_results and resultdir_exists:
            error = "Error: results directory already exists: %s\n" % results
            sys.stderr.write(error)
            sys.exit(1)

        # Now that we certified that there's no leftover results dir from
        # previous jobs, lets create the result dir since the logging system
        # needs to create the log file in there.
        if not os.path.isdir(results):
            os.makedirs(results)

    logging_manager.configure_logging(
            server_logging_config.ServerLoggingConfig(), results_dir=results,
            use_console=not parser.options.no_tee,
            verbose=parser.options.verbose,
            no_console_prefix=parser.options.no_console_prefix)
    if results:
        logging.info("Results placed in %s", results)

        # wait until now to perform this check, so it get properly logged
        if parser.options.use_existing_results and not resultdir_exists:
            logging.error("No existing results directory found: %s", results)
            sys.exit(1)

    if parser.options.write_pidfile:
        pid_file_manager = pidfile.PidFileManager(parser.options.pidfile_label,
                                                  results)
        pid_file_manager.open_file()
    else:
        pid_file_manager = None

    autotest.BaseAutotest.set_install_in_tmpdir(
            parser.options.install_in_tmpdir)

    timer = None
    try:
        # Take the first argument as control file name, get the test name from
        # the control file. If the test name exists in the list of tests with
        # run time measurement enabled, start a timer to begin measurement.
        if (len(parser.args) > 0 and parser.args[0] != '' and
                parser.options.machines):
            test_name = control_data.parse_control(parser.args[0]).name
            if test_name in measure_run_time_tests:
                machines = parser.options.machines.replace(
                        ',', ' ').strip().split()
                afe = frontend.AFE()
                board = server_utils.get_board_from_afe(machines[0], afe)
                timer = stats.Timer('autoserv_run_time.%s.%s' %
                                    (board, test_name))
                timer.start()
    except control_data.ControlVariableException as e:
        # A control file that fails to parse only disables the timer.
        logging.error(str(e))
    exit_code = 0
    try:
        try:
            run_autoserv(pid_file_manager, results, parser)
        except SystemExit as e:
            # run_autoserv reports its result through sys.exit().
            exit_code = e.code
        except:
            traceback.print_exc()
            # If we don't know what happened, we'll classify it as
            # an 'abort' and return 1.
            exit_code = 1
    finally:
        if pid_file_manager:
            pid_file_manager.close_file(exit_code)
        if timer:
            timer.stop()
    sys.exit(exit_code)


if __name__ == '__main__':
    main()