#!/usr/bin/python -u
# autoserv revision 07e09aff0baf871b33e5479e337e5e3e0523b729
# Copyright 2007-2008 Martin J. Bligh <mbligh@google.com>, Google Inc.
# Released under the GPL v2

"""
Run a control file through the server side engine
"""

import sys, os, re, traceback, signal, time, logging, getpass

import common

from autotest_lib.client.common_lib.global_config import global_config
# NOTE(review): this value is not read anywhere in this file, and it is looked
# up with default=True while the ImportError handler below looks up the same
# key with default=False. Confirm which default is intended before merging
# the two lookups.
require_atfork = global_config.get_config_value(
        'AUTOSERV', 'require_atfork_module', type=bool, default=True)

try:
    import atfork
    atfork.monkeypatch_os_fork_functions()
    import atfork.stdlib_fixer
    # Fix the Python standard library for threading+fork safety with its
    # internal locks.  http://code.google.com/p/python-atfork/
    import warnings
    warnings.filterwarnings('ignore', 'logging module already imported')
    atfork.stdlib_fixer.fix_logging_module()
except ImportError as e:
    # A missing atfork is only fatal when the configuration asks for it.
    # The global_config object imported above is reused here rather than
    # re-importing the module under the same name.
    if global_config.get_config_value(
            'AUTOSERV', 'require_atfork_module', type=bool, default=False):
        sys.stderr.write('Please run utils/build_externals.py\n')
        print(e)
        sys.exit(1)

from autotest_lib.server import server_logging_config
from autotest_lib.server import server_job, utils, autoserv_parser, autotest
from autotest_lib.client.common_lib import pidfile, logging_manager


def log_alarm(signum, frame):
    """Signal handler for SIGALRM: log an error, then exit with status 1.

    NOTE(review): the logged text says the signal is being ignored, but the
    handler terminates the process. Confirm which behavior is intended; the
    message is left untouched here.

    @param signum: signal number (unused).
    @param frame: interrupted stack frame (unused).
    """
    logging.error("Received SIGALARM. Ignoring and continuing on.")
    sys.exit(1)


def _read_machines_file(machines_file):
    """Return the machine names listed in a file, one per line.

    Everything from a '#' to the end of a line is dropped, surrounding
    whitespace is stripped, and lines that end up empty are skipped.

    @param machines_file: path of the file to read.
    @returns: list of machine name strings in file order.
    """
    machines = []
    # The context manager closes the file even if reading fails.
    with open(machines_file, 'r') as machines_fd:
        for line in machines_fd:
            # remove comments, spaces
            name = re.sub('#.*', '', line).strip()
            if name:
                machines.append(name)
    return machines


def run_autoserv(pid_file_manager, results, parser):
    """Set up the process, build a server_job from the options and run it.

    This function does not return normally: it always finishes with
    sys.exit(exit_code), where exit_code is 1 if running the job raised and
    0 otherwise. Option validation failures are reported through
    parser.parser.error().

    @param pid_file_manager: object used to record the exit status (its
            close_file() and num_tests_failed are used), or a false value
            when no pidfile is being written.
    @param results: results directory handed to server_job; may be None.
    @param parser: parsed autoserv parser exposing .options, .args and
            .parser.
    """
    # send stdin to /dev/null
    dev_null = os.open(os.devnull, os.O_RDONLY)
    os.dup2(dev_null, sys.stdin.fileno())
    os.close(dev_null)

    # Create separate process group
    os.setpgrp()

    # Implement SIGTERM handler
    def handle_sigterm(signum, frame):
        if pid_file_manager:
            pid_file_manager.close_file(1, signal.SIGTERM)
        # Take down the whole process group, this process included.
        os.killpg(os.getpgrp(), signal.SIGKILL)

    # Set signal handler
    signal.signal(signal.SIGTERM, handle_sigterm)

    # Ignore SIGTTOU's generated by output from forked children.
    signal.signal(signal.SIGTTOU, signal.SIG_IGN)

    # If we received a SIGALARM, let's be loud about it.
    signal.signal(signal.SIGALRM, log_alarm)

    # Server side tests that call shell scripts often depend on $USER being set
    # but depending on how you launch your autotest scheduler it may not be set.
    os.environ['USER'] = getpass.getuser()

    options = parser.options
    if options.machines:
        machines = options.machines.replace(',', ' ').strip().split()
    else:
        machines = []
    machines_file = options.machines_file
    label = options.label
    group_name = options.group_name
    user = options.user
    client = options.client
    server = options.server
    install_before = options.install_before
    install_after = options.install_after
    verify = options.verify
    repair = options.repair
    cleanup = options.cleanup
    provision = options.provision
    reset = options.reset
    parse_job = options.parse_job
    execution_tag = options.execution_tag
    if not execution_tag:
        execution_tag = parse_job
    host_protection = options.host_protection
    ssh_user = options.ssh_user
    ssh_port = options.ssh_port
    ssh_pass = options.ssh_pass
    collect_crashinfo = options.collect_crashinfo
    control_filename = options.control_filename
    test_retry = options.test_retry
    verify_job_repo_url = options.verify_job_repo_url
    skip_crash_collection = options.skip_crash_collection

    # can't be both a client and a server side test
    if client and server:
        parser.parser.error("Can not specify a test as both server and client!")

    if provision and client:
        parser.parser.error("Cannot specify provisioning and client!")

    is_special_task = (verify or repair or cleanup or collect_crashinfo or
                       provision or reset)
    if len(parser.args) < 1 and not is_special_task:
        parser.parser.error("Missing argument: control file")

    # We have a control file unless it's just a verify/repair/cleanup job
    if len(parser.args) > 0:
        control = parser.args[0]
    else:
        control = None

    if machines_file:
        # A machines file replaces whatever was given on the command line.
        machines = _read_machines_file(machines_file)
        print("Read list of machines from file: %s" % machines_file)
        print(','.join(machines))

    if machines:
        for machine in machines:
            if not machine or re.search(r'\s', machine):
                parser.parser.error("Invalid machine: %s" % str(machine))
        # De-duplicate and put the machines in a stable order.
        machines = sorted(set(machines))

    if group_name and len(machines) < 2:
        parser.parser.error("-G %r may only be supplied with more than one machine."
                            % group_name)

    kwargs = {'group_name': group_name, 'tag': execution_tag}
    if control_filename:
        kwargs['control_filename'] = control_filename
    job = server_job.server_job(control, parser.args[1:], results, label,
                                user, machines, client, parse_job,
                                ssh_user, ssh_port, ssh_pass, test_retry,
                                **kwargs)
    job.logging.start_logging()
    job.init_parser()

    # perform checks
    job.precheck()

    # run the job
    exit_code = 0
    # The try blocks stay nested on purpose: the host cleanup in the inner
    # 'finally' must run before, and be covered by, the outer catch-all.
    try:
        try:
            if repair:
                job.repair(host_protection)
            elif verify:
                job.verify()
            elif provision:
                job.provision(provision)
            elif reset:
                job.reset()
            else:
                job.run(cleanup, install_before, install_after,
                        verify_job_repo_url=verify_job_repo_url,
                        only_collect_crashinfo=collect_crashinfo,
                        skip_crash_collection=skip_crash_collection)
        finally:
            while job.hosts:
                host = job.hosts.pop()
                host.close()
    except:
        # Deliberate catch-all: any failure becomes exit code 1 plus a
        # printed traceback.
        exit_code = 1
        traceback.print_exc()

    if pid_file_manager:
        pid_file_manager.num_tests_failed = job.num_tests_failed
        pid_file_manager.close_file(exit_code)
    job.cleanup_parser()

    sys.exit(exit_code)


def main():
    """Parse the command line, prepare results/logging and run autoserv.

    Exits the process with the exit code produced by run_autoserv(), or with
    1 when the arguments or the results directory are unusable or an
    unexpected exception escapes run_autoserv().
    """
    # grab the parser
    parser = autoserv_parser.autoserv_parser
    parser.parse_args()

    if len(sys.argv) == 1:
        parser.parser.print_help()
        sys.exit(1)

    # Bound up front so it exists on every path, including no_logging.
    resultdir_exists = False
    if parser.options.no_logging:
        results = None
    else:
        results = parser.options.results
        if not results:
            results = 'results.' + time.strftime('%Y-%m-%d-%H.%M.%S')
        results = os.path.abspath(results)
        for filename in ('control.srv', 'status.log', '.autoserv_execute'):
            if os.path.exists(os.path.join(results, filename)):
                resultdir_exists = True
        if not parser.options.use_existing_results and resultdir_exists:
            error = "Error: results directory already exists: %s\n" % results
            sys.stderr.write(error)
            sys.exit(1)

        # Now that we certified that there's no leftover results dir from
        # previous jobs, lets create the result dir since the logging system
        # needs to create the log file in there.
        if not os.path.isdir(results):
            os.makedirs(results)

    logging_manager.configure_logging(
            server_logging_config.ServerLoggingConfig(), results_dir=results,
            use_console=not parser.options.no_tee,
            verbose=parser.options.verbose,
            no_console_prefix=parser.options.no_console_prefix)
    if results:
        logging.info("Results placed in %s", results)

        # wait until now to perform this check, so it get properly logged
        if parser.options.use_existing_results and not resultdir_exists:
            logging.error("No existing results directory found: %s", results)
            sys.exit(1)

    if parser.options.write_pidfile:
        pid_file_manager = pidfile.PidFileManager(parser.options.pidfile_label,
                                                  results)
        pid_file_manager.open_file()
    else:
        pid_file_manager = None

    autotest.BaseAutotest.set_install_in_tmpdir(
            parser.options.install_in_tmpdir)

    exit_code = 0
    try:
        run_autoserv(pid_file_manager, results, parser)
    except SystemExit as e:
        # run_autoserv() always finishes through sys.exit(); keep its code.
        exit_code = e.code
    except:
        traceback.print_exc()
        # If we don't know what happened, we'll classify it as
        # an 'abort' and return 1.
        exit_code = 1
    finally:
        if pid_file_manager:
            pid_file_manager.close_file(exit_code)
    sys.exit(exit_code)


if __name__ == '__main__':
    main()