autoserv.py revision f1af17e4efeaf4407710bcf21b2d2e7b07f51a99
1#!/usr/bin/python -u 2# Copyright 2007-2008 Martin J. Bligh <mbligh@google.com>, Google Inc. 3# Released under the GPL v2 4 5""" 6Run an control file through the server side engine 7""" 8 9import sys, os, re, traceback, signal, time, logging, getpass 10 11import common 12 13from autotest_lib.client.common_lib.global_config import global_config 14require_atfork = global_config.get_config_value( 15 'AUTOSERV', 'require_atfork_module', type=bool, default=True) 16 17try: 18 import atfork 19 atfork.monkeypatch_os_fork_functions() 20 import atfork.stdlib_fixer 21 # Fix the Python standard library for threading+fork safety with its 22 # internal locks. http://code.google.com/p/python-atfork/ 23 import warnings 24 warnings.filterwarnings('ignore', 'logging module already imported') 25 atfork.stdlib_fixer.fix_logging_module() 26except ImportError, e: 27 from autotest_lib.client.common_lib import global_config 28 if global_config.global_config.get_config_value( 29 'AUTOSERV', 'require_atfork_module', type=bool, default=False): 30 print >>sys.stderr, 'Please run utils/build_externals.py' 31 print e 32 sys.exit(1) 33 34from autotest_lib.server import server_logging_config 35from autotest_lib.server import server_job, utils, autoserv_parser, autotest 36from autotest_lib.client.common_lib import pidfile, logging_manager 37 38def log_alarm(signum, frame): 39 logging.error("Received SIGALARM. Ignoring and continuing on.") 40 # TODO(milleral): Uncomment sys.exit once SIGALARM bug has been found. 41 #sys.exit(1) 42 43def run_autoserv(pid_file_manager, results, parser): 44 # send stdin to /dev/null 45 dev_null = os.open(os.devnull, os.O_RDONLY) 46 os.dup2(dev_null, sys.stdin.fileno()) 47 os.close(dev_null) 48 49 # Create separate process group 50 os.setpgrp() 51 52 # Implement SIGTERM handler 53 def handle_sigterm(signum, frame): 54 if pid_file_manager: 55 pid_file_manager.close_file(1, signal.SIGTERM) 56 os.killpg(os.getpgrp(), signal.SIGKILL) 57 58 # Set signal handler 59 signal.signal(signal.SIGTERM, handle_sigterm) 60 61 # Ignore SIGTTOU's generated by output from forked children. 62 signal.signal(signal.SIGTTOU, signal.SIG_IGN) 63 64 # If we received a SIGALARM, let's be loud about it. 65 signal.signal(signal.SIGALRM, log_alarm) 66 67 # Server side tests that call shell scripts often depend on $USER being set 68 # but depending on how you launch your autotest scheduler it may not be set. 69 os.environ['USER'] = getpass.getuser() 70 71 if parser.options.machines: 72 machines = parser.options.machines.replace(',', ' ').strip().split() 73 else: 74 machines = [] 75 machines_file = parser.options.machines_file 76 label = parser.options.label 77 group_name = parser.options.group_name 78 user = parser.options.user 79 client = parser.options.client 80 server = parser.options.server 81 install_before = parser.options.install_before 82 install_after = parser.options.install_after 83 verify = parser.options.verify 84 repair = parser.options.repair 85 cleanup = parser.options.cleanup 86 no_tee = parser.options.no_tee 87 parse_job = parser.options.parse_job 88 execution_tag = parser.options.execution_tag 89 if not execution_tag: 90 execution_tag = parse_job 91 host_protection = parser.options.host_protection 92 ssh_user = parser.options.ssh_user 93 ssh_port = parser.options.ssh_port 94 ssh_pass = parser.options.ssh_pass 95 collect_crashinfo = parser.options.collect_crashinfo 96 control_filename = parser.options.control_filename 97 98 # can't be both a client and a server side test 99 if client and server: 100 parser.parser.error("Can not specify a test as both server and client!") 101 102 if len(parser.args) < 1 and not (verify or repair or cleanup 103 or collect_crashinfo): 104 parser.parser.error("Missing argument: control file") 105 106 # We have a control file unless it's just a verify/repair/cleanup job 107 if len(parser.args) > 0: 108 control = parser.args[0] 109 else: 110 control = None 111 112 if machines_file: 113 machines = [] 114 for m in open(machines_file, 'r').readlines(): 115 # remove comments, spaces 116 m = re.sub('#.*', '', m).strip() 117 if m: 118 machines.append(m) 119 print "Read list of machines from file: %s" % machines_file 120 print ','.join(machines) 121 122 if machines: 123 for machine in machines: 124 if not machine or re.search('\s', machine): 125 parser.parser.error("Invalid machine: %s" % str(machine)) 126 machines = list(set(machines)) 127 machines.sort() 128 129 if group_name and len(machines) < 2: 130 parser.parser.error("-G %r may only be supplied with more than one machine." 131 % group_name) 132 133 kwargs = {'group_name': group_name, 'tag': execution_tag} 134 if control_filename: 135 kwargs['control_filename'] = control_filename 136 job = server_job.server_job(control, parser.args[1:], results, label, 137 user, machines, client, parse_job, 138 ssh_user, ssh_port, ssh_pass, **kwargs) 139 job.logging.start_logging() 140 job.init_parser() 141 142 # perform checks 143 job.precheck() 144 145 # run the job 146 exit_code = 0 147 try: 148 try: 149 if repair: 150 job.repair(host_protection) 151 elif verify: 152 job.verify() 153 else: 154 job.run(cleanup, install_before, install_after, 155 only_collect_crashinfo=collect_crashinfo) 156 finally: 157 while job.hosts: 158 host = job.hosts.pop() 159 host.close() 160 except: 161 exit_code = 1 162 traceback.print_exc() 163 164 if pid_file_manager: 165 pid_file_manager.num_tests_failed = job.num_tests_failed 166 pid_file_manager.close_file(exit_code) 167 job.cleanup_parser() 168 169 sys.exit(exit_code) 170 171 172def main(): 173 # grab the parser 174 parser = autoserv_parser.autoserv_parser 175 parser.parse_args() 176 177 if len(sys.argv) == 1: 178 parser.parser.print_help() 179 sys.exit(1) 180 181 if parser.options.no_logging: 182 results = None 183 else: 184 results = parser.options.results 185 if not results: 186 results = 'results.' + time.strftime('%Y-%m-%d-%H.%M.%S') 187 results = os.path.abspath(results) 188 resultdir_exists = False 189 for filename in ('control.srv', 'status.log', '.autoserv_execute'): 190 if os.path.exists(os.path.join(results, filename)): 191 resultdir_exists = True 192 if not parser.options.use_existing_results and resultdir_exists: 193 error = "Error: results directory already exists: %s\n" % results 194 sys.stderr.write(error) 195 sys.exit(1) 196 197 # Now that we certified that there's no leftover results dir from 198 # previous jobs, lets create the result dir since the logging system 199 # needs to create the log file in there. 200 if not os.path.isdir(results): 201 os.makedirs(results) 202 203 logging_manager.configure_logging( 204 server_logging_config.ServerLoggingConfig(), results_dir=results, 205 use_console=not parser.options.no_tee, 206 verbose=parser.options.verbose, 207 no_console_prefix=parser.options.no_console_prefix) 208 if results: 209 logging.info("Results placed in %s" % results) 210 211 # wait until now to perform this check, so it get properly logged 212 if parser.options.use_existing_results and not resultdir_exists: 213 logging.error("No existing results directory found: %s", results) 214 sys.exit(1) 215 216 217 if parser.options.write_pidfile: 218 pid_file_manager = pidfile.PidFileManager(parser.options.pidfile_label, 219 results) 220 pid_file_manager.open_file() 221 else: 222 pid_file_manager = None 223 224 autotest.BaseAutotest.set_install_in_tmpdir( 225 parser.options.install_in_tmpdir) 226 227 exit_code = 0 228 try: 229 try: 230 run_autoserv(pid_file_manager, results, parser) 231 except SystemExit, e: 232 exit_code = e.code 233 except: 234 traceback.print_exc() 235 # If we don't know what happened, we'll classify it as 236 # an 'abort' and return 1. 237 exit_code = 1 238 finally: 239 if pid_file_manager: 240 pid_file_manager.close_file(exit_code) 241 sys.exit(exit_code) 242 243 244if __name__ == '__main__': 245 main() 246