autoserv revision 07e09aff0baf871b33e5479e337e5e3e0523b729
1#!/usr/bin/python -u
2# Copyright 2007-2008 Martin J. Bligh <mbligh@google.com>, Google Inc.
3# Released under the GPL v2
4
5"""
6Run a control file through the server side engine
7"""
8
9import sys, os, re, traceback, signal, time, logging, getpass
10
11import common
12
from autotest_lib.client.common_lib.global_config import global_config
# Read whether the atfork module is mandatory; True is used when the
# AUTOSERV/require_atfork_module setting is absent.
# NOTE(review): require_atfork is not referenced again in the visible part of
# this file, and the except branch below re-reads the same setting with
# default=False instead -- confirm which default is actually intended.
require_atfork = global_config.get_config_value(
        'AUTOSERV', 'require_atfork_module', type=bool, default=True)

try:
    import atfork
    atfork.monkeypatch_os_fork_functions()
    import atfork.stdlib_fixer
    # Fix the Python standard library for threading+fork safety with its
    # internal locks.  http://code.google.com/p/python-atfork/
    import warnings
    # Silence the 'logging module already imported' warning before applying
    # the logging fix below.
    warnings.filterwarnings('ignore', 'logging module already imported')
    atfork.stdlib_fixer.fix_logging_module()
except ImportError, e:
    # This import rebinds the name global_config from the object imported
    # above to the global_config module itself, which is why the lookup below
    # goes through global_config.global_config.
    from autotest_lib.client.common_lib import global_config
    # A missing atfork is only fatal when the configuration asks for it; here
    # the setting is treated as False when it is absent.
    if global_config.global_config.get_config_value(
            'AUTOSERV', 'require_atfork_module', type=bool, default=False):
        print >>sys.stderr, 'Please run utils/build_externals.py'
        # Note that the exception text goes to stdout, unlike the hint above.
        print e
        sys.exit(1)
33
34from autotest_lib.server import server_logging_config
35from autotest_lib.server import server_job, utils, autoserv_parser, autotest
36from autotest_lib.client.common_lib import pidfile, logging_manager
37
def log_alarm(signum, frame):
    """
    Signal handler for SIGALRM: log the event loudly, then terminate.

    @param signum: Number of the signal being handled (unused).
    @param frame: Stack frame that was interrupted by the signal (unused).

    @raise SystemExit: Always, with exit status 1.
    """
    # The previous message claimed the signal was being ignored, but this
    # handler has always exited right after logging. Report what really
    # happens so the log does not mislead whoever investigates the abort.
    logging.error("Received SIGALARM. Exiting with status 1.")
    sys.exit(1)
41
def _read_machines_file(machines_file):
    """
    Return the machine names listed in a machines file, in file order.

    Everything from a '#' to the end of a line is dropped as a comment,
    surrounding whitespace is stripped and lines left empty are skipped.

    @param machines_file: Path of the file to read.

    @return: List of machine name strings.
    """
    machines = []
    # 'with' guarantees the handle is closed, even if reading fails part way.
    with open(machines_file, 'r') as machines_fd:
        for line in machines_fd:
            # remove comments, spaces
            name = re.sub('#.*', '', line).strip()
            if name:
                machines.append(name)
    return machines


def run_autoserv(pid_file_manager, results, parser):
    """
    Build a server_job from the parsed command line, run it and exit.

    The process is first detached from stdin, moved into its own process
    group and given SIGTERM/SIGTTOU/SIGALRM handlers. The options are then
    validated, the job is created and the requested action (repair, verify,
    provision, reset or a regular run) is executed.

    @param pid_file_manager: Object whose close_file() is called and whose
            num_tests_failed is set once the job is over; any false value
            (e.g. None) disables pidfile handling.
    @param results: Results directory, passed through to server_job.
    @param parser: Parsed command line; parser.options and parser.args are
            read, and invalid combinations are reported through
            parser.parser.error().

    @raise SystemExit: Always raised at the end, with status 0 when the job
            action finished without an exception and 1 otherwise.
    """
    # send stdin to /dev/null
    dev_null = os.open(os.devnull, os.O_RDONLY)
    os.dup2(dev_null, sys.stdin.fileno())
    os.close(dev_null)

    # Create separate process group
    os.setpgrp()

    # Implement SIGTERM handler
    def handle_sigterm(signum, frame):
        # Record the termination in the pidfile first: the SIGKILL below is
        # sent to our own process group, so nothing can run after it.
        if pid_file_manager:
            pid_file_manager.close_file(1, signal.SIGTERM)
        os.killpg(os.getpgrp(), signal.SIGKILL)

    # Set signal handler
    signal.signal(signal.SIGTERM, handle_sigterm)

    # Ignore SIGTTOU's generated by output from forked children.
    signal.signal(signal.SIGTTOU, signal.SIG_IGN)

    # If we received a SIGALARM, let's be loud about it.
    signal.signal(signal.SIGALRM, log_alarm)

    # Server side tests that call shell scripts often depend on $USER being set
    # but depending on how you launch your autotest scheduler it may not be set.
    os.environ['USER'] = getpass.getuser()

    options = parser.options
    if options.machines:
        # Accept both comma and whitespace separated machine lists.
        machines = options.machines.replace(',', ' ').strip().split()
    else:
        machines = []
    machines_file = options.machines_file
    label = options.label
    group_name = options.group_name
    user = options.user
    client = options.client
    server = options.server
    install_before = options.install_before
    install_after = options.install_after
    verify = options.verify
    repair = options.repair
    cleanup = options.cleanup
    provision = options.provision
    reset = options.reset
    parse_job = options.parse_job
    # The execution tag falls back to the parse job name when not given.
    execution_tag = options.execution_tag or parse_job
    host_protection = options.host_protection
    ssh_user = options.ssh_user
    ssh_port = options.ssh_port
    ssh_pass = options.ssh_pass
    collect_crashinfo = options.collect_crashinfo
    control_filename = options.control_filename
    test_retry = options.test_retry
    verify_job_repo_url = options.verify_job_repo_url
    skip_crash_collection = options.skip_crash_collection

    # can't be both a client and a server side test
    if client and server:
        parser.parser.error("Can not specify a test as both server and client!")

    if provision and client:
        parser.parser.error("Cannot specify provisioning and client!")

    is_special_task = (verify or repair or cleanup or collect_crashinfo or
                       provision or reset)
    if len(parser.args) < 1 and not is_special_task:
        parser.parser.error("Missing argument: control file")

    # We have a control file unless it's just a verify/repair/cleanup job
    if len(parser.args) > 0:
        control = parser.args[0]
    else:
        control = None

    if machines_file:
        # A machines file replaces any machines given on the command line.
        machines = _read_machines_file(machines_file)
        print("Read list of machines from file: %s" % machines_file)
        print(','.join(machines))

    if machines:
        for machine in machines:
            if not machine or re.search(r'\s', machine):
                parser.parser.error("Invalid machine: %s" % str(machine))
        # Drop duplicates and keep a stable, sorted order.
        machines = sorted(set(machines))

    if group_name and len(machines) < 2:
        parser.parser.error("-G %r may only be supplied with more than one machine."
               % group_name)

    kwargs = {'group_name': group_name, 'tag': execution_tag}
    if control_filename:
        kwargs['control_filename'] = control_filename
    job = server_job.server_job(control, parser.args[1:], results, label,
                                user, machines, client, parse_job,
                                ssh_user, ssh_port, ssh_pass, test_retry,
                                **kwargs)
    job.logging.start_logging()
    job.init_parser()

    # perform checks
    job.precheck()

    # run the job
    exit_code = 0
    try:
        try:
            if repair:
                job.repair(host_protection)
            elif verify:
                job.verify()
            elif provision:
                job.provision(provision)
            elif reset:
                job.reset()
            else:
                job.run(cleanup, install_before, install_after,
                        verify_job_repo_url=verify_job_repo_url,
                        only_collect_crashinfo=collect_crashinfo,
                        skip_crash_collection=skip_crash_collection)
        finally:
            # Close every host the job opened, whether or not it succeeded.
            while job.hosts:
                host = job.hosts.pop()
                host.close()
    except:
        # Deliberately a bare except: any failure, including a SystemExit
        # raised from a signal handler, must still reach the pidfile and
        # parser cleanup below and be reported as exit status 1.
        exit_code = 1
        traceback.print_exc()

    if pid_file_manager:
        pid_file_manager.num_tests_failed = job.num_tests_failed
        pid_file_manager.close_file(exit_code)
    job.cleanup_parser()

    sys.exit(exit_code)
185
186
def main():
    """
    Command line entry point of autoserv.

    Parses the arguments, prepares the results directory (unless logging is
    disabled), configures logging, optionally opens a pidfile and then hands
    over to run_autoserv().

    @raise SystemExit: Always; the status is the one run_autoserv() exited
            with, or 1 on usage errors, results directory problems or an
            unexpected exception.
    """
    # grab the parser
    parser = autoserv_parser.autoserv_parser
    parser.parse_args()

    # Called without any argument: show the usage and give up.
    if len(sys.argv) == 1:
        parser.parser.print_help()
        sys.exit(1)

    options = parser.options
    results = None
    if not options.no_logging:
        # Fall back to a timestamped directory when none was requested.
        results = options.results or (
                'results.' + time.strftime('%Y-%m-%d-%H.%M.%S'))
        results = os.path.abspath(results)
        # Any of these files marks the directory as used by an earlier job.
        leftovers = ('control.srv', 'status.log', '.autoserv_execute')
        resultdir_exists = any(
                os.path.exists(os.path.join(results, leftover))
                for leftover in leftovers)
        if resultdir_exists and not options.use_existing_results:
            sys.stderr.write(
                    "Error: results directory already exists: %s\n" % results)
            sys.exit(1)

        # Now that we certified that there's no leftover results dir from
        # previous jobs, let's create the result dir since the logging system
        # needs to create the log file in there.
        if not os.path.isdir(results):
            os.makedirs(results)

    logging_manager.configure_logging(
            server_logging_config.ServerLoggingConfig(), results_dir=results,
            use_console=not options.no_tee,
            verbose=options.verbose,
            no_console_prefix=options.no_console_prefix)
    if results:
        logging.info("Results placed in %s" % results)

        # wait until now to perform this check, so it gets properly logged
        if options.use_existing_results and not resultdir_exists:
            logging.error("No existing results directory found: %s", results)
            sys.exit(1)

    pid_file_manager = None
    if options.write_pidfile:
        pid_file_manager = pidfile.PidFileManager(options.pidfile_label,
                                                  results)
        pid_file_manager.open_file()

    autotest.BaseAutotest.set_install_in_tmpdir(options.install_in_tmpdir)

    exit_code = 0
    try:
        run_autoserv(pid_file_manager, results, parser)
    except SystemExit as exit_request:
        # run_autoserv() reports its result by exiting; keep that status.
        exit_code = exit_request.code
    except:
        traceback.print_exc()
        # If we don't know what happened, we'll classify it as
        # an 'abort' and return 1.
        exit_code = 1
    finally:
        # Make sure the pidfile is closed however we got here.
        if pid_file_manager:
            pid_file_manager.close_file(exit_code)
    sys.exit(exit_code)
257
258
# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
261