autoserv.py revision c5947faa755945f81537c6c33c322dccacac0ade
1#!/usr/bin/python -u
2# Copyright 2007-2008 Martin J. Bligh <mbligh@google.com>, Google Inc.
3# Released under the GPL v2
4
5"""
6Run a control file through the server side engine
7"""
8
9import sys, os, re, traceback, signal, time, logging, getpass
10
11import common
12
13from autotest_lib.client.common_lib import control_data
14from autotest_lib.client.common_lib import global_config
# Read the AUTOSERV/require_atfork_module setting, treating a missing entry
# as True.
# NOTE(review): require_atfork is not referenced again in the code visible
# here; the ImportError handler below re-reads the same setting with
# default=False instead -- confirm which default is intended.
require_atfork = global_config.global_config.get_config_value(
        'AUTOSERV', 'require_atfork_module', type=bool, default=True)


# Try to load atfork and apply its os.fork monkeypatch and logging fix.  This
# runs at import time, ahead of the autotest_lib.server imports that follow.
try:
    import atfork
    atfork.monkeypatch_os_fork_functions()
    import atfork.stdlib_fixer
    # Fix the Python standard library for threading+fork safety with its
    # internal locks.  http://code.google.com/p/python-atfork/
    import warnings
    # Silence the warning matching 'logging module already imported' before
    # applying the logging fix.
    warnings.filterwarnings('ignore', 'logging module already imported')
    atfork.stdlib_fixer.fix_logging_module()
except ImportError, e:
    # atfork could not be imported.  Abort only if the configuration marks the
    # module as required (a missing entry counts as "not required" here);
    # otherwise carry on without the fork-safety fixes.
    from autotest_lib.client.common_lib import global_config
    if global_config.global_config.get_config_value(
            'AUTOSERV', 'require_atfork_module', type=bool, default=False):
        # The hint goes to stderr; the exception text itself goes to stdout.
        print >>sys.stderr, 'Please run utils/build_externals.py'
        print e
        sys.exit(1)
35
36from autotest_lib.server import frontend
37from autotest_lib.server import server_logging_config
38from autotest_lib.server import server_job, utils, autoserv_parser, autotest
39from autotest_lib.server import utils as server_utils
40
41from autotest_lib.client.common_lib import pidfile, logging_manager
42from autotest_lib.site_utils.graphite import stats
43
def log_alarm(signum, frame):
    """
    SIGALRM handler: log the signal at error level, then terminate.

    @param signum: Signal number supplied by the signal module (unused).
    @param frame: Interrupted stack frame supplied by the signal module
            (unused).

    Always raises SystemExit with status 1.
    """
    # This handler exits, so the message must not claim the signal is ignored.
    logging.error("Received SIGALRM. Exiting with status 1.")
    sys.exit(1)
47
def _read_machines_file(machines_file):
    """
    Return the machine names listed in machines_file, one per line.

    Everything from a '#' to the end of a line is discarded, surrounding
    whitespace is stripped, and lines that end up empty are skipped.
    """
    machines = []
    # The context manager closes the file even if reading fails part-way.
    with open(machines_file, 'r') as machines_fd:
        for line in machines_fd:
            name = re.sub('#.*', '', line).strip()
            if name:
                machines.append(name)
    return machines


def run_autoserv(pid_file_manager, results, parser):
    """
    Prepare the process environment, build a server_job and run it.

    @param pid_file_manager: Object whose close_file() records the exit
            status, or a false value when no pidfile is written.
    @param results: Results directory handed to server_job (main() passes
            None when logging is disabled).
    @param parser: Parsed command line.  Its options and args attributes are
            read, and parser.parser.error() reports invalid combinations.

    Does not return normally: it always finishes through sys.exit(), with
    status 0 when the job ran without raising and 1 otherwise.
    """
    # send stdin to /dev/null
    dev_null = os.open(os.devnull, os.O_RDONLY)
    os.dup2(dev_null, sys.stdin.fileno())
    os.close(dev_null)

    # Create separate process group
    os.setpgrp()

    # Implement SIGTERM handler
    def handle_sigterm(signum, frame):
        # Record the termination in the pidfile first, then SIGKILL the
        # whole process group (which includes this process).
        if pid_file_manager:
            pid_file_manager.close_file(1, signal.SIGTERM)
        os.killpg(os.getpgrp(), signal.SIGKILL)

    # Set signal handler
    signal.signal(signal.SIGTERM, handle_sigterm)

    # Ignore SIGTTOU's generated by output from forked children.
    signal.signal(signal.SIGTTOU, signal.SIG_IGN)

    # If we received a SIGALARM, let's be loud about it.
    signal.signal(signal.SIGALRM, log_alarm)

    # Server side tests that call shell scripts often depend on $USER being set
    # but depending on how you launch your autotest scheduler it may not be set.
    os.environ['USER'] = getpass.getuser()

    # Machines given on the command line may be separated by commas and/or
    # whitespace.
    if parser.options.machines:
        machines = parser.options.machines.replace(',', ' ').strip().split()
    else:
        machines = []
    machines_file = parser.options.machines_file
    label = parser.options.label
    group_name = parser.options.group_name
    user = parser.options.user
    client = parser.options.client
    server = parser.options.server
    install_before = parser.options.install_before
    install_after = parser.options.install_after
    verify = parser.options.verify
    repair = parser.options.repair
    cleanup = parser.options.cleanup
    provision = parser.options.provision
    reset = parser.options.reset
    parse_job = parser.options.parse_job
    execution_tag = parser.options.execution_tag
    if not execution_tag:
        # Fall back to the parse job tag when no execution tag was given.
        execution_tag = parse_job
    host_protection = parser.options.host_protection
    ssh_user = parser.options.ssh_user
    ssh_port = parser.options.ssh_port
    ssh_pass = parser.options.ssh_pass
    collect_crashinfo = parser.options.collect_crashinfo
    control_filename = parser.options.control_filename
    test_retry = parser.options.test_retry
    verify_job_repo_url = parser.options.verify_job_repo_url
    skip_crash_collection = parser.options.skip_crash_collection
    ssh_verbosity = int(parser.options.ssh_verbosity)
    ssh_options = parser.options.ssh_options

    # can't be both a client and a server side test
    if client and server:
        parser.parser.error("Can not specify a test as both server and client!")

    if provision and client:
        parser.parser.error("Cannot specify provisioning and client!")

    # Special tasks do not need a control file argument.
    is_special_task = (verify or repair or cleanup or collect_crashinfo or
                       provision or reset)
    if len(parser.args) < 1 and not is_special_task:
        parser.parser.error("Missing argument: control file")

    if ssh_verbosity > 0:
        # ssh_verbosity is an integer between 0 and 3, inclusive
        ssh_verbosity_flag = '-' + 'v' * ssh_verbosity
    else:
        ssh_verbosity_flag = ''

    # We have a control file unless it's just a verify/repair/cleanup job
    if len(parser.args) > 0:
        control = parser.args[0]
    else:
        control = None

    # A machines file, when given, replaces any machines from the command
    # line.
    if machines_file:
        machines = _read_machines_file(machines_file)
        print("Read list of machines from file: %s" % machines_file)
        print(','.join(machines))

    if machines:
        for machine in machines:
            if not machine or re.search(r'\s', machine):
                parser.parser.error("Invalid machine: %s" % str(machine))
        # Drop duplicates and keep a deterministic order.
        machines = sorted(set(machines))

    if group_name and len(machines) < 2:
        parser.parser.error("-G %r may only be supplied with more than one machine."
               % group_name)

    kwargs = {'group_name': group_name, 'tag': execution_tag,
              'disable_sysinfo': parser.options.disable_sysinfo}
    if control_filename:
        kwargs['control_filename'] = control_filename
    job = server_job.server_job(control, parser.args[1:], results, label,
                                user, machines, client, parse_job,
                                ssh_user, ssh_port, ssh_pass,
                                ssh_verbosity_flag, ssh_options,
                                test_retry, **kwargs)
    job.logging.start_logging()
    job.init_parser()

    # perform checks
    job.precheck()

    # run the job
    exit_code = 0
    try:
        try:
            # Only one action runs per invocation; the special tasks take
            # precedence over a regular job.run() in the order checked here.
            if repair:
                job.repair(host_protection)
            elif verify:
                job.verify()
            elif provision:
                job.provision(provision)
            elif reset:
                job.reset()
            else:
                job.run(cleanup, install_before, install_after,
                        verify_job_repo_url=verify_job_repo_url,
                        only_collect_crashinfo=collect_crashinfo,
                        skip_crash_collection=skip_crash_collection)
        finally:
            # Close every host the job still holds, whether or not it failed.
            while job.hosts:
                host = job.hosts.pop()
                host.close()
    except:
        # Deliberate catch-all: whatever the job raised is turned into exit
        # status 1 so the pidfile and parser cleanup below still run.
        exit_code = 1
        traceback.print_exc()

    if pid_file_manager:
        pid_file_manager.num_tests_failed = job.num_tests_failed
        pid_file_manager.close_file(exit_code)
    job.cleanup_parser()

    sys.exit(exit_code)
201
202
def main():
    """
    Command line entry point for autoserv.

    Parses the command line, prepares the results directory and logging,
    optionally opens a pidfile and starts a run time timer, then calls
    run_autoserv() and exits with the status it produced.
    """
    # Comma separated white list of tests with run time measurement enabled.
    whitelist_setting = global_config.global_config.get_config_value(
            'AUTOSERV', 'measure_run_time_tests', type=str)
    timed_tests = ([name.strip() for name in whitelist_setting.split(',')]
                   if whitelist_setting else [])

    # The parser is the object exposed by the autoserv_parser module.
    parser = autoserv_parser.autoserv_parser
    parser.parse_args()

    # Invoked without any arguments: show the usage text and give up.
    if len(sys.argv) == 1:
        parser.parser.print_help()
        sys.exit(1)

    options = parser.options

    results = None
    if not options.no_logging:
        # Default to a timestamped directory when none was requested.
        results = options.results or ('results.' +
                                      time.strftime('%Y-%m-%d-%H.%M.%S'))
        results = os.path.abspath(results)

        # Any of these files means a previous job already used the directory.
        marker_files = ('control.srv', 'status.log', '.autoserv_execute')
        resultdir_exists = any(os.path.exists(os.path.join(results, marker))
                               for marker in marker_files)
        if resultdir_exists and not options.use_existing_results:
            sys.stderr.write(
                    "Error: results directory already exists: %s\n" % results)
            sys.exit(1)

        # No leftovers from an earlier job were found (or reuse was asked
        # for), so make sure the directory exists: the logging system needs
        # to create its log file in there.
        if not os.path.isdir(results):
            os.makedirs(results)

    logging_manager.configure_logging(
            server_logging_config.ServerLoggingConfig(), results_dir=results,
            use_console=not options.no_tee,
            verbose=options.verbose,
            no_console_prefix=options.no_console_prefix)
    if results:
        logging.info("Results placed in %s" % results)

        # This check is delayed until logging is configured so that the
        # failure ends up in the log.
        if options.use_existing_results and not resultdir_exists:
            logging.error("No existing results directory found: %s", results)
            sys.exit(1)

    logging.debug('autoserv command was: %s', ' '.join(sys.argv))

    pid_file_manager = None
    if options.write_pidfile:
        pid_file_manager = pidfile.PidFileManager(options.pidfile_label,
                                                  results)
        pid_file_manager.open_file()

    autotest.BaseAutotest.set_install_in_tmpdir(options.install_in_tmpdir)

    timer = None
    try:
        # The first argument is the control file name; the test name is read
        # from that control file.  When the test is on the white list above,
        # a timer is started to measure the run.
        have_control = len(parser.args) > 0 and parser.args[0] != ''
        if have_control and options.machines:
            try:
                control = control_data.parse_control(parser.args[0],
                                                     raise_warnings=True)
                test_name = control.name
            except control_data.ControlVariableException:
                logging.debug('Failed to retrieve test name from control file.')
                test_name = None
            if test_name in timed_tests:
                machines = options.machines.replace(',', ' ').strip().split()
                afe = frontend.AFE()
                # The board of the first machine is used in the timer name.
                board = server_utils.get_board_from_afe(machines[0], afe)
                timer = stats.Timer('autoserv_run_time.%s.%s' %
                                    (board, test_name))
                timer.start()
    except control_data.ControlVariableException as e:
        logging.error(str(e))

    exit_code = 0
    try:
        run_autoserv(pid_file_manager, results, parser)
    except SystemExit as e:
        # run_autoserv finishes through sys.exit(); only a true exit code is
        # logged as an exception.
        exit_code = e.code
        if exit_code:
            logging.exception(e)
    except Exception as e:
        # If we don't know what happened, we'll classify it as
        # an 'abort' and return 1.
        logging.exception(e)
        exit_code = 1
    finally:
        if pid_file_manager:
            pid_file_manager.close_file(exit_code)
        if timer:
            timer.stop()
    sys.exit(exit_code)
309
310
# Call main() only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
313