autoserv.py revision 5c40ec6dc042191a3925b089f5647fd212179868
1#!/usr/bin/python -u
2# Copyright 2007-2008 Martin J. Bligh <mbligh@google.com>, Google Inc.
3# Released under the GPL v2
4
5"""
6Run a control file through the server side engine
7"""
8
9import sys, os, re, traceback, signal, time, logging, getpass
10
11import common
12
13from autotest_lib.client.common_lib import control_data
14from autotest_lib.client.common_lib import global_config
# Read AUTOSERV/require_atfork_module from the global config, passing
# default=True as the fallback.
# NOTE(review): this name is not referenced again in the visible code; the
# ImportError handler below re-reads the same option with default=False, so
# the two fallbacks disagree -- confirm which one is intended.
require_atfork = global_config.global_config.get_config_value(
        'AUTOSERV', 'require_atfork_module', type=bool, default=True)


# Apply the atfork fixes at import time, before the server modules below are
# imported.
try:
    import atfork
    atfork.monkeypatch_os_fork_functions()
    import atfork.stdlib_fixer
    # Fix the Python standard library for threading+fork safety with its
    # internal locks.  http://code.google.com/p/python-atfork/
    import warnings
    warnings.filterwarnings('ignore', 'logging module already imported')
    atfork.stdlib_fixer.fix_logging_module()
except ImportError, e:
    # One of the imports above failed.  Abort only when the config marks the
    # atfork module as required; otherwise carry on without the fixes.
    from autotest_lib.client.common_lib import global_config
    if global_config.global_config.get_config_value(
            'AUTOSERV', 'require_atfork_module', type=bool, default=False):
        print >>sys.stderr, 'Please run utils/build_externals.py'
        # NOTE(review): the exception text is printed to stdout, unlike the
        # hint above which goes to stderr.
        print e
        sys.exit(1)
35
36from autotest_lib.server import frontend
37from autotest_lib.server.hosts import site_host
38from autotest_lib.server import server_logging_config
39from autotest_lib.server import server_job, utils, autoserv_parser, autotest
40from autotest_lib.server import utils as server_utils
41
42from autotest_lib.client.common_lib import pidfile, logging_manager
43from autotest_lib.site_utils.graphite import stats
44
def log_alarm(signum, frame):
    """Signal handler for SIGALRM: log the event and terminate.

    @param signum: Number of the signal being handled (unused).
    @param frame: Stack frame active when the signal arrived (unused).

    @raises SystemExit: always, with exit status 1.
    """
    # The previous message claimed the signal was ignored, but the handler
    # has always exited; the message now matches the actual behaviour.
    logging.error("Received SIGALRM. Exiting.")
    sys.exit(1)
48
def _read_machines_file(machines_file):
    """Return the non-empty entries of a machines file, one per line.

    Everything from a '#' to the end of a line is dropped and surrounding
    whitespace is stripped; lines that end up empty are skipped.

    @param machines_file: Path of the file to read.

    @returns A list of strings in file order.
    """
    machines = []
    # The context manager closes the file handle deterministically; the
    # earlier open(...).readlines() form left that to garbage collection.
    with open(machines_file, 'r') as machines_fd:
        for line in machines_fd:
            # remove comments, spaces
            entry = re.sub('#.*', '', line).strip()
            if entry:
                machines.append(entry)
    return machines


def run_autoserv(pid_file_manager, results, parser):
    """Set up the process, build a server_job from the options and run it.

    Redirects stdin to /dev/null, moves the process into its own process
    group, installs the SIGTERM/SIGTTOU/SIGALRM handlers, validates the
    command line options, then runs exactly one of repair / verify /
    provision / reset / a regular job run.  This function does not return
    normally: it finishes by calling sys.exit().

    @param pid_file_manager: Object whose close_file() is called with the
            exit code and whose num_tests_failed attribute is set from the
            job; may be None (or otherwise false) to skip pidfile handling.
    @param results: Results location handed to server_job.server_job.
    @param parser: Parsed autoserv parser; its `options`, `args` and
            `parser` attributes are read.

    @raises SystemExit: with code 0 when the job section completed, or 1 when
            it raised anything.
    """
    # send stdin to /dev/null
    dev_null = os.open(os.devnull, os.O_RDONLY)
    os.dup2(dev_null, sys.stdin.fileno())
    os.close(dev_null)

    # Create separate process group
    os.setpgrp()

    # Implement SIGTERM handler
    def handle_sigterm(signum, frame):
        if pid_file_manager:
            pid_file_manager.close_file(1, signal.SIGTERM)
        # Take down every process in our (freshly created) process group,
        # ourselves included.
        os.killpg(os.getpgrp(), signal.SIGKILL)

    # Set signal handler
    signal.signal(signal.SIGTERM, handle_sigterm)

    # Ignore SIGTTOU's generated by output from forked children.
    signal.signal(signal.SIGTTOU, signal.SIG_IGN)

    # If we received a SIGALRM, let's be loud about it.
    signal.signal(signal.SIGALRM, log_alarm)

    # Server side tests that call shell scripts often depend on $USER being set
    # but depending on how you launch your autotest scheduler it may not be set.
    os.environ['USER'] = getpass.getuser()

    if parser.options.machines:
        # Accept both comma and whitespace separated machine lists.
        machines = parser.options.machines.replace(',', ' ').strip().split()
    else:
        machines = []
    machines_file = parser.options.machines_file
    label = parser.options.label
    group_name = parser.options.group_name
    user = parser.options.user
    client = parser.options.client
    server = parser.options.server
    install_before = parser.options.install_before
    install_after = parser.options.install_after
    verify = parser.options.verify
    repair = parser.options.repair
    cleanup = parser.options.cleanup
    provision = parser.options.provision
    reset = parser.options.reset
    parse_job = parser.options.parse_job
    execution_tag = parser.options.execution_tag
    if not execution_tag:
        # Fall back to the parse job tag when no execution tag was given.
        execution_tag = parse_job
    host_protection = parser.options.host_protection
    ssh_user = parser.options.ssh_user
    ssh_port = parser.options.ssh_port
    ssh_pass = parser.options.ssh_pass
    collect_crashinfo = parser.options.collect_crashinfo
    control_filename = parser.options.control_filename
    test_retry = parser.options.test_retry
    verify_job_repo_url = parser.options.verify_job_repo_url
    skip_crash_collection = parser.options.skip_crash_collection
    ssh_verbosity = int(parser.options.ssh_verbosity)

    # can't be both a client and a server side test
    if client and server:
        parser.parser.error("Can not specify a test as both server and client!")

    if provision and client:
        parser.parser.error("Cannot specify provisioning and client!")

    is_special_task = (verify or repair or cleanup or collect_crashinfo or
                       provision or reset)
    if len(parser.args) < 1 and not is_special_task:
        parser.parser.error("Missing argument: control file")

    if ssh_verbosity > 0:
        # ssh_verbosity is an integer between 0 and 3, inclusive
        ssh_verbosity_flag = '-' + 'v' * ssh_verbosity
        site_host.GLOBAL_SSH_COMMAND_OPTIONS += ssh_verbosity_flag

    # We have a control file unless it's just a verify/repair/cleanup job
    if len(parser.args) > 0:
        control = parser.args[0]
    else:
        control = None

    if machines_file:
        # A machines file replaces any machines given on the command line.
        machines = _read_machines_file(machines_file)
        print("Read list of machines from file: %s" % machines_file)
        print(','.join(machines))

    if machines:
        for machine in machines:
            if not machine or re.search(r'\s', machine):
                parser.parser.error("Invalid machine: %s" % str(machine))
        # De-duplicate and put the machines in a stable order.
        machines = sorted(set(machines))

    if group_name and len(machines) < 2:
        parser.parser.error("-G %r may only be supplied with more than one machine."
               % group_name)

    kwargs = {'group_name': group_name, 'tag': execution_tag,
              'disable_sysinfo': parser.options.disable_sysinfo}
    if control_filename:
        kwargs['control_filename'] = control_filename
    job = server_job.server_job(control, parser.args[1:], results, label,
                                user, machines, client, parse_job,
                                ssh_user, ssh_port, ssh_pass, test_retry,
                                **kwargs)
    job.logging.start_logging()
    job.init_parser()

    # perform checks
    job.precheck()

    # run the job
    exit_code = 0
    try:
        try:
            if repair:
                job.repair(host_protection)
            elif verify:
                job.verify()
            elif provision:
                job.provision(provision)
            elif reset:
                job.reset()
            else:
                job.run(cleanup, install_before, install_after,
                        verify_job_repo_url=verify_job_repo_url,
                        only_collect_crashinfo=collect_crashinfo,
                        skip_crash_collection=skip_crash_collection)
        finally:
            # Close every host the job still holds, whether or not it failed.
            while job.hosts:
                host = job.hosts.pop()
                host.close()
    except:
        # Deliberately broad: whatever went wrong above is reported through
        # the exit code and a printed traceback rather than propagated.
        exit_code = 1
        traceback.print_exc()

    if pid_file_manager:
        pid_file_manager.num_tests_failed = job.num_tests_failed
        pid_file_manager.close_file(exit_code)
    job.cleanup_parser()

    sys.exit(exit_code)
199
200
def main():
    """Command line entry point for autoserv.

    Parses the command line, prepares the results directory and logging,
    optionally opens a pidfile and starts a run-time timer for whitelisted
    tests, then hands over to run_autoserv().  Always finishes by calling
    sys.exit() with the resulting exit code.
    """
    # White list of tests with run time measurement enabled.
    whitelist_value = global_config.global_config.get_config_value(
                        'AUTOSERV', 'measure_run_time_tests', type=str)
    measure_run_time_tests = []
    if whitelist_value:
        measure_run_time_tests = [name.strip()
                                  for name in whitelist_value.split(',')]

    # grab the parser
    parser = autoserv_parser.autoserv_parser
    parser.parse_args()

    # Invoked without any argument at all: show usage and bail out.
    if len(sys.argv) == 1:
        parser.parser.print_help()
        sys.exit(1)

    options = parser.options

    if options.no_logging:
        results = None
    else:
        results = options.results or (
                'results.' + time.strftime('%Y-%m-%d-%H.%M.%S'))
        results = os.path.abspath(results)
        # Any of these files means the directory holds results already.
        marker_files = ('control.srv', 'status.log', '.autoserv_execute')
        resultdir_exists = any(
                os.path.exists(os.path.join(results, marker))
                for marker in marker_files)
        if resultdir_exists and not options.use_existing_results:
            error = "Error: results directory already exists: %s\n" % results
            sys.stderr.write(error)
            sys.exit(1)

        # No leftover results from a previous job are in the way, so create
        # the directory now: the logging system needs to put its log file
        # in there.
        if not os.path.isdir(results):
            os.makedirs(results)

    logging_manager.configure_logging(
            server_logging_config.ServerLoggingConfig(), results_dir=results,
            use_console=not options.no_tee,
            verbose=options.verbose,
            no_console_prefix=options.no_console_prefix)
    if results:
        logging.info("Results placed in %s" % results)

        # This check is deferred until logging is configured so that the
        # failure ends up in the log.
        if options.use_existing_results and not resultdir_exists:
            logging.error("No existing results directory found: %s", results)
            sys.exit(1)

    logging.debug('autoserv command was: %s', ' '.join(sys.argv))

    pid_file_manager = None
    if options.write_pidfile:
        pid_file_manager = pidfile.PidFileManager(options.pidfile_label,
                                                  results)
        pid_file_manager.open_file()

    autotest.BaseAutotest.set_install_in_tmpdir(options.install_in_tmpdir)

    timer = None
    try:
        # The first positional argument is the control file; its test name
        # decides whether run time measurement is switched on for this run.
        have_control = len(parser.args) > 0 and parser.args[0] != ''
        if have_control and options.machines:
            try:
                control = control_data.parse_control(parser.args[0],
                                                     raise_warnings=True)
                test_name = control.name
            except control_data.ControlVariableException:
                logging.debug('Failed to retrieve test name from control file.')
                test_name = None
            if test_name in measure_run_time_tests:
                machine_list = options.machines.replace(',', ' ')
                machine_list = machine_list.strip().split()
                afe = frontend.AFE()
                # The board is looked up for the first listed machine only.
                board = server_utils.get_board_from_afe(machine_list[0], afe)
                timer = stats.Timer('autoserv_run_time.%s.%s' %
                                    (board, test_name))
                timer.start()
    except control_data.ControlVariableException as e:
        logging.error(str(e))

    exit_code = 0
    try:
        run_autoserv(pid_file_manager, results, parser)
    except SystemExit as e:
        # run_autoserv reports its outcome through sys.exit(); only a
        # non-zero/non-empty code is logged as a failure.
        exit_code = e.code
        if exit_code:
            logging.exception(e)
    except Exception as e:
        # If we don't know what happened, we'll classify it as
        # an 'abort' and return 1.
        logging.exception(e)
        exit_code = 1
    finally:
        if pid_file_manager:
            pid_file_manager.close_file(exit_code)
        if timer:
            timer.stop()
    sys.exit(exit_code)
307
308
309if __name__ == '__main__':
310    main()
311