autoserv revision 49e21e6f66df4cca76824a9e10dec55c4e94cf0d
1#!/usr/bin/python -u
2# Copyright 2007-2008 Martin J. Bligh <mbligh@google.com>, Google Inc.
3# Released under the GPL v2
4
5"""
6Run a control file through the server side engine
7"""
8
9import sys, os, re, traceback, signal, time, logging, getpass
10
11import common
12
13from autotest_lib.client.common_lib import control_data
14from autotest_lib.client.common_lib import global_config
# Module-level flag read from the AUTOSERV section of the global config.
# NOTE(review): this value (default=True) is not consulted by the handler
# below, which re-reads the same option with default=False. The two disagree
# when the option is absent from the config -- confirm the intended default
# before unifying them.
require_atfork = global_config.global_config.get_config_value(
        'AUTOSERV', 'require_atfork_module', type=bool, default=True)


try:
    import atfork
    atfork.monkeypatch_os_fork_functions()
    import atfork.stdlib_fixer
    # Fix the Python standard library for threading+fork safety with its
    # internal locks.  http://code.google.com/p/python-atfork/
    import warnings
    warnings.filterwarnings('ignore', 'logging module already imported')
    atfork.stdlib_fixer.fix_logging_module()
except ImportError as e:
    # A missing atfork module is only fatal when the config asks for it.
    if global_config.global_config.get_config_value(
            'AUTOSERV', 'require_atfork_module', type=bool, default=False):
        # Both the hint and the underlying import error are diagnostics, so
        # both go to stderr.
        sys.stderr.write('Please run utils/build_externals.py\n')
        sys.stderr.write('%s\n' % e)
        sys.exit(1)
35
36from autotest_lib.server import frontend
37from autotest_lib.server import server_logging_config
38from autotest_lib.server import server_job, utils, autoserv_parser, autotest
39from autotest_lib.server import utils as server_utils
40
41from autotest_lib.client.common_lib import pidfile, logging_manager
42from autotest_lib.site_utils.graphite import stats
43
def log_alarm(signum, frame):
    """Signal handler for SIGALRM: log an error and exit with status 1.

    Args:
        signum: Number of the delivered signal (unused).
        frame: Stack frame interrupted by the signal (unused).

    Raises:
        SystemExit: always, with exit code 1.
    """
    # The handler terminates the process, so the message must not claim that
    # the signal is being ignored.
    logging.error("Received SIGALARM. Exiting.")
    sys.exit(1)
47
def _read_machines_file(machines_file):
    """Return the machine names listed in machines_file, one per line.

    Everything from a '#' to the end of a line is dropped as a comment, and
    lines that are empty after stripping whitespace are skipped.
    """
    machines = []
    # The context manager closes the file even if reading fails part-way.
    with open(machines_file, 'r') as machines_fd:
        for line in machines_fd:
            name = re.sub('#.*', '', line).strip()
            if name:
                machines.append(name)
    return machines


def run_autoserv(pid_file_manager, results, parser):
    """Set up the process, build a server job from the options and run it.

    The function redirects stdin to /dev/null, moves the process into its own
    process group, installs SIGTERM/SIGTTOU/SIGALRM handlers, sets $USER,
    validates the command line, then runs the requested action (repair,
    verify, provision, reset or a regular job run). It finishes by calling
    sys.exit() with 0 on success or 1 if the job raised.

    Args:
        pid_file_manager: Object whose close_file() and num_tests_failed are
            used to record the outcome, or a false value to skip that.
        results: Results location handed to server_job.server_job().
        parser: Parsed autoserv parser; its .options, .args and
            .parser.error() are used.

    Raises:
        SystemExit: at the end of a run (and from parser.parser.error() on
            invalid arguments, presumably -- confirm against the parser).
    """
    # send stdin to /dev/null
    dev_null = os.open(os.devnull, os.O_RDONLY)
    os.dup2(dev_null, sys.stdin.fileno())
    os.close(dev_null)

    # Create separate process group
    os.setpgrp()

    # Implement SIGTERM handler
    def handle_sigterm(signum, frame):
        logging.debug('Received SIGTERM')
        if pid_file_manager:
            pid_file_manager.close_file(1, signal.SIGTERM)
        logging.debug('Finished writing to pid_file. Killing process.')
        # Take down the whole process group created above.
        os.killpg(os.getpgrp(), signal.SIGKILL)

    # Set signal handler
    signal.signal(signal.SIGTERM, handle_sigterm)

    # Ignore SIGTTOU's generated by output from forked children.
    signal.signal(signal.SIGTTOU, signal.SIG_IGN)

    # If we received a SIGALRM, let's be loud about it.
    signal.signal(signal.SIGALRM, log_alarm)

    # Server side tests that call shell scripts often depend on $USER being set
    # but depending on how you launch your autotest scheduler it may not be set.
    os.environ['USER'] = getpass.getuser()

    if parser.options.machines:
        machines = parser.options.machines.replace(',', ' ').strip().split()
    else:
        machines = []
    machines_file = parser.options.machines_file
    label = parser.options.label
    group_name = parser.options.group_name
    user = parser.options.user
    client = parser.options.client
    server = parser.options.server
    install_before = parser.options.install_before
    install_after = parser.options.install_after
    verify = parser.options.verify
    repair = parser.options.repair
    cleanup = parser.options.cleanup
    provision = parser.options.provision
    reset = parser.options.reset
    parse_job = parser.options.parse_job
    execution_tag = parser.options.execution_tag
    if not execution_tag:
        # Fall back to the parse job tag when no execution tag was given.
        execution_tag = parse_job
    host_protection = parser.options.host_protection
    ssh_user = parser.options.ssh_user
    ssh_port = parser.options.ssh_port
    ssh_pass = parser.options.ssh_pass
    collect_crashinfo = parser.options.collect_crashinfo
    control_filename = parser.options.control_filename
    test_retry = parser.options.test_retry
    verify_job_repo_url = parser.options.verify_job_repo_url
    skip_crash_collection = parser.options.skip_crash_collection
    ssh_verbosity = int(parser.options.ssh_verbosity)
    ssh_options = parser.options.ssh_options

    # can't be both a client and a server side test
    if client and server:
        parser.parser.error("Can not specify a test as both server and client!")

    if provision and client:
        parser.parser.error("Cannot specify provisioning and client!")

    is_special_task = (verify or repair or cleanup or collect_crashinfo or
                       provision or reset)
    if len(parser.args) < 1 and not is_special_task:
        parser.parser.error("Missing argument: control file")

    if ssh_verbosity > 0:
        # ssh_verbosity is an integer between 0 and 3, inclusive
        ssh_verbosity_flag = '-' + 'v' * ssh_verbosity
    else:
        ssh_verbosity_flag = ''

    # We have a control file unless it's just a verify/repair/cleanup job
    if len(parser.args) > 0:
        control = parser.args[0]
    else:
        control = None

    if machines_file:
        # A machines file replaces any machines given on the command line.
        machines = _read_machines_file(machines_file)
        print("Read list of machines from file: %s" % machines_file)
        print(','.join(machines))

    if machines:
        for machine in machines:
            if not machine or re.search(r'\s', machine):
                parser.parser.error("Invalid machine: %s" % str(machine))
        # Drop duplicates and keep a stable, sorted order.
        machines = sorted(set(machines))

    if group_name and len(machines) < 2:
        parser.parser.error("-G %r may only be supplied with more than one machine."
               % group_name)

    kwargs = {'group_name': group_name, 'tag': execution_tag,
              'disable_sysinfo': parser.options.disable_sysinfo}
    if control_filename:
        kwargs['control_filename'] = control_filename
    job = server_job.server_job(control, parser.args[1:], results, label,
                                user, machines, client, parse_job,
                                ssh_user, ssh_port, ssh_pass,
                                ssh_verbosity_flag, ssh_options,
                                test_retry, **kwargs)
    job.logging.start_logging()
    job.init_parser()

    # perform checks
    job.precheck()

    # run the job
    exit_code = 0
    try:
        try:
            if repair:
                job.repair(host_protection)
            elif verify:
                job.verify()
            elif provision:
                job.provision(provision)
            elif reset:
                job.reset()
            else:
                job.run(cleanup, install_before, install_after,
                        verify_job_repo_url=verify_job_repo_url,
                        only_collect_crashinfo=collect_crashinfo,
                        skip_crash_collection=skip_crash_collection)
        finally:
            # Close every host the job opened, whether or not it succeeded.
            while job.hosts:
                host = job.hosts.pop()
                host.close()
    except:
        # Deliberately catches everything raised by the job so that the pid
        # file below is still written, with a failing exit code.
        exit_code = 1
        traceback.print_exc()

    if pid_file_manager:
        pid_file_manager.num_tests_failed = job.num_tests_failed
        pid_file_manager.close_file(exit_code)
    job.cleanup_parser()

    sys.exit(exit_code)
203
204
def main():
    """Parse the command line, prepare results/logging and run autoserv.

    Steps, in order: read the whitelist of tests whose run time is measured,
    parse arguments (printing help and exiting 1 when none were given),
    resolve and create the results directory unless logging is disabled,
    configure logging, optionally open a pid file, optionally start a run
    time timer for whitelisted tests, then call run_autoserv(). The process
    always ends through sys.exit() with the resulting exit code.
    """
    # White list of tests with run time measurement enabled.
    whitelist_value = global_config.global_config.get_config_value(
                        'AUTOSERV', 'measure_run_time_tests', type=str)
    measure_run_time_tests = []
    if whitelist_value:
        measure_run_time_tests = [name.strip()
                                  for name in whitelist_value.split(',')]

    # grab the parser
    parser = autoserv_parser.autoserv_parser
    parser.parse_args()

    # No arguments at all: show usage instead of attempting a run.
    if len(sys.argv) == 1:
        parser.parser.print_help()
        sys.exit(1)

    results = None
    if not parser.options.no_logging:
        results = parser.options.results or (
                'results.' + time.strftime('%Y-%m-%d-%H.%M.%S'))
        results = os.path.abspath(results)
        # Any of these files marks a directory already used by a job.
        marker_files = ('control.srv', 'status.log', '.autoserv_execute')
        resultdir_exists = any(
                os.path.exists(os.path.join(results, marker))
                for marker in marker_files)
        if not parser.options.use_existing_results and resultdir_exists:
            sys.stderr.write(
                    "Error: results directory already exists: %s\n" % results)
            sys.exit(1)

        # Now that we certified that there's no leftover results dir from
        # previous jobs, lets create the result dir since the logging system
        # needs to create the log file in there.
        if not os.path.isdir(results):
            os.makedirs(results)

    logging_manager.configure_logging(
            server_logging_config.ServerLoggingConfig(), results_dir=results,
            use_console=not parser.options.no_tee,
            verbose=parser.options.verbose,
            no_console_prefix=parser.options.no_console_prefix)
    if results:
        logging.info("Results placed in %s" % results)

        # wait until now to perform this check, so it get properly logged
        if parser.options.use_existing_results and not resultdir_exists:
            logging.error("No existing results directory found: %s", results)
            sys.exit(1)

    logging.debug('autoserv command was: %s', ' '.join(sys.argv))

    pid_file_manager = None
    if parser.options.write_pidfile:
        pid_file_manager = pidfile.PidFileManager(parser.options.pidfile_label,
                                                  results)
        pid_file_manager.open_file()

    autotest.BaseAutotest.set_install_in_tmpdir(
        parser.options.install_in_tmpdir)

    timer = None
    try:
        # Take the first argument as control file name, get the test name from
        # the control file. If the test name exists in the list of tests with
        # run time measurement enabled, start a timer to begin measurement.
        has_control_file = len(parser.args) > 0 and parser.args[0] != ''
        if has_control_file and parser.options.machines:
            try:
                control = control_data.parse_control(parser.args[0],
                                                     raise_warnings=True)
                test_name = control.name
            except control_data.ControlVariableException:
                logging.debug('Failed to retrieve test name from control file.')
                test_name = None
            if test_name in measure_run_time_tests:
                machine_names = parser.options.machines.replace(
                        ',', ' ').strip().split()
                afe = frontend.AFE()
                # The board of the first machine names the timer.
                board = server_utils.get_board_from_afe(machine_names[0], afe)
                timer = stats.Timer('autoserv_run_time.%s.%s' %
                                    (board, test_name))
                timer.start()
    except control_data.ControlVariableException as control_error:
        logging.error(str(control_error))

    exit_code = 0
    try:
        run_autoserv(pid_file_manager, results, parser)
    except SystemExit as exit_request:
        # Propagate the requested exit code; only non-zero exits are logged.
        exit_code = exit_request.code
        if exit_code:
            logging.exception(exit_request)
    except Exception as unexpected:
        # If we don't know what happened, we'll classify it as
        # an 'abort' and return 1.
        logging.exception(unexpected)
        exit_code = 1
    finally:
        if pid_file_manager:
            pid_file_manager.close_file(exit_code)
        if timer:
            timer.stop()
    sys.exit(exit_code)
311
312
# Script entry point: only run autoserv when this file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
315