autoserv.py revision a1ecd5c903928f359cd6cbcff5c986652e109599
1#!/usr/bin/python -u
2# Copyright 2007-2008 Martin J. Bligh <mbligh@google.com>, Google Inc.
3# Released under the GPL v2
4
5"""
6Run a control file through the server side engine
7"""
8
9import sys, os, re, traceback, signal, time, logging, getpass
10
11import common
12
13from autotest_lib.client.common_lib import control_data
14from autotest_lib.client.common_lib import global_config
# Whether a missing atfork module should be fatal, as read from the AUTOSERV
# section of the global config (True when the option is unset).
# NOTE(review): this value is not consulted by the import guard below, which
# re-reads the same option with default=False -- confirm which default is
# intended before unifying the two.
require_atfork = global_config.global_config.get_config_value(
        'AUTOSERV', 'require_atfork_module', type=bool, default=True)


try:
    import atfork
    atfork.monkeypatch_os_fork_functions()
    import atfork.stdlib_fixer
    # Fix the Python standard library for threading+fork safety with its
    # internal locks.  http://code.google.com/p/python-atfork/
    import warnings
    warnings.filterwarnings('ignore', 'logging module already imported')
    atfork.stdlib_fixer.fix_logging_module()
except ImportError as e:
    # atfork could not be imported.  Only abort when the config explicitly
    # requires it; otherwise carry on without the fork-safety fixes.
    if global_config.global_config.get_config_value(
            'AUTOSERV', 'require_atfork_module', type=bool, default=False):
        # Both the hint and the underlying import error go to stderr so they
        # stay together when stdout is redirected.
        sys.stderr.write('Please run utils/build_externals.py\n')
        sys.stderr.write('%s\n' % e)
        sys.exit(1)
35
36from autotest_lib.server import frontend
37from autotest_lib.server.hosts import site_host
38from autotest_lib.server import server_logging_config
39from autotest_lib.server import server_job, utils, autoserv_parser, autotest
40from autotest_lib.server import utils as server_utils
41
42from autotest_lib.client.common_lib import pidfile, logging_manager
43from autotest_lib.site_utils.graphite import stats
44
def log_alarm(signum, frame):
    """Signal handler for SIGALRM: log the event, then exit with status 1.

    @param signum: Signal number passed by the signal machinery (unused).
    @param frame: Stack frame active when the signal arrived (unused).

    @raises SystemExit: always, with exit code 1.
    """
    # The handler terminates the process, so the message must say so; the
    # previous text claimed the signal was being ignored.
    logging.error("Received SIGALARM. Exiting.")
    sys.exit(1)
48
def _read_machines_file(machines_file):
    """Return the machine names listed in machines_file, in file order.

    Everything from a '#' to the end of a line is dropped as a comment and
    surrounding whitespace is stripped; lines that end up empty are skipped.
    """
    machines = []
    # Use a context manager so the file is closed even if reading fails.
    with open(machines_file, 'r') as machines_fd:
        for line in machines_fd:
            name = re.sub('#.*', '', line).strip()
            if name:
                machines.append(name)
    return machines


def run_autoserv(pid_file_manager, results, parser):
    """Set up the process, build a server_job from the options and run it.

    This function does not return normally: it always finishes by calling
    sys.exit() with 0 on success or 1 if running the job raised.

    @param pid_file_manager: Object used to record the exit status in a pid
            file (its close_file() and num_tests_failed are used), or a
            false value when no pid file is being written.
    @param results: Results location handed straight to server_job.
    @param parser: The autoserv parser; its .options and .args are read and
            parser.parser.error() is used to report invalid combinations
            (presumably an optparse-style error() that does not return --
            verify against autoserv_parser).

    @raises SystemExit: always, carrying the job's exit code.
    """
    # send stdin to /dev/null
    dev_null = os.open(os.devnull, os.O_RDONLY)
    os.dup2(dev_null, sys.stdin.fileno())
    os.close(dev_null)

    # Create separate process group
    os.setpgrp()

    # Implement SIGTERM handler
    def handle_sigterm(signum, frame):
        # Record the termination in the pid file first, then kill every
        # process in our process group (including this one).
        if pid_file_manager:
            pid_file_manager.close_file(1, signal.SIGTERM)
        os.killpg(os.getpgrp(), signal.SIGKILL)

    # Set signal handler
    signal.signal(signal.SIGTERM, handle_sigterm)

    # Ignore SIGTTOU's generated by output from forked children.
    signal.signal(signal.SIGTTOU, signal.SIG_IGN)

    # If we received a SIGALARM, let's be loud about it.
    signal.signal(signal.SIGALRM, log_alarm)

    # Server side tests that call shell scripts often depend on $USER being set
    # but depending on how you launch your autotest scheduler it may not be set.
    os.environ['USER'] = getpass.getuser()

    # Machines given on the command line may be comma and/or space separated.
    if parser.options.machines:
        machines = parser.options.machines.replace(',', ' ').strip().split()
    else:
        machines = []
    machines_file = parser.options.machines_file
    label = parser.options.label
    group_name = parser.options.group_name
    user = parser.options.user
    client = parser.options.client
    server = parser.options.server
    install_before = parser.options.install_before
    install_after = parser.options.install_after
    verify = parser.options.verify
    repair = parser.options.repair
    cleanup = parser.options.cleanup
    provision = parser.options.provision
    reset = parser.options.reset
    parse_job = parser.options.parse_job
    # Fall back to the parse job name when no explicit execution tag is given.
    execution_tag = parser.options.execution_tag
    if not execution_tag:
        execution_tag = parse_job
    host_protection = parser.options.host_protection
    ssh_user = parser.options.ssh_user
    ssh_port = parser.options.ssh_port
    ssh_pass = parser.options.ssh_pass
    collect_crashinfo = parser.options.collect_crashinfo
    control_filename = parser.options.control_filename
    test_retry = parser.options.test_retry
    verify_job_repo_url = parser.options.verify_job_repo_url
    skip_crash_collection = parser.options.skip_crash_collection

    # can't be both a client and a server side test
    if client and server:
        parser.parser.error("Can not specify a test as both server and client!")

    if provision and client:
        parser.parser.error("Cannot specify provisioning and client!")

    is_special_task = (verify or repair or cleanup or collect_crashinfo or
                       provision or reset)
    if len(parser.args) < 1 and not is_special_task:
        parser.parser.error("Missing argument: control file")

    # We have a control file unless it's just a verify/repair/cleanup job
    if len(parser.args) > 0:
        control = parser.args[0]
    else:
        control = None

    # A machines file, when given, replaces any machines from the command line.
    if machines_file:
        machines = _read_machines_file(machines_file)
        print("Read list of machines from file: %s" % machines_file)
        print(','.join(machines))

    if machines:
        for machine in machines:
            if not machine or re.search(r'\s', machine):
                parser.parser.error("Invalid machine: %s" % str(machine))
        # De-duplicate and sort the machine list.
        machines = list(set(machines))
        machines.sort()

    if group_name and len(machines) < 2:
        parser.parser.error("-G %r may only be supplied with more than one machine."
               % group_name)

    kwargs = {'group_name': group_name, 'tag': execution_tag}
    if control_filename:
        kwargs['control_filename'] = control_filename
    job = server_job.server_job(control, parser.args[1:], results, label,
                                user, machines, client, parse_job,
                                ssh_user, ssh_port, ssh_pass, test_retry,
                                **kwargs)
    job.logging.start_logging()
    job.init_parser()

    # perform checks
    job.precheck()

    # run the job
    exit_code = 0
    try:
        try:
            # The special tasks are mutually exclusive; the first one that is
            # set wins, otherwise the control file is run.
            if repair:
                job.repair(host_protection)
            elif verify:
                job.verify()
            elif provision:
                job.provision(provision)
            elif reset:
                job.reset()
            else:
                job.run(cleanup, install_before, install_after,
                        verify_job_repo_url=verify_job_repo_url,
                        only_collect_crashinfo=collect_crashinfo,
                        skip_crash_collection=skip_crash_collection)
        finally:
            # Close every host the job still tracks, whether or not it failed.
            while job.hosts:
                host = job.hosts.pop()
                host.close()
    except:
        # Deliberately broad: any failure at all (including SystemExit or
        # KeyboardInterrupt raised inside the job) is reported as exit code 1
        # so the pid file below still gets written.
        exit_code = 1
        traceback.print_exc()

    if pid_file_manager:
        pid_file_manager.num_tests_failed = job.num_tests_failed
        pid_file_manager.close_file(exit_code)
    job.cleanup_parser()

    sys.exit(exit_code)
192
193
def main():
    """Command-line entry point: parse options, set up logging, run the job.

    Steps, in order:
      * read the comma-separated 'measure_run_time_tests' option from the
        AUTOSERV config section;
      * parse the command line, printing help and exiting with status 1 when
        no arguments were given;
      * unless logging is disabled, resolve the results directory, refuse to
        reuse one holding earlier results unless use_existing_results is set,
        and create it if needed;
      * configure logging and, if requested, open the pid file;
      * optionally start a run-time stats timer for whitelisted tests;
      * call run_autoserv() and exit with the code it produced.

    @raises SystemExit: always; the code is that of run_autoserv(), or 1 for
            an unexpected exception or an invalid results directory.
    """
    # White list of tests with run time measurement enabled.
    measure_run_time_tests_names = global_config.global_config.get_config_value(
                        'AUTOSERV', 'measure_run_time_tests', type=str)
    if measure_run_time_tests_names:
        measure_run_time_tests = [t.strip() for t in
                                  measure_run_time_tests_names.split(',')]
    else:
        measure_run_time_tests = []
    # grab the parser
    parser = autoserv_parser.autoserv_parser
    parser.parse_args()

    if len(sys.argv) == 1:
        parser.parser.print_help()
        sys.exit(1)

    if parser.options.no_logging:
        results = None
    else:
        results = parser.options.results
        if not results:
            results = 'results.' + time.strftime('%Y-%m-%d-%H.%M.%S')
        results = os.path.abspath(results)
        # The directory counts as "existing results" if it holds any of the
        # files below.
        resultdir_exists = any(
                os.path.exists(os.path.join(results, filename))
                for filename in ('control.srv', 'status.log',
                                 '.autoserv_execute'))
        if not parser.options.use_existing_results and resultdir_exists:
            error = "Error: results directory already exists: %s\n" % results
            sys.stderr.write(error)
            sys.exit(1)

        # Now that we certified that there's no leftover results dir from
        # previous jobs, lets create the result dir since the logging system
        # needs to create the log file in there.
        if not os.path.isdir(results):
            os.makedirs(results)

    logging_manager.configure_logging(
            server_logging_config.ServerLoggingConfig(), results_dir=results,
            use_console=not parser.options.no_tee,
            verbose=parser.options.verbose,
            no_console_prefix=parser.options.no_console_prefix)
    if results:
        logging.info("Results placed in %s", results)

        # wait until now to perform this check, so it get properly logged
        if parser.options.use_existing_results and not resultdir_exists:
            logging.error("No existing results directory found: %s", results)
            sys.exit(1)


    if parser.options.write_pidfile:
        pid_file_manager = pidfile.PidFileManager(parser.options.pidfile_label,
                                                  results)
        pid_file_manager.open_file()
    else:
        pid_file_manager = None

    autotest.BaseAutotest.set_install_in_tmpdir(
        parser.options.install_in_tmpdir)

    timer = None
    try:
        # Take the first argument as control file name, get the test name from
        # the control file. If the test name exists in the list of tests with
        # run time measurement enabled, start a timer to begin measurement.
        if (len(parser.args) > 0 and parser.args[0] != '' and
            parser.options.machines):
            test_name = control_data.parse_control(parser.args[0]).name
            if test_name in measure_run_time_tests:
                machines = parser.options.machines.replace(',', ' '
                                                           ).strip().split()
                afe = frontend.AFE()
                # Only the first machine is used to look up the board name.
                board = server_utils.get_board_from_afe(machines[0], afe)
                timer = stats.Timer('autoserv_run_time.%s.%s' %
                                    (board, test_name))
                timer.start()
    except control_data.ControlVariableException as e:
        # A control file that cannot be parsed only disables timing; the job
        # itself is still attempted below.
        logging.error(str(e))
    exit_code = 0
    try:
        try:
            run_autoserv(pid_file_manager, results, parser)
        except SystemExit as e:
            # run_autoserv() reports its result by calling sys.exit().
            exit_code = e.code
        except:
            traceback.print_exc()
            # If we don't know what happened, we'll classify it as
            # an 'abort' and return 1.
            exit_code = 1
    finally:
        if pid_file_manager:
            pid_file_manager.close_file(exit_code)
        if timer:
            timer.stop()
    sys.exit(exit_code)


# Run main() only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
296