autoserv revision 20cc72ac289b10e362e2865f4b662425f6ea1a6d
1#!/usr/bin/python -u
2# Copyright 2007-2008 Martin J. Bligh <mbligh@google.com>, Google Inc.
3# Released under the GPL v2
4
5"""
6Run a control file through the server side engine
7"""
8
9import datetime
10import contextlib
11import getpass
12import logging
13import os
14import re
15import signal
16import socket
17import sys
18import traceback
19import time
20import urllib2
21
22import common
23from autotest_lib.client.common_lib import control_data
24from autotest_lib.client.common_lib import error
25from autotest_lib.client.common_lib import global_config
26from autotest_lib.client.common_lib import utils
27from autotest_lib.client.common_lib.cros.graphite import autotest_es
28from autotest_lib.client.common_lib.cros.graphite import autotest_stats
29try:
30    from autotest_lib.puppylab import results_mocker
31except ImportError:
32    results_mocker = None
33
# Shared accessor for the global autotest configuration, used throughout
# this module.
_CONFIG = global_config.global_config

# Whether the third-party atfork module must be importable (see the
# try/import block below).
require_atfork = _CONFIG.get_config_value(
        'AUTOSERV', 'require_atfork_module', type=bool, default=True)


# Number of seconds to wait before returning if testing mode is enabled
TESTING_MODE_SLEEP_SECS = 1

try:
    import atfork
    atfork.monkeypatch_os_fork_functions()
    import atfork.stdlib_fixer
    # Fix the Python standard library for threading+fork safety with its
    # internal locks.  http://code.google.com/p/python-atfork/
    import warnings
    warnings.filterwarnings('ignore', 'logging module already imported')
    atfork.stdlib_fixer.fix_logging_module()
except ImportError, e:
    from autotest_lib.client.common_lib import global_config
    # atfork is optional unless the config says otherwise; abort only when
    # the configuration requires it.
    if _CONFIG.get_config_value(
            'AUTOSERV', 'require_atfork_module', type=bool, default=False):
        print >>sys.stderr, 'Please run utils/build_externals.py'
        print e
        sys.exit(1)
59
60from autotest_lib.server import frontend
61from autotest_lib.server import server_logging_config
62from autotest_lib.server import server_job, utils, autoserv_parser, autotest
63from autotest_lib.server import utils as server_utils
64from autotest_lib.server import site_utils
65from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
66from autotest_lib.site_utils import job_directories
67from autotest_lib.site_utils import job_overhead
68from autotest_lib.site_utils import lxc
69from autotest_lib.site_utils import lxc_utils
70from autotest_lib.client.common_lib import pidfile, logging_manager
71from autotest_lib.client.common_lib.cros.graphite import autotest_stats
72
73
74# Control segment to stage server-side package.
# Control segment to stage server-side package.
STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE = server_job._control_segment_path(
        'stage_server_side_package')

# Command line to start servod in a moblab.
START_SERVOD_CMD = 'sudo start servod BOARD=%s PORT=%s'
# Command line to stop a running servod job in a moblab.
STOP_SERVOD_CMD = 'sudo stop servod'
81
def log_alarm(signum, frame):
    """Handle SIGALRM by logging it loudly and exiting the process.

    Registered via signal.signal(signal.SIGALRM, log_alarm) in
    run_autoserv so an unexpected alarm is reported before the process
    terminates with a non-zero exit code.

    @param signum: Signal number delivered (signal.SIGALRM).
    @param frame: Stack frame active when the signal was delivered.
    """
    # The previous message claimed the signal was ignored, which
    # contradicted the sys.exit(1) below; log what actually happens.
    logging.error("Received SIGALRM. Exiting with error code 1.")
    sys.exit(1)
85
86
87def _get_machines(parser):
88    """Get a list of machine names from command line arg -m or a file.
89
90    @param parser: Parser for the command line arguments.
91
92    @return: A list of machine names from command line arg -m or the
93             machines file specified in the command line arg -M.
94    """
95    if parser.options.machines:
96        machines = parser.options.machines.replace(',', ' ').strip().split()
97    else:
98        machines = []
99    machines_file = parser.options.machines_file
100    if machines_file:
101        machines = []
102        for m in open(machines_file, 'r').readlines():
103            # remove comments, spaces
104            m = re.sub('#.*', '', m).strip()
105            if m:
106                machines.append(m)
107        logging.debug('Read list of machines from file: %s', machines_file)
108        logging.debug('Machines: %s', ','.join(machines))
109
110    if machines:
111        for machine in machines:
112            if not machine or re.search('\s', machine):
113                parser.parser.error("Invalid machine: %s" % str(machine))
114        machines = list(set(machines))
115        machines.sort()
116    return machines
117
118
def _stage_ssp(parser):
    """Stage the server-side package for this job.

    Runs a control segment that stages the server-side package based on
    the job and the autoserv command line options. The staging details
    depend on the host type; currently only CrosHost defines
    stage_server_side_package. A missing server-side package yields a
    None url rather than an exception, but other failures (anything
    besides the artifact not being found) may still raise.

    @param parser: Command line arguments parser passed in the autoserv process.

    @return: (ssp_url, error_msg), where
              ssp_url is a url to the autotest server-side package. None if
              server-side package is not supported.
              error_msg is a string indicating the failures. None if server-
              side package is staged successfully.
    """
    machines = _get_machines(parser)
    if bool(parser.options.lab):
        # In the lab, attach each hostname's AFE host record so the control
        # segment can consult host attributes.
        afe = frontend_wrappers.RetryingAFE(timeout_min=5, delay_sec=10)
        machines = [{'hostname': hostname,
                     'afe_host': afe.get_hosts(hostname=hostname)[0]}
                    for hostname in machines]

    # Default to the build given by --image when test_source_build is not
    # specified, so server-side test code comes from the image under test.
    source_build = (parser.options.test_source_build or
                    parser.options.image)
    namespace = {'machines': machines, 'image': source_build}
    script_locals = {}
    execfile(STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE, namespace, script_locals)
    return script_locals['ssp_url'], script_locals['error_msg']
156
157
def _run_with_ssp(job, container_name, job_id, results, parser, ssp_url,
                  job_folder, machines):
    """Run the server job with server-side packaging.

    Sets up an lxc test container, rewrites this process's own command line
    so it can be re-run inside the container, executes it there, and always
    destroys the container afterwards (see the finally block).

    @param job: The server job object.
    @param container_name: Name of the container to run the test.
    @param job_id: ID of the test job.
    @param results: Folder to store results. This could be different from
                    parser.options.results:
                    parser.options.results  can be set to None for results to be
                    stored in a temp folder.
                    results can be None for autoserv run requires no logging.
    @param parser: Command line parser that contains the options.
    @param ssp_url: url of the staged server-side package.
    @param job_folder: Name of the job result folder.
    @param machines: A list of machines to run the test.
    """
    bucket = lxc.ContainerBucket()
    control = (parser.args[0] if len(parser.args) > 0 and parser.args[0] != ''
               else None)
    try:
        dut_name = machines[0] if len(machines) >= 1 else None
        test_container = bucket.setup_test(container_name, job_id, ssp_url,
                                           results, control=control,
                                           job_folder=job_folder,
                                           dut_name=dut_name)
    except Exception as e:
        # Record the setup failure in status.log before re-raising so the
        # job result reflects why nothing ran.
        job.record('FAIL', None, None,
                   'Failed to setup container for test: %s. Check logs in '
                   'ssp_logs folder for more details.' % e)
        raise

    # Rebuild this process's command line for the nested autoserv run inside
    # the container; the nested run must not itself require SSP.
    args = sys.argv[:]
    args.remove('--require-ssp')
    # --parent_job_id is only useful in autoserv running in host, not in
    # container. Include this argument will cause test to fail for builds before
    # CL 286265 was merged.
    if '--parent_job_id' in args:
        index = args.index('--parent_job_id')
        args.remove('--parent_job_id')
        # Remove the actual parent job id in command line arg.
        # NOTE: after remove() the list has shifted left by one, so
        # args[index] now points at the flag's value, not the flag.
        del args[index]

    # A dictionary of paths to replace in the command line. Key is the path to
    # be replaced with the one in value.
    paths_to_replace = {}
    # Replace the control file path with the one in container.
    if control:
        container_control_filename = os.path.join(
                lxc.CONTROL_TEMP_PATH, os.path.basename(control))
        paths_to_replace[control] = container_control_filename
    # Update result directory with the one in container.
    container_result_dir = os.path.join(lxc.RESULT_DIR_FMT % job_folder)
    if parser.options.results:
        paths_to_replace[parser.options.results] = container_result_dir
    # Update parse_job directory with the one in container. The assumption is
    # that the result folder to be parsed is always the same as the results_dir.
    if parser.options.parse_job:
        paths_to_replace[parser.options.parse_job] = container_result_dir

    # Only whole-argument matches are replaced; paths embedded inside a
    # larger argument are left untouched.
    args = [paths_to_replace.get(arg, arg) for arg in args]

    # Apply --use-existing-results, results directory is aready created and
    # mounted in container. Apply this arg to avoid exception being raised.
    if not '--use-existing-results' in args:
        args.append('--use-existing-results')

    # Make sure autoserv running in container using a different pid file.
    if not '--pidfile-label' in args:
        args.extend(['--pidfile-label', 'container_autoserv'])

    # Quote only arguments containing spaces; the rest pass through as-is.
    cmd_line = ' '.join(["'%s'" % arg if ' ' in arg else arg for arg in args])
    logging.info('Run command in container: %s', cmd_line)
    success = False
    try:
        test_container.attach_run(cmd_line)
        success = True
    except Exception as e:
        # If the test run inside container fails without generating any log,
        # write a message to status.log to help troubleshooting.
        debug_files = os.listdir(os.path.join(results, 'debug'))
        if not debug_files:
            job.record('FAIL', None, None,
                       'Failed to run test inside the container: %s. Check '
                       'logs in ssp_logs folder for more details.' % e)
        raise
    finally:
        # Always report the outcome to stats/metadata and destroy the
        # container, whether the nested run succeeded or not.
        counter_key = '%s.%s' % (lxc.STATS_KEY,
                                 'success' if success else 'fail')
        autotest_stats.Counter(counter_key).increment()
        # metadata is uploaded separately so it can use http to upload.
        metadata = {'drone': socket.gethostname(),
                    'job_id': job_id,
                    'success': success}
        autotest_es.post(use_http=True,
                         type_str=lxc.CONTAINER_RUN_TEST_METADB_TYPE,
                         metadata=metadata)
        test_container.destroy()
256
257
def correct_results_folder_permission(results):
    """Make sure the results folder has the right permission settings.

    Tests that run with server-side packaging leave the results folder
    owned by root. Reassign ownership to the user running the autoserv
    process so the parsing job can read the results.
    TODO(dshi): crbug.com/459344 Remove this function when test container can be
    unprivileged container.

    @param results: Path to the results folder.

    """
    if not results:
        return

    owner = os.getuid()
    group = os.getgid()
    try:
        utils.run('sudo -n chown -R %s "%s"' % (owner, results))
        utils.run('sudo -n chgrp -R %s "%s"' % (group, results))
    except error.CmdError as e:
        # Record the failed chown/chgrp in the metadata DB before
        # propagating the error to the caller.
        autotest_es.post(
                use_http=True,
                type_str='correct_results_folder_failure',
                metadata={'error': str(e),
                          'result_folder': results,
                          'drone': socket.gethostname()})
        raise
283
284
def _start_servod(machine):
    """Try to start servod in moblab if it's not already running or running with
    different board or port.

    No-op outside moblab. If an AFE RPC fails, or the dut's servo_host
    attribute does not point at localhost, starting servod is aborted.
    If servod is already running with the expected board and port it is
    left alone; otherwise any existing servod is stopped and a new one
    is started.

    @param machine: Name of the dut used for test.
    """
    if not utils.is_moblab():
        return

    logging.debug('Trying to start servod.')
    try:
        afe = frontend.AFE()
        board = server_utils.get_board_from_afe(machine, afe)
        hosts = afe.get_hosts(hostname=machine)
        servo_host = hosts[0].attributes.get('servo_host', None)
        servo_port = hosts[0].attributes.get('servo_port', 9999)
        if not servo_host in ['localhost', '127.0.0.1']:
            logging.warn('Starting servod is aborted. The dut\'s servo_host '
                         'attribute is not set to localhost.')
            return
    except (urllib2.HTTPError, urllib2.URLError):
        # Ignore error if RPC failed to get board
        logging.error('Failed to get board name from AFE. Start servod is '
                      'aborted')
        return

    try:
        pid = utils.run('pgrep servod').stdout
        cmd_line = utils.run('ps -fp %s' % pid).stdout
        if ('--board %s' % board in cmd_line and
            '--port %s' % servo_port in cmd_line):
            logging.debug('Servod is already running with given board and port.'
                          ' There is no need to restart servod.')
            return
        logging.debug('Servod is running with different board or port. '
                      'Stopping existing servod.')
        # Use the module-level constant instead of a duplicated literal,
        # matching the START_SERVOD_CMD usage below.
        utils.run(STOP_SERVOD_CMD)
    except error.CmdError:
        # servod is not running.
        pass

    try:
        utils.run(START_SERVOD_CMD % (board, servo_port))
        logging.debug('Servod is started')
    except error.CmdError as e:
        logging.error('Servod failed to be started, error: %s', e)
331
332
def run_autoserv(pid_file_manager, results, parser, ssp_url, use_ssp):
    """Run server job with given options.

    Detaches stdin, installs signal handlers, builds the server_job from
    the parsed options and dispatches to the requested action (repair,
    verify, provision, reset, cleanup, or a full test run — optionally
    inside a server-side-packaging container).

    This function does not return: it terminates the process via
    sys.exit() with 0 on success and 1 on failure.

    @param pid_file_manager: PidFileManager used to monitor the autoserv process
    @param results: Folder to store results.
    @param parser: Parser for the command line arguments.
    @param ssp_url: Url to server-side package.
    @param use_ssp: Set to True to run with server-side packaging.
    """
    if parser.options.warn_no_ssp:
        # Post a warning in the log.
        logging.warn('Autoserv is required to run with server-side packaging. '
                     'However, no drone is found to support server-side '
                     'packaging. The test will be executed in a drone without '
                     'server-side packaging supported.')

    # send stdin to /dev/null
    dev_null = os.open(os.devnull, os.O_RDONLY)
    os.dup2(dev_null, sys.stdin.fileno())
    os.close(dev_null)

    # Create separate process group if the process is not a process group
    # leader. This allows autoserv process to keep running after the caller
    # process (drone manager call) exits.
    if os.getpid() != os.getpgid(0):
        os.setsid()

    # Container name is predefined so the container can be destroyed in
    # handle_sigterm.
    job_or_task_id = job_directories.get_job_id_or_task_id(
            parser.options.results)
    container_name = (lxc.TEST_CONTAINER_NAME_FMT %
                      (job_or_task_id, time.time(), os.getpid()))
    job_folder = job_directories.get_job_folder_name(parser.options.results)

    # Implement SIGTERM handler
    def handle_sigterm(signum, frame):
        # Close the pid file, fix results ownership, tear down any SSP
        # container, then kill the entire process group. Never returns.
        logging.debug('Received SIGTERM')
        if pid_file_manager:
            pid_file_manager.close_file(1, signal.SIGTERM)
        logging.debug('Finished writing to pid_file. Killing process.')

        # Update results folder's file permission. This needs to be done ASAP
        # before the parsing process tries to access the log.
        if use_ssp and results:
            correct_results_folder_permission(results)

        # TODO (sbasi) - remove the time.sleep when crbug.com/302815 is solved.
        # This sleep allows the pending output to be logged before the kill
        # signal is sent.
        time.sleep(.1)
        if use_ssp:
            logging.debug('Destroy container %s before aborting the autoserv '
                          'process.', container_name)
            metadata = {'drone': socket.gethostname(),
                        'job_id': job_or_task_id,
                        'container_name': container_name,
                        'action': 'abort',
                        'success': True}
            try:
                bucket = lxc.ContainerBucket()
                container = bucket.get(container_name)
                if container:
                    container.destroy()
                else:
                    metadata['success'] = False
                    metadata['error'] = 'container not found'
                    logging.debug('Container %s is not found.', container_name)
            except:
                metadata['success'] = False
                metadata['error'] = 'Exception: %s' % str(sys.exc_info())
                # Handle any exception so the autoserv process can be aborted.
                logging.exception('Failed to destroy container %s.',
                                  container_name)
            autotest_es.post(use_http=True,
                             type_str=lxc.CONTAINER_RUN_TEST_METADB_TYPE,
                             metadata=metadata)
            # Try to correct the result file permission again after the
            # container is destroyed, as the container might have created some
            # new files in the result folder.
            if results:
                correct_results_folder_permission(results)

        os.killpg(os.getpgrp(), signal.SIGKILL)

    # Set signal handler
    signal.signal(signal.SIGTERM, handle_sigterm)

    # faulthandler is only needed to debug in the Lab and is not avaliable to
    # be imported in the chroot as part of VMTest, so Try-Except it.
    try:
        import faulthandler
        faulthandler.register(signal.SIGTERM, all_threads=True, chain=True)
        logging.debug('faulthandler registered on SIGTERM.')
    except ImportError:
        sys.exc_clear()

    # Ignore SIGTTOU's generated by output from forked children.
    signal.signal(signal.SIGTTOU, signal.SIG_IGN)

    # If we received a SIGALARM, let's be loud about it.
    signal.signal(signal.SIGALRM, log_alarm)

    # Server side tests that call shell scripts often depend on $USER being set
    # but depending on how you launch your autotest scheduler it may not be set.
    os.environ['USER'] = getpass.getuser()

    # Unpack the parsed command-line options into locals used below.
    label = parser.options.label
    group_name = parser.options.group_name
    user = parser.options.user
    client = parser.options.client
    server = parser.options.server
    install_before = parser.options.install_before
    install_after = parser.options.install_after
    verify = parser.options.verify
    repair = parser.options.repair
    cleanup = parser.options.cleanup
    provision = parser.options.provision
    reset = parser.options.reset
    job_labels = parser.options.job_labels
    no_tee = parser.options.no_tee
    parse_job = parser.options.parse_job
    execution_tag = parser.options.execution_tag
    if not execution_tag:
        execution_tag = parse_job
    ssh_user = parser.options.ssh_user
    ssh_port = parser.options.ssh_port
    ssh_pass = parser.options.ssh_pass
    collect_crashinfo = parser.options.collect_crashinfo
    control_filename = parser.options.control_filename
    test_retry = parser.options.test_retry
    verify_job_repo_url = parser.options.verify_job_repo_url
    skip_crash_collection = parser.options.skip_crash_collection
    ssh_verbosity = int(parser.options.ssh_verbosity)
    ssh_options = parser.options.ssh_options
    no_use_packaging = parser.options.no_use_packaging
    host_attributes = parser.options.host_attributes
    in_lab = bool(parser.options.lab)

    # can't be both a client and a server side test
    if client and server:
        parser.parser.error("Can not specify a test as both server and client!")

    if provision and client:
        parser.parser.error("Cannot specify provisioning and client!")

    is_special_task = (verify or repair or cleanup or collect_crashinfo or
                       provision or reset)
    if len(parser.args) < 1 and not is_special_task:
        parser.parser.error("Missing argument: control file")

    if ssh_verbosity > 0:
        # ssh_verbosity is an integer between 0 and 3, inclusive
        ssh_verbosity_flag = '-' + 'v' * ssh_verbosity
    else:
        ssh_verbosity_flag = ''

    # We have a control file unless it's just a verify/repair/cleanup job
    if len(parser.args) > 0:
        control = parser.args[0]
    else:
        control = None

    machines = _get_machines(parser)
    if group_name and len(machines) < 2:
        parser.parser.error('-G %r may only be supplied with more than one '
                            'machine.' % group_name)

    kwargs = {'group_name': group_name, 'tag': execution_tag,
              'disable_sysinfo': parser.options.disable_sysinfo}
    if parser.options.parent_job_id:
        kwargs['parent_job_id'] = int(parser.options.parent_job_id)
    if control_filename:
        kwargs['control_filename'] = control_filename
    if host_attributes:
        kwargs['host_attributes'] = host_attributes
    kwargs['in_lab'] = in_lab
    job = server_job.server_job(control, parser.args[1:], results, label,
                                user, machines, client, parse_job,
                                ssh_user, ssh_port, ssh_pass,
                                ssh_verbosity_flag, ssh_options,
                                test_retry, **kwargs)

    job.logging.start_logging()
    job.init_parser()

    # perform checks
    job.precheck()

    # run the job
    exit_code = 0
    auto_start_servod = _CONFIG.get_config_value(
            'AUTOSERV', 'auto_start_servod', type=bool, default=False)

    try:
        with site_utils.SetupTsMonGlobalState('autoserv', indirect=True, short_lived=True):
            try:
                # Dispatch on the requested action; exactly one branch runs.
                if repair:
                    if auto_start_servod and len(machines) == 1:
                        _start_servod(machines[0])
                    job.repair(job_labels)
                elif verify:
                    job.verify(job_labels)
                elif provision:
                    job.provision(job_labels)
                elif reset:
                    job.reset(job_labels)
                elif cleanup:
                    job.cleanup(job_labels)
                else:
                    if auto_start_servod and len(machines) == 1:
                        _start_servod(machines[0])
                    if use_ssp:
                        try:
                            _run_with_ssp(job, container_name, job_or_task_id,
                                          results, parser, ssp_url, job_folder,
                                          machines)
                        finally:
                            # Update the ownership of files in result folder.
                            correct_results_folder_permission(results)
                    else:
                        if collect_crashinfo:
                            # Update the ownership of files in result folder. If the
                            # job to collect crashinfo was running inside container
                            # (SSP) and crashed before correcting folder permission,
                            # the result folder might have wrong permission setting.
                            try:
                                correct_results_folder_permission(results)
                            except:
                                # Ignore any error as the user may not have root
                                # permission to run sudo command.
                                pass
                        job.run(install_before, install_after,
                                verify_job_repo_url=verify_job_repo_url,
                                only_collect_crashinfo=collect_crashinfo,
                                skip_crash_collection=skip_crash_collection,
                                job_labels=job_labels,
                                use_packaging=(not no_use_packaging))
            finally:
                # Always close every host connection opened by the job.
                while job.hosts:
                    host = job.hosts.pop()
                    host.close()
    except:
        # Any failure in the job maps to a non-zero exit code.
        exit_code = 1
        traceback.print_exc()

    if pid_file_manager:
        pid_file_manager.num_tests_failed = job.num_tests_failed
        pid_file_manager.close_file(exit_code)
    job.cleanup_parser()

    sys.exit(exit_code)
585
586
def record_autoserv(options, duration_secs):
    """Record autoserv end-to-end time in metadata db.

    @param options: parser options.
    @param duration_secs: How long autoserv has taken, in secs.
    """
    # Work out which hostname the duration should be recorded against.
    if options.machines:
        machines = options.machines.replace(',', ' ').strip().split()
    else:
        machines = []
    if len(machines) > 1:
        # Skip the case where atomic group is used.
        return
    if not machines:
        machines = ['hostless']

    # Determine the status that will be reported.
    s = job_overhead.STATUS
    task_mapping = {
            'reset': s.RESETTING, 'verify': s.VERIFYING,
            'provision': s.PROVISIONING, 'repair': s.REPAIRING,
            'cleanup': s.CLEANING, 'collect_crashinfo': s.GATHERING}
    matched_tasks = [task for task in task_mapping
                     if getattr(options, task, False) == True]
    if matched_tasks:
        status = task_mapping[matched_tasks[0]]
    else:
        status = s.RUNNING
    is_special_task = status not in [s.RUNNING, s.GATHERING]
    job_or_task_id = job_directories.get_job_id_or_task_id(options.results)
    job_overhead.record_state_duration(
            job_or_task_id, machines[0], status, duration_secs,
            is_special_task=is_special_task)
617
618
619def main():
620    start_time = datetime.datetime.now()
621    # White list of tests with run time measurement enabled.
622    measure_run_time_tests_names = _CONFIG.get_config_value(
623            'AUTOSERV', 'measure_run_time_tests', type=str)
624    if measure_run_time_tests_names:
625        measure_run_time_tests = [t.strip() for t in
626                                  measure_run_time_tests_names.split(',')]
627    else:
628        measure_run_time_tests = []
629    # grab the parser
630    parser = autoserv_parser.autoserv_parser
631    parser.parse_args()
632
633    if len(sys.argv) == 1:
634        parser.parser.print_help()
635        sys.exit(1)
636
637    # If the job requires to run with server-side package, try to stage server-
638    # side package first. If that fails with error that autotest server package
639    # does not exist, fall back to run the job without using server-side
640    # packaging. If option warn_no_ssp is specified, that means autoserv is
641    # running in a drone does not support SSP, thus no need to stage server-side
642    # package.
643    ssp_url = None
644    ssp_url_warning = False
645    if (not parser.options.warn_no_ssp and parser.options.require_ssp):
646        ssp_url, ssp_error_msg = _stage_ssp(parser)
647        # The build does not have autotest server package. Fall back to not
648        # to use server-side package. Logging is postponed until logging being
649        # set up.
650        ssp_url_warning = not ssp_url
651
652    if parser.options.no_logging:
653        results = None
654    else:
655        results = parser.options.results
656        if not results:
657            results = 'results.' + time.strftime('%Y-%m-%d-%H.%M.%S')
658        results = os.path.abspath(results)
659        resultdir_exists = False
660        for filename in ('control.srv', 'status.log', '.autoserv_execute'):
661            if os.path.exists(os.path.join(results, filename)):
662                resultdir_exists = True
663        if not parser.options.use_existing_results and resultdir_exists:
664            error = "Error: results directory already exists: %s\n" % results
665            sys.stderr.write(error)
666            sys.exit(1)
667
668        # Now that we certified that there's no leftover results dir from
669        # previous jobs, lets create the result dir since the logging system
670        # needs to create the log file in there.
671        if not os.path.isdir(results):
672            os.makedirs(results)
673
674    # Server-side packaging will only be used if it's required and the package
675    # is available. If warn_no_ssp is specified, it means that autoserv is
676    # running in a drone does not have SSP supported and a warning will be logs.
677    # Therefore, it should not run with SSP.
    # Decide whether server-side packaging (SSP) is actually used for this
    # run: requires --require_ssp, a resolved SSP package URL, and the
    # absence of --warn_no_ssp.
    use_ssp = (not parser.options.warn_no_ssp and parser.options.require_ssp
               and ssp_url)
    if use_ssp:
        # SSP runs keep their logs in a dedicated subdirectory of the
        # results directory (when a results directory was given).
        log_dir = os.path.join(results, 'ssp_logs') if results else None
        if log_dir and not os.path.exists(log_dir):
            os.makedirs(log_dir)
    else:
        log_dir = results

    # Route all logging (console and log files) through the server-side
    # logging configuration before anything else is logged.
    logging_manager.configure_logging(
            server_logging_config.ServerLoggingConfig(),
            results_dir=log_dir,
            use_console=not parser.options.no_tee,
            verbose=parser.options.verbose,
            no_console_prefix=parser.options.no_console_prefix)

    if ssp_url_warning:
        # NOTE(review): logging.warn is a deprecated alias of
        # logging.warning. Also, 'erver-side' in the message below is a
        # typo for 'server-side' (runtime string; not altered here).
        logging.warn(
                'Autoserv is required to run with server-side packaging. '
                'However, no server-side package can be found based on '
                '`--image`, host attribute job_repo_url or host OS version '
                'label. It could be that the build to test is older than the '
                'minimum version that supports server-side packaging. The test '
                'will be executed without using erver-side packaging. '
                'Following is the detailed error:\n%s', ssp_error_msg)

    if results:
        # NOTE(review): prefer lazy logging args
        # (logging.info("...", results)) over eager % formatting.
        logging.info("Results placed in %s" % results)

        # wait until now to perform this check, so it get properly logged
        if (parser.options.use_existing_results and not resultdir_exists and
            not utils.is_in_container()):
            logging.error("No existing results directory found: %s", results)
            sys.exit(1)

    logging.debug('autoserv is running in drone %s.', socket.gethostname())
    logging.debug('autoserv command was: %s', ' '.join(sys.argv))

    # Optionally track this autoserv process with a pidfile so the scheduler
    # can monitor it; only possible when a results directory exists.
    if parser.options.write_pidfile and results:
        pid_file_manager = pidfile.PidFileManager(parser.options.pidfile_label,
                                                  results)
        pid_file_manager.open_file()
    else:
        pid_file_manager = None

    autotest.BaseAutotest.set_install_in_tmpdir(
        parser.options.install_in_tmpdir)

    timer = None
    try:
        # Take the first argument as control file name, get the test name from
        # the control file. If the test name exists in the list of tests with
        # run time measurement enabled, start a timer to begin measurement.
        if (len(parser.args) > 0 and parser.args[0] != '' and
            parser.options.machines):
            try:
                test_name = control_data.parse_control(parser.args[0],
                                                       raise_warnings=True).name
            except control_data.ControlVariableException:
                logging.debug('Failed to retrieve test name from control file.')
                test_name = None
            if test_name in measure_run_time_tests:
                machines = parser.options.machines.replace(',', ' '
                                                           ).strip().split()
                try:
                    afe = frontend.AFE()
                    board = server_utils.get_board_from_afe(machines[0], afe)
                    # Stats key shape: autoserv_run_time.<board>.<test_name>.
                    timer = autotest_stats.Timer('autoserv_run_time.%s.%s' %
                                                 (board, test_name))
                    timer.start()
                except (urllib2.HTTPError, urllib2.URLError):
                    # Ignore error if RPC failed to get board
                    pass
    except control_data.ControlVariableException as e:
        # NOTE(review): parse errors are already caught by the inner
        # handler above, so this outer handler looks redundant -- confirm
        # whether any other call in the try block can raise it.
        logging.error(str(e))
    exit_code = 0
    # TODO(beeps): Extend this to cover different failure modes.
    # Testing exceptions are matched against labels sent to autoserv. Eg,
    # to allow only the hostless job to run, specify
    # testing_exceptions: test_suite in the shadow_config. To allow both
    # the hostless job and dummy_Pass to run, specify
    # testing_exceptions: test_suite,dummy_Pass. You can figure out
    # what label autoserv is invoked with by looking through the logs of a test
    # for the autoserv command's -l option.
    testing_exceptions = _CONFIG.get_config_value(
            'AUTOSERV', 'testing_exceptions', type=list, default=[])
    test_mode = _CONFIG.get_config_value(
            'AUTOSERV', 'testing_mode', type=bool, default=False)
    # Testing mode only takes effect when results_mocker imported cleanly
    # and no configured exception label matches this invocation's label.
    test_mode = (results_mocker and test_mode and not
                 any([ex in parser.options.label
                      for ex in testing_exceptions]))
    # True when autoserv was invoked for a special task (verify/repair/...)
    # rather than to run a test's control file.
    is_task = (parser.options.verify or parser.options.repair or
               parser.options.provision or parser.options.reset or
               parser.options.cleanup or parser.options.collect_crashinfo)
    try:
        try:
            if test_mode:
                # The parser doesn't run on tasks anyway, so we can just return
                # happy signals without faking results.
                if not is_task:
                    machine = parser.options.results.split('/')[-1]

                    # TODO(beeps): The proper way to do this would be to
                    # refactor job creation so we can invoke job.record
                    # directly. To do that one needs to pipe the test_name
                    # through run_autoserv and bail just before invoking
                    # the server job. See the comment in
                    # puppylab/results_mocker for more context.
                    # NOTE(review): test_name may be unbound here if the
                    # control-file branch earlier in this function did not
                    # execute (e.g. no machines given) -- potential
                    # NameError; verify against callers.
                    results_mocker.ResultsMocker(
                            test_name if test_name else 'unknown-test',
                            parser.options.results, machine
                            ).mock_results()
                # Returning here still runs the finally block below, but
                # skips the sys.exit(exit_code) at the end of the function.
                return
            else:
                run_autoserv(pid_file_manager, results, parser, ssp_url,
                             use_ssp)
        except SystemExit as e:
            # Preserve the requested exit code; only log when it is
            # non-zero (zero is a normal exit).
            exit_code = e.code
            if exit_code:
                logging.exception(e)
        except Exception as e:
            # If we don't know what happened, we'll classify it as
            # an 'abort' and return 1.
            logging.exception(e)
            exit_code = 1
    finally:
        if pid_file_manager:
            pid_file_manager.close_file(exit_code)
        if timer:
            timer.stop()
        # Record the autoserv duration time. Must be called
        # just before the system exits to ensure accuracy.
        duration_secs = (datetime.datetime.now() - start_time).total_seconds()
        record_autoserv(parser.options, duration_secs)
    sys.exit(exit_code)
813
814
# Script entry point: run main() only when executed directly, not on import.
if __name__ == '__main__':
    main()
817