base_utils.py revision b2d1fc137b403edf24d3ef3f6e6d3e409bb33948
1#
2# Copyright 2008 Google Inc. Released under the GPL v2
3
4# pylint: disable-msg=C0111
5
import StringIO
import errno
import functools
import itertools
import logging
import os
import pickle
import random
import re
import resource
import select
import shutil
import signal
import smtplib
import socket
import string
import struct
import subprocess
import textwrap
import time
import urllib2
import urlparse
import warnings
28
29from threading import Thread, Event
30
31try:
32    import hashlib
33except ImportError:
34    import md5
35    import sha
36
37from autotest_lib.client.common_lib import error, logging_manager
38
39
def deprecated(func):
    """Decorator which marks a function as deprecated.

    Calling the wrapped function emits a DeprecationWarning (routed through
    logging by this module's warning handler) and then invokes the original
    function unchanged.

    @param func: the function to mark as deprecated.
    @return: a wrapper that warns and then delegates to func.
    """
    # functools.wraps copies __name__, __doc__ and __dict__ like the old
    # manual code did, and additionally __module__/__wrapped__.
    @functools.wraps(func)
    def new_func(*args, **dargs):
        warnings.warn("Call to deprecated function %s." % func.__name__,
                      category=DeprecationWarning)
        return func(*args, **dargs)
    return new_func
51
52
53class _NullStream(object):
54    def write(self, data):
55        pass
56
57
58    def flush(self):
59        pass
60
61
# Sentinel value: pass as stdout_tee/stderr_tee to have output handled by
# the standard logging_manager instead of a caller-supplied stream.
TEE_TO_LOGS = object()
# Shared do-nothing stream used wherever output should be discarded.
_the_null_stream = _NullStream()

# Default logging levels for tee'd subprocess stdout/stderr.
DEFAULT_STDOUT_LEVEL = logging.DEBUG
DEFAULT_STDERR_LEVEL = logging.ERROR

# prefixes for logging stdout/stderr of commands
STDOUT_PREFIX = '[stdout] '
STDERR_PREFIX = '[stderr] '

# safe characters for the shell (do not need quoting)
SHELL_QUOTING_WHITELIST = frozenset(string.ascii_letters +
                                    string.digits +
                                    '_-+=')
76
77
def custom_warning_handler(message, category, filename, lineno, file=None,
                           line=None):
    """Custom handler to log at the WARNING error level. Ignores |file|."""
    formatted = warnings.formatwarning(message, category, filename, lineno,
                                       line)
    logging.warning(formatted)
83
# Route all Python warnings through the logging system instead of stderr.
warnings.showwarning = custom_warning_handler
85
def get_stream_tee_file(stream, level, prefix=''):
    """Map a tee argument to a concrete stream object.

    @param stream: None (discard), TEE_TO_LOGS (route to logging_manager),
                   or an already-usable stream (returned as-is).
    @param level: logging level used when stream is TEE_TO_LOGS.
    @param prefix: logging prefix used when stream is TEE_TO_LOGS.
    """
    if stream is TEE_TO_LOGS:
        return logging_manager.LoggingFile(level=level, prefix=prefix)
    if stream is None:
        return _the_null_stream
    return stream
92
93
94def _join_with_nickname(base_string, nickname):
95    if nickname:
96        return '%s BgJob "%s" ' % (base_string, nickname)
97    return base_string
98
99
100# TODO: Cleanup and possibly eliminate no_pipes, which is only used
101# in our master-ssh connection process, while fixing underlying
102# semantics problem in BgJob. See crbug.com/279312
class BgJob(object):
    def __init__(self, command, stdout_tee=None, stderr_tee=None, verbose=True,
                 stdin=None, stderr_level=DEFAULT_STDERR_LEVEL, nickname=None,
                 no_pipes=False):
        """Create and start a new BgJob.

        This constructor creates a new BgJob, and uses Popen to start a new
        subprocess with given command. It returns without blocking on execution
        of the subprocess.

        After starting a new BgJob, use output_prepare to connect the process's
        stdout and stderr pipes to the stream of your choice.

        When the job is running, the jobs's output streams are only read from
        when process_output is called.

        @param command: command to be executed in new subprocess. May be either
                        a list, or a string (in which case Popen will be called
                        with shell=True)
        @param stdout_tee: Optional additional stream that the process's stdout
                           stream output will be written to. Or, specify
                           base_utils.TEE_TO_LOGS and the output will handled by
                           the standard logging_manager.
        @param stderr_tee: Same as stdout_tee, but for stderr.
        @param verbose: Boolean, make BgJob logging more verbose.
        @param stdin: Stream object, will be passed to Popen as the new
                      process's stdin.
        @param stderr_level: A logging level value. If stderr_tee was set to
                             base_utils.TEE_TO_LOGS, sets the level that tee'd
                             stderr output will be logged at. Ignored
                             otherwise.
        @param nickname: Optional string, to be included in logging messages
        @param no_pipes: Boolean, default False. If True, this subprocess
                         created by this BgJob does NOT use subprocess.PIPE
                         for its stdin or stderr streams. Instead, these
                         streams are connected to the logging manager
                         (regardless of the values of stdout_tee and
                         stderr_tee).
                         If no_pipes is True, then calls to output_prepare,
                         process_output, and cleanup will result in an
                         InvalidBgJobCall exception. no_pipes should be
                         True for BgJobs that do not interact via stdout/stderr
                         with other BgJobs, or long runing background jobs that
                         will never be joined with join_bg_jobs, such as the
                         master-ssh connection BgJob.
        """
        self.command = command
        self._no_pipes = no_pipes
        if no_pipes:
            # Output always goes to the logging manager in no_pipes mode.
            stdout_tee = TEE_TO_LOGS
            stderr_tee = TEE_TO_LOGS
        self.stdout_tee = get_stream_tee_file(stdout_tee, DEFAULT_STDOUT_LEVEL,
                prefix=_join_with_nickname(STDOUT_PREFIX, nickname))
        self.stderr_tee = get_stream_tee_file(stderr_tee, stderr_level,
                prefix=_join_with_nickname(STDERR_PREFIX, nickname))
        self.result = CmdResult(command)

        # allow for easy stdin input by string, we'll let subprocess create
        # a pipe for stdin input and we'll write to it in the wait loop
        if isinstance(stdin, basestring):
            self.string_stdin = stdin
            stdin = subprocess.PIPE
        else:
            self.string_stdin = None


        if no_pipes:
            stdout_param = self.stdout_tee
            stderr_param = self.stderr_tee
        else:
            stdout_param = subprocess.PIPE
            stderr_param = subprocess.PIPE

        if verbose:
            logging.debug("Running '%s'", command)
        # A list command is executed directly; a string goes through a shell.
        if isinstance(command, list):
            self.sp = subprocess.Popen(command,
                                       stdout=stdout_param,
                                       stderr=stderr_param,
                                       preexec_fn=self._reset_sigpipe,
                                       stdin=stdin)
        else:
            self.sp = subprocess.Popen(command, stdout=stdout_param,
                                       stderr=stderr_param,
                                       preexec_fn=self._reset_sigpipe, shell=True,
                                       executable="/bin/bash",
                                       stdin=stdin)

        self._output_prepare_called = False
        self._process_output_warned = False
        self._cleanup_called = False
        self.stdout_file = _the_null_stream
        self.stderr_file = _the_null_stream

    def output_prepare(self, stdout_file=_the_null_stream,
                       stderr_file=_the_null_stream):
        """Connect the subprocess's stdout and stderr to streams.

        Subsequent calls to output_prepare are permitted, and will reassign
        the streams. However, this will have the side effect that the ultimate
        call to cleanup() will only remember the stdout and stderr data up to
        the last output_prepare call when saving this data to BgJob.result.

        @param stdout_file: Stream that output from the process's stdout pipe
                            will be written to. Default: a null stream.
        @param stderr_file: Stream that output from the process's stdout pipe
                            will be written to. Default: a null stream.
        """
        if self._no_pipes:
            raise error.InvalidBgJobCall('Cannot call output_prepare on a '
                                         'job with no_pipes=True.')
        if self._output_prepare_called:
            # Fix: the original call had a %s placeholder but passed no
            # argument, which made logging emit a formatting error instead.
            logging.warning('BgJob [%s] received a duplicate call to '
                            'output prepare. Allowing, but this may result '
                            'in data missing from BgJob.result.',
                            self.command)
        self.stdout_file = stdout_file
        self.stderr_file = stderr_file
        self._output_prepare_called = True


    def process_output(self, stdout=True, final_read=False):
        """Read from process's output stream, and write data to destinations.

        This function reads up to 1024 bytes from the background job's
        stdout or stderr stream, and writes the resulting data to the BgJob's
        output tee and to the stream set up in output_prepare.

        Warning: Calls to process_output will block on reads from the
        subprocess stream, and will block on writes to the configured
        destination stream.

        @param stdout: True = read and process data from job's stdout.
                       False = from stderr.
                       Default: True
        @param final_read: Do not read only 1024 bytes from stream. Instead,
                           read and process all data until end of the stream.

        """
        if self._no_pipes:
            raise error.InvalidBgJobCall('Cannot call process_output on '
                                         'a job with no_pipes=True')
        if not self._output_prepare_called and not self._process_output_warned:
            logging.warning('BgJob with command [%s] handled a process_output '
                            'call before output_prepare was called. '
                            'Some output data discarded. '
                            'Future warnings suppressed.',
                            self.command)
            self._process_output_warned = True
        if stdout:
            pipe, buf, tee = self.sp.stdout, self.stdout_file, self.stdout_tee
        else:
            pipe, buf, tee = self.sp.stderr, self.stderr_file, self.stderr_tee

        if final_read:
            # read in all the data we can from pipe and then stop
            data = []
            while select.select([pipe], [], [], 0)[0]:
                data.append(os.read(pipe.fileno(), 1024))
                if len(data[-1]) == 0:
                    break
            data = "".join(data)
        else:
            # perform a single read
            data = os.read(pipe.fileno(), 1024)
        buf.write(data)
        tee.write(data)


    def cleanup(self):
        """Clean up after BgJob.

        Flush the stdout_tee and stderr_tee buffers, close the
        subprocess stdout and stderr buffers, and saves data from
        the configured stdout and stderr destination streams to
        self.result. Duplicate calls ignored with a warning.
        """
        if self._no_pipes:
            raise error.InvalidBgJobCall('Cannot call cleanup on '
                                         'a job with no_pipes=True')
        if self._cleanup_called:
            logging.warning('BgJob [%s] received a duplicate call to '
                            'cleanup. Ignoring.', self.command)
            return
        try:
            self.stdout_tee.flush()
            self.stderr_tee.flush()
            self.sp.stdout.close()
            self.sp.stderr.close()
            self.result.stdout = self.stdout_file.getvalue()
            self.result.stderr = self.stderr_file.getvalue()
        finally:
            # Mark done even if a stream failed, so duplicate calls warn.
            self._cleanup_called = True


    def _reset_sigpipe(self):
        # Child processes should get the default SIGPIPE behavior rather
        # than Python's ignore-and-raise-IOError handling.
        signal.signal(signal.SIGPIPE, signal.SIG_DFL)
299
300
def ip_to_long(ip):
    """Convert a dotted-quad IPv4 string to its 32-bit integer value."""
    packed = socket.inet_aton(ip)
    # '!L' unpacks the four bytes as one unsigned long in network byte order.
    return struct.unpack('!L', packed)[0]
304
305
def long_to_ip(number):
    """Convert a 32-bit integer back into a dotted-quad IPv4 string."""
    # Inverse of ip_to_long: pack as unsigned network-order long, then format.
    packed = struct.pack('!L', number)
    return socket.inet_ntoa(packed)
309
310
def create_subnet_mask(bits):
    """Return the integer IPv4 netmask with the top |bits| bits set."""
    # |bits| ones shifted into the high end of a 32-bit value.
    return ((1 << bits) - 1) << (32 - bits)
313
314
def format_ip_with_mask(ip, mask_bits):
    """Return 'a.b.c.d/bits' for |ip| with its host bits zeroed out."""
    network = ip_to_long(ip) & create_subnet_mask(mask_bits)
    return "%s/%s" % (long_to_ip(network), mask_bits)
318
319
def normalize_hostname(alias):
    """Resolve |alias| to its canonical hostname via forward then reverse DNS."""
    address = socket.gethostbyname(alias)
    hostname, _aliases, _addresses = socket.gethostbyaddr(address)
    return hostname
323
324
def get_ip_local_port_range():
    """Return the kernel's (lower, upper) local port range as ints."""
    contents = read_one_line('/proc/sys/net/ipv4/ip_local_port_range')
    match = re.match(r'\s*(\d+)\s*(\d+)\s*$', contents)
    return (int(match.group(1)), int(match.group(2)))
329
330
def set_ip_local_port_range(lower, upper):
    """Write the kernel's local port range as 'lower upper'."""
    value = '%d %d\n' % (lower, upper)
    write_one_line('/proc/sys/net/ipv4/ip_local_port_range', value)
334
335
def send_email(mail_from, mail_to, subject, body):
    """
    Sends an email via smtp

    @param mail_from: string with email address of sender
    @param mail_to: string or list with email address(es) of recipients
    @param subject: string with subject of email
    @param body: (multi-line) string with body of email
    """
    # Accept a single address as a bare string for convenience.
    if isinstance(mail_to, str):
        mail_to = [mail_to]
    msg = "From: %s\nTo: %s\nSubject: %s\n\n%s" % (mail_from, ','.join(mail_to),
                                                   subject, body)
    try:
        mailer = smtplib.SMTP('localhost')
        try:
            mailer.sendmail(mail_from, mail_to, msg)
        finally:
            # Always close the SMTP session, even if sendmail failed.
            mailer.quit()
    except Exception, e:
        # Emails are non-critical, not errors, but don't raise them
        print "Sending email failed. Reason: %s" % repr(e)
358
359
def read_one_line(filename):
    """Return the first line of |filename| without its trailing newline.

    @param filename: path of the file to read.
    @return: the first line, stripped of a trailing newline character.
    """
    # try/finally ensures the descriptor is closed even if readline raises;
    # the original implementation leaked the file handle.
    f = open(filename, 'r')
    try:
        return f.readline().rstrip('\n')
    finally:
        f.close()
362
363
def read_file(filename):
    """Return the entire contents of |filename| as a single string."""
    with open(filename) as f:
        return f.read()
370
371
def get_field(data, param, linestart="", sep=" "):
    """
    Parse one field out of tabular text data.

    @param data: Data to parse.
        example:
          data:
             cpu   324 345 34  5 345
             cpu0  34  11  34 34  33
             ^^^^
             start of line
             params 0   1   2  3   4
    @param param: Position of the parameter after the linestart marker.
    @param linestart: String with which the wanted line starts.
    @param sep: Separator between parameters (regular expression).
    @return: the selected field as a string, or None when no line in
        |data| starts with |linestart|.
    """
    # Lookbehind anchors the match to a line beginning with |linestart|;
    # the capture group grabs the remainder of that line.
    search = re.compile(r"(?<=^%s)\s*(.*)" % linestart, re.MULTILINE)
    find = search.search(data)
    if find != None:
        return re.split("%s" % sep, find.group(1))[param]
    else:
        print "There is no line which starts with %s in data." % linestart
        return None
394
395
def write_one_line(filename, line):
    """Overwrite |filename| with |line|, normalized to one trailing newline."""
    data = str(line).rstrip('\n') + '\n'
    open_write_close(filename, data)
398
399
def open_write_close(filename, data):
    """Write |data| to |filename|, replacing any existing contents."""
    with open(filename, 'w') as f:
        f.write(data)
406
407
def locate_file(path, base_dir=None):
    """Locates a file.

    @param path: The path of the file being located. Could be absolute or
        relative path. For relative path, it tries to locate the file from
        base_dir.
    @param base_dir (optional): Base directory of the relative path.

    @returns Absolute path of the file if found. None if path is None.
    @raises error.TestFail if the file is not found.
    """
    if path is None:
        return None

    # Resolve a relative path against base_dir when one was supplied.
    if base_dir is not None and not os.path.isabs(path):
        path = os.path.join(base_dir, path)
    if os.path.isfile(path):
        return path
    raise error.TestFail('ERROR: Unable to find %s' % path)
427
428
def matrix_to_string(matrix, header=None):
    """
    Return a pretty, aligned string representation of a nxm matrix.

    This representation can be used to print any tabular data, such as
    database results. It works by scanning the lengths of each element
    in each column, and determining the format string dynamically.

    @param matrix: Matrix representation (list with n rows of m elements).
    @param header: Optional tuple or list with header elements to be displayed.
    """
    if type(header) is list:
        header = tuple(header)

    # Column widths start from the header (if any) and grow to fit cells.
    widths = []
    if header:
        widths = [len(column) for column in header]
    for row in matrix:
        for i, column in enumerate(row):
            column = unicode(column).encode("utf-8")
            if i < len(widths):
                if len(column) > widths[i]:
                    widths[i] = len(column)
            else:
                widths.append(len(column))

    format_string = "".join("%-" + str(width) + "s " for width in widths)
    format_string += "\n"

    rendered = []
    if header:
        rendered.append(format_string % header)
    for row in matrix:
        rendered.append(format_string % tuple(row))

    return "".join(rendered)
470
471
def read_keyval(path, type_tag=None):
    """
    Read a key-value pair format file into a dictionary, and return it.
    Takes either a filename or directory name as input. If it's a
    directory name, we assume you want the file to be called keyval.

    @param path: Full path of the file to read from.
    @param type_tag: If not None, only keyvals with key ending
                     in a suffix {type_tag} will be collected.
    @return: dict mapping keys to int, float or string values; empty dict
             when the file does not exist.
    @raise ValueError: if a non-comment, non-blank line does not match the
             expected key=value format.
    """
    if os.path.isdir(path):
        path = os.path.join(path, 'keyval')
    if not os.path.exists(path):
        return {}

    if type_tag:
        pattern = r'^([-\.\w]+)\{%s\}=(.*)$' % type_tag
    else:
        pattern = r'^([-\.\w]+)=(.*)$'

    keyval = {}
    f = open(path)
    try:
        for line in f:
            # Strip trailing comments and whitespace; skip blank lines.
            line = re.sub('#.*', '', line).rstrip()
            if not line:
                continue
            match = re.match(pattern, line)
            if not match:
                raise ValueError('Invalid format line: %s' % line)
            key = match.group(1)
            value = match.group(2)
            # Coerce purely-numeric values: int first, then float.
            if re.search(r'^\d+$', value):
                value = int(value)
            elif re.search(r'^(\d+\.)?\d+$', value):
                value = float(value)
            keyval[key] = value
    finally:
        # Close the handle even when a malformed line raises ValueError;
        # the original leaked the file in that case.
        f.close()
    return keyval
511
512
def write_keyval(path, dictionary, type_tag=None, tap_report=None):
    """
    Write a key-value pair format file out to a file. This uses append
    mode to open the file, so existing text will not be overwritten or
    reparsed.

    If type_tag is None, then the key must be composed of alphanumeric
    characters (or dashes+underscores). However, if type-tag is not
    null then the keys must also have "{type_tag}" as a suffix. At
    the moment the only valid values of type_tag are "attr" and "perf".

    @param path: full path of the file to be written
    @param dictionary: the items to write
    @param type_tag: see text above
    @param tap_report: optional TAP report object; when supplied and its
            do_tap_report flag is set, the keyvals are also recorded there.
    @raise ValueError: on an invalid type_tag or a key that does not match
            the required pattern.
    """
    if os.path.isdir(path):
        path = os.path.join(path, 'keyval')

    # Validate the tag before touching the file: the original opened the
    # keyval file first, so an invalid tag created the file and leaked
    # the open handle.
    if type_tag is None:
        key_regex = re.compile(r'^[-\.\w]+$')
    else:
        if type_tag not in ('attr', 'perf'):
            raise ValueError('Invalid type tag: %s' % type_tag)
        escaped_tag = re.escape(type_tag)
        key_regex = re.compile(r'^[-\.\w]+\{%s\}$' % escaped_tag)

    keyval = open(path, 'a')
    try:
        for key in sorted(dictionary.keys()):
            if not key_regex.search(key):
                raise ValueError('Invalid key: %s' % key)
            keyval.write('%s=%s\n' % (key, dictionary[key]))
    finally:
        keyval.close()

    # same for tap
    if tap_report is not None and tap_report.do_tap_report:
        tap_report.record_keyval(path, dictionary, type_tag=type_tag)
550
class FileFieldMonitor(object):
    """
    Monitors the information from the file and reports its values.

    It gathers the information at start and stop of the measurement, or
    continuously during the measurement when run with a monitoring thread.
    """
    class Monitor(Thread):
        """
        Internal monitor class to ensure continuous monitoring of the
        monitored file.
        """
        def __init__(self, master):
            """
            @param master: Master class which controls the Monitor.
            """
            Thread.__init__(self)
            self.master = master

        def run(self):
            """
            Start monitor in thread mode
            """
            # Sample the file every time_step until the master signals stop.
            while not self.master.end_event.isSet():
                self.master._get_value(self.master.logging)
                time.sleep(self.master.time_step)


    def __init__(self, status_file, data_to_read, mode_diff, continuously=False,
                 contlogging=False, separator=" +", time_step=0.1):
        """
        Initialize variables.
        @param status_file: File containing the status to monitor.
        @param data_to_read: List of tuples with data position.
            format: [(start_of_line,position in params)]
            example:
              data:
                 cpu   324 345 34  5 345
                 cpu0  34  11  34 34  33
                 ^^^^
                 start of line
                 params 0   1   2  3   4
        @param mode_diff: If True, subtract the old value from the new value;
            otherwise average the values.
        @param continuously: Start the monitoring thread using the time_step
            as the measurement period.
        @param contlogging: Log data in continuous run.
        @param separator: Regular expression of separator.
        @param time_step: Time period of the monitoring value.
        """
        self.end_event = Event()
        self.start_time = 0
        self.end_time = 0
        self.test_time = 0

        self.status_file = status_file
        self.separator = separator
        self.data_to_read = data_to_read
        self.num_of_params = len(self.data_to_read)
        self.mode_diff = mode_diff
        self.continuously = continuously
        self.time_step = time_step

        self.value = [0 for i in range(self.num_of_params)]
        self.old_value = [0 for i in range(self.num_of_params)]
        self.log = []
        self.logging = contlogging

        self.started = False
        self.num_of_get_value = 0
        self.monitor = None


    def _get_value(self, logging=True):
        """
        Return current values.
        @param logging: If true, log the value in memory. There can be a
          problem with memory growth on a long run.
        """
        data = read_file(self.status_file)
        value = []
        for i in range(self.num_of_params):
            value.append(int(get_field(data,
                             self.data_to_read[i][1],
                             self.data_to_read[i][0],
                             self.separator)))

        if logging:
            self.log.append(value)
        if not self.mode_diff:
            # Accumulate a running sum; stop() divides by the sample count
            # to produce the average.
            value = map(lambda x, y: x + y, value, self.old_value)

        self.old_value = value
        self.num_of_get_value += 1
        return value


    def start(self):
        """
        Start value monitor.
        """
        # Restart cleanly if a previous measurement is still running.
        if self.started:
            self.stop()
        self.old_value = [0 for i in range(self.num_of_params)]
        self.num_of_get_value = 0
        self.log = []
        self.end_event.clear()
        self.start_time = time.time()
        self._get_value()
        self.started = True
        if (self.continuously):
            self.monitor = FileFieldMonitor.Monitor(self)
            self.monitor.start()


    def stop(self):
        """
        Stop value monitor.
        """
        if self.started:
            self.started = False
            self.end_time = time.time()
            self.test_time = self.end_time - self.start_time
            self.value = self._get_value()
            if (self.continuously):
                self.end_event.set()
                self.monitor.join()
            # Diff mode reports last-sample minus first-sample; otherwise
            # report the average of the accumulated sum.
            if (self.mode_diff):
                self.value = map(lambda x, y: x - y, self.log[-1], self.log[0])
            else:
                self.value = map(lambda x: x / self.num_of_get_value,
                                 self.value)


    def get_status(self):
        """
        @return: Status of monitored process average value,
            time of test and array of monitored values and time step of
            continuous run.
        """
        if self.started:
            self.stop()
        # In diff mode, convert the raw log into per-interval deltas.
        if self.mode_diff:
            for i in range(len(self.log) - 1):
                self.log[i] = (map(lambda x, y: x - y,
                                   self.log[i + 1], self.log[i]))
            self.log.pop()
        return (self.value, self.test_time, self.log, self.time_step)
699
700
def is_url(path):
    """Return true if path looks like a URL"""
    # for now, just handle http and ftp
    scheme = urlparse.urlparse(path)[0]
    return scheme in ('http', 'ftp')
706
707
def urlopen(url, data=None, timeout=5):
    """Wrapper to urllib2.urlopen with timeout addition."""
    # This urllib2 has no per-call timeout parameter, so temporarily swap
    # the process-wide socket default and always restore it afterwards.
    saved_timeout = socket.getdefaulttimeout()
    socket.setdefaulttimeout(timeout)
    try:
        return urllib2.urlopen(url, data=data)
    finally:
        socket.setdefaulttimeout(saved_timeout)
718
719
def urlretrieve(url, filename, data=None, timeout=300):
    """Retrieve a file from given url."""
    logging.debug('Fetching %s -> %s', url, filename)

    src_file = urlopen(url, data=data, timeout=timeout)
    try:
        with open(filename, 'wb') as dest_file:
            shutil.copyfileobj(src_file, dest_file)
    finally:
        src_file.close()
733
734
735def hash(type, input=None):
736    """
737    Returns an hash object of type md5 or sha1. This function is implemented in
738    order to encapsulate hash objects in a way that is compatible with python
739    2.4 and python 2.6 without warnings.
740
741    Note that even though python 2.6 hashlib supports hash types other than
742    md5 and sha1, we are artificially limiting the input values in order to
743    make the function to behave exactly the same among both python
744    implementations.
745
746    @param input: Optional input string that will be used to update the hash.
747    """
748    if type not in ['md5', 'sha1']:
749        raise ValueError("Unsupported hash type: %s" % type)
750
751    try:
752        hash = hashlib.new(type)
753    except NameError:
754        if type == 'md5':
755            hash = md5.new()
756        elif type == 'sha1':
757            hash = sha.new()
758
759    if input:
760        hash.update(input)
761
762    return hash
763
764
def get_file(src, dest, permissions=None):
    """Get a file from src, which can be local or a remote URL"""
    # Nothing to do when source and destination are the same path.
    if src == dest:
        return

    if is_url(src):
        urlretrieve(src, dest)
    else:
        shutil.copyfile(src, dest)

    if permissions:
        os.chmod(dest, permissions)
    return dest
778
779
def unmap_url(srcdir, src, destdir='.'):
    """
    Receives either a path to a local file or a URL.
    returns either the path to the local file, or the fetched URL

    unmap_url('/usr/src', 'foo.tar', '/tmp')
                            = '/usr/src/foo.tar'
    unmap_url('/usr/src', 'http://site/file', '/tmp')
                            = '/tmp/file'
                            (after retrieving it)
    """
    if not is_url(src):
        return os.path.join(srcdir, src)
    # Fetch the URL into destdir under its basename.
    url_parts = urlparse.urlparse(src)
    filename = os.path.basename(url_parts[2])
    dest = os.path.join(destdir, filename)
    return get_file(src, dest)
798
799
def update_version(srcdir, preserve_srcdir, new_version, install,
                   *args, **dargs):
    """
    Make sure srcdir is version new_version

    If not, delete it and install() the new version.

    In the preserve_srcdir case, we just check it's up to date,
    and if not, we rerun install, without removing srcdir

    @param srcdir: directory whose installed version is tracked in a
            pickled '.version' file inside it.
    @param preserve_srcdir: if True, never delete srcdir before reinstalling.
    @param new_version: version identifier to compare against and record.
    @param install: callable invoked as install(*args, **dargs) to perform
            the (re)installation.
    """
    versionfile = os.path.join(srcdir, '.version')
    install_needed = True

    if os.path.exists(versionfile):
        # Open in binary mode (pickle data is not text) and close the
        # handle; the original leaked it and used text mode.
        f = open(versionfile, 'rb')
        try:
            old_version = pickle.load(f)
        finally:
            f.close()
        if old_version == new_version:
            install_needed = False

    if install_needed:
        if not preserve_srcdir and os.path.exists(srcdir):
            shutil.rmtree(srcdir)
        install(*args, **dargs)
        # Record the version only if the install actually created srcdir.
        if os.path.exists(srcdir):
            f = open(versionfile, 'wb')
            try:
                pickle.dump(new_version, f)
            finally:
                f.close()
824
825
def get_stderr_level(stderr_is_expected):
    """Return the logging level for stderr: demoted to the stdout level
    when stderr output is expected, the stderr level otherwise."""
    return DEFAULT_STDOUT_LEVEL if stderr_is_expected else DEFAULT_STDERR_LEVEL
830
831
def run(command, timeout=None, ignore_status=False,
        stdout_tee=None, stderr_tee=None, verbose=True, stdin=None,
        stderr_is_expected=None, args=(), nickname=None, ignore_timeout=False):
    """
    Run a command on the host.

    @param command: the command line string.
    @param timeout: time limit in seconds before attempting to kill the
            running process. The run() function will take a few seconds
            longer than 'timeout' to complete if it has to kill the process.
    @param ignore_status: do not raise an exception, no matter what the exit
            code of the command is.
    @param ignore_timeout: If True, timeouts are ignored otherwise if a
            timeout occurs it will raise CmdTimeoutError.
    @param stdout_tee: optional file-like object to which stdout data
            will be written as it is generated (data will still be stored
            in result.stdout).
    @param stderr_tee: likewise for stderr.
    @param verbose: if True, log the command being run.
    @param stdin: stdin to pass to the executed process (can be a file
            descriptor, a file object of a real file or a string).
    @param args: sequence of strings of arguments to be given to the command
            inside " quotes after they have been escaped for that; each
            element in the sequence will be given as a separate command
            argument
    @param nickname: Short string that will appear in logging messages
                     associated with this command.

    @return a CmdResult object or None if the command timed out and
            ignore_timeout is True

    @raise CmdError: the exit code of the command execution was not 0
    @raise CmdTimeoutError: the command timed out and ignore_timeout is False.
    """
    if isinstance(args, basestring):
        raise TypeError('Got a string for the "args" keyword argument, '
                        'need a sequence.')

    # Append each extra argument, quoted and shell-escaped.
    command += ''.join(' "%s"' % sh_escape(arg) for arg in args)
    if stderr_is_expected is None:
        stderr_is_expected = ignore_status

    job = BgJob(command, stdout_tee, stderr_tee, verbose, stdin=stdin,
                stderr_level=get_stderr_level(stderr_is_expected),
                nickname=nickname)
    try:
        bg_job = join_bg_jobs((job,), timeout)[0]
    except error.CmdTimeoutError:
        if not ignore_timeout:
            raise
        return None

    if not ignore_status and bg_job.result.exit_status:
        raise error.CmdError(command, bg_job.result,
                             "Command returned non-zero exit status")

    return bg_job.result
890
891
def run_parallel(commands, timeout=None, ignore_status=False,
                 stdout_tee=None, stderr_tee=None,
                 nicknames=None):
    """
    Behaves the same as run() with the following exceptions:

    - commands is a list of commands to run in parallel.
    - ignore_status toggles whether or not an exception should be raised
      on any error.

    @param commands: list of command line strings to run concurrently.
    @param timeout: overall time limit in seconds for all the commands.
    @param ignore_status: if False, raise CmdError for any command that
            exits with a non-zero status.
    @param stdout_tee: optional file-like object to tee every command's
            stdout to.
    @param stderr_tee: likewise for stderr.
    @param nicknames: optional list of short names, matched positionally
            with commands, used in logging messages.

    @return: a list of CmdResult objects, in the same order as commands.

    @raise CmdError: some command exited non-zero and ignore_status is
            False.
    """
    # Use None instead of a mutable default argument.
    if nicknames is None:
        nicknames = []
    bg_jobs = []
    for (command, nickname) in itertools.izip_longest(commands, nicknames):
        bg_jobs.append(BgJob(command, stdout_tee, stderr_tee,
                             stderr_level=get_stderr_level(ignore_status),
                             nickname=nickname))

    # Updates objects in bg_jobs list with their process information
    join_bg_jobs(bg_jobs, timeout)

    for bg_job in bg_jobs:
        if not ignore_status and bg_job.result.exit_status:
            # Report the command of the job that actually failed; the old
            # code leaked the loop variable from the construction loop and
            # always reported the last command in the list.
            raise error.CmdError(bg_job.command, bg_job.result,
                                 "Command returned non-zero exit status")

    return [bg_job.result for bg_job in bg_jobs]
919
920
@deprecated
def run_bg(command):
    """Function deprecated. Please use BgJob class instead."""
    job = BgJob(command)
    return job.sp, job.result
926
927
def join_bg_jobs(bg_jobs, timeout=None):
    """Joins the bg_jobs with the current thread.

    Waits (up to timeout seconds) for every job's process to finish,
    collecting each job's stdout/stderr into its result object.

    @param bg_jobs: sequence of BgJob objects to wait on.
    @param timeout: optional overall time limit in seconds; on expiry the
            remaining processes are killed and CmdTimeoutError is raised.

    @return: the same list of bg_jobs objects that was passed in.

    @raise CmdTimeoutError: at least one job did not complete in time.
    """
    # (the unused 'ret' local from the old code has been dropped)
    timeout_error = False
    for bg_job in bg_jobs:
        bg_job.output_prepare(StringIO.StringIO(), StringIO.StringIO())

    try:
        # We are holding ends to stdin, stdout pipes
        # hence we need to be sure to close those fds no matter what
        start_time = time.time()
        timeout_error = _wait_for_commands(bg_jobs, start_time, timeout)

        for bg_job in bg_jobs:
            # Process stdout and stderr
            bg_job.process_output(stdout=True, final_read=True)
            bg_job.process_output(stdout=False, final_read=True)
    finally:
        # close our ends of the pipes to the sp no matter what
        for bg_job in bg_jobs:
            bg_job.cleanup()

    if timeout_error:
        # TODO: This needs to be fixed to better represent what happens when
        # running in parallel. However this is backwards compatible, so it
        # will do for the time being.
        raise error.CmdTimeoutError(
                bg_jobs[0].command, bg_jobs[0].result,
                "Command(s) did not complete within %d seconds" % timeout)


    return bg_jobs
962
963
def _wait_for_commands(bg_jobs, start_time, timeout):
    """Waits for background jobs by select polling their stdout/stderr.

    @param bg_jobs: A list of background jobs to wait on.
    @param start_time: Time used to calculate the timeout lifetime of a job.
    @param timeout: The timeout of the list of bg_jobs.

    @return: True if the return was due to a timeout, False otherwise.
    """

    # To check for processes which terminate without producing any output
    # a 1 second timeout is used in select.
    SELECT_TIMEOUT = 1

    read_list = []
    write_list = []
    # Maps each pipe fd back to its job: stdout/stderr fds map to
    # (bg_job, is_stdout) tuples; stdin fds map to the bare bg_job.
    reverse_dict = {}

    for bg_job in bg_jobs:
        read_list.append(bg_job.sp.stdout)
        read_list.append(bg_job.sp.stderr)
        reverse_dict[bg_job.sp.stdout] = (bg_job, True)
        reverse_dict[bg_job.sp.stderr] = (bg_job, False)
        if bg_job.string_stdin is not None:
            # This job still has stdin data to feed to its process.
            write_list.append(bg_job.sp.stdin)
            reverse_dict[bg_job.sp.stdin] = bg_job

    if timeout:
        stop_time = start_time + timeout
        time_left = stop_time - time.time()
    else:
        time_left = None # so that select never times out

    while not timeout or time_left > 0:
        # select will return when we may write to stdin, when there is
        # stdout/stderr output we can read (including when it is
        # EOF, that is the process has terminated) or when a non-fatal
        # signal was sent to the process. In the last case the select returns
        # EINTR, and we continue waiting for the job if the signal handler for
        # the signal that interrupted the call allows us to.
        try:
            read_ready, write_ready, _ = select.select(read_list, write_list,
                                                       [], SELECT_TIMEOUT)
        except select.error as v:
            # Python 2's select.error is indexable; [0] is the errno.
            if v[0] == errno.EINTR:
                logging.warning(v)
                continue
            else:
                raise
        # os.read() has to be used instead of
        # subproc.stdout.read() which will otherwise block
        for file_obj in read_ready:
            bg_job, is_stdout = reverse_dict[file_obj]
            bg_job.process_output(is_stdout)

        for file_obj in write_ready:
            # we can write PIPE_BUF bytes without blocking
            # POSIX requires PIPE_BUF is >= 512
            bg_job = reverse_dict[file_obj]
            file_obj.write(bg_job.string_stdin[:512])
            bg_job.string_stdin = bg_job.string_stdin[512:]
            # no more input data, close stdin, remove it from the select set
            if not bg_job.string_stdin:
                file_obj.close()
                write_list.remove(file_obj)
                del reverse_dict[file_obj]

        # Poll every job; the loop exits early as soon as all are done.
        all_jobs_finished = True
        for bg_job in bg_jobs:
            if bg_job.result.exit_status is not None:
                continue

            bg_job.result.exit_status = bg_job.sp.poll()
            if bg_job.result.exit_status is not None:
                # process exited, remove its stdout/stdin from the select set
                bg_job.result.duration = time.time() - start_time
                read_list.remove(bg_job.sp.stdout)
                read_list.remove(bg_job.sp.stderr)
                del reverse_dict[bg_job.sp.stdout]
                del reverse_dict[bg_job.sp.stderr]
            else:
                all_jobs_finished = False

        if all_jobs_finished:
            return False

        if timeout:
            time_left = stop_time - time.time()

    # Kill all processes which did not complete prior to timeout
    for bg_job in bg_jobs:
        if bg_job.result.exit_status is not None:
            continue

        logging.warning('run process timeout (%s) fired on: %s', timeout,
                        bg_job.command)
        if nuke_subprocess(bg_job.sp) is None:
            # If process could not be SIGKILL'd, log kernel stack.
            logging.warning(read_file('/proc/%d/stack' % bg_job.sp.pid))
        bg_job.result.exit_status = bg_job.sp.poll()
        bg_job.result.duration = time.time() - start_time

    return True
1067
1068
def pid_is_alive(pid):
    """
    True if process pid exists and is not yet stuck in Zombie state.
    Zombies are impossible to move between cgroups, etc.
    pid can be integer, or text of integer.
    """
    stat_path = '/proc/%s/stat' % pid

    try:
        contents = read_one_line(stat_path)
    except IOError:
        if os.path.exists(stat_path):
            raise
        # file went away: the process no longer exists
        return False

    # The third field of /proc/<pid>/stat is the state; 'Z' means zombie.
    return contents.split()[2] != 'Z'
1086
1087
def signal_pid(pid, sig):
    """
    Sends a signal to a process id. Returns True if the process terminated
    successfully, False otherwise.
    """
    try:
        os.kill(pid, sig)
    except OSError:
        # The process may have died before we could kill it.
        pass

    # Poll for up to five seconds for the process to disappear.
    checks_left = 5
    while checks_left:
        if not pid_is_alive(pid):
            return True
        time.sleep(1)
        checks_left -= 1

    # The process is still alive
    return False
1106
1107
def nuke_subprocess(subproc):
    """Kill a subprocess.Popen with an escalating series of signals.

    Returns the process's exit status once it is dead, or None if it
    survived even SIGKILL.
    """
    # Nothing to do if the subprocess already exited.
    status = subproc.poll()
    if status is not None:
        return status

    # the process has not terminated within timeout,
    # kill it via an escalating series of signals.
    for sig in (signal.SIGTERM, signal.SIGKILL):
        signal_pid(subproc.pid, sig)
        status = subproc.poll()
        if status is not None:
            return status
1120
1121
def nuke_pid(pid, signal_queue=(signal.SIGTERM, signal.SIGKILL)):
    """Kill pid via an escalating series of signals.

    @param pid: process id to kill.
    @param signal_queue: signals to try, in order, until one terminates
            the process.

    @raise AutoservPidAlreadyDeadError: pid has no /proc entry (it is
            already dead).
    @raise AutoservRunError: no signal in signal_queue managed to
            terminate the process.
    """
    pid_path = '/proc/%d/'
    if not os.path.exists(pid_path % pid):
        # Assume that if the pid does not exist in proc it is already dead.
        logging.error('No listing in /proc for pid:%d.', pid)
        # The old code passed pid as a second constructor argument instead
        # of interpolating it, so the message never contained the pid.
        raise error.AutoservPidAlreadyDeadError('Could not kill nonexistant '
                                                'pid: %s.' % pid)
    for sig in signal_queue:
        if signal_pid(pid, sig):
            return

    # no signal successfully terminated the process
    raise error.AutoservRunError('Could not kill %d for process name: %s' % (
            pid, get_process_name(pid)), None)
1138
1139
def system(command, timeout=None, ignore_status=False):
    """
    Run a command, teeing its stdout/stderr to the logs.

    @param timeout: timeout in seconds
    @param ignore_status: if ignore_status=False, throw an exception if the
            command's exit code is non-zero
            if ignore_stauts=True, return the exit code.

    @return exit status of command
            (note, this will always be zero unless ignore_status=True)
    """
    result = run(command, timeout=timeout, ignore_status=ignore_status,
                 stdout_tee=TEE_TO_LOGS, stderr_tee=TEE_TO_LOGS)
    return result.exit_status
1154
1155
def system_parallel(commands, timeout=None, ignore_status=False):
    """This function returns a list of exit statuses for the respective
    list of commands."""
    results = run_parallel(commands, timeout=timeout,
                           ignore_status=ignore_status,
                           stdout_tee=TEE_TO_LOGS, stderr_tee=TEE_TO_LOGS)
    return [result.exit_status for result in results]
1162
1163
def system_output(command, timeout=None, ignore_status=False,
                  retain_output=False, args=()):
    """
    Run a command and return the stdout output.

    @param command: command string to execute.
    @param timeout: time limit in seconds before attempting to kill the
            running process. The function will take a few seconds longer
            than 'timeout' to complete if it has to kill the process.
    @param ignore_status: do not raise an exception, no matter what the exit
            code of the command is.
    @param retain_output: set to True to make stdout/stderr of the command
            output to be also sent to the logging system
    @param args: sequence of strings of arguments to be given to the command
            inside " quotes after they have been escaped for that; each
            element in the sequence will be given as a separate command
            argument

    @return a string with the stdout output of the command.
    """
    # When retaining output, tee both streams to the logs; otherwise run
    # with the default (no tee).
    tee = TEE_TO_LOGS if retain_output else None
    out = run(command, timeout=timeout, ignore_status=ignore_status,
              stdout_tee=tee, stderr_tee=tee, args=args).stdout
    # Trim a single trailing newline, mirroring shell backtick behavior.
    if out.endswith('\n'):
        out = out[:-1]
    return out
1194
1195
def system_output_parallel(commands, timeout=None, ignore_status=False,
                           retain_output=False):
    """Run commands in parallel and return a list of their stdout strings.

    @param commands: list of command line strings.
    @param timeout: overall time limit in seconds.
    @param ignore_status: if False, raise CmdError when any command exits
            with a non-zero status.
    @param retain_output: if True, also tee each command's stdout/stderr
            to the logging system.

    @return: list of stdout strings (one per command), each with a single
            trailing newline (if present) removed, like system_output().
    """
    if retain_output:
        results = run_parallel(commands, timeout=timeout,
                               ignore_status=ignore_status,
                               stdout_tee=TEE_TO_LOGS,
                               stderr_tee=TEE_TO_LOGS)
    else:
        results = run_parallel(commands, timeout=timeout,
                               ignore_status=ignore_status)
    outputs = [result.stdout for result in results]
    # The old loop compared the *list* slice out[-1:] against '\n', which
    # is never true, so trailing newlines were never stripped. Strip one
    # trailing newline from each output, matching system_output().
    return [out[:-1] if out.endswith('\n') else out for out in outputs]
1209
1210
def strip_unicode(input):
    """Recursively convert unicode strings inside input to plain str.

    Lists and dicts are rebuilt with converted elements (dict keys are
    coerced with str()); any other type is returned unchanged.
    """
    kind = type(input)
    if kind == list:
        return [strip_unicode(element) for element in input]
    if kind == dict:
        return dict((str(key), strip_unicode(value))
                    for key, value in input.items())
    if kind == unicode:
        return str(input)
    return input
1223
1224
def get_cpu_percentage(function, *args, **dargs):
    """Returns a tuple containing the CPU% and return value from function call.

    This function calculates the usage time by taking the difference of
    the user and system times both before and after the function call.
    """
    # Snapshot rusage for child processes and this process before the call.
    child_pre = resource.getrusage(resource.RUSAGE_CHILDREN)
    self_pre = resource.getrusage(resource.RUSAGE_SELF)
    start = time.time()
    to_return = function(*args, **dargs)
    elapsed = time.time() - start
    self_post = resource.getrusage(resource.RUSAGE_SELF)
    child_post = resource.getrusage(resource.RUSAGE_CHILDREN)

    # Calculate CPU Percentage
    # The first two rusage fields are user and system CPU time; the
    # zip(...)[:2] slicing relies on Python 2's list-returning zip().
    s_user, s_system = [a - b for a, b in zip(self_post, self_pre)[:2]]
    c_user, c_system = [a - b for a, b in zip(child_post, child_pre)[:2]]
    # Ratio of total CPU seconds (self + children) to wall-clock time.
    cpu_percent = (s_user + c_user + s_system + c_system) / elapsed

    return cpu_percent, to_return
1245
1246
class SystemLoad(object):
    """
    Get system and/or process values and return average value of load.
    """
    def __init__(self, pids, advanced=False, time_step=0.1, cpu_cont=False,
                 use_log=False):
        """
        @param pids: List of pids to be monitored. If pid = 0 whole system
          will be monitored. An entry may also be a (pid, name) tuple.
        @param advanced: monitor add value for system irq count and softirq
          for process minor and major page fault
        @param time_step: Time step for continuous monitoring.
        @param cpu_cont: If True monitor CPU load continuously.
        @param use_log: If true every monitoring is logged for dump.
        """
        self.pids = []
        self.stats = {}
        for pid in pids:
            if pid == 0:
                # pid 0 means "whole system": read aggregate CPU counters
                # from /proc/stat and memory counters from /proc/meminfo.
                cpu = FileFieldMonitor("/proc/stat",
                                       [("cpu", 0), # User Time
                                        ("cpu", 2), # System Time
                                        ("intr", 0), # IRQ Count
                                        ("softirq", 0)], # Soft IRQ Count
                                       True,
                                       cpu_cont,
                                       use_log,
                                       " +",
                                       time_step)
                mem = FileFieldMonitor("/proc/meminfo",
                                       [("MemTotal:", 0), # Mem Total
                                        ("MemFree:", 0), # Mem Free
                                        ("Buffers:", 0), # Buffers
                                        ("Cached:", 0)], # Cached
                                       False,
                                       True,
                                       use_log,
                                       " +",
                                       time_step)
                self.stats[pid] = ["TOTAL", cpu, mem]
                self.pids.append(pid)
            else:
                if isinstance(pid, int):
                    self.pids.append(pid)
                    name = get_process_name(pid)
                else:
                    # A (pid, name) tuple was provided.
                    self.pids.append(pid[0])
                    name = pid[1]

                cpu = FileFieldMonitor("/proc/%d/stat" %
                                       self.pids[-1],
                                       [("", 13), # User Time
                                        ("", 14), # System Time
                                        ("", 9), # Minority Page Fault
                                        ("", 11)], # Majority Page Fault
                                       True,
                                       cpu_cont,
                                       use_log,
                                       " +",
                                       time_step)
                mem = FileFieldMonitor("/proc/%d/status" %
                                       self.pids[-1],
                                       [("VmSize:", 0), # Virtual Memory Size
                                        ("VmRSS:", 0), # Resident Set Size
                                        ("VmPeak:", 0), # Peak VM Size
                                        ("VmSwap:", 0)], # VM in Swap
                                       False,
                                       True,
                                       use_log,
                                       " +",
                                       time_step)
                self.stats[self.pids[-1]] = [name, cpu, mem]

        self.advanced = advanced


    def _select_pids(self, pids):
        """Return the given pid list, or all monitored pids if it is empty.

        Replaces the old mutable-default-argument idiom (pids=[]) used by
        every public method; passing None or [] selects all pids.
        """
        return pids if pids else self.pids


    def __str__(self):
        """
        Define format how to print
        """
        lines = []
        for pid in self.pids:
            for stat in self.stats[pid][1:]:
                lines.append(str(stat.get_status()) + "\n")
        return "".join(lines)


    def start(self, pids=None):
        """
        Start monitoring of the process system usage.
        @param pids: List of PIDs you intend to control. Use pids=None (or
            an empty list) to control all defined PIDs.
        """
        for pid in self._select_pids(pids):
            for stat in self.stats[pid][1:]:
                stat.start()


    def stop(self, pids=None):
        """
        Stop monitoring of the process system usage.
        @param pids: List of PIDs you intend to control. Use pids=None (or
            an empty list) to control all defined PIDs.
        """
        for pid in self._select_pids(pids):
            for stat in self.stats[pid][1:]:
                stat.stop()


    def dump(self, pids=None):
        """
        Get the status of monitoring.
        @param pids: List of PIDs you intend to control. Use pids=None (or
            an empty list) to control all defined PIDs.
         @return:
            tuple([cpu load], [memory load]):
                ([(PID1, (PID1_cpu_meas)), (PID2, (PID2_cpu_meas)), ...],
                 [(PID1, (PID1_mem_meas)), (PID2, (PID2_mem_meas)), ...])

            PID1_cpu_meas:
                average_values[], test_time, cont_meas_values[[]], time_step
            PID1_mem_meas:
                average_values[], test_time, cont_meas_values[[]], time_step
            where average_values[] are the measured values (mem_free,swap,...)
            which are described in SystemLoad.__init__()-FileFieldMonitor.
            cont_meas_values[[]] is a list of average_values in the sampling
            times.
        """
        pids = self._select_pids(pids)
        cpus = [(pid, self.stats[pid][1].get_status()) for pid in pids]
        memory = [(pid, self.stats[pid][2].get_status()) for pid in pids]
        return (cpus, memory)


    def get_cpu_status_string(self, pids=None):
        """
        Convert status to string array.
        @param pids: List of PIDs you intend to control. Use pids=None (or
            an empty list) to control all defined PIDs.
        @return: String format to table.
        """
        pids = self._select_pids(pids)

        headers = ["NAME",
                   ("%7s") % "PID",
                   ("%5s") % "USER",
                   ("%5s") % "SYS",
                   ("%5s") % "SUM"]
        if self.advanced:
            headers.extend(["MINFLT/IRQC",
                            "MAJFLT/SOFTIRQ"])
        headers.append(("%11s") % "TIME")
        textstatus = []
        for pid in pids:
            status = self.stats[pid][1].get_status()
            # status[0] holds the averaged counters, status[1] the elapsed
            # measurement time. ('elapsed' replaces a local that used to
            # shadow the time module.)
            elapsed = status[1]
            values = status[0]
            textstatus.append(["%s" % self.stats[pid][0],
                               "%7s" % pid,
                               "%4.0f%%" % (values[0] / elapsed),
                               "%4.0f%%" % (values[1] / elapsed),
                               "%4.0f%%" % ((values[0] + values[1]) / elapsed),
                               "%10.3fs" % elapsed])
            if self.advanced:
                textstatus[-1].insert(-1, "%11d" % values[2])
                textstatus[-1].insert(-1, "%14d" % values[3])

        return matrix_to_string(textstatus, tuple(headers))


    def get_mem_status_string(self, pids=None):
        """
        Convert status to string array.
        @param pids: List of PIDs you intend to control. Use pids=None (or
            an empty list) to control all defined PIDs.
        @return: String format to table.
        """
        pids = self._select_pids(pids)

        headers = ["NAME",
                   ("%7s") % "PID",
                   ("%8s") % "TOTAL/VMSIZE",
                   ("%8s") % "FREE/VMRSS",
                   ("%8s") % "BUFFERS/VMPEAK",
                   ("%8s") % "CACHED/VMSWAP",
                   ("%11s") % "TIME"]
        textstatus = []
        for pid in pids:
            status = self.stats[pid][2].get_status()
            elapsed = status[1]
            values = status[0]
            textstatus.append(["%s" % self.stats[pid][0],
                               "%7s" % pid,
                               "%10dMB" % (values[0] / 1024),
                               "%8dMB" % (values[1] / 1024),
                               "%12dMB" % (values[2] / 1024),
                               "%11dMB" % (values[3] / 1024),
                               "%10.3fs" % elapsed])

        return matrix_to_string(textstatus, tuple(headers))
1465
1466
def get_arch(run_function=run):
    """
    Get the hardware architecture of the machine.
    If specified, run_function should return a CmdResult object and throw a
    CmdError exception.
    If run_function is anything other than utils.run(), it is used to
    execute the commands. By default (when set to utils.run()) this will
    just examine os.uname()[4].
    """

    # Common local case: no need to shell out at all.
    if run_function == run:
        return re.sub(r'i\d86$', 'i386', os.uname()[4])

    # Custom run_function (e.g. remote execution): ask uname directly.
    arch = run_function('/bin/uname -m').stdout.rstrip()
    return 'i386' if re.match(r'i\d86$', arch) else arch
1486
def get_arch_userspace(run_function=run):
    """
    Get the architecture by userspace (possibly different from kernel).

    @param run_function: function used to execute the probe command; must
            return a CmdResult object (defaults to utils.run).

    @return architecture string, e.g. 'arm', 'i386' or 'x86_64'.
    """
    archs = {
        'arm': 'ELF 32-bit.*, ARM,',
        'i386': 'ELF 32-bit.*, Intel 80386,',
        'x86_64': 'ELF 64-bit.*, x86-64,',
    }

    cmd = 'file --brief --dereference /bin/sh'
    filestr = run_function(cmd).stdout.rstrip()
    for a, regex in archs.iteritems():
        if re.match(regex, filestr):
            return a

    # Fall back to the kernel architecture, probing via the same
    # run_function so a remote run_function stays remote (the old code
    # dropped it and always inspected the local machine).
    return get_arch(run_function)
1504
1505
def get_num_logical_cpus_per_socket(run_function=run):
    """
    Get the number of cores (including hyperthreading) per cpu.
    run_function is used to execute the commands. It defaults to
    utils.run() but a custom method (if provided) should be of the
    same schema as utils.run. It should return a CmdResult object and
    throw a CmdError exception.
    """
    siblings = run_function('grep "^siblings" /proc/cpuinfo').stdout.rstrip()
    # Each physical package reports its sibling count on its own line.
    num_siblings = [int(value) for value in
                    re.findall(r'^siblings\s*:\s*(\d+)\s*$', siblings, re.M)]
    if not num_siblings:
        raise error.TestError('Unable to find siblings info in /proc/cpuinfo')
    if min(num_siblings) != max(num_siblings):
        raise error.TestError('Number of siblings differ %r' %
                              num_siblings)
    return num_siblings[0]
1524
1525
def merge_trees(src, dest):
    """
    Merges a source directory tree at 'src' into a destination tree at
    'dest'. If a path is a file in both trees than the file in the source
    tree is APPENDED to the one in the destination tree. If a path is
    a directory in both trees then the directories are recursively merged
    with this function. In any other case, the function will skip the
    paths that cannot be merged (instead of failing).
    """
    if not os.path.exists(src):
        # Path exists only in dest; nothing to merge.
        return
    if not os.path.exists(dest):
        # Path exists only in src; copy it over wholesale.
        if os.path.isfile(src):
            shutil.copy2(src, dest)
        else:
            shutil.copytree(src, dest, symlinks=True)
        return
    if os.path.isfile(src) and os.path.isfile(dest):
        # Both are files: append src's contents to dest.
        dest_file = open(dest, "a")
        try:
            src_file = open(src)
            try:
                dest_file.write(src_file.read())
            finally:
                src_file.close()
        finally:
            dest_file.close()
    elif os.path.isdir(src) and os.path.isdir(dest):
        # Both are directories: merge entry by entry.
        for entry in os.listdir(src):
            merge_trees(os.path.join(src, entry), os.path.join(dest, entry))
    # Otherwise the paths are incompatible (e.g. file vs dir); skip them.
1561
1562
class CmdResult(object):
    """
    Command execution result.

    command:     String containing the command line itself
    exit_status: Integer exit code of the process
    stdout:      String containing stdout of the process
    stderr:      String containing stderr of the process
    duration:    Elapsed wall clock time running the process
    """


    def __init__(self, command="", stdout="", stderr="",
                 exit_status=None, duration=0):
        self.command = command
        self.exit_status = exit_status
        self.stdout = stdout
        self.stderr = stderr
        self.duration = duration


    def __repr__(self):
        # Wrap the (possibly long) command line for readable log output.
        wrapper = textwrap.TextWrapper(width=78,
                                       initial_indent="\n    ",
                                       subsequent_indent="    ")

        out = self.stdout.rstrip()
        err = self.stderr.rstrip()

        sections = ["* Command: %s\n" % wrapper.fill(str(self.command)),
                    "Exit status: %s\n" % self.exit_status,
                    "Duration: %s\n" % self.duration]
        # Only include stdout/stderr sections when they are non-empty.
        if out:
            sections.append("\nstdout:\n%s" % out)
        if err:
            sections.append("\nstderr:\n%s" % err)
        return "".join(sections)
1604
1605
class run_randomly:
    """Collects test invocations and runs them in a random order.

    With run_sequentially=True the queued order is preserved (useful for
    debugging control files).
    """

    def __init__(self, run_sequentially=False):
        # Run sequentially is for debugging control files
        self.test_list = []
        self.run_sequentially = run_sequentially


    def add(self, *args, **dargs):
        """Queue one invocation (positional and keyword arguments)."""
        self.test_list.append((args, dargs))


    def run(self, fn):
        """Pop queued invocations one at a time and call fn with each."""
        while self.test_list:
            # Always draw from the RNG so RNG state evolves identically
            # in both modes; sequential mode just overrides the choice.
            index = random.randint(0, len(self.test_list) - 1)
            if self.run_sequentially:
                index = 0
            args, dargs = self.test_list.pop(index)
            fn(*args, **dargs)
1625
1626
def import_site_module(path, module, dummy=None, modulefile=None):
    """
    Try to import the site specific module if it exists.

    @param path full filename of the source file calling this (ie __file__)
    @param module full module name
    @param dummy dummy value to return in case there is no symbol to import
    @param modulefile module filename

    @return site specific module or dummy

    @raises ImportError if the site file exists but imports fails
    """
    short_module = module.rsplit(".", 1)[-1]

    if not modulefile:
        modulefile = short_module + ".py"

    site_path = os.path.join(os.path.dirname(path), modulefile)
    if not os.path.exists(site_path):
        return dummy
    return __import__(module, {}, {}, [short_module])
1648
1649
def import_site_symbol(path, module, name, dummy=None, modulefile=None):
    """
    Try to import site specific symbol from site specific file if it exists

    @param path full filename of the source file calling this (ie __file__)
    @param module full module name
    @param name symbol name to be imported from the site file
    @param dummy dummy value to return in case there is no symbol to import
    @param modulefile module filename

    @return site specific symbol or dummy

    @raises ImportError if the site file exists but imports fails
    """
    site_module = import_site_module(path, module, modulefile=modulefile)
    if not site_module:
        return dummy

    # special unique value to tell us if the symbol can't be imported
    sentinel = object()
    value = getattr(site_module, name, sentinel)
    return dummy if value is sentinel else value
1676
1677
def import_site_class(path, module, classname, baseclass, modulefile=None):
    """
    Try to import site specific class from site specific file if it exists

    Args:
        path: full filename of the source file calling this (ie __file__)
        module: full module name
        classname: class name to be loaded from site file
        baseclass: base class object to return when no site file present or
            to mixin when site class exists but is not inherited from baseclass
        modulefile: module filename

    Returns: baseclass if site specific class does not exist, the site specific
        class if it exists and is inherited from baseclass or a mixin of the
        site specific class and baseclass when the site specific class exists
        and is not inherited from baseclass

    Raises: ImportError if the site file exists but imports fails
    """
    site_class = import_site_symbol(path, module, classname, None, modulefile)
    if not site_class:
        return baseclass
    if issubclass(site_class, baseclass):
        return site_class
    # Site class exists but is unrelated to baseclass: build a mixin type
    # that inherits from both, with the site class taking precedence.
    return type(classname, (site_class, baseclass), {})
1708
1709
def import_site_function(path, module, funcname, dummy, modulefile=None):
    """
    Try to import site specific function from site specific file if it exists

    Args:
        path: full filename of the source file calling this (ie __file__)
        module: full module name
        funcname: function name to be imported from site file
        dummy: dummy function to return in case there is no function to import
        modulefile: module filename

    Returns: site specific function object or dummy

    Raises: ImportError if the site file exists but imports fails
    """
    # Functions are just attributes, so delegate to the symbol importer.
    return import_site_symbol(path, module, funcname, dummy, modulefile)
1727
1728
1729def _get_pid_path(program_name):
1730    my_path = os.path.dirname(__file__)
1731    return os.path.abspath(os.path.join(my_path, "..", "..",
1732                                        "%s.pid" % program_name))
1733
1734
def write_pid(program_name):
    """
    Try to drop <program_name>.pid in the main autotest directory.

    The file contains this process's pid followed by a newline.

    Args:
      program_name: prefix for file name
    """
    # 'with' guarantees the file is closed even if the write raises,
    # replacing the more verbose open/try/finally form.
    with open(_get_pid_path(program_name), "w") as pidfile:
        pidfile.write("%s\n" % os.getpid())
1747
1748
def delete_pid_file_if_exists(program_name):
    """
    Tries to remove <program_name>.pid from the main autotest directory.

    A missing pid file is not an error; any other OSError propagates.
    """
    try:
        os.remove(_get_pid_path(program_name))
    except OSError as e:
        # Checking the errno directly avoids the race inherent in
        # re-checking os.path.exists() after the failed remove.
        if e.errno != errno.ENOENT:
            raise
1761
1762
def get_pid_from_file(program_name):
    """
    Reads the pid from <program_name>.pid in the autotest directory.

    @param program_name the name of the program
    @return the pid if the file exists, None otherwise.
    @raises ValueError if the file exists but does not contain an integer.
    """
    pidfile_path = _get_pid_path(program_name)
    if not os.path.exists(pidfile_path):
        return None

    try:
        # Reuse the cached path (the original recomputed it) and let the
        # context manager handle closing instead of nested try blocks.
        with open(pidfile_path, 'r') as pidfile:
            return int(pidfile.readline())
    except IOError:
        # The file may have been removed between the exists() check and
        # the open; treat that the same as "no pid file".
        if not os.path.exists(pidfile_path):
            return None
        raise
1787
1788
def get_process_name(pid):
    """
    Get process name from PID.
    @param pid: PID of process.
    @return: Process name if PID stat file exists or 'Dead PID' if it does not.
    """
    stat_path = "/proc/%d/stat" % pid
    if not os.path.exists(stat_path):
        return "Dead Pid"
    # Field 1 of /proc/<pid>/stat is the command name wrapped in
    # parentheses; slice them off.
    return get_field(read_file(stat_path), 1)[1:-1]
1799
1800
def program_is_alive(program_name):
    """
    Checks if the process is alive and not in Zombie state.

    @param program_name the name of the program
    @return True if still alive, False otherwise
    """
    pid = get_pid_from_file(program_name)
    # No pid file means the program cannot be running.
    return pid is not None and pid_is_alive(pid)
1812
1813
def signal_program(program_name, sig=signal.SIGTERM):
    """
    Sends a signal to the process listed in <program_name>.pid

    @param program_name the name of the program
    @param sig signal to send
    """
    pid = get_pid_from_file(program_name)
    if not pid:
        # No pid file (or empty pid): nothing to signal.
        return
    signal_pid(pid, sig)
1824
1825
def get_relative_path(path, reference):
    """Given 2 absolute paths "path" and "reference", compute the path of
    "path" as relative to the directory "reference".

    @param path the absolute path to convert to a relative path
    @param reference an absolute directory path to which the relative
        path will be computed
    """
    # normalize the paths (remove double slashes, etc)
    assert(os.path.isabs(path))
    assert(os.path.isabs(reference))

    path = os.path.normpath(path)
    reference = os.path.normpath(reference)

    # we could use os.path.split() but it splits from the end
    path_list = path.split(os.path.sep)[1:]
    ref_list = reference.split(os.path.sep)[1:]

    # Count the longest common leading components explicitly instead of
    # relying on the loop variable leaking out of a for/xrange loop and
    # being adjusted after the fact.
    common = 0
    for path_part, ref_part in zip(path_list, ref_list):
        if path_part != ref_part:
            break
        common += 1

    # One ".." for each uncommon reference component, then whatever is
    # left of the target path beyond the common prefix.
    rel_parts = ['..'] * (len(ref_list) - common) + path_list[common:]
    return os.path.join(*rel_parts)
1860
1861
def sh_escape(command):
    """
    Escape special characters from a command so that it can be passed
    as a double quoted (" ") string in a (ba)sh command.

    Args:
            command: the command string to escape.

    Returns:
            The escaped command string. The required englobing double
            quotes are NOT added and so should be added at some point by
            the caller.

    See also: http://www.tldp.org/LDP/abs/html/escapingsection.html
    """
    # Backslash must be escaped first so the later substitutions don't
    # double-escape the backslashes they introduce.
    for special in ('\\', '$', '"', '`'):
        command = command.replace(special, '\\' + special)
    return command
1882
1883
def sh_quote_word(text, whitelist=SHELL_QUOTING_WHITELIST):
    r"""Quote a string to make it safe as a single word in a shell command.

    POSIX shell syntax recognizes no escape characters inside a single-quoted
    string.  So, single quotes can safely quote any string of characters except
    a string with a single quote character.  A single quote character must be
    quoted with the sequence '\'' which translates to:
        '  -> close current quote
        \' -> insert a literal single quote
        '  -> reopen quoting again.

    This is safe for all combinations of characters, including embedded and
    trailing backslashes in odd or even numbers.

    This is also safe for nesting, e.g. the following is a valid use:

        adb_command = 'adb shell %s' % (
                sh_quote_word('echo %s' % sh_quote_word('hello world')))

    @param text: The string to be quoted into a single word for the shell.
    @param whitelist: Optional list of characters that do not need quoting.
                      Defaults to a known good list of characters.

    @return A string, possibly quoted, safe as a single word for a shell.
    """
    # No quoting needed when every character is shell-safe.
    needs_quoting = any(char not in whitelist for char in text)
    if not needs_quoting:
        return text
    return "'%s'" % text.replace("'", r"'\''")
1912
1913
def configure(extra=None, configure='./configure'):
    """
    Run configure passing in the correct host, build, and target options.

    @param extra: extra command line arguments to pass to configure
    @param configure: which configure script to use
    """
    args = []
    # Map each Gentoo-style environment variable to its configure flag.
    for env_var, flag in (('CHOST', '--host='),
                          ('CBUILD', '--build='),
                          ('CTARGET', '--target=')):
        if env_var in os.environ:
            args.append(flag + os.environ[env_var])
    if extra:
        args.append(extra)

    system('%s %s' % (configure, ' '.join(args)))
1932
1933
def make(extra='', make='make', timeout=None, ignore_status=False):
    """
    Run make, adding MAKEOPTS to the list of options.

    @param extra: extra command line arguments to pass to make.
    """
    makeopts = os.environ.get('MAKEOPTS', '')
    command = '%s %s %s' % (make, makeopts, extra)
    return system(command, timeout=timeout, ignore_status=ignore_status)
1942
1943
def compare_versions(ver1, ver2):
    """Version number comparison between ver1 and ver2 strings.

    >>> compare_versions("1", "2")
    -1
    >>> compare_versions("foo-1.1", "foo-1.2")
    -1
    >>> compare_versions("1.2", "1.2a")
    -1
    >>> compare_versions("1.2b", "1.2a")
    1
    >>> compare_versions("1.3.5.3a", "1.3.5.3b")
    -1

    Args:
        ver1: version string
        ver2: version string

    Returns:
        int:  1 if ver1 >  ver2
              0 if ver1 == ver2
             -1 if ver1 <  ver2
    """
    def _cmp(x, y):
        # Portable replacement for the Python-2-only cmp() builtin.
        return (x > y) - (x < y)

    ax = re.split('[.-]', ver1)
    ay = re.split('[.-]', ver2)
    while ax and ay:
        cx = ax.pop(0)
        cy = ay.pop(0)
        # Zero-pad the shorter component so "2" < "10" compares numerically
        # while suffixed components like "2a" still compare as strings.
        width = max(len(cx), len(cy))
        result = _cmp(cx.zfill(width), cy.zfill(width))
        if result != 0:
            return result
    # All shared components equal: the version with more components wins.
    return _cmp(len(ax), len(ay))
1977
1978
def args_to_dict(args):
    """Convert autoserv extra arguments in the form of key=val or key:val to a
    dictionary.  Each argument key is converted to lowercase dictionary key.

    Arguments that match neither form are logged and skipped.

    Args:
        args - list of autoserv extra arguments.

    Returns:
        dictionary
    """
    arg_re = re.compile(r'(\w+)[:=](.*)$')
    # Named 'parsed' rather than shadowing the builtin 'dict'.
    parsed = {}
    for arg in args:
        match = arg_re.match(arg)
        if match:
            parsed[match.group(1).lower()] = match.group(2)
        else:
            logging.warning("args_to_dict: argument '%s' doesn't match "
                            "'%s' pattern. Ignored.", arg, arg_re.pattern)
    return parsed
1999
2000
def get_unused_port():
    """
    Finds a semi-random available port. A race condition is still
    possible after the port number is returned, if another process
    happens to bind it.

    Returns:
        A port number that is unused on both TCP and UDP.
    """

    def _attempt_bind(port, sock_type, sock_proto):
        # Returns the bound port number on success, None if the bind failed.
        sock = socket.socket(socket.AF_INET, sock_type, sock_proto)
        try:
            sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
            sock.bind(('', port))
            return sock.getsockname()[1]
        except socket.error:
            return None
        finally:
            sock.close()

    # On the 2.6 kernel, calling _attempt_bind() on a UDP socket returns
    # the same port over and over, so always let TCP pick the port first.
    while True:
        candidate = _attempt_bind(0, socket.SOCK_STREAM, socket.IPPROTO_TCP)
        if candidate and _attempt_bind(candidate, socket.SOCK_DGRAM,
                                       socket.IPPROTO_UDP):
            return candidate
2031
2032
def ask(question, auto=False):
    """
    Raw input with a prompt that emulates logging.

    @param question: Question to be asked
    @param auto: Whether to return "y" instead of asking the question
    """
    if auto:
        logging.info("%s (y/n) y", question)
        return "y"
    # Format the prompt like a log line: "HH:MM:SS INFO | <question> (y/n) ".
    timestamp = time.strftime("%H:%M:%S", time.localtime())
    return raw_input("%s INFO | %s (y/n) " % (timestamp, question))
2045
2046
def rdmsr(address, cpu=0):
    """
    Reads an x86 MSR from the specified CPU, returns as long integer.
    """
    # The msr device is opened unbuffered; the MSR index is the file offset.
    msr_device = '/dev/cpu/%s/msr' % cpu
    with open(msr_device, 'r', 0) as fd:
        fd.seek(address)
        raw = fd.read(8)
    # MSRs are 64-bit values; '=Q' unpacks native-order unsigned 64-bit.
    return struct.unpack('=Q', raw)[0]
2054
2055
def wait_for_value(func,
                   expected_value=None,
                   min_threshold=None,
                   max_threshold=None,
                   timeout_sec=10):
    """
    Returns the value of func().  If |expected_value|, |min_threshold|, and
    |max_threshold| are not set, returns immediately.

    If |expected_value| is set, polls the return value until |expected_value| is
    reached, and returns that value.

    If either |max_threshold| or |min_threshold| is set, this function will
    will repeatedly call func() until the return value reaches or exceeds one of
    these thresholds.

    Polling will stop after |timeout_sec| regardless of these thresholds.

    @param func: function whose return value is to be waited on.
    @param expected_value: wait for func to return this value.
    @param min_threshold: wait for func value to reach or fall below this value.
    @param max_threshold: wait for func value to reach or rise above this value.
    @param timeout_sec: Number of seconds to wait before giving up and
                        returning whatever value func() last returned.

    Return value:
        The most recent return value of func().
    """
    def _satisfied(value):
        # With no condition configured, any value satisfies immediately.
        if (expected_value is None and min_threshold is None
                and max_threshold is None):
            return True
        if expected_value is not None and value == expected_value:
            return True
        if min_threshold is not None and value <= min_threshold:
            return True
        return max_threshold is not None and value >= max_threshold

    deadline = time.time() + timeout_sec
    while True:
        value = func()
        if _satisfied(value) or time.time() >= deadline:
            return value
        time.sleep(0.1)
2101
2102
def wait_for_value_changed(func,
                           old_value=None,
                           timeout_sec=10):
    """
    Returns the value of func().

    The function polls the return value until it is different from |old_value|,
    and returns that value.

    Polling will stop after |timeout_sec|.

    @param func: function whose return value is to be waited on.
    @param old_value: wait for func to return a value different from this.
    @param timeout_sec: Number of seconds to wait before giving up and
                        returning whatever value func() last returned.

    @returns The most recent return value of func().
    """
    deadline = time.time() + timeout_sec
    value = func()
    # Keep polling while the value is unchanged and time remains.
    while value == old_value and time.time() < deadline:
        time.sleep(0.1)
        value = func()
    return value
2133
2134
def restart_job(name):
    """
    Restarts an upstart job if it's running.
    If it's not running, start it.
    """
    status = system_output('status %s' % name)
    if 'start/running' in status:
        system_output('restart %s' % name)
    else:
        system_output('start %s' % name)
2145
2146