base_classes.py revision 672fb5f8806694d9476f016c0f1094da29120f31
1# Copyright 2009 Google Inc. Released under the GPL v2
2
3"""
4This module defines the base classes for the Host hierarchy.
5
6Implementation details:
7You should import the "hosts" package instead of importing each type of host.
8
9        Host: a machine on which you can run programs
10"""
11
12__author__ = """
13mbligh@google.com (Martin J. Bligh),
14poirier@google.com (Benjamin Poirier),
15stutsman@google.com (Ryan Stutsman)
16"""
17
18import cPickle, logging, os, re, time
19
20from autotest_lib.client.common_lib import global_config, error, utils
21from autotest_lib.client.common_lib.cros import path_utils
22
23
24class Host(object):
25    """
26    This class represents a machine on which you can run programs.
27
28    It may be a local machine, the one autoserv is running on, a remote
29    machine or a virtual machine.
30
31    Implementation details:
32    This is an abstract class, leaf subclasses must implement the methods
33    listed here. You must not instantiate this class but should
34    instantiate one of those leaf subclasses.
35
36    When overriding methods that raise NotImplementedError, the leaf class
37    is fully responsible for the implementation and should not chain calls
38    to super. When overriding methods that are a NOP in Host, the subclass
39    should chain calls to super(). The criteria for fitting a new method into
40    one category or the other should be:
41        1. If two separate generic implementations could reasonably be
42           concatenated, then the abstract implementation should pass and
43           subclasses should chain calls to super.
44        2. If only one class could reasonably perform the stated function
45           (e.g. two separate run() implementations cannot both be executed)
46           then the method should raise NotImplementedError in Host, and
47           the implementor should NOT chain calls to super, to ensure that
48           only one implementation ever gets executed.
49    """
50
51    job = None
52    DEFAULT_REBOOT_TIMEOUT = global_config.global_config.get_config_value(
53        "HOSTS", "default_reboot_timeout", type=int, default=1800)
54    WAIT_DOWN_REBOOT_TIMEOUT = global_config.global_config.get_config_value(
55        "HOSTS", "wait_down_reboot_timeout", type=int, default=840)
56    WAIT_DOWN_REBOOT_WARNING = global_config.global_config.get_config_value(
57        "HOSTS", "wait_down_reboot_warning", type=int, default=540)
58    HOURS_TO_WAIT_FOR_RECOVERY = global_config.global_config.get_config_value(
59        "HOSTS", "hours_to_wait_for_recovery", type=float, default=2.5)
60    # the number of hardware repair requests that need to happen before we
61    # actually send machines to hardware repair
62    HARDWARE_REPAIR_REQUEST_THRESHOLD = 4
63    OP_REBOOT = 'reboot'
64    OP_SUSPEND = 'suspend'
65    PWR_OPERATION = [OP_REBOOT, OP_SUSPEND]
66
67
68    def __init__(self, *args, **dargs):
69        self._initialize(*args, **dargs)
70
71
72    def _initialize(self, *args, **dargs):
73        pass
74
75
76    @property
77    def job_repo_url_attribute(self):
78        """Get the host attribute name for job_repo_url.
79        """
80        return 'job_repo_url'
81
82
83    def close(self):
84        """Close the connection to the host.
85        """
86        pass
87
88
89    def setup(self):
90        """Setup the host object.
91        """
92        pass
93
94
95    def run(self, command, timeout=3600, ignore_status=False,
96            stdout_tee=utils.TEE_TO_LOGS, stderr_tee=utils.TEE_TO_LOGS,
97            stdin=None, args=()):
98        """
99        Run a command on this host.
100
101        @param command: the command line string
102        @param timeout: time limit in seconds before attempting to
103                kill the running process. The run() function
104                will take a few seconds longer than 'timeout'
105                to complete if it has to kill the process.
106        @param ignore_status: do not raise an exception, no matter
107                what the exit code of the command is.
108        @param stdout_tee: where to tee the stdout
109        @param stderr_tee: where to tee the stderr
110        @param stdin: stdin to pass (a string) to the executed command
111        @param args: sequence of strings to pass as arguments to command by
112                quoting them in " and escaping their contents if necessary
113
114        @return a utils.CmdResult object
115
116        @raises AutotestHostRunError: the exit code of the command execution
117                was not 0 and ignore_status was not enabled
118        """
119        raise NotImplementedError('Run not implemented!')
120
121    # TODO(pwang): Delete this once crbug.com/735653, crbug.com/734887 is fixed
122    # and ssh time is reasonable.
123    def run_very_slowly(self, *args, **kwargs):
124        return self.run(*args, **kwargs)
125
126
127    def run_output(self, command, *args, **dargs):
128        """Run and retrieve the value of stdout stripped of whitespace.
129
130        @param command: Command to execute.
131        @param *args: Extra arguments to run.
132        @param **dargs: Extra keyword arguments to run.
133
134        @return: String value of stdout.
135        """
136        return self.run_very_slowly(command, *args, **dargs).stdout.rstrip()
137
138
139    def reboot(self):
140        """Reboot the host.
141        """
142        raise NotImplementedError('Reboot not implemented!')
143
144
145    def suspend(self):
146        """Suspend the host.
147        """
148        raise NotImplementedError('Suspend not implemented!')
149
150
151    def sysrq_reboot(self):
152        """Execute host reboot via SysRq key.
153        """
154        raise NotImplementedError('Sysrq reboot not implemented!')
155
156
157    def reboot_setup(self, *args, **dargs):
158        """Prepare for reboot.
159
160        This doesn't appear to be implemented by any current hosts.
161
162        @param *args: Extra arguments to ?.
163        @param **dargs: Extra keyword arguments to ?.
164        """
165        pass
166
167
168    def reboot_followup(self, *args, **dargs):
169        """Post reboot work.
170
171        This doesn't appear to be implemented by any current hosts.
172
173        @param *args: Extra arguments to ?.
174        @param **dargs: Extra keyword arguments to ?.
175        """
176        pass
177
178
179    def get_file(self, source, dest, delete_dest=False):
180        """Retrieve a file from the host.
181
182        @param source: Remote file path (directory, file or list).
183        @param dest: Local file path (directory, file or list).
184        @param delete_dest: Delete files in remote path that are not in local
185            path.
186        """
187        raise NotImplementedError('Get file not implemented!')
188
189
190    def send_file(self, source, dest, delete_dest=False):
191        """Send a file to the host.
192
193        @param source: Local file path (directory, file or list).
194        @param dest: Remote file path (directory, file or list).
195        @param delete_dest: Delete files in remote path that are not in local
196            path.
197        """
198        raise NotImplementedError('Send file not implemented!')
199
200
201    def get_tmp_dir(self):
202        """Create a temporary directory on the host.
203        """
204        raise NotImplementedError('Get temp dir not implemented!')
205
206
207    def is_up(self):
208        """Confirm the host is online.
209        """
210        raise NotImplementedError('Is up not implemented!')
211
212
213    def is_shutting_down(self):
214        """ Indicates is a machine is currently shutting down. """
215        return False
216
217
218    def get_wait_up_processes(self):
219        """ Gets the list of local processes to wait for in wait_up. """
220        get_config = global_config.global_config.get_config_value
221        proc_list = get_config("HOSTS", "wait_up_processes",
222                               default="").strip()
223        processes = set(p.strip() for p in proc_list.split(","))
224        processes.discard("")
225        return processes
226
227
228    def get_boot_id(self, timeout=60):
229        """ Get a unique ID associated with the current boot.
230
231        Should return a string with the semantics such that two separate
232        calls to Host.get_boot_id() return the same string if the host did
233        not reboot between the two calls, and two different strings if it
234        has rebooted at least once between the two calls.
235
236        @param timeout The number of seconds to wait before timing out.
237
238        @return A string unique to this boot or None if not available."""
239        BOOT_ID_FILE = '/proc/sys/kernel/random/boot_id'
240        NO_ID_MSG = 'no boot_id available'
241        cmd = 'if [ -f %r ]; then cat %r; else echo %r; fi' % (
242                BOOT_ID_FILE, BOOT_ID_FILE, NO_ID_MSG)
243        boot_id = self.run_very_slowly(cmd, timeout=timeout).stdout.strip()
244        if boot_id == NO_ID_MSG:
245            return None
246        return boot_id
247
248
249    def wait_up(self, timeout=None):
250        """Wait for the host to come up.
251
252        @param timeout: Max seconds to wait.
253        """
254        raise NotImplementedError('Wait up not implemented!')
255
256
257    def wait_down(self, timeout=None, warning_timer=None, old_boot_id=None):
258        """Wait for the host to go down.
259
260        @param timeout: Max seconds to wait before returning.
261        @param warning_timer: Seconds before warning host is not down.
262        @param old_boot_id: Result of self.get_boot_id() before shutdown.
263        """
264        raise NotImplementedError('Wait down not implemented!')
265
266
267    def _construct_host_metadata(self, type_str):
268        """Returns dict of metadata with type_str, hostname, time_recorded.
269
270        @param type_str: String representing _type field in es db.
271            For example: type_str='reboot_total'.
272        """
273        metadata = {
274            'hostname': self.hostname,
275            'time_recorded': time.time(),
276            '_type': type_str,
277        }
278        return metadata
279
280
281    def wait_for_restart(self, timeout=DEFAULT_REBOOT_TIMEOUT,
282                         down_timeout=WAIT_DOWN_REBOOT_TIMEOUT,
283                         down_warning=WAIT_DOWN_REBOOT_WARNING,
284                         log_failure=True, old_boot_id=None, **dargs):
285        """Wait for the host to come back from a reboot.
286
287        This is a generic implementation based entirely on wait_up and
288        wait_down.
289
290        @param timeout: Max seconds to wait for reboot to start.
291        @param down_timeout: Max seconds to wait for host to go down.
292        @param down_warning: Seconds to wait before warning host hasn't gone
293            down.
294        @param log_failure: bool(Log when host does not go down.)
295        @param old_boot_id: Result of self.get_boot_id() before restart.
296        @param **dargs: Extra arguments to reboot_followup.
297
298        @raises AutoservRebootError if host does not come back up.
299        """
300        key_string = 'Reboot.%s' % dargs.get('board')
301
302        if not self.wait_down(timeout=down_timeout,
303                              warning_timer=down_warning,
304                              old_boot_id=old_boot_id):
305            if log_failure:
306                self.record("ABORT", None, "reboot.verify", "shut down failed")
307            raise error.AutoservShutdownError("Host did not shut down")
308        if self.wait_up(timeout):
309            self.record("GOOD", None, "reboot.verify")
310            self.reboot_followup(**dargs)
311        else:
312            self.record("ABORT", None, "reboot.verify",
313                        "Host did not return from reboot")
314            raise error.AutoservRebootError("Host did not return from reboot")
315
316
317    def verify(self):
318        """Check if host is in good state.
319        """
320        self.verify_hardware()
321        self.verify_connectivity()
322        self.verify_software()
323
324
325    def verify_hardware(self):
326        """Check host hardware.
327        """
328        pass
329
330
331    def verify_connectivity(self):
332        """Check host network connectivity.
333        """
334        pass
335
336
337    def verify_software(self):
338        """Check host software.
339        """
340        pass
341
342
343    def check_diskspace(self, path, gb):
344        """Raises an error if path does not have at least gb GB free.
345
346        @param path The path to check for free disk space.
347        @param gb A floating point number to compare with a granularity
348            of 1 MB.
349
350        1000 based SI units are used.
351
352        @raises AutoservDiskFullHostError if path has less than gb GB free.
353        """
354        one_mb = 10 ** 6  # Bytes (SI unit).
355        mb_per_gb = 1000.0
356        logging.info('Checking for >= %s GB of space under %s on machine %s',
357                     gb, path, self.hostname)
358        df = self.run_very_slowly('df -PB %d %s | tail -1'
359                                  % (one_mb, path)).stdout.split()
360        free_space_gb = int(df[3]) / mb_per_gb
361        if free_space_gb < gb:
362            raise error.AutoservDiskFullHostError(path, gb, free_space_gb)
363        else:
364            logging.info('Found %s GB >= %s GB of space under %s on machine %s',
365                free_space_gb, gb, path, self.hostname)
366
367
368    def check_inodes(self, path, min_kilo_inodes):
369        """Raises an error if a file system is short on i-nodes.
370
371        @param path The path to check for free i-nodes.
372        @param min_kilo_inodes Minimum number of i-nodes required,
373                               in units of 1000 i-nodes.
374
375        @raises AutoservNoFreeInodesError If the minimum required
376                                  i-node count isn't available.
377        """
378        min_inodes = 1000 * min_kilo_inodes
379        logging.info('Checking for >= %d i-nodes under %s '
380                     'on machine %s', min_inodes, path, self.hostname)
381        df = self.run_very_slowly('df -Pi %s | tail -1' % path).stdout.split()
382        free_inodes = int(df[3])
383        if free_inodes < min_inodes:
384            raise error.AutoservNoFreeInodesError(path, min_inodes,
385                                                  free_inodes)
386        else:
387            logging.info('Found %d >= %d i-nodes under %s on '
388                         'machine %s', free_inodes, min_inodes,
389                         path, self.hostname)
390
391
392    def erase_dir_contents(self, path, ignore_status=True, timeout=3600):
393        """Empty a given directory path contents.
394
395        @param path: Path to empty.
396        @param ignore_status: Ignore the exit status from run.
397        @param timeout: Max seconds to allow command to complete.
398        """
399        rm_cmd = 'find "%s" -mindepth 1 -maxdepth 1 -print0 | xargs -0 rm -rf'
400        self.run_very_slowly(rm_cmd % path,
401                             ignore_status=ignore_status,
402                             timeout=timeout)
403
404
405    def repair(self):
406        """Try and get the host to pass `self.verify()`."""
407        self.verify()
408
409
410    def disable_ipfilters(self):
411        """Allow all network packets in and out of the host."""
412        self.run_very_slowly('iptables-save > /tmp/iptable-rules')
413        self.run_very_slowly('iptables -P INPUT ACCEPT')
414        self.run_very_slowly('iptables -P FORWARD ACCEPT')
415        self.run_very_slowly('iptables -P OUTPUT ACCEPT')
416
417
418    def enable_ipfilters(self):
419        """Re-enable the IP filters disabled from disable_ipfilters()"""
420        if self.path_exists('/tmp/iptable-rules'):
421            self.run_very_slowly('iptables-restore < /tmp/iptable-rules')
422
423
424    def cleanup(self):
425        """Restore host to clean state.
426        """
427        pass
428
429
430    def machine_install(self):
431        """Install on the host.
432        """
433        raise NotImplementedError('Machine install not implemented!')
434
435
436    def install(self, installableObject):
437        """Call install on a thing.
438
439        @param installableObject: Thing with install method that will accept our
440            self.
441        """
442        installableObject.install(self)
443
444
445    def get_autodir(self):
446        raise NotImplementedError('Get autodir not implemented!')
447
448
449    def set_autodir(self):
450        raise NotImplementedError('Set autodir not implemented!')
451
452
453    def start_loggers(self):
454        """ Called to start continuous host logging. """
455        pass
456
457
458    def stop_loggers(self):
459        """ Called to stop continuous host logging. """
460        pass
461
462
463    # some extra methods simplify the retrieval of information about the
464    # Host machine, with generic implementations based on run(). subclasses
465    # should feel free to override these if they can provide better
466    # implementations for their specific Host types
467
468    def get_num_cpu(self):
469        """ Get the number of CPUs in the host according to /proc/cpuinfo. """
470        proc_cpuinfo = self.run_very_slowly(
471            'cat /proc/cpuinfo',
472            stdout_tee=open(os.devnull, 'w')).stdout
473        cpus = 0
474        for line in proc_cpuinfo.splitlines():
475            if line.startswith('processor'):
476                cpus += 1
477        return cpus
478
479
480    def get_arch(self):
481        """ Get the hardware architecture of the remote machine. """
482        cmd_uname = path_utils.must_be_installed('/bin/uname', host=self)
483        arch = self.run_very_slowly('%s -m' % cmd_uname).stdout.rstrip()
484        if re.match(r'i\d86$', arch):
485            arch = 'i386'
486        return arch
487
488
489    def get_kernel_ver(self):
490        """ Get the kernel version of the remote machine. """
491        cmd_uname = path_utils.must_be_installed('/bin/uname', host=self)
492        return self.run_very_slowly('%s -r' % cmd_uname).stdout.rstrip()
493
494
495    def get_cmdline(self):
496        """ Get the kernel command line of the remote machine. """
497        return self.run_very_slowly('cat /proc/cmdline').stdout.rstrip()
498
499
500    def get_meminfo(self):
501        """ Get the kernel memory info (/proc/meminfo) of the remote machine
502        and return a dictionary mapping the various statistics. """
503        meminfo_dict = {}
504        meminfo = self.run_very_slowly('cat /proc/meminfo').stdout.splitlines()
505        for key, val in (line.split(':', 1) for line in meminfo):
506            meminfo_dict[key.strip()] = val.strip()
507        return meminfo_dict
508
509
510    def path_exists(self, path):
511        """Determine if path exists on the remote machine.
512
513        @param path: path to check
514
515        @return: bool(path exists)"""
516        result = self.run_very_slowly('test -e "%s"' % utils.sh_escape(path),
517                          ignore_status=True)
518        return result.exit_status == 0
519
520
521    # some extra helpers for doing job-related operations
522
523    def record(self, *args, **dargs):
524        """ Helper method for recording status logs against Host.job that
525        silently becomes a NOP if Host.job is not available. The args and
526        dargs are passed on to Host.job.record unchanged. """
527        if self.job:
528            self.job.record(*args, **dargs)
529
530
531    def log_kernel(self):
532        """ Helper method for logging kernel information into the status logs.
533        Intended for cases where the "current" kernel is not really defined
534        and we want to explicitly log it. Does nothing if this host isn't
535        actually associated with a job. """
536        if self.job:
537            kernel = self.get_kernel_ver()
538            self.job.record("INFO", None, None,
539                            optional_fields={"kernel": kernel})
540
541
542    def log_op(self, op, op_func):
543        """ Decorator for wrapping a management operaiton in a group for status
544        logging purposes.
545
546        @param op: name of the operation.
547        @param op_func: a function that carries out the operation
548                        (reboot, suspend)
549        """
550        if self.job and not hasattr(self, "RUNNING_LOG_OP"):
551            self.RUNNING_LOG_OP = True
552            try:
553                self.job.run_op(op, op_func, self.get_kernel_ver)
554            finally:
555                del self.RUNNING_LOG_OP
556        else:
557            op_func()
558
559
560    def list_files_glob(self, glob):
561        """Get a list of files on a remote host given a glob pattern path.
562
563        @param glob: pattern
564
565        @return: list of files
566        """
567        SCRIPT = ("python -c 'import cPickle, glob, sys;"
568                  "cPickle.dump(glob.glob(sys.argv[1]), sys.stdout, 0)'")
569        output = self.run_very_slowly(SCRIPT, args=(glob,), stdout_tee=None,
570                                      timeout=60).stdout
571        return cPickle.loads(output)
572
573
574    def symlink_closure(self, paths):
575        """
576        Given a sequence of path strings, return the set of all paths that
577        can be reached from the initial set by following symlinks.
578
579        @param paths: sequence of path strings.
580        @return: a sequence of path strings that are all the unique paths that
581                can be reached from the given ones after following symlinks.
582        """
583        SCRIPT = ("python -c 'import cPickle, os, sys\n"
584                  "paths = cPickle.load(sys.stdin)\n"
585                  "closure = {}\n"
586                  "while paths:\n"
587                  "    path = paths.keys()[0]\n"
588                  "    del paths[path]\n"
589                  "    if not os.path.exists(path):\n"
590                  "        continue\n"
591                  "    closure[path] = None\n"
592                  "    if os.path.islink(path):\n"
593                  "        link_to = os.path.join(os.path.dirname(path),\n"
594                  "                               os.readlink(path))\n"
595                  "        if link_to not in closure.keys():\n"
596                  "            paths[link_to] = None\n"
597                  "cPickle.dump(closure.keys(), sys.stdout, 0)'")
598        input_data = cPickle.dumps(dict((path, None) for path in paths), 0)
599        output = self.run_very_slowly(SCRIPT, stdout_tee=None, stdin=input_data,
600                                      timeout=60).stdout
601        return cPickle.loads(output)
602
603
604    def cleanup_kernels(self, boot_dir='/boot'):
605        """
606        Remove any kernel image and associated files (vmlinux, system.map,
607        modules) for any image found in the boot directory that is not
608        referenced by entries in the bootloader configuration.
609
610        @param boot_dir: boot directory path string, default '/boot'
611        """
612        # find all the vmlinuz images referenced by the bootloader
613        vmlinuz_prefix = os.path.join(boot_dir, 'vmlinuz-')
614        boot_info = self.bootloader.get_entries()
615        used_kernver = [boot['kernel'][len(vmlinuz_prefix):]
616                        for boot in boot_info.itervalues()]
617
618        # find all the unused vmlinuz images in /boot
619        all_vmlinuz = self.list_files_glob(vmlinuz_prefix + '*')
620        used_vmlinuz = self.symlink_closure(vmlinuz_prefix + kernver
621                                            for kernver in used_kernver)
622        unused_vmlinuz = set(all_vmlinuz) - set(used_vmlinuz)
623
624        # find all the unused vmlinux images in /boot
625        vmlinux_prefix = os.path.join(boot_dir, 'vmlinux-')
626        all_vmlinux = self.list_files_glob(vmlinux_prefix + '*')
627        used_vmlinux = self.symlink_closure(vmlinux_prefix + kernver
628                                            for kernver in used_kernver)
629        unused_vmlinux = set(all_vmlinux) - set(used_vmlinux)
630
631        # find all the unused System.map files in /boot
632        systemmap_prefix = os.path.join(boot_dir, 'System.map-')
633        all_system_map = self.list_files_glob(systemmap_prefix + '*')
634        used_system_map = self.symlink_closure(
635            systemmap_prefix + kernver for kernver in used_kernver)
636        unused_system_map = set(all_system_map) - set(used_system_map)
637
638        # find all the module directories associated with unused kernels
639        modules_prefix = '/lib/modules/'
640        all_moddirs = [dir for dir in self.list_files_glob(modules_prefix + '*')
641                       if re.match(modules_prefix + r'\d+\.\d+\.\d+.*', dir)]
642        used_moddirs = self.symlink_closure(modules_prefix + kernver
643                                            for kernver in used_kernver)
644        unused_moddirs = set(all_moddirs) - set(used_moddirs)
645
646        # remove all the vmlinuz files we don't use
647        # TODO: if needed this should become package manager agnostic
648        for vmlinuz in unused_vmlinuz:
649            # try and get an rpm package name
650            rpm = self.run_very_slowly('rpm -qf', args=(vmlinuz,),
651                                       ignore_status=True, timeout=120)
652            if rpm.exit_status == 0:
653                packages = set(line.strip() for line in
654                               rpm.stdout.splitlines())
655                # if we found some package names, try to remove them
656                for package in packages:
657                    self.run_very_slowly('rpm -e', args=(package,),
658                                         ignore_status=True, timeout=120)
659            # remove the image files anyway, even if rpm didn't
660            self.run_very_slowly('rm -f', args=(vmlinuz,),
661                                 ignore_status=True, timeout=120)
662
663        # remove all the vmlinux and System.map files left over
664        for f in (unused_vmlinux | unused_system_map):
665            self.run_very_slowly('rm -f', args=(f,),
666                                 ignore_status=True, timeout=120)
667
668        # remove all unused module directories
669        # the regex match should keep us safe from removing the wrong files
670        for moddir in unused_moddirs:
671            self.run_very_slowly('rm -fr', args=(moddir,), ignore_status=True)
672
673
674    def get_attributes_to_clear_before_provision(self):
675        """Get a list of attributes to be cleared before machine_install starts.
676
677        If provision runs in a lab environment, it is necessary to clear certain
678        host attributes for the host in afe_host_attributes table. For example,
679        `job_repo_url` is a devserver url pointed to autotest packages for
680        CrosHost, it needs to be removed before provision starts for tests to
681        run reliably.
682        For ADBHost, the job repo url has a different format, i.e., appended by
683        adb_serial, so this method should be overriden in ADBHost.
684        """
685        return ['job_repo_url']
686
687
688    def get_platform(self):
689        """Determine the correct platform label for this host.
690
691        @return: A string representing this host's platform.
692        """
693        raise NotImplementedError("Get platform not implemented!")
694
695
696    def get_labels(self):
697        """Return a list of the labels gathered from the devices connected.
698
699        @return: A list of strings that denote the labels from all the devices
700        connected.
701        """
702        raise NotImplementedError("Get labels not implemented!")
703
704