base_classes.py revision 672fb5f8806694d9476f016c0f1094da29120f31
# Copyright 2009 Google Inc. Released under the GPL v2

"""
This module defines the base classes for the Host hierarchy.

Implementation details:
You should import the "hosts" package instead of importing each type of host.

        Host: a machine on which you can run programs
"""

__author__ = """
mbligh@google.com (Martin J. Bligh),
poirier@google.com (Benjamin Poirier),
stutsman@google.com (Ryan Stutsman)
"""

import cPickle, logging, os, re, time

from autotest_lib.client.common_lib import global_config, error, utils
from autotest_lib.client.common_lib.cros import path_utils


class Host(object):
    """
    This class represents a machine on which you can run programs.

    It may be a local machine, the one autoserv is running on, a remote
    machine or a virtual machine.

    Implementation details:
    This is an abstract class, leaf subclasses must implement the methods
    listed here. You must not instantiate this class but should
    instantiate one of those leaf subclasses.

    When overriding methods that raise NotImplementedError, the leaf class
    is fully responsible for the implementation and should not chain calls
    to super. When overriding methods that are a NOP in Host, the subclass
    should chain calls to super(). The criteria for fitting a new method into
    one category or the other should be:
        1. If two separate generic implementations could reasonably be
           concatenated, then the abstract implementation should pass and
           subclasses should chain calls to super.
        2. If only one class could reasonably perform the stated function
           (e.g. two separate run() implementations cannot both be executed)
           then the method should raise NotImplementedError in Host, and
           the implementor should NOT chain calls to super, to ensure that
           only one implementation ever gets executed.
    """

    # Job this host is associated with; record()/log_kernel()/log_op() are
    # no-ops (or run unwrapped) while this is None.
    job = None
    # Timeouts below are in seconds and are read from the HOSTS section of
    # the global config once, when the class body is evaluated.
    DEFAULT_REBOOT_TIMEOUT = global_config.global_config.get_config_value(
        "HOSTS", "default_reboot_timeout", type=int, default=1800)
    WAIT_DOWN_REBOOT_TIMEOUT = global_config.global_config.get_config_value(
        "HOSTS", "wait_down_reboot_timeout", type=int, default=840)
    WAIT_DOWN_REBOOT_WARNING = global_config.global_config.get_config_value(
        "HOSTS", "wait_down_reboot_warning", type=int, default=540)
    HOURS_TO_WAIT_FOR_RECOVERY = global_config.global_config.get_config_value(
        "HOSTS", "hours_to_wait_for_recovery", type=float, default=2.5)
    # the number of hardware repair requests that need to happen before we
    # actually send machines to hardware repair
    HARDWARE_REPAIR_REQUEST_THRESHOLD = 4
    OP_REBOOT = 'reboot'
    OP_SUSPEND = 'suspend'
    PWR_OPERATION = [OP_REBOOT, OP_SUSPEND]


    def __init__(self, *args, **dargs):
        """Construct the host by forwarding all arguments to _initialize().

        @param *args: Positional arguments passed to _initialize.
        @param **dargs: Keyword arguments passed to _initialize.
        """
        self._initialize(*args, **dargs)


    def _initialize(self, *args, **dargs):
        """Initialization hook called from __init__; a NOP in Host.

        @param *args: Unused here.
        @param **dargs: Unused here.
        """
        pass


    @property
    def job_repo_url_attribute(self):
        """Get the host attribute name for job_repo_url.
        """
        return 'job_repo_url'


    def close(self):
        """Close the connection to the host.
        """
        pass


    def setup(self):
        """Setup the host object.
        """
        pass


    def run(self, command, timeout=3600, ignore_status=False,
            stdout_tee=utils.TEE_TO_LOGS, stderr_tee=utils.TEE_TO_LOGS,
            stdin=None, args=()):
        """
        Run a command on this host.

        @param command: the command line string
        @param timeout: time limit in seconds before attempting to
                kill the running process. The run() function
                will take a few seconds longer than 'timeout'
                to complete if it has to kill the process.
        @param ignore_status: do not raise an exception, no matter
                what the exit code of the command is.
        @param stdout_tee: where to tee the stdout
        @param stderr_tee: where to tee the stderr
        @param stdin: stdin to pass (a string) to the executed command
        @param args: sequence of strings to pass as arguments to command by
                quoting them in " and escaping their contents if necessary

        @return a utils.CmdResult object

        @raises AutotestHostRunError: the exit code of the command execution
                was not 0 and ignore_status was not enabled
        """
        raise NotImplementedError('Run not implemented!')

    # TODO(pwang): Delete this once crbug.com/735653, crbug.com/734887 is fixed
    # and ssh time is reasonable.
    def run_very_slowly(self, *args, **kwargs):
        """Run a command on this host; currently a plain pass-through to run().

        @param *args: Positional arguments forwarded to run().
        @param **kwargs: Keyword arguments forwarded to run().

        @return: Whatever run() returns.
        """
        return self.run(*args, **kwargs)


    def run_output(self, command, *args, **dargs):
        """Run and retrieve the value of stdout with trailing whitespace
        stripped.

        @param command: Command to execute.
        @param *args: Extra arguments to run.
        @param **dargs: Extra keyword arguments to run.

        @return: String value of stdout.
        """
        return self.run_very_slowly(command, *args, **dargs).stdout.rstrip()


    def reboot(self):
        """Reboot the host.
        """
        raise NotImplementedError('Reboot not implemented!')


    def suspend(self):
        """Suspend the host.
        """
        raise NotImplementedError('Suspend not implemented!')


    def sysrq_reboot(self):
        """Execute host reboot via SysRq key.
        """
        raise NotImplementedError('Sysrq reboot not implemented!')


    def reboot_setup(self, *args, **dargs):
        """Prepare for reboot.

        This doesn't appear to be implemented by any current hosts.

        @param *args: Extra arguments to ?.
        @param **dargs: Extra keyword arguments to ?.
        """
        pass


    def reboot_followup(self, *args, **dargs):
        """Post reboot work.

        This doesn't appear to be implemented by any current hosts.

        @param *args: Extra arguments to ?.
        @param **dargs: Extra keyword arguments to ?.
        """
        pass


    def get_file(self, source, dest, delete_dest=False):
        """Retrieve a file from the host.

        @param source: Remote file path (directory, file or list).
        @param dest: Local file path (directory, file or list).
        @param delete_dest: Delete files under dest (the local path) that
                are not present in source (the remote path).
        """
        raise NotImplementedError('Get file not implemented!')


    def send_file(self, source, dest, delete_dest=False):
        """Send a file to the host.

        @param source: Local file path (directory, file or list).
        @param dest: Remote file path (directory, file or list).
        @param delete_dest: Delete files in remote path that are not in local
                path.
        """
        raise NotImplementedError('Send file not implemented!')


    def get_tmp_dir(self):
        """Create a temporary directory on the host.
        """
        raise NotImplementedError('Get temp dir not implemented!')


    def is_up(self):
        """Confirm the host is online.
        """
        raise NotImplementedError('Is up not implemented!')


    def is_shutting_down(self):
        """ Indicates if a machine is currently shutting down.

        @return: Always False in this base implementation.
        """
        return False


    def get_wait_up_processes(self):
        """ Gets the list of local processes to wait for in wait_up.

        The names come from the comma-separated "wait_up_processes" value in
        the HOSTS config section; surrounding whitespace and empty entries
        are dropped.

        @return: A set of process name strings (possibly empty).
        """
        get_config = global_config.global_config.get_config_value
        proc_list = get_config("HOSTS", "wait_up_processes",
                               default="").strip()
        processes = set(p.strip() for p in proc_list.split(","))
        processes.discard("")
        return processes


    def get_boot_id(self, timeout=60):
        """ Get a unique ID associated with the current boot.

        Should return a string with the semantics such that two separate
        calls to Host.get_boot_id() return the same string if the host did
        not reboot between the two calls, and two different strings if it
        has rebooted at least once between the two calls.

        @param timeout The number of seconds to wait before timing out.

        @return A string unique to this boot or None if not available."""
        BOOT_ID_FILE = '/proc/sys/kernel/random/boot_id'
        NO_ID_MSG = 'no boot_id available'
        # The remote shell echoes a sentinel message when the boot_id file is
        # missing, so "not available" can be told apart from a real id.
        cmd = 'if [ -f %r ]; then cat %r; else echo %r; fi' % (
                BOOT_ID_FILE, BOOT_ID_FILE, NO_ID_MSG)
        boot_id = self.run_very_slowly(cmd, timeout=timeout).stdout.strip()
        if boot_id == NO_ID_MSG:
            return None
        return boot_id


    def wait_up(self, timeout=None):
        """Wait for the host to come up.

        @param timeout: Max seconds to wait.
        """
        raise NotImplementedError('Wait up not implemented!')


    def wait_down(self, timeout=None, warning_timer=None, old_boot_id=None):
        """Wait for the host to go down.

        @param timeout: Max seconds to wait before returning.
        @param warning_timer: Seconds before warning host is not down.
        @param old_boot_id: Result of self.get_boot_id() before shutdown.
        """
        raise NotImplementedError('Wait down not implemented!')


    def _construct_host_metadata(self, type_str):
        """Returns dict of metadata with type_str, hostname, time_recorded.

        Reads self.hostname, which Host itself does not define; it is
        expected to be provided by the subclass.

        @param type_str: String representing _type field in es db.
                         For example: type_str='reboot_total'.
        """
        metadata = {
            'hostname': self.hostname,
            'time_recorded': time.time(),
            '_type': type_str,
        }
        return metadata


    def wait_for_restart(self, timeout=DEFAULT_REBOOT_TIMEOUT,
                         down_timeout=WAIT_DOWN_REBOOT_TIMEOUT,
                         down_warning=WAIT_DOWN_REBOOT_WARNING,
                         log_failure=True, old_boot_id=None, **dargs):
        """Wait for the host to come back from a reboot.

        This is a generic implementation based entirely on wait_up and
        wait_down.

        @param timeout: Max seconds to wait for the host to come back up
                (passed to wait_up).
        @param down_timeout: Max seconds to wait for host to go down.
        @param down_warning: Seconds to wait before warning host hasn't gone
                down.
        @param log_failure: bool(Log when host does not go down.)
        @param old_boot_id: Result of self.get_boot_id() before restart.
        @param **dargs: Extra arguments to reboot_followup.

        @raises AutoservShutdownError if host does not go down.
        @raises AutoservRebootError if host does not come back up.
        """
        if not self.wait_down(timeout=down_timeout,
                              warning_timer=down_warning,
                              old_boot_id=old_boot_id):
            # log_failure only controls the status-log entry; the shutdown
            # failure is raised either way.
            if log_failure:
                self.record("ABORT", None, "reboot.verify", "shut down failed")
            raise error.AutoservShutdownError("Host did not shut down")
        if self.wait_up(timeout):
            self.record("GOOD", None, "reboot.verify")
            self.reboot_followup(**dargs)
        else:
            self.record("ABORT", None, "reboot.verify",
                        "Host did not return from reboot")
            raise error.AutoservRebootError("Host did not return from reboot")


    def verify(self):
        """Check if host is in good state.

        Runs the hardware, connectivity and software checks, in that order.
        """
        self.verify_hardware()
        self.verify_connectivity()
        self.verify_software()


    def verify_hardware(self):
        """Check host hardware.
        """
        pass


    def verify_connectivity(self):
        """Check host network connectivity.
        """
        pass


    def verify_software(self):
        """Check host software.
        """
        pass


    def check_diskspace(self, path, gb):
        """Raises an error if path does not have at least gb GB free.

        @param path The path to check for free disk space.
        @param gb A floating point number to compare with a granularity
            of 1 MB.

        1000 based SI units are used.

        @raises AutoservDiskFullHostError if path has less than gb GB free.
        """
        one_mb = 10 ** 6  # Bytes (SI unit).
        mb_per_gb = 1000.0
        logging.info('Checking for >= %s GB of space under %s on machine %s',
                     gb, path, self.hostname)
        # Field 3 (0-based) of the last df output line is taken as the free
        # space, reported in blocks of one_mb bytes because of -B.
        df = self.run_very_slowly('df -PB %d %s | tail -1'
                                  % (one_mb, path)).stdout.split()
        free_space_gb = int(df[3]) / mb_per_gb
        if free_space_gb < gb:
            raise error.AutoservDiskFullHostError(path, gb, free_space_gb)
        else:
            logging.info('Found %s GB >= %s GB of space under %s on machine %s',
                free_space_gb, gb, path, self.hostname)


    def check_inodes(self, path, min_kilo_inodes):
        """Raises an error if a file system is short on i-nodes.

        @param path The path to check for free i-nodes.
        @param min_kilo_inodes Minimum number of i-nodes required,
                               in units of 1000 i-nodes.

        @raises AutoservNoFreeInodesError If the minimum required
                                  i-node count isn't available.
        """
        min_inodes = 1000 * min_kilo_inodes
        logging.info('Checking for >= %d i-nodes under %s '
                     'on machine %s', min_inodes, path, self.hostname)
        # Field 3 (0-based) of the last `df -Pi` output line is taken as the
        # free i-node count.
        df = self.run_very_slowly('df -Pi %s | tail -1' % path).stdout.split()
        free_inodes = int(df[3])
        if free_inodes < min_inodes:
            raise error.AutoservNoFreeInodesError(path, min_inodes,
                                                  free_inodes)
        else:
            logging.info('Found %d >= %d i-nodes under %s on '
                         'machine %s', free_inodes, min_inodes,
                         path, self.hostname)


    def erase_dir_contents(self, path, ignore_status=True, timeout=3600):
        """Empty a given directory path contents.

        @param path: Path to empty.
        @param ignore_status: Ignore the exit status from run.
        @param timeout: Max seconds to allow command to complete.
        """
        rm_cmd = 'find "%s" -mindepth 1 -maxdepth 1 -print0 | xargs -0 rm -rf'
        # Escape the path for use inside double quotes (same treatment as
        # path_exists) so that quote/dollar/backtick characters in it cannot
        # change what gets handed to `rm -rf`.
        self.run_very_slowly(rm_cmd % utils.sh_escape(path),
                             ignore_status=ignore_status,
                             timeout=timeout)


    def repair(self):
        """Try and get the host to pass `self.verify()`."""
        self.verify()


    def disable_ipfilters(self):
        """Allow all network packets in and out of the host.

        The current rules are first saved to /tmp/iptable-rules so that
        enable_ipfilters() can restore them.
        """
        self.run_very_slowly('iptables-save > /tmp/iptable-rules')
        self.run_very_slowly('iptables -P INPUT ACCEPT')
        self.run_very_slowly('iptables -P FORWARD ACCEPT')
        self.run_very_slowly('iptables -P OUTPUT ACCEPT')


    def enable_ipfilters(self):
        """Re-enable the IP filters disabled from disable_ipfilters()"""
        if self.path_exists('/tmp/iptable-rules'):
            self.run_very_slowly('iptables-restore < /tmp/iptable-rules')


    def cleanup(self):
        """Restore host to clean state.
        """
        pass


    def machine_install(self):
        """Install on the host.
        """
        raise NotImplementedError('Machine install not implemented!')


    def install(self, installableObject):
        """Call install on a thing.

        @param installableObject: Thing with install method that will accept our
                self.
        """
        installableObject.install(self)


    def get_autodir(self):
        """Get the autodir of this host; must be implemented by subclasses.
        """
        raise NotImplementedError('Get autodir not implemented!')


    def set_autodir(self):
        """Set the autodir of this host; must be implemented by subclasses.
        """
        raise NotImplementedError('Set autodir not implemented!')


    def start_loggers(self):
        """ Called to start continuous host logging. """
        pass


    def stop_loggers(self):
        """ Called to stop continuous host logging. """
        pass


    # some extra methods simplify the retrieval of information about the
    # Host machine, with generic implementations based on run(). subclasses
    # should feel free to override these if they can provide better
    # implementations for their specific Host types

    def get_num_cpu(self):
        """ Get the number of CPUs in the host according to /proc/cpuinfo.

        @return: Number of lines in /proc/cpuinfo starting with 'processor'.
        """
        # Tee stdout to the null device for this command, and make sure the
        # handle is closed again once the command has finished.
        with open(os.devnull, 'w') as devnull:
            proc_cpuinfo = self.run_very_slowly(
                    'cat /proc/cpuinfo', stdout_tee=devnull).stdout
        return sum(1 for line in proc_cpuinfo.splitlines()
                   if line.startswith('processor'))


    def get_arch(self):
        """ Get the hardware architecture of the remote machine.

        Any i<digit>86 value reported by `uname -m` is normalized to 'i386'.
        """
        cmd_uname = path_utils.must_be_installed('/bin/uname', host=self)
        arch = self.run_very_slowly('%s -m' % cmd_uname).stdout.rstrip()
        if re.match(r'i\d86$', arch):
            arch = 'i386'
        return arch


    def get_kernel_ver(self):
        """ Get the kernel version of the remote machine. """
        cmd_uname = path_utils.must_be_installed('/bin/uname', host=self)
        return self.run_very_slowly('%s -r' % cmd_uname).stdout.rstrip()


    def get_cmdline(self):
        """ Get the kernel command line of the remote machine. """
        return self.run_very_slowly('cat /proc/cmdline').stdout.rstrip()


    def get_meminfo(self):
        """ Get the kernel memory info (/proc/meminfo) of the remote machine
        and return a dictionary mapping the various statistics.

        Each "key: value" line becomes one entry, with both sides stripped of
        whitespace. Lines that contain no ':' are skipped.
        """
        meminfo_dict = {}
        meminfo = self.run_very_slowly('cat /proc/meminfo').stdout.splitlines()
        for line in meminfo:
            key, sep, val = line.partition(':')
            if not sep:
                # Not a "key: value" line (e.g. blank); nothing to record.
                continue
            meminfo_dict[key.strip()] = val.strip()
        return meminfo_dict


    def path_exists(self, path):
        """Determine if path exists on the remote machine.

        @param path: path to check

        @return: bool(path exists)"""
        result = self.run_very_slowly('test -e "%s"' % utils.sh_escape(path),
                                      ignore_status=True)
        return result.exit_status == 0


    # some extra helpers for doing job-related operations

    def record(self, *args, **dargs):
        """ Helper method for recording status logs against Host.job that
        silently becomes a NOP if Host.job is not available. The args and
        dargs are passed on to Host.job.record unchanged. """
        if self.job:
            self.job.record(*args, **dargs)


    def log_kernel(self):
        """ Helper method for logging kernel information into the status logs.
        Intended for cases where the "current" kernel is not really defined
        and we want to explicitly log it. Does nothing if this host isn't
        actually associated with a job. """
        if self.job:
            kernel = self.get_kernel_ver()
            self.job.record("INFO", None, None,
                            optional_fields={"kernel": kernel})


    def log_op(self, op, op_func):
        """ Wrap a management operation in a group for status logging
        purposes.

        If this host has a job and no log_op() call is already in progress on
        this object, op_func is run through self.job.run_op(); otherwise
        op_func is simply called directly.

        @param op: name of the operation.
        @param op_func: a function that carries out the operation
                        (reboot, suspend)
        """
        # RUNNING_LOG_OP only exists as an instance attribute while a wrapped
        # operation is running, so a nested log_op() call takes the plain
        # op_func() branch below.
        if self.job and not hasattr(self, "RUNNING_LOG_OP"):
            self.RUNNING_LOG_OP = True
            try:
                self.job.run_op(op, op_func, self.get_kernel_ver)
            finally:
                del self.RUNNING_LOG_OP
        else:
            op_func()


    def list_files_glob(self, glob):
        """Get a list of files on a remote host given a glob pattern path.

        @param glob: pattern

        @return: list of files
        """
        SCRIPT = ("python -c 'import cPickle, glob, sys;"
                  "cPickle.dump(glob.glob(sys.argv[1]), sys.stdout, 0)'")
        output = self.run_very_slowly(SCRIPT, args=(glob,), stdout_tee=None,
                                      timeout=60).stdout
        # NOTE(review): this unpickles data produced by the remote host;
        # unpickling is only safe if that host's output is trusted.
        return cPickle.loads(output)


    def symlink_closure(self, paths):
        """
        Given a sequence of path strings, return the set of all paths that
        can be reached from the initial set by following symlinks.

        @param paths: sequence of path strings.
        @return: a sequence of path strings that are all the unique paths that
                can be reached from the given ones after following symlinks.
        """
        # The remote script reads a pickled dict of paths from stdin, walks
        # symlinks (skipping paths that do not exist) and pickles the list of
        # visited paths to stdout.
        SCRIPT = ("python -c 'import cPickle, os, sys\n"
                  "paths = cPickle.load(sys.stdin)\n"
                  "closure = {}\n"
                  "while paths:\n"
                  "    path = paths.keys()[0]\n"
                  "    del paths[path]\n"
                  "    if not os.path.exists(path):\n"
                  "        continue\n"
                  "    closure[path] = None\n"
                  "    if os.path.islink(path):\n"
                  "        link_to = os.path.join(os.path.dirname(path),\n"
                  "                               os.readlink(path))\n"
                  "        if link_to not in closure.keys():\n"
                  "            paths[link_to] = None\n"
                  "cPickle.dump(closure.keys(), sys.stdout, 0)'")
        input_data = cPickle.dumps(dict((path, None) for path in paths), 0)
        output = self.run_very_slowly(SCRIPT, stdout_tee=None, stdin=input_data,
                                      timeout=60).stdout
        # NOTE(review): this unpickles data produced by the remote host;
        # unpickling is only safe if that host's output is trusted.
        return cPickle.loads(output)


    def cleanup_kernels(self, boot_dir='/boot'):
        """
        Remove any kernel image and associated files (vmlinux, system.map,
        modules) for any image found in the boot directory that is not
        referenced by entries in the bootloader configuration.

        Reads self.bootloader, which Host itself does not define; it is
        expected to be provided by the subclass.

        @param boot_dir: boot directory path string, default '/boot'
        """
        # find all the vmlinuz images referenced by the bootloader
        vmlinuz_prefix = os.path.join(boot_dir, 'vmlinuz-')
        boot_info = self.bootloader.get_entries()
        used_kernver = [boot['kernel'][len(vmlinuz_prefix):]
                        for boot in boot_info.itervalues()]

        # find all the unused vmlinuz images in /boot
        all_vmlinuz = self.list_files_glob(vmlinuz_prefix + '*')
        used_vmlinuz = self.symlink_closure(vmlinuz_prefix + kernver
                                            for kernver in used_kernver)
        unused_vmlinuz = set(all_vmlinuz) - set(used_vmlinuz)

        # find all the unused vmlinux images in /boot
        vmlinux_prefix = os.path.join(boot_dir, 'vmlinux-')
        all_vmlinux = self.list_files_glob(vmlinux_prefix + '*')
        used_vmlinux = self.symlink_closure(vmlinux_prefix + kernver
                                            for kernver in used_kernver)
        unused_vmlinux = set(all_vmlinux) - set(used_vmlinux)

        # find all the unused System.map files in /boot
        systemmap_prefix = os.path.join(boot_dir, 'System.map-')
        all_system_map = self.list_files_glob(systemmap_prefix + '*')
        used_system_map = self.symlink_closure(
            systemmap_prefix + kernver for kernver in used_kernver)
        unused_system_map = set(all_system_map) - set(used_system_map)

        # find all the module directories associated with unused kernels
        modules_prefix = '/lib/modules/'
        all_moddirs = [candidate for candidate
                       in self.list_files_glob(modules_prefix + '*')
                       if re.match(modules_prefix + r'\d+\.\d+\.\d+.*',
                                   candidate)]
        used_moddirs = self.symlink_closure(modules_prefix + kernver
                                            for kernver in used_kernver)
        unused_moddirs = set(all_moddirs) - set(used_moddirs)

        # remove all the vmlinuz files we don't use
        # TODO: if needed this should become package manager agnostic
        for vmlinuz in unused_vmlinuz:
            # try and get an rpm package name
            rpm = self.run_very_slowly('rpm -qf', args=(vmlinuz,),
                                       ignore_status=True, timeout=120)
            if rpm.exit_status == 0:
                packages = set(line.strip() for line in
                               rpm.stdout.splitlines())
                # if we found some package names, try to remove them
                for package in packages:
                    self.run_very_slowly('rpm -e', args=(package,),
                                         ignore_status=True, timeout=120)
            # remove the image files anyway, even if rpm didn't
            self.run_very_slowly('rm -f', args=(vmlinuz,),
                                 ignore_status=True, timeout=120)

        # remove all the vmlinux and System.map files left over
        for f in (unused_vmlinux | unused_system_map):
            self.run_very_slowly('rm -f', args=(f,),
                                 ignore_status=True, timeout=120)

        # remove all unused module directories
        # the regex match should keep us safe from removing the wrong files
        for moddir in unused_moddirs:
            self.run_very_slowly('rm -fr', args=(moddir,), ignore_status=True)


    def get_attributes_to_clear_before_provision(self):
        """Get a list of attributes to be cleared before machine_install starts.

        If provision runs in a lab environment, it is necessary to clear certain
        host attributes for the host in afe_host_attributes table. For example,
        `job_repo_url` is a devserver url pointed to autotest packages for
        CrosHost, it needs to be removed before provision starts for tests to
        run reliably.
        For ADBHost, the job repo url has a different format, i.e., appended by
        adb_serial, so this method should be overridden in ADBHost.
        """
        return ['job_repo_url']


    def get_platform(self):
        """Determine the correct platform label for this host.

        @return: A string representing this host's platform.
        """
        raise NotImplementedError("Get platform not implemented!")


    def get_labels(self):
        """Return a list of the labels gathered from the devices connected.

        @return: A list of strings that denote the labels from all the devices
                 connected.
        """
        raise NotImplementedError("Get labels not implemented!")