abstract_ssh.py revision 53a216a5ae9cf732330846e652dff8d0ad29bd2a
1import os, time, socket, shutil, glob, logging, traceback, tempfile 2from autotest_lib.client.common_lib import autotemp, error 3from autotest_lib.server import utils, autotest 4from autotest_lib.server.hosts import remote 5from autotest_lib.client.common_lib.global_config import global_config 6 7# pylint: disable-msg=C0111 8 9get_value = global_config.get_config_value 10enable_master_ssh = get_value('AUTOSERV', 'enable_master_ssh', type=bool, 11 default=False) 12 13 14class AbstractSSHHost(remote.RemoteHost): 15 """ 16 This class represents a generic implementation of most of the 17 framework necessary for controlling a host via ssh. It implements 18 almost all of the abstract Host methods, except for the core 19 Host.run method. 20 """ 21 22 def _initialize(self, hostname, user="root", port=22, password="", 23 *args, **dargs): 24 super(AbstractSSHHost, self)._initialize(hostname=hostname, 25 *args, **dargs) 26 self.ip = socket.getaddrinfo(self.hostname, None)[0][4][0] 27 self.user = user 28 self.port = port 29 self.password = password 30 self._use_rsync = None 31 self.known_hosts_file = tempfile.mkstemp()[1] 32 33 """ 34 Master SSH connection background job, socket temp directory and socket 35 control path option. If master-SSH is enabled, these fields will be 36 initialized by start_master_ssh when a new SSH connection is initiated. 37 """ 38 self.master_ssh_job = None 39 self.master_ssh_tempdir = None 40 self.master_ssh_option = '' 41 42 43 def make_ssh_command(self, user="root", port=22, opts='', 44 hosts_file='/dev/null', 45 connect_timeout=30, alive_interval=300): 46 base_command = ("/usr/bin/ssh -a -x %s -o StrictHostKeyChecking=no " 47 "-o UserKnownHostsFile=%s -o BatchMode=yes " 48 "-o ConnectTimeout=%d -o ServerAliveInterval=%d " 49 "-l %s -p %d") 50 assert isinstance(connect_timeout, (int, long)) 51 assert connect_timeout > 0 # can't disable the timeout 52 return base_command % (opts, hosts_file, connect_timeout, 53 alive_interval, user, port) 54 55 56 def use_rsync(self): 57 if self._use_rsync is not None: 58 return self._use_rsync 59 60 # Check if rsync is available on the remote host. If it's not, 61 # don't try to use it for any future file transfers. 62 self._use_rsync = self._check_rsync() 63 if not self._use_rsync: 64 logging.warn("rsync not available on remote host %s -- disabled", 65 self.hostname) 66 return self._use_rsync 67 68 69 def _check_rsync(self): 70 """ 71 Check if rsync is available on the remote host. 72 """ 73 try: 74 self.run("rsync --version", stdout_tee=None, stderr_tee=None) 75 except error.AutoservRunError: 76 return False 77 return True 78 79 80 def _encode_remote_paths(self, paths, escape=True): 81 """ 82 Given a list of file paths, encodes it as a single remote path, in 83 the style used by rsync and scp. 84 """ 85 if escape: 86 paths = [utils.scp_remote_escape(path) for path in paths] 87 return '%s@%s:"%s"' % (self.user, self.hostname, " ".join(paths)) 88 89 90 def _make_rsync_cmd(self, sources, dest, delete_dest, preserve_symlinks): 91 """ 92 Given a list of source paths and a destination path, produces the 93 appropriate rsync command for copying them. Remote paths must be 94 pre-encoded. 95 """ 96 ssh_cmd = self.make_ssh_command(user=self.user, port=self.port, 97 opts=self.master_ssh_option, 98 hosts_file=self.known_hosts_file) 99 if delete_dest: 100 delete_flag = "--delete" 101 else: 102 delete_flag = "" 103 if preserve_symlinks: 104 symlink_flag = "" 105 else: 106 symlink_flag = "-L" 107 command = "rsync %s %s --timeout=1800 --rsh='%s' -az %s %s" 108 return command % (symlink_flag, delete_flag, ssh_cmd, 109 " ".join(sources), dest) 110 111 112 def _make_ssh_cmd(self, cmd): 113 """ 114 Create a base ssh command string for the host which can be used 115 to run commands directly on the machine 116 """ 117 base_cmd = self.make_ssh_command(user=self.user, port=self.port, 118 opts=self.master_ssh_option, 119 hosts_file=self.known_hosts_file) 120 121 return '%s %s "%s"' % (base_cmd, self.hostname, utils.sh_escape(cmd)) 122 123 def _make_scp_cmd(self, sources, dest): 124 """ 125 Given a list of source paths and a destination path, produces the 126 appropriate scp command for encoding it. Remote paths must be 127 pre-encoded. 128 """ 129 command = ("scp -rq %s -o StrictHostKeyChecking=no " 130 "-o UserKnownHostsFile=%s -P %d %s '%s'") 131 return command % (self.master_ssh_option, self.known_hosts_file, 132 self.port, " ".join(sources), dest) 133 134 135 def _make_rsync_compatible_globs(self, path, is_local): 136 """ 137 Given an rsync-style path, returns a list of globbed paths 138 that will hopefully provide equivalent behaviour for scp. Does not 139 support the full range of rsync pattern matching behaviour, only that 140 exposed in the get/send_file interface (trailing slashes). 141 142 The is_local param is flag indicating if the paths should be 143 interpreted as local or remote paths. 144 """ 145 146 # non-trailing slash paths should just work 147 if len(path) == 0 or path[-1] != "/": 148 return [path] 149 150 # make a function to test if a pattern matches any files 151 if is_local: 152 def glob_matches_files(path, pattern): 153 return len(glob.glob(path + pattern)) > 0 154 else: 155 def glob_matches_files(path, pattern): 156 result = self.run("ls \"%s\"%s" % (utils.sh_escape(path), 157 pattern), 158 stdout_tee=None, ignore_status=True) 159 return result.exit_status == 0 160 161 # take a set of globs that cover all files, and see which are needed 162 patterns = ["*", ".[!.]*"] 163 patterns = [p for p in patterns if glob_matches_files(path, p)] 164 165 # convert them into a set of paths suitable for the commandline 166 if is_local: 167 return ["\"%s\"%s" % (utils.sh_escape(path), pattern) 168 for pattern in patterns] 169 else: 170 return [utils.scp_remote_escape(path) + pattern 171 for pattern in patterns] 172 173 174 def _make_rsync_compatible_source(self, source, is_local): 175 """ 176 Applies the same logic as _make_rsync_compatible_globs, but 177 applies it to an entire list of sources, producing a new list of 178 sources, properly quoted. 179 """ 180 return sum((self._make_rsync_compatible_globs(path, is_local) 181 for path in source), []) 182 183 184 def _set_umask_perms(self, dest): 185 """ 186 Given a destination file/dir (recursively) set the permissions on 187 all the files and directories to the max allowed by running umask. 188 """ 189 190 # now this looks strange but I haven't found a way in Python to _just_ 191 # get the umask, apparently the only option is to try to set it 192 umask = os.umask(0) 193 os.umask(umask) 194 195 max_privs = 0777 & ~umask 196 197 def set_file_privs(filename): 198 """Sets mode of |filename|. Assumes |filename| exists.""" 199 file_stat = os.stat(filename) 200 201 file_privs = max_privs 202 # if the original file permissions do not have at least one 203 # executable bit then do not set it anywhere 204 if not file_stat.st_mode & 0111: 205 file_privs &= ~0111 206 207 os.chmod(filename, file_privs) 208 209 # try a bottom-up walk so changes on directory permissions won't cut 210 # our access to the files/directories inside it 211 for root, dirs, files in os.walk(dest, topdown=False): 212 # when setting the privileges we emulate the chmod "X" behaviour 213 # that sets to execute only if it is a directory or any of the 214 # owner/group/other already has execute right 215 for dirname in dirs: 216 os.chmod(os.path.join(root, dirname), max_privs) 217 218 # Filter out broken symlinks as we go. 219 for filename in filter(os.path.exists, files): 220 set_file_privs(os.path.join(root, filename)) 221 222 223 # now set privs for the dest itself 224 if os.path.isdir(dest): 225 os.chmod(dest, max_privs) 226 else: 227 set_file_privs(dest) 228 229 230 def get_file(self, source, dest, delete_dest=False, preserve_perm=True, 231 preserve_symlinks=False): 232 """ 233 Copy files from the remote host to a local path. 234 235 Directories will be copied recursively. 236 If a source component is a directory with a trailing slash, 237 the content of the directory will be copied, otherwise, the 238 directory itself and its content will be copied. This 239 behavior is similar to that of the program 'rsync'. 240 241 Args: 242 source: either 243 1) a single file or directory, as a string 244 2) a list of one or more (possibly mixed) 245 files or directories 246 dest: a file or a directory (if source contains a 247 directory or more than one element, you must 248 supply a directory dest) 249 delete_dest: if this is true, the command will also clear 250 out any old files at dest that are not in the 251 source 252 preserve_perm: tells get_file() to try to preserve the sources 253 permissions on files and dirs 254 preserve_symlinks: try to preserve symlinks instead of 255 transforming them into files/dirs on copy 256 257 Raises: 258 AutoservRunError: the scp command failed 259 """ 260 261 # Start a master SSH connection if necessary. 262 self.start_master_ssh() 263 264 if isinstance(source, basestring): 265 source = [source] 266 dest = os.path.abspath(dest) 267 268 # If rsync is disabled or fails, try scp. 269 try_scp = True 270 if self.use_rsync(): 271 try: 272 remote_source = self._encode_remote_paths(source) 273 local_dest = utils.sh_escape(dest) 274 rsync = self._make_rsync_cmd([remote_source], local_dest, 275 delete_dest, preserve_symlinks) 276 utils.run(rsync) 277 try_scp = False 278 except error.CmdError, e: 279 logging.warn("trying scp, rsync failed: %s", e) 280 281 if try_scp: 282 # scp has no equivalent to --delete, just drop the entire dest dir 283 if delete_dest and os.path.isdir(dest): 284 shutil.rmtree(dest) 285 os.mkdir(dest) 286 287 remote_source = self._make_rsync_compatible_source(source, False) 288 if remote_source: 289 # _make_rsync_compatible_source() already did the escaping 290 remote_source = self._encode_remote_paths(remote_source, 291 escape=False) 292 local_dest = utils.sh_escape(dest) 293 scp = self._make_scp_cmd([remote_source], local_dest) 294 try: 295 utils.run(scp) 296 except error.CmdError, e: 297 raise error.AutoservRunError(e.args[0], e.args[1]) 298 299 if not preserve_perm: 300 # we have no way to tell scp to not try to preserve the 301 # permissions so set them after copy instead. 302 # for rsync we could use "--no-p --chmod=ugo=rwX" but those 303 # options are only in very recent rsync versions 304 self._set_umask_perms(dest) 305 306 307 def send_file(self, source, dest, delete_dest=False, 308 preserve_symlinks=False): 309 """ 310 Copy files from a local path to the remote host. 311 312 Directories will be copied recursively. 313 If a source component is a directory with a trailing slash, 314 the content of the directory will be copied, otherwise, the 315 directory itself and its content will be copied. This 316 behavior is similar to that of the program 'rsync'. 317 318 Args: 319 source: either 320 1) a single file or directory, as a string 321 2) a list of one or more (possibly mixed) 322 files or directories 323 dest: a file or a directory (if source contains a 324 directory or more than one element, you must 325 supply a directory dest) 326 delete_dest: if this is true, the command will also clear 327 out any old files at dest that are not in the 328 source 329 preserve_symlinks: controls if symlinks on the source will be 330 copied as such on the destination or transformed into the 331 referenced file/directory 332 333 Raises: 334 AutoservRunError: the scp command failed 335 """ 336 337 # Start a master SSH connection if necessary. 338 self.start_master_ssh() 339 340 if isinstance(source, basestring): 341 source = [source] 342 remote_dest = self._encode_remote_paths([dest]) 343 344 # If rsync is disabled or fails, try scp. 345 try_scp = True 346 if self.use_rsync(): 347 try: 348 local_sources = [utils.sh_escape(path) for path in source] 349 rsync = self._make_rsync_cmd(local_sources, remote_dest, 350 delete_dest, preserve_symlinks) 351 utils.run(rsync) 352 try_scp = False 353 except error.CmdError, e: 354 logging.warn("trying scp, rsync failed: %s", e) 355 356 if try_scp: 357 # scp has no equivalent to --delete, just drop the entire dest dir 358 if delete_dest: 359 is_dir = self.run("ls -d %s/" % dest, 360 ignore_status=True).exit_status == 0 361 if is_dir: 362 cmd = "rm -rf %s && mkdir %s" 363 cmd %= (dest, dest) 364 self.run(cmd) 365 366 local_sources = self._make_rsync_compatible_source(source, True) 367 if local_sources: 368 scp = self._make_scp_cmd(local_sources, remote_dest) 369 try: 370 utils.run(scp) 371 except error.CmdError, e: 372 raise error.AutoservRunError(e.args[0], e.args[1]) 373 374 375 def ssh_ping(self, timeout=60): 376 """ 377 Pings remote host via ssh. 378 379 @param timeout: Time in seconds before giving up. 380 Defaults to 60 seconds. 381 @raise AutoservSSHTimeout: If the ssh ping times out. 382 @raise AutoservSshPermissionDeniedError: If ssh ping fails due to 383 permissions. 384 @raise AutoservSshPingHostError: For other AutoservRunErrors. 385 """ 386 try: 387 self.run("true", timeout=timeout, connect_timeout=timeout) 388 except error.AutoservSSHTimeout: 389 msg = "Host (ssh) verify timed out (timeout = %d)" % timeout 390 raise error.AutoservSSHTimeout(msg) 391 except error.AutoservSshPermissionDeniedError: 392 #let AutoservSshPermissionDeniedError be visible to the callers 393 raise 394 except error.AutoservRunError, e: 395 # convert the generic AutoservRunError into something more 396 # specific for this context 397 raise error.AutoservSshPingHostError(e.description + '\n' + 398 repr(e.result_obj)) 399 400 401 def is_up(self, timeout=60): 402 """ 403 Check if the remote host is up. 404 405 @param timeout: timeout in seconds. 406 @returns True if the remote host is up before the timeout expires, 407 False otherwise. 408 """ 409 try: 410 self.ssh_ping(timeout=timeout) 411 except error.AutoservError: 412 return False 413 else: 414 return True 415 416 417 def wait_up(self, timeout=None): 418 """ 419 Wait until the remote host is up or the timeout expires. 420 421 In fact, it will wait until an ssh connection to the remote 422 host can be established, and getty is running. 423 424 @param timeout time limit in seconds before returning even 425 if the host is not up. 426 427 @returns True if the host was found to be up before the timeout expires, 428 False otherwise 429 """ 430 if timeout: 431 end_time = time.time() + timeout 432 current_time = time.time() 433 434 while not timeout or current_time < end_time: 435 if self.is_up(timeout=end_time - current_time): 436 try: 437 if self.are_wait_up_processes_up(): 438 logging.debug('Host %s is now up', self.hostname) 439 return True 440 except error.AutoservError: 441 pass 442 time.sleep(1) 443 current_time = time.time() 444 445 logging.debug('Host %s is still down after waiting %d seconds', 446 self.hostname, int(timeout + time.time() - end_time)) 447 return False 448 449 450 def wait_down(self, timeout=None, warning_timer=None, old_boot_id=None): 451 """ 452 Wait until the remote host is down or the timeout expires. 453 454 If old_boot_id is provided, this will wait until either the machine 455 is unpingable or self.get_boot_id() returns a value different from 456 old_boot_id. If the boot_id value has changed then the function 457 returns true under the assumption that the machine has shut down 458 and has now already come back up. 459 460 If old_boot_id is None then until the machine becomes unreachable the 461 method assumes the machine has not yet shut down. 462 463 Based on this definition, the 4 possible permutations of timeout 464 and old_boot_id are: 465 1. timeout and old_boot_id: wait timeout seconds for either the 466 host to become unpingable, or the boot id 467 to change. In the latter case we've rebooted 468 and in the former case we've only shutdown, 469 but both cases return True. 470 2. only timeout: wait timeout seconds for the host to become unpingable. 471 If the host remains pingable throughout timeout seconds 472 we return False. 473 3. only old_boot_id: wait forever until either the host becomes 474 unpingable or the boot_id changes. Return true 475 when either of those conditions are met. 476 4. not timeout, not old_boot_id: wait forever till the host becomes 477 unpingable. 478 479 @param timeout Time limit in seconds before returning even 480 if the host is still up. 481 @param warning_timer Time limit in seconds that will generate 482 a warning if the host is not down yet. 483 @param old_boot_id A string containing the result of self.get_boot_id() 484 prior to the host being told to shut down. Can be None if this is 485 not available. 486 487 @returns True if the host was found to be down, False otherwise 488 """ 489 #TODO: there is currently no way to distinguish between knowing 490 #TODO: boot_id was unsupported and not knowing the boot_id. 491 current_time = time.time() 492 if timeout: 493 end_time = current_time + timeout 494 495 if warning_timer: 496 warn_time = current_time + warning_timer 497 498 if old_boot_id is not None: 499 logging.debug('Host %s pre-shutdown boot_id is %s', 500 self.hostname, old_boot_id) 501 502 # Impose semi real-time deadline constraints, since some clients 503 # (eg: watchdog timer tests) expect strict checking of time elapsed. 504 # Each iteration of this loop is treated as though it atomically 505 # completes within current_time, this is needed because if we used 506 # inline time.time() calls instead then the following could happen: 507 # 508 # while not timeout or time.time() < end_time: [23 < 30] 509 # some code. [takes 10 secs] 510 # try: 511 # new_boot_id = self.get_boot_id(timeout=end_time - time.time()) 512 # [30 - 33] 513 # The last step will lead to a return True, when in fact the machine 514 # went down at 32 seconds (>30). Hence we need to pass get_boot_id 515 # the same time that allowed us into that iteration of the loop. 516 while not timeout or current_time < end_time: 517 try: 518 new_boot_id = self.get_boot_id(timeout=end_time - current_time) 519 except error.AutoservError: 520 logging.debug('Host %s is now unreachable over ssh, is down', 521 self.hostname) 522 return True 523 else: 524 # if the machine is up but the boot_id value has changed from 525 # old boot id, then we can assume the machine has gone down 526 # and then already come back up 527 if old_boot_id is not None and old_boot_id != new_boot_id: 528 logging.debug('Host %s now has boot_id %s and so must ' 529 'have rebooted', self.hostname, new_boot_id) 530 return True 531 532 if warning_timer and current_time > warn_time: 533 self.record("WARN", None, "shutdown", 534 "Shutdown took longer than %ds" % warning_timer) 535 # Print the warning only once. 536 warning_timer = None 537 # If a machine is stuck switching runlevels 538 # This may cause the machine to reboot. 539 self.run('kill -HUP 1', ignore_status=True) 540 541 time.sleep(1) 542 current_time = time.time() 543 544 return False 545 546 547 # tunable constants for the verify & repair code 548 AUTOTEST_GB_DISKSPACE_REQUIRED = get_value("SERVER", 549 "gb_diskspace_required", 550 type=float, 551 default=20.0) 552 553 554 def verify_connectivity(self): 555 super(AbstractSSHHost, self).verify_connectivity() 556 557 logging.info('Pinging host ' + self.hostname) 558 self.ssh_ping() 559 logging.info("Host (ssh) %s is alive", self.hostname) 560 561 if self.is_shutting_down(): 562 raise error.AutoservHostIsShuttingDownError("Host is shutting down") 563 564 565 def verify_software(self): 566 super(AbstractSSHHost, self).verify_software() 567 try: 568 self.check_diskspace(autotest.Autotest.get_install_dir(self), 569 self.AUTOTEST_GB_DISKSPACE_REQUIRED) 570 except error.AutoservHostError: 571 raise # only want to raise if it's a space issue 572 except autotest.AutodirNotFoundError: 573 # autotest dir may not exist, etc. ignore 574 logging.debug('autodir space check exception, this is probably ' 575 'safe to ignore\n' + traceback.format_exc()) 576 577 578 def close(self): 579 super(AbstractSSHHost, self).close() 580 self._cleanup_master_ssh() 581 os.remove(self.known_hosts_file) 582 583 584 def _cleanup_master_ssh(self): 585 """ 586 Release all resources (process, temporary directory) used by an active 587 master SSH connection. 588 """ 589 # If a master SSH connection is running, kill it. 590 if self.master_ssh_job is not None: 591 utils.nuke_subprocess(self.master_ssh_job.sp) 592 self.master_ssh_job = None 593 594 # Remove the temporary directory for the master SSH socket. 595 if self.master_ssh_tempdir is not None: 596 self.master_ssh_tempdir.clean() 597 self.master_ssh_tempdir = None 598 self.master_ssh_option = '' 599 600 601 def start_master_ssh(self): 602 """ 603 Called whenever a slave SSH connection needs to be initiated (e.g., by 604 run, rsync, scp). If master SSH support is enabled and a master SSH 605 connection is not active already, start a new one in the background. 606 Also, cleanup any zombie master SSH connections (e.g., dead due to 607 reboot). 608 """ 609 if not enable_master_ssh: 610 return 611 612 # If a previously started master SSH connection is not running 613 # anymore, it needs to be cleaned up and then restarted. 614 if self.master_ssh_job is not None: 615 if self.master_ssh_job.sp.poll() is not None: 616 logging.info("Master ssh connection to %s is down.", 617 self.hostname) 618 self._cleanup_master_ssh() 619 620 # Start a new master SSH connection. 621 if self.master_ssh_job is None: 622 # Create a shared socket in a temp location. 623 self.master_ssh_tempdir = autotemp.tempdir(unique_id='ssh-master') 624 self.master_ssh_option = ("-o ControlPath=%s/socket" % 625 self.master_ssh_tempdir.name) 626 627 # Start the master SSH connection in the background. 628 master_cmd = self.ssh_command(options="-N -o ControlMaster=yes") 629 logging.info("Starting master ssh connection '%s'" % master_cmd) 630 self.master_ssh_job = utils.BgJob(master_cmd, 631 nickname='master-ssh') 632 633 634 def clear_known_hosts(self): 635 """Clears out the temporary ssh known_hosts file. 636 637 This is useful if the test SSHes to the machine, then reinstalls it, 638 then SSHes to it again. It can be called after the reinstall to 639 reduce the spam in the logs. 640 """ 641 logging.info("Clearing known hosts for host '%s', file '%s'.", 642 self.hostname, self.known_hosts_file) 643 # Clear out the file by opening it for writing and then closing. 644 fh = open(self.known_hosts_file, "w") 645 fh.close() 646