abstract_ssh.py revision 53a216a5ae9cf732330846e652dff8d0ad29bd2a
1import os, time, socket, shutil, glob, logging, traceback, tempfile
2from autotest_lib.client.common_lib import autotemp, error
3from autotest_lib.server import utils, autotest
4from autotest_lib.server.hosts import remote
5from autotest_lib.client.common_lib.global_config import global_config
6
7# pylint: disable-msg=C0111
8
# Shorthand for reading values out of the global autotest configuration.
get_value = global_config.get_config_value
# Whether SSH sessions should share a master connection (see
# AbstractSSHHost.start_master_ssh). Read from the AUTOSERV section of the
# config; off unless explicitly enabled.
enable_master_ssh = get_value('AUTOSERV', 'enable_master_ssh', type=bool,
                              default=False)
12
13
14class AbstractSSHHost(remote.RemoteHost):
15    """
16    This class represents a generic implementation of most of the
17    framework necessary for controlling a host via ssh. It implements
18    almost all of the abstract Host methods, except for the core
19    Host.run method.
20    """
21
22    def _initialize(self, hostname, user="root", port=22, password="",
23                    *args, **dargs):
24        super(AbstractSSHHost, self)._initialize(hostname=hostname,
25                                                 *args, **dargs)
26        self.ip = socket.getaddrinfo(self.hostname, None)[0][4][0]
27        self.user = user
28        self.port = port
29        self.password = password
30        self._use_rsync = None
31        self.known_hosts_file = tempfile.mkstemp()[1]
32
33        """
34        Master SSH connection background job, socket temp directory and socket
35        control path option. If master-SSH is enabled, these fields will be
36        initialized by start_master_ssh when a new SSH connection is initiated.
37        """
38        self.master_ssh_job = None
39        self.master_ssh_tempdir = None
40        self.master_ssh_option = ''
41
42
43    def make_ssh_command(self, user="root", port=22, opts='',
44                         hosts_file='/dev/null',
45                         connect_timeout=30, alive_interval=300):
46        base_command = ("/usr/bin/ssh -a -x %s -o StrictHostKeyChecking=no "
47                        "-o UserKnownHostsFile=%s -o BatchMode=yes "
48                        "-o ConnectTimeout=%d -o ServerAliveInterval=%d "
49                        "-l %s -p %d")
50        assert isinstance(connect_timeout, (int, long))
51        assert connect_timeout > 0 # can't disable the timeout
52        return base_command % (opts, hosts_file, connect_timeout,
53                               alive_interval, user, port)
54
55
56    def use_rsync(self):
57        if self._use_rsync is not None:
58            return self._use_rsync
59
60        # Check if rsync is available on the remote host. If it's not,
61        # don't try to use it for any future file transfers.
62        self._use_rsync = self._check_rsync()
63        if not self._use_rsync:
64            logging.warn("rsync not available on remote host %s -- disabled",
65                         self.hostname)
66        return self._use_rsync
67
68
69    def _check_rsync(self):
70        """
71        Check if rsync is available on the remote host.
72        """
73        try:
74            self.run("rsync --version", stdout_tee=None, stderr_tee=None)
75        except error.AutoservRunError:
76            return False
77        return True
78
79
80    def _encode_remote_paths(self, paths, escape=True):
81        """
82        Given a list of file paths, encodes it as a single remote path, in
83        the style used by rsync and scp.
84        """
85        if escape:
86            paths = [utils.scp_remote_escape(path) for path in paths]
87        return '%s@%s:"%s"' % (self.user, self.hostname, " ".join(paths))
88
89
90    def _make_rsync_cmd(self, sources, dest, delete_dest, preserve_symlinks):
91        """
92        Given a list of source paths and a destination path, produces the
93        appropriate rsync command for copying them. Remote paths must be
94        pre-encoded.
95        """
96        ssh_cmd = self.make_ssh_command(user=self.user, port=self.port,
97                                        opts=self.master_ssh_option,
98                                        hosts_file=self.known_hosts_file)
99        if delete_dest:
100            delete_flag = "--delete"
101        else:
102            delete_flag = ""
103        if preserve_symlinks:
104            symlink_flag = ""
105        else:
106            symlink_flag = "-L"
107        command = "rsync %s %s --timeout=1800 --rsh='%s' -az %s %s"
108        return command % (symlink_flag, delete_flag, ssh_cmd,
109                          " ".join(sources), dest)
110
111
112    def _make_ssh_cmd(self, cmd):
113        """
114        Create a base ssh command string for the host which can be used
115        to run commands directly on the machine
116        """
117        base_cmd = self.make_ssh_command(user=self.user, port=self.port,
118                                         opts=self.master_ssh_option,
119                                         hosts_file=self.known_hosts_file)
120
121        return '%s %s "%s"' % (base_cmd, self.hostname, utils.sh_escape(cmd))
122
123    def _make_scp_cmd(self, sources, dest):
124        """
125        Given a list of source paths and a destination path, produces the
126        appropriate scp command for encoding it. Remote paths must be
127        pre-encoded.
128        """
129        command = ("scp -rq %s -o StrictHostKeyChecking=no "
130                   "-o UserKnownHostsFile=%s -P %d %s '%s'")
131        return command % (self.master_ssh_option, self.known_hosts_file,
132                          self.port, " ".join(sources), dest)
133
134
135    def _make_rsync_compatible_globs(self, path, is_local):
136        """
137        Given an rsync-style path, returns a list of globbed paths
138        that will hopefully provide equivalent behaviour for scp. Does not
139        support the full range of rsync pattern matching behaviour, only that
140        exposed in the get/send_file interface (trailing slashes).
141
142        The is_local param is flag indicating if the paths should be
143        interpreted as local or remote paths.
144        """
145
146        # non-trailing slash paths should just work
147        if len(path) == 0 or path[-1] != "/":
148            return [path]
149
150        # make a function to test if a pattern matches any files
151        if is_local:
152            def glob_matches_files(path, pattern):
153                return len(glob.glob(path + pattern)) > 0
154        else:
155            def glob_matches_files(path, pattern):
156                result = self.run("ls \"%s\"%s" % (utils.sh_escape(path),
157                                                   pattern),
158                                  stdout_tee=None, ignore_status=True)
159                return result.exit_status == 0
160
161        # take a set of globs that cover all files, and see which are needed
162        patterns = ["*", ".[!.]*"]
163        patterns = [p for p in patterns if glob_matches_files(path, p)]
164
165        # convert them into a set of paths suitable for the commandline
166        if is_local:
167            return ["\"%s\"%s" % (utils.sh_escape(path), pattern)
168                    for pattern in patterns]
169        else:
170            return [utils.scp_remote_escape(path) + pattern
171                    for pattern in patterns]
172
173
174    def _make_rsync_compatible_source(self, source, is_local):
175        """
176        Applies the same logic as _make_rsync_compatible_globs, but
177        applies it to an entire list of sources, producing a new list of
178        sources, properly quoted.
179        """
180        return sum((self._make_rsync_compatible_globs(path, is_local)
181                    for path in source), [])
182
183
184    def _set_umask_perms(self, dest):
185        """
186        Given a destination file/dir (recursively) set the permissions on
187        all the files and directories to the max allowed by running umask.
188        """
189
190        # now this looks strange but I haven't found a way in Python to _just_
191        # get the umask, apparently the only option is to try to set it
192        umask = os.umask(0)
193        os.umask(umask)
194
195        max_privs = 0777 & ~umask
196
197        def set_file_privs(filename):
198            """Sets mode of |filename|.  Assumes |filename| exists."""
199            file_stat = os.stat(filename)
200
201            file_privs = max_privs
202            # if the original file permissions do not have at least one
203            # executable bit then do not set it anywhere
204            if not file_stat.st_mode & 0111:
205                file_privs &= ~0111
206
207            os.chmod(filename, file_privs)
208
209        # try a bottom-up walk so changes on directory permissions won't cut
210        # our access to the files/directories inside it
211        for root, dirs, files in os.walk(dest, topdown=False):
212            # when setting the privileges we emulate the chmod "X" behaviour
213            # that sets to execute only if it is a directory or any of the
214            # owner/group/other already has execute right
215            for dirname in dirs:
216                os.chmod(os.path.join(root, dirname), max_privs)
217
218            # Filter out broken symlinks as we go.
219            for filename in filter(os.path.exists, files):
220                set_file_privs(os.path.join(root, filename))
221
222
223        # now set privs for the dest itself
224        if os.path.isdir(dest):
225            os.chmod(dest, max_privs)
226        else:
227            set_file_privs(dest)
228
229
230    def get_file(self, source, dest, delete_dest=False, preserve_perm=True,
231                 preserve_symlinks=False):
232        """
233        Copy files from the remote host to a local path.
234
235        Directories will be copied recursively.
236        If a source component is a directory with a trailing slash,
237        the content of the directory will be copied, otherwise, the
238        directory itself and its content will be copied. This
239        behavior is similar to that of the program 'rsync'.
240
241        Args:
242                source: either
243                        1) a single file or directory, as a string
244                        2) a list of one or more (possibly mixed)
245                                files or directories
246                dest: a file or a directory (if source contains a
247                        directory or more than one element, you must
248                        supply a directory dest)
249                delete_dest: if this is true, the command will also clear
250                             out any old files at dest that are not in the
251                             source
252                preserve_perm: tells get_file() to try to preserve the sources
253                               permissions on files and dirs
254                preserve_symlinks: try to preserve symlinks instead of
255                                   transforming them into files/dirs on copy
256
257        Raises:
258                AutoservRunError: the scp command failed
259        """
260
261        # Start a master SSH connection if necessary.
262        self.start_master_ssh()
263
264        if isinstance(source, basestring):
265            source = [source]
266        dest = os.path.abspath(dest)
267
268        # If rsync is disabled or fails, try scp.
269        try_scp = True
270        if self.use_rsync():
271            try:
272                remote_source = self._encode_remote_paths(source)
273                local_dest = utils.sh_escape(dest)
274                rsync = self._make_rsync_cmd([remote_source], local_dest,
275                                             delete_dest, preserve_symlinks)
276                utils.run(rsync)
277                try_scp = False
278            except error.CmdError, e:
279                logging.warn("trying scp, rsync failed: %s", e)
280
281        if try_scp:
282            # scp has no equivalent to --delete, just drop the entire dest dir
283            if delete_dest and os.path.isdir(dest):
284                shutil.rmtree(dest)
285                os.mkdir(dest)
286
287            remote_source = self._make_rsync_compatible_source(source, False)
288            if remote_source:
289                # _make_rsync_compatible_source() already did the escaping
290                remote_source = self._encode_remote_paths(remote_source,
291                                                          escape=False)
292                local_dest = utils.sh_escape(dest)
293                scp = self._make_scp_cmd([remote_source], local_dest)
294                try:
295                    utils.run(scp)
296                except error.CmdError, e:
297                    raise error.AutoservRunError(e.args[0], e.args[1])
298
299        if not preserve_perm:
300            # we have no way to tell scp to not try to preserve the
301            # permissions so set them after copy instead.
302            # for rsync we could use "--no-p --chmod=ugo=rwX" but those
303            # options are only in very recent rsync versions
304            self._set_umask_perms(dest)
305
306
307    def send_file(self, source, dest, delete_dest=False,
308                  preserve_symlinks=False):
309        """
310        Copy files from a local path to the remote host.
311
312        Directories will be copied recursively.
313        If a source component is a directory with a trailing slash,
314        the content of the directory will be copied, otherwise, the
315        directory itself and its content will be copied. This
316        behavior is similar to that of the program 'rsync'.
317
318        Args:
319                source: either
320                        1) a single file or directory, as a string
321                        2) a list of one or more (possibly mixed)
322                                files or directories
323                dest: a file or a directory (if source contains a
324                        directory or more than one element, you must
325                        supply a directory dest)
326                delete_dest: if this is true, the command will also clear
327                             out any old files at dest that are not in the
328                             source
329                preserve_symlinks: controls if symlinks on the source will be
330                    copied as such on the destination or transformed into the
331                    referenced file/directory
332
333        Raises:
334                AutoservRunError: the scp command failed
335        """
336
337        # Start a master SSH connection if necessary.
338        self.start_master_ssh()
339
340        if isinstance(source, basestring):
341            source = [source]
342        remote_dest = self._encode_remote_paths([dest])
343
344        # If rsync is disabled or fails, try scp.
345        try_scp = True
346        if self.use_rsync():
347            try:
348                local_sources = [utils.sh_escape(path) for path in source]
349                rsync = self._make_rsync_cmd(local_sources, remote_dest,
350                                             delete_dest, preserve_symlinks)
351                utils.run(rsync)
352                try_scp = False
353            except error.CmdError, e:
354                logging.warn("trying scp, rsync failed: %s", e)
355
356        if try_scp:
357            # scp has no equivalent to --delete, just drop the entire dest dir
358            if delete_dest:
359                is_dir = self.run("ls -d %s/" % dest,
360                                  ignore_status=True).exit_status == 0
361                if is_dir:
362                    cmd = "rm -rf %s && mkdir %s"
363                    cmd %= (dest, dest)
364                    self.run(cmd)
365
366            local_sources = self._make_rsync_compatible_source(source, True)
367            if local_sources:
368                scp = self._make_scp_cmd(local_sources, remote_dest)
369                try:
370                    utils.run(scp)
371                except error.CmdError, e:
372                    raise error.AutoservRunError(e.args[0], e.args[1])
373
374
375    def ssh_ping(self, timeout=60):
376        """
377        Pings remote host via ssh.
378
379        @param timeout: Time in seconds before giving up.
380                        Defaults to 60 seconds.
381        @raise AutoservSSHTimeout: If the ssh ping times out.
382        @raise AutoservSshPermissionDeniedError: If ssh ping fails due to
383                                                 permissions.
384        @raise AutoservSshPingHostError: For other AutoservRunErrors.
385        """
386        try:
387            self.run("true", timeout=timeout, connect_timeout=timeout)
388        except error.AutoservSSHTimeout:
389            msg = "Host (ssh) verify timed out (timeout = %d)" % timeout
390            raise error.AutoservSSHTimeout(msg)
391        except error.AutoservSshPermissionDeniedError:
392            #let AutoservSshPermissionDeniedError be visible to the callers
393            raise
394        except error.AutoservRunError, e:
395            # convert the generic AutoservRunError into something more
396            # specific for this context
397            raise error.AutoservSshPingHostError(e.description + '\n' +
398                                                 repr(e.result_obj))
399
400
401    def is_up(self, timeout=60):
402        """
403        Check if the remote host is up.
404
405        @param timeout: timeout in seconds.
406        @returns True if the remote host is up before the timeout expires,
407                 False otherwise.
408        """
409        try:
410            self.ssh_ping(timeout=timeout)
411        except error.AutoservError:
412            return False
413        else:
414            return True
415
416
417    def wait_up(self, timeout=None):
418        """
419        Wait until the remote host is up or the timeout expires.
420
421        In fact, it will wait until an ssh connection to the remote
422        host can be established, and getty is running.
423
424        @param timeout time limit in seconds before returning even
425            if the host is not up.
426
427        @returns True if the host was found to be up before the timeout expires,
428                 False otherwise
429        """
430        if timeout:
431            end_time = time.time() + timeout
432            current_time = time.time()
433
434        while not timeout or current_time < end_time:
435            if self.is_up(timeout=end_time - current_time):
436                try:
437                    if self.are_wait_up_processes_up():
438                        logging.debug('Host %s is now up', self.hostname)
439                        return True
440                except error.AutoservError:
441                    pass
442            time.sleep(1)
443            current_time = time.time()
444
445        logging.debug('Host %s is still down after waiting %d seconds',
446                      self.hostname, int(timeout + time.time() - end_time))
447        return False
448
449
450    def wait_down(self, timeout=None, warning_timer=None, old_boot_id=None):
451        """
452        Wait until the remote host is down or the timeout expires.
453
454        If old_boot_id is provided, this will wait until either the machine
455        is unpingable or self.get_boot_id() returns a value different from
456        old_boot_id. If the boot_id value has changed then the function
457        returns true under the assumption that the machine has shut down
458        and has now already come back up.
459
460        If old_boot_id is None then until the machine becomes unreachable the
461        method assumes the machine has not yet shut down.
462
463        Based on this definition, the 4 possible permutations of timeout
464        and old_boot_id are:
465        1. timeout and old_boot_id: wait timeout seconds for either the
466                                    host to become unpingable, or the boot id
467                                    to change. In the latter case we've rebooted
468                                    and in the former case we've only shutdown,
469                                    but both cases return True.
470        2. only timeout: wait timeout seconds for the host to become unpingable.
471                         If the host remains pingable throughout timeout seconds
472                         we return False.
473        3. only old_boot_id: wait forever until either the host becomes
474                             unpingable or the boot_id changes. Return true
475                             when either of those conditions are met.
476        4. not timeout, not old_boot_id: wait forever till the host becomes
477                                         unpingable.
478
479        @param timeout Time limit in seconds before returning even
480            if the host is still up.
481        @param warning_timer Time limit in seconds that will generate
482            a warning if the host is not down yet.
483        @param old_boot_id A string containing the result of self.get_boot_id()
484            prior to the host being told to shut down. Can be None if this is
485            not available.
486
487        @returns True if the host was found to be down, False otherwise
488        """
489        #TODO: there is currently no way to distinguish between knowing
490        #TODO: boot_id was unsupported and not knowing the boot_id.
491        current_time = time.time()
492        if timeout:
493            end_time = current_time + timeout
494
495        if warning_timer:
496            warn_time = current_time + warning_timer
497
498        if old_boot_id is not None:
499            logging.debug('Host %s pre-shutdown boot_id is %s',
500                          self.hostname, old_boot_id)
501
502        # Impose semi real-time deadline constraints, since some clients
503        # (eg: watchdog timer tests) expect strict checking of time elapsed.
504        # Each iteration of this loop is treated as though it atomically
505        # completes within current_time, this is needed because if we used
506        # inline time.time() calls instead then the following could happen:
507        #
508        # while not timeout or time.time() < end_time:      [23 < 30]
509        #    some code.                                     [takes 10 secs]
510        #    try:
511        #        new_boot_id = self.get_boot_id(timeout=end_time - time.time())
512        #                                                   [30 - 33]
513        # The last step will lead to a return True, when in fact the machine
514        # went down at 32 seconds (>30). Hence we need to pass get_boot_id
515        # the same time that allowed us into that iteration of the loop.
516        while not timeout or current_time < end_time:
517            try:
518                new_boot_id = self.get_boot_id(timeout=end_time - current_time)
519            except error.AutoservError:
520                logging.debug('Host %s is now unreachable over ssh, is down',
521                              self.hostname)
522                return True
523            else:
524                # if the machine is up but the boot_id value has changed from
525                # old boot id, then we can assume the machine has gone down
526                # and then already come back up
527                if old_boot_id is not None and old_boot_id != new_boot_id:
528                    logging.debug('Host %s now has boot_id %s and so must '
529                                  'have rebooted', self.hostname, new_boot_id)
530                    return True
531
532            if warning_timer and current_time > warn_time:
533                self.record("WARN", None, "shutdown",
534                            "Shutdown took longer than %ds" % warning_timer)
535                # Print the warning only once.
536                warning_timer = None
537                # If a machine is stuck switching runlevels
538                # This may cause the machine to reboot.
539                self.run('kill -HUP 1', ignore_status=True)
540
541            time.sleep(1)
542            current_time = time.time()
543
544        return False
545
546
    # tunable constants for the verify & repair code
    # Amount of disk space that verify_software() passes to
    # check_diskspace() for the autotest install directory. Read from the
    # "gb_diskspace_required" key of the SERVER config section as a float,
    # defaulting to 20.0 (presumably gigabytes, going by the key name).
    AUTOTEST_GB_DISKSPACE_REQUIRED = get_value("SERVER",
                                               "gb_diskspace_required",
                                               type=float,
                                               default=20.0)
552
553
554    def verify_connectivity(self):
555        super(AbstractSSHHost, self).verify_connectivity()
556
557        logging.info('Pinging host ' + self.hostname)
558        self.ssh_ping()
559        logging.info("Host (ssh) %s is alive", self.hostname)
560
561        if self.is_shutting_down():
562            raise error.AutoservHostIsShuttingDownError("Host is shutting down")
563
564
565    def verify_software(self):
566        super(AbstractSSHHost, self).verify_software()
567        try:
568            self.check_diskspace(autotest.Autotest.get_install_dir(self),
569                                 self.AUTOTEST_GB_DISKSPACE_REQUIRED)
570        except error.AutoservHostError:
571            raise           # only want to raise if it's a space issue
572        except autotest.AutodirNotFoundError:
573            # autotest dir may not exist, etc. ignore
574            logging.debug('autodir space check exception, this is probably '
575                          'safe to ignore\n' + traceback.format_exc())
576
577
578    def close(self):
579        super(AbstractSSHHost, self).close()
580        self._cleanup_master_ssh()
581        os.remove(self.known_hosts_file)
582
583
584    def _cleanup_master_ssh(self):
585        """
586        Release all resources (process, temporary directory) used by an active
587        master SSH connection.
588        """
589        # If a master SSH connection is running, kill it.
590        if self.master_ssh_job is not None:
591            utils.nuke_subprocess(self.master_ssh_job.sp)
592            self.master_ssh_job = None
593
594        # Remove the temporary directory for the master SSH socket.
595        if self.master_ssh_tempdir is not None:
596            self.master_ssh_tempdir.clean()
597            self.master_ssh_tempdir = None
598            self.master_ssh_option = ''
599
600
601    def start_master_ssh(self):
602        """
603        Called whenever a slave SSH connection needs to be initiated (e.g., by
604        run, rsync, scp). If master SSH support is enabled and a master SSH
605        connection is not active already, start a new one in the background.
606        Also, cleanup any zombie master SSH connections (e.g., dead due to
607        reboot).
608        """
609        if not enable_master_ssh:
610            return
611
612        # If a previously started master SSH connection is not running
613        # anymore, it needs to be cleaned up and then restarted.
614        if self.master_ssh_job is not None:
615            if self.master_ssh_job.sp.poll() is not None:
616                logging.info("Master ssh connection to %s is down.",
617                             self.hostname)
618                self._cleanup_master_ssh()
619
620        # Start a new master SSH connection.
621        if self.master_ssh_job is None:
622            # Create a shared socket in a temp location.
623            self.master_ssh_tempdir = autotemp.tempdir(unique_id='ssh-master')
624            self.master_ssh_option = ("-o ControlPath=%s/socket" %
625                                      self.master_ssh_tempdir.name)
626
627            # Start the master SSH connection in the background.
628            master_cmd = self.ssh_command(options="-N -o ControlMaster=yes")
629            logging.info("Starting master ssh connection '%s'" % master_cmd)
630            self.master_ssh_job = utils.BgJob(master_cmd,
631                                              nickname='master-ssh')
632
633
634    def clear_known_hosts(self):
635        """Clears out the temporary ssh known_hosts file.
636
637        This is useful if the test SSHes to the machine, then reinstalls it,
638        then SSHes to it again.  It can be called after the reinstall to
639        reduce the spam in the logs.
640        """
641        logging.info("Clearing known hosts for host '%s', file '%s'.",
642                     self.hostname, self.known_hosts_file)
643        # Clear out the file by opening it for writing and then closing.
644        fh = open(self.known_hosts_file, "w")
645        fh.close()
646