remote.py revision 53aaf388e48368aa0179622957912a5284b43dd1
"""This module defines the RemoteHost class, the base class for hosts that
are not the machine autoserv is running on."""
3
4import os, time
5from autotest_lib.client.common_lib import error
6from autotest_lib.server import utils
7from autotest_lib.server.hosts import base_classes, bootloader
8
9
10class RemoteHost(base_classes.Host):
11    """
12    This class represents a remote machine on which you can run
13    programs.
14
15    It may be accessed through a network, a serial line, ...
16    It is not the machine autoserv is running on.
17
18    Implementation details:
19    This is an abstract class, leaf subclasses must implement the methods
20    listed here and in parent classes which have no implementation. They
21    may reimplement methods which already have an implementation. You
22    must not instantiate this class but should instantiate one of those
23    leaf subclasses.
24    """
25
    # default reboot timeout, taken unchanged from the base Host class; used
    # as the default "timeout" of reboot() and wait_for_restart()
    DEFAULT_REBOOT_TIMEOUT = base_classes.Host.DEFAULT_REBOOT_TIMEOUT
    # unique sentinel used as the default "label" of reboot(): reboot()
    # replaces it with the job's last_boot_tag. Being a bare object() it
    # cannot be confused with a real label value, None included.
    LAST_BOOT_TAG = object()
28
29    def _initialize(self, hostname, autodir=None, *args, **dargs):
30        super(RemoteHost, self)._initialize(*args, **dargs)
31
32        self.hostname = hostname
33        self.autodir = autodir
34        self.tmp_dirs = []
35
36
37    def close(self):
38        super(RemoteHost, self).close()
39        self.stop_loggers()
40
41        if hasattr(self, 'tmp_dirs'):
42            for dir in self.tmp_dirs:
43                try:
44                    self.run('rm -rf "%s"' % (utils.sh_escape(dir)))
45                except error.AutoservRunError:
46                    pass
47
48
49    def job_start(self):
50        """
51        Abstract method, called the first time a remote host object
52        is created for a specific host after a job starts.
53
54        This method depends on the create_host factory being used to
55        construct your host object. If you directly construct host objects
56        you will need to call this method yourself (and enforce the
57        single-call rule).
58        """
59        pass
60
61
62    def get_autodir(self):
63        return self.autodir
64
65
66    def set_autodir(self, autodir):
67        """
68        This method is called to make the host object aware of the
69        where autotest is installed. Called in server/autotest.py
70        after a successful install
71        """
72        self.autodir = autodir
73
74
75    def sysrq_reboot(self):
76        self.run('echo b > /proc/sysrq-trigger &')
77
78
79    def reboot(self, timeout=DEFAULT_REBOOT_TIMEOUT, label=LAST_BOOT_TAG,
80               kernel_args=None, wait=True, **dargs):
81        """
82        Reboot the remote host.
83
84        Args:
85                timeout - How long to wait for the reboot.
86                label - The label we should boot into.  If None, we will
87                        boot into the default kernel.  If it's LAST_BOOT_TAG,
88                        we'll boot into whichever kernel was .boot'ed last
89                        (or the default kernel if we haven't .boot'ed in this
90                        job).  If it's None, we'll boot into the default kernel.
91                        If it's something else, we'll boot into that.
92                wait - Should we wait to see if the machine comes back up.
93        """
94        if self.job:
95            if label == self.LAST_BOOT_TAG:
96                label = self.job.last_boot_tag
97            else:
98                self.job.last_boot_tag = label
99
100        self.reboot_setup(label=label, kernel_args=kernel_args, **dargs)
101
102        if label or kernel_args:
103            self.bootloader.install_boottool()
104            if not label:
105                default = int(self.bootloader.get_default())
106                label = self.bootloader.get_titles()[default]
107            self.bootloader.boot_once(label)
108            if kernel_args:
109                self.bootloader.add_args(label, kernel_args)
110
111        # define a function for the reboot and run it in a group
112        print "Reboot: initiating reboot"
113        def reboot():
114            self.record("GOOD", None, "reboot.start")
115            try:
116                self.run('(sleep 5; reboot) '
117                         '</dev/null >/dev/null 2>&1 &')
118            except error.AutoservRunError:
119                self.record("ABORT", None, "reboot.start",
120                              "reboot command failed")
121                raise
122            if wait:
123                self.wait_for_restart(timeout)
124                self.reboot_followup(**dargs)
125
126        # if this is a full reboot-and-wait, run the reboot inside a group
127        if wait:
128            self.log_reboot(reboot)
129        else:
130            reboot()
131
132
133    def wait_for_restart(self, timeout=DEFAULT_REBOOT_TIMEOUT):
134        """
135        Wait for the host to come back from a reboot. This wraps the
136        generic wait_for_restart implementation in a reboot group.
137        """
138        def reboot_func():
139            super(RemoteHost, self).wait_for_restart(timeout=timeout)
140        self.log_reboot(reboot_func)
141
142
143    def cleanup(self):
144        super(RemoteHost, self).cleanup()
145        self.reboot()
146
147
148    def get_tmp_dir(self, parent='/tmp'):
149        """
150        Return the pathname of a directory on the host suitable
151        for temporary file storage.
152
153        The directory and its content will be deleted automatically
154        on the destruction of the Host object that was used to obtain
155        it.
156        """
157        template = os.path.join(parent, 'autoserv-XXXXXX')
158        dir_name= self.run("mktemp -d %s" % template).stdout.rstrip()
159        self.tmp_dirs.append(dir_name)
160        return dir_name
161
162
163    def ping(self):
164        """
165        Ping the remote system, and return whether it's available
166        """
167        fpingcmd = "%s -q %s" % ('/usr/bin/fping', self.hostname)
168        rc = utils.system(fpingcmd, ignore_status = 1)
169        return (rc == 0)
170
171
172    def check_uptime(self):
173        """
174        Check that uptime is available and monotonically increasing.
175        """
176        if not self.ping():
177            raise error.AutoservHostError('Client is not pingable')
178        result = self.run("/bin/cat /proc/uptime", 30)
179        return result.stdout.strip().split()[0]
180
181
182    def get_crashinfo(self, test_start_time):
183        print "Collecting crash information..."
184        super(RemoteHost, self).get_crashinfo(test_start_time)
185
186        # wait for four hours, to see if the machine comes back up
187        current_time = time.strftime("%b %d %H:%M:%S", time.localtime())
188        print "Waiting four hours for %s to come up (%s)" % (self.hostname,
189                                                             current_time)
190        if not self.wait_up(timeout=4*60*60):
191            print "%s down, unable to collect crash info" % self.hostname
192            return
193        else:
194            print "%s is back up, collecting crash info" % self.hostname
195
196        # find a directory to put the crashinfo into
197        if self.job:
198            infodir = self.job.resultdir
199        else:
200            infodir = os.path.abspath(os.getcwd())
201        infodir = os.path.join(infodir, "crashinfo.%s" % self.hostname)
202        if not os.path.exists(infodir):
203            os.mkdir(infodir)
204
205        # collect various log files
206        log_files = ["/var/log/messages", "/var/log/monitor-ssh-reboots"]
207        for log in log_files:
208            print "Collecting %s..." % log
209            try:
210                self.get_file(log, infodir)
211            except Exception, e:
212                print "crashinfo collection of %s failed with:\n%s" % (log, e)
213
214        # collect dmesg
215        print "Collecting dmesg..."
216        try:
217            result = self.run("dmesg").stdout
218            file(os.path.join(infodir, "dmesg"), "w").write(result)
219        except Exception, e:
220            print "crashinfo collection of dmesg failed with:\n%s" % e
221
222
223    def are_wait_up_processes_up(self):
224        """
225        Checks if any HOSTS waitup processes are running yet on the
226        remote host.
227
228        Returns True if any the waitup processes are running, False
229        otherwise.
230        """
231        processes = self.get_wait_up_processes()
232        if len(processes) == 0:
233            return True # wait up processes aren't being used
234        for procname in processes:
235            exit_status = self.run("{ ps -e || ps; } | grep '%s'" % procname,
236                                   ignore_status=True).exit_status
237            if exit_status == 0:
238                return True
239        return False
240