remote.py revision 9af1963458b7ba2cb59c401e055aecb0b29bfbbc
1"""This class defines the Remote host class."""
2
3import os, logging, urllib, time
4from autotest_lib.client.common_lib import error
5from autotest_lib.server import utils
6from autotest_lib.server.hosts import base_classes
7
8
9class RemoteHost(base_classes.Host):
10    """
11    This class represents a remote machine on which you can run
12    programs.
13
14    It may be accessed through a network, a serial line, ...
15    It is not the machine autoserv is running on.
16
17    Implementation details:
18    This is an abstract class, leaf subclasses must implement the methods
19    listed here and in parent classes which have no implementation. They
20    may reimplement methods which already have an implementation. You
21    must not instantiate this class but should instantiate one of those
22    leaf subclasses.
23    """
24
    # Default timeout used by reboot() and wait_for_restart(); reuses the
    # value defined on the base Host class.
    DEFAULT_REBOOT_TIMEOUT = base_classes.Host.DEFAULT_REBOOT_TIMEOUT
    # Unique sentinel used as the default "label" argument of reboot(); it
    # stands for "boot whatever the job booted last".
    LAST_BOOT_TAG = object()
    # Default timeout handed to wait_down() by halt(); presumably seconds
    # (i.e. two minutes) -- TODO confirm the unit against wait_down().
    DEFAULT_HALT_TIMEOUT = 2 * 60

    # Path on the remote host where job_start() stores its copy of
    # /var/log/messages.
    VAR_LOG_MESSAGES_COPY_PATH = "/var/tmp/messages.autotest_start"
30
31    def _initialize(self, hostname, autodir=None, *args, **dargs):
32        super(RemoteHost, self)._initialize(*args, **dargs)
33
34        self.hostname = hostname
35        self.autodir = autodir
36        self.tmp_dirs = []
37
38
39    def __repr__(self):
40        return "<remote host: %s>" % self.hostname
41
42
43    def close(self):
44        super(RemoteHost, self).close()
45        self.stop_loggers()
46
47        if hasattr(self, 'tmp_dirs'):
48            for dir in self.tmp_dirs:
49                try:
50                    self.run('rm -rf "%s"' % (utils.sh_escape(dir)))
51                except error.AutoservRunError:
52                    pass
53
54
55    def job_start(self):
56        """
57        Abstract method, called the first time a remote host object
58        is created for a specific host after a job starts.
59
60        This method depends on the create_host factory being used to
61        construct your host object. If you directly construct host objects
62        you will need to call this method yourself (and enforce the
63        single-call rule).
64        """
65        try:
66            self.run('rm -f %s' % self.VAR_LOG_MESSAGES_COPY_PATH)
67            self.run('cp /var/log/messages %s' %
68                     self.VAR_LOG_MESSAGES_COPY_PATH)
69        except Exception, e:
70            # Non-fatal error
71            logging.info('Failed to copy /var/log/messages at startup: %s', e)
72
73
74    def get_autodir(self):
75        return self.autodir
76
77
78    def set_autodir(self, autodir):
79        """
80        This method is called to make the host object aware of the
81        where autotest is installed. Called in server/autotest.py
82        after a successful install
83        """
84        self.autodir = autodir
85
86
87    def sysrq_reboot(self):
88        self.run_background('echo b > /proc/sysrq-trigger')
89
90
91    def halt(self, timeout=DEFAULT_HALT_TIMEOUT, wait=True):
92        self.run_background('sleep 1 ; halt')
93        if wait:
94            self.wait_down(timeout=timeout)
95
96
97    def reboot(self, timeout=DEFAULT_REBOOT_TIMEOUT, label=LAST_BOOT_TAG,
98               kernel_args=None, wait=True, fastsync=False,
99               reboot_cmd=None, **dargs):
100        """
101        Reboot the remote host.
102
103        Args:
104                timeout - How long to wait for the reboot.
105                label - The label we should boot into.  If None, we will
106                        boot into the default kernel.  If it's LAST_BOOT_TAG,
107                        we'll boot into whichever kernel was .boot'ed last
108                        (or the default kernel if we haven't .boot'ed in this
109                        job).  If it's None, we'll boot into the default kernel.
110                        If it's something else, we'll boot into that.
111                wait - Should we wait to see if the machine comes back up.
112                fastsync - Don't wait for the sync to complete, just start one
113                        and move on. This is for cases where rebooting prompty
114                        is more important than data integrity and/or the
115                        machine may have disks that cause sync to never return.
116                reboot_cmd - Reboot command to execute.
117        """
118        if self.job:
119            if label == self.LAST_BOOT_TAG:
120                label = self.job.last_boot_tag
121            else:
122                self.job.last_boot_tag = label
123
124        self.reboot_setup(label=label, kernel_args=kernel_args, **dargs)
125
126        if label or kernel_args:
127            if not label:
128                label = self.bootloader.get_default_title()
129            self.bootloader.boot_once(label)
130            if kernel_args:
131                self.bootloader.add_args(label, kernel_args)
132
133        if not reboot_cmd:
134            reboot_cmd = ('sync & sleep 5; '
135                          'reboot & sleep 60; '
136                          'reboot -f & sleep 10; '
137                          'reboot -nf & sleep 10; '
138                          'telinit 6')
139
140        def reboot():
141            self.record("GOOD", None, "reboot.start")
142            try:
143                current_boot_id = self.get_boot_id()
144
145                # sync before starting the reboot, so that a long sync during
146                # shutdown isn't timed out by wait_down's short timeout
147                if not fastsync:
148                    self.run('sync; sync', timeout=timeout, ignore_status=True)
149
150                self.run_background(reboot_cmd)
151            except error.AutoservRunError:
152                self.record("ABORT", None, "reboot.start",
153                              "reboot command failed")
154                raise
155            if wait:
156                self.wait_for_restart(timeout, old_boot_id=current_boot_id,
157                                      **dargs)
158
159        # if this is a full reboot-and-wait, run the reboot inside a group
160        if wait:
161            self.log_op(self.OP_REBOOT, reboot)
162        else:
163            reboot()
164
165    def suspend(self, timeout, suspend_cmd, **dargs):
166        """
167        Suspend the remote host.
168
169        Args:
170                timeout - How long to wait for the suspend.
171                susped_cmd - suspend command to execute.
172        """
173        # define a function for the supend and run it in a group
174        def suspend():
175            self.record("GOOD", None, "suspend.start for %d seconds" % (timeout))
176            try:
177                self.run_background(suspend_cmd)
178            except error.AutoservRunError:
179                self.record("ABORT", None, "suspend.start",
180                            "suspend command failed")
181                raise error.AutoservSuspendError("suspend command failed")
182
183            # Wait for some time, to ensure the machine is going to sleep.
184            # Not too long to check if the machine really suspended.
185            time_slice = min(timeout / 2, 300)
186            time.sleep(time_slice)
187            time_counter = time_slice
188            while time_counter < timeout + 60:
189                # Check if the machine is back. We check regularely to
190                # ensure the machine was suspended long enough.
191                if utils.ping(self.hostname, tries=1, deadline=1) == 0:
192                    return
193                else:
194                    if time_counter > timeout - 10:
195                        time_slice = 5
196                    time.sleep(time_slice)
197                    time_counter += time_slice
198
199            if utils.ping(self.hostname, tries=1, deadline=1) != 0:
200                raise error.AutoservSuspendError(
201                    "DUT is not responding after %d seconds" % (time_counter))
202
203        start_time = time.time()
204        self.log_op(self.OP_SUSPEND, suspend)
205        lasted = time.time() - start_time
206        if (lasted < timeout):
207            raise error.AutoservSuspendError(
208                "Suspend did not last long enough: %d instead of %d" % (
209                    lasted, timeout))
210
211    def reboot_followup(self, *args, **dargs):
212        super(RemoteHost, self).reboot_followup(*args, **dargs)
213        if self.job:
214            self.job.profilers.handle_reboot(self)
215
216
217    def wait_for_restart(self, timeout=DEFAULT_REBOOT_TIMEOUT, **dargs):
218        """
219        Wait for the host to come back from a reboot. This wraps the
220        generic wait_for_restart implementation in a reboot group.
221        """
222        def op_func():
223            super(RemoteHost, self).wait_for_restart(timeout=timeout, **dargs)
224        self.log_op(self.OP_REBOOT, op_func)
225
226
227    def cleanup(self):
228        super(RemoteHost, self).cleanup()
229        self.reboot()
230
231
232    def get_tmp_dir(self, parent='/tmp'):
233        """
234        Return the pathname of a directory on the host suitable
235        for temporary file storage.
236
237        The directory and its content will be deleted automatically
238        on the destruction of the Host object that was used to obtain
239        it.
240        """
241        self.run("mkdir -p %s" % parent)
242        template = os.path.join(parent, 'autoserv-XXXXXX')
243        dir_name = self.run("mktemp -d %s" % template).stdout.rstrip()
244        self.tmp_dirs.append(dir_name)
245        return dir_name
246
247
248    def get_platform_label(self):
249        """
250        Return the platform label, or None if platform label is not set.
251        """
252
253        if self.job:
254            keyval_path = os.path.join(self.job.resultdir, 'host_keyvals',
255                                       self.hostname)
256            keyvals = utils.read_keyval(keyval_path)
257            return keyvals.get('platform', None)
258        else:
259            return None
260
261
262    def get_all_labels(self):
263        """
264        Return all labels, or empty list if label is not set.
265        """
266        if self.job:
267            keyval_path = os.path.join(self.job.resultdir, 'host_keyvals',
268                                       self.hostname)
269            keyvals = utils.read_keyval(keyval_path)
270            all_labels = keyvals.get('labels', '')
271            if all_labels:
272                all_labels = all_labels.split(',')
273                return [urllib.unquote(label) for label in all_labels]
274        return []
275
276
277    def delete_tmp_dir(self, tmpdir):
278        """
279        Delete the given temporary directory on the remote machine.
280        """
281        self.run('rm -rf "%s"' % utils.sh_escape(tmpdir), ignore_status=True)
282        self.tmp_dirs.remove(tmpdir)
283
284
285    def check_uptime(self):
286        """
287        Check that uptime is available and monotonically increasing.
288        """
289        if not self.is_up():
290            raise error.AutoservHostError('Client does not appear to be up')
291        result = self.run("/bin/cat /proc/uptime", 30)
292        return result.stdout.strip().split()[0]
293
294
295    def check_for_lkdtm(self):
296        """
297        Check for kernel dump test module. return True if exist.
298        """
299        cmd = 'ls /sys/kernel/debug/provoke-crash/DIRECT'
300        return self.run(cmd, ignore_status=True).exit_status == 0
301
302
303    def are_wait_up_processes_up(self):
304        """
305        Checks if any HOSTS waitup processes are running yet on the
306        remote host.
307
308        Returns True if any the waitup processes are running, False
309        otherwise.
310        """
311        processes = self.get_wait_up_processes()
312        if len(processes) == 0:
313            return True # wait up processes aren't being used
314        for procname in processes:
315            exit_status = self.run("{ ps -e || ps; } | grep '%s'" % procname,
316                                   ignore_status=True).exit_status
317            if exit_status == 0:
318                return True
319        return False
320