remote.py revision 861b2d54aec24228cdb3895dbc40062cb40cb2ad
1"""This class defines the Remote host class, mixing in the SiteHost class
2if it is available."""
3
4import os, logging, urllib
5from autotest_lib.client.common_lib import error
6from autotest_lib.server import utils
7from autotest_lib.server.hosts import base_classes, bootloader
8
9
class RemoteHost(base_classes.Host):
    """
    This class represents a remote machine on which you can run
    programs.

    It may be accessed through a network, a serial line, ...
    It is not the machine autoserv is running on.

    Implementation details:
    This is an abstract class, leaf subclasses must implement the methods
    listed here and in parent classes which have no implementation. They
    may reimplement methods which already have an implementation. You
    must not instantiate this class but should instantiate one of those
    leaf subclasses.
    """

    # reboot timeout taken unchanged from the base Host class; it is the
    # default timeout of reboot() and wait_for_restart()
    DEFAULT_REBOOT_TIMEOUT = base_classes.Host.DEFAULT_REBOOT_TIMEOUT
    # unique sentinel used as the default label of reboot(), where it is
    # replaced by job.last_boot_tag
    LAST_BOOT_TAG = object()
    # default timeout that halt() hands to wait_down(); presumably in
    # seconds (i.e. two minutes) -- TODO confirm the unit
    DEFAULT_HALT_TIMEOUT = 2 * 60

    # remote path where job_start() keeps its copy of /var/log/messages
    VAR_LOG_MESSAGES_COPY_PATH = "/var/tmp/messages.autotest_start"
31
32    def _initialize(self, hostname, autodir=None, *args, **dargs):
33        super(RemoteHost, self)._initialize(*args, **dargs)
34
35        self.hostname = hostname
36        self.autodir = autodir
37        self.tmp_dirs = []
38
39
40    def __repr__(self):
41        return "<remote host: %s>" % self.hostname
42
43
44    def close(self):
45        super(RemoteHost, self).close()
46        self.stop_loggers()
47
48        if hasattr(self, 'tmp_dirs'):
49            for dir in self.tmp_dirs:
50                try:
51                    self.run('rm -rf "%s"' % (utils.sh_escape(dir)))
52                except error.AutoservRunError:
53                    pass
54
55
56    def job_start(self):
57        """
58        Abstract method, called the first time a remote host object
59        is created for a specific host after a job starts.
60
61        This method depends on the create_host factory being used to
62        construct your host object. If you directly construct host objects
63        you will need to call this method yourself (and enforce the
64        single-call rule).
65        """
66        try:
67            self.run('rm -f %s' % self.VAR_LOG_MESSAGES_COPY_PATH)
68            self.run('cp /var/log/messages %s' %
69                     self.VAR_LOG_MESSAGES_COPY_PATH)
70        except Exception, e:
71            # Non-fatal error
72            logging.info('Failed to copy /var/log/messages at startup: %s', e)
73
74
    def get_autodir(self):
        """
        Return the autotest directory recorded for this host: the value
        given to _initialize() or last passed to set_autodir(), which
        may be None.
        """
        return self.autodir
77
78
    def set_autodir(self, autodir):
        """
        This method is called to make the host object aware of where
        autotest is installed. Called in server/autotest.py after a
        successful install.

        Args:
                autodir - The autotest directory; stored as self.autodir
                        and returned by get_autodir().
        """
        self.autodir = autodir
86
87
88    def sysrq_reboot(self):
89        self.run('echo b > /proc/sysrq-trigger &')
90
91
92    def halt(self, timeout=DEFAULT_HALT_TIMEOUT, wait=True):
93        self.run('/sbin/halt')
94        if wait:
95            self.wait_down(timeout=timeout)
96
97
98    def reboot(self, timeout=DEFAULT_REBOOT_TIMEOUT, label=LAST_BOOT_TAG,
99               kernel_args=None, wait=True, fastsync=False,
100               reboot_cmd=None, **dargs):
101        """
102        Reboot the remote host.
103
104        Args:
105                timeout - How long to wait for the reboot.
106                label - The label we should boot into.  If None, we will
107                        boot into the default kernel.  If it's LAST_BOOT_TAG,
108                        we'll boot into whichever kernel was .boot'ed last
109                        (or the default kernel if we haven't .boot'ed in this
110                        job).  If it's None, we'll boot into the default kernel.
111                        If it's something else, we'll boot into that.
112                wait - Should we wait to see if the machine comes back up.
113                fastsync - Don't wait for the sync to complete, just start one
114                        and move on. This is for cases where rebooting prompty
115                        is more important than data integrity and/or the
116                        machine may have disks that cause sync to never return.
117                reboot_cmd - Reboot command to execute.
118        """
119        if self.job:
120            if label == self.LAST_BOOT_TAG:
121                label = self.job.last_boot_tag
122            else:
123                self.job.last_boot_tag = label
124
125        self.reboot_setup(label=label, kernel_args=kernel_args, **dargs)
126
127        if label or kernel_args:
128            if not label:
129                label = self.bootloader.get_default_title()
130            self.bootloader.boot_once(label)
131            if kernel_args:
132                self.bootloader.add_args(label, kernel_args)
133
134        # define a function for the reboot and run it in a group
135        print "Reboot: initiating reboot"
136        def reboot():
137            self.record("GOOD", None, "reboot.start")
138            try:
139                current_boot_id = self.get_boot_id()
140
141                # sync before starting the reboot, so that a long sync during
142                # shutdown isn't timed out by wait_down's short timeout
143                if not fastsync:
144                    self.run('sync; sync', timeout=timeout, ignore_status=True)
145
146                if reboot_cmd:
147                    self.run(reboot_cmd)
148                else:
149                  # Try several methods of rebooting in increasing harshness.
150                    self.run('(('
151                             ' sync &'
152                             ' sleep 5; reboot &'
153                             ' sleep 60; reboot -f &'
154                             ' sleep 10; reboot -nf &'
155                             ' sleep 10; telinit 6 &'
156                             ') </dev/null >/dev/null 2>&1 &)')
157            except error.AutoservRunError:
158                self.record("ABORT", None, "reboot.start",
159                              "reboot command failed")
160                raise
161            if wait:
162                self.wait_for_restart(timeout, old_boot_id=current_boot_id,
163                                      **dargs)
164
165        # if this is a full reboot-and-wait, run the reboot inside a group
166        if wait:
167            self.log_reboot(reboot)
168        else:
169            reboot()
170
171
172    def reboot_followup(self, *args, **dargs):
173        super(RemoteHost, self).reboot_followup(*args, **dargs)
174        if self.job:
175            self.job.profilers.handle_reboot(self)
176
177
178    def wait_for_restart(self, timeout=DEFAULT_REBOOT_TIMEOUT, **dargs):
179        """
180        Wait for the host to come back from a reboot. This wraps the
181        generic wait_for_restart implementation in a reboot group.
182        """
183        def reboot_func():
184            super(RemoteHost, self).wait_for_restart(timeout=timeout, **dargs)
185        self.log_reboot(reboot_func)
186
187
188    def cleanup(self):
189        super(RemoteHost, self).cleanup()
190        self.reboot()
191
192
193    def get_tmp_dir(self, parent='/tmp'):
194        """
195        Return the pathname of a directory on the host suitable
196        for temporary file storage.
197
198        The directory and its content will be deleted automatically
199        on the destruction of the Host object that was used to obtain
200        it.
201        """
202        self.run("mkdir -p %s" % parent)
203        template = os.path.join(parent, 'autoserv-XXXXXX')
204        dir_name = self.run("mktemp -d %s" % template).stdout.rstrip()
205        self.tmp_dirs.append(dir_name)
206        return dir_name
207
208
209    def get_platform_label(self):
210        """
211        Return the platform label, or None if platform label is not set.
212        """
213
214        if self.job:
215            keyval_path = os.path.join(self.job.resultdir, 'host_keyvals',
216                                       self.hostname)
217            keyvals = utils.read_keyval(keyval_path)
218            return keyvals.get('platform', None)
219        else:
220            return None
221
222
223    def get_all_labels(self):
224        """
225        Return all labels, or empty list if label is not set.
226        """
227        if self.job:
228            keyval_path = os.path.join(self.job.resultdir, 'host_keyvals',
229                                       self.hostname)
230            keyvals = utils.read_keyval(keyval_path)
231            all_labels = keyvals.get('labels', '')
232            if all_labels:
233                all_labels = all_labels.split(',')
234                return [urllib.unquote(label) for label in all_labels]
235        return []
236
237
238    def delete_tmp_dir(self, tmpdir):
239        """
240        Delete the given temporary directory on the remote machine.
241        """
242        self.run('rm -rf "%s"' % utils.sh_escape(tmpdir), ignore_status=True)
243        self.tmp_dirs.remove(tmpdir)
244
245
246    def check_uptime(self):
247        """
248        Check that uptime is available and monotonically increasing.
249        """
250        if not self.is_up():
251            raise error.AutoservHostError('Client does not appear to be up')
252        result = self.run("/bin/cat /proc/uptime", 30)
253        return result.stdout.strip().split()[0]
254
255
256    def are_wait_up_processes_up(self):
257        """
258        Checks if any HOSTS waitup processes are running yet on the
259        remote host.
260
261        Returns True if any the waitup processes are running, False
262        otherwise.
263        """
264        processes = self.get_wait_up_processes()
265        if len(processes) == 0:
266            return True # wait up processes aren't being used
267        for procname in processes:
268            exit_status = self.run("{ ps -e || ps; } | grep '%s'" % procname,
269                                   ignore_status=True).exit_status
270            if exit_status == 0:
271                return True
272        return False
273