remote.py revision 9af1963458b7ba2cb59c401e055aecb0b29bfbbc
"""This class defines the Remote host class."""

import logging
import os
import time
import urllib

from autotest_lib.client.common_lib import error
from autotest_lib.server import utils
from autotest_lib.server.hosts import base_classes


class RemoteHost(base_classes.Host):
    """
    This class represents a remote machine on which you can run
    programs.

    It may be accessed through a network, a serial line, ...
    It is not the machine autoserv is running on.

    Implementation details:
    This is an abstract class, leaf subclasses must implement the methods
    listed here and in parent classes which have no implementation. They
    may reimplement methods which already have an implementation. You
    must not instantiate this class but should instantiate one of those
    leaf subclasses.
    """

    DEFAULT_REBOOT_TIMEOUT = base_classes.Host.DEFAULT_REBOOT_TIMEOUT
    # Sentinel used as the default 'label' of reboot(); it means "boot
    # whatever label was recorded in job.last_boot_tag".
    LAST_BOOT_TAG = object()
    DEFAULT_HALT_TIMEOUT = 2 * 60

    VAR_LOG_MESSAGES_COPY_PATH = "/var/tmp/messages.autotest_start"

    def _initialize(self, hostname, autodir=None, *args, **dargs):
        """
        Store the host name and autotest directory on the host object.

        Args:
                hostname - Name of the remote host.
                autodir - Autotest install directory on the host, if known.
                *args, **dargs - Forwarded to the parent _initialize().
        """
        super(RemoteHost, self)._initialize(*args, **dargs)

        self.hostname = hostname
        self.autodir = autodir
        # Remote directories handed out by get_tmp_dir(); removed in close().
        self.tmp_dirs = []


    def __repr__(self):
        return "<remote host: %s>" % self.hostname


    def close(self):
        """
        Close the host, stop its loggers and remove every temporary
        directory that was created through get_tmp_dir().
        """
        super(RemoteHost, self).close()
        self.stop_loggers()

        # tmp_dirs is only set by _initialize(), so it may be missing.
        for tmp_dir in getattr(self, 'tmp_dirs', ()):
            try:
                self.run('rm -rf "%s"' % (utils.sh_escape(tmp_dir)))
            except error.AutoservRunError as e:
                # Best effort: a failed removal must not break close().
                logging.debug('Failed to remove temporary directory %s: %s',
                              tmp_dir, e)


    def job_start(self):
        """
        Abstract method, called the first time a remote host object
        is created for a specific host after a job starts.

        This method depends on the create_host factory being used to
        construct your host object. If you directly construct host objects
        you will need to call this method yourself (and enforce the
        single-call rule).
        """
        try:
            self.run('rm -f %s' % self.VAR_LOG_MESSAGES_COPY_PATH)
            self.run('cp /var/log/messages %s' %
                     self.VAR_LOG_MESSAGES_COPY_PATH)
        except Exception as e:
            # Non-fatal error
            logging.info('Failed to copy /var/log/messages at startup: %s', e)


    def get_autodir(self):
        """Return the autotest install directory recorded for this host."""
        return self.autodir


    def set_autodir(self, autodir):
        """
        This method is called to make the host object aware of
        where autotest is installed. Called in server/autotest.py
        after a successful install.
        """
        self.autodir = autodir


    def sysrq_reboot(self):
        """Write 'b' to /proc/sysrq-trigger on the host, in the background."""
        self.run_background('echo b > /proc/sysrq-trigger')


    def halt(self, timeout=DEFAULT_HALT_TIMEOUT, wait=True):
        """
        Halt the remote host.

        Args:
                timeout - How long to wait for the host to go down.
                wait - Should we wait for the host to go down.
        """
        self.run_background('sleep 1 ; halt')
        if wait:
            self.wait_down(timeout=timeout)


    def reboot(self, timeout=DEFAULT_REBOOT_TIMEOUT, label=LAST_BOOT_TAG,
               kernel_args=None, wait=True, fastsync=False,
               reboot_cmd=None, **dargs):
        """
        Reboot the remote host.

        Args:
                timeout - How long to wait for the reboot.
                label - The label we should boot into.  If it's
                        LAST_BOOT_TAG, we'll boot into whichever kernel
                        was .boot'ed last (or the default kernel if we
                        haven't .boot'ed in this job, or if there is no
                        job). If it's None, we'll boot into the default
                        kernel. If it's something else, we'll boot into
                        that.
                kernel_args - Extra arguments added to the booted label
                        through the bootloader.
                wait - Should we wait to see if the machine comes back up.
                fastsync - Don't wait for the sync to complete, just start one
                        and move on. This is for cases where rebooting
                        promptly is more important than data integrity and/or
                        the machine may have disks that cause sync to never
                        return.
                reboot_cmd - Reboot command to execute.
                **dargs - Forwarded to reboot_setup() and wait_for_restart().
        """
        if label is self.LAST_BOOT_TAG:
            # Resolve the sentinel. Without a job there is no recorded
            # label, so fall back to the default kernel instead of handing
            # the sentinel object itself to the bootloader.
            if self.job:
                label = self.job.last_boot_tag
            else:
                label = None
        elif self.job:
            self.job.last_boot_tag = label

        self.reboot_setup(label=label, kernel_args=kernel_args, **dargs)

        if label or kernel_args:
            if not label:
                label = self.bootloader.get_default_title()
            self.bootloader.boot_once(label)
            if kernel_args:
                self.bootloader.add_args(label, kernel_args)

        if not reboot_cmd:
            # Progressively more forceful attempts, each started only if
            # the host is still running after the preceding sleep.
            reboot_cmd = ('sync & sleep 5; '
                          'reboot & sleep 60; '
                          'reboot -f & sleep 10; '
                          'reboot -nf & sleep 10; '
                          'telinit 6')

        def reboot():
            self.record("GOOD", None, "reboot.start")
            try:
                current_boot_id = self.get_boot_id()

                # sync before starting the reboot, so that a long sync during
                # shutdown isn't timed out by wait_down's short timeout
                if not fastsync:
                    self.run('sync; sync', timeout=timeout, ignore_status=True)

                self.run_background(reboot_cmd)
            except error.AutoservRunError:
                self.record("ABORT", None, "reboot.start",
                            "reboot command failed")
                raise
            if wait:
                self.wait_for_restart(timeout, old_boot_id=current_boot_id,
                                      **dargs)

        # if this is a full reboot-and-wait, run the reboot inside a group
        if wait:
            self.log_op(self.OP_REBOOT, reboot)
        else:
            reboot()


    def suspend(self, timeout, suspend_cmd, **dargs):
        """
        Suspend the remote host.

        Args:
                timeout - How long to wait for the suspend.
                suspend_cmd - suspend command to execute.

        Raises:
                error.AutoservSuspendError - if the suspend command fails,
                        if the host does not answer pings again in time, or
                        if the whole operation took less than 'timeout'
                        seconds.
        """
        # define a function for the suspend and run it in a group
        def suspend():
            self.record("GOOD", None,
                        "suspend.start for %d seconds" % (timeout))
            try:
                self.run_background(suspend_cmd)
            except error.AutoservRunError:
                self.record("ABORT", None, "suspend.start",
                            "suspend command failed")
                raise error.AutoservSuspendError("suspend command failed")

            # Wait for some time, to ensure the machine is going to sleep.
            # Not too long to check if the machine really suspended.
            time_slice = min(timeout / 2, 300)
            time.sleep(time_slice)
            time_counter = time_slice
            while time_counter < timeout + 60:
                # Check if the machine is back. We check regularly to
                # ensure the machine was suspended long enough.
                if utils.ping(self.hostname, tries=1, deadline=1) == 0:
                    return
                else:
                    # Poll more often once we are close to the expected
                    # wake-up time.
                    if time_counter > timeout - 10:
                        time_slice = 5
                    time.sleep(time_slice)
                    time_counter += time_slice

            if utils.ping(self.hostname, tries=1, deadline=1) != 0:
                raise error.AutoservSuspendError(
                    "DUT is not responding after %d seconds" % (time_counter))

        start_time = time.time()
        self.log_op(self.OP_SUSPEND, suspend)
        lasted = time.time() - start_time
        if lasted < timeout:
            raise error.AutoservSuspendError(
                "Suspend did not last long enough: %d instead of %d" % (
                    lasted, timeout))


    def reboot_followup(self, *args, **dargs):
        """
        Run the parent reboot_followup() and, when there is a job, let
        the job's profilers handle the reboot.
        """
        super(RemoteHost, self).reboot_followup(*args, **dargs)
        if self.job:
            self.job.profilers.handle_reboot(self)


    def wait_for_restart(self, timeout=DEFAULT_REBOOT_TIMEOUT, **dargs):
        """
        Wait for the host to come back from a reboot. This wraps the
        generic wait_for_restart implementation in a reboot group.
        """
        def op_func():
            super(RemoteHost, self).wait_for_restart(timeout=timeout, **dargs)
        self.log_op(self.OP_REBOOT, op_func)


    def cleanup(self):
        """Run the parent cleanup() and then reboot the host."""
        super(RemoteHost, self).cleanup()
        self.reboot()


    def get_tmp_dir(self, parent='/tmp'):
        """
        Return the pathname of a directory on the host suitable
        for temporary file storage.

        The directory and its content will be deleted automatically
        when the Host object that was used to obtain it is closed.
        """
        self.run("mkdir -p %s" % parent)
        template = os.path.join(parent, 'autoserv-XXXXXX')
        dir_name = self.run("mktemp -d %s" % template).stdout.rstrip()
        self.tmp_dirs.append(dir_name)
        return dir_name


    def get_platform_label(self):
        """
        Return the platform label, or None if platform label is not set.
        """

        if self.job:
            keyval_path = os.path.join(self.job.resultdir, 'host_keyvals',
                                       self.hostname)
            keyvals = utils.read_keyval(keyval_path)
            return keyvals.get('platform', None)
        else:
            return None


    def get_all_labels(self):
        """
        Return all labels, or empty list if label is not set.
        """
        if self.job:
            keyval_path = os.path.join(self.job.resultdir, 'host_keyvals',
                                       self.hostname)
            keyvals = utils.read_keyval(keyval_path)
            all_labels = keyvals.get('labels', '')
            if all_labels:
                # Labels are stored comma separated and URL-quoted.
                all_labels = all_labels.split(',')
                return [urllib.unquote(label) for label in all_labels]
        return []


    def delete_tmp_dir(self, tmpdir):
        """
        Delete the given temporary directory on the remote machine.
        """
        self.run('rm -rf "%s"' % utils.sh_escape(tmpdir), ignore_status=True)
        # The directory may not have been created through get_tmp_dir();
        # the remote removal has already been attempted, so an untracked
        # path is not an error.
        if tmpdir in self.tmp_dirs:
            self.tmp_dirs.remove(tmpdir)


    def check_uptime(self):
        """
        Check that the host is up and return its uptime.

        Returns the first field of /proc/uptime, as a string.

        Raises error.AutoservHostError if the host does not appear to be up.
        """
        if not self.is_up():
            raise error.AutoservHostError('Client does not appear to be up')
        result = self.run("/bin/cat /proc/uptime", 30)
        return result.stdout.strip().split()[0]


    def check_for_lkdtm(self):
        """
        Check for kernel dump test module. Return True if it exists.
        """
        cmd = 'ls /sys/kernel/debug/provoke-crash/DIRECT'
        return self.run(cmd, ignore_status=True).exit_status == 0


    def are_wait_up_processes_up(self):
        """
        Checks if any HOSTS waitup processes are running yet on the
        remote host.

        Returns True if any of the waitup processes are running (or if
        no waitup processes are configured), False otherwise.
        """
        processes = self.get_wait_up_processes()
        if not processes:
            return True  # wait up processes aren't being used
        for procname in processes:
            exit_status = self.run("{ ps -e || ps; } | grep '%s'" % procname,
                                   ignore_status=True).exit_status
            if exit_status == 0:
                return True
        return False