# remote.py revision 53aaf388e48368aa0179622957912a5284b43dd1
"""
This module defines the RemoteHost class: the base class for hosts that
are driven remotely rather than being the machine autoserv runs on.
"""

import os
import sys
import time
from autotest_lib.client.common_lib import error
from autotest_lib.server import utils
from autotest_lib.server.hosts import base_classes, bootloader


class RemoteHost(base_classes.Host):
    """
    This class represents a remote machine on which you can run
    programs.

    It may be accessed through a network, a serial line, ...
    It is not the machine autoserv is running on.

    Implementation details:
    This is an abstract class, leaf subclasses must implement the methods
    listed here and in parent classes which have no implementation. They
    may reimplement methods which already have an implementation. You
    must not instantiate this class but should instantiate one of those
    leaf subclasses.
    """

    DEFAULT_REBOOT_TIMEOUT = base_classes.Host.DEFAULT_REBOOT_TIMEOUT
    # Sentinel used as the default reboot() label, meaning "boot whatever
    # label was booted last in this job". A unique object is used because
    # None already means "boot the default kernel".
    LAST_BOOT_TAG = object()

    def _initialize(self, hostname, autodir=None, *args, **dargs):
        """
        Record the host name and autotest directory for this host.

        Args:
            hostname - Name of the remote machine.
            autodir - Where autotest is installed on the host, if known.
            *args, **dargs - Passed through to the parent _initialize.
        """
        super(RemoteHost, self)._initialize(*args, **dargs)

        self.hostname = hostname
        self.autodir = autodir
        # remote directories handed out by get_tmp_dir(); removed in close()
        self.tmp_dirs = []


    def close(self):
        """
        Close the host: run the parent close(), stop the loggers and
        remove every temporary directory handed out by get_tmp_dir().

        Removal is best effort; a failing 'rm' is ignored so that one
        bad directory does not prevent cleanup of the others.
        """
        super(RemoteHost, self).close()
        self.stop_loggers()

        # tmp_dirs is only set by _initialize, so tolerate its absence
        if hasattr(self, 'tmp_dirs'):
            for tmp_dir in self.tmp_dirs:
                try:
                    self.run('rm -rf "%s"' % (utils.sh_escape(tmp_dir)))
                except error.AutoservRunError:
                    pass


    def job_start(self):
        """
        Abstract method, called the first time a remote host object
        is created for a specific host after a job starts.

        This method depends on the create_host factory being used to
        construct your host object. If you directly construct host objects
        you will need to call this method yourself (and enforce the
        single-call rule).
        """
        pass


    def get_autodir(self):
        """
        Return the autotest install directory recorded for this host
        (None if it has not been set).
        """
        return self.autodir


    def set_autodir(self, autodir):
        """
        This method is called to make the host object aware of
        where autotest is installed. Called in server/autotest.py
        after a successful install.
        """
        self.autodir = autodir


    def sysrq_reboot(self):
        """
        Reboot the host immediately by writing 'b' to the kernel's
        sysrq trigger. The command is backgrounded on the host.
        """
        self.run('echo b > /proc/sysrq-trigger &')


    def reboot(self, timeout=DEFAULT_REBOOT_TIMEOUT, label=LAST_BOOT_TAG,
               kernel_args=None, wait=True, **dargs):
        """
        Reboot the remote host.

        Args:
            timeout - How long to wait for the reboot.
            label - The label we should boot into. If it's LAST_BOOT_TAG,
                    we'll boot into whichever kernel was .boot'ed last
                    (or the default kernel if we haven't .boot'ed in this
                    job, or if there is no job at all). If it's None,
                    we'll boot into the default kernel. If it's something
                    else, we'll boot into that.
            kernel_args - Extra kernel arguments to add to the bootloader
                    entry for the label being booted. If no label was
                    given, they are added to the default entry.
            wait - Should we wait to see if the machine comes back up.
            **dargs - Passed to reboot_setup() and, when waiting, to
                    reboot_followup().
        """
        if label is self.LAST_BOOT_TAG:
            if self.job:
                label = self.job.last_boot_tag
            else:
                # With no job there is no remembered label. The sentinel
                # must not leak into the bootloader calls below, so fall
                # back to the default kernel.
                label = None
        elif self.job:
            self.job.last_boot_tag = label

        self.reboot_setup(label=label, kernel_args=kernel_args, **dargs)

        if label or kernel_args:
            self.bootloader.install_boottool()
            if not label:
                # kernel_args without a label: apply them to the
                # bootloader's default entry
                default = int(self.bootloader.get_default())
                label = self.bootloader.get_titles()[default]
            self.bootloader.boot_once(label)
            if kernel_args:
                self.bootloader.add_args(label, kernel_args)

        # define a function for the reboot and run it in a group
        print("Reboot: initiating reboot")
        def reboot():
            self.record("GOOD", None, "reboot.start")
            try:
                # delay and background the reboot, with stdio detached,
                # so the command can return before the host goes down
                self.run('(sleep 5; reboot) '
                         '</dev/null >/dev/null 2>&1 &')
            except error.AutoservRunError:
                self.record("ABORT", None, "reboot.start",
                            "reboot command failed")
                raise
            if wait:
                self.wait_for_restart(timeout)
                self.reboot_followup(**dargs)

        # if this is a full reboot-and-wait, run the reboot inside a group
        if wait:
            self.log_reboot(reboot)
        else:
            reboot()


    def wait_for_restart(self, timeout=DEFAULT_REBOOT_TIMEOUT):
        """
        Wait for the host to come back from a reboot. This wraps the
        generic wait_for_restart implementation in a reboot group.

        Args:
            timeout - Passed to the parent wait_for_restart.
        """
        def reboot_func():
            super(RemoteHost, self).wait_for_restart(timeout=timeout)
        self.log_reboot(reboot_func)


    def cleanup(self):
        """
        Run the parent cleanup and then reboot the host (with the
        default reboot() arguments, i.e. waiting for it to come back).
        """
        super(RemoteHost, self).cleanup()
        self.reboot()


    def get_tmp_dir(self, parent='/tmp'):
        """
        Return the pathname of a directory on the host suitable
        for temporary file storage.

        The directory is created with 'mktemp -d' under the given
        parent and is remembered so that close() can delete it and
        its content.

        Args:
            parent - Directory on the host in which to create it.
        """
        template = os.path.join(parent, 'autoserv-XXXXXX')
        dir_name = self.run("mktemp -d %s" % template).stdout.rstrip()
        self.tmp_dirs.append(dir_name)
        return dir_name


    def ping(self):
        """
        Ping the remote system, and return whether it's available.

        Runs /usr/bin/fping locally against the hostname and returns
        True if it exits with status 0.
        """
        fpingcmd = "%s -q %s" % ('/usr/bin/fping', self.hostname)
        rc = utils.system(fpingcmd, ignore_status=True)
        return (rc == 0)


    def check_uptime(self):
        """
        Return the host's uptime as read from /proc/uptime.

        Returns:
            The first field of /proc/uptime, as a string.

        Raises:
            error.AutoservHostError if the host does not answer a ping.
        """
        if not self.ping():
            raise error.AutoservHostError('Client is not pingable')
        result = self.run("/bin/cat /proc/uptime", 30)
        return result.stdout.strip().split()[0]


    def get_crashinfo(self, test_start_time):
        """
        Collect crash information from the host after a test.

        After the parent's collection, this waits up to four hours for
        the host to come back up, then copies a fixed set of log files
        and the dmesg output into a 'crashinfo.<hostname>' directory.
        That directory lives under the job's result directory, or under
        the current working directory when there is no job.

        Each item is collected on a best-effort basis: a failure is
        printed and collection moves on to the next item.

        Args:
            test_start_time - Passed to the parent get_crashinfo.
        """
        print("Collecting crash information...")
        super(RemoteHost, self).get_crashinfo(test_start_time)

        # wait for four hours, to see if the machine comes back up
        current_time = time.strftime("%b %d %H:%M:%S", time.localtime())
        print("Waiting four hours for %s to come up (%s)" % (self.hostname,
                                                             current_time))
        if not self.wait_up(timeout=4*60*60):
            print("%s down, unable to collect crash info" % self.hostname)
            return
        else:
            print("%s is back up, collecting crash info" % self.hostname)

        # find a directory to put the crashinfo into
        if self.job:
            infodir = self.job.resultdir
        else:
            infodir = os.path.abspath(os.getcwd())
        infodir = os.path.join(infodir, "crashinfo.%s" % self.hostname)
        if not os.path.exists(infodir):
            os.mkdir(infodir)

        # collect various log files
        log_files = ["/var/log/messages", "/var/log/monitor-ssh-reboots"]
        for log in log_files:
            print("Collecting %s..." % log)
            try:
                self.get_file(log, infodir)
            except Exception:
                # sys.exc_info() avoids the version-specific
                # "except X, e" / "except X as e" spellings
                e = sys.exc_info()[1]
                print("crashinfo collection of %s failed with:\n%s"
                      % (log, e))

        # collect dmesg
        print("Collecting dmesg...")
        try:
            result = self.run("dmesg").stdout
            dmesg_file = open(os.path.join(infodir, "dmesg"), "w")
            try:
                dmesg_file.write(result)
            finally:
                # always release the handle, even if the write fails
                dmesg_file.close()
        except Exception:
            e = sys.exc_info()[1]
            print("crashinfo collection of dmesg failed with:\n%s" % e)


    def are_wait_up_processes_up(self):
        """
        Checks if any of the host's wait-up processes are running yet
        on the remote host.

        Returns True if any of the wait-up processes are running, or if
        no wait-up processes are configured at all; False otherwise.
        """
        processes = self.get_wait_up_processes()
        if not processes:
            return True  # wait up processes aren't being used
        for procname in processes:
            # fall back to plain 'ps' where 'ps -e' is not supported
            exit_status = self.run("{ ps -e || ps; } | grep '%s'" % procname,
                                   ignore_status=True).exit_status
            if exit_status == 0:
                return True
        return False