# autoupdater.py revision c193217c55a11367708be5c25bd1c8e1857ab6ff
# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Helpers for updating a Chromium OS host through update_engine."""

import httplib
import logging
import multiprocessing
import os
import re
import urlparse

from autotest_lib.client.bin import utils
from autotest_lib.client.common_lib import error, global_config

# Local stateful update path is relative to the CrOS source directory.
LOCAL_STATEFUL_UPDATE_PATH = 'src/platform/dev/stateful_update'
LOCAL_CHROOT_STATEFUL_UPDATE_PATH = '/usr/bin/stateful_update'
REMOTE_STATEFUL_UPDATE_PATH = '/usr/local/bin/stateful_update'
# Misspelled legacy name, kept so existing importers keep working.
REMOTE_STATEUL_UPDATE_PATH = REMOTE_STATEFUL_UPDATE_PATH
STATEFUL_UPDATE = '/tmp/stateful_update'
UPDATER_BIN = '/usr/bin/update_engine_client'
UPDATER_IDLE = 'UPDATE_STATUS_IDLE'
UPDATER_NEED_REBOOT = 'UPDATE_STATUS_UPDATED_NEED_REBOOT'
UPDATED_MARKER = '/var/run/update_engine_autoupdate_completed'
UPDATER_LOGS = '/var/log/messages /var/log/update_engine'


class ChromiumOSError(error.InstallError):
    """Generic error for ChromiumOS-specific exceptions."""


class RootFSUpdateError(ChromiumOSError):
    """Raised when the RootFS fails to update."""


class StatefulUpdateError(ChromiumOSError):
    """Raised when the stateful partition fails to update."""


def url_to_version(update_url):
    """Return the version based on update_url.

    @param update_url: url to the image to update to.
    @returns the last element of the URL path, after any '/au/...' suffix
            has been removed.

    """
    # The Chrome OS version is generally the last element in the URL. The only
    # exception is delta update URLs, which are rooted under the version; e.g.,
    # http://.../update/.../0.14.755.0/au/0.14.754.0. In this case we want to
    # strip off the au section of the path before reading the version.
    url_path = urlparse.urlparse(update_url).path
    return re.sub(r'/au/.*', '', url_path).split('/')[-1].strip()


def url_to_image_name(update_url):
    """Return the image name based on update_url.

    From a URL like:
        http://172.22.50.205:8082/update/lumpy-release/R27-3837.0.0
    return lumpy-release/R27-3837.0.0

    @param update_url: url to the image to update to.
    @returns a string representing the image name in the update_url.

    """
    path_elements = urlparse.urlparse(update_url).path.split('/')
    return '/'.join(path_elements[-2:])


class ChromiumOSUpdater(object):
    """Helper class used to update DUT with image of desired version."""
    # For each kernel slot: its name, the partition number holding the kernel
    # (passed to `cgpt show -i`) and the partition number of its rootfs
    # (compared against the trailing digits of `rootdev -s`).
    KERNEL_A = {'name': 'KERN-A', 'kernel': 2, 'root': 3}
    KERNEL_B = {'name': 'KERN-B', 'kernel': 4, 'root': 5}
    # Time to wait for new kernel to be marked successful after
    # auto update.
    KERNEL_UPDATE_TIMEOUT = 120


    def __init__(self, update_url, host=None, local_devserver=False):
        """Set up an updater for one host.

        @param update_url: url to the image to update to.
        @param host: host object used to run commands and copy files.
        @param local_devserver: if True, no target version is derived from
                update_url and self.update_version is left as None.

        """
        self.host = host
        self.update_url = update_url
        # Errors raised by update_rootfs/update_stateful are put here so that
        # run_update, which runs them in separate processes, can re-raise.
        self._update_error_queue = multiprocessing.Queue(2)
        self.local_devserver = local_devserver
        if not local_devserver:
            self.update_version = url_to_version(update_url)
        else:
            self.update_version = None


    def check_update_status(self):
        """Return current status from update-engine."""
        update_status = self._run(
                '%s -status 2>&1 | grep CURRENT_OP' % UPDATER_BIN)
        # The value is whatever follows the last '=' on the grepped output.
        return update_status.stdout.strip().split('=')[-1]


    def reset_update_engine(self):
        """Restarts the update-engine service.

        @raise ChromiumOSError: if update-engine does not report
                UPDATER_IDLE after being restarted.

        """
        self._run('rm -f %s' % UPDATED_MARKER)
        try:
            self._run('initctl stop update-engine')
        except error.AutoservRunError:
            # Best effort: a failed stop is only logged.
            logging.warning(
                    'Stopping update-engine service failed. Already dead?')
        self._run('initctl start update-engine')

        if self.check_update_status() != UPDATER_IDLE:
            raise ChromiumOSError('%s is not in an installable state' %
                                  self.host.hostname)


    def _run(self, cmd, *args, **kwargs):
        """Abbreviated form of self.host.run(...)"""
        return self.host.run(cmd, *args, **kwargs)


    def rootdev(self, options=''):
        """Returns the stripped output of rootdev <options>.

        @param options: options to run rootdev.

        """
        return self._run('rootdev %s' % options).stdout.strip()


    def get_kernel_state(self):
        """Returns the (<active>, <inactive>) kernel state as a pair.

        @raise ChromiumOSError: if the active root partition number cannot
                be read from `rootdev -s`, or is neither KERNEL_A's nor
                KERNEL_B's root partition.

        """
        root_device = self.rootdev('-s')
        # The partition number is the run of digits ending the device name.
        match = re.search(r'\d+\Z', root_device)
        if not match:
            raise ChromiumOSError(
                    'Could not determine root partition number from: %s' %
                    root_device)

        active_root = int(match.group(0))
        if active_root == self.KERNEL_A['root']:
            return self.KERNEL_A, self.KERNEL_B
        elif active_root == self.KERNEL_B['root']:
            return self.KERNEL_B, self.KERNEL_A
        else:
            raise ChromiumOSError('Encountered unknown root partition: %s' %
                                  active_root)


    def _cgpt(self, flag, kernel, dev='$(rootdev -s -d)'):
        """Return numeric cgpt value for the specified flag, kernel, device.

        @param flag: option passed to `cgpt show`, e.g. '-P', '-S' or '-T'.
        @param kernel: information of the given kernel, KERNEL_A or KERNEL_B.
        @param dev: device argument for cgpt; the default is a shell
                substitution evaluated by the command run on the host.

        """
        cgpt_cmd = 'cgpt show -n -i %d %s %s' % (kernel['kernel'], flag, dev)
        return int(self._run(cgpt_cmd).stdout.strip())


    def get_kernel_priority(self, kernel):
        """Return numeric priority for the specified kernel.

        @param kernel: information of the given kernel, KERNEL_A or KERNEL_B.

        """
        return self._cgpt('-P', kernel)


    def get_kernel_success(self, kernel):
        """Return boolean success flag for the specified kernel.

        @param kernel: information of the given kernel, KERNEL_A or KERNEL_B.

        """
        return self._cgpt('-S', kernel) != 0


    def get_kernel_tries(self, kernel):
        """Return tries count for the specified kernel.

        @param kernel: information of the given kernel, KERNEL_A or KERNEL_B.

        """
        return self._cgpt('-T', kernel)


    def get_stateful_update_script(self):
        """Returns the path to the stateful update script on the target."""
        # We attempt to load the local stateful update path in 3 different
        # ways. First we use the location specified in the autotest global
        # config. If this doesn't exist, we attempt to use the Chromium OS
        # Chroot path to the installed script. If all else fails, we use the
        # stateful update script on the host.
        stateful_update_path = os.path.join(
                global_config.global_config.get_config_value(
                        'CROS', 'source_tree', default=''),
                LOCAL_STATEFUL_UPDATE_PATH)

        if not os.path.exists(stateful_update_path):
            logging.warning(
                    'Could not find Chrome OS source location for '
                    'stateful_update script at %s, falling back to chroot '
                    'copy.', stateful_update_path)
            stateful_update_path = LOCAL_CHROOT_STATEFUL_UPDATE_PATH

        if not os.path.exists(stateful_update_path):
            logging.warning('Could not chroot stateful_update script, falling '
                            'back on client copy.')
            return REMOTE_STATEFUL_UPDATE_PATH

        # A local copy exists: push it to the host and run it from there.
        self.host.send_file(
                stateful_update_path, STATEFUL_UPDATE, delete_dest=True)
        return STATEFUL_UPDATE


    def reset_stateful_partition(self):
        """Clear any pending stateful update request."""
        statefuldev_cmd = [self.get_stateful_update_script(),
                           '--stateful_change=reset', '2>&1']
        self._run(' '.join(statefuldev_cmd))


    def revert_boot_partition(self):
        """Revert the boot partition.

        @returns the result of running /postinst against the current root
                device (as reported by `rootdev -s`).

        """
        part = self.rootdev('-s')
        logging.warning('Reverting update; Boot partition will be %s', part)
        return self._run('/postinst %s 2>&1' % part)


    def trigger_update(self):
        """Triggers a background update on a test image.

        @raise RootFSUpdateError if anything went wrong.

        """
        autoupdate_cmd = '%s --check_for_update --omaha_url=%s' % (
                UPDATER_BIN, self.update_url)
        logging.info('Triggering update via: %s', autoupdate_cmd)
        try:
            self._run(autoupdate_cmd)
        except error.AutoservRunError as e:
            raise RootFSUpdateError('Update triggering failed on %s: %s' %
                                    (self.host.hostname, str(e)))


    def _verify_update_completed(self):
        """Verifies that an update has completed.

        @raise RootFSUpdateError: if verification fails.
        """
        status = self.check_update_status()
        if status != UPDATER_NEED_REBOOT:
            raise RootFSUpdateError('Update did not complete with correct '
                                    'status. Expecting %s, actual %s' %
                                    (UPDATER_NEED_REBOOT, status))


    def rollback_rootfs(self, powerwash):
        """Triggers rollback and waits for it to complete.

        @param powerwash: If true, powerwash as part of rollback.

        @raise RootFSUpdateError if anything went wrong.

        """
        #TODO(sosa): crbug.com/309051 - Make this one update_engine_client call.
        rollback_cmd = '%s --rollback' % (UPDATER_BIN)
        wait_for_update_to_complete_cmd = '%s --update' % (UPDATER_BIN)
        if not powerwash:
            rollback_cmd += ' --nopowerwash'

        logging.info('Triggering rollback.')
        try:
            self._run(rollback_cmd)
            self._run(wait_for_update_to_complete_cmd)
        except error.AutoservRunError as e:
            raise RootFSUpdateError('Rollback failed on %s: %s' %
                                    (self.host.hostname, str(e)))

        self._verify_update_completed()


    def update_rootfs(self):
        """Updates the rootfs partition only.

        Any failure is also put on self._update_error_queue before being
        raised, so that run_update can pick it up.

        @raise RootFSUpdateError: if update_engine_client fails or the
                update does not end in the UPDATER_NEED_REBOOT status.

        """
        logging.info('Updating root partition...')

        # Run update_engine using the specified URL.
        try:
            autoupdate_cmd = '%s --update --omaha_url=%s 2>&1' % (
                    UPDATER_BIN, self.update_url)
            self._run(autoupdate_cmd, timeout=900)
        except error.AutoservRunError:
            update_error = RootFSUpdateError('update-engine failed on %s' %
                                             self.host.hostname)
            self._update_error_queue.put(update_error)
            raise update_error

        try:
            self._verify_update_completed()
        except RootFSUpdateError as e:
            self._update_error_queue.put(e)
            raise


    def update_stateful(self, clobber=True):
        """Updates the stateful partition.

        Any failure is also put on self._update_error_queue before being
        raised, so that run_update can pick it up.

        @param clobber: If True, a clean stateful installation.

        @raise StatefulUpdateError: if the stateful_update script fails.
        """
        logging.info('Updating stateful partition...')
        # NOTE(review): this rewrites every 'update' substring in the URL
        # (host name and build name included), not only the '/update' path
        # prefix -- confirm that no update URL in use is affected.
        statefuldev_url = self.update_url.replace('update', 'static')

        # Attempt stateful partition update; this must succeed so that the newly
        # installed host is testable after update.
        statefuldev_cmd = [self.get_stateful_update_script(), statefuldev_url]
        if clobber:
            statefuldev_cmd.append('--stateful_change=clean')

        statefuldev_cmd.append('2>&1')
        try:
            self._run(' '.join(statefuldev_cmd), timeout=600)
        except error.AutoservRunError:
            update_error = StatefulUpdateError('stateful_update failed on %s' %
                                               self.host.hostname)
            self._update_error_queue.put(update_error)
            raise update_error


    def run_update(self, force_update, update_root=True):
        """Update the DUT with image of specific version.

        @param force_update: True to update DUT even if it's running the same
            version already.
        @param update_root: True to force a kernel update. If it's False and
            force_update is True, stateful update will be used to clean up
            the DUT.

        @returns False if the update was skipped because the DUT already
            runs the target version, True once the update has completed.

        @raise ChromiumOSError: if the update server cannot be reached, or
            if one of the updaters reported an error.

        """
        booted_version = self.get_build_id()
        if self.check_version() and not force_update:
            logging.info('System is already up to date. Skipping update.')
            return False

        if self.update_version:
            logging.info('Updating from version %s to %s.',
                         booted_version, self.update_version)

        # Check that Dev Server is accepting connections (from autoserv's host).
        # If we can't talk to it, the machine host probably can't either.
        auserver_host = urlparse.urlparse(self.update_url)[1]
        connection = httplib.HTTPConnection(auserver_host)
        try:
            connection.connect()
        except IOError:
            raise ChromiumOSError(
                    'Update server at %s not available' % auserver_host)
        finally:
            # Only reachability is probed; do not leave the socket open.
            connection.close()

        logging.info('Installing from %s to %s', self.update_url,
                     self.host.hostname)

        # Reset update state.
        self.reset_update_engine()
        self.reset_stateful_partition()

        try:
            updaters = [
                multiprocessing.process.Process(target=self.update_rootfs),
                multiprocessing.process.Process(target=self.update_stateful)
            ]
            if not update_root:
                logging.info('Root update is skipped.')
                updaters = updaters[1:]

            # Run the updaters in parallel.
            for updater in updaters:
                updater.start()
            for updater in updaters:
                updater.join()

            # Re-raise the first error that occurred.
            if not self._update_error_queue.empty():
                update_error = self._update_error_queue.get()
                self.revert_boot_partition()
                self.reset_stateful_partition()
                raise update_error

            logging.info('Update complete.')
            return True
        except:
            # Collect update engine logs in the event of failure. The bare
            # except is deliberate: the error is always re-raised below.
            if self.host.job:
                logging.info('Collecting update engine logs...')
                self.host.get_file(
                        UPDATER_LOGS, self.host.job.sysinfo.sysinfodir,
                        preserve_perm=False)
            raise
        finally:
            self.host.show_update_engine_log()


    def check_version(self):
        """Check the image running in DUT has the desired version.

        @returns: True if the DUT's image version matches the version that
            the autoupdater tries to update to. False otherwise, including
            when no target version is known (self.update_version is unset).

        """
        booted_version = self.get_build_id()
        return bool(self.update_version and
                    self.update_version.endswith(booted_version))


    def check_version_to_confirm_install(self):
        """Check image running in DUT has the desired version to be installed.

        The method should not be used to check if DUT needs to have a full
        reimage. Only use it to confirm a image is installed.

        The method is designed to verify version for following 6 scenarios with
        samples of version to update to and expected booted version:
        1. trybot paladin build.
        update version: trybot-lumpy-paladin/R27-3837.0.0-b123
        booted version: 3837.0.2013_03_21_1340

        2. trybot release build.
        update version: trybot-lumpy-release/R27-3837.0.0-b456
        booted version: 3837.0.0

        3. buildbot official release build.
        update version: lumpy-release/R27-3837.0.0
        booted version: 3837.0.0

        4. non-official paladin rc build.
        update version: lumpy-paladin/R27-3878.0.0-rc7
        booted version: 3878.0.0-rc7

        5. chrome-perf build.
        update version: lumpy-chrome-perf/R28-3837.0.0-b2996
        booted version: 3837.0.0

        6. pgo-generate build.
        update version: lumpy-release-pgo-generate/R28-3837.0.0-b2996
        booted version: 3837.0.0-pgo-generate

        When we are checking if a DUT needs to do a full install, we should NOT
        use this method to check if the DUT is running the same version, since
        it may return false positive for a DUT running trybot paladin build to
        be updated to another trybot paladin build.

        TODO: This logic has a bug if a trybot paladin build failed to be
        installed in a DUT running an older trybot paladin build with same
        platform number, but different build number (-b###). So to conclusively
        determine if a tryjob paladin build is imaged successfully, we may need
        to find out the date string from update url.

        @returns: True if the DUT's image version (without the date string if
            the image is a trybot build), matches the version that the
            autoupdater is trying to update to.

        """
        # In the local_devserver case, we can't know the expected
        # build, so just pass.
        if not self.update_version:
            return True

        # Always try the default check_version method first, this prevents
        # any backward compatibility issue.
        if self.check_version():
            return True

        # Remove R#- and -b# at the end of build version
        stripped_version = re.sub(r'(R\d+-|-b\d+)', '', self.update_version)

        booted_version = self.get_build_id()

        is_trybot_paladin_build = re.match(r'.+trybot-.+-paladin',
                                           self.update_url)

        # Replace date string with 0 in booted_version
        booted_version_no_date = re.sub(r'\d{4}_\d{2}_\d{2}_\d+', '0',
                                        booted_version)
        has_date_string = booted_version != booted_version_no_date

        is_pgo_generate_build = re.match(r'.+-pgo-generate',
                                         self.update_url)

        # Remove |-pgo-generate| in booted_version
        booted_version_no_pgo = booted_version.replace('-pgo-generate', '')
        has_pgo_generate = booted_version != booted_version_no_pgo

        if is_trybot_paladin_build:
            if not has_date_string:
                logging.error('A trybot paladin build is expected. Version '
                              '"%s" is not a paladin build.', booted_version)
                return False
            return stripped_version == booted_version_no_date
        elif is_pgo_generate_build:
            if not has_pgo_generate:
                logging.error('A pgo-generate build is expected. Version '
                              '"%s" is not a pgo-generate build.',
                              booted_version)
                return False
            return stripped_version == booted_version_no_pgo
        else:
            if has_date_string:
                logging.error('Unexpected date found in a non trybot paladin'
                              ' build.')
                return False
            # Versioned build, i.e., rc or release build.
            return stripped_version == booted_version


    def get_build_id(self):
        """Pulls the CHROMEOS_RELEASE_VERSION string from /etc/lsb-release."""
        lsb_release_line = self._run('grep CHROMEOS_RELEASE_VERSION'
                                     ' /etc/lsb-release').stdout
        return lsb_release_line.split('=')[1].strip()


    def verify_boot_expectations(self, expected_kernel_state, rollback_message):
        """Verifies that we fully booted given expected kernel state.

        This method both verifies that we booted using the correct kernel
        state and that the OS has marked the kernel as good.

        @param expected_kernel_state: kernel state that we are verifying with
            i.e. I expect to be booted onto partition 4 etc. See output of
            get_kernel_state.
        @param rollback_message: string to raise as a ChromiumOSError
            if we booted with the wrong partition.

        @raises ChromiumOSError: If we didn't.
        """
        # Figure out the newly active kernel.
        active_kernel_state = self.get_kernel_state()[0]

        # Check for rollback due to a bad build.
        if (expected_kernel_state and
                active_kernel_state != expected_kernel_state):
            # Print out some information to make it easier to debug
            # the rollback.
            logging.debug('Dumping partition table.')
            self._run('cgpt show $(rootdev -s -d)')
            logging.debug('Dumping crossystem for firmware debugging.')
            self._run('crossystem --all')
            raise ChromiumOSError(rollback_message)

        # Make sure chromeos-setgoodkernel runs.
        try:
            utils.poll_for_condition(
                    lambda: (self.get_kernel_tries(active_kernel_state) == 0
                             and self.get_kernel_success(active_kernel_state)),
                    exception=ChromiumOSError(),
                    timeout=self.KERNEL_UPDATE_TIMEOUT, sleep_interval=5)
        except ChromiumOSError:
            # Use the state of system-services to pick which failure to
            # report in the error message.
            services_status = self._run('status system-services').stdout
            if services_status != 'system-services start/running\n':
                event = ('Chrome failed to reach login screen')
            else:
                event = ('update-engine failed to call '
                         'chromeos-setgoodkernel')
            raise ChromiumOSError(
                    'After update and reboot, %s '
                    'within %d seconds' % (event,
                                           self.KERNEL_UPDATE_TIMEOUT))