# autoupdater.py revision 0c0df7324b80a8fcf41739705b3bcbd10d3e2b8b
# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import glob
import httplib
import logging
import multiprocessing
import os
import re
import urlparse
import urllib2

from autotest_lib.client.bin import utils
from autotest_lib.client.common_lib import error, global_config
from autotest_lib.client.common_lib.cros import dev_server

# Local stateful update path is relative to the CrOS source directory.
LOCAL_STATEFUL_UPDATE_PATH = 'src/platform/dev/stateful_update'
LOCAL_CHROOT_STATEFUL_UPDATE_PATH = '/usr/bin/stateful_update'
UPDATER_IDLE = 'UPDATE_STATUS_IDLE'
UPDATER_NEED_REBOOT = 'UPDATE_STATUS_UPDATED_NEED_REBOOT'
# A list of update engine client states that occur after an update is triggered.
# NOTE(review): the first entry has no underscore between FOR and UPDATE;
# confirm against the status strings update_engine_client really reports
# before relying on it.
UPDATER_PROCESSING_UPDATE = ['UPDATE_STATUS_CHECKING_FORUPDATE',
                             'UPDATE_STATUS_UPDATE_AVAILABLE',
                             'UPDATE_STATUS_DOWNLOADING',
                             'UPDATE_STATUS_FINALIZING']


class ChromiumOSError(error.InstallError):
    """Generic error for ChromiumOS-specific exceptions."""
    pass


class RootFSUpdateError(ChromiumOSError):
    """Raised when the RootFS fails to update."""
    pass


class StatefulUpdateError(ChromiumOSError):
    """Raised when the stateful partition fails to update."""
    pass


def url_to_version(update_url):
    """Return the version based on update_url.

    @param update_url: url to the image to update to.

    @returns the last element of the url path, once any '/au/...' suffix
             has been removed.

    """
    # The Chrome OS version is generally the last element in the URL. The only
    # exception is delta update URLs, which are rooted under the version; e.g.,
    # http://.../update/.../0.14.755.0/au/0.14.754.0. In this case we want to
    # strip off the au section of the path before reading the version.
    return re.sub('/au/.*', '',
                  urlparse.urlparse(update_url).path).split('/')[-1].strip()


def url_to_image_name(update_url):
    """Return the image name based on update_url.

    From a URL like:
        http://172.22.50.205:8082/update/lumpy-release/R27-3837.0.0
    return lumpy-release/R27-3837.0.0

    @param update_url: url to the image to update to.
    @returns a string representing the image name in the update_url.

    """
    return '/'.join(urlparse.urlparse(update_url).path.split('/')[-2:])


def _update_url_to_static_url(update_url):
    """Return the url used for the stateful payload of an update url.

    Only the first path element that is exactly 'update' is turned into
    'static', so an 'update' substring in the host name or in the build name
    is left alone. If the path has no such element, every occurrence of
    'update' in the url is replaced, which is what callers historically got.

    @param update_url: The url for update.
                       Eg: http://devserver:port/update/build.

    @returns the url with the 'update' path element replaced by 'static'.

    """
    parsed = urlparse.urlparse(update_url)
    segments = parsed.path.split('/')
    if 'update' not in segments:
        return update_url.replace('update', 'static')
    segments[segments.index('update')] = 'static'
    return urlparse.urlunparse(parsed._replace(path='/'.join(segments)))


def _get_devserver_build_from_update_url(update_url):
    """Get the devserver and build from the update url.

    @param update_url: The url for update.
                       Eg: http://devserver:port/update/build.

    @return: A tuple of the groups captured from update_url by the
             configured image_url_pattern, unpacked by callers as
             (devserver url, build).

    @raises ValueError: If the update_url doesn't match the expected pattern.
    @raises ValueError: If no global_config was found, or it doesn't contain an
                        image_url_pattern.
    """
    pattern = global_config.global_config.get_config_value(
            'CROS', 'image_url_pattern', type=str, default='')
    if not pattern:
        raise ValueError('Cannot parse update_url, the global config needs '
                         'an image_url_pattern.')
    # Each '%s' placeholder of the configured pattern becomes a capture group.
    re_pattern = pattern.replace('%s', r'(\S+)')
    parts = re.search(re_pattern, update_url)
    if not parts or len(parts.groups()) < 2:
        raise ValueError('%s is not an update url' % update_url)
    return parts.groups()


def list_image_dir_contents(update_url):
    """Lists the contents of the devserver for a given build/update_url.

    This is a best-effort debugging aid: failures are logged as warnings and
    never raised to the caller.

    @param update_url: An update url. Eg: http://devserver:port/update/build.
    """
    if not update_url:
        logging.warning('Need update_url to list contents of the devserver.')
        return
    error_msg = 'Cannot check contents of devserver, update url %s' % update_url
    try:
        devserver_url, build = _get_devserver_build_from_update_url(update_url)
    except ValueError as e:
        logging.warning('%s: %s', error_msg, e)
        return
    devserver = dev_server.ImageServer(devserver_url)
    try:
        devserver.list_image_dir(build)
    # The devserver will retry on URLError to avoid flaky connections, but will
    # eventually raise the URLError if it persists. All HTTPErrors get
    # converted to DevServerExceptions.
    except (dev_server.DevServerException, urllib2.URLError) as e:
        logging.warning('%s: %s', error_msg, e)


class ChromiumOSUpdater():
    """Helper class used to update DUT with image of desired version."""
    REMOTE_STATEFUL_UPDATE_PATH = '/usr/local/bin/stateful_update'
    # Misspelled historical name, kept so existing references keep working.
    REMOTE_STATEUL_UPDATE_PATH = REMOTE_STATEFUL_UPDATE_PATH
    UPDATER_BIN = '/usr/bin/update_engine_client'
    STATEFUL_UPDATE = '/tmp/stateful_update'
    UPDATED_MARKER = '/var/run/update_engine_autoupdate_completed'
    UPDATER_LOGS = ['/var/log/messages', '/var/log/update_engine']

    KERNEL_A = {'name': 'KERN-A', 'kernel': 2, 'root': 3}
    KERNEL_B = {'name': 'KERN-B', 'kernel': 4, 'root': 5}
    # Time to wait for new kernel to be marked successful after
    # auto update.
    KERNEL_UPDATE_TIMEOUT = 120


    def __init__(self, update_url, host=None, local_devserver=False):
        """Set up an updater for one host.

        @param update_url: url to the image to update to.
        @param host: host object for the DUT; every method that touches the
                     DUT goes through it (run, send_file, get_file, ...).
        @param local_devserver: If True, no expected version is derived from
                                update_url and update_version is None.

        """
        self.host = host
        self.update_url = update_url
        # Errors raised inside the updater subprocesses are handed back to
        # run_update() through this queue.
        self._update_error_queue = multiprocessing.Queue(2)
        self.local_devserver = local_devserver
        if not local_devserver:
            self.update_version = url_to_version(update_url)
        else:
            self.update_version = None


    def check_update_status(self):
        """Return current status from update-engine.

        @returns the text after the last '=' on the CURRENT_OP line printed
                 by `update_engine_client -status`.

        """
        update_status = self._run(
                '%s -status 2>&1 | grep CURRENT_OP' % self.UPDATER_BIN)
        return update_status.stdout.strip().split('=')[-1]


    def reset_update_engine(self):
        """Resets the host to prepare for a clean update regardless of state.

        @raises ChromiumOSError: if update-engine is not idle after restart.

        """
        self._run('rm -f %s' % self.UPDATED_MARKER)
        self._run('stop ui || true')
        self._run('stop update-engine || true')
        self._run('start update-engine')

        if self.check_update_status() != UPDATER_IDLE:
            raise ChromiumOSError('%s is not in an installable state' %
                                  self.host.hostname)


    def _run(self, cmd, *args, **kwargs):
        """Abbreviated form of self.host.run(...)"""
        return self.host.run(cmd, *args, **kwargs)


    def rootdev(self, options=''):
        """Returns the stripped output of rootdev <options>.

        @param options: options to run rootdev.

        """
        return self._run('rootdev %s' % options).stdout.strip()


    def get_kernel_state(self):
        """Returns the (<active>, <inactive>) kernel state as a pair.

        @raises ChromiumOSError: if the active root partition number is
                                 neither KERNEL_A's nor KERNEL_B's.

        """
        # The partition number is the trailing run of digits of the device
        # name printed by `rootdev -s`.
        active_root = int(re.findall(r'\d+\Z', self.rootdev('-s'))[0])
        if active_root == self.KERNEL_A['root']:
            return self.KERNEL_A, self.KERNEL_B
        elif active_root == self.KERNEL_B['root']:
            return self.KERNEL_B, self.KERNEL_A
        else:
            raise ChromiumOSError('Encountered unknown root partition: %s' %
                                  active_root)


    def _cgpt(self, flag, kernel, dev='$(rootdev -s -d)'):
        """Return numeric cgpt value for the specified flag, kernel, device.

        @param flag: cgpt show flag selecting the value, e.g. '-P'.
        @param kernel: information of the given kernel, KERNEL_A or KERNEL_B.
        @param dev: device expression passed to cgpt; the default is expanded
                    by the shell on the host.

        """
        return int(self._run('cgpt show -n -i %d %s %s' % (
            kernel['kernel'], flag, dev)).stdout.strip())


    def get_kernel_priority(self, kernel):
        """Return numeric priority for the specified kernel.

        @param kernel: information of the given kernel, KERNEL_A or KERNEL_B.

        """
        return self._cgpt('-P', kernel)


    def get_kernel_success(self, kernel):
        """Return boolean success flag for the specified kernel.

        @param kernel: information of the given kernel, KERNEL_A or KERNEL_B.

        """
        return self._cgpt('-S', kernel) != 0


    def get_kernel_tries(self, kernel):
        """Return tries count for the specified kernel.

        @param kernel: information of the given kernel, KERNEL_A or KERNEL_B.

        """
        return self._cgpt('-T', kernel)


    def get_stateful_update_script(self):
        """Returns the path to the stateful update script on the target.

        When a local copy of the script is found it is first sent to the
        host at STATEFUL_UPDATE.

        """
        # We attempt to load the local stateful update path in 3 different
        # ways. First we use the location specified in the autotest global
        # config. If this doesn't exist, we attempt to use the Chromium OS
        # Chroot path to the installed script. If all else fails, we use the
        # stateful update script on the host.
        stateful_update_path = os.path.join(
                global_config.global_config.get_config_value(
                        'CROS', 'source_tree', default=''),
                LOCAL_STATEFUL_UPDATE_PATH)

        if not os.path.exists(stateful_update_path):
            logging.warning('Could not find Chrome OS source location for '
                            'stateful_update script at %s, falling back to '
                            'chroot copy.', stateful_update_path)
            stateful_update_path = LOCAL_CHROOT_STATEFUL_UPDATE_PATH

        if not os.path.exists(stateful_update_path):
            logging.warning('Could not chroot stateful_update script, falling '
                            'back on client copy.')
            statefuldev_script = self.REMOTE_STATEFUL_UPDATE_PATH
        else:
            self.host.send_file(
                    stateful_update_path, self.STATEFUL_UPDATE,
                    delete_dest=True)
            statefuldev_script = self.STATEFUL_UPDATE

        return statefuldev_script


    def reset_stateful_partition(self):
        """Clear any pending stateful update request."""
        statefuldev_cmd = [self.get_stateful_update_script()]
        statefuldev_cmd += ['--stateful_change=reset', '2>&1']
        self._run(' '.join(statefuldev_cmd))


    def revert_boot_partition(self):
        """Revert the boot partition.

        @returns the result of running /postinst against the current root
                 device on the host.

        """
        part = self.rootdev('-s')
        logging.warning('Reverting update; Boot partition will be %s', part)
        return self._run('/postinst %s 2>&1' % part)


    def trigger_update(self):
        """Triggers a background update on a test image.

        @raise RootFSUpdateError if anything went wrong.

        """
        autoupdate_cmd = '%s --check_for_update --omaha_url=%s' % (
            self.UPDATER_BIN, self.update_url)
        logging.info('Triggering update via: %s', autoupdate_cmd)
        try:
            self._run(autoupdate_cmd)
        except (error.AutoservSshPermissionDeniedError,
                error.AutoservSSHTimeout) as e:
            raise RootFSUpdateError('SSH on %s is seeing %s' %
                                    (self.host.hostname, type(e).__name__))
        except error.AutoservRunError as e:
            # Check if the exit code is 255, if so it's probably a generic
            # SSH error.
            result = e.args[1]
            if result.exit_status == 255:
                raise RootFSUpdateError('SSH on %s is seeing a generic error.' %
                                        self.host.hostname)

            # We have ruled out all SSH cases, the error code is from
            # update_engine_client, though we still don't know why.
            list_image_dir_contents(self.update_url)
            raise RootFSUpdateError(
                    'devserver unreachable, payload unavailable, '
                    'or AU bug (unlikely) on %s: %s' %
                    (self.host.hostname, type(e).__name__))


    def _verify_update_completed(self):
        """Verifies that an update has completed.

        @raise RootFSUpdateError: if verification fails.
        """
        status = self.check_update_status()
        if status != UPDATER_NEED_REBOOT:
            raise RootFSUpdateError('Update did not complete with correct '
                                    'status. Expecting %s, actual %s' %
                                    (UPDATER_NEED_REBOOT, status))


    def rollback_rootfs(self, powerwash):
        """Triggers rollback and waits for it to complete.

        @param powerwash: If true, powerwash as part of rollback.

        @raise RootFSUpdateError if anything went wrong.

        """
        version = self.host.get_release_version()
        # Introduced can_rollback in M36 (build 5772).
        # etc/lsb-release matches X.Y.Z. This version split just pulls the
        # first part out.
        try:
            build_number = int(version.split('.')[0])
        except ValueError:
            logging.error('Could not parse build number.')
            build_number = 0

        if build_number >= 5772:
            can_rollback_cmd = '%s --can_rollback' % self.UPDATER_BIN
            logging.info('Checking for rollback.')
            try:
                self._run(can_rollback_cmd)
            except error.AutoservRunError as e:
                raise RootFSUpdateError("Rollback isn't possible on %s: %s" %
                                        (self.host.hostname, str(e)))

        rollback_cmd = '%s --rollback --follow' % self.UPDATER_BIN
        if not powerwash:
            rollback_cmd += ' --nopowerwash'

        logging.info('Performing rollback.')
        try:
            self._run(rollback_cmd)
        except error.AutoservRunError as e:
            raise RootFSUpdateError('Rollback failed on %s: %s' %
                                    (self.host.hostname, str(e)))

        self._verify_update_completed()


    def update_rootfs(self):
        """Run the standard command to force an update.

        Every failure is also put on the error queue so that run_update()
        can see it after the updater processes have been joined.

        @raise RootFSUpdateError: if update_engine_client fails or the
                                  update does not end in the expected status.

        """
        try:
            autoupdate_cmd = '%s --update --omaha_url=%s 2>&1' % (
                self.UPDATER_BIN, self.update_url)
            self._run(autoupdate_cmd, timeout=1200)
        except error.AutoservRunError:
            list_image_dir_contents(self.update_url)
            update_error = RootFSUpdateError('update-engine failed on %s' %
                                             self.host.hostname)
            self._update_error_queue.put(update_error)
            raise update_error
        except Exception as e:
            # Report every other failure through the queue as well, then
            # re-raise it with its original traceback.
            self._update_error_queue.put(e)
            raise

        try:
            self._verify_update_completed()
        except RootFSUpdateError as e:
            self._update_error_queue.put(e)
            raise


    def update_stateful(self, clobber=True):
        """Updates the stateful partition.

        Every failure is also put on the error queue so that run_update()
        can see it after the updater processes have been joined.

        @param clobber: If True, a clean stateful installation.

        @raise StatefulUpdateError: if the stateful_update script fails.
        """
        logging.info('Updating stateful partition...')
        statefuldev_url = _update_url_to_static_url(self.update_url)

        # Attempt stateful partition update; this must succeed so that the newly
        # installed host is testable after update.
        statefuldev_cmd = [self.get_stateful_update_script(), statefuldev_url]
        if clobber:
            statefuldev_cmd.append('--stateful_change=clean')

        statefuldev_cmd.append('2>&1')
        try:
            self._run(' '.join(statefuldev_cmd), timeout=600)
        except error.AutoservRunError:
            update_error = StatefulUpdateError('stateful_update failed on %s' %
                                               self.host.hostname)
            self._update_error_queue.put(update_error)
            raise update_error
        except Exception as e:
            # Report every other failure through the queue as well, then
            # re-raise it with its original traceback.
            self._update_error_queue.put(e)
            raise


    def run_update(self, update_root=True):
        """Update the DUT with image of specific version.

        @param update_root: True to force a rootfs update.

        @raise ChromiumOSError: if the update server cannot be reached or the
                                update engine cannot be reset to idle; the
                                first error reported by an updater process
                                is re-raised as is.
        """
        booted_version = self.host.get_release_version()
        if self.update_version:
            logging.info('Updating from version %s to %s.',
                         booted_version, self.update_version)

        # Check that Dev Server is accepting connections (from autoserv's host).
        # If we can't talk to it, the machine host probably can't either.
        auserver_host = urlparse.urlparse(self.update_url)[1]
        connection = httplib.HTTPConnection(auserver_host)
        try:
            connection.connect()
        except IOError:
            raise ChromiumOSError(
                'Update server at %s not available' % auserver_host)
        finally:
            # The connection is only a reachability probe; don't leak it.
            connection.close()

        logging.info('Installing from %s to %s', self.update_url,
                     self.host.hostname)

        # Reset update state.
        self.reset_update_engine()
        self.reset_stateful_partition()

        try:
            updaters = [
                multiprocessing.process.Process(target=self.update_rootfs),
                multiprocessing.process.Process(target=self.update_stateful)
                ]
            if not update_root:
                logging.info('Root update is skipped.')
                updaters = updaters[1:]

            # Run the updaters in parallel.
            for updater in updaters:
                updater.start()
            for updater in updaters:
                updater.join()

            # Re-raise the first error that occurred.
            if not self._update_error_queue.empty():
                update_error = self._update_error_queue.get()
                self.revert_boot_partition()
                self.reset_stateful_partition()
                raise update_error

            logging.info('Update complete.')
        except:
            # Collect update engine logs in the event of failure.
            if self.host.job:
                logging.info('Collecting update engine logs...')
                self.host.get_file(
                        self.UPDATER_LOGS, self.host.job.sysinfo.sysinfodir,
                        preserve_perm=False)
            list_image_dir_contents(self.update_url)
            raise
        finally:
            self.host.show_update_engine_log()


    def check_version(self):
        """Check the image running in DUT has the desired version.

        @returns: True if the DUT's image version matches the version that
                  the autoupdater tries to update to, False otherwise
                  (including when no update version is known).

        """
        booted_version = self.host.get_release_version()
        return bool(self.update_version and
                    self.update_version.endswith(booted_version))


    def check_version_to_confirm_install(self):
        """Check image running in DUT has the desired version to be installed.

        The method should not be used to check if DUT needs to have a full
        reimage. Only use it to confirm a image is installed.

        The method is designed to verify version for following 6 scenarios with
        samples of version to update to and expected booted version:
        1. trybot paladin build.
        update version: trybot-lumpy-paladin/R27-3837.0.0-b123
        booted version: 3837.0.2013_03_21_1340

        2. trybot release build.
        update version: trybot-lumpy-release/R27-3837.0.0-b456
        booted version: 3837.0.0

        3. buildbot official release build.
        update version: lumpy-release/R27-3837.0.0
        booted version: 3837.0.0

        4. non-official paladin rc build.
        update version: lumpy-paladin/R27-3878.0.0-rc7
        booted version: 3837.0.0-rc7

        5. chrome-perf build.
        update version: lumpy-chrome-perf/R28-3837.0.0-b2996
        booted version: 3837.0.0

        6. pgo-generate build.
        update version: lumpy-release-pgo-generate/R28-3837.0.0-b2996
        booted version: 3837.0.0-pgo-generate

        When we are checking if a DUT needs to do a full install, we should NOT
        use this method to check if the DUT is running the same version, since
        it may return false positive for a DUT running trybot paladin build to
        be updated to another trybot paladin build.

        TODO: This logic has a bug if a trybot paladin build failed to be
        installed in a DUT running an older trybot paladin build with same
        platform number, but different build number (-b###). So to conclusively
        determine if a tryjob paladin build is imaged successfully, we may need
        to find out the date string from update url.

        @returns: True if the DUT's image version (without the date string if
                  the image is a trybot build), matches the version that the
                  autoupdater is trying to update to.

        """
        # In the local_devserver case, we can't know the expected
        # build, so just pass.
        if not self.update_version:
            return True

        # Always try the default check_version method first, this prevents
        # any backward compatibility issue.
        if self.check_version():
            return True

        return utils.version_match(self.update_version,
                                   self.host.get_release_version(),
                                   self.update_url)


    def verify_boot_expectations(self, expected_kernel_state, rollback_message):
        """Verifies that we fully booted given expected kernel state.

        This method both verifies that we booted using the correct kernel
        state and that the OS has marked the kernel as good.

        @param expected_kernel_state: kernel state that we are verifying with
            i.e. I expect to be booted onto partition 4 etc. See output of
            get_kernel_state.
        @param rollback_message: string to raise as a ChromiumOSError
            if we booted with the wrong partition.

        @raises ChromiumOSError: If we didn't.
        """
        # Figure out the newly active kernel.
        active_kernel_state = self.get_kernel_state()[0]

        # Check for rollback due to a bad build.
        if (expected_kernel_state and
                active_kernel_state != expected_kernel_state):

            # Kernel crash reports should be wiped between test runs, but
            # may persist from earlier parts of the test, or from problems
            # with provisioning.
            #
            # Kernel crash reports will NOT be present if the crash happened
            # before encrypted stateful is mounted.
            #
            # TODO(dgarrett): Integrate with server/crashcollect.py at some
            # point.
            #
            # NOTE(review): glob.glob() inspects the filesystem of the machine
            # running this code rather than going through self._run(); confirm
            # that is where the crash reports are expected to be.
            kernel_crashes = glob.glob('/var/spool/crash/kernel.*.kcrash')
            if kernel_crashes:
                rollback_message += ': kernel_crash'
                logging.debug('Found %d kernel crash reports:',
                              len(kernel_crashes))
                # The crash names contain timestamps that may be useful:
                #   kernel.20131207.005945.0.kcrash
                for crash in kernel_crashes:
                    logging.debug(' %s', os.path.basename(crash))

            # Print out some information to make it easier to debug
            # the rollback.
            logging.debug('Dumping partition table.')
            self._run('cgpt show $(rootdev -s -d)')
            logging.debug('Dumping crossystem for firmware debugging.')
            self._run('crossystem --all')
            raise ChromiumOSError(rollback_message)

        # Make sure chromeos-setgoodkernel runs.
        try:
            utils.poll_for_condition(
                lambda: (self.get_kernel_tries(active_kernel_state) == 0
                         and self.get_kernel_success(active_kernel_state)),
                exception=ChromiumOSError(),
                timeout=self.KERNEL_UPDATE_TIMEOUT, sleep_interval=5)
        except ChromiumOSError:
            # Tell apart "system services never came up" from "the kernel was
            # never marked good" to make the failure message more useful.
            services_status = self._run('status system-services').stdout
            if services_status != 'system-services start/running\n':
                event = ('Chrome failed to reach login screen')
            else:
                event = ('update-engine failed to call '
                         'chromeos-setgoodkernel')
            raise ChromiumOSError(
                    'After update and reboot, %s '
                    'within %d seconds' % (event,
                                           self.KERNEL_UPDATE_TIMEOUT))