autoupdater.py revision d035b0c458560c0421620a63191328522ff880a7
# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import glob
import httplib
import logging
import multiprocessing
import os
import re
import urlparse
import urllib2

from autotest_lib.client.bin import utils
from autotest_lib.client.common_lib import error, global_config
from autotest_lib.client.common_lib.cros import dev_server

# Local stateful update path is relative to the CrOS source directory.
LOCAL_STATEFUL_UPDATE_PATH = 'src/platform/dev/stateful_update'
LOCAL_CHROOT_STATEFUL_UPDATE_PATH = '/usr/bin/stateful_update'
UPDATER_IDLE = 'UPDATE_STATUS_IDLE'
UPDATER_NEED_REBOOT = 'UPDATE_STATUS_UPDATED_NEED_REBOOT'
# A list of update engine client states that occur after an update is triggered.
UPDATER_PROCESSING_UPDATE = ['UPDATE_STATUS_CHECKING_FORUPDATE',
                             'UPDATE_STATUS_UPDATE_AVAILABLE',
                             'UPDATE_STATUS_DOWNLOADING',
                             'UPDATE_STATUS_FINALIZING']


class ChromiumOSError(error.InstallError):
    """Generic error for ChromiumOS-specific exceptions."""


class BrilloError(error.InstallError):
    """Generic error for Brillo-specific exceptions."""


class RootFSUpdateError(ChromiumOSError):
    """Raised when the RootFS fails to update."""


class StatefulUpdateError(ChromiumOSError):
    """Raised when the stateful partition fails to update."""


def url_to_version(update_url):
    """Return the version based on update_url.

    @param update_url: url to the image to update to.

    @returns the last path element of the url, after any '/au/...' suffix
             has been removed.

    """
    # The Chrome OS version is generally the last element in the URL. The only
    # exception is delta update URLs, which are rooted under the version; e.g.,
    # http://.../update/.../0.14.755.0/au/0.14.754.0. In this case we want to
    # strip off the au section of the path before reading the version.
    return re.sub('/au/.*', '',
                  urlparse.urlparse(update_url).path).split('/')[-1].strip()


def url_to_image_name(update_url):
    """Return the image name based on update_url.

    From a URL like:
        http://172.22.50.205:8082/update/lumpy-release/R27-3837.0.0
    return lumpy-release/R27-3837.0.0

    @param update_url: url to the image to update to.
    @returns a string representing the image name in the update_url.

    """
    return '/'.join(urlparse.urlparse(update_url).path.split('/')[-2:])


def _get_devserver_build_from_update_url(update_url):
    """Get the devserver and build from the update url.

    @param update_url: The url for update.
        Eg: http://devserver:port/update/build.

    @return: The groups captured by matching the configured
             image_url_pattern against update_url; for the expected pattern
             this is (devserver url, build).

    @raises ValueError: If the update_url doesn't match the expected pattern.
    @raises ValueError: If no global_config was found, or it doesn't contain an
                        image_url_pattern.
    """
    pattern = global_config.global_config.get_config_value(
            'CROS', 'image_url_pattern', type=str, default='')
    if not pattern:
        raise ValueError('Cannot parse update_url, the global config needs '
                         'an image_url_pattern.')
    # Turn every '%s' placeholder of the configured pattern into a capture
    # group. Raw string so the backslash reaches the regex engine untouched.
    re_pattern = pattern.replace('%s', r'(\S+)')
    parts = re.search(re_pattern, update_url)
    if not parts or len(parts.groups()) < 2:
        raise ValueError('%s is not an update url' % update_url)
    return parts.groups()


def list_image_dir_contents(update_url):
    """Lists the contents of the devserver for a given build/update_url.

    This is a best-effort debugging aid: every failure is logged as a warning
    and swallowed, nothing is raised and nothing is returned.

    @param update_url: An update url. Eg: http://devserver:port/update/build.
    """
    if not update_url:
        logging.warning('Need update_url to list contents of the devserver.')
        return
    error_msg = 'Cannot check contents of devserver, update url %s' % update_url
    try:
        devserver_url, build = _get_devserver_build_from_update_url(update_url)
    except ValueError as e:
        logging.warning('%s: %s', error_msg, e)
        return
    devserver = dev_server.ImageServer(devserver_url)
    try:
        devserver.list_image_dir(build)
    # The devserver will retry on URLError to avoid flaky connections, but will
    # eventually raise the URLError if it persists. All HTTPErrors get
    # converted to DevServerExceptions.
    except (dev_server.DevServerException, urllib2.URLError) as e:
        logging.warning('%s: %s', error_msg, e)


# TODO(garnold) This implements shared updater functionality needed for
# supporting the autoupdate_EndToEnd server-side test. We should probably
# migrate more of the existing ChromiumOSUpdater functionality to it as we
# expand non-CrOS support in other tests.
class BaseUpdater(object):
    """Platform-agnostic DUT update functionality."""

    def __init__(self, updater_ctrl_bin, update_url, host):
        """Initializes the object.

        @param updater_ctrl_bin: Path to update_engine_client.
        @param update_url: The URL we want the update to use.
        @param host: A client.common_lib.hosts.Host implementation.
        """
        self.updater_ctrl_bin = updater_ctrl_bin
        self.update_url = update_url
        self.host = host
        # Update methods put the errors they raise on this queue so that a
        # caller running them in separate processes can read them back.
        self._update_error_queue = multiprocessing.Queue(2)


    def check_update_status(self):
        """Returns the current update engine state.

        We use the `update_engine_client -status' command and parse the line
        indicating the update state, e.g. "CURRENT_OP=UPDATE_STATUS_IDLE".

        @returns the text following the last '=' of the CURRENT_OP line.
        """
        update_status = self.host.run(
                '%s -status 2>&1 | grep CURRENT_OP' % self.updater_ctrl_bin)
        return update_status.stdout.strip().split('=')[-1]


    def trigger_update(self):
        """Triggers a background update.

        @raise RootFSUpdateError if anything went wrong.
        """
        autoupdate_cmd = ('%s --check_for_update --omaha_url=%s' %
                          (self.updater_ctrl_bin, self.update_url))
        err_msg = 'Failed to trigger an update on %s.' % self.host.hostname
        logging.info('Triggering update via: %s', autoupdate_cmd)
        try:
            self.host.run(autoupdate_cmd)
        except (error.AutoservSshPermissionDeniedError,
                error.AutoservSSHTimeout) as e:
            err_msg += ' SSH reports an error: %s' % type(e).__name__
            raise RootFSUpdateError(err_msg)
        except error.AutoservRunError as e:
            # Check if the exit code is 255, if so it's probably a generic
            # SSH error.
            result = e.args[1]
            if result.exit_status == 255:
                err_msg += (' SSH reports a generic error (255), which could '
                            'indicate a problem with underlying connectivity '
                            'layers.')
                raise RootFSUpdateError(err_msg)

            # We have ruled out all SSH cases, the error code is from
            # update_engine_client, though we still don't know why.
            list_image_dir_contents(self.update_url)
            err_msg += (' It could be that the devserver is unreachable, the '
                        'payload unavailable, or there is a bug in the update '
                        'engine (unlikely). Reported error: %s' %
                        type(e).__name__)
            raise RootFSUpdateError(err_msg)


    def _verify_update_completed(self):
        """Verifies that an update has completed.

        @raise RootFSUpdateError: if verification fails.
        """
        status = self.check_update_status()
        if status != UPDATER_NEED_REBOOT:
            raise RootFSUpdateError('Update did not complete with correct '
                                    'status. Expecting %s, actual %s' %
                                    (UPDATER_NEED_REBOOT, status))


    def update_image(self):
        """Updates the device image and verifies success.

        Every error raised here is also put on self._update_error_queue.

        @raise RootFSUpdateError: if the update command fails or the update
                                  engine does not end up in the
                                  UPDATER_NEED_REBOOT state.
        """
        try:
            autoupdate_cmd = ('%s --update --omaha_url=%s 2>&1' %
                              (self.updater_ctrl_bin, self.update_url))
            self.host.run(autoupdate_cmd, timeout=3600)
        except error.AutoservRunError as e:
            list_image_dir_contents(self.update_url)
            update_error = RootFSUpdateError(
                    'Failed to install device image using payload at %s '
                    'on %s: %s' %
                    (self.update_url, self.host.hostname, e))
            self._update_error_queue.put(update_error)
            raise update_error
        except Exception as e:
            # Don't allow other exceptions to not be caught.
            self._update_error_queue.put(e)
            # Bare raise keeps the original traceback.
            raise

        try:
            self._verify_update_completed()
        except RootFSUpdateError as e:
            self._update_error_queue.put(e)
            raise


class ChromiumOSUpdater(BaseUpdater):
    """Helper class used to update DUT with image of desired version."""
    # NOTE: the attribute name is misspelled ('STATEUL'); it is kept unchanged
    # because it is a public class attribute.
    REMOTE_STATEUL_UPDATE_PATH = '/usr/local/bin/stateful_update'
    UPDATER_BIN = '/usr/bin/update_engine_client'
    STATEFUL_UPDATE = '/tmp/stateful_update'
    UPDATED_MARKER = '/var/run/update_engine_autoupdate_completed'
    UPDATER_LOGS = ['/var/log/messages', '/var/log/update_engine']

    KERNEL_A = {'name': 'KERN-A', 'kernel': 2, 'root': 3}
    KERNEL_B = {'name': 'KERN-B', 'kernel': 4, 'root': 5}
    # Time to wait for new kernel to be marked successful after
    # auto update.
    KERNEL_UPDATE_TIMEOUT = 120


    def __init__(self, update_url, host=None, local_devserver=False):
        """Initializes the object.

        @param update_url: The URL we want the update to use.
        @param host: A client.common_lib.hosts.Host implementation.
        @param local_devserver: If True, no expected version is derived from
                                update_url and self.update_version is None.
        """
        super(ChromiumOSUpdater, self).__init__(self.UPDATER_BIN, update_url,
                                                host)
        self.local_devserver = local_devserver
        if not local_devserver:
            self.update_version = url_to_version(update_url)
        else:
            self.update_version = None


    def reset_update_engine(self):
        """Resets the host to prepare for a clean update regardless of state.

        @raise ChromiumOSError: if the update engine is not idle after being
                                restarted.
        """
        self._run('rm -f %s' % self.UPDATED_MARKER)
        self._run('stop ui || true')
        self._run('stop update-engine || true')
        self._run('start update-engine')

        if self.check_update_status() != UPDATER_IDLE:
            raise ChromiumOSError('%s is not in an installable state' %
                                  self.host.hostname)


    def _run(self, cmd, *args, **kwargs):
        """Abbreviated form of self.host.run(...)"""
        return self.host.run(cmd, *args, **kwargs)


    def rootdev(self, options=''):
        """Returns the stripped output of rootdev <options>.

        @param options: options to run rootdev.

        """
        return self._run('rootdev %s' % options).stdout.strip()


    def get_kernel_state(self):
        """Returns the (<active>, <inactive>) kernel state as a pair.

        @raise ChromiumOSError: if the root partition number cannot be parsed
                                from the rootdev output or is neither
                                KERNEL_A's nor KERNEL_B's root.
        """
        rootdev_output = self.rootdev('-s')
        # The partition number is the run of digits ending the device name.
        partition_match = re.search(r'\d+\Z', rootdev_output)
        if not partition_match:
            raise ChromiumOSError('Could not parse root partition number '
                                  'from rootdev output: %r' % rootdev_output)
        active_root = int(partition_match.group(0))
        if active_root == self.KERNEL_A['root']:
            return self.KERNEL_A, self.KERNEL_B
        elif active_root == self.KERNEL_B['root']:
            return self.KERNEL_B, self.KERNEL_A
        else:
            raise ChromiumOSError('Encountered unknown root partition: %s' %
                                  active_root)


    def _cgpt(self, flag, kernel, dev='$(rootdev -s -d)'):
        """Return numeric cgpt value for the specified flag, kernel, device.

        @param flag: the cgpt show option selecting the value, e.g. '-P'.
        @param kernel: information of the given kernel, KERNEL_A or KERNEL_B.
        @param dev: device (or shell expression yielding it) to query.

        """
        return int(self._run('cgpt show -n -i %d %s %s' % (
                kernel['kernel'], flag, dev)).stdout.strip())


    def get_kernel_priority(self, kernel):
        """Return numeric priority for the specified kernel.

        @param kernel: information of the given kernel, KERNEL_A or KERNEL_B.

        """
        return self._cgpt('-P', kernel)


    def get_kernel_success(self, kernel):
        """Return boolean success flag for the specified kernel.

        @param kernel: information of the given kernel, KERNEL_A or KERNEL_B.

        """
        return self._cgpt('-S', kernel) != 0


    def get_kernel_tries(self, kernel):
        """Return tries count for the specified kernel.

        @param kernel: information of the given kernel, KERNEL_A or KERNEL_B.

        """
        return self._cgpt('-T', kernel)


    def get_stateful_update_script(self):
        """Returns the path to the stateful update script on the target."""
        # We attempt to load the local stateful update path in 3 different
        # ways. First we use the location specified in the autotest global
        # config. If this doesn't exist, we attempt to use the Chromium OS
        # Chroot path to the installed script. If all else fails, we use the
        # stateful update script on the host.
        stateful_update_path = os.path.join(
                global_config.global_config.get_config_value(
                        'CROS', 'source_tree', default=''),
                LOCAL_STATEFUL_UPDATE_PATH)

        if not os.path.exists(stateful_update_path):
            logging.warning('Could not find Chrome OS source location for '
                            'stateful_update script at %s, falling back to '
                            'chroot copy.', stateful_update_path)
            stateful_update_path = LOCAL_CHROOT_STATEFUL_UPDATE_PATH

        if not os.path.exists(stateful_update_path):
            logging.warning('Could not chroot stateful_update script, falling '
                            'back on client copy.')
            statefuldev_script = self.REMOTE_STATEUL_UPDATE_PATH
        else:
            # A local copy exists: push it to the host and use that copy.
            self.host.send_file(
                    stateful_update_path, self.STATEFUL_UPDATE,
                    delete_dest=True)
            statefuldev_script = self.STATEFUL_UPDATE

        return statefuldev_script


    def reset_stateful_partition(self):
        """Clear any pending stateful update request."""
        statefuldev_cmd = [self.get_stateful_update_script()]
        statefuldev_cmd += ['--stateful_change=reset', '2>&1']
        self._run(' '.join(statefuldev_cmd))


    def revert_boot_partition(self):
        """Revert the boot partition.

        @returns the result of running /postinst on the current root device.
        """
        part = self.rootdev('-s')
        logging.warning('Reverting update; Boot partition will be %s', part)
        return self._run('/postinst %s 2>&1' % part)


    def rollback_rootfs(self, powerwash):
        """Triggers rollback and waits for it to complete.

        @param powerwash: If true, powerwash as part of rollback.

        @raise RootFSUpdateError if anything went wrong.

        """
        version = self.host.get_release_version()
        # Introduced can_rollback in M36 (build 5772). The release version
        # matches X.Y.Z. This version split just pulls the first part out.
        try:
            build_number = int(version.split('.')[0])
        except ValueError:
            logging.error('Could not parse build number.')
            build_number = 0

        if build_number >= 5772:
            can_rollback_cmd = '%s --can_rollback' % self.UPDATER_BIN
            logging.info('Checking for rollback.')
            try:
                self._run(can_rollback_cmd)
            except error.AutoservRunError as e:
                raise RootFSUpdateError("Rollback isn't possible on %s: %s" %
                                        (self.host.hostname, str(e)))

        rollback_cmd = '%s --rollback --follow' % self.UPDATER_BIN
        if not powerwash:
            rollback_cmd += ' --nopowerwash'

        logging.info('Performing rollback.')
        try:
            self._run(rollback_cmd)
        except error.AutoservRunError as e:
            raise RootFSUpdateError('Rollback failed on %s: %s' %
                                    (self.host.hostname, str(e)))

        self._verify_update_completed()


    # TODO(garnold) This is here for backward compatibility and should be
    # deprecated once we shift to using update_image() everywhere.
    def update_rootfs(self):
        """Run the standard command to force an update."""
        return self.update_image()


    def update_stateful(self, clobber=True):
        """Updates the stateful partition.

        Every error raised here is also put on self._update_error_queue.

        @param clobber: If True, a clean stateful installation.

        @raise StatefulUpdateError: if the stateful update command fails.
        """
        logging.info('Updating stateful partition...')
        # NOTE(review): this replaces every occurrence of 'update' in the URL,
        # not only the path component; confirm that host and build names can
        # never contain it.
        statefuldev_url = self.update_url.replace('update',
                                                  'static')

        # Attempt stateful partition update; this must succeed so that the newly
        # installed host is testable after update.
        statefuldev_cmd = [self.get_stateful_update_script(), statefuldev_url]
        if clobber:
            statefuldev_cmd.append('--stateful_change=clean')

        statefuldev_cmd.append('2>&1')
        try:
            self._run(' '.join(statefuldev_cmd), timeout=600)
        except error.AutoservRunError:
            update_error = StatefulUpdateError(
                    'Failed to perform stateful update on %s' %
                    self.host.hostname)
            self._update_error_queue.put(update_error)
            raise update_error
        except Exception as e:
            # Don't allow other exceptions to not be caught.
            self._update_error_queue.put(e)
            # Bare raise keeps the original traceback.
            raise


    def run_update(self, update_root=True):
        """Update the DUT with image of specific version.

        @param update_root: True to force a rootfs update.

        @raise ChromiumOSError: if the update server cannot be reached.
        """
        booted_version = self.host.get_release_version()
        if self.update_version:
            logging.info('Updating from version %s to %s.',
                         booted_version, self.update_version)

        # Check that Dev Server is accepting connections (from autoserv's host).
        # If we can't talk to it, the machine host probably can't either.
        auserver_host = urlparse.urlparse(self.update_url)[1]
        connection = httplib.HTTPConnection(auserver_host)
        try:
            connection.connect()
        except IOError:
            raise ChromiumOSError(
                    'Update server at %s not available' % auserver_host)
        finally:
            # The connection is only a reachability probe; always release it.
            connection.close()

        logging.info('Installing from %s to %s', self.update_url,
                     self.host.hostname)

        # Reset update state.
        self.reset_update_engine()
        self.reset_stateful_partition()

        try:
            updaters = [
                multiprocessing.process.Process(target=self.update_rootfs),
                multiprocessing.process.Process(target=self.update_stateful)
            ]
            if not update_root:
                logging.info('Root update is skipped.')
                updaters = updaters[1:]

            # Run the updaters in parallel.
            for updater in updaters:
                updater.start()
            for updater in updaters:
                updater.join()

            # Re-raise the first error that occurred.
            if not self._update_error_queue.empty():
                update_error = self._update_error_queue.get()
                self.revert_boot_partition()
                self.reset_stateful_partition()
                raise update_error

            logging.info('Update complete.')
        except:
            # Deliberately bare: logs are collected for any failure and the
            # exception is always re-raised below.
            # Collect update engine logs in the event of failure.
            if self.host.job:
                logging.info('Collecting update engine logs...')
                self.host.get_file(
                        self.UPDATER_LOGS, self.host.job.sysinfo.sysinfodir,
                        preserve_perm=False)
            list_image_dir_contents(self.update_url)
            raise
        finally:
            self.host.show_update_engine_log()


    def check_version(self):
        """Check the image running in DUT has the desired version.

        @returns: True if the DUT's image version matches the version that
                  the autoupdater tries to update to, False otherwise
                  (including when no update version is known).

        """
        booted_version = self.host.get_release_version()
        return bool(self.update_version and
                    self.update_version.endswith(booted_version))


    def check_version_to_confirm_install(self):
        """Check image running in DUT has the desired version to be installed.

        The method should not be used to check if DUT needs to have a full
        reimage. Only use it to confirm a image is installed.

        The method is designed to verify version for following 6 scenarios with
        samples of version to update to and expected booted version:
        1. trybot paladin build.
        update version: trybot-lumpy-paladin/R27-3837.0.0-b123
        booted version: 3837.0.2013_03_21_1340

        2. trybot release build.
        update version: trybot-lumpy-release/R27-3837.0.0-b456
        booted version: 3837.0.0

        3. buildbot official release build.
        update version: lumpy-release/R27-3837.0.0
        booted version: 3837.0.0

        4. non-official paladin rc build.
        update version: lumpy-paladin/R27-3878.0.0-rc7
        booted version: 3837.0.0-rc7

        5. chrome-perf build.
        update version: lumpy-chrome-perf/R28-3837.0.0-b2996
        booted version: 3837.0.0

        6. pgo-generate build.
        update version: lumpy-release-pgo-generate/R28-3837.0.0-b2996
        booted version: 3837.0.0-pgo-generate

        When we are checking if a DUT needs to do a full install, we should NOT
        use this method to check if the DUT is running the same version, since
        it may return false positive for a DUT running trybot paladin build to
        be updated to another trybot paladin build.

        TODO: This logic has a bug if a trybot paladin build failed to be
        installed in a DUT running an older trybot paladin build with same
        platform number, but different build number (-b###). So to conclusively
        determine if a tryjob paladin build is imaged successfully, we may need
        to find out the date string from update url.

        @returns: True if the DUT's image version (without the date string if
                  the image is a trybot build), matches the version that the
                  autoupdater is trying to update to.

        """
        # In the local_devserver case, we can't know the expected
        # build, so just pass.
        if not self.update_version:
            return True

        # Always try the default check_version method first, this prevents
        # any backward compatibility issue.
        if self.check_version():
            return True

        return utils.version_match(self.update_version,
                                   self.host.get_release_version(),
                                   self.update_url)


    def verify_boot_expectations(self, expected_kernel_state, rollback_message):
        """Verifies that we fully booted given expected kernel state.

        This method both verifies that we booted using the correct kernel
        state and that the OS has marked the kernel as good.

        @param expected_kernel_state: kernel state that we are verifying with
            i.e. I expect to be booted onto partition 4 etc. See output of
            get_kernel_state.
        @param rollback_message: string to raise as a ChromiumOSError
            if we booted with the wrong partition.

        @raises ChromiumOSError: If we booted with the wrong partition, or the
            active kernel was not marked successful within
            KERNEL_UPDATE_TIMEOUT seconds.
        """
        # Figure out the newly active kernel.
        active_kernel_state = self.get_kernel_state()[0]

        # Check for rollback due to a bad build.
        if (expected_kernel_state and
                active_kernel_state != expected_kernel_state):

            # Kernel crash reports should be wiped between test runs, but
            # may persist from earlier parts of the test, or from problems
            # with provisioning.
            #
            # Kernel crash reports will NOT be present if the crash happened
            # before encrypted stateful is mounted.
            #
            # TODO(dgarrett): Integrate with server/crashcollect.py at some
            # point.
            #
            # NOTE(review): glob.glob inspects the filesystem of the machine
            # running this code, not the host reached through self.host --
            # confirm this is intended.
            kernel_crashes = glob.glob('/var/spool/crash/kernel.*.kcrash')
            if kernel_crashes:
                rollback_message += ': kernel_crash'
                logging.debug('Found %d kernel crash reports:',
                              len(kernel_crashes))
                # The crash names contain timestamps that may be useful:
                #   kernel.20131207.005945.0.kcrash
                for crash in kernel_crashes:
                    logging.debug('  %s', os.path.basename(crash))

            # Print out some information to make it easier to debug
            # the rollback.
            logging.debug('Dumping partition table.')
            self._run('cgpt show $(rootdev -s -d)')
            logging.debug('Dumping crossystem for firmware debugging.')
            self._run('crossystem --all')
            raise ChromiumOSError(rollback_message)

        # Make sure chromeos-setgoodkernel runs.
        try:
            utils.poll_for_condition(
                    lambda: (self.get_kernel_tries(active_kernel_state) == 0
                             and self.get_kernel_success(active_kernel_state)),
                    exception=ChromiumOSError(),
                    timeout=self.KERNEL_UPDATE_TIMEOUT, sleep_interval=5)
        except ChromiumOSError:
            # Use the system-services status to pick the more likely
            # explanation for the timeout.
            services_status = self._run('status system-services').stdout
            if services_status != 'system-services start/running\n':
                event = ('Chrome failed to reach login screen')
            else:
                event = ('update-engine failed to call '
                         'chromeos-setgoodkernel')
            raise ChromiumOSError(
                    'After update and reboot, %s '
                    'within %d seconds' % (event,
                                           self.KERNEL_UPDATE_TIMEOUT))


class BrilloUpdater(BaseUpdater):
    """Helper class for updating a Brillo DUT."""

    def __init__(self, update_url, host=None):
        """Initialize the object.

        @param update_url: The URL we want the update to use.
        @param host: A client.common_lib.hosts.Host implementation.
        """
        super(BrilloUpdater, self).__init__(
                '/system/bin/update_engine_client', update_url, host)