# autoupdater.py revision 4c50c54c3cd1b4c9360b27b31aa276b215dcaab9
# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import glob
import httplib
import logging
import multiprocessing
import os
import re
import urlparse
import urllib2

from autotest_lib.client.bin import utils
from autotest_lib.client.common_lib import error, global_config
from autotest_lib.client.common_lib.cros import dev_server

# Local stateful update path is relative to the CrOS source directory.
LOCAL_STATEFUL_UPDATE_PATH = 'src/platform/dev/stateful_update'
LOCAL_CHROOT_STATEFUL_UPDATE_PATH = '/usr/bin/stateful_update'
REMOTE_STATEFUL_UPDATE_PATH = '/usr/local/bin/stateful_update'
# Misspelled historical name, kept so existing importers keep working.
REMOTE_STATEUL_UPDATE_PATH = REMOTE_STATEFUL_UPDATE_PATH
STATEFUL_UPDATE = '/tmp/stateful_update'
UPDATER_BIN = '/usr/bin/update_engine_client'
UPDATER_IDLE = 'UPDATE_STATUS_IDLE'
UPDATER_NEED_REBOOT = 'UPDATE_STATUS_UPDATED_NEED_REBOOT'
UPDATED_MARKER = '/var/run/update_engine_autoupdate_completed'
UPDATER_LOGS = ['/var/log/messages', '/var/log/update_engine']
# A list of update engine client states that occur after an update is triggered.
# NOTE(review): 'UPDATE_STATUS_CHECKING_FORUPDATE' looks like it is missing an
# underscore before 'UPDATE' -- confirm against update_engine before relying
# on it. The value is left untouched here.
UPDATER_PROCESSING_UPDATE = ['UPDATE_STATUS_CHECKING_FORUPDATE',
                             'UPDATE_STATUS_UPDATE_AVAILABLE',
                             'UPDATE_STATUS_DOWNLOADING',
                             'UPDATE_STATUS_FINALIZING']


class ChromiumOSError(error.InstallError):
    """Generic error for ChromiumOS-specific exceptions."""
    pass


class RootFSUpdateError(ChromiumOSError):
    """Raised when the RootFS fails to update."""
    pass


class StatefulUpdateError(ChromiumOSError):
    """Raised when the stateful partition fails to update."""
    pass


def url_to_version(update_url):
    """Return the version based on update_url.

    @param update_url: url to the image to update to.

    @returns the last path element of the url (after dropping any '/au/...'
             suffix), stripped of surrounding whitespace.

    """
    # The Chrome OS version is generally the last element in the URL. The only
    # exception is delta update URLs, which are rooted under the version; e.g.,
    # http://.../update/.../0.14.755.0/au/0.14.754.0. In this case we want to
    # strip off the au section of the path before reading the version.
    return re.sub('/au/.*', '',
                  urlparse.urlparse(update_url).path).split('/')[-1].strip()


def url_to_image_name(update_url):
    """Return the image name based on update_url.

    From a URL like:
        http://172.22.50.205:8082/update/lumpy-release/R27-3837.0.0
    return lumpy-release/R27-3837.0.0

    @param update_url: url to the image to update to.
    @returns a string representing the image name in the update_url.

    """
    return '/'.join(urlparse.urlparse(update_url).path.split('/')[-2:])


def _get_devserver_build_from_update_url(update_url):
    """Get the devserver and build from the update url.

    The 'image_url_pattern' value of the 'CROS' global config section is
    turned into a regular expression by replacing every '%s' with a
    non-whitespace capture group, and that expression is searched for in
    update_url.

    @param update_url: The url for update.
        Eg: http://devserver:port/update/build.

    @return: The tuple of captured groups; (devserver url, build) for a
             pattern with two '%s' placeholders.

    @raises ValueError: If the update_url doesn't match the expected pattern.
    @raises ValueError: If no global_config was found, or it doesn't contain an
                        image_url_pattern.
    """
    pattern = global_config.global_config.get_config_value(
            'CROS', 'image_url_pattern', type=str, default='')
    if not pattern:
        raise ValueError('Cannot parse update_url, the global config needs '
                         'an image_url_pattern.')
    re_pattern = pattern.replace('%s', r'(\S+)')
    parts = re.search(re_pattern, update_url)
    if not parts or len(parts.groups()) < 2:
        raise ValueError('%s is not an update url' % update_url)
    return parts.groups()


def list_image_dir_contents(update_url):
    """Lists the contents of the devserver for a given build/update_url.

    This is a best-effort debugging aid: every failure is logged as a
    warning and swallowed, nothing is returned.

    @param update_url: An update url. Eg: http://devserver:port/update/build.
    """
    if not update_url:
        logging.warning('Need update_url to list contents of the devserver.')
        return
    error_msg = 'Cannot check contents of devserver, update url %s' % update_url
    try:
        devserver_url, build = _get_devserver_build_from_update_url(update_url)
    except ValueError as e:
        logging.warning('%s: %s', error_msg, e)
        return
    devserver = dev_server.ImageServer(devserver_url)
    try:
        devserver.list_image_dir(build)
    # The devserver will retry on URLError to avoid flaky connections, but will
    # eventually raise the URLError if it persists. All HTTPErrors get
    # converted to DevServerExceptions.
    except (dev_server.DevServerException, urllib2.URLError) as e:
        logging.warning('%s: %s', error_msg, e)


class ChromiumOSUpdater(object):
    """Helper class used to update DUT with image of desired version."""
    # Partition numbers of the kernel/rootfs pair for each boot slot.
    KERNEL_A = {'name': 'KERN-A', 'kernel': 2, 'root': 3}
    KERNEL_B = {'name': 'KERN-B', 'kernel': 4, 'root': 5}
    # Time to wait for new kernel to be marked successful after
    # auto update.
    KERNEL_UPDATE_TIMEOUT = 120


    def __init__(self, update_url, host=None, local_devserver=False):
        """Initialize the updater.

        @param update_url: url to the image to update to.
        @param host: host object whose run/send_file/get_file methods are
                     used to drive the DUT.
        @param local_devserver: If True, no expected version is derived from
                                update_url (update_version is None).

        """
        self.host = host
        self.update_url = update_url
        # Errors raised inside the updater subprocesses are handed back to
        # run_update() through this queue (one slot per updater).
        self._update_error_queue = multiprocessing.Queue(2)
        self.local_devserver = local_devserver
        if not local_devserver:
            self.update_version = url_to_version(update_url)
        else:
            self.update_version = None


    def check_update_status(self):
        """Return current status from update-engine."""
        update_status = self._run(
                '%s -status 2>&1 | grep CURRENT_OP' % UPDATER_BIN)
        return update_status.stdout.strip().split('=')[-1]


    def reset_update_engine(self):
        """Restarts the update-engine service.

        @raise ChromiumOSError: if update-engine is not idle after the
                                restart.

        """
        self._run('rm -f %s' % UPDATED_MARKER)
        try:
            self._run('initctl stop update-engine')
        except error.AutoservRunError:
            logging.warning('Stopping update-engine service failed. '
                            'Already dead?')
        self._run('initctl start update-engine')

        if self.check_update_status() != UPDATER_IDLE:
            raise ChromiumOSError('%s is not in an installable state' %
                                  self.host.hostname)


    def _run(self, cmd, *args, **kwargs):
        """Abbreviated form of self.host.run(...)"""
        return self.host.run(cmd, *args, **kwargs)


    def rootdev(self, options=''):
        """Returns the stripped output of rootdev <options>.

        @param options: options to run rootdev.

        """
        return self._run('rootdev %s' % options).stdout.strip()


    def get_kernel_state(self):
        """Returns the (<active>, <inactive>) kernel state as a pair.

        @raise ChromiumOSError: if the active root partition number cannot be
                                parsed from 'rootdev -s' or is neither
                                KERNEL_A's nor KERNEL_B's root partition.

        """
        root_device = self.rootdev('-s')
        # The partition number is the run of digits ending the device name.
        match = re.search(r'\d+\Z', root_device)
        if not match:
            raise ChromiumOSError('Could not determine root partition from '
                                  'rootdev output: %r' % root_device)
        active_root = int(match.group(0))
        if active_root == self.KERNEL_A['root']:
            return self.KERNEL_A, self.KERNEL_B
        elif active_root == self.KERNEL_B['root']:
            return self.KERNEL_B, self.KERNEL_A
        else:
            raise ChromiumOSError('Encountered unknown root partition: %s' %
                                  active_root)


    def _cgpt(self, flag, kernel, dev='$(rootdev -s -d)'):
        """Return numeric cgpt value for the specified flag, kernel, device.

        @param flag: cgpt option selecting the value to show, e.g. '-P'.
        @param kernel: information of the given kernel, KERNEL_A or KERNEL_B.
        @param dev: device expression passed to cgpt; by default a shell
                    substitution evaluated on the host.

        """
        return int(self._run('cgpt show -n -i %d %s %s' % (
                kernel['kernel'], flag, dev)).stdout.strip())


    def get_kernel_priority(self, kernel):
        """Return numeric priority for the specified kernel.

        @param kernel: information of the given kernel, KERNEL_A or KERNEL_B.

        """
        return self._cgpt('-P', kernel)


    def get_kernel_success(self, kernel):
        """Return boolean success flag for the specified kernel.

        @param kernel: information of the given kernel, KERNEL_A or KERNEL_B.

        """
        return self._cgpt('-S', kernel) != 0


    def get_kernel_tries(self, kernel):
        """Return tries count for the specified kernel.

        @param kernel: information of the given kernel, KERNEL_A or KERNEL_B.

        """
        return self._cgpt('-T', kernel)


    def get_stateful_update_script(self):
        """Returns the path to the stateful update script on the target."""
        # We attempt to load the local stateful update path in 3 different
        # ways. First we use the location specified in the autotest global
        # config. If this doesn't exist, we attempt to use the Chromium OS
        # Chroot path to the installed script. If all else fails, we use the
        # stateful update script on the host.
        stateful_update_path = os.path.join(
                global_config.global_config.get_config_value(
                        'CROS', 'source_tree', default=''),
                LOCAL_STATEFUL_UPDATE_PATH)

        if not os.path.exists(stateful_update_path):
            logging.warning('Could not find Chrome OS source location for '
                            'stateful_update script at %s, falling back to '
                            'chroot copy.', stateful_update_path)
            stateful_update_path = LOCAL_CHROOT_STATEFUL_UPDATE_PATH

        if not os.path.exists(stateful_update_path):
            logging.warning('Could not chroot stateful_update script, falling '
                            'back on client copy.')
            statefuldev_script = REMOTE_STATEFUL_UPDATE_PATH
        else:
            # A local copy exists: push it to the host and run it from there.
            self.host.send_file(
                    stateful_update_path, STATEFUL_UPDATE, delete_dest=True)
            statefuldev_script = STATEFUL_UPDATE

        return statefuldev_script


    def reset_stateful_partition(self):
        """Clear any pending stateful update request."""
        statefuldev_cmd = [self.get_stateful_update_script()]
        statefuldev_cmd += ['--stateful_change=reset', '2>&1']
        self._run(' '.join(statefuldev_cmd))


    def revert_boot_partition(self):
        """Revert the boot partition.

        @returns the result of running /postinst against the currently
                 booted root device.

        """
        part = self.rootdev('-s')
        logging.warning('Reverting update; Boot partition will be %s', part)
        return self._run('/postinst %s 2>&1' % part)


    def trigger_update(self):
        """Triggers a background update on a test image.

        @raise RootFSUpdateError if anything went wrong.

        """
        autoupdate_cmd = '%s --check_for_update --omaha_url=%s' % (
                UPDATER_BIN, self.update_url)
        logging.info('Triggering update via: %s', autoupdate_cmd)
        try:
            self._run(autoupdate_cmd)
        except (error.AutoservSshPermissionDeniedError,
                error.AutoservSSHTimeout) as e:
            raise RootFSUpdateError('SSH on %s is seeing %s' %
                                    (self.host.hostname, type(e).__name__))
        except error.AutoservRunError as e:

            # Check if the exit code is 255, if so it's probably a generic
            # SSH error.
            result = e.args[1]
            if result.exit_status == 255:
                raise RootFSUpdateError('SSH on %s is seeing a generic error.' %
                                        self.host.hostname)

            # We have ruled out all SSH cases, the error code is from
            # update_engine_client, though we still don't know why.
            list_image_dir_contents(self.update_url)
            raise RootFSUpdateError(
                    'devserver unreachable, payload unavailable, '
                    'or AU bug (unlikely) on %s: %s' %
                    (self.host.hostname, type(e).__name__))


    def _verify_update_completed(self):
        """Verifies that an update has completed.

        @raise RootFSUpdateError: if verification fails.
        """
        status = self.check_update_status()
        if status != UPDATER_NEED_REBOOT:
            raise RootFSUpdateError('Update did not complete with correct '
                                    'status. Expecting %s, actual %s' %
                                    (UPDATER_NEED_REBOOT, status))


    def rollback_rootfs(self, powerwash):
        """Triggers rollback and waits for it to complete.

        @param powerwash: If true, powerwash as part of rollback.

        @raise RootFSUpdateError if anything went wrong.

        """
        can_rollback_cmd = '%s --can_rollback' % (UPDATER_BIN)
        logging.info('Checking for rollback.')
        try:
            self._run(can_rollback_cmd)
        except error.AutoservRunError as e:
            raise RootFSUpdateError("Rollback isn't possible on %s: %s" %
                                    (self.host.hostname, str(e)))

        rollback_cmd = '%s --rollback --follow' % (UPDATER_BIN)
        if not powerwash:
            rollback_cmd += ' --nopowerwash'

        logging.info('Performing rollback.')
        try:
            self._run(rollback_cmd)
        except error.AutoservRunError as e:
            raise RootFSUpdateError('Rollback failed on %s: %s' %
                                    (self.host.hostname, str(e)))

        self._verify_update_completed()


    def update_rootfs(self):
        """Updates the rootfs partition only.

        Any failure is both put on the update error queue (so that
        run_update() can see it from the parent process) and raised.

        @raise RootFSUpdateError: if update-engine fails or does not end in
                                  the need-reboot state.

        """
        logging.info('Updating root partition...')

        # Run update_engine using the specified URL.
        try:
            autoupdate_cmd = '%s --update --omaha_url=%s 2>&1' % (
                    UPDATER_BIN, self.update_url)
            self._run(autoupdate_cmd, timeout=900)
        except error.AutoservRunError:
            list_image_dir_contents(self.update_url)
            update_error = RootFSUpdateError('update-engine failed on %s' %
                                             self.host.hostname)
            self._update_error_queue.put(update_error)
            raise update_error

        try:
            self._verify_update_completed()
        except RootFSUpdateError as e:
            self._update_error_queue.put(e)
            raise


    def update_stateful(self, clobber=True):
        """Updates the stateful partition.

        Any failure is both put on the update error queue (so that
        run_update() can see it from the parent process) and raised.

        @param clobber: If True, a clean stateful installation.

        @raise StatefulUpdateError: if the stateful_update script fails.
        """
        logging.info('Updating stateful partition...')
        # NOTE(review): this replaces every occurrence of 'update' in the
        # url, not only the '/update/' path component -- confirm no devserver
        # host or build name can contain that substring.
        statefuldev_url = self.update_url.replace('update',
                                                  'static')

        # Attempt stateful partition update; this must succeed so that the newly
        # installed host is testable after update.
        statefuldev_cmd = [self.get_stateful_update_script(), statefuldev_url]
        if clobber:
            statefuldev_cmd.append('--stateful_change=clean')

        statefuldev_cmd.append('2>&1')
        try:
            self._run(' '.join(statefuldev_cmd), timeout=600)
        except error.AutoservRunError:
            update_error = StatefulUpdateError('stateful_update failed on %s' %
                                               self.host.hostname)
            self._update_error_queue.put(update_error)
            raise update_error


    def run_update(self, force_update, update_root=True):
        """Update the DUT with image of specific version.

        @param force_update: True to update DUT even if it's running the same
            version already.
        @param update_root: True to force a kernel update. If it's False and
            force_update is True, stateful update will be used to clean up
            the DUT.

        @returns: True if an update was performed, False if it was skipped
            because the DUT already runs the requested version.

        @raise ChromiumOSError: if the update server is unreachable, the DUT
            is not in an installable state, or one of the updaters failed.

        """
        booted_version = self.get_build_id()
        if (self.check_version() and not force_update):
            logging.info('System is already up to date. Skipping update.')
            return False

        if self.update_version:
            logging.info('Updating from version %s to %s.',
                         booted_version, self.update_version)

        # Check that Dev Server is accepting connections (from autoserv's host).
        # If we can't talk to it, the machine host probably can't either.
        auserver_host = urlparse.urlparse(self.update_url)[1]
        connection = httplib.HTTPConnection(auserver_host)
        try:
            connection.connect()
        except IOError:
            raise ChromiumOSError(
                    'Update server at %s not available' % auserver_host)
        finally:
            # The connection is only a reachability probe; do not leak the
            # socket.
            connection.close()

        logging.info('Installing from %s to %s', self.update_url,
                     self.host.hostname)

        # Reset update state.
        self.reset_update_engine()
        self.reset_stateful_partition()

        try:
            updaters = [
                multiprocessing.process.Process(target=self.update_rootfs),
                multiprocessing.process.Process(target=self.update_stateful)
            ]
            if not update_root:
                logging.info('Root update is skipped.')
                updaters = updaters[1:]

            # Run the updaters in parallel.
            for updater in updaters:
                updater.start()
            for updater in updaters:
                updater.join()

            # Re-raise the first error that occurred.
            if not self._update_error_queue.empty():
                update_error = self._update_error_queue.get()
                self.revert_boot_partition()
                self.reset_stateful_partition()
                raise update_error

            logging.info('Update complete.')
            return True
        except:
            # Deliberately bare: whatever went wrong is re-raised below once
            # the diagnostics have been gathered.
            # Collect update engine logs in the event of failure.
            if self.host.job:
                logging.info('Collecting update engine logs...')
                self.host.get_file(
                        UPDATER_LOGS, self.host.job.sysinfo.sysinfodir,
                        preserve_perm=False)
            list_image_dir_contents(self.update_url)
            raise
        finally:
            self.host.show_update_engine_log()


    def check_version(self):
        """Check the image running in DUT has the desired version.

        @returns: True if the DUT's image version matches the version that
            the autoupdater tries to update to. False otherwise, including
            when no update version is known (local devserver).

        """
        booted_version = self.get_build_id()
        return bool(self.update_version and
                    self.update_version.endswith(booted_version))


    def check_version_to_confirm_install(self):
        """Check image running in DUT has the desired version to be installed.

        The method should not be used to check if DUT needs to have a full
        reimage. Only use it to confirm a image is installed.

        The method is designed to verify version for following 6 scenarios with
        samples of version to update to and expected booted version:
        1. trybot paladin build.
        update version: trybot-lumpy-paladin/R27-3837.0.0-b123
        booted version: 3837.0.2013_03_21_1340

        2. trybot release build.
        update version: trybot-lumpy-release/R27-3837.0.0-b456
        booted version: 3837.0.0

        3. buildbot official release build.
        update version: lumpy-release/R27-3837.0.0
        booted version: 3837.0.0

        4. non-official paladin rc build.
        update version: lumpy-paladin/R27-3837.0.0-rc7
        booted version: 3837.0.0-rc7

        5. chrome-perf build.
        update version: lumpy-chrome-perf/R28-3837.0.0-b2996
        booted version: 3837.0.0

        6. pgo-generate build.
        update version: lumpy-release-pgo-generate/R28-3837.0.0-b2996
        booted version: 3837.0.0-pgo-generate

        When we are checking if a DUT needs to do a full install, we should NOT
        use this method to check if the DUT is running the same version, since
        it may return false positive for a DUT running trybot paladin build to
        be updated to another trybot paladin build.

        TODO: This logic has a bug if a trybot paladin build failed to be
        installed in a DUT running an older trybot paladin build with same
        platform number, but different build number (-b###). So to conclusively
        determine if a tryjob paladin build is imaged successfully, we may need
        to find out the date string from update url.

        @returns: True if the DUT's image version (without the date string if
            the image is a trybot build), matches the version that the
            autoupdater is trying to update to.

        """
        # In the local_devserver case, we can't know the expected
        # build, so just pass.
        if not self.update_version:
            return True

        # Always try the default check_version method first, this prevents
        # any backward compatibility issue.
        if self.check_version():
            return True

        # Remove R#- and -b# at the end of build version
        stripped_version = re.sub(r'(R\d+-|-b\d+)', '', self.update_version)

        booted_version = self.get_build_id()

        is_trybot_paladin_build = re.match(r'.+trybot-.+-paladin',
                                           self.update_url)

        # Replace date string with 0 in booted_version
        booted_version_no_date = re.sub(r'\d{4}_\d{2}_\d{2}_\d+', '0',
                                        booted_version)
        has_date_string = booted_version != booted_version_no_date

        is_pgo_generate_build = re.match(r'.+-pgo-generate',
                                         self.update_url)

        # Remove |-pgo-generate| in booted_version
        booted_version_no_pgo = booted_version.replace('-pgo-generate', '')
        has_pgo_generate = booted_version != booted_version_no_pgo

        if is_trybot_paladin_build:
            if not has_date_string:
                logging.error('A trybot paladin build is expected. Version ' +
                              '"%s" is not a paladin build.', booted_version)
                return False
            return stripped_version == booted_version_no_date
        elif is_pgo_generate_build:
            if not has_pgo_generate:
                logging.error('A pgo-generate build is expected. Version ' +
                              '"%s" is not a pgo-generate build.',
                              booted_version)
                return False
            return stripped_version == booted_version_no_pgo
        else:
            if has_date_string:
                logging.error('Unexpected date found in a non trybot paladin' +
                              ' build.')
                return False
            # Versioned build, i.e., rc or release build.
            return stripped_version == booted_version


    def get_build_id(self):
        """Pulls the CHROMEOS_RELEASE_VERSION string from /etc/lsb-release."""
        return self._run('grep CHROMEOS_RELEASE_VERSION'
                         ' /etc/lsb-release').stdout.split('=')[1].strip()


    def verify_boot_expectations(self, expected_kernel_state, rollback_message):
        """Verifies that we fully booted given expected kernel state.

        This method both verifies that we booted using the correct kernel
        state and that the OS has marked the kernel as good.

        @param expected_kernel_state: kernel state that we are verifying with
            i.e. I expect to be booted onto partition 4 etc. See output of
            get_kernel_state.
        @param rollback_message: string to raise as a ChromiumOSError
            if we booted with the wrong partition.

        @raises ChromiumOSError: If we didn't.
        """
        # Figure out the newly active kernel.
        active_kernel_state = self.get_kernel_state()[0]

        # Check for rollback due to a bad build.
        if (expected_kernel_state and
                active_kernel_state != expected_kernel_state):

            # Kernel crash reports should be wiped between test runs, but
            # may persist from earlier parts of the test, or from problems
            # with provisioning.
            #
            # Kernel crash reports will NOT be present if the crash happened
            # before encrypted stateful is mounted.
            #
            # TODO(dgarrett): Integrate with server/crashcollect.py at some
            # point.
            # NOTE(review): glob.glob() inspects the filesystem of the machine
            # executing this code, whereas every other check here goes through
            # self._run() -- confirm this is the intended place to look.
            kernel_crashes = glob.glob('/var/spool/crash/kernel.*.kcrash')
            if kernel_crashes:
                rollback_message += ': kernel_crash'
                logging.debug('Found %d kernel crash reports:',
                              len(kernel_crashes))
                # The crash names contain timestamps that may be useful:
                #   kernel.20131207.005945.0.kcrash
                for crash in kernel_crashes:
                    logging.debug(' %s', os.path.basename(crash))

            # Print out some information to make it easier to debug
            # the rollback.
            logging.debug('Dumping partition table.')
            self._run('cgpt show $(rootdev -s -d)')
            logging.debug('Dumping crossystem for firmware debugging.')
            self._run('crossystem --all')
            raise ChromiumOSError(rollback_message)

        # Make sure chromeos-setgoodkernel runs.
        try:
            utils.poll_for_condition(
                    lambda: (self.get_kernel_tries(active_kernel_state) == 0
                             and self.get_kernel_success(active_kernel_state)),
                    exception=ChromiumOSError(),
                    timeout=self.KERNEL_UPDATE_TIMEOUT, sleep_interval=5)
        except ChromiumOSError:
            # Tell "the UI never came up" apart from "the UI is up but the
            # kernel was never marked good".
            services_status = self._run('status system-services').stdout
            if services_status != 'system-services start/running\n':
                event = ('Chrome failed to reach login screen')
            else:
                event = ('update-engine failed to call '
                         'chromeos-setgoodkernel')
            raise ChromiumOSError(
                    'After update and reboot, %s '
                    'within %d seconds' % (event,
                                           self.KERNEL_UPDATE_TIMEOUT))