autoupdater.py revision 5e8c45adeeaaa493462262a1138e53d42caae014
# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Helpers for updating a Chromium OS DUT to a desired image version.

The module drives update_engine_client and the stateful_update script on a
remote host through the host object's run() interface.
"""

import glob
import httplib
import logging
import multiprocessing
import os
import re
import urlparse

from autotest_lib.client.bin import utils
from autotest_lib.client.common_lib import error, global_config

# Local stateful update path is relative to the CrOS source directory.
LOCAL_STATEFUL_UPDATE_PATH = 'src/platform/dev/stateful_update'
LOCAL_CHROOT_STATEFUL_UPDATE_PATH = '/usr/bin/stateful_update'
# NOTE(review): the name is misspelled ("STATEUL"); it is kept as-is because
# code outside this file may import it.
REMOTE_STATEUL_UPDATE_PATH = '/usr/local/bin/stateful_update'
STATEFUL_UPDATE = '/tmp/stateful_update'
UPDATER_BIN = '/usr/bin/update_engine_client'
UPDATER_IDLE = 'UPDATE_STATUS_IDLE'
UPDATER_NEED_REBOOT = 'UPDATE_STATUS_UPDATED_NEED_REBOOT'
UPDATED_MARKER = '/var/run/update_engine_autoupdate_completed'
UPDATER_LOGS = '/var/log/messages /var/log/update_engine'
# A list of update engine client states that occur after an update is
# triggered.
UPDATER_PROCESSING_UPDATE = ['UPDATE_STATUS_CHECKING_FORUPDATE',
                             'UPDATE_STATUS_UPDATE_AVAILABLE',
                             'UPDATE_STATUS_DOWNLOADING',
                             'UPDATE_STATUS_FINALIZING']


class ChromiumOSError(error.InstallError):
    """Generic error for ChromiumOS-specific exceptions."""
    pass


class RootFSUpdateError(ChromiumOSError):
    """Raised when the RootFS fails to update."""
    pass


class StatefulUpdateError(ChromiumOSError):
    """Raised when the stateful partition fails to update."""
    pass


def url_to_version(update_url):
    """Return the version based on update_url.

    @param update_url: url to the image to update to.
    @returns the last path element of the url (after dropping any '/au/...'
            suffix), stripped of surrounding whitespace.

    """
    # The Chrome OS version is generally the last element in the URL. The only
    # exception is delta update URLs, which are rooted under the version; e.g.,
    # http://.../update/.../0.14.755.0/au/0.14.754.0. In this case we want to
    # strip off the au section of the path before reading the version.
    return re.sub('/au/.*', '',
                  urlparse.urlparse(update_url).path).split('/')[-1].strip()


def url_to_image_name(update_url):
    """Return the image name based on update_url.

    From a URL like:
        http://172.22.50.205:8082/update/lumpy-release/R27-3837.0.0
    return lumpy-release/R27-3837.0.0

    @param update_url: url to the image to update to.
    @returns a string representing the image name in the update_url.

    """
    return '/'.join(urlparse.urlparse(update_url).path.split('/')[-2:])


class ChromiumOSUpdater(object):
    """Helper class used to update DUT with image of desired version."""
    # Kernel/rootfs partition numbers for the two boot slots.
    KERNEL_A = {'name': 'KERN-A', 'kernel': 2, 'root': 3}
    KERNEL_B = {'name': 'KERN-B', 'kernel': 4, 'root': 5}
    # Time to wait for new kernel to be marked successful after
    # auto update.
    KERNEL_UPDATE_TIMEOUT = 120


    def __init__(self, update_url, host=None, local_devserver=False):
        """Initialize the updater.

        @param update_url: url to the image to update to.
        @param host: host object used to run commands on and transfer files
                to/from the DUT.
        @param local_devserver: if True, no expected version is derived from
                update_url and update_version is left as None.

        """
        self.host = host
        self.update_url = update_url
        # update_rootfs/update_stateful run in separate processes (see
        # run_update); they report failures to the parent via this queue.
        self._update_error_queue = multiprocessing.Queue(2)
        self.local_devserver = local_devserver
        if not local_devserver:
            self.update_version = url_to_version(update_url)
        else:
            self.update_version = None


    def check_update_status(self):
        """Return current status from update-engine."""
        update_status = self._run(
                '%s -status 2>&1 | grep CURRENT_OP' % UPDATER_BIN)
        # The matching line has the form <key>=<status>; keep the status.
        return update_status.stdout.strip().split('=')[-1]


    def reset_update_engine(self):
        """Restarts the update-engine service.

        @raise ChromiumOSError: if update-engine is not idle after restart.

        """
        self._run('rm -f %s' % UPDATED_MARKER)
        try:
            self._run('initctl stop update-engine')
        except error.AutoservRunError:
            logging.warning('Stopping update-engine service failed. '
                            'Already dead?')
        self._run('initctl start update-engine')

        if self.check_update_status() != UPDATER_IDLE:
            raise ChromiumOSError('%s is not in an installable state' %
                                  self.host.hostname)


    def _run(self, cmd, *args, **kwargs):
        """Abbreviated form of self.host.run(...)"""
        return self.host.run(cmd, *args, **kwargs)


    def rootdev(self, options=''):
        """Returns the stripped output of rootdev <options>.

        @param options: options to run rootdev.

        """
        return self._run('rootdev %s' % options).stdout.strip()


    def get_kernel_state(self):
        """Returns the (<active>, <inactive>) kernel state as a pair.

        @raise ChromiumOSError: if the active root partition number cannot
                be parsed or matches neither KERNEL_A nor KERNEL_B.

        """
        rootdev_output = self.rootdev('-s')
        # The partition number is the run of digits ending the device path.
        match = re.search(r'\d+\Z', rootdev_output)
        if match is None:
            raise ChromiumOSError('Could not parse root partition number '
                                  'from rootdev output: %r' % rootdev_output)

        active_root = int(match.group(0))
        if active_root == self.KERNEL_A['root']:
            return self.KERNEL_A, self.KERNEL_B
        elif active_root == self.KERNEL_B['root']:
            return self.KERNEL_B, self.KERNEL_A
        else:
            raise ChromiumOSError('Encountered unknown root partition: %s' %
                                  active_root)


    def _cgpt(self, flag, kernel, dev='$(rootdev -s -d)'):
        """Return numeric cgpt value for the specified flag, kernel, device.

        @param flag: cgpt show option selecting the value to print.
        @param kernel: information of the given kernel, KERNEL_A or KERNEL_B.
        @param dev: device expression passed to cgpt; the default is a shell
                substitution evaluated on the DUT.

        """
        return int(self._run('cgpt show -n -i %d %s %s' % (
            kernel['kernel'], flag, dev)).stdout.strip())


    def get_kernel_priority(self, kernel):
        """Return numeric priority for the specified kernel.

        @param kernel: information of the given kernel, KERNEL_A or KERNEL_B.

        """
        return self._cgpt('-P', kernel)


    def get_kernel_success(self, kernel):
        """Return boolean success flag for the specified kernel.

        @param kernel: information of the given kernel, KERNEL_A or KERNEL_B.

        """
        return self._cgpt('-S', kernel) != 0


    def get_kernel_tries(self, kernel):
        """Return tries count for the specified kernel.

        @param kernel: information of the given kernel, KERNEL_A or KERNEL_B.

        """
        return self._cgpt('-T', kernel)


    def get_stateful_update_script(self):
        """Returns the path to the stateful update script on the target."""
        # We attempt to load the local stateful update path in 3 different
        # ways. First we use the location specified in the autotest global
        # config. If this doesn't exist, we attempt to use the Chromium OS
        # Chroot path to the installed script. If all else fails, we use the
        # stateful update script on the host.
        stateful_update_path = os.path.join(
                global_config.global_config.get_config_value(
                        'CROS', 'source_tree', default=''),
                LOCAL_STATEFUL_UPDATE_PATH)

        if not os.path.exists(stateful_update_path):
            logging.warning('Could not find Chrome OS source location for '
                            'stateful_update script at %s, falling back to '
                            'chroot copy.', stateful_update_path)
            stateful_update_path = LOCAL_CHROOT_STATEFUL_UPDATE_PATH

        if not os.path.exists(stateful_update_path):
            logging.warning('Could not chroot stateful_update script, '
                            'falling back on client copy.')
            statefuldev_script = REMOTE_STATEUL_UPDATE_PATH
        else:
            # A local copy exists: push it to the DUT and use that copy.
            self.host.send_file(
                    stateful_update_path, STATEFUL_UPDATE, delete_dest=True)
            statefuldev_script = STATEFUL_UPDATE

        return statefuldev_script


    def reset_stateful_partition(self):
        """Clear any pending stateful update request."""
        statefuldev_cmd = [self.get_stateful_update_script()]
        statefuldev_cmd += ['--stateful_change=reset', '2>&1']
        self._run(' '.join(statefuldev_cmd))


    def revert_boot_partition(self):
        """Revert the boot partition.

        @returns the result of running /postinst against the current root
                device.

        """
        part = self.rootdev('-s')
        logging.warning('Reverting update; Boot partition will be %s', part)
        return self._run('/postinst %s 2>&1' % part)


    def trigger_update(self):
        """Triggers a background update on a test image.

        @raise RootFSUpdateError if anything went wrong.

        """
        autoupdate_cmd = '%s --check_for_update --omaha_url=%s' % (
                UPDATER_BIN, self.update_url)
        logging.info('Triggering update via: %s', autoupdate_cmd)
        try:
            self._run(autoupdate_cmd)
        except (error.AutoservSshPermissionDeniedError,
                error.AutoservSSHTimeout) as e:
            raise RootFSUpdateError('SSH on %s is seeing %s' %
                                    (self.host.hostname, type(e).__name__))
        except error.AutoservRunError as e:

            # Check if the exit code is 255, if so it's probably a generic
            # SSH error.
            result = e.args[1]
            if result.exit_status == 255:
                raise RootFSUpdateError('SSH on %s is seeing a generic error.' %
                                        self.host.hostname)

            # We have ruled out all SSH cases, the error code is from
            # update_engine_client, though we still don't know why.
            raise RootFSUpdateError(
                    'devserver unreachable, payload unavailable, '
                    'or AU bug (unlikely) on %s: %s' %
                    (self.host.hostname, type(e).__name__))


    def _verify_update_completed(self):
        """Verifies that an update has completed.

        @raise RootFSUpdateError: if verification fails.
        """
        status = self.check_update_status()
        if status != UPDATER_NEED_REBOOT:
            raise RootFSUpdateError('Update did not complete with correct '
                                    'status. Expecting %s, actual %s' %
                                    (UPDATER_NEED_REBOOT, status))


    def rollback_rootfs(self, powerwash):
        """Triggers rollback and waits for it to complete.

        @param powerwash: If true, powerwash as part of rollback.

        @raise RootFSUpdateError if anything went wrong.

        """
        #TODO(sosa): crbug.com/309051 - Make this one update_engine_client call.
        rollback_cmd = '%s --rollback' % (UPDATER_BIN)
        wait_for_update_to_complete_cmd = '%s --update' % (UPDATER_BIN)
        if not powerwash:
            rollback_cmd += ' --nopowerwash'

        logging.info('Triggering rollback.')
        try:
            self._run(rollback_cmd)
            self._run(wait_for_update_to_complete_cmd)
        except error.AutoservRunError as e:
            raise RootFSUpdateError('Rollback failed on %s: %s' %
                                    (self.host.hostname, str(e)))

        self._verify_update_completed()


    def update_rootfs(self):
        """Updates the rootfs partition only.

        Any failure is both put on the update error queue and raised.

        @raise RootFSUpdateError: if update-engine fails or does not end in
                the need-reboot state.

        """
        logging.info('Updating root partition...')

        # Run update_engine using the specified URL.
        try:
            autoupdate_cmd = '%s --update --omaha_url=%s 2>&1' % (
                    UPDATER_BIN, self.update_url)
            self._run(autoupdate_cmd, timeout=900)
        except error.AutoservRunError:
            update_error = RootFSUpdateError('update-engine failed on %s' %
                                             self.host.hostname)
            self._update_error_queue.put(update_error)
            raise update_error

        try:
            self._verify_update_completed()
        except RootFSUpdateError as e:
            self._update_error_queue.put(e)
            raise


    def update_stateful(self, clobber=True):
        """Updates the stateful partition.

        Any failure is both put on the update error queue and raised.

        @param clobber: If True, a clean stateful installation.

        @raise StatefulUpdateError: if the stateful_update script fails.
        """
        logging.info('Updating stateful partition...')
        # NOTE(review): str.replace rewrites every occurrence of 'update' in
        # the URL, not just the leading path component - confirm that no
        # devserver host or image name can contain 'update'.
        statefuldev_url = self.update_url.replace('update',
                                                  'static')

        # Attempt stateful partition update; this must succeed so that the newly
        # installed host is testable after update.
        statefuldev_cmd = [self.get_stateful_update_script(), statefuldev_url]
        if clobber:
            statefuldev_cmd.append('--stateful_change=clean')

        statefuldev_cmd.append('2>&1')
        try:
            self._run(' '.join(statefuldev_cmd), timeout=600)
        except error.AutoservRunError:
            update_error = StatefulUpdateError('stateful_update failed on %s' %
                                               self.host.hostname)
            self._update_error_queue.put(update_error)
            raise update_error


    def run_update(self, force_update, update_root=True):
        """Update the DUT with image of specific version.

        @param force_update: True to update DUT even if it's running the same
            version already.
        @param update_root: True to force a kernel update. If it's False and
            force_update is True, stateful update will be used to clean up
            the DUT.

        @returns: False if the update was skipped because the DUT is already
            at the desired version, True once the update completed.

        @raise ChromiumOSError: if the update server is unreachable, or the
            first error reported by one of the updaters.

        """
        booted_version = self.get_build_id()
        if (self.check_version() and not force_update):
            logging.info('System is already up to date. Skipping update.')
            return False

        if self.update_version:
            logging.info('Updating from version %s to %s.',
                         booted_version, self.update_version)

        # Check that Dev Server is accepting connections (from autoserv's host).
        # If we can't talk to it, the machine host probably can't either.
        auserver_host = urlparse.urlparse(self.update_url)[1]
        connection = httplib.HTTPConnection(auserver_host)
        try:
            connection.connect()
        except IOError:
            raise ChromiumOSError(
                    'Update server at %s not available' % auserver_host)
        finally:
            # The connection is only a reachability probe; release the socket.
            connection.close()

        logging.info('Installing from %s to %s', self.update_url,
                     self.host.hostname)

        # Reset update state.
        self.reset_update_engine()
        self.reset_stateful_partition()

        try:
            updaters = [
                multiprocessing.process.Process(target=self.update_rootfs),
                multiprocessing.process.Process(target=self.update_stateful)
            ]
            if not update_root:
                logging.info('Root update is skipped.')
                updaters = updaters[1:]

            # Run the updaters in parallel.
            for updater in updaters:
                updater.start()
            for updater in updaters:
                updater.join()

            # Re-raise the first error that occurred.
            if not self._update_error_queue.empty():
                update_error = self._update_error_queue.get()
                self.revert_boot_partition()
                self.reset_stateful_partition()
                raise update_error

            logging.info('Update complete.')
            return True
        except:
            # Deliberately bare: logs are collected for any failure and the
            # exception is always re-raised below.
            # Collect update engine logs in the event of failure.
            if self.host.job:
                logging.info('Collecting update engine logs...')
                self.host.get_file(
                        UPDATER_LOGS, self.host.job.sysinfo.sysinfodir,
                        preserve_perm=False)
            raise
        finally:
            self.host.show_update_engine_log()


    def check_version(self):
        """Check the image running in DUT has the desired version.

        @returns: True if the DUT's image version matches the version that
            the autoupdater tries to update to; False otherwise, including
            when no update version is known.

        """
        booted_version = self.get_build_id()
        # bool() so callers get True/False rather than None or ''.
        return bool(self.update_version and
                    self.update_version.endswith(booted_version))


    def check_version_to_confirm_install(self):
        """Check image running in DUT has the desired version to be installed.

        The method should not be used to check if DUT needs to have a full
        reimage. Only use it to confirm a image is installed.

        The method is designed to verify version for following 6 scenarios with
        samples of version to update to and expected booted version:
        1. trybot paladin build.
        update version: trybot-lumpy-paladin/R27-3837.0.0-b123
        booted version: 3837.0.2013_03_21_1340

        2. trybot release build.
        update version: trybot-lumpy-release/R27-3837.0.0-b456
        booted version: 3837.0.0

        3. buildbot official release build.
        update version: lumpy-release/R27-3837.0.0
        booted version: 3837.0.0

        4. non-official paladin rc build.
        update version: lumpy-paladin/R27-3837.0.0-rc7
        booted version: 3837.0.0-rc7

        5. chrome-perf build.
        update version: lumpy-chrome-perf/R28-3837.0.0-b2996
        booted version: 3837.0.0

        6. pgo-generate build.
        update version: lumpy-release-pgo-generate/R28-3837.0.0-b2996
        booted version: 3837.0.0-pgo-generate

        When we are checking if a DUT needs to do a full install, we should NOT
        use this method to check if the DUT is running the same version, since
        it may return false positive for a DUT running trybot paladin build to
        be updated to another trybot paladin build.

        TODO: This logic has a bug if a trybot paladin build failed to be
        installed in a DUT running an older trybot paladin build with same
        platform number, but different build number (-b###). So to conclusively
        determine if a tryjob paladin build is imaged successfully, we may need
        to find out the date string from update url.

        @returns: True if the DUT's image version (without the date string if
            the image is a trybot build), matches the version that the
            autoupdater is trying to update to.

        """
        # In the local_devserver case, we can't know the expected
        # build, so just pass.
        if not self.update_version:
            return True

        # Always try the default check_version method first, this prevents
        # any backward compatibility issue.
        if self.check_version():
            return True

        # Remove R#- and -b# at the end of build version
        stripped_version = re.sub(r'(R\d+-|-b\d+)', '', self.update_version)

        booted_version = self.get_build_id()

        is_trybot_paladin_build = re.match(r'.+trybot-.+-paladin',
                                           self.update_url)

        # Replace date string with 0 in booted_version
        booted_version_no_date = re.sub(r'\d{4}_\d{2}_\d{2}_\d+', '0',
                                        booted_version)
        has_date_string = booted_version != booted_version_no_date

        is_pgo_generate_build = re.match(r'.+-pgo-generate',
                                         self.update_url)

        # Remove |-pgo-generate| in booted_version
        booted_version_no_pgo = booted_version.replace('-pgo-generate', '')
        has_pgo_generate = booted_version != booted_version_no_pgo

        if is_trybot_paladin_build:
            if not has_date_string:
                logging.error('A trybot paladin build is expected. Version '
                              '"%s" is not a paladin build.', booted_version)
                return False
            return stripped_version == booted_version_no_date
        elif is_pgo_generate_build:
            if not has_pgo_generate:
                logging.error('A pgo-generate build is expected. Version '
                              '"%s" is not a pgo-generate build.',
                              booted_version)
                return False
            return stripped_version == booted_version_no_pgo
        else:
            if has_date_string:
                logging.error('Unexpected date found in a non trybot paladin'
                              ' build.')
                return False
            # Versioned build, i.e., rc or release build.
            return stripped_version == booted_version


    def get_build_id(self):
        """Pulls the CHROMEOS_RELEASE_VERSION string from /etc/lsb-release."""
        return self._run('grep CHROMEOS_RELEASE_VERSION'
                         ' /etc/lsb-release').stdout.split('=')[1].strip()


    def verify_boot_expectations(self, expected_kernel_state, rollback_message):
        """Verifies that we fully booted given expected kernel state.

        This method both verifies that we booted using the correct kernel
        state and that the OS has marked the kernel as good.

        @param expected_kernel_state: kernel state that we are verifying with
            i.e. I expect to be booted onto partition 4 etc. See output of
            get_kernel_state.
        @param rollback_message: string to raise as a ChromiumOSError
            if we booted with the wrong partition.

        @raises ChromiumOSError: If we didn't.
        """
        # Figure out the newly active kernel.
        active_kernel_state = self.get_kernel_state()[0]

        # Check for rollback due to a bad build.
        if (expected_kernel_state and
                active_kernel_state != expected_kernel_state):

            # Kernel crash reports should be wiped between test runs, but
            # may persist from earlier parts of the test, or from problems
            # with provisioning.
            #
            # Kernel crash reports will NOT be present if the crash happened
            # before encrypted stateful is mounted.
            #
            # TODO(dgarrett): Integrate with server/crashcollect.py at some
            # point.
            # NOTE(review): glob.glob inspects the filesystem of the machine
            # running this code, whereas the other probes in this method go
            # through self._run on the host - confirm this is intended.
            kernel_crashes = glob.glob('/var/spool/crash/kernel.*.kcrash')
            if kernel_crashes:
                rollback_message += ': kernel_crash'
                logging.debug('Found %d kernel crash reports:',
                              len(kernel_crashes))
                # The crash names contain timestamps that may be useful:
                #   kernel.20131207.005945.0.kcrash
                for crash in kernel_crashes:
                    logging.debug('  %s', os.path.basename(crash))

            # Print out some information to make it easier to debug
            # the rollback.
            logging.debug('Dumping partition table.')
            self._run('cgpt show $(rootdev -s -d)')
            logging.debug('Dumping crossystem for firmware debugging.')
            self._run('crossystem --all')
            raise ChromiumOSError(rollback_message)

        # Make sure chromeos-setgoodkernel runs.
        try:
            utils.poll_for_condition(
                lambda: (self.get_kernel_tries(active_kernel_state) == 0
                         and self.get_kernel_success(active_kernel_state)),
                exception=ChromiumOSError(),
                timeout=self.KERNEL_UPDATE_TIMEOUT, sleep_interval=5)
        except ChromiumOSError:
            # Use the system-services job status to pick the failure message.
            services_status = self._run('status system-services').stdout
            if services_status != 'system-services start/running\n':
                event = ('Chrome failed to reach login screen')
            else:
                event = ('update-engine failed to call '
                         'chromeos-setgoodkernel')
            raise ChromiumOSError(
                    'After update and reboot, %s '
                    'within %d seconds' % (event,
                                           self.KERNEL_UPDATE_TIMEOUT))