autoupdater.py revision 5e8c45adeeaaa493462262a1138e53d42caae014
1# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import glob
6import httplib
7import logging
8import multiprocessing
9import os
10import re
11import urlparse
12
13from autotest_lib.client.bin import utils
14from autotest_lib.client.common_lib import error, global_config
15
# Local stateful update path is relative to the CrOS source directory.
LOCAL_STATEFUL_UPDATE_PATH = 'src/platform/dev/stateful_update'
# Location of the stateful_update script installed in the Chromium OS chroot.
LOCAL_CHROOT_STATEFUL_UPDATE_PATH = '/usr/bin/stateful_update'
# Copy of the stateful_update script that already lives on the host.
REMOTE_STATEFUL_UPDATE_PATH = '/usr/local/bin/stateful_update'
# Historical, misspelled name; kept as an alias so existing users still work.
REMOTE_STATEUL_UPDATE_PATH = REMOTE_STATEFUL_UPDATE_PATH
# Destination on the host for a stateful_update script sent from this machine.
STATEFUL_UPDATE = '/tmp/stateful_update'
UPDATER_BIN = '/usr/bin/update_engine_client'
UPDATER_IDLE = 'UPDATE_STATUS_IDLE'
UPDATER_NEED_REBOOT = 'UPDATE_STATUS_UPDATED_NEED_REBOOT'
UPDATED_MARKER = '/var/run/update_engine_autoupdate_completed'
# Space separated list of log files fetched from the host when an update fails.
UPDATER_LOGS = '/var/log/messages /var/log/update_engine'
# A list of update engine client states that occur after an update is triggered.
# NOTE: the first entry used to be spelled 'UPDATE_STATUS_CHECKING_FORUPDATE',
# which does not match the status string reported by the update engine.
UPDATER_PROCESSING_UPDATE = ['UPDATE_STATUS_CHECKING_FOR_UPDATE',
                             'UPDATE_STATUS_UPDATE_AVAILABLE',
                             'UPDATE_STATUS_DOWNLOADING',
                             'UPDATE_STATUS_FINALIZING']
31
class ChromiumOSError(error.InstallError):
    """Base install error for failures that are specific to ChromiumOS."""
35
36
class RootFSUpdateError(ChromiumOSError):
    """Signals that updating the root filesystem did not succeed."""
40
41
class StatefulUpdateError(ChromiumOSError):
    """Signals that updating the stateful partition did not succeed."""
45
46
def url_to_version(update_url):
    """Return the version based on update_url.

    @param update_url: url to the image to update to.
    @returns the last non-empty element of the URL path, after any
        '/au/...' delta-update suffix has been removed.

    """
    # The Chrome OS version is generally the last element in the URL. The only
    # exception is delta update URLs, which are rooted under the version; e.g.,
    # http://.../update/.../0.14.755.0/au/0.14.754.0. In this case we want to
    # strip off the au section of the path before reading the version.
    path = re.sub(r'/au/.*', '', urlparse.urlparse(update_url).path)
    # Drop trailing slashes so that '.../R27-3837.0.0/' still yields the
    # version rather than an empty string.
    return path.strip().rstrip('/').split('/')[-1].strip()
59
60
def url_to_image_name(update_url):
    """Return the image name based on update_url.

    From a URL like:
        http://172.22.50.205:8082/update/lumpy-release/R27-3837.0.0
    return lumpy-release/R27-3837.0.0

    @param update_url: url to the image to update to.
    @returns a string representing the image name in the update_url, i.e.
        the last two elements of the URL path joined by '/'.

    """
    # Ignore trailing slashes; otherwise the last path element is empty and
    # the result degrades to '<version>/'.
    path = urlparse.urlparse(update_url).path.strip().rstrip('/')
    return '/'.join(path.split('/')[-2:])
73
74
class ChromiumOSUpdater(object):
    """Helper class used to update DUT with image of desired version."""
    # Description of the two kernel slots. 'kernel' is the partition index
    # handed to cgpt, 'root' is the trailing number of the root device that
    # rootdev reports when that slot is active.
    KERNEL_A = {'name': 'KERN-A', 'kernel': 2, 'root': 3}
    KERNEL_B = {'name': 'KERN-B', 'kernel': 4, 'root': 5}
    # Time (in seconds) to wait for new kernel to be marked successful after
    # auto update.
    KERNEL_UPDATE_TIMEOUT = 120
82
83
84    def __init__(self, update_url, host=None, local_devserver=False):
85        self.host = host
86        self.update_url = update_url
87        self._update_error_queue = multiprocessing.Queue(2)
88        self.local_devserver = local_devserver
89        if not local_devserver:
90          self.update_version = url_to_version(update_url)
91        else:
92          self.update_version = None
93
94    def check_update_status(self):
95        """Return current status from update-engine."""
96        update_status = self._run(
97            '%s -status 2>&1 | grep CURRENT_OP' % UPDATER_BIN)
98        return update_status.stdout.strip().split('=')[-1]
99
100
101    def reset_update_engine(self):
102        """Restarts the update-engine service."""
103        self._run('rm -f %s' % UPDATED_MARKER)
104        try:
105            self._run('initctl stop update-engine')
106        except error.AutoservRunError:
107            logging.warn('Stopping update-engine service failed. Already dead?')
108        self._run('initctl start update-engine')
109
110        if self.check_update_status() != UPDATER_IDLE:
111            raise ChromiumOSError('%s is not in an installable state' %
112                                  self.host.hostname)
113
114
115    def _run(self, cmd, *args, **kwargs):
116        """Abbreviated form of self.host.run(...)"""
117        return self.host.run(cmd, *args, **kwargs)
118
119
120    def rootdev(self, options=''):
121        """Returns the stripped output of rootdev <options>.
122
123        @param options: options to run rootdev.
124
125        """
126        return self._run('rootdev %s' % options).stdout.strip()
127
128
129    def get_kernel_state(self):
130        """Returns the (<active>, <inactive>) kernel state as a pair."""
131        active_root = int(re.findall('\d+\Z', self.rootdev('-s'))[0])
132        if active_root == self.KERNEL_A['root']:
133            return self.KERNEL_A, self.KERNEL_B
134        elif active_root == self.KERNEL_B['root']:
135            return self.KERNEL_B, self.KERNEL_A
136        else:
137            raise ChromiumOSError('Encountered unknown root partition: %s' %
138                                  active_root)
139
140
141    def _cgpt(self, flag, kernel, dev='$(rootdev -s -d)'):
142        """Return numeric cgpt value for the specified flag, kernel, device. """
143        return int(self._run('cgpt show -n -i %d %s %s' % (
144            kernel['kernel'], flag, dev)).stdout.strip())
145
146
147    def get_kernel_priority(self, kernel):
148        """Return numeric priority for the specified kernel.
149
150        @param kernel: information of the given kernel, KERNEL_A or KERNEL_B.
151
152        """
153        return self._cgpt('-P', kernel)
154
155
156    def get_kernel_success(self, kernel):
157        """Return boolean success flag for the specified kernel.
158
159        @param kernel: information of the given kernel, KERNEL_A or KERNEL_B.
160
161        """
162        return self._cgpt('-S', kernel) != 0
163
164
165    def get_kernel_tries(self, kernel):
166        """Return tries count for the specified kernel.
167
168        @param kernel: information of the given kernel, KERNEL_A or KERNEL_B.
169
170        """
171        return self._cgpt('-T', kernel)
172
173
174    def get_stateful_update_script(self):
175        """Returns the path to the stateful update script on the target."""
176        # We attempt to load the local stateful update path in 3 different
177        # ways. First we use the location specified in the autotest global
178        # config. If this doesn't exist, we attempt to use the Chromium OS
179        # Chroot path to the installed script. If all else fails, we use the
180        # stateful update script on the host.
181        stateful_update_path = os.path.join(
182                global_config.global_config.get_config_value(
183                        'CROS', 'source_tree', default=''),
184                LOCAL_STATEFUL_UPDATE_PATH)
185
186        if not os.path.exists(stateful_update_path):
187            logging.warn('Could not find Chrome OS source location for '
188                         'stateful_update script at %s, falling back to chroot '
189                         'copy.', stateful_update_path)
190            stateful_update_path = LOCAL_CHROOT_STATEFUL_UPDATE_PATH
191
192        if not os.path.exists(stateful_update_path):
193            logging.warn('Could not chroot stateful_update script, falling '
194                         'back on client copy.')
195            statefuldev_script = REMOTE_STATEUL_UPDATE_PATH
196        else:
197            self.host.send_file(
198                    stateful_update_path, STATEFUL_UPDATE, delete_dest=True)
199            statefuldev_script = STATEFUL_UPDATE
200
201        return statefuldev_script
202
203
204    def reset_stateful_partition(self):
205        """Clear any pending stateful update request."""
206        statefuldev_cmd = [self.get_stateful_update_script()]
207        statefuldev_cmd += ['--stateful_change=reset', '2>&1']
208        self._run(' '.join(statefuldev_cmd))
209
210
211    def revert_boot_partition(self):
212        """Revert the boot partition."""
213        part = self.rootdev('-s')
214        logging.warn('Reverting update; Boot partition will be %s', part)
215        return self._run('/postinst %s 2>&1' % part)
216
217
218    def trigger_update(self):
219        """Triggers a background update on a test image.
220
221        @raise RootFSUpdateError if anything went wrong.
222
223        """
224        autoupdate_cmd = '%s --check_for_update --omaha_url=%s' % (
225            UPDATER_BIN, self.update_url)
226        logging.info('Triggering update via: %s', autoupdate_cmd)
227        try:
228            self._run(autoupdate_cmd)
229        except (error.AutoservSshPermissionDeniedError,
230                error.AutoservSSHTimeout) as e:
231            raise RootFSUpdateError('SSH on %s is seeing %s' %
232                                    (self.host.hostname, type(e).__name__))
233        except error.AutoservRunError as e:
234
235            # Check if the exit code is 255, if so it's probably a generic
236            # SSH error.
237            result = e.args[1]
238            if result.exit_status == 255:
239              raise RootFSUpdateError('SSH on %s is seeing a generic error.' %
240                                      self.host.hostname)
241
242            # We have ruled out all SSH cases, the error code is from
243            # update_engine_client, though we still don't know why.
244            raise RootFSUpdateError(
245                    'devserver unreachable, payload unavailable, '
246                    'or AU bug (unlikely) on %s: %s' %
247                    (self.host.hostname, type(e).__name__))
248
249
250    def _verify_update_completed(self):
251        """Verifies that an update has completed.
252
253        @raise RootFSUpdateError: if verification fails.
254        """
255        status = self.check_update_status()
256        if status != UPDATER_NEED_REBOOT:
257            raise RootFSUpdateError('Update did not complete with correct '
258                                    'status. Expecting %s, actual %s' %
259                                            (UPDATER_NEED_REBOOT, status))
260
261
262    def rollback_rootfs(self, powerwash):
263        """Triggers rollback and waits for it to complete.
264
265        @param powerwash: If true, powerwash as part of rollback.
266
267        @raise RootFSUpdateError if anything went wrong.
268
269        """
270        #TODO(sosa): crbug.com/309051 - Make this one update_engine_client call.
271        rollback_cmd = '%s --rollback' % (UPDATER_BIN)
272        wait_for_update_to_complete_cmd = '%s --update' % (UPDATER_BIN)
273        if not powerwash:
274          rollback_cmd += ' --nopowerwash'
275
276        logging.info('Triggering rollback.')
277        try:
278            self._run(rollback_cmd)
279            self._run(wait_for_update_to_complete_cmd)
280        except error.AutoservRunError as e:
281            raise RootFSUpdateError('Rollback failed on %s: %s' %
282                                    (self.host.hostname, str(e)))
283
284        self._verify_update_completed()
285
286
287    def update_rootfs(self):
288        """Updates the rootfs partition only."""
289        logging.info('Updating root partition...')
290
291        # Run update_engine using the specified URL.
292        try:
293            autoupdate_cmd = '%s --update --omaha_url=%s 2>&1' % (
294                UPDATER_BIN, self.update_url)
295            self._run(autoupdate_cmd, timeout=900)
296        except error.AutoservRunError:
297            update_error = RootFSUpdateError('update-engine failed on %s' %
298                                             self.host.hostname)
299            self._update_error_queue.put(update_error)
300            raise update_error
301
302        try:
303            self._verify_update_completed()
304        except RootFSUpdateError as e:
305            self._update_error_queue.put(e)
306            raise
307
308
309    def update_stateful(self, clobber=True):
310        """Updates the stateful partition.
311
312        @param clobber: If True, a clean stateful installation.
313        """
314        logging.info('Updating stateful partition...')
315        statefuldev_url = self.update_url.replace('update',
316                                                  'static')
317
318        # Attempt stateful partition update; this must succeed so that the newly
319        # installed host is testable after update.
320        statefuldev_cmd = [self.get_stateful_update_script(), statefuldev_url]
321        if clobber:
322            statefuldev_cmd.append('--stateful_change=clean')
323
324        statefuldev_cmd.append('2>&1')
325        try:
326            self._run(' '.join(statefuldev_cmd), timeout=600)
327        except error.AutoservRunError:
328            update_error = StatefulUpdateError('stateful_update failed on %s' %
329                                               self.host.hostname)
330            self._update_error_queue.put(update_error)
331            raise update_error
332
333
334    def run_update(self, force_update, update_root=True):
335        """Update the DUT with image of specific version.
336
337        @param force_update: True to update DUT even if it's running the same
338            version already.
339        @param update_root: True to force a kernel update. If it's False and
340            force_update is True, stateful update will be used to clean up
341            the DUT.
342
343        """
344        booted_version = self.get_build_id()
345        if (self.check_version() and not force_update):
346            logging.info('System is already up to date. Skipping update.')
347            return False
348
349        if self.update_version:
350            logging.info('Updating from version %s to %s.',
351                         booted_version, self.update_version)
352
353        # Check that Dev Server is accepting connections (from autoserv's host).
354        # If we can't talk to it, the machine host probably can't either.
355        auserver_host = urlparse.urlparse(self.update_url)[1]
356        try:
357            httplib.HTTPConnection(auserver_host).connect()
358        except IOError:
359            raise ChromiumOSError(
360                'Update server at %s not available' % auserver_host)
361
362        logging.info('Installing from %s to %s', self.update_url,
363                     self.host.hostname)
364
365        # Reset update state.
366        self.reset_update_engine()
367        self.reset_stateful_partition()
368
369        try:
370            updaters = [
371                multiprocessing.process.Process(target=self.update_rootfs),
372                multiprocessing.process.Process(target=self.update_stateful)
373                ]
374            if not update_root:
375                logging.info('Root update is skipped.')
376                updaters = updaters[1:]
377
378            # Run the updaters in parallel.
379            for updater in updaters: updater.start()
380            for updater in updaters: updater.join()
381
382            # Re-raise the first error that occurred.
383            if not self._update_error_queue.empty():
384                update_error = self._update_error_queue.get()
385                self.revert_boot_partition()
386                self.reset_stateful_partition()
387                raise update_error
388
389            logging.info('Update complete.')
390            return True
391        except:
392            # Collect update engine logs in the event of failure.
393            if self.host.job:
394                logging.info('Collecting update engine logs...')
395                self.host.get_file(
396                    UPDATER_LOGS, self.host.job.sysinfo.sysinfodir,
397                    preserve_perm=False)
398            raise
399        finally:
400            self.host.show_update_engine_log()
401
402
403    def check_version(self):
404        """Check the image running in DUT has the desired version.
405
406        @returns: True if the DUT's image version matches the version that
407            the autoupdater tries to update to.
408
409        """
410        booted_version = self.get_build_id()
411        return (self.update_version and
412                self.update_version.endswith(booted_version))
413
414
415    def check_version_to_confirm_install(self):
416        """Check image running in DUT has the desired version to be installed.
417
418        The method should not be used to check if DUT needs to have a full
419        reimage. Only use it to confirm a image is installed.
420
421        The method is designed to verify version for following 4 scenarios with
422        samples of version to update to and expected booted version:
423        1. trybot paladin build.
424        update version: trybot-lumpy-paladin/R27-3837.0.0-b123
425        booted version: 3837.0.2013_03_21_1340
426
427        2. trybot release build.
428        update version: trybot-lumpy-release/R27-3837.0.0-b456
429        booted version: 3837.0.0
430
431        3. buildbot official release build.
432        update version: lumpy-release/R27-3837.0.0
433        booted version: 3837.0.0
434
435        4. non-official paladin rc build.
436        update version: lumpy-paladin/R27-3878.0.0-rc7
437        booted version: 3837.0.0-rc7
438
439        5. chrome-perf build.
440        update version: lumpy-chrome-perf/R28-3837.0.0-b2996
441        booted version: 3837.0.0
442
443        6. pgo-generate build.
444        update version: lumpy-release-pgo-generate/R28-3837.0.0-b2996
445        booted version: 3837.0.0-pgo-generate
446
447        When we are checking if a DUT needs to do a full install, we should NOT
448        use this method to check if the DUT is running the same version, since
449        it may return false positive for a DUT running trybot paladin build to
450        be updated to another trybot paladin build.
451
452        TODO: This logic has a bug if a trybot paladin build failed to be
453        installed in a DUT running an older trybot paladin build with same
454        platform number, but different build number (-b###). So to conclusively
455        determine if a tryjob paladin build is imaged successfully, we may need
456        to find out the date string from update url.
457
458        @returns: True if the DUT's image version (without the date string if
459            the image is a trybot build), matches the version that the
460            autoupdater is trying to update to.
461
462        """
463        # In the local_devserver case, we can't know the expected
464        # build, so just pass.
465        if not self.update_version:
466            return True
467
468        # Always try the default check_version method first, this prevents
469        # any backward compatibility issue.
470        if self.check_version():
471            return True
472
473        # Remove R#- and -b# at the end of build version
474        stripped_version = re.sub(r'(R\d+-|-b\d+)', '', self.update_version)
475
476        booted_version = self.get_build_id()
477
478        is_trybot_paladin_build = re.match(r'.+trybot-.+-paladin',
479                                           self.update_url)
480
481        # Replace date string with 0 in booted_version
482        booted_version_no_date = re.sub(r'\d{4}_\d{2}_\d{2}_\d+', '0',
483                                        booted_version)
484        has_date_string = booted_version != booted_version_no_date
485
486        is_pgo_generate_build = re.match(r'.+-pgo-generate',
487                                           self.update_url)
488
489        # Remove |-pgo-generate| in booted_version
490        booted_version_no_pgo = booted_version.replace('-pgo-generate', '')
491        has_pgo_generate = booted_version != booted_version_no_pgo
492
493        if is_trybot_paladin_build:
494            if not has_date_string:
495                logging.error('A trybot paladin build is expected. Version ' +
496                              '"%s" is not a paladin build.', booted_version)
497                return False
498            return stripped_version == booted_version_no_date
499        elif is_pgo_generate_build:
500            if not has_pgo_generate:
501                logging.error('A pgo-generate build is expected. Version ' +
502                              '"%s" is not a pgo-generate build.',
503                              booted_version)
504                return False
505            return stripped_version == booted_version_no_pgo
506        else:
507            if has_date_string:
508                logging.error('Unexpected date found in a non trybot paladin' +
509                              ' build.')
510                return False
511            # Versioned build, i.e., rc or release build.
512            return stripped_version == booted_version
513
514
515    def get_build_id(self):
516        """Pulls the CHROMEOS_RELEASE_VERSION string from /etc/lsb-release."""
517        return self._run('grep CHROMEOS_RELEASE_VERSION'
518                         ' /etc/lsb-release').stdout.split('=')[1].strip()
519
520
521    def verify_boot_expectations(self, expected_kernel_state, rollback_message):
522        """Verifies that we fully booted given expected kernel state.
523
524        This method both verifies that we booted using the correct kernel
525        state and that the OS has marked the kernel as good.
526
527        @param expected_kernel_state: kernel state that we are verifying with
528            i.e. I expect to be booted onto partition 4 etc. See output of
529            get_kernel_state.
530        @param rollback_message: string to raise as a ChromiumOSError
531            if we booted with the wrong partition.
532
533        @raises ChromiumOSError: If we didn't.
534        """
535        # Figure out the newly active kernel.
536        active_kernel_state = self.get_kernel_state()[0]
537
538        # Check for rollback due to a bad build.
539        if (expected_kernel_state and
540                active_kernel_state != expected_kernel_state):
541
542            # Kernel crash reports should be wiped between test runs, but
543            # may persist from earlier parts of the test, or from problems
544            # with provisioning.
545            #
546            # Kernel crash reports will NOT be present if the crash happened
547            # before encrypted stateful is mounted.
548            #
549            # TODO(dgarrett): Integrate with server/crashcollect.py at some
550            # point.
551            kernel_crashes = glob.glob('/var/spool/crash/kernel.*.kcrash')
552            if kernel_crashes:
553                rollback_message += ': kernel_crash'
554                logging.debug('Found %d kernel crash reports:',
555                              len(kernel_crashes))
556                # The crash names contain timestamps that may be useful:
557                #   kernel.20131207.005945.0.kcrash
558                for crash in kernel_crashes:
559                  logging.debug('  %s', os.path.basename(crash))
560
561            # Print out some information to make it easier to debug
562            # the rollback.
563            logging.debug('Dumping partition table.')
564            self._run('cgpt show $(rootdev -s -d)')
565            logging.debug('Dumping crossystem for firmware debugging.')
566            self._run('crossystem --all')
567            raise ChromiumOSError(rollback_message)
568
569        # Make sure chromeos-setgoodkernel runs.
570        try:
571            utils.poll_for_condition(
572                lambda: (self.get_kernel_tries(active_kernel_state) == 0
573                         and self.get_kernel_success(active_kernel_state)),
574                exception=ChromiumOSError(),
575                timeout=self.KERNEL_UPDATE_TIMEOUT, sleep_interval=5)
576        except ChromiumOSError:
577            services_status = self._run('status system-services').stdout
578            if services_status != 'system-services start/running\n':
579                event = ('Chrome failed to reach login screen')
580            else:
581                event = ('update-engine failed to call '
582                         'chromeos-setgoodkernel')
583            raise ChromiumOSError(
584                    'After update and reboot, %s '
585                    'within %d seconds' % (event,
586                                           self.KERNEL_UPDATE_TIMEOUT))
587