autoupdater.py revision d035b0c458560c0421620a63191328522ff880a7
1# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import glob
6import httplib
7import logging
8import multiprocessing
9import os
10import re
11import urlparse
12import urllib2
13
14from autotest_lib.client.bin import utils
15from autotest_lib.client.common_lib import error, global_config
16from autotest_lib.client.common_lib.cros import dev_server
17
# Location of the stateful_update script, relative to the CrOS source
# directory.
LOCAL_STATEFUL_UPDATE_PATH = 'src/platform/dev/stateful_update'
# Location of the stateful_update script when installed in the chroot.
LOCAL_CHROOT_STATEFUL_UPDATE_PATH = '/usr/bin/stateful_update'
# Update engine states, as compared against the CURRENT_OP value reported by
# `update_engine_client -status`.
UPDATER_IDLE = 'UPDATE_STATUS_IDLE'
UPDATER_NEED_REBOOT = 'UPDATE_STATUS_UPDATED_NEED_REBOOT'
# The update engine client states that occur after an update is triggered.
UPDATER_PROCESSING_UPDATE = [
    'UPDATE_STATUS_CHECKING_FORUPDATE',
    'UPDATE_STATUS_UPDATE_AVAILABLE',
    'UPDATE_STATUS_DOWNLOADING',
    'UPDATE_STATUS_FINALIZING',
]
28
class ChromiumOSError(error.InstallError):
    """Base error for failures that are specific to ChromiumOS hosts."""
31
32
class BrilloError(error.InstallError):
    """Base error for failures that are specific to Brillo hosts."""
35
36
class RootFSUpdateError(ChromiumOSError):
    """Signals that updating (or rolling back) the RootFS failed."""
39
40
class StatefulUpdateError(ChromiumOSError):
    """Signals that updating the stateful partition failed."""
43
44
def url_to_version(update_url):
    """Extract the version string from an update URL.

    @param update_url: url to the image to update to.

    @returns the last component of the URL path, ignoring any delta update
        suffix ('/au/...'), with surrounding whitespace removed.

    """
    url_path = urlparse.urlparse(update_url).path
    # Normally the version is the final path component. Delta update URLs
    # are the exception: they hang off the version, e.g.
    # http://.../update/.../0.14.755.0/au/0.14.754.0, so everything from
    # '/au/' onwards is dropped before the final component is read.
    versioned_path = re.sub('/au/.*', '', url_path)
    final_component = versioned_path.rsplit('/', 1)[-1]
    return final_component.strip()
57
58
def url_to_image_name(update_url):
    """Extract the image name from an update URL.

    The image name is made of the last two components of the URL path, so
        http://172.22.50.205:8082/update/lumpy-release/R27-3837.0.0
    yields lumpy-release/R27-3837.0.0

    @param update_url: url to the image to update to.
    @returns a string representing the image name in the update_url.

    """
    url_path = urlparse.urlparse(update_url).path
    # Splitting from the right at most twice leaves the last two path
    # components as the final two list items.
    trailing_components = url_path.rsplit('/', 2)[-2:]
    return '/'.join(trailing_components)
71
72
def _get_devserver_build_from_update_url(update_url):
    """Get the devserver and build from the update url.

    The CROS/image_url_pattern value of the global config is turned into a
    regular expression by replacing each '%s' placeholder with a capture
    group, and that expression is then searched for in update_url.

    @param update_url: The url for update.
        Eg: http://devserver:port/update/build.

    @return: A tuple with the strings captured for the placeholders of the
        pattern; for a pattern with two placeholders this is
        (devserver url, build).

    @raises ValueError: If the update_url doesn't match the expected pattern,
        or the pattern yields fewer than two captured values.
    @raises ValueError: If the global config has no (or an empty)
        image_url_pattern.
    """
    pattern = global_config.global_config.get_config_value(
            'CROS', 'image_url_pattern', type=str, default='')
    if not pattern:
        raise ValueError('Cannot parse update_url, the global config needs '
                'an image_url_pattern.')
    # The capture group is written as a raw string: '\S' is not a valid
    # string escape and only works in a plain literal by accident.
    re_pattern = pattern.replace('%s', r'(\S+)')
    parts = re.search(re_pattern, update_url)
    if not parts or len(parts.groups()) < 2:
        raise ValueError('%s is not an update url' % update_url)
    return parts.groups()
96
97
def list_image_dir_contents(update_url):
    """Asks the devserver to list the image directory behind an update url.

    This is a best-effort helper: every failure it handles is logged as a
    warning and nothing is returned to the caller.

    @param update_url: An update url. Eg: http://devserver:port/update/build.
    """
    if not update_url:
        logging.warning('Need update_url to list contents of the devserver.')
        return

    failure_prefix = ('Cannot check contents of devserver, update url %s' %
                      update_url)
    try:
        # The unpacking stays inside the try block so that a result of
        # unexpected length is reported like any other parse failure.
        devserver_url, build = _get_devserver_build_from_update_url(update_url)
    except ValueError as e:
        logging.warning('%s: %s', failure_prefix, e)
        return

    image_server = dev_server.ImageServer(devserver_url)
    try:
        image_server.list_image_dir(build)
    except (dev_server.DevServerException, urllib2.URLError) as e:
        # The devserver will retry on URLError to avoid flaky connections,
        # but will eventually raise the URLError if it persists. All
        # HTTPErrors get converted to DevServerExceptions.
        logging.warning('%s: %s', failure_prefix, e)
120
121
122# TODO(garnold) This implements shared updater functionality needed for
123# supporting the autoupdate_EndToEnd server-side test. We should probably
124# migrate more of the existing ChromiumOSUpdater functionality to it as we
125# expand non-CrOS support in other tests.
class BaseUpdater(object):
    """Platform-agnostic DUT update functionality.

    Wraps the update engine client binary of a host: querying its status,
    triggering a background update and running a blocking image update.
    """

    def __init__(self, updater_ctrl_bin, update_url, host):
        """Initializes the object.

        @param updater_ctrl_bin: Path to update_engine_client.
        @param update_url: The URL we want the update to use.
        @param host: A client.common_lib.hosts.Host implementation.
        """
        self.updater_ctrl_bin = updater_ctrl_bin
        self.update_url = update_url
        self.host = host
        # Update errors are put on this queue in addition to being raised, so
        # that a caller running the update steps in separate processes (see
        # ChromiumOSUpdater.run_update) can retrieve them afterwards.
        self._update_error_queue = multiprocessing.Queue(2)


    def check_update_status(self):
        """Returns the current update engine state.

        We use the `update_engine_client -status' command and parse the line
        indicating the update state, e.g. "CURRENT_OP=UPDATE_STATUS_IDLE".

        @returns the text following the last '=' of the CURRENT_OP line,
            e.g. 'UPDATE_STATUS_IDLE'.
        """
        update_status = self.host.run(
            '%s -status 2>&1 | grep CURRENT_OP' % self.updater_ctrl_bin)
        return update_status.stdout.strip().split('=')[-1]


    def trigger_update(self):
        """Triggers a background update.

        @raise RootFSUpdateError if anything went wrong.
        """
        autoupdate_cmd = ('%s --check_for_update --omaha_url=%s' %
                          (self.updater_ctrl_bin, self.update_url))
        err_msg = 'Failed to trigger an update on %s.' % self.host.hostname
        logging.info('Triggering update via: %s', autoupdate_cmd)
        try:
            self.host.run(autoupdate_cmd)
        except (error.AutoservSshPermissionDeniedError,
                error.AutoservSSHTimeout) as e:
            err_msg += ' SSH reports an error: %s' % type(e).__name__
            raise RootFSUpdateError(err_msg)
        except error.AutoservRunError as e:
            # Check if the exit code is 255, if so it's probably a generic
            # SSH error.
            # NOTE(review): relies on the second exception argument being the
            # result object of the failed command.
            result = e.args[1]
            if result.exit_status == 255:
                err_msg += (' SSH reports a generic error (255), which could '
                            'indicate a problem with underlying connectivity '
                            'layers.')
                raise RootFSUpdateError(err_msg)

            # We have ruled out all SSH cases, the error code is from
            # update_engine_client, though we still don't know why.
            list_image_dir_contents(self.update_url)
            err_msg += (' It could be that the devserver is unreachable, the '
                        'payload unavailable, or there is a bug in the update '
                        'engine (unlikely). Reported error: %s' %
                        type(e).__name__)
            raise RootFSUpdateError(err_msg)


    def _verify_update_completed(self):
        """Verifies that an update has completed.

        @raise RootFSUpdateError: if verification fails.
        """
        status = self.check_update_status()
        if status != UPDATER_NEED_REBOOT:
            raise RootFSUpdateError('Update did not complete with correct '
                                    'status. Expecting %s, actual %s' %
                                    (UPDATER_NEED_REBOOT, status))


    def update_image(self):
        """Updates the device image and verifies success.

        Every error raised here is also put on the update error queue before
        it propagates.

        @raise RootFSUpdateError: if the update command fails or the update
            engine does not end up waiting for a reboot.
        """
        autoupdate_cmd = ('%s --update --omaha_url=%s 2>&1' %
                          (self.updater_ctrl_bin, self.update_url))
        try:
            self.host.run(autoupdate_cmd, timeout=3600)
        except error.AutoservRunError as e:
            list_image_dir_contents(self.update_url)
            update_error = RootFSUpdateError(
                    'Failed to install device image using payload at %s '
                    'on %s: %s' %
                    (self.update_url, self.host.hostname, e))
            self._update_error_queue.put(update_error)
            raise update_error
        except Exception as e:
            # Record unexpected errors as well, so that they are not lost
            # when this runs in a separate process. The bare raise keeps the
            # original traceback.
            self._update_error_queue.put(e)
            raise

        try:
            self._verify_update_completed()
        except RootFSUpdateError as e:
            self._update_error_queue.put(e)
            raise
224
225
class ChromiumOSUpdater(BaseUpdater):
    """Helper class used to update DUT with image of desired version."""
    # The attribute name keeps its historical misspelling ('STATEUL') so that
    # existing references to it continue to work.
    REMOTE_STATEUL_UPDATE_PATH = '/usr/local/bin/stateful_update'
    UPDATER_BIN = '/usr/bin/update_engine_client'
    # Where the stateful_update script is copied to on the host.
    STATEFUL_UPDATE = '/tmp/stateful_update'
    # Marker file removed from the host when the update engine is reset.
    UPDATED_MARKER = '/var/run/update_engine_autoupdate_completed'
    # Host logs collected when run_update() fails.
    UPDATER_LOGS = ['/var/log/messages', '/var/log/update_engine']

    # For each kernel slot: 'kernel' is the partition index passed to cgpt,
    # 'root' is the number matched against the trailing digits of
    # `rootdev -s`.
    KERNEL_A = {'name': 'KERN-A', 'kernel': 2, 'root': 3}
    KERNEL_B = {'name': 'KERN-B', 'kernel': 4, 'root': 5}
    # Time (in seconds) to wait for new kernel to be marked successful after
    # auto update.
    KERNEL_UPDATE_TIMEOUT = 120


    def __init__(self, update_url, host=None, local_devserver=False):
        """Initializes the object.

        @param update_url: The URL we want the update to use.
        @param host: A client.common_lib.hosts.Host implementation.
        @param local_devserver: If True, no expected version is derived from
            update_url (update_version is None).
        """
        super(ChromiumOSUpdater, self).__init__(self.UPDATER_BIN, update_url,
                                                host)
        self.local_devserver = local_devserver
        if not local_devserver:
            self.update_version = url_to_version(update_url)
        else:
            self.update_version = None


    def reset_update_engine(self):
        """Resets the host to prepare for a clean update regardless of state.

        @raise ChromiumOSError: if the update engine is not idle after being
            restarted.
        """
        self._run('rm -f %s' % self.UPDATED_MARKER)
        self._run('stop ui || true')
        self._run('stop update-engine || true')
        self._run('start update-engine')

        if self.check_update_status() != UPDATER_IDLE:
            raise ChromiumOSError('%s is not in an installable state' %
                                  self.host.hostname)


    def _run(self, cmd, *args, **kwargs):
        """Abbreviated form of self.host.run(...)"""
        return self.host.run(cmd, *args, **kwargs)


    def rootdev(self, options=''):
        """Returns the stripped output of rootdev <options>.

        @param options: options to run rootdev.

        """
        return self._run('rootdev %s' % options).stdout.strip()


    def get_kernel_state(self):
        """Returns the (<active>, <inactive>) kernel state as a pair.

        @raise ChromiumOSError: if the active root partition cannot be
            determined or is neither KERNEL_A's nor KERNEL_B's.
        """
        rootdev_output = self.rootdev('-s')
        # The partition number is the run of digits ending the device name.
        match = re.search(r'\d+\Z', rootdev_output)
        if not match:
            raise ChromiumOSError('Could not determine the root partition '
                                  'from rootdev output: %r' % rootdev_output)
        active_root = int(match.group(0))
        if active_root == self.KERNEL_A['root']:
            return self.KERNEL_A, self.KERNEL_B
        elif active_root == self.KERNEL_B['root']:
            return self.KERNEL_B, self.KERNEL_A
        else:
            raise ChromiumOSError('Encountered unknown root partition: %s' %
                                  active_root)


    def _cgpt(self, flag, kernel, dev='$(rootdev -s -d)'):
        """Return numeric cgpt value for the specified flag, kernel, device.

        @param flag: cgpt option selecting the value to show, e.g. '-P'.
        @param kernel: information of the given kernel, KERNEL_A or KERNEL_B.
        @param dev: device expression passed to cgpt; by default it is
            resolved on the host through `rootdev -s -d`.
        """
        return int(self._run('cgpt show -n -i %d %s %s' % (
            kernel['kernel'], flag, dev)).stdout.strip())


    def get_kernel_priority(self, kernel):
        """Return numeric priority for the specified kernel.

        @param kernel: information of the given kernel, KERNEL_A or KERNEL_B.

        """
        return self._cgpt('-P', kernel)


    def get_kernel_success(self, kernel):
        """Return boolean success flag for the specified kernel.

        @param kernel: information of the given kernel, KERNEL_A or KERNEL_B.

        """
        return self._cgpt('-S', kernel) != 0


    def get_kernel_tries(self, kernel):
        """Return tries count for the specified kernel.

        @param kernel: information of the given kernel, KERNEL_A or KERNEL_B.

        """
        return self._cgpt('-T', kernel)


    def get_stateful_update_script(self):
        """Returns the path to the stateful update script on the target."""
        # We attempt to load the local stateful update path in 3 different
        # ways. First we use the location specified in the autotest global
        # config. If this doesn't exist, we attempt to use the Chromium OS
        # Chroot path to the installed script. If all else fails, we use the
        # stateful update script on the host.
        stateful_update_path = os.path.join(
                global_config.global_config.get_config_value(
                        'CROS', 'source_tree', default=''),
                LOCAL_STATEFUL_UPDATE_PATH)

        if not os.path.exists(stateful_update_path):
            logging.warning('Could not find Chrome OS source location for '
                            'stateful_update script at %s, falling back to '
                            'chroot copy.', stateful_update_path)
            stateful_update_path = LOCAL_CHROOT_STATEFUL_UPDATE_PATH

        if not os.path.exists(stateful_update_path):
            logging.warning('Could not chroot stateful_update script, falling '
                            'back on client copy.')
            statefuldev_script = self.REMOTE_STATEUL_UPDATE_PATH
        else:
            # A local copy exists: send it to the host and use that copy.
            self.host.send_file(
                    stateful_update_path, self.STATEFUL_UPDATE,
                    delete_dest=True)
            statefuldev_script = self.STATEFUL_UPDATE

        return statefuldev_script


    def reset_stateful_partition(self):
        """Clear any pending stateful update request."""
        statefuldev_cmd = [self.get_stateful_update_script()]
        statefuldev_cmd += ['--stateful_change=reset', '2>&1']
        self._run(' '.join(statefuldev_cmd))


    def revert_boot_partition(self):
        """Revert the boot partition.

        Runs /postinst on the host for the root partition reported by
        `rootdev -s`.

        @returns the result of the /postinst command.
        """
        part = self.rootdev('-s')
        logging.warning('Reverting update; Boot partition will be %s', part)
        return self._run('/postinst %s 2>&1' % part)


    def rollback_rootfs(self, powerwash):
        """Triggers rollback and waits for it to complete.

        @param powerwash: If true, powerwash as part of rollback.

        @raise RootFSUpdateError if anything went wrong.

        """
        version = self.host.get_release_version()
        # Introduced can_rollback in M36 (build 5772). # etc/lsb-release matches
        # X.Y.Z. This version split just pulls the first part out.
        try:
            build_number = int(version.split('.')[0])
        except ValueError:
            logging.error('Could not parse build number.')
            build_number = 0

        if build_number >= 5772:
            can_rollback_cmd = '%s --can_rollback' % self.UPDATER_BIN
            logging.info('Checking for rollback.')
            try:
                self._run(can_rollback_cmd)
            except error.AutoservRunError as e:
                raise RootFSUpdateError("Rollback isn't possible on %s: %s" %
                                        (self.host.hostname, str(e)))

        rollback_cmd = '%s --rollback --follow' % self.UPDATER_BIN
        if not powerwash:
            rollback_cmd += ' --nopowerwash'

        logging.info('Performing rollback.')
        try:
            self._run(rollback_cmd)
        except error.AutoservRunError as e:
            raise RootFSUpdateError('Rollback failed on %s: %s' %
                                    (self.host.hostname, str(e)))

        self._verify_update_completed()


    # TODO(garnold) This is here for backward compatibility and should be
    # deprecated once we shift to using update_image() everywhere.
    def update_rootfs(self):
        """Run the standard command to force an update."""
        return self.update_image()


    def _get_stateful_update_url(self):
        """Returns the URL of the stateful payload matching self.update_url."""
        parsed = urlparse.urlparse(self.update_url)
        # Swap only the first path component that is exactly 'update', so
        # that an 'update' appearing in the host name or in the build name is
        # left untouched.
        static_path = re.sub(r'(?<![^/])update(?![^/])', 'static',
                             parsed.path, count=1)
        if static_path == parsed.path:
            # No such path component: keep the historical behavior of
            # replacing every occurrence.
            return self.update_url.replace('update', 'static')
        return urlparse.urlunparse(
                parsed[:2] + (static_path,) + parsed[3:])


    def update_stateful(self, clobber=True):
        """Updates the stateful partition.

        Every error raised here is also put on the update error queue before
        it propagates.

        @param clobber: If True, a clean stateful installation.

        @raise StatefulUpdateError: if the stateful update command fails.
        """
        logging.info('Updating stateful partition...')
        statefuldev_url = self._get_stateful_update_url()

        # Attempt stateful partition update; this must succeed so that the newly
        # installed host is testable after update.
        statefuldev_cmd = [self.get_stateful_update_script(), statefuldev_url]
        if clobber:
            statefuldev_cmd.append('--stateful_change=clean')

        statefuldev_cmd.append('2>&1')
        try:
            self._run(' '.join(statefuldev_cmd), timeout=600)
        except error.AutoservRunError:
            update_error = StatefulUpdateError(
                    'Failed to perform stateful update on %s' %
                    self.host.hostname)
            self._update_error_queue.put(update_error)
            raise update_error
        except Exception as e:
            # Record unexpected errors as well, so that they are not lost
            # when this runs in a separate process. The bare raise keeps the
            # original traceback.
            self._update_error_queue.put(e)
            raise


    def run_update(self, update_root=True):
        """Update the DUT with image of specific version.

        The rootfs and stateful updates run in parallel, each in its own
        process. If either of them reported an error, the boot partition is
        reverted, the stateful update request is reset and the first
        reported error is raised.

        @param update_root: True to force a rootfs update.

        @raise ChromiumOSError: if the update server cannot be reached or the
            update engine cannot be reset to an idle state.
        """
        booted_version = self.host.get_release_version()
        if self.update_version:
            logging.info('Updating from version %s to %s.',
                         booted_version, self.update_version)

        # Check that Dev Server is accepting connections (from autoserv's host).
        # If we can't talk to it, the machine host probably can't either.
        auserver_host = urlparse.urlparse(self.update_url)[1]
        connection = httplib.HTTPConnection(auserver_host)
        try:
            connection.connect()
        except IOError:
            raise ChromiumOSError(
                'Update server at %s not available' % auserver_host)
        finally:
            # The connection only serves as a reachability probe.
            connection.close()

        logging.info('Installing from %s to %s', self.update_url,
                     self.host.hostname)

        # Reset update state.
        self.reset_update_engine()
        self.reset_stateful_partition()

        try:
            updaters = [
                multiprocessing.process.Process(target=self.update_rootfs),
                multiprocessing.process.Process(target=self.update_stateful)
                ]
            if not update_root:
                logging.info('Root update is skipped.')
                updaters = updaters[1:]

            # Run the updaters in parallel.
            for updater in updaters:
                updater.start()
            for updater in updaters:
                updater.join()

            # Re-raise the first error that occurred.
            if not self._update_error_queue.empty():
                update_error = self._update_error_queue.get()
                self.revert_boot_partition()
                self.reset_stateful_partition()
                raise update_error

            logging.info('Update complete.')
        except:
            # Deliberately catches everything: the error is always re-raised
            # below, after the diagnostics have been gathered.
            # Collect update engine logs in the event of failure.
            if self.host.job:
                logging.info('Collecting update engine logs...')
                self.host.get_file(
                        self.UPDATER_LOGS, self.host.job.sysinfo.sysinfodir,
                        preserve_perm=False)
            list_image_dir_contents(self.update_url)
            raise
        finally:
            self.host.show_update_engine_log()


    def check_version(self):
        """Check the image running in DUT has the desired version.

        @returns: True if the DUT's image version matches the version that
            the autoupdater tries to update to; False otherwise, including
            when no update version is known.

        """
        booted_version = self.host.get_release_version()
        return bool(self.update_version and
                    self.update_version.endswith(booted_version))


    def check_version_to_confirm_install(self):
        """Check image running in DUT has the desired version to be installed.

        The method should not be used to check if DUT needs to have a full
        reimage. Only use it to confirm a image is installed.

        The method is designed to verify version for following 6 scenarios with
        samples of version to update to and expected booted version:
        1. trybot paladin build.
        update version: trybot-lumpy-paladin/R27-3837.0.0-b123
        booted version: 3837.0.2013_03_21_1340

        2. trybot release build.
        update version: trybot-lumpy-release/R27-3837.0.0-b456
        booted version: 3837.0.0

        3. buildbot official release build.
        update version: lumpy-release/R27-3837.0.0
        booted version: 3837.0.0

        4. non-official paladin rc build.
        update version: lumpy-paladin/R27-3878.0.0-rc7
        booted version: 3837.0.0-rc7

        5. chrome-perf build.
        update version: lumpy-chrome-perf/R28-3837.0.0-b2996
        booted version: 3837.0.0

        6. pgo-generate build.
        update version: lumpy-release-pgo-generate/R28-3837.0.0-b2996
        booted version: 3837.0.0-pgo-generate

        When we are checking if a DUT needs to do a full install, we should NOT
        use this method to check if the DUT is running the same version, since
        it may return false positive for a DUT running trybot paladin build to
        be updated to another trybot paladin build.

        TODO: This logic has a bug if a trybot paladin build failed to be
        installed in a DUT running an older trybot paladin build with same
        platform number, but different build number (-b###). So to conclusively
        determine if a tryjob paladin build is imaged successfully, we may need
        to find out the date string from update url.

        @returns: True if the DUT's image version (without the date string if
            the image is a trybot build), matches the version that the
            autoupdater is trying to update to.

        """
        # In the local_devserver case, we can't know the expected
        # build, so just pass.
        if not self.update_version:
            return True

        # Always try the default check_version method first, this prevents
        # any backward compatibility issue.
        if self.check_version():
            return True

        return utils.version_match(self.update_version,
                                   self.host.get_release_version(),
                                   self.update_url)


    def verify_boot_expectations(self, expected_kernel_state, rollback_message):
        """Verifies that we fully booted given expected kernel state.

        This method both verifies that we booted using the correct kernel
        state and that the OS has marked the kernel as good.

        @param expected_kernel_state: kernel state that we are verifying with
            i.e. I expect to be booted onto partition 4 etc. See output of
            get_kernel_state.
        @param rollback_message: string to raise as a ChromiumOSError
            if we booted with the wrong partition.

        @raises ChromiumOSError: If we didn't.
        """
        # Figure out the newly active kernel.
        active_kernel_state = self.get_kernel_state()[0]

        # Check for rollback due to a bad build.
        if (expected_kernel_state and
                active_kernel_state != expected_kernel_state):

            # Kernel crash reports should be wiped between test runs, but
            # may persist from earlier parts of the test, or from problems
            # with provisioning.
            #
            # Kernel crash reports will NOT be present if the crash happened
            # before encrypted stateful is mounted.
            #
            # TODO(dgarrett): Integrate with server/crashcollect.py at some
            # point.
            #
            # The reports live on the host, so they are listed through the
            # host rather than on the local filesystem. `|| true` keeps the
            # command from failing when there are no reports.
            crash_listing = self._run(
                    'ls /var/spool/crash/kernel.*.kcrash 2>/dev/null || true')
            kernel_crashes = crash_listing.stdout.split()
            if kernel_crashes:
                rollback_message += ': kernel_crash'
                logging.debug('Found %d kernel crash reports:',
                              len(kernel_crashes))
                # The crash names contain timestamps that may be useful:
                #   kernel.20131207.005945.0.kcrash
                for crash in kernel_crashes:
                    logging.debug('  %s', os.path.basename(crash))

            # Print out some information to make it easier to debug
            # the rollback.
            logging.debug('Dumping partition table.')
            self._run('cgpt show $(rootdev -s -d)')
            logging.debug('Dumping crossystem for firmware debugging.')
            self._run('crossystem --all')
            raise ChromiumOSError(rollback_message)

        # Make sure chromeos-setgoodkernel runs.
        try:
            utils.poll_for_condition(
                lambda: (self.get_kernel_tries(active_kernel_state) == 0
                         and self.get_kernel_success(active_kernel_state)),
                exception=ChromiumOSError(),
                timeout=self.KERNEL_UPDATE_TIMEOUT, sleep_interval=5)
        except ChromiumOSError:
            # Use the state of system-services to tell the two likely causes
            # apart in the error message.
            services_status = self._run('status system-services').stdout
            if services_status != 'system-services start/running\n':
                event = 'Chrome failed to reach login screen'
            else:
                event = ('update-engine failed to call '
                         'chromeos-setgoodkernel')
            raise ChromiumOSError(
                    'After update and reboot, %s '
                    'within %d seconds' % (event,
                                           self.KERNEL_UPDATE_TIMEOUT))
645
646
class BrilloUpdater(BaseUpdater):
    """Updater for Brillo DUTs, built on the shared BaseUpdater logic."""

    def __init__(self, update_url, host=None):
        """Sets up the updater with Brillo's update engine client.

        @param update_url: The URL we want the update to use.
        @param host: A client.common_lib.hosts.Host implementation.
        """
        # Path of the update engine client on the host.
        brillo_client_bin = '/system/bin/update_engine_client'
        super(BrilloUpdater, self).__init__(brillo_client_bin, update_url,
                                            host)
658