autoupdater.py revision 0c0df7324b80a8fcf41739705b3bcbd10d3e2b8b
1# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import glob
6import httplib
7import logging
8import multiprocessing
9import os
10import re
11import urlparse
12import urllib2
13
14from autotest_lib.client.bin import utils
15from autotest_lib.client.common_lib import error, global_config
16from autotest_lib.client.common_lib.cros import dev_server
17
# Location of the stateful update script, relative to the CrOS source
# directory.
LOCAL_STATEFUL_UPDATE_PATH = 'src/platform/dev/stateful_update'
# Location of the stateful update script installed in the chroot.
LOCAL_CHROOT_STATEFUL_UPDATE_PATH = '/usr/bin/stateful_update'

# Update engine status values that the updater compares against.
UPDATER_IDLE = 'UPDATE_STATUS_IDLE'
UPDATER_NEED_REBOOT = 'UPDATE_STATUS_UPDATED_NEED_REBOOT'

# Update engine client states that occur after an update is triggered.
UPDATER_PROCESSING_UPDATE = [
    'UPDATE_STATUS_CHECKING_FORUPDATE',
    'UPDATE_STATUS_UPDATE_AVAILABLE',
    'UPDATE_STATUS_DOWNLOADING',
    'UPDATE_STATUS_FINALIZING',
]
28
class ChromiumOSError(error.InstallError):
    """Base class for the ChromiumOS-specific errors raised in this module."""
32
33
class RootFSUpdateError(ChromiumOSError):
    """Signals that updating the RootFS failed."""
37
38
class StatefulUpdateError(ChromiumOSError):
    """Signals that updating the stateful partition failed."""
42
43
def url_to_version(update_url):
    """Return the version based on update_url.

    The version is the last component of the URL path, e.g.
    http://.../update/lumpy-release/R27-3837.0.0 gives R27-3837.0.0.
    Trailing slashes are ignored so that .../R27-3837.0.0/ gives the same
    result instead of an empty string.

    @param update_url: url to the image to update to.
    @returns the version string; empty if the URL path has no components.

    """
    path = urlparse.urlparse(update_url).path
    # The Chrome OS version is generally the last element in the URL. The only
    # exception is delta update URLs, which are rooted under the version; e.g.,
    # http://.../update/.../0.14.755.0/au/0.14.754.0. In this case we want to
    # strip off the au section of the path before reading the version.
    path = re.sub(r'/au/.*', '', path)
    # Drop trailing slashes (and surrounding whitespace) first; otherwise the
    # last path component would be empty.
    return path.strip().rstrip('/').split('/')[-1].strip()
56
57
def url_to_image_name(update_url):
    """Return the image name based on update_url.

    From a URL like:
        http://172.22.50.205:8082/update/lumpy-release/R27-3837.0.0
    return lumpy-release/R27-3837.0.0

    Trailing slashes in the URL path are ignored, so the same URL with a
    final '/' yields the same image name.

    @param update_url: url to the image to update to.
    @returns a string representing the image name in the update_url.

    """
    # Without stripping, a trailing slash would produce an empty last
    # component and a name such as 'R27-3837.0.0/'.
    path = urlparse.urlparse(update_url).path.rstrip('/')
    return '/'.join(path.split('/')[-2:])
70
71
def _get_devserver_build_from_update_url(update_url):
    """Get the devserver and build from the update url.

    The 'image_url_pattern' value of the 'CROS' section of the global config
    is used as a template: each '%s' placeholder becomes a capture group and
    the rest of the template is matched literally.

    @param update_url: The url for update.
        Eg: http://devserver:port/update/build.

    @return: A tuple with the text captured for each placeholder, i.e.
        (devserver url, build) for a two-placeholder pattern.

    @raises ValueError: If the update_url doesn't match the expected pattern.
    @raises ValueError: If no global_config was found, or it doesn't contain an
        image_url_pattern.
    """
    pattern = global_config.global_config.get_config_value(
            'CROS', 'image_url_pattern', type=str, default='')
    if not pattern:
        raise ValueError('Cannot parse update_url, the global config needs '
                'an image_url_pattern.')
    # Escape the literal pieces of the template so characters such as '.' or
    # '?' are not interpreted as regular expression syntax.
    re_pattern = r'(\S+)'.join(
            re.escape(piece) for piece in pattern.split('%s'))
    parts = re.search(re_pattern, update_url)
    if not parts or len(parts.groups()) < 2:
        raise ValueError('%s is not an update url' % update_url)
    return parts.groups()
95
96
def list_image_dir_contents(update_url):
    """Ask the devserver to list the image directory behind an update url.

    This is a best-effort helper: every failure is logged as a warning and
    nothing is raised or returned.

    @param update_url: An update url. Eg: http://devserver:port/update/build.
    """
    if not update_url:
        logging.warning('Need update_url to list contents of the devserver.')
        return

    failure_prefix = ('Cannot check contents of devserver, update url %s' %
                      update_url)
    try:
        server_url, build_name = _get_devserver_build_from_update_url(
                update_url)
    except ValueError as parse_error:
        logging.warning('%s: %s', failure_prefix, parse_error)
        return

    image_server = dev_server.ImageServer(server_url)
    try:
        image_server.list_image_dir(build_name)
    except (dev_server.DevServerException, urllib2.URLError) as list_error:
        # The devserver retries on URLError to ride out flaky connections but
        # eventually raises it if the problem persists. All HTTPErrors are
        # converted to DevServerExceptions.
        logging.warning('%s: %s', failure_prefix, list_error)
119
120
class ChromiumOSUpdater():
    """Helper class used to update DUT with image of desired version."""
    # Stateful update script on the host, used when no local copy is found.
    REMOTE_STATEFUL_UPDATE_PATH = '/usr/local/bin/stateful_update'
    # Original (misspelled) name, kept as an alias for backward compatibility.
    REMOTE_STATEUL_UPDATE_PATH = REMOTE_STATEFUL_UPDATE_PATH
    # Update engine client binary run on the host.
    UPDATER_BIN = '/usr/bin/update_engine_client'
    # Destination on the host for a locally found stateful update script.
    STATEFUL_UPDATE = '/tmp/stateful_update'
    # Marker file removed from the host when the update engine is reset.
    UPDATED_MARKER = '/var/run/update_engine_autoupdate_completed'
    # Log files fetched from the host when an update fails.
    UPDATER_LOGS = ['/var/log/messages', '/var/log/update_engine']

    # Kernel descriptions: 'kernel' is the partition index passed to cgpt and
    # 'root' is the root partition number compared against rootdev output.
    KERNEL_A = {'name': 'KERN-A', 'kernel': 2, 'root': 3}
    KERNEL_B = {'name': 'KERN-B', 'kernel': 4, 'root': 5}
    # Time (in seconds) to wait for new kernel to be marked successful after
    # auto update.
    KERNEL_UPDATE_TIMEOUT = 120
134
135
136    def __init__(self, update_url, host=None, local_devserver=False):
137        self.host = host
138        self.update_url = update_url
139        self._update_error_queue = multiprocessing.Queue(2)
140        self.local_devserver = local_devserver
141        if not local_devserver:
142          self.update_version = url_to_version(update_url)
143        else:
144          self.update_version = None
145
146
147    def check_update_status(self):
148        """Return current status from update-engine."""
149        update_status = self._run(
150            '%s -status 2>&1 | grep CURRENT_OP' % self.UPDATER_BIN)
151        return update_status.stdout.strip().split('=')[-1]
152
153
154    def reset_update_engine(self):
155        """Resets the host to prepare for a clean update regardless of state."""
156        self._run('rm -f %s' % self.UPDATED_MARKER)
157        self._run('stop ui || true')
158        self._run('stop update-engine || true')
159        self._run('start update-engine')
160
161        if self.check_update_status() != UPDATER_IDLE:
162            raise ChromiumOSError('%s is not in an installable state' %
163                                  self.host.hostname)
164
165
166    def _run(self, cmd, *args, **kwargs):
167        """Abbreviated form of self.host.run(...)"""
168        return self.host.run(cmd, *args, **kwargs)
169
170
171    def rootdev(self, options=''):
172        """Returns the stripped output of rootdev <options>.
173
174        @param options: options to run rootdev.
175
176        """
177        return self._run('rootdev %s' % options).stdout.strip()
178
179
180    def get_kernel_state(self):
181        """Returns the (<active>, <inactive>) kernel state as a pair."""
182        active_root = int(re.findall('\d+\Z', self.rootdev('-s'))[0])
183        if active_root == self.KERNEL_A['root']:
184            return self.KERNEL_A, self.KERNEL_B
185        elif active_root == self.KERNEL_B['root']:
186            return self.KERNEL_B, self.KERNEL_A
187        else:
188            raise ChromiumOSError('Encountered unknown root partition: %s' %
189                                  active_root)
190
191
192    def _cgpt(self, flag, kernel, dev='$(rootdev -s -d)'):
193        """Return numeric cgpt value for the specified flag, kernel, device. """
194        return int(self._run('cgpt show -n -i %d %s %s' % (
195            kernel['kernel'], flag, dev)).stdout.strip())
196
197
198    def get_kernel_priority(self, kernel):
199        """Return numeric priority for the specified kernel.
200
201        @param kernel: information of the given kernel, KERNEL_A or KERNEL_B.
202
203        """
204        return self._cgpt('-P', kernel)
205
206
207    def get_kernel_success(self, kernel):
208        """Return boolean success flag for the specified kernel.
209
210        @param kernel: information of the given kernel, KERNEL_A or KERNEL_B.
211
212        """
213        return self._cgpt('-S', kernel) != 0
214
215
216    def get_kernel_tries(self, kernel):
217        """Return tries count for the specified kernel.
218
219        @param kernel: information of the given kernel, KERNEL_A or KERNEL_B.
220
221        """
222        return self._cgpt('-T', kernel)
223
224
225    def get_stateful_update_script(self):
226        """Returns the path to the stateful update script on the target."""
227        # We attempt to load the local stateful update path in 3 different
228        # ways. First we use the location specified in the autotest global
229        # config. If this doesn't exist, we attempt to use the Chromium OS
230        # Chroot path to the installed script. If all else fails, we use the
231        # stateful update script on the host.
232        stateful_update_path = os.path.join(
233                global_config.global_config.get_config_value(
234                        'CROS', 'source_tree', default=''),
235                LOCAL_STATEFUL_UPDATE_PATH)
236
237        if not os.path.exists(stateful_update_path):
238            logging.warning('Could not find Chrome OS source location for '
239                            'stateful_update script at %s, falling back to '
240                            'chroot copy.', stateful_update_path)
241            stateful_update_path = LOCAL_CHROOT_STATEFUL_UPDATE_PATH
242
243        if not os.path.exists(stateful_update_path):
244            logging.warning('Could not chroot stateful_update script, falling '
245                            'back on client copy.')
246            statefuldev_script = self.REMOTE_STATEUL_UPDATE_PATH
247        else:
248            self.host.send_file(
249                    stateful_update_path, self.STATEFUL_UPDATE,
250                    delete_dest=True)
251            statefuldev_script = self.STATEFUL_UPDATE
252
253        return statefuldev_script
254
255
256    def reset_stateful_partition(self):
257        """Clear any pending stateful update request."""
258        statefuldev_cmd = [self.get_stateful_update_script()]
259        statefuldev_cmd += ['--stateful_change=reset', '2>&1']
260        self._run(' '.join(statefuldev_cmd))
261
262
263    def revert_boot_partition(self):
264        """Revert the boot partition."""
265        part = self.rootdev('-s')
266        logging.warning('Reverting update; Boot partition will be %s', part)
267        return self._run('/postinst %s 2>&1' % part)
268
269
270    def trigger_update(self):
271        """Triggers a background update on a test image.
272
273        @raise RootFSUpdateError if anything went wrong.
274
275        """
276        autoupdate_cmd = '%s --check_for_update --omaha_url=%s' % (
277            self.UPDATER_BIN, self.update_url)
278        logging.info('Triggering update via: %s', autoupdate_cmd)
279        try:
280            self._run(autoupdate_cmd)
281        except (error.AutoservSshPermissionDeniedError,
282                error.AutoservSSHTimeout) as e:
283            raise RootFSUpdateError('SSH on %s is seeing %s' %
284                                    (self.host.hostname, type(e).__name__))
285        except error.AutoservRunError as e:
286            # Check if the exit code is 255, if so it's probably a generic
287            # SSH error.
288            result = e.args[1]
289            if result.exit_status == 255:
290              raise RootFSUpdateError('SSH on %s is seeing a generic error.' %
291                                      self.host.hostname)
292
293            # We have ruled out all SSH cases, the error code is from
294            # update_engine_client, though we still don't know why.
295            list_image_dir_contents(self.update_url)
296            raise RootFSUpdateError(
297                    'devserver unreachable, payload unavailable, '
298                    'or AU bug (unlikely) on %s: %s' %
299                    (self.host.hostname, type(e).__name__))
300
301
302    def _verify_update_completed(self):
303        """Verifies that an update has completed.
304
305        @raise RootFSUpdateError: if verification fails.
306        """
307        status = self.check_update_status()
308        if status != UPDATER_NEED_REBOOT:
309            raise RootFSUpdateError('Update did not complete with correct '
310                                    'status. Expecting %s, actual %s' %
311                                    (UPDATER_NEED_REBOOT, status))
312
313
314    def rollback_rootfs(self, powerwash):
315        """Triggers rollback and waits for it to complete.
316
317        @param powerwash: If true, powerwash as part of rollback.
318
319        @raise RootFSUpdateError if anything went wrong.
320
321        """
322        version = self.host.get_release_version()
323        # Introduced can_rollback in M36 (build 5772). # etc/lsb-release matches
324        # X.Y.Z. This version split just pulls the first part out.
325        try:
326            build_number = int(version.split('.')[0])
327        except ValueError:
328            logging.error('Could not parse build number.')
329            build_number = 0
330
331        if build_number >= 5772:
332            can_rollback_cmd = '%s --can_rollback' % self.UPDATER_BIN
333            logging.info('Checking for rollback.')
334            try:
335                self._run(can_rollback_cmd)
336            except error.AutoservRunError as e:
337                raise RootFSUpdateError("Rollback isn't possible on %s: %s" %
338                                        (self.host.hostname, str(e)))
339
340        rollback_cmd = '%s --rollback --follow' % self.UPDATER_BIN
341        if not powerwash:
342          rollback_cmd += ' --nopowerwash'
343
344        logging.info('Performing rollback.')
345        try:
346            self._run(rollback_cmd)
347        except error.AutoservRunError as e:
348            raise RootFSUpdateError('Rollback failed on %s: %s' %
349                                    (self.host.hostname, str(e)))
350
351        self._verify_update_completed()
352
353
354    def update_rootfs(self):
355        """Run the standard command to force an update."""
356        try:
357            autoupdate_cmd = '%s --update --omaha_url=%s 2>&1' % (
358                    self.UPDATER_BIN, self.update_url)
359            self._run(autoupdate_cmd, timeout=1200)
360        except error.AutoservRunError:
361            list_image_dir_contents(self.update_url)
362            update_error = RootFSUpdateError('update-engine failed on %s' %
363                                             self.host.hostname)
364            self._update_error_queue.put(update_error)
365            raise update_error
366        except Exception as e:
367            # Don't allow other exceptions to not be caught.
368            self._update_error_queue.put(e)
369            raise e
370
371        try:
372            self._verify_update_completed()
373        except RootFSUpdateError as e:
374            self._update_error_queue.put(e)
375            raise
376
377
378    def update_stateful(self, clobber=True):
379        """Updates the stateful partition.
380
381        @param clobber: If True, a clean stateful installation.
382        """
383        logging.info('Updating stateful partition...')
384        statefuldev_url = self.update_url.replace('update',
385                                                  'static')
386
387        # Attempt stateful partition update; this must succeed so that the newly
388        # installed host is testable after update.
389        statefuldev_cmd = [self.get_stateful_update_script(), statefuldev_url]
390        if clobber:
391            statefuldev_cmd.append('--stateful_change=clean')
392
393        statefuldev_cmd.append('2>&1')
394        try:
395            self._run(' '.join(statefuldev_cmd), timeout=600)
396        except error.AutoservRunError:
397            update_error = StatefulUpdateError('stateful_update failed on %s' %
398                                               self.host.hostname)
399            self._update_error_queue.put(update_error)
400            raise update_error
401        except Exception as e:
402            # Don't allow other exceptions to not be caught.
403            self._update_error_queue.put(e)
404            raise e
405
406
407    def run_update(self, update_root=True):
408        """Update the DUT with image of specific version.
409
410        @param update_root: True to force a rootfs update.
411        """
412        booted_version = self.host.get_release_version()
413        if self.update_version:
414            logging.info('Updating from version %s to %s.',
415                         booted_version, self.update_version)
416
417        # Check that Dev Server is accepting connections (from autoserv's host).
418        # If we can't talk to it, the machine host probably can't either.
419        auserver_host = urlparse.urlparse(self.update_url)[1]
420        try:
421            httplib.HTTPConnection(auserver_host).connect()
422        except IOError:
423            raise ChromiumOSError(
424                'Update server at %s not available' % auserver_host)
425
426        logging.info('Installing from %s to %s', self.update_url,
427                     self.host.hostname)
428
429        # Reset update state.
430        self.reset_update_engine()
431        self.reset_stateful_partition()
432
433        try:
434            updaters = [
435                multiprocessing.process.Process(target=self.update_rootfs),
436                multiprocessing.process.Process(target=self.update_stateful)
437                ]
438            if not update_root:
439                logging.info('Root update is skipped.')
440                updaters = updaters[1:]
441
442            # Run the updaters in parallel.
443            for updater in updaters: updater.start()
444            for updater in updaters: updater.join()
445
446            # Re-raise the first error that occurred.
447            if not self._update_error_queue.empty():
448                update_error = self._update_error_queue.get()
449                self.revert_boot_partition()
450                self.reset_stateful_partition()
451                raise update_error
452
453            logging.info('Update complete.')
454        except:
455            # Collect update engine logs in the event of failure.
456            if self.host.job:
457                logging.info('Collecting update engine logs...')
458                self.host.get_file(
459                        self.UPDATER_LOGS, self.host.job.sysinfo.sysinfodir,
460                        preserve_perm=False)
461            list_image_dir_contents(self.update_url)
462            raise
463        finally:
464            self.host.show_update_engine_log()
465
466
467    def check_version(self):
468        """Check the image running in DUT has the desired version.
469
470        @returns: True if the DUT's image version matches the version that
471            the autoupdater tries to update to.
472
473        """
474        booted_version = self.host.get_release_version()
475        return (self.update_version and
476                self.update_version.endswith(booted_version))
477
478
479    def check_version_to_confirm_install(self):
480        """Check image running in DUT has the desired version to be installed.
481
482        The method should not be used to check if DUT needs to have a full
483        reimage. Only use it to confirm a image is installed.
484
485        The method is designed to verify version for following 6 scenarios with
486        samples of version to update to and expected booted version:
487        1. trybot paladin build.
488        update version: trybot-lumpy-paladin/R27-3837.0.0-b123
489        booted version: 3837.0.2013_03_21_1340
490
491        2. trybot release build.
492        update version: trybot-lumpy-release/R27-3837.0.0-b456
493        booted version: 3837.0.0
494
495        3. buildbot official release build.
496        update version: lumpy-release/R27-3837.0.0
497        booted version: 3837.0.0
498
499        4. non-official paladin rc build.
500        update version: lumpy-paladin/R27-3878.0.0-rc7
501        booted version: 3837.0.0-rc7
502
503        5. chrome-perf build.
504        update version: lumpy-chrome-perf/R28-3837.0.0-b2996
505        booted version: 3837.0.0
506
507        6. pgo-generate build.
508        update version: lumpy-release-pgo-generate/R28-3837.0.0-b2996
509        booted version: 3837.0.0-pgo-generate
510
511        When we are checking if a DUT needs to do a full install, we should NOT
512        use this method to check if the DUT is running the same version, since
513        it may return false positive for a DUT running trybot paladin build to
514        be updated to another trybot paladin build.
515
516        TODO: This logic has a bug if a trybot paladin build failed to be
517        installed in a DUT running an older trybot paladin build with same
518        platform number, but different build number (-b###). So to conclusively
519        determine if a tryjob paladin build is imaged successfully, we may need
520        to find out the date string from update url.
521
522        @returns: True if the DUT's image version (without the date string if
523            the image is a trybot build), matches the version that the
524            autoupdater is trying to update to.
525
526        """
527        # In the local_devserver case, we can't know the expected
528        # build, so just pass.
529        if not self.update_version:
530            return True
531
532        # Always try the default check_version method first, this prevents
533        # any backward compatibility issue.
534        if self.check_version():
535            return True
536
537        return utils.version_match(self.update_version,
538                                   self.host.get_release_version(),
539                                   self.update_url)
540
541
542    def verify_boot_expectations(self, expected_kernel_state, rollback_message):
543        """Verifies that we fully booted given expected kernel state.
544
545        This method both verifies that we booted using the correct kernel
546        state and that the OS has marked the kernel as good.
547
548        @param expected_kernel_state: kernel state that we are verifying with
549            i.e. I expect to be booted onto partition 4 etc. See output of
550            get_kernel_state.
551        @param rollback_message: string to raise as a ChromiumOSError
552            if we booted with the wrong partition.
553
554        @raises ChromiumOSError: If we didn't.
555        """
556        # Figure out the newly active kernel.
557        active_kernel_state = self.get_kernel_state()[0]
558
559        # Check for rollback due to a bad build.
560        if (expected_kernel_state and
561                active_kernel_state != expected_kernel_state):
562
563            # Kernel crash reports should be wiped between test runs, but
564            # may persist from earlier parts of the test, or from problems
565            # with provisioning.
566            #
567            # Kernel crash reports will NOT be present if the crash happened
568            # before encrypted stateful is mounted.
569            #
570            # TODO(dgarrett): Integrate with server/crashcollect.py at some
571            # point.
572            kernel_crashes = glob.glob('/var/spool/crash/kernel.*.kcrash')
573            if kernel_crashes:
574                rollback_message += ': kernel_crash'
575                logging.debug('Found %d kernel crash reports:',
576                              len(kernel_crashes))
577                # The crash names contain timestamps that may be useful:
578                #   kernel.20131207.005945.0.kcrash
579                for crash in kernel_crashes:
580                    logging.debug('  %s', os.path.basename(crash))
581
582            # Print out some information to make it easier to debug
583            # the rollback.
584            logging.debug('Dumping partition table.')
585            self._run('cgpt show $(rootdev -s -d)')
586            logging.debug('Dumping crossystem for firmware debugging.')
587            self._run('crossystem --all')
588            raise ChromiumOSError(rollback_message)
589
590        # Make sure chromeos-setgoodkernel runs.
591        try:
592            utils.poll_for_condition(
593                lambda: (self.get_kernel_tries(active_kernel_state) == 0
594                         and self.get_kernel_success(active_kernel_state)),
595                exception=ChromiumOSError(),
596                timeout=self.KERNEL_UPDATE_TIMEOUT, sleep_interval=5)
597        except ChromiumOSError:
598            services_status = self._run('status system-services').stdout
599            if services_status != 'system-services start/running\n':
600                event = ('Chrome failed to reach login screen')
601            else:
602                event = ('update-engine failed to call '
603                         'chromeos-setgoodkernel')
604            raise ChromiumOSError(
605                    'After update and reboot, %s '
606                    'within %d seconds' % (event,
607                                           self.KERNEL_UPDATE_TIMEOUT))
608