autoupdater.py revision c193217c55a11367708be5c25bd1c8e1857ab6ff
1# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import httplib
6import logging
7import multiprocessing
8import os
9import re
10import urlparse
11
12from autotest_lib.client.bin import utils
13from autotest_lib.client.common_lib import error, global_config
14
# Local stateful update path is relative to the CrOS source directory.
LOCAL_STATEFUL_UPDATE_PATH = 'src/platform/dev/stateful_update'
# Fallback location of the stateful_update script inside the Chromium OS
# chroot, used when the source tree copy does not exist.
LOCAL_CHROOT_STATEFUL_UPDATE_PATH = '/usr/bin/stateful_update'
# Copy of the stateful_update script already present on the host; used when
# no local copy can be found. NOTE: the name is misspelled ("STATEUL") but is
# kept as is because it is a module-level name others may reference.
REMOTE_STATEUL_UPDATE_PATH = '/usr/local/bin/stateful_update'
# Destination on the host to which a local stateful_update script is sent.
STATEFUL_UPDATE = '/tmp/stateful_update'
# Client binary run on the host to query and drive update-engine.
UPDATER_BIN = '/usr/bin/update_engine_client'
# CURRENT_OP value required after update-engine has been restarted.
UPDATER_IDLE = 'UPDATE_STATUS_IDLE'
# CURRENT_OP value expected once an update or rollback has completed.
UPDATER_NEED_REBOOT = 'UPDATE_STATUS_UPDATED_NEED_REBOOT'
# Marker file removed from the host before update-engine is restarted.
UPDATED_MARKER = '/var/run/update_engine_autoupdate_completed'
# Space-separated host paths fetched into sysinfo when an update fails.
UPDATER_LOGS = '/var/log/messages /var/log/update_engine'
25
26
class ChromiumOSError(error.InstallError):
    """Base class for the ChromiumOS-specific errors raised in this module.

    RootFSUpdateError and StatefulUpdateError derive from it, so catching
    this class also catches both of them.
    """
30
31
class RootFSUpdateError(ChromiumOSError):
    """Signals that updating (or rolling back) the RootFS did not succeed."""
35
36
class StatefulUpdateError(ChromiumOSError):
    """Signals that updating the stateful partition did not succeed."""
40
41
def url_to_version(update_url):
    """Return the version based on update_url.

    From a URL like:
        http://172.22.50.205:8082/update/lumpy-release/R27-3837.0.0
    return R27-3837.0.0. A trailing slash on the URL is ignored.

    @param update_url: url to the image to update to.
    @returns a string with the version found in the update_url.

    """
    # The Chrome OS version is generally the last element in the URL. The only
    # exception is delta update URLs, which are rooted under the version; e.g.,
    # http://.../update/.../0.14.755.0/au/0.14.754.0. In this case we want to
    # strip off the au section of the path before reading the version.
    path = re.sub(r'/au/.*', '', urlparse.urlparse(update_url).path)
    # Drop trailing slashes first: otherwise the last path element is empty
    # and an empty version makes the later version checks pass vacuously.
    return path.strip().rstrip('/').split('/')[-1].strip()
54
55
def url_to_image_name(update_url):
    """Return the image name based on update_url.

    From a URL like:
        http://172.22.50.205:8082/update/lumpy-release/R27-3837.0.0
    return lumpy-release/R27-3837.0.0

    A trailing slash on the URL is ignored, so the same name is returned
    for .../lumpy-release/R27-3837.0.0/.

    @param update_url: url to the image to update to.
    @returns a string representing the image name in the update_url.

    """
    # Strip trailing slashes so the last two elements are always the board
    # and the build rather than the build and an empty string.
    path = urlparse.urlparse(update_url).path.rstrip('/')
    return '/'.join(path.split('/')[-2:])
68
69
class ChromiumOSUpdater():
    """Helper class used to update DUT with image of desired version."""
    # Descriptors of the two kernel/root slots. 'name' is a label, 'kernel'
    # is the partition index handed to `cgpt show -i`, and 'root' is the
    # partition number compared against the trailing digits of `rootdev -s`.
    KERNEL_A = {'name': 'KERN-A', 'kernel': 2, 'root': 3}
    KERNEL_B = {'name': 'KERN-B', 'kernel': 4, 'root': 5}
    # Time (in seconds) to wait for new kernel to be marked successful after
    # auto update.
    KERNEL_UPDATE_TIMEOUT = 120
77
78
79    def __init__(self, update_url, host=None, local_devserver=False):
80        self.host = host
81        self.update_url = update_url
82        self._update_error_queue = multiprocessing.Queue(2)
83        self.local_devserver = local_devserver
84        if not local_devserver:
85          self.update_version = url_to_version(update_url)
86        else:
87          self.update_version = None
88
89    def check_update_status(self):
90        """Return current status from update-engine."""
91        update_status = self._run(
92            '%s -status 2>&1 | grep CURRENT_OP' % UPDATER_BIN)
93        return update_status.stdout.strip().split('=')[-1]
94
95
96    def reset_update_engine(self):
97        """Restarts the update-engine service."""
98        self._run('rm -f %s' % UPDATED_MARKER)
99        try:
100            self._run('initctl stop update-engine')
101        except error.AutoservRunError:
102            logging.warn('Stopping update-engine service failed. Already dead?')
103        self._run('initctl start update-engine')
104
105        if self.check_update_status() != UPDATER_IDLE:
106            raise ChromiumOSError('%s is not in an installable state' %
107                                  self.host.hostname)
108
109
110    def _run(self, cmd, *args, **kwargs):
111        """Abbreviated form of self.host.run(...)"""
112        return self.host.run(cmd, *args, **kwargs)
113
114
115    def rootdev(self, options=''):
116        """Returns the stripped output of rootdev <options>.
117
118        @param options: options to run rootdev.
119
120        """
121        return self._run('rootdev %s' % options).stdout.strip()
122
123
124    def get_kernel_state(self):
125        """Returns the (<active>, <inactive>) kernel state as a pair."""
126        active_root = int(re.findall('\d+\Z', self.rootdev('-s'))[0])
127        if active_root == self.KERNEL_A['root']:
128            return self.KERNEL_A, self.KERNEL_B
129        elif active_root == self.KERNEL_B['root']:
130            return self.KERNEL_B, self.KERNEL_A
131        else:
132            raise ChromiumOSError('Encountered unknown root partition: %s' %
133                                  active_root)
134
135
136    def _cgpt(self, flag, kernel, dev='$(rootdev -s -d)'):
137        """Return numeric cgpt value for the specified flag, kernel, device. """
138        return int(self._run('cgpt show -n -i %d %s %s' % (
139            kernel['kernel'], flag, dev)).stdout.strip())
140
141
142    def get_kernel_priority(self, kernel):
143        """Return numeric priority for the specified kernel.
144
145        @param kernel: information of the given kernel, KERNEL_A or KERNEL_B.
146
147        """
148        return self._cgpt('-P', kernel)
149
150
151    def get_kernel_success(self, kernel):
152        """Return boolean success flag for the specified kernel.
153
154        @param kernel: information of the given kernel, KERNEL_A or KERNEL_B.
155
156        """
157        return self._cgpt('-S', kernel) != 0
158
159
160    def get_kernel_tries(self, kernel):
161        """Return tries count for the specified kernel.
162
163        @param kernel: information of the given kernel, KERNEL_A or KERNEL_B.
164
165        """
166        return self._cgpt('-T', kernel)
167
168
169    def get_stateful_update_script(self):
170        """Returns the path to the stateful update script on the target."""
171        # We attempt to load the local stateful update path in 3 different
172        # ways. First we use the location specified in the autotest global
173        # config. If this doesn't exist, we attempt to use the Chromium OS
174        # Chroot path to the installed script. If all else fails, we use the
175        # stateful update script on the host.
176        stateful_update_path = os.path.join(
177                global_config.global_config.get_config_value(
178                        'CROS', 'source_tree', default=''),
179                LOCAL_STATEFUL_UPDATE_PATH)
180
181        if not os.path.exists(stateful_update_path):
182            logging.warn('Could not find Chrome OS source location for '
183                         'stateful_update script at %s, falling back to chroot '
184                         'copy.', stateful_update_path)
185            stateful_update_path = LOCAL_CHROOT_STATEFUL_UPDATE_PATH
186
187        if not os.path.exists(stateful_update_path):
188            logging.warn('Could not chroot stateful_update script, falling '
189                         'back on client copy.')
190            statefuldev_script = REMOTE_STATEUL_UPDATE_PATH
191        else:
192            self.host.send_file(
193                    stateful_update_path, STATEFUL_UPDATE, delete_dest=True)
194            statefuldev_script = STATEFUL_UPDATE
195
196        return statefuldev_script
197
198
199    def reset_stateful_partition(self):
200        """Clear any pending stateful update request."""
201        statefuldev_cmd = [self.get_stateful_update_script()]
202        statefuldev_cmd += ['--stateful_change=reset', '2>&1']
203        self._run(' '.join(statefuldev_cmd))
204
205
206    def revert_boot_partition(self):
207        """Revert the boot partition."""
208        part = self.rootdev('-s')
209        logging.warn('Reverting update; Boot partition will be %s', part)
210        return self._run('/postinst %s 2>&1' % part)
211
212
213    def trigger_update(self):
214        """Triggers a background update on a test image.
215
216        @raise RootFSUpdateError if anything went wrong.
217
218        """
219        autoupdate_cmd = '%s --check_for_update --omaha_url=%s' % (
220            UPDATER_BIN, self.update_url)
221        logging.info('Triggering update via: %s', autoupdate_cmd)
222        try:
223            self._run(autoupdate_cmd)
224        except error.AutoservRunError, e:
225            raise RootFSUpdateError('Update triggering failed on %s: %s' %
226                                    (self.host.hostname, str(e)))
227
228    def _verify_update_completed(self):
229        """Verifies that an update has completed.
230
231        @raise RootFSUpdateError: if verification fails.
232        """
233        status = self.check_update_status()
234        if status != UPDATER_NEED_REBOOT:
235            raise RootFSUpdateError('Update did not complete with correct '
236                                    'status. Expecting %s, actual %s' %
237                                            (UPDATER_NEED_REBOOT, status))
238
239
240    def rollback_rootfs(self, powerwash):
241        """Triggers rollback and waits for it to complete.
242
243        @param powerwash: If true, powerwash as part of rollback.
244
245        @raise RootFSUpdateError if anything went wrong.
246
247        """
248        #TODO(sosa): crbug.com/309051 - Make this one update_engine_client call.
249        rollback_cmd = '%s --rollback' % (UPDATER_BIN)
250        wait_for_update_to_complete_cmd = '%s --update' % (UPDATER_BIN)
251        if not powerwash:
252          rollback_cmd += ' --nopowerwash'
253
254        logging.info('Triggering rollback.')
255        try:
256            self._run(rollback_cmd)
257            self._run(wait_for_update_to_complete_cmd)
258        except error.AutoservRunError as e:
259            raise RootFSUpdateError('Rollback failed on %s: %s' %
260                                    (self.host.hostname, str(e)))
261
262        self._verify_update_completed()
263
264
265    def update_rootfs(self):
266        """Updates the rootfs partition only."""
267        logging.info('Updating root partition...')
268
269        # Run update_engine using the specified URL.
270        try:
271            autoupdate_cmd = '%s --update --omaha_url=%s 2>&1' % (
272                UPDATER_BIN, self.update_url)
273            self._run(autoupdate_cmd, timeout=900)
274        except error.AutoservRunError:
275            update_error = RootFSUpdateError('update-engine failed on %s' %
276                                             self.host.hostname)
277            self._update_error_queue.put(update_error)
278            raise update_error
279
280        try:
281            self._verify_update_completed()
282        except RootFSUpdateError as e:
283            self._update_error_queue.put(e)
284            raise
285
286
287    def update_stateful(self, clobber=True):
288        """Updates the stateful partition.
289
290        @param clobber: If True, a clean stateful installation.
291        """
292        logging.info('Updating stateful partition...')
293        statefuldev_url = self.update_url.replace('update',
294                                                  'static')
295
296        # Attempt stateful partition update; this must succeed so that the newly
297        # installed host is testable after update.
298        statefuldev_cmd = [self.get_stateful_update_script(), statefuldev_url]
299        if clobber:
300            statefuldev_cmd.append('--stateful_change=clean')
301
302        statefuldev_cmd.append('2>&1')
303        try:
304            self._run(' '.join(statefuldev_cmd), timeout=600)
305        except error.AutoservRunError:
306            update_error = StatefulUpdateError('stateful_update failed on %s' %
307                                               self.host.hostname)
308            self._update_error_queue.put(update_error)
309            raise update_error
310
311
312    def run_update(self, force_update, update_root=True):
313        """Update the DUT with image of specific version.
314
315        @param force_update: True to update DUT even if it's running the same
316            version already.
317        @param update_root: True to force a kernel update. If it's False and
318            force_update is True, stateful update will be used to clean up
319            the DUT.
320
321        """
322        booted_version = self.get_build_id()
323        if (self.check_version() and not force_update):
324            logging.info('System is already up to date. Skipping update.')
325            return False
326
327        if self.update_version:
328            logging.info('Updating from version %s to %s.',
329                         booted_version, self.update_version)
330
331        # Check that Dev Server is accepting connections (from autoserv's host).
332        # If we can't talk to it, the machine host probably can't either.
333        auserver_host = urlparse.urlparse(self.update_url)[1]
334        try:
335            httplib.HTTPConnection(auserver_host).connect()
336        except IOError:
337            raise ChromiumOSError(
338                'Update server at %s not available' % auserver_host)
339
340        logging.info('Installing from %s to %s', self.update_url,
341                     self.host.hostname)
342
343        # Reset update state.
344        self.reset_update_engine()
345        self.reset_stateful_partition()
346
347        try:
348            updaters = [
349                multiprocessing.process.Process(target=self.update_rootfs),
350                multiprocessing.process.Process(target=self.update_stateful)
351                ]
352            if not update_root:
353                logging.info('Root update is skipped.')
354                updaters = updaters[1:]
355
356            # Run the updaters in parallel.
357            for updater in updaters: updater.start()
358            for updater in updaters: updater.join()
359
360            # Re-raise the first error that occurred.
361            if not self._update_error_queue.empty():
362                update_error = self._update_error_queue.get()
363                self.revert_boot_partition()
364                self.reset_stateful_partition()
365                raise update_error
366
367            logging.info('Update complete.')
368            return True
369        except:
370            # Collect update engine logs in the event of failure.
371            if self.host.job:
372                logging.info('Collecting update engine logs...')
373                self.host.get_file(
374                    UPDATER_LOGS, self.host.job.sysinfo.sysinfodir,
375                    preserve_perm=False)
376            raise
377        finally:
378            self.host.show_update_engine_log()
379
380
381    def check_version(self):
382        """Check the image running in DUT has the desired version.
383
384        @returns: True if the DUT's image version matches the version that
385            the autoupdater tries to update to.
386
387        """
388        booted_version = self.get_build_id()
389        return (self.update_version and
390                self.update_version.endswith(booted_version))
391
392
393    def check_version_to_confirm_install(self):
394        """Check image running in DUT has the desired version to be installed.
395
396        The method should not be used to check if DUT needs to have a full
397        reimage. Only use it to confirm a image is installed.
398
399        The method is designed to verify version for following 4 scenarios with
400        samples of version to update to and expected booted version:
401        1. trybot paladin build.
402        update version: trybot-lumpy-paladin/R27-3837.0.0-b123
403        booted version: 3837.0.2013_03_21_1340
404
405        2. trybot release build.
406        update version: trybot-lumpy-release/R27-3837.0.0-b456
407        booted version: 3837.0.0
408
409        3. buildbot official release build.
410        update version: lumpy-release/R27-3837.0.0
411        booted version: 3837.0.0
412
413        4. non-official paladin rc build.
414        update version: lumpy-paladin/R27-3878.0.0-rc7
415        booted version: 3837.0.0-rc7
416
417        5. chrome-perf build.
418        update version: lumpy-chrome-perf/R28-3837.0.0-b2996
419        booted version: 3837.0.0
420
421        6. pgo-generate build.
422        update version: lumpy-release-pgo-generate/R28-3837.0.0-b2996
423        booted version: 3837.0.0-pgo-generate
424
425        When we are checking if a DUT needs to do a full install, we should NOT
426        use this method to check if the DUT is running the same version, since
427        it may return false positive for a DUT running trybot paladin build to
428        be updated to another trybot paladin build.
429
430        TODO: This logic has a bug if a trybot paladin build failed to be
431        installed in a DUT running an older trybot paladin build with same
432        platform number, but different build number (-b###). So to conclusively
433        determine if a tryjob paladin build is imaged successfully, we may need
434        to find out the date string from update url.
435
436        @returns: True if the DUT's image version (without the date string if
437            the image is a trybot build), matches the version that the
438            autoupdater is trying to update to.
439
440        """
441        # In the local_devserver case, we can't know the expected
442        # build, so just pass.
443        if not self.update_version:
444            return True
445
446        # Always try the default check_version method first, this prevents
447        # any backward compatibility issue.
448        if self.check_version():
449            return True
450
451        # Remove R#- and -b# at the end of build version
452        stripped_version = re.sub(r'(R\d+-|-b\d+)', '', self.update_version)
453
454        booted_version = self.get_build_id()
455
456        is_trybot_paladin_build = re.match(r'.+trybot-.+-paladin',
457                                           self.update_url)
458
459        # Replace date string with 0 in booted_version
460        booted_version_no_date = re.sub(r'\d{4}_\d{2}_\d{2}_\d+', '0',
461                                        booted_version)
462        has_date_string = booted_version != booted_version_no_date
463
464        is_pgo_generate_build = re.match(r'.+-pgo-generate',
465                                           self.update_url)
466
467        # Remove |-pgo-generate| in booted_version
468        booted_version_no_pgo = booted_version.replace('-pgo-generate', '')
469        has_pgo_generate = booted_version != booted_version_no_pgo
470
471        if is_trybot_paladin_build:
472            if not has_date_string:
473                logging.error('A trybot paladin build is expected. Version ' +
474                              '"%s" is not a paladin build.', booted_version)
475                return False
476            return stripped_version == booted_version_no_date
477        elif is_pgo_generate_build:
478            if not has_pgo_generate:
479                logging.error('A pgo-generate build is expected. Version ' +
480                              '"%s" is not a pgo-generate build.',
481                              booted_version)
482                return False
483            return stripped_version == booted_version_no_pgo
484        else:
485            if has_date_string:
486                logging.error('Unexpected date found in a non trybot paladin' +
487                              ' build.')
488                return False
489            # Versioned build, i.e., rc or release build.
490            return stripped_version == booted_version
491
492
493    def get_build_id(self):
494        """Pulls the CHROMEOS_RELEASE_VERSION string from /etc/lsb-release."""
495        return self._run('grep CHROMEOS_RELEASE_VERSION'
496                         ' /etc/lsb-release').stdout.split('=')[1].strip()
497
498
499    def verify_boot_expectations(self, expected_kernel_state, rollback_message):
500        """Verifies that we fully booted given expected kernel state.
501
502        This method both verifies that we booted using the correct kernel
503        state and that the OS has marked the kernel as good.
504
505        @param expected_kernel_state: kernel state that we are verifying with
506            i.e. I expect to be booted onto partition 4 etc. See output of
507            get_kernel_state.
508        @param rollback_message: string to raise as a ChromiumOSError
509            if we booted with the wrong partition.
510
511        @raises ChromiumOSError: If we didn't.
512        """
513        # Figure out the newly active kernel.
514        active_kernel_state = self.get_kernel_state()[0]
515
516        # Check for rollback due to a bad build.
517        if (expected_kernel_state and
518                active_kernel_state != expected_kernel_state):
519            # Print out some information to make it easier to debug
520            # the rollback.
521            logging.debug('Dumping partition table.')
522            self._run('cgpt show $(rootdev -s -d)')
523            logging.debug('Dumping crossystem for firmware debugging.')
524            self._run('crossystem --all')
525            raise ChromiumOSError(rollback_message)
526
527        # Make sure chromeos-setgoodkernel runs.
528        try:
529            utils.poll_for_condition(
530                lambda: (self.get_kernel_tries(active_kernel_state) == 0
531                         and self.get_kernel_success(active_kernel_state)),
532                exception=ChromiumOSError(),
533                timeout=self.KERNEL_UPDATE_TIMEOUT, sleep_interval=5)
534        except ChromiumOSError:
535            services_status = self._run('status system-services').stdout
536            if services_status != 'system-services start/running\n':
537                event = ('Chrome failed to reach login screen')
538            else:
539                event = ('update-engine failed to call '
540                         'chromeos-setgoodkernel')
541            raise ChromiumOSError(
542                    'After update and reboot, %s '
543                    'within %d seconds' % (event,
544                                           self.KERNEL_UPDATE_TIMEOUT))
545