packages.py revision dbfc4e3bc0b0ec3bcefdc0d045e501ef96869339
1#!/usr/bin/python
2
3"""
4This module defines the BasePackageManager Class which provides an
5implementation of the packaging system API providing methods to fetch,
6upload and remove packages. Site specific extensions to any of these methods
7should inherit this class.
8"""
9
10import re, os, sys, traceback, subprocess, shutil, time, traceback, urlparse
11import fcntl
12from autotest_lib.client.common_lib import error, utils
13
14
class PackageUploadError(error.AutotestError):
    'Raised when a package (or the checksum file) cannot be uploaded'
17
class PackageFetchError(error.AutotestError):
    'Raised when a package cannot be fetched from a repository'
20
class PackageRemoveError(error.AutotestError):
    'Raised when a package cannot be removed from a repository path'
23
class PackageInstallError(error.AutotestError):
    'Raised when a package cannot be installed (e.g. its fetch failed)'
26
# The name of the checksum file that stores the packages' checksums. It is
# kept in the package manager directory (pkgmgr_dir), fetched from the
# repositories when missing locally, and uploaded alongside packages.
CHECKSUM_FILE = "packages.checksum"
29
30class BasePackageManager(object):
31    _repo_exception = {}
32    REPO_OK = object()
33
34    def __init__(self, pkgmgr_dir, repo_urls=None, upload_paths=None,
35                 do_locking=True, run_function=utils.run, run_function_args=[],
36                 run_function_dargs={}):
37        '''
38        repo_urls: The list of the repository urls which is consulted
39                   whilst fetching the package
40        upload_paths: The list of the upload of repositories to which
41                      the package is uploaded to
42        pkgmgr_dir : A directory that can be used by the package manager
43                      to dump stuff (like checksum files of the repositories
44                      etc.).
45        do_locking : Enable locking when the packages are installed.
46
47        run_function is used to execute the commands throughout this file.
48        It defaults to utils.run() but a custom method (if provided) should
49        be of the same schema as utils.run. It should return a CmdResult
50        object and throw a CmdError exception. The reason for using a separate
51        function to run the commands is that the same code can be run to fetch
52        a package on the local machine or on a remote machine (in which case
53        ssh_host's run function is passed in for run_function).
54        '''
55        # In memory dictionary that stores the checksum's of packages
56        self._checksum_dict = {}
57
58        self.pkgmgr_dir = pkgmgr_dir
59        self.do_locking = do_locking
60
61        # Process the repository URLs and the upload paths if specified
62        if not repo_urls:
63            self.repo_urls = []
64        else:
65            self.repo_urls = list(repo_urls)
66        if not upload_paths:
67            self.upload_paths = []
68        else:
69            self.upload_paths = list(upload_paths)
70
71        # Create an internal function that is a simple wrapper of
72        # run_function and takes in the args and dargs as arguments
73        def _run_command(command, _run_command_args=run_function_args,
74                         _run_command_dargs={}):
75            '''
76            Special internal function that takes in a command as
77            argument and passes it on to run_function (if specified).
78            The _run_command_dargs are merged into run_function_dargs
79            with the former having more precedence than the latter.
80            '''
81            new_dargs = dict(run_function_dargs)
82            new_dargs.update(_run_command_dargs)
83
84            return run_function(command, *_run_command_args,
85                                **new_dargs)
86
87        self._run_command = _run_command
88
89
90    def install_pkg(self, name, pkg_type, fetch_dir, install_dir,
91                    preserve_install_dir=False, repo_url=None):
92        '''
93        Remove install_dir if it already exists and then recreate it unless
94        preserve_install_dir is specified as True.
95        Fetch the package into the pkg_dir. Untar the package into install_dir
96        The assumption is that packages are of the form :
97        <pkg_type>.<pkg_name>.tar.bz2
98        name        : name of the package
99        type        : type of the package
100        fetch_dir   : The directory into which the package tarball will be
101                      fetched to.
102        install_dir : the directory where the package files will be untarred to
103        repo_url    : the url of the repository to fetch the package from.
104        '''
105
106        # do_locking flag is on by default unless you disable it (typically
107        # in the cases where packages are directly installed from the server
108        # onto the client in which case fcntl stuff wont work as the code
109        # will run on the server in that case..
110        if self.do_locking:
111            lockfile_name = '.%s-%s-lock' % (name, pkg_type)
112            lockfile = open(os.path.join(self.pkgmgr_dir, lockfile_name), 'w')
113
114        try:
115            if self.do_locking:
116                fcntl.flock(lockfile, fcntl.LOCK_EX)
117
118            self._run_command('mkdir -p %s' % fetch_dir)
119
120            pkg_name = self.get_tarball_name(name, pkg_type)
121            fetch_path = os.path.join(fetch_dir, pkg_name)
122            try:
123                # Fetch the package into fetch_dir
124                self.fetch_pkg(pkg_name, fetch_path)
125
126                # check to see if the install_dir exists and if it does
127                # then check to see if the .checksum file is the latest
128                install_dir_exists = False
129                try:
130                    self._run_command("ls %s" % install_dir)
131                    install_dir_exists = True
132                except (error.CmdError, error.AutoservRunError):
133                    pass
134
135                if (install_dir_exists and
136                    not self.untar_required(fetch_path, install_dir)):
137                    return
138
139                # untar the package into install_dir and
140                # update the checksum in that directory
141                if not preserve_install_dir:
142                    # Make sure we clean up the install_dir
143                    self._run_command('rm -rf %s' % install_dir)
144                self._run_command('mkdir -p %s' % install_dir)
145
146                self.untar_pkg(fetch_path, install_dir)
147
148            except PackageFetchError, why:
149                raise PackageInstallError('Installation of %s(type:%s) failed'
150                                          ' : %s' % (name, pkg_type, why))
151        finally:
152            if self.do_locking:
153                fcntl.flock(lockfile, fcntl.LOCK_UN)
154                lockfile.close()
155
156
157    def fetch_pkg(self, pkg_name, dest_path, repo_url=None, use_checksum=True):
158        '''
159        Fetch the package into dest_dir from repo_url. By default repo_url
160        is None and the package is looked in all the repostories specified.
161        Otherwise it fetches it from the specific repo_url.
162        pkg_name     : name of the package (ex: test-sleeptest.tar.bz2,
163                                            dep-gcc.tar.bz2, kernel.1-1.rpm)
164        repo_url     : the URL of the repository where the package is located.
165        dest_path    : complete path of where the package will be fetched to.
166        use_checksum : This is set to False to fetch the packages.checksum file
167                       so that the checksum comparison is bypassed for the
168                       checksum file itself. This is used internally by the
169                       packaging system. It should be ignored by externals
170                       callers of this method who use it fetch custom packages.
171        '''
172
173        try:
174            self._run_command("ls %s" % os.path.dirname(dest_path))
175        except (error.CmdError, error.AutoservRunError):
176            raise PackageFetchError("Please provide a valid "
177                                    "destination: %s " % dest_path)
178
179        # See if the package was already fetched earlier, if so
180        # the checksums need to be compared and the package is now
181        # fetched only if they differ.
182        pkg_exists = False
183        try:
184            self._run_command("ls %s" % dest_path)
185            pkg_exists = True
186        except (error.CmdError, error.AutoservRunError):
187            pass
188
189        # if a repository location is explicitly provided, fetch the package
190        # from there and return
191        if repo_url:
192            repo_url_list = [repo_url]
193        elif len(self.repo_urls) > 0:
194            repo_url_list = self.repo_urls
195        else:
196            raise PackageFetchError("There are no repository urls specified")
197
198        error_msgs = {}
199        for location in repo_url_list:
200            try:
201                # Fetch the checksum if it not there
202                if not use_checksum:
203                    self.fetch_pkg_file(pkg_name, dest_path, location)
204
205                # Fetch the package if a) the pkg does not exist or
206                # b) if the checksum differs for the existing package
207                elif (not pkg_exists or
208                      not self.compare_checksum(dest_path, location)):
209                    self.fetch_pkg_file(pkg_name, dest_path, location)
210                    # Update the checksum of the package in the packages'
211                    # checksum file
212                    self.update_checksum(dest_path)
213                return
214            except (PackageFetchError, error.AutoservRunError), e:
215                # The package could not be found in this repo, continue looking
216                error_msgs[location] = str(e)
217                print >> sys.stderr, ('Package - could not be fetched from '
218                                      '- %s : %s' % (location, e))
219
220        # if we got here then that means the package is not found
221        # in any of the repositories.
222        raise PackageFetchError("Package could not be fetched from any of"
223                                " the repos %s : %s " % (repo_url_list,
224                                                         error_msgs))
225
226
227    def fetch_pkg_file(self, file_name, dest_path, source_url):
228        """
229        Fetch the file from source_url into dest_path. The package repository
230        url is parsed and the appropriate retrieval method is determined.
231
232        """
233        if source_url.startswith('http://'):
234            self.fetch_file_http(file_name, dest_path, source_url)
235        else:
236            raise PackageFetchError("Invalid location specified")
237
238
239    def fetch_file_http(self, file_name, dest_path, source_url):
240        """
241        Fetch the package using http protocol. Raises a PackageFetchError.
242        """
243        # check to see if the source_url is reachable or not
244        self.run_http_test(source_url, os.path.dirname(dest_path))
245
246        pkg_path = os.path.join(source_url, file_name)
247        try:
248            self._run_command('wget %s -O %s' % (pkg_path, dest_path))
249        except error.CmdError, e:
250            raise PackageFetchError("Package - %s not found in %s: %s"
251                                    % (file_name, source_url, e))
252
253
254    def run_http_test(self, source_url, dest_dir):
255        '''
256        Run a simple 30 sec wget on source_url
257        just to see if it can be reachable or not. This avoids the need
258        for waiting for a 10min timeout.
259        '''
260        dest_file_path = os.path.join(dest_dir, 'http_test_file')
261
262        BPM = BasePackageManager
263        error_msg = "HTTP test failed. Failed to contact"
264        # We should never get here unless the source_url starts with http://
265        assert(source_url.startswith('http://'))
266
267        # Get the http server name from the URL
268        server_name = urlparse.urlparse(source_url)[1]
269        http_cmd = 'printf "GET / HTTP/1.0\n\n" | nc %s 80' % server_name
270
271        if server_name in BPM._repo_exception:
272            if BPM._repo_exception[server_name] == BPM.REPO_OK:
273                # This repository is fine. Simply return
274                return
275            else:
276                raise PackageFetchError("%s - %s : %s "
277                                        % (error_msg, server_name,
278                                           BPM._repo_exception[server_name]))
279        try:
280            try:
281                self._run_command(http_cmd,
282                                  _run_command_dargs={'timeout':30})
283                BPM._repo_exception[server_name] = BPM.REPO_OK
284            finally:
285                self._run_command('rm -f %s' % dest_file_path)
286        except error.CmdError, e:
287            BPM._repo_exception[server_name] = e
288            raise PackageFetchError("%s - %s: %s " % (error_msg,
289                                                      server_name, e))
290
291
292
293    # TODO(aganti): Fix the bug with the current checksum logic where
294    # packages' checksums that are not present consistently in all the
295    # repositories are not handled properly. This is a corner case though
296    # but the ideal solution is to make the checksum file repository specific
297    # and then maintain it.
298    def upload_pkg(self, pkg_path, upload_path=None, update_checksum=False):
299        '''
300        Uploads to a specified upload_path or to all the repos.
301        Also uploads the checksum file to all the repos.
302        pkg_path        : The complete path to the package file
303        upload_path     : the absolute path where the files are copied to.
304                          if set to 'None' assumes 'all' repos
305        update_checksum : If set to False, the checksum file is not
306                          going to be updated which happens by default.
307                          This is necessary for custom
308                          packages (like custom kernels and custom tests)
309                          that get uploaded which do not need to be part of
310                          the checksum file and bloat it.
311        '''
312        if update_checksum:
313            # get the packages' checksum file and update it with the current
314            # package's checksum
315            checksum_path = self._get_checksum_file_path()
316            self.update_checksum(pkg_path)
317
318        if upload_path:
319            upload_path_list = [upload_path]
320        elif len(self.upload_paths) > 0:
321            upload_path_list = self.upload_paths
322        else:
323            raise PackageUploadError("Invalid Upload Path specified")
324
325        # upload the package
326        for path in upload_path_list:
327            self.upload_pkg_file(pkg_path, path)
328            if update_checksum:
329                self.upload_pkg_file(checksum_path, path)
330
331
332    def upload_pkg_file(self, file_path, upload_path):
333        '''
334        Upload a single file. Depending on the upload path, the appropriate
335        method for that protocol is called. Currently this simply copies the
336        file to the target directory (but can be extended for other protocols)
337        This assumes that the web server is running on the same machine where
338        the method is being called from. The upload_path's files are
339        basically served by that web server.
340        '''
341        try:
342            shutil.copy(file_path, upload_path)
343            os.chmod(os.path.join(upload_path,
344                                  os.path.basename(file_path)), 0755)
345        except (IOError, os.error), why:
346            raise PackageUploadError("Upload of %s to %s failed: %s"
347                                     % (file_path, upload_path, why))
348
349
350    def remove_pkg(self, pkg_name, remove_path=None, remove_checksum=False):
351        '''
352        Remove the package from the specified remove_path
353        pkg_name    : name of the package (ex: test-sleeptest.tar.bz2,
354                                           dep-gcc.tar.bz2)
355        remove_path : the location to remove the package from.
356
357        '''
358        if remove_path:
359            remove_path_list = [remove_path]
360        elif len(self.upload_paths) > 0:
361            remove_path_list = self.upload_paths
362        else:
363            raise PackageRemoveError("Invalid path to remove the pkg from")
364
365        checksum_path = self._get_checksum_file_path()
366
367        if remove_checksum:
368            self.remove_checksum(pkg_name)
369
370        # remove the package and upload the checksum file to the repos
371        for path in remove_path_list:
372            self.remove_pkg_file(pkg_name, path)
373            self.upload_pkg_file(checksum_path, path)
374
375
376    def remove_pkg_file(self, file_name, pkg_dir):
377        '''
378        Remove the file named file_name from pkg_dir
379        '''
380        try:
381            # Remove the file
382            os.remove(os.path.join(pkg_dir, file_name))
383        except (IOError, os.error), why:
384            raise PackageRemoveError("Could not remove %s from %s: %s "
385                                     % (file_name, pkg_dir, why))
386
387
388    def _get_checksum_file_path(self):
389        '''
390        Return the complete path of the checksum file (assumed to be stored
391        in self.pkgmgr_dir
392        '''
393        return os.path.join(self.pkgmgr_dir, CHECKSUM_FILE)
394
395
396    def _get_checksum_dict(self):
397        '''
398        Fetch the checksum file if not already fetched. If the checksum file
399        cannot be fetched from the repos then a new file is created with
400        the current package's (specified in pkg_path) checksum value in it.
401        Populate the local checksum dictionary with the values read from
402        the checksum file.
403        The checksum file is assumed to be present in self.pkgmgr_dir
404        '''
405        checksum_path = self._get_checksum_file_path()
406        if not self._checksum_dict:
407            # Fetch the checksum file
408            try:
409                try:
410                    self._run_command("ls %s" % checksum_path)
411                except (error.CmdError, error.AutoservRunError):
412                    # The packages checksum file does not exist locally.
413                    # See if it is present in the repositories.
414                    self.fetch_pkg(CHECKSUM_FILE, checksum_path,
415                                   use_checksum=False)
416            except PackageFetchError, e:
417                # This should not happen whilst fetching a package..if a
418                # package is present in the repository, the corresponding
419                # checksum file should also be automatically present. This
420                # case happens only when a package
421                # is being uploaded and if it is the first package to be
422                # uploaded to the repos (hence no checksum file created yet)
423                # Return an empty dictionary in that case
424                return {}
425
426            # Read the checksum file into memory
427            checksum_file_contents = self._run_command('cat '
428                                                       + checksum_path).stdout
429
430            # Return {} if we have an empty checksum file present
431            if not checksum_file_contents.strip():
432                return {}
433
434            # Parse the checksum file contents into self._checksum_dict
435            for line in checksum_file_contents.splitlines():
436                checksum, package_name = line.split(None, 1)
437                self._checksum_dict[package_name] = checksum
438
439        return self._checksum_dict
440
441
442    def _save_checksum_dict(self, checksum_dict):
443        '''
444        Save the checksum dictionary onto the checksum file. Update the
445        local _checksum_dict variable with this new set of values.
446        checksum_dict :  New checksum dictionary
447        checksum_dir  :  The directory in which to store the checksum file to.
448        '''
449        checksum_path = self._get_checksum_file_path()
450        self._checksum_dict = checksum_dict.copy()
451        checksum_contents = '\n'.join(checksum + ' ' + pkg_name
452                                      for pkg_name,checksum in
453                                      checksum_dict.iteritems())
454        # Write the checksum file back to disk
455        self._run_command('echo "%s" > %s' % (checksum_contents,
456                                              checksum_path))
457
458
459    def compute_checksum(self, pkg_path):
460        '''
461        Compute the MD5 checksum for the package file and return it.
462        pkg_path : The complete path for the package file
463        '''
464        md5sum_output = self._run_command("md5sum %s " % pkg_path).stdout
465        return md5sum_output.split()[0]
466
467
468    def update_checksum(self, pkg_path):
469        '''
470        Update the checksum of the package in the packages' checksum
471        file. This method is called whenever a package is fetched just
472        to be sure that the checksums in the local file are the latest.
473        pkg_path : The complete path to the package file.
474        '''
475        # Compute the new checksum
476        new_checksum = self.compute_checksum(pkg_path)
477        checksum_dict = self._get_checksum_dict()
478        checksum_dict[os.path.basename(pkg_path)] = new_checksum
479        self._save_checksum_dict(checksum_dict)
480
481
482    def remove_checksum(self, pkg_name):
483        '''
484        Remove the checksum of the package from the packages checksum file.
485        This method is called whenever a package is removed from the
486        repositories in order clean its corresponding checksum.
487        pkg_name :  The name of the package to be removed
488        '''
489        checksum_dict = self._get_checksum_dict()
490        if pkg_name in checksum_dict:
491            del checksum_dict[pkg_name]
492        self._save_checksum_dict(checksum_dict)
493
494
495    def compare_checksum(self, pkg_path, repo_url):
496        '''
497        Calculate the checksum of the file specified in pkg_path and
498        compare it with the checksum in the checksum file
499        Return True if both match else return False.
500        pkg_path : The full path to the package file for which the
501                   checksum is being compared
502        repo_url : The URL to fetch the checksum from
503        '''
504        checksum_dict = self._get_checksum_dict()
505        package_name = os.path.basename(pkg_path)
506        if not checksum_dict or package_name not in checksum_dict:
507            return False
508
509        repository_checksum = checksum_dict[package_name]
510        local_checksum = self.compute_checksum(pkg_path)
511        return (local_checksum == repository_checksum)
512
513
514    def tar_package(self, pkg_name, src_dir, dest_dir, exclude_string=None):
515        '''
516        Create a tar.bz2 file with the name 'pkg_name' say test-blah.tar.bz2.
517        Excludes the directories specified in exclude_dirs while tarring
518        the source. Returns the tarball path.
519        '''
520        tarball_path = os.path.join(dest_dir, pkg_name)
521
522        utils.system("tar -cvjf %s -C %s %s "
523                     % (tarball_path, src_dir, exclude_string))
524
525        return tarball_path
526
527
528    def untar_required(self, tarball_path, dest_dir):
529        '''
530        Compare the checksum of the tarball_path with the .checksum file
531        in the dest_dir and return False if it matches. The untar
532        of the package happens only if the checksums do not match.
533        '''
534        checksum_path = os.path.join(dest_dir, '.checksum')
535        try:
536            existing_checksum = self._run_command('cat ' + checksum_path).stdout
537        except (error.CmdError, error.AutoservRunError):
538            # If the .checksum file is not present (generally, this should
539            # not be the case) then return True so that the untar happens
540            return True
541
542        new_checksum = self.compute_checksum(tarball_path)
543        return (new_checksum.strip() != existing_checksum.strip())
544
545
546    def untar_pkg(self, tarball_path, dest_dir):
547        '''
548        Untar the package present in the tarball_path and put a
549        ".checksum" file in the dest_dir containing the checksum
550        of the tarball. This method
551        assumes that the package to be untarred is of the form
552        <name>.tar.bz2
553        '''
554        self._run_command('tar xvjf %s -C %s' % (tarball_path, dest_dir))
555        # Put the .checksum file in the install_dir to note
556        # where the package came from
557        pkg_checksum = self.compute_checksum(tarball_path)
558        pkg_checksum_path = os.path.join(dest_dir,
559                                         '.checksum')
560        self._run_command('echo "%s" > %s '
561                          % (pkg_checksum, pkg_checksum_path))
562
563
564    def get_tarball_name(self, name, pkg_type):
565        return "%s-%s.tar.bz2" % (pkg_type, name)
566
567
568    def is_url(self, url):
569        """Return true if path looks like a URL"""
570        return url.startswith('http://')
571
572
573    def get_package_name(self, url, pkg_type):
574        '''
575        Extract the group and test name for the url. This method is currently
576        used only for tests.
577        '''
578        if pkg_type == 'test':
579            regex = '([^:]+://(.*)/([^/]*)$'
580            return self._get_package_name(url, regex)
581        else:
582            return ('', url)
583
584
585    def _get_package_name(self, url, regex):
586        if not self.is_url(url):
587            if url.endswith('.tar.bz2'):
588                testname = url.replace('.tar.bz2', '')
589                testname = re.sub(r'(\d*)\.', '', testname)
590                return (testname, testname)
591            else:
592                return ('', url)
593
594        match = re.match(regex, url)
595        if not match:
596            return ('', url)
597        group, filename = match.groups()
598        # Generate the group prefix.
599        group = re.sub(r'\W', '_', group)
600        # Drop the extension to get the raw test name.
601        testname = re.sub(r'\.tar\.bz2', '', filename)
602        # Drop any random numbers at the end of the test name if any
603        testname = re.sub(r'\.(\d*)', '', testname)
604        return (group, testname)
605
606
# site_packages.py may be nonexistent or empty; make sure that an appropriate
# SitePackageManager class is created nevertheless. When the import fails,
# fall back to a SitePackageManager that adds nothing to BasePackageManager.
try:
    from site_packages import SitePackageManager
except ImportError:
    class SitePackageManager(BasePackageManager):
        pass
614
# The package manager class: SitePackageManager (site-specific extensions if
# site_packages provides them, otherwise the plain base implementation).
class PackageManager(SitePackageManager):
    pass
617