revision abe330e3a18aac1a7ab946201ed0fe8cdb0f9924
"""
This module defines the BasePackageManager class, which provides an
implementation of the packaging system API with methods to fetch,
upload and remove packages. Site-specific extensions to any of these methods
should inherit from this class.
"""
10import re, os, sys, traceback, subprocess, shutil, time, traceback, urlparse
11import fcntl
12from autotest_lib.client.common_lib import error, utils
class PackageUploadError(error.AutotestError):
    """Raised when there is an error uploading the package."""
class PackageFetchError(error.AutotestError):
    """Raised when there is an error fetching the package."""
class PackageRemoveError(error.AutotestError):
    """Raised when there is an error removing the package."""
class PackageInstallError(error.AutotestError):
    """Raised when there is an error installing the package."""
# Name of the file that stores the checksums of all the packages. It is kept
# in the package manager's working directory and mirrored to the repositories.
CHECKSUM_FILE = "packages.checksum"
30class BasePackageManager(object):
31    _repo_exception = {}
32    REPO_OK = object()
34    def __init__(self, pkgmgr_dir, hostname=None, repo_urls=None,
35                 upload_paths=None, do_locking=True,,
36                 run_function_args=[], run_function_dargs={}):
37        '''
38        repo_urls: The list of the repository urls which is consulted
39                   whilst fetching the package
40        upload_paths: The list of the upload of repositories to which
41                      the package is uploaded to
42        pkgmgr_dir : A directory that can be used by the package manager
43                      to dump stuff (like checksum files of the repositories
44                      etc.).
45        do_locking : Enable locking when the packages are installed.
47        run_function is used to execute the commands throughout this file.
48        It defaults to but a custom method (if provided) should
49        be of the same schema as It should return a CmdResult
50        object and throw a CmdError exception. The reason for using a separate
51        function to run the commands is that the same code can be run to fetch
52        a package on the local machine or on a remote machine (in which case
53        ssh_host's run function is passed in for run_function).
54        '''
55        # In memory dictionary that stores the checksum's of packages
56        self._checksum_dict = {}
58        self.pkgmgr_dir = pkgmgr_dir
59        self.do_locking = do_locking
60        self.hostname = hostname
62        # Process the repository URLs and the upload paths if specified
63        if not repo_urls:
64            self.repo_urls = []
65        else:
66            if hostname:
67                self.repo_urls = repo_urls
68                self.repo_urls = list(self.get_mirror_list())
69            else:
70                self.repo_urls = list(repo_urls)
71        if not upload_paths:
72            self.upload_paths = []
73        else:
74            self.upload_paths = list(upload_paths)
76        # Create an internal function that is a simple wrapper of
77        # run_function and takes in the args and dargs as arguments
78        def _run_command(command, _run_command_args=run_function_args,
79                         _run_command_dargs={}):
80            '''
81            Special internal function that takes in a command as
82            argument and passes it on to run_function (if specified).
83            The _run_command_dargs are merged into run_function_dargs
84            with the former having more precedence than the latter.
85            '''
86            new_dargs = dict(run_function_dargs)
87            new_dargs.update(_run_command_dargs)
89            return run_function(command, *_run_command_args,
90                                **new_dargs)
92        self._run_command = _run_command
95    def install_pkg(self, name, pkg_type, fetch_dir, install_dir,
96                    preserve_install_dir=False, repo_url=None):
97        '''
98        Remove install_dir if it already exists and then recreate it unless
99        preserve_install_dir is specified as True.
100        Fetch the package into the pkg_dir. Untar the package into install_dir
101        The assumption is that packages are of the form :
102        <pkg_type>.<pkg_name>.tar.bz2
103        name        : name of the package
104        type        : type of the package
105        fetch_dir   : The directory into which the package tarball will be
106                      fetched to.
107        install_dir : the directory where the package files will be untarred to
108        repo_url    : the url of the repository to fetch the package from.
109        '''
111        # do_locking flag is on by default unless you disable it (typically
112        # in the cases where packages are directly installed from the server
113        # onto the client in which case fcntl stuff wont work as the code
114        # will run on the server in that case..
115        if self.do_locking:
116            lockfile_name = '.%s-%s-lock' % (name, pkg_type)
117            lockfile = open(os.path.join(self.pkgmgr_dir, lockfile_name), 'w')
119        try:
120            if self.do_locking:
121                fcntl.flock(lockfile, fcntl.LOCK_EX)
123            self._run_command('mkdir -p %s' % fetch_dir)
125            pkg_name = self.get_tarball_name(name, pkg_type)
126            fetch_path = os.path.join(fetch_dir, pkg_name)
127            try:
128                # Fetch the package into fetch_dir
129                self.fetch_pkg(pkg_name, fetch_path, use_checksum=True)
131                # check to see if the install_dir exists and if it does
132                # then check to see if the .checksum file is the latest
133                install_dir_exists = False
134                try:
135                    self._run_command("ls %s" % install_dir)
136                    install_dir_exists = True
137                except (error.CmdError, error.AutoservRunError):
138                    pass
140                if (install_dir_exists and
141                    not self.untar_required(fetch_path, install_dir)):
142                    return
144                # untar the package into install_dir and
145                # update the checksum in that directory
146                if not preserve_install_dir:
147                    # Make sure we clean up the install_dir
148                    self._run_command('rm -rf %s' % install_dir)
149                self._run_command('mkdir -p %s' % install_dir)
151                self.untar_pkg(fetch_path, install_dir)
153            except PackageFetchError, why:
154                raise PackageInstallError('Installation of %s(type:%s) failed'
155                                          ' : %s' % (name, pkg_type, why))
156        finally:
157            if self.do_locking:
158                fcntl.flock(lockfile, fcntl.LOCK_UN)
159                lockfile.close()
162    def fetch_pkg(self, pkg_name, dest_path, repo_url=None, use_checksum=False):
163        '''
164        Fetch the package into dest_dir from repo_url. By default repo_url
165        is None and the package is looked in all the repostories specified.
166        Otherwise it fetches it from the specific repo_url.
167        pkg_name     : name of the package (ex: test-sleeptest.tar.bz2,
168                                            dep-gcc.tar.bz2, kernel.1-1.rpm)
169        repo_url     : the URL of the repository where the package is located.
170        dest_path    : complete path of where the package will be fetched to.
171        use_checksum : This is set to False to fetch the packages.checksum file
172                       so that the checksum comparison is bypassed for the
173                       checksum file itself. This is used internally by the
174                       packaging system. It should be ignored by externals
175                       callers of this method who use it fetch custom packages.
176        '''
178        try:
179            self._run_command("ls %s" % os.path.dirname(dest_path))
180        except (error.CmdError, error.AutoservRunError):
181            raise PackageFetchError("Please provide a valid "
182                                    "destination: %s " % dest_path)
184        # See if the package was already fetched earlier, if so
185        # the checksums need to be compared and the package is now
186        # fetched only if they differ.
187        pkg_exists = False
188        try:
189            self._run_command("ls %s" % dest_path)
190            pkg_exists = True
191        except (error.CmdError, error.AutoservRunError):
192            pass
194        # if a repository location is explicitly provided, fetch the package
195        # from there and return
196        if repo_url:
197            repo_url_list = [repo_url]
198        elif len(self.repo_urls) > 0:
199            repo_url_list = self.repo_urls
200        else:
201            raise PackageFetchError("There are no repository urls specified")
203        error_msgs = {}
204        for location in repo_url_list:
205            try:
206                # Fetch the checksum if it not there
207                if not use_checksum:
208                    self.fetch_pkg_file(pkg_name, dest_path, location)
210                # Fetch the package if a) the pkg does not exist or
211                # b) if the checksum differs for the existing package
212                elif (not pkg_exists or
213                      not self.compare_checksum(dest_path, location)):
214                    self.fetch_pkg_file(pkg_name, dest_path, location)
215                    # Update the checksum of the package in the packages'
216                    # checksum file
217                    self.update_checksum(dest_path)
218                return
219            except (PackageFetchError, error.AutoservRunError), e:
220                # The package could not be found in this repo, continue looking
221                error_msgs[location] = str(e)
222                print '%s could not be fetched from - %s : %s' % (pkg_name,
223                                                                  location, e)
225        # if we got here then that means the package is not found
226        # in any of the repositories.
227        raise PackageFetchError("%s could not be fetched from any of"
228                                " the repos %s : %s " % (pkg_name,
229                                                         repo_url_list,
230                                                         error_msgs))
233    def fetch_pkg_file(self, filename, dest_path, source_url):
234        """
235        Fetch the file from source_url into dest_path. The package repository
236        url is parsed and the appropriate retrieval method is determined.
238        """
239        if source_url.startswith('http://'):
240            self.fetch_file_http(filename, dest_path, source_url)
241        else:
242            raise PackageFetchError("Invalid location %s" % source_url)
245    def fetch_file_http(self, filename, dest_path, source_url):
246        """
247        Fetch the package using http protocol. Raises a PackageFetchError.
248        """
249        print "Fetching %s from %s to %s" % (filename, source_url, dest_path)
250        # check to see if the source_url is reachable or not
251        self.run_http_test(source_url, os.path.dirname(dest_path))
253        pkg_path = os.path.join(source_url, filename)
254        try:
255            self._run_command('wget -nv %s -O %s' % (pkg_path, dest_path))
256        except error.CmdError, e:
257            raise PackageFetchError("%s not found in %s: %s"
258                                    % (filename, source_url, e))
261    def run_http_test(self, source_url, dest_dir):
262        '''
263        Run a simple 30 sec wget on source_url
264        just to see if it can be reachable or not. This avoids the need
265        for waiting for a 10min timeout.
266        '''
267        dest_file_path = os.path.join(dest_dir, 'http_test_file')
269        BPM = BasePackageManager
270        error_msg = "HTTP test failed. Failed to contact"
271        # We should never get here unless the source_url starts with http://
272        assert(source_url.startswith('http://'))
274        # Get the http server name from the URL
275        server_name = urlparse.urlparse(source_url)[1]
276        http_cmd = 'wget -nv %s -O %s' % (server_name, dest_file_path)
278        # Following repo_exception optimization is disabled for now.
279        # Checksum files are optional.  The attempted download of a
280        # missing checksum file erroneously causes the repos to be marked
281        # dead, causing download of its custom kernels to fail.
282        # It also stays dead until Autotest is restarted.
283        if server_name in BPM._repo_exception and False:  #  <--- TEMP
284            if BPM._repo_exception[server_name] == BPM.REPO_OK:
285                # This repository is fine. Simply return
286                return
287            else:
288                raise PackageFetchError("%s - %s : %s "
289                                        % (error_msg, server_name,
290                                           BPM._repo_exception[server_name]))
291        try:
292            try:
293                self._run_command(http_cmd,
294                                  _run_command_dargs={'timeout':30})
295                BPM._repo_exception[server_name] = BPM.REPO_OK
296            finally:
297                self._run_command('rm -f %s' % dest_file_path)
298        except Exception, e:
299            BPM._repo_exception[server_name] = e
300            raise PackageFetchError("%s - %s: %s " % (error_msg, server_name,
301                                                      e))
304    # TODO(aganti): Fix the bug with the current checksum logic where
305    # packages' checksums that are not present consistently in all the
306    # repositories are not handled properly. This is a corner case though
307    # but the ideal solution is to make the checksum file repository specific
308    # and then maintain it.
309    def upload_pkg(self, pkg_path, upload_path=None, update_checksum=False):
310        '''
311        Uploads to a specified upload_path or to all the repos.
312        Also uploads the checksum file to all the repos.
313        pkg_path        : The complete path to the package file
314        upload_path     : the absolute path where the files are copied to.
315                          if set to 'None' assumes 'all' repos
316        update_checksum : If set to False, the checksum file is not
317                          going to be updated which happens by default.
318                          This is necessary for custom
319                          packages (like custom kernels and custom tests)
320                          that get uploaded which do not need to be part of
321                          the checksum file and bloat it.
322        '''
323        if update_checksum:
324            # get the packages' checksum file and update it with the current
325            # package's checksum
326            checksum_path = self._get_checksum_file_path()
327            self.update_checksum(pkg_path)
329        if upload_path:
330            upload_path_list = [upload_path]
331        elif len(self.upload_paths) > 0:
332            upload_path_list = self.upload_paths
333        else:
334            raise PackageUploadError("Invalid Upload Path specified")
336        # upload the package
337        for path in upload_path_list:
338            self.upload_pkg_file(pkg_path, path)
339            if update_checksum:
340                self.upload_pkg_file(checksum_path, path)
343    def upload_pkg_file(self, file_path, upload_path):
344        '''
345        Upload a single file. Depending on the upload path, the appropriate
346        method for that protocol is called. Currently this simply copies the
347        file to the target directory (but can be extended for other protocols)
348        This assumes that the web server is running on the same machine where
349        the method is being called from. The upload_path's files are
350        basically served by that web server.
351        '''
352        try:
353            if upload_path.startswith('ssh://'):
354                # parse ssh://user@host/usr/local/autotest/packages
355                hostline, remote_path = self._parse_ssh_path(upload_path)
356                try:
357          'scp %s %s:%s' % (file_path, hostline,
358                                                remote_path))
359                    r_path = os.path.join(remote_path,
360                                          os.path.basename(file_path))
361          "ssh %s 'chmod 644 %s'" % (hostline, r_path))
362                except error.CmdError:
363                    print "Error uploading to repository " + upload_path
364                    pass
365            else:
366                shutil.copy(file_path, upload_path)
367                os.chmod(os.path.join(upload_path,
368                                      os.path.basename(file_path)), 0644)
369        except (IOError, os.error), why:
370            raise PackageUploadError("Upload of %s to %s failed: %s"
371                                     % (file_path, upload_path, why))
374    def upload_pkg_dir(self, dir_path, upload_path):
375        '''
376        Upload a full directory. Depending on the upload path, the appropriate
377        method for that protocol is called. Currently this copies the whole
378        tmp package directory to the target directory.
379        This assumes that the web server is running on the same machine where
380        the method is being called from. The upload_path's files are
381        basically served by that web server.
382        '''
383        local_path = os.path.join(dir_path, "*")
384        try:
385            if upload_path.startswith('ssh://'):
386                hostline, remote_path = self._parse_ssh_path(upload_path)
387                try:
388          'scp %s %s:%s' % (local_path, hostline,
389                                                remote_path))
390                    ssh_path = os.path.join(remote_path, "*")
391          "ssh %s 'chmod 644 %s'" % (hostline, ssh_path))
392                except error.CmdError:
393                    print "Error uploading to repository: " + upload_path
394                    pass
395            else:
396      "cp %s %s " % (local_path, upload_path))
397                up_path = os.path.join(upload_path, "*")
398      "chmod 644 %s" % up_path)
399        except (IOError, os.error), why:
400            raise PackageUploadError("Upload of %s to %s failed: %s"
401                                     % (dir_path, upload_path, why))
404    def remove_pkg(self, pkg_name, remove_path=None, remove_checksum=False):
405        '''
406        Remove the package from the specified remove_path
407        pkg_name    : name of the package (ex: test-sleeptest.tar.bz2,
408                                           dep-gcc.tar.bz2)
409        remove_path : the location to remove the package from.
411        '''
412        if remove_path:
413            remove_path_list = [remove_path]
414        elif len(self.upload_paths) > 0:
415            remove_path_list = self.upload_paths
416        else:
417            raise PackageRemoveError("Invalid path to remove the pkg from")
419        checksum_path = self._get_checksum_file_path()
421        if remove_checksum:
422            self.remove_checksum(pkg_name)
424        # remove the package and upload the checksum file to the repos
425        for path in remove_path_list:
426            self.remove_pkg_file(pkg_name, path)
427            self.upload_pkg_file(checksum_path, path)
430    def remove_pkg_file(self, filename, pkg_dir):
431        '''
432        Remove the file named filename from pkg_dir
433        '''
434        try:
435            # Remove the file
436            if pkg_dir.startswith('ssh://'):
437                hostline, remote_path = self._parse_ssh_path(pkg_dir)
438                path = os.path.join(remote_path, filename)
439      "ssh %s 'rm -rf %s/%s'" % (hostline, remote_path,
440                          path))
441            else:
442                os.remove(os.path.join(pkg_dir, filename))
443        except (IOError, os.error), why:
444            raise PackageRemoveError("Could not remove %s from %s: %s "
445                                     % (filename, pkg_dir, why))
448    def get_mirror_list(self):
449        '''
450            Stub function for site specific mirrors.
452            Returns:
453                Priority ordered list
454        '''
455        return self.repo_urls
458    def _get_checksum_file_path(self):
459        '''
460        Return the complete path of the checksum file (assumed to be stored
461        in self.pkgmgr_dir
462        '''
463        return os.path.join(self.pkgmgr_dir, CHECKSUM_FILE)
466    def _get_checksum_dict(self):
467        '''
468        Fetch the checksum file if not already fetched. If the checksum file
469        cannot be fetched from the repos then a new file is created with
470        the current package's (specified in pkg_path) checksum value in it.
471        Populate the local checksum dictionary with the values read from
472        the checksum file.
473        The checksum file is assumed to be present in self.pkgmgr_dir
474        '''
475        checksum_path = self._get_checksum_file_path()
476        if not self._checksum_dict:
477            # Fetch the checksum file
478            try:
479                try:
480                    self._run_command("ls %s" % checksum_path)
481                except (error.CmdError, error.AutoservRunError):
482                    # The packages checksum file does not exist locally.
483                    # See if it is present in the repositories.
484                    self.fetch_pkg(CHECKSUM_FILE, checksum_path)
485            except PackageFetchError, e:
486                # This should not happen whilst fetching a package..if a
487                # package is present in the repository, the corresponding
488                # checksum file should also be automatically present. This
489                # case happens only when a package
490                # is being uploaded and if it is the first package to be
491                # uploaded to the repos (hence no checksum file created yet)
492                # Return an empty dictionary in that case
493                return {}
495            # Read the checksum file into memory
496            checksum_file_contents = self._run_command('cat '
497                                                       + checksum_path).stdout
499            # Return {} if we have an empty checksum file present
500            if not checksum_file_contents.strip():
501                return {}
503            # Parse the checksum file contents into self._checksum_dict
504            for line in checksum_file_contents.splitlines():
505                checksum, package_name = line.split(None, 1)
506                self._checksum_dict[package_name] = checksum
508        return self._checksum_dict
511    def _save_checksum_dict(self, checksum_dict):
512        '''
513        Save the checksum dictionary onto the checksum file. Update the
514        local _checksum_dict variable with this new set of values.
515        checksum_dict :  New checksum dictionary
516        checksum_dir  :  The directory in which to store the checksum file to.
517        '''
518        checksum_path = self._get_checksum_file_path()
519        self._checksum_dict = checksum_dict.copy()
520        checksum_contents = '\n'.join(checksum + ' ' + pkg_name
521                                      for pkg_name,checksum in
522                                      checksum_dict.iteritems())
523        # Write the checksum file back to disk
524        self._run_command('echo "%s" > %s' % (checksum_contents,
525                                              checksum_path))
527    def _parse_ssh_path(self, pkg_path):
528        '''
529        Parse ssh://xx@xx/path/to/ and return a tuple with host_line and
530        remote path
531        '''
533        match ='^ssh://(.*?)(/.*)$', pkg_path)
534        if match:
535            return match.groups()
536        else:
537            raise PackageUploadError("Incorrect SSH path in global_config: %s"
538                                     % upload_path)
541    def compute_checksum(self, pkg_path):
542        '''
543        Compute the MD5 checksum for the package file and return it.
544        pkg_path : The complete path for the package file
545        '''
546        md5sum_output = self._run_command("md5sum %s " % pkg_path).stdout
547        return md5sum_output.split()[0]
550    def update_checksum(self, pkg_path):
551        '''
552        Update the checksum of the package in the packages' checksum
553        file. This method is called whenever a package is fetched just
554        to be sure that the checksums in the local file are the latest.
555        pkg_path : The complete path to the package file.
556        '''
557        # Compute the new checksum
558        new_checksum = self.compute_checksum(pkg_path)
559        checksum_dict = self._get_checksum_dict()
560        checksum_dict[os.path.basename(pkg_path)] = new_checksum
561        self._save_checksum_dict(checksum_dict)
564    def remove_checksum(self, pkg_name):
565        '''
566        Remove the checksum of the package from the packages checksum file.
567        This method is called whenever a package is removed from the
568        repositories in order clean its corresponding checksum.
569        pkg_name :  The name of the package to be removed
570        '''
571        checksum_dict = self._get_checksum_dict()
572        if pkg_name in checksum_dict:
573            del checksum_dict[pkg_name]
574        self._save_checksum_dict(checksum_dict)
577    def compare_checksum(self, pkg_path, repo_url):
578        '''
579        Calculate the checksum of the file specified in pkg_path and
580        compare it with the checksum in the checksum file
581        Return True if both match else return False.
582        pkg_path : The full path to the package file for which the
583                   checksum is being compared
584        repo_url : The URL to fetch the checksum from
585        '''
586        checksum_dict = self._get_checksum_dict()
587        package_name = os.path.basename(pkg_path)
588        if not checksum_dict or package_name not in checksum_dict:
589            return False
591        repository_checksum = checksum_dict[package_name]
592        local_checksum = self.compute_checksum(pkg_path)
593        return (local_checksum == repository_checksum)
596    def tar_package(self, pkg_name, src_dir, dest_dir, exclude_string=None):
597        '''
598        Create a tar.bz2 file with the name 'pkg_name' say test-blah.tar.bz2.
599        Excludes the directories specified in exclude_string while tarring
600        the source. Returns the tarball path.
601        '''
602        tarball_path = os.path.join(dest_dir, pkg_name)
603        cmd = "tar -cvjf %s -C %s %s " % (tarball_path, src_dir, exclude_string)
605        utils.system(cmd)
606        return tarball_path
609    def untar_required(self, tarball_path, dest_dir):
610        '''
611        Compare the checksum of the tarball_path with the .checksum file
612        in the dest_dir and return False if it matches. The untar
613        of the package happens only if the checksums do not match.
614        '''
615        checksum_path = os.path.join(dest_dir, '.checksum')
616        try:
617            existing_checksum = self._run_command('cat ' + checksum_path).stdout
618        except (error.CmdError, error.AutoservRunError):
619            # If the .checksum file is not present (generally, this should
620            # not be the case) then return True so that the untar happens
621            return True
623        new_checksum = self.compute_checksum(tarball_path)
624        return (new_checksum.strip() != existing_checksum.strip())
627    def untar_pkg(self, tarball_path, dest_dir):
628        '''
629        Untar the package present in the tarball_path and put a
630        ".checksum" file in the dest_dir containing the checksum
631        of the tarball. This method
632        assumes that the package to be untarred is of the form
633        <name>.tar.bz2
634        '''
635        self._run_command('tar xjf %s -C %s' % (tarball_path, dest_dir))
636        # Put the .checksum file in the install_dir to note
637        # where the package came from
638        pkg_checksum = self.compute_checksum(tarball_path)
639        pkg_checksum_path = os.path.join(dest_dir,
640                                         '.checksum')
641        self._run_command('echo "%s" > %s '
642                          % (pkg_checksum, pkg_checksum_path))
645    def get_tarball_name(self, name, pkg_type):
646        return "%s-%s.tar.bz2" % (pkg_type, name)
649    def is_url(self, url):
650        """Return true if path looks like a URL"""
651        return url.startswith('http://')
654    def get_package_name(self, url, pkg_type):
655        '''
656        Extract the group and test name for the url. This method is currently
657        used only for tests.
658        '''
659        if pkg_type == 'test':
660            regex = '[^:]+://(.*)/([^/]*)$'
661            return self._get_package_name(url, regex)
662        else:
663            return ('', url)
666    def _get_package_name(self, url, regex):
667        if not self.is_url(url):
668            if url.endswith('.tar.bz2'):
669                testname = url.replace('.tar.bz2', '')
670                testname = re.sub(r'(\d*)\.', '', testname)
671                return (testname, testname)
672            else:
673                return ('', url)
675        match = re.match(regex, url)
676        if not match:
677            return ('', url)
678        group, filename = match.groups()
679        # Generate the group prefix.
680        group = re.sub(r'\W', '_', group)
681        # Drop the extension to get the raw test name.
682        testname = re.sub(r'\.tar\.bz2', '', filename)
683        # Drop any random numbers at the end of the test name if any
684        testname = re.sub(r'\.(\d*)', '', testname)
685        return (group, testname)
# site_packages.py may be non-existent or empty; make sure that an appropriate
# SitePackageManager class is created nevertheless.
try:
    from site_packages import SitePackageManager
except ImportError:
    class SitePackageManager(BasePackageManager):
        """Fallback used when no site specific package manager exists."""
        pass
class PackageManager(SitePackageManager):
    """Package manager with any site specific extensions applied."""
    pass