1"""
This module defines the PackageManager class, which implements the packaging
system API and provides methods to fetch, upload and remove packages.
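
A typical, purely illustrative use (the repository URL and directories below
are hypothetical):

    pkgmgr = PackageManager('/tmp/packages',
                            repo_urls=['http://repo.example.com/packages'],
                            upload_paths=['/usr/local/autotest/packages'])
    pkgmgr.install_pkg('sleeptest', 'test',
                       fetch_dir='/tmp/packages',
                       install_dir='/usr/local/autotest/tests/sleeptest')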
5"""
6
7#pylint: disable=missing-docstring
8
9import fcntl
10import logging
11import os
12import re
13import shutil
14
15import common
16from autotest_lib.client.bin import os_dep
17from autotest_lib.client.common_lib import error
18from autotest_lib.client.common_lib import global_config
19from autotest_lib.client.common_lib import utils
20
21
22# the name of the checksum file that stores the packages' checksums
23CHECKSUM_FILE = "packages.checksum"
24
25
26def has_pbzip2():
27    '''Check if parallel bzip2 is available on this system.'''
28    try:
29        os_dep.command('pbzip2')
30    except ValueError:
31        return False
32    return True
33
34
35# is parallel bzip2 available for use?
36_PBZIP2_AVAILABLE = has_pbzip2()
37
38
39def parse_ssh_path(repo):
40    '''
    Parse a repository path of the form ssh://user@host/path/to/ and return
    a (hostline, remote_path) tuple.
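
    For example, 'ssh://user@host/usr/local/autotest/packages' yields
    ('user@host', '/usr/local/autotest/packages').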
43    '''
44
45    match = re.search('^ssh://(.*?)(/.*)$', repo)
46    if match:
47        return match.groups()
48    else:
49        raise error.PackageUploadError(
50            "Incorrect SSH path in global_config: %s" % repo)
51
52
53def repo_run_command(repo, cmd, ignore_status=False, cd=True):
54    """Run a command relative to the repos path"""
55    repo = repo.strip()
56    run_cmd = None
57    cd_str = ''
58    if repo.startswith('ssh://'):
59        username = None
60        hostline, remote_path = parse_ssh_path(repo)
61        if cd:
62            cd_str = 'cd %s && ' % remote_path
63        if '@' in hostline:
64            username, host = hostline.split('@')
65            run_cmd = 'ssh %s@%s "%s%s"' % (username, host, cd_str, cmd)
66        else:
67            run_cmd = 'ssh %s "%s%s"' % (host, cd_str, cmd)
68
69    else:
70        if cd:
71            cd_str = 'cd %s && ' % repo
72        run_cmd = "%s%s" % (cd_str, cmd)
73
74    if run_cmd:
75        return utils.run(run_cmd, ignore_status=ignore_status)
76
77
78def create_directory(repo):
79    remote_path = repo
80    if repo.startswith('ssh://'):
81        _, remote_path = parse_ssh_path(repo)
82    repo_run_command(repo, 'mkdir -p %s' % remote_path, cd=False)
83
84
85def check_diskspace(repo, min_free=None):
86    # Note: 1 GB = 10**9 bytes (SI unit).
87    if min_free is None:
88        min_free = global_config.global_config.get_config_value('PACKAGES',
89                                                          'minimum_free_space',
90                                                          type=int, default=1)
    try:
        # Run df with a 1 GB block size; field 3 (zero-indexed) of the last
        # line is the available space in GB.
        df = repo_run_command(repo,
                              'df -PB %d . | tail -1' % 10 ** 9).stdout.split()
        free_space_gb = int(df[3])
    except Exception as e:
        raise error.RepoUnknownError('Unknown Repo Error: %s' % e)
97    if free_space_gb < min_free:
        raise error.RepoDiskFullError('Not enough disk space available: '
                                      '%sg < %sg' % (free_space_gb, min_free))
100
101
102def check_write(repo):
103    try:
104        repo_testfile = '.repo_test_file'
        repo_run_command(repo, 'touch %s' % repo_testfile)
106        repo_run_command(repo, 'rm ' + repo_testfile)
107    except error.CmdError:
108        raise error.RepoWriteError('Unable to write to ' + repo)
109
110
111def trim_custom_directories(repo, older_than_days=None):
112    if not repo:
113        return
114
115    if older_than_days is None:
116        older_than_days = global_config.global_config.get_config_value(
117            'PACKAGES', 'custom_max_age', type=int, default=40)
    cmd = r'find . -type f -atime +%s -exec rm -f {} \;' % older_than_days
119    repo_run_command(repo, cmd, ignore_status=True)
120
121
122class RepositoryFetcher(object):
123    url = None
124
125
126    def fetch_pkg_file(self, filename, dest_path):
127        """ Fetch a package file from a package repository.
128
129        @param filename: The filename of the package file to fetch.
130        @param dest_path: Destination path to download the file to.
131
132        @raises PackageFetchError if the fetch failed
133        """
134        raise NotImplementedError()
135
136
137class HttpFetcher(RepositoryFetcher):
138    curl_cmd_pattern = 'curl --connect-timeout 15 -s %s -o %s'
139
140
141    def __init__(self, package_manager, repository_url):
142        """
143        @param repository_url: The base URL of the http repository
144        """
145        self.run_command = package_manager._run_command
146        self.url = repository_url
147
148    def exists(self, destpath, target='file'):
149        """Check if a file or directory exists using `test`.
150
151        This is a wrapper for run_command.
152
        Args:
          destpath: The path to check for existence.
154          target: Optional string that should either be 'file' or 'dir'
155                  indicating what should exist.
156        """
157        if target == 'dir':
158            test_cmd = 'test -d %s'
159        else:
160            test_cmd = 'test -e %s'
161
162        try:
163            self.run_command(test_cmd % destpath)
164            return True
165        except (error.CmdError, error.AutoservRunError):
166            return False
167
168    def _quick_http_test(self):
169        """ Run a simple 30 second curl on the repository to see if it is
170        reachable. This avoids the need to wait for a full 10min timeout.
171        """
172        # just make a temp file to write a test fetch into
173        mktemp = 'mktemp -u /tmp/tmp.XXXXXX'
174        dest_file_path = self.run_command(mktemp).stdout.strip()
175
176        try:
177            # build up a curl command
178            http_cmd = self.curl_cmd_pattern % (self.url, dest_file_path)
179            try:
180                self.run_command(http_cmd, _run_command_dargs={'timeout': 30})
            except Exception as e:
182                msg = 'HTTP test failed, unable to contact %s: %s'
183                raise error.PackageFetchError(msg % (self.url, e))
184        finally:
185            self.run_command('rm -rf %s' % dest_file_path)
186
187
188    def fetch_pkg_file(self, filename, dest_path):
189        logging.info('Fetching %s from %s to %s', filename, self.url,
190                     dest_path)
191
192        # do a quick test to verify the repo is reachable
193        self._quick_http_test()
194
195        # try to retrieve the package via http
196        package_url = os.path.join(self.url, filename)
197        try:
198            cmd = self.curl_cmd_pattern % (package_url, dest_path)
199            result = self.run_command(cmd,
200                                      _run_command_dargs={'timeout': 1200})
201
202            if not self.exists(dest_path):
203                logging.error('curl failed: %s', result)
204                raise error.CmdError(cmd, result)
205
206            logging.info('Successfully fetched %s from %s', filename,
207                         package_url)
208        except error.CmdError as e:
209            # remove whatever junk was retrieved when the get failed
210            self.run_command('rm -f %s' % dest_path)
211
            raise error.PackageFetchError('%s not found in %s\n%s\n'
                    'curl error code: %d' % (filename, package_url,
                    e.result_obj.stderr, e.result_obj.exit_status))
215
216
217class LocalFilesystemFetcher(RepositoryFetcher):
218    def __init__(self, package_manager, local_dir):
219        self.run_command = package_manager._run_command
220        self.url = local_dir
221
222
223    def fetch_pkg_file(self, filename, dest_path):
224        logging.info('Fetching %s from %s to %s', filename, self.url,
225                     dest_path)
226        local_path = os.path.join(self.url, filename)
227        try:
228            self.run_command('cp %s %s' % (local_path, dest_path))
229            logging.debug('Successfully fetched %s from %s', filename,
230                          local_path)
        except error.CmdError as e:
232            raise error.PackageFetchError(
233                'Package %s could not be fetched from %s'
234                % (filename, self.url), e)
235
236
237class BasePackageManager(object):
238    def __init__(self, pkgmgr_dir, hostname=None, repo_urls=None,
239                 upload_paths=None, do_locking=True, run_function=utils.run,
240                 run_function_args=[], run_function_dargs={}):
241        '''
        repo_urls: The list of repository urls which are consulted while
                   fetching a package.
        upload_paths: The list of repository paths to which packages are
                      uploaded.
        pkgmgr_dir : A scratch directory the package manager can use for
                     its own files (e.g. the repositories' checksum file).
249        do_locking : Enable locking when the packages are installed.
250
251        run_function is used to execute the commands throughout this file.
252        It defaults to utils.run() but a custom method (if provided) should
253        be of the same schema as utils.run. It should return a CmdResult
        object and raise a CmdError exception. The reason for using a separate
255        function to run the commands is that the same code can be run to fetch
256        a package on the local machine or on a remote machine (in which case
257        ssh_host's run function is passed in for run_function).
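
        For example (illustrative; remote_host is a hypothetical host object
        whose run() method follows the utils.run interface):

            pkgmgr = BasePackageManager(
                    '/tmp/packages',
                    repo_urls=['http://repo.example.com/packages'],
                    run_function=remote_host.run)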
258        '''
        # In-memory dictionary that stores the checksums of packages
260        self._checksum_dict = {}
261
262        self.pkgmgr_dir = pkgmgr_dir
263        self.do_locking = do_locking
264        self.hostname = hostname
265        self.repositories = []
266
267        # Create an internal function that is a simple wrapper of
268        # run_function and takes in the args and dargs as arguments
269        def _run_command(command, _run_command_args=run_function_args,
270                         _run_command_dargs={}):
271            '''
272            Special internal function that takes in a command as
273            argument and passes it on to run_function (if specified).
274            The _run_command_dargs are merged into run_function_dargs
275            with the former having more precedence than the latter.
276            '''
277            new_dargs = dict(run_function_dargs)
278            new_dargs.update(_run_command_dargs)
279            # avoid polluting logs with extremely verbose packaging output
280            new_dargs.update({'stdout_tee' : None})
281
282            return run_function(command, *_run_command_args,
283                                **new_dargs)
284
285        self._run_command = _run_command
286
287        # Process the repository URLs
288        if not repo_urls:
289            repo_urls = []
290        elif hostname:
291            repo_urls = self.get_mirror_list(repo_urls)
292        for url in repo_urls:
293            self.add_repository(url)
294
295        # Process the upload URLs
296        if not upload_paths:
297            self.upload_paths = []
298        else:
299            self.upload_paths = list(upload_paths)
300
301
302    def add_repository(self, repo):
303        if isinstance(repo, basestring):
304            self.repositories.append(self.get_fetcher(repo))
305        elif isinstance(repo, RepositoryFetcher):
306            self.repositories.append(repo)
307        else:
308            raise TypeError("repo must be RepositoryFetcher or url string")
309
310    def exists(self, destpath, target='file'):
311        """Check if a file or directory exists using `test`.
312
313        This is a wrapper for _run_command.
314
        Args:
          destpath: The path to check for existence.
316          target: Optional string that should either be 'file' or 'dir'
317                  indicating what should exist.
318        """
319        if target == 'dir':
320            test_cmd = 'test -d %s'
321        else:
322            test_cmd = 'test -e %s'
323
324        try:
325            self._run_command(test_cmd % destpath)
326            return True
327        except (error.CmdError, error.AutoservRunError):
328            return False
329
330    def get_fetcher(self, url):
331        if url.startswith('http://'):
332            return HttpFetcher(self, url)
333        else:
334            return LocalFilesystemFetcher(self, url)
335
336
337    def repo_check(self, repo):
338        '''
        Check to make sure the repo is in a sane state:
        ensure we have at least the configured minimum amount of free space
        and make sure we can write to the repo.
342        '''
343        if not repo.startswith('/') and not repo.startswith('ssh:'):
344            return
345        try:
346            create_directory(repo)
347            check_diskspace(repo)
348            check_write(repo)
349        except (error.RepoWriteError, error.RepoUnknownError,
                error.RepoDiskFullError) as e:
351            raise error.RepoError("ERROR: Repo %s: %s" % (repo, e))
352
353
354    def upkeep(self, custom_repos=None):
355        '''
356        Clean up custom upload/download areas
357        '''
358        from autotest_lib.server import subcommand
359        if not custom_repos:
360            # Not all package types necessarily require or allow custom repos
361            try:
362                custom_repos = global_config.global_config.get_config_value(
363                    'PACKAGES', 'custom_upload_location').split(',')
364            except global_config.ConfigError:
365                custom_repos = []
366            try:
367                custom_download = global_config.global_config.get_config_value(
368                    'PACKAGES', 'custom_download_location')
369                custom_repos += [custom_download]
370            except global_config.ConfigError:
371                pass
372
373            if not custom_repos:
374                return
375
376        subcommand.parallel_simple(trim_custom_directories, custom_repos,
377                                   log=False)
378
379
380    def install_pkg(self, name, pkg_type, fetch_dir, install_dir,
381                    preserve_install_dir=False, repo_url=None):
382        '''
        Remove install_dir if it already exists and then recreate it unless
        preserve_install_dir is specified as True.
        Fetch the package into fetch_dir. Untar the package into install_dir.
        The assumption is that packages are of the form:
        <pkg_type>-<pkg_name>.tar.bz2
        name        : name of the package
        pkg_type    : type of the package
        fetch_dir   : the directory into which the package tarball will be
                      fetched
        install_dir : the directory where the package files will be untarred to
        repo_url    : the url of the repository to fetch the package from.
394        '''
395
        # The do_locking flag is on by default unless explicitly disabled
        # (typically when packages are installed directly from the server
        # onto the client, in which case fcntl locking won't work because
        # the code runs on the server).
400        if self.do_locking:
401            lockfile_name = '.%s-%s-lock' % (name, pkg_type)
402            lockfile = open(os.path.join(self.pkgmgr_dir, lockfile_name), 'w')
403
404        try:
405            if self.do_locking:
406                fcntl.flock(lockfile, fcntl.LOCK_EX)
407
408            self._run_command('mkdir -p %s' % fetch_dir)
409
410            pkg_name = self.get_tarball_name(name, pkg_type)
411            fetch_path = os.path.join(fetch_dir, pkg_name)
412            try:
413                # Fetch the package into fetch_dir
414                self.fetch_pkg(pkg_name, fetch_path, use_checksum=True)
415
416                # check to see if the install_dir exists and if it does
417                # then check to see if the .checksum file is the latest
418                if (self.exists(install_dir, target='dir') and
419                    not self.untar_required(fetch_path, install_dir)):
420                    return
421
422                # untar the package into install_dir and
423                # update the checksum in that directory
424                if not preserve_install_dir:
425                    # Make sure we clean up the install_dir
426                    self._run_command('rm -rf %s' % install_dir)
427                self._run_command('mkdir -p %s' % install_dir)
428
429                self.untar_pkg(fetch_path, install_dir)
430
            except error.PackageFetchError as why:
432                raise error.PackageInstallError(
433                    'Installation of %s(type:%s) failed : %s'
434                    % (name, pkg_type, why))
435        finally:
436            if self.do_locking:
437                fcntl.flock(lockfile, fcntl.LOCK_UN)
438                lockfile.close()
439
440
441    def fetch_pkg(self, pkg_name, dest_path, repo_url=None, use_checksum=False):
442        '''
        Fetch the package into dest_path from repo_url. By default repo_url
        is None and the package is looked up in all the repositories
        specified. Otherwise it is fetched from the specific repo_url.
        pkg_name     : name of the package (ex: test-sleeptest.tar.bz2,
                                            dep-gcc.tar.bz2, kernel.1-1.rpm)
        repo_url     : the URL of the repository where the package is located.
        dest_path    : complete path of where the package will be fetched to.
        use_checksum : This is set to False when fetching the packages.checksum
                       file itself, so that the checksum comparison is bypassed
                       for the checksum file. This is used internally by the
                       packaging system. It should be ignored by external
                       callers of this method who use it to fetch custom
                       packages.
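
        Illustrative example, mirroring how install_pkg() fetches a package
        (paths and package name are hypothetical):

            pkgmgr.fetch_pkg('test-sleeptest.tar.bz2',
                             '/tmp/packages/test-sleeptest.tar.bz2',
                             use_checksum=True)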
455        '''
456        # Check if the destination dir exists.
457        if not self.exists(os.path.dirname(dest_path), target='dir'):
458            raise error.PackageFetchError("Please provide a valid "
459                                          "destination: %s " % dest_path)
460
461        # See if the package was already fetched earlier, if so
462        # the checksums need to be compared and the package is now
463        # fetched only if they differ.
464        pkg_exists = self.exists(dest_path)
465
466        # if a repository location is explicitly provided, fetch the package
467        # from there and return
468        if repo_url:
469            repositories = [self.get_fetcher(repo_url)]
470        elif self.repositories:
471            repositories = self.repositories
472        else:
473            raise error.PackageFetchError("No repository urls specified")
474
475        # install the package from the package repos, try the repos in
476        # reverse order, assuming that the 'newest' repos are most desirable
477        for fetcher in reversed(repositories):
478            try:
479                # Fetch the package if it is not there, the checksum does
480                # not match, or checksums are disabled entirely
481                need_to_fetch = (
482                        not use_checksum or not pkg_exists
483                        or not self.compare_checksum(dest_path))
484                if need_to_fetch:
485                    fetcher.fetch_pkg_file(pkg_name, dest_path)
486                    # update checksum so we won't refetch next time.
487                    if use_checksum:
488                        self.update_checksum(dest_path)
489                return
490            except (error.PackageFetchError, error.AutoservRunError) as e:
491                # The package could not be found in this repo, continue looking
492                logging.debug(e)
493
494        repo_url_list = [repo.url for repo in repositories]
495        message = ('%s could not be fetched from any of the repos %s' %
496                   (pkg_name, repo_url_list))
497        logging.error(message)
498        # if we got here then that means the package is not found
499        # in any of the repositories.
500        raise error.PackageFetchError(message)
501
502
503    def upload_pkg(self, pkg_path, upload_path=None, update_checksum=False,
504                   timeout=300):
505        from autotest_lib.server import subcommand
506        if upload_path:
507            upload_path_list = [upload_path]
508            self.upkeep(upload_path_list)
509        elif len(self.upload_paths) > 0:
510            self.upkeep()
511            upload_path_list = self.upload_paths
512        else:
513            raise error.PackageUploadError("Invalid Upload Path specified")
514
515        if update_checksum:
516            # get the packages' checksum file and update it with the current
517            # package's checksum
518            self.update_checksum(pkg_path)
519
520        commands = []
521        for path in upload_path_list:
522            commands.append(subcommand.subcommand(self.upload_pkg_parallel,
523                                                  (pkg_path, path,
524                                                   update_checksum)))
525
526        results = subcommand.parallel(commands, timeout, return_results=True)
527        for result in results:
528            if result:
529                print str(result)
530
531
532    # TODO(aganti): Fix the bug with the current checksum logic where
533    # packages' checksums that are not present consistently in all the
534    # repositories are not handled properly. This is a corner case though
535    # but the ideal solution is to make the checksum file repository specific
536    # and then maintain it.
537    def upload_pkg_parallel(self, pkg_path, upload_path, update_checksum=False):
538        '''
539        Uploads to a specified upload_path or to all the repos.
540        Also uploads the checksum file to all the repos.
541        pkg_path        : The complete path to the package file
542        upload_path     : the absolute path where the files are copied to.
543                          if set to 'None' assumes 'all' repos
        update_checksum : If set to False (the default), the checksum file is
                          not updated. This is necessary for custom packages
                          (like custom kernels and custom tests) that get
                          uploaded but do not need to be part of the checksum
                          file, which would otherwise bloat it.
550        '''
551        self.repo_check(upload_path)
552        # upload the package
553        if os.path.isdir(pkg_path):
554            self.upload_pkg_dir(pkg_path, upload_path)
555        else:
556            self.upload_pkg_file(pkg_path, upload_path)
557            if update_checksum:
558                self.upload_pkg_file(self._get_checksum_file_path(),
559                                     upload_path)
560
561
562    def upload_pkg_file(self, file_path, upload_path):
563        '''
564        Upload a single file. Depending on the upload path, the appropriate
565        method for that protocol is called. Currently this simply copies the
        file to the target directory (but can be extended for other protocols).
567        This assumes that the web server is running on the same machine where
568        the method is being called from. The upload_path's files are
569        basically served by that web server.
570        '''
571        try:
572            if upload_path.startswith('ssh://'):
573                # parse ssh://user@host/usr/local/autotest/packages
574                hostline, remote_path = parse_ssh_path(upload_path)
575                try:
576                    utils.run('scp %s %s:%s' % (file_path, hostline,
577                                                remote_path))
578                    r_path = os.path.join(remote_path,
579                                          os.path.basename(file_path))
580                    utils.run("ssh %s 'chmod 644 %s'" % (hostline, r_path))
581                except error.CmdError:
582                    logging.error("Error uploading to repository %s",
583                                  upload_path)
584            else:
585                # Delete any older version of the package that might exist.
586                orig_file = os.path.join(upload_path,
587                                         os.path.basename(file_path))
588                if os.path.exists(orig_file):
589                    os.remove(orig_file)
590
591                shutil.copy(file_path, upload_path)
                os.chmod(orig_file, 0o644)
        except (IOError, os.error) as why:
594            logging.error("Upload of %s to %s failed: %s", file_path,
595                          upload_path, why)
596
597
598    def upload_pkg_dir(self, dir_path, upload_path):
599        '''
600        Upload a full directory. Depending on the upload path, the appropriate
601        method for that protocol is called. Currently this copies the whole
602        tmp package directory to the target directory.
603        This assumes that the web server is running on the same machine where
604        the method is being called from. The upload_path's files are
605        basically served by that web server.
606        '''
607        local_path = os.path.join(dir_path, "*")
608        try:
609            if upload_path.startswith('ssh://'):
610                hostline, remote_path = parse_ssh_path(upload_path)
611                try:
612                    utils.run('scp %s %s:%s' % (local_path, hostline,
613                                                remote_path))
614                    ssh_path = os.path.join(remote_path, "*")
615                    utils.run("ssh %s 'chmod 644 %s'" % (hostline, ssh_path))
616                except error.CmdError:
617                    logging.error("Error uploading to repository: %s",
618                                  upload_path)
619            else:
620                utils.run("cp %s %s " % (local_path, upload_path))
621                up_path = os.path.join(upload_path, "*")
622                utils.run("chmod 644 %s" % up_path)
        except (IOError, os.error) as why:
624            raise error.PackageUploadError("Upload of %s to %s failed: %s"
625                                           % (dir_path, upload_path, why))
626
627
628    def remove_pkg(self, pkg_name, remove_path=None, remove_checksum=False):
629        '''
630        Remove the package from the specified remove_path
631        pkg_name    : name of the package (ex: test-sleeptest.tar.bz2,
632                                           dep-gcc.tar.bz2)
633        remove_path : the location to remove the package from.
634
635        '''
636        if remove_path:
637            remove_path_list = [remove_path]
638        elif len(self.upload_paths) > 0:
639            remove_path_list = self.upload_paths
640        else:
641            raise error.PackageRemoveError(
642                "Invalid path to remove the pkg from")
643
644        checksum_path = self._get_checksum_file_path()
645
646        if remove_checksum:
647            self.remove_checksum(pkg_name)
648
649        # remove the package and upload the checksum file to the repos
650        for path in remove_path_list:
651            self.remove_pkg_file(pkg_name, path)
652            self.upload_pkg_file(checksum_path, path)
653
654
655    def remove_pkg_file(self, filename, pkg_dir):
656        '''
657        Remove the file named filename from pkg_dir
658        '''
659        try:
660            # Remove the file
661            if pkg_dir.startswith('ssh://'):
662                hostline, remote_path = parse_ssh_path(pkg_dir)
663                path = os.path.join(remote_path, filename)
664                utils.run("ssh %s 'rm -rf %s/%s'" % (hostline, remote_path,
665                          path))
666            else:
667                os.remove(os.path.join(pkg_dir, filename))
        except (IOError, os.error) as why:
669            raise error.PackageRemoveError("Could not remove %s from %s: %s "
670                                           % (filename, pkg_dir, why))
671
672
673    def get_mirror_list(self, repo_urls):
674        '''
        Stub function for site specific mirrors.

        Returns:
            Priority ordered list
679        '''
680        return repo_urls
681
682
683    def _get_checksum_file_path(self):
684        '''
685        Return the complete path of the checksum file (assumed to be stored
        in self.pkgmgr_dir).
687        '''
688        return os.path.join(self.pkgmgr_dir, CHECKSUM_FILE)
689
690
691    def _get_checksum_dict(self):
692        '''
        Fetch the checksum file if not already fetched. If the checksum file
        cannot be fetched from the repos, return an empty dictionary; the
        file is created later, when a package's checksum is first saved.
        Populate the local checksum dictionary with the values read from
        the checksum file.
        The checksum file is assumed to be present in self.pkgmgr_dir.
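
        Each line of the checksum file has the form
        "<checksum> <package file name>", for example (values illustrative):

            d41d8cd98f00b204e9800998ecf8427e test-sleeptest.tar.bz2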
699        '''
700        checksum_path = self._get_checksum_file_path()
701        if not self._checksum_dict:
702            # Fetch the checksum file
703            try:
704                if not self.exists(checksum_path):
705                    # The packages checksum file does not exist locally.
706                    # See if it is present in the repositories.
707                    self.fetch_pkg(CHECKSUM_FILE, checksum_path)
708            except error.PackageFetchError:
                # This should not happen while fetching a package: if a
                # package is present in the repository, the corresponding
                # checksum file should also be automatically present. This
                # case happens only when a package is being uploaded and it
                # is the first package to be uploaded to the repos (hence no
                # checksum file has been created yet).
                # Return an empty dictionary in that case.
716                return {}
717
718            # Read the checksum file into memory
719            checksum_file_contents = self._run_command('cat '
720                                                       + checksum_path).stdout
721
722            # Return {} if we have an empty checksum file present
723            if not checksum_file_contents.strip():
724                return {}
725
726            # Parse the checksum file contents into self._checksum_dict
727            for line in checksum_file_contents.splitlines():
728                checksum, package_name = line.split(None, 1)
729                self._checksum_dict[package_name] = checksum
730
731        return self._checksum_dict
732
733
734    def _save_checksum_dict(self, checksum_dict):
735        '''
736        Save the checksum dictionary onto the checksum file. Update the
737        local _checksum_dict variable with this new set of values.
738        checksum_dict :  New checksum dictionary
740        '''
741        checksum_path = self._get_checksum_file_path()
742        self._checksum_dict = checksum_dict.copy()
743        checksum_contents = '\n'.join(checksum + ' ' + pkg_name
744                                      for pkg_name, checksum in
745                                      checksum_dict.iteritems())
746        # Write the checksum file back to disk
747        self._run_command('echo "%s" > %s' % (checksum_contents,
748                                              checksum_path),
749                          _run_command_dargs={'verbose': False})
750
751
752    def compute_checksum(self, pkg_path):
753        '''
754        Compute the MD5 checksum for the package file and return it.
755        pkg_path : The complete path for the package file
756        '''
757        # Check if the checksum has been pre-calculated.
758        # There are two modes of operation:
759        #
760        # 1. Package is compiled on dev machine / build server : In this
761        # case, we will have the freshest checksum during the install
762        # phase (which was computed and stored during src_compile). The
763        # checksum always gets recomputed during src_compile.
764        #
        # 2. Package is installed from a fetched prebuilt: Here, we will
766        # have the checksum associated with what was used to compile
767        # the prebuilt. So it is expected to be the same.
768        checksum_path = pkg_path + '.checksum'
769        if os.path.exists(checksum_path):
770            print ("Checksum %s exists" % checksum_path)
771            with open(checksum_path, "r") as f:
772                return f.read().replace('\n', '')
773        md5sum_output = self._run_command("md5sum %s " % pkg_path).stdout
774        return md5sum_output.split()[0]
775
776
777    def update_checksum(self, pkg_path):
778        '''
779        Update the checksum of the package in the packages' checksum
780        file. This method is called whenever a package is fetched just
781        to be sure that the checksums in the local file are the latest.
782        pkg_path : The complete path to the package file.
783        '''
784        # Compute the new checksum
785        new_checksum = self.compute_checksum(pkg_path)
786        checksum_dict = self._get_checksum_dict()
787        checksum_dict[os.path.basename(pkg_path)] = new_checksum
788        self._save_checksum_dict(checksum_dict)
789
790
791    def remove_checksum(self, pkg_name):
792        '''
793        Remove the checksum of the package from the packages checksum file.
794        This method is called whenever a package is removed from the
        repositories, in order to clean up its corresponding checksum.
796        pkg_name :  The name of the package to be removed
797        '''
798        checksum_dict = self._get_checksum_dict()
799        if pkg_name in checksum_dict:
800            del checksum_dict[pkg_name]
801        self._save_checksum_dict(checksum_dict)
802
803
804    def compare_checksum(self, pkg_path):
805        '''
806        Calculate the checksum of the file specified in pkg_path and
        compare it with the checksum in the checksum file.
        Return True if both match, else return False.
809        pkg_path : The full path to the package file for which the
810                   checksum is being compared
811        '''
812        checksum_dict = self._get_checksum_dict()
813        package_name = os.path.basename(pkg_path)
814        if not checksum_dict or package_name not in checksum_dict:
815            return False
816
817        repository_checksum = checksum_dict[package_name]
818        local_checksum = self.compute_checksum(pkg_path)
819        return (local_checksum == repository_checksum)
820
821
822    def tar_package(self, pkg_name, src_dir, dest_dir, exclude_string=None):
823        '''
        Create a tar.bz2 file named 'pkg_name' (e.g. test-blah.tar.bz2) in
        dest_dir. Excludes the directories specified in exclude_string while
        tarring the source. Returns the tarball path.
827        '''
828        tarball_path = os.path.join(dest_dir, pkg_name)
829        temp_path = tarball_path + '.tmp'
830        cmd_list = ['tar', '-cf', temp_path, '-C', src_dir]
831        if _PBZIP2_AVAILABLE:
832            cmd_list.append('--use-compress-prog=pbzip2')
833        else:
834            cmd_list.append('-j')
835        if exclude_string is not None:
836            cmd_list.append(exclude_string)
837
838        try:
839            utils.system(' '.join(cmd_list))
840        except:
841            os.unlink(temp_path)
842            raise
843
844        os.rename(temp_path, tarball_path)
845        return tarball_path
846
847
848    def untar_required(self, tarball_path, dest_dir):
849        '''
850        Compare the checksum of the tarball_path with the .checksum file
851        in the dest_dir and return False if it matches. The untar
852        of the package happens only if the checksums do not match.
853        '''
854        checksum_path = os.path.join(dest_dir, '.checksum')
855        try:
856            existing_checksum = self._run_command('cat ' + checksum_path).stdout
857        except (error.CmdError, error.AutoservRunError):
858            # If the .checksum file is not present (generally, this should
859            # not be the case) then return True so that the untar happens
860            return True
861
862        new_checksum = self.compute_checksum(tarball_path)
863        return (new_checksum.strip() != existing_checksum.strip())
864
865
866    def untar_pkg(self, tarball_path, dest_dir):
867        '''
868        Untar the package present in the tarball_path and put a
869        ".checksum" file in the dest_dir containing the checksum
870        of the tarball. This method
871        assumes that the package to be untarred is of the form
872        <name>.tar.bz2
873        '''
874        self._run_command('tar --no-same-owner -xjf %s -C %s' %
875                          (tarball_path, dest_dir))
876        # Put the .checksum file in the install_dir to note
877        # where the package came from
878        pkg_checksum = self.compute_checksum(tarball_path)
879        pkg_checksum_path = os.path.join(dest_dir,
880                                         '.checksum')
881        self._run_command('echo "%s" > %s '
882                          % (pkg_checksum, pkg_checksum_path))
883
884
885    @staticmethod
886    def get_tarball_name(name, pkg_type):
887        """Converts a package name and type into a tarball name.
888
889        @param name: The name of the package
890        @param pkg_type: The type of the package
891
892        @returns A tarball filename for that specific type of package
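
        For example, get_tarball_name('sleeptest', 'test') returns
        'test-sleeptest.tar.bz2'.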
893        """
894        assert '-' not in pkg_type
895        return '%s-%s.tar.bz2' % (pkg_type, name)
896
897
898    @staticmethod
899    def parse_tarball_name(tarball_name):
900        """Coverts a package tarball name into a package name and type.
901
902        @param tarball_name: The filename of the tarball
903
904        @returns (name, pkg_type) where name is the package name and pkg_type
905            is the package type.
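
        For example, parse_tarball_name('test-sleeptest.tar.bz2') returns
        ('sleeptest', 'test').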
906        """
907        match = re.search(r'^([^-]*)-(.*)\.tar\.bz2$', tarball_name)
908        pkg_type, name = match.groups()
909        return name, pkg_type
910
911
912    def is_url(self, url):
913        """Return true if path looks like a URL"""
914        return url.startswith('http://')
915
916
917    def get_package_name(self, url, pkg_type):
918        '''
        Extract the group and test name from the url. This method is currently
920        used only for tests.
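
        For example (URL is hypothetical), a test fetched from
        'http://foo.com/tests/mytest.tar.bz2' yields the group/test tuple
        ('foo_com_tests', 'mytest').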
921        '''
922        if pkg_type == 'test':
923            regex = '[^:]+://(.*)/([^/]*)$'
924            return self._get_package_name(url, regex)
925        else:
926            return ('', url)
927
928
929    def _get_package_name(self, url, regex):
930        if not self.is_url(url):
931            if url.endswith('.tar.bz2'):
932                testname = url.replace('.tar.bz2', '')
933                testname = re.sub(r'(\d*)\.', '', testname)
934                return (testname, testname)
935            else:
936                return ('', url)
937
938        match = re.match(regex, url)
939        if not match:
940            return ('', url)
941        group, filename = match.groups()
942        # Generate the group prefix.
943        group = re.sub(r'\W', '_', group)
944        # Drop the extension to get the raw test name.
945        testname = re.sub(r'\.tar\.bz2', '', filename)
946        # Drop any random numbers at the end of the test name if any
947        testname = re.sub(r'\.(\d*)', '', testname)
948        return (group, testname)
949
950
951class SiteHttpFetcher(HttpFetcher):
952    curl_cmd_pattern = ('curl --connect-timeout 15 --retry 5 '
953                        '--retry-delay 5 -s %s -o %s')
954
955    # shortcut quick http test for now since our dev server does not support
956    # this operation.
957    def _quick_http_test(self):
958        return
959
960
961class PackageManager(BasePackageManager):
962    def get_fetcher(self, url):
963        if url.startswith('http://'):
964            return SiteHttpFetcher(self, url)
965        else:
966            return super(PackageManager, self).get_fetcher(url)
967