# packages.py revision dbfc4e3bc0b0ec3bcefdc0d045e501ef96869339
1#!/usr/bin/python 2 3""" 4This module defines the BasePackageManager Class which provides an 5implementation of the packaging system API providing methods to fetch, 6upload and remove packages. Site specific extensions to any of these methods 7should inherit this class. 8""" 9 10import re, os, sys, traceback, subprocess, shutil, time, traceback, urlparse 11import fcntl 12from autotest_lib.client.common_lib import error, utils 13 14 15class PackageUploadError(error.AutotestError): 16 'Raised when there is an error uploading the package' 17 18class PackageFetchError(error.AutotestError): 19 'Raised when there is an error fetching the package' 20 21class PackageRemoveError(error.AutotestError): 22 'Raised when there is an error removing the package' 23 24class PackageInstallError(error.AutotestError): 25 'Raised when there is an error installing the package' 26 27# the name of the checksum file that stores the packages' checksums 28CHECKSUM_FILE = "packages.checksum" 29 30class BasePackageManager(object): 31 _repo_exception = {} 32 REPO_OK = object() 33 34 def __init__(self, pkgmgr_dir, repo_urls=None, upload_paths=None, 35 do_locking=True, run_function=utils.run, run_function_args=[], 36 run_function_dargs={}): 37 ''' 38 repo_urls: The list of the repository urls which is consulted 39 whilst fetching the package 40 upload_paths: The list of the upload of repositories to which 41 the package is uploaded to 42 pkgmgr_dir : A directory that can be used by the package manager 43 to dump stuff (like checksum files of the repositories 44 etc.). 45 do_locking : Enable locking when the packages are installed. 46 47 run_function is used to execute the commands throughout this file. 48 It defaults to utils.run() but a custom method (if provided) should 49 be of the same schema as utils.run. It should return a CmdResult 50 object and throw a CmdError exception. 
The reason for using a separate 51 function to run the commands is that the same code can be run to fetch 52 a package on the local machine or on a remote machine (in which case 53 ssh_host's run function is passed in for run_function). 54 ''' 55 # In memory dictionary that stores the checksum's of packages 56 self._checksum_dict = {} 57 58 self.pkgmgr_dir = pkgmgr_dir 59 self.do_locking = do_locking 60 61 # Process the repository URLs and the upload paths if specified 62 if not repo_urls: 63 self.repo_urls = [] 64 else: 65 self.repo_urls = list(repo_urls) 66 if not upload_paths: 67 self.upload_paths = [] 68 else: 69 self.upload_paths = list(upload_paths) 70 71 # Create an internal function that is a simple wrapper of 72 # run_function and takes in the args and dargs as arguments 73 def _run_command(command, _run_command_args=run_function_args, 74 _run_command_dargs={}): 75 ''' 76 Special internal function that takes in a command as 77 argument and passes it on to run_function (if specified). 78 The _run_command_dargs are merged into run_function_dargs 79 with the former having more precedence than the latter. 80 ''' 81 new_dargs = dict(run_function_dargs) 82 new_dargs.update(_run_command_dargs) 83 84 return run_function(command, *_run_command_args, 85 **new_dargs) 86 87 self._run_command = _run_command 88 89 90 def install_pkg(self, name, pkg_type, fetch_dir, install_dir, 91 preserve_install_dir=False, repo_url=None): 92 ''' 93 Remove install_dir if it already exists and then recreate it unless 94 preserve_install_dir is specified as True. 95 Fetch the package into the pkg_dir. Untar the package into install_dir 96 The assumption is that packages are of the form : 97 <pkg_type>.<pkg_name>.tar.bz2 98 name : name of the package 99 type : type of the package 100 fetch_dir : The directory into which the package tarball will be 101 fetched to. 
102 install_dir : the directory where the package files will be untarred to 103 repo_url : the url of the repository to fetch the package from. 104 ''' 105 106 # do_locking flag is on by default unless you disable it (typically 107 # in the cases where packages are directly installed from the server 108 # onto the client in which case fcntl stuff wont work as the code 109 # will run on the server in that case.. 110 if self.do_locking: 111 lockfile_name = '.%s-%s-lock' % (name, pkg_type) 112 lockfile = open(os.path.join(self.pkgmgr_dir, lockfile_name), 'w') 113 114 try: 115 if self.do_locking: 116 fcntl.flock(lockfile, fcntl.LOCK_EX) 117 118 self._run_command('mkdir -p %s' % fetch_dir) 119 120 pkg_name = self.get_tarball_name(name, pkg_type) 121 fetch_path = os.path.join(fetch_dir, pkg_name) 122 try: 123 # Fetch the package into fetch_dir 124 self.fetch_pkg(pkg_name, fetch_path) 125 126 # check to see if the install_dir exists and if it does 127 # then check to see if the .checksum file is the latest 128 install_dir_exists = False 129 try: 130 self._run_command("ls %s" % install_dir) 131 install_dir_exists = True 132 except (error.CmdError, error.AutoservRunError): 133 pass 134 135 if (install_dir_exists and 136 not self.untar_required(fetch_path, install_dir)): 137 return 138 139 # untar the package into install_dir and 140 # update the checksum in that directory 141 if not preserve_install_dir: 142 # Make sure we clean up the install_dir 143 self._run_command('rm -rf %s' % install_dir) 144 self._run_command('mkdir -p %s' % install_dir) 145 146 self.untar_pkg(fetch_path, install_dir) 147 148 except PackageFetchError, why: 149 raise PackageInstallError('Installation of %s(type:%s) failed' 150 ' : %s' % (name, pkg_type, why)) 151 finally: 152 if self.do_locking: 153 fcntl.flock(lockfile, fcntl.LOCK_UN) 154 lockfile.close() 155 156 157 def fetch_pkg(self, pkg_name, dest_path, repo_url=None, use_checksum=True): 158 ''' 159 Fetch the package into dest_dir from repo_url. 
By default repo_url 160 is None and the package is looked in all the repostories specified. 161 Otherwise it fetches it from the specific repo_url. 162 pkg_name : name of the package (ex: test-sleeptest.tar.bz2, 163 dep-gcc.tar.bz2, kernel.1-1.rpm) 164 repo_url : the URL of the repository where the package is located. 165 dest_path : complete path of where the package will be fetched to. 166 use_checksum : This is set to False to fetch the packages.checksum file 167 so that the checksum comparison is bypassed for the 168 checksum file itself. This is used internally by the 169 packaging system. It should be ignored by externals 170 callers of this method who use it fetch custom packages. 171 ''' 172 173 try: 174 self._run_command("ls %s" % os.path.dirname(dest_path)) 175 except (error.CmdError, error.AutoservRunError): 176 raise PackageFetchError("Please provide a valid " 177 "destination: %s " % dest_path) 178 179 # See if the package was already fetched earlier, if so 180 # the checksums need to be compared and the package is now 181 # fetched only if they differ. 
182 pkg_exists = False 183 try: 184 self._run_command("ls %s" % dest_path) 185 pkg_exists = True 186 except (error.CmdError, error.AutoservRunError): 187 pass 188 189 # if a repository location is explicitly provided, fetch the package 190 # from there and return 191 if repo_url: 192 repo_url_list = [repo_url] 193 elif len(self.repo_urls) > 0: 194 repo_url_list = self.repo_urls 195 else: 196 raise PackageFetchError("There are no repository urls specified") 197 198 error_msgs = {} 199 for location in repo_url_list: 200 try: 201 # Fetch the checksum if it not there 202 if not use_checksum: 203 self.fetch_pkg_file(pkg_name, dest_path, location) 204 205 # Fetch the package if a) the pkg does not exist or 206 # b) if the checksum differs for the existing package 207 elif (not pkg_exists or 208 not self.compare_checksum(dest_path, location)): 209 self.fetch_pkg_file(pkg_name, dest_path, location) 210 # Update the checksum of the package in the packages' 211 # checksum file 212 self.update_checksum(dest_path) 213 return 214 except (PackageFetchError, error.AutoservRunError), e: 215 # The package could not be found in this repo, continue looking 216 error_msgs[location] = str(e) 217 print >> sys.stderr, ('Package - could not be fetched from ' 218 '- %s : %s' % (location, e)) 219 220 # if we got here then that means the package is not found 221 # in any of the repositories. 222 raise PackageFetchError("Package could not be fetched from any of" 223 " the repos %s : %s " % (repo_url_list, 224 error_msgs)) 225 226 227 def fetch_pkg_file(self, file_name, dest_path, source_url): 228 """ 229 Fetch the file from source_url into dest_path. The package repository 230 url is parsed and the appropriate retrieval method is determined. 
231 232 """ 233 if source_url.startswith('http://'): 234 self.fetch_file_http(file_name, dest_path, source_url) 235 else: 236 raise PackageFetchError("Invalid location specified") 237 238 239 def fetch_file_http(self, file_name, dest_path, source_url): 240 """ 241 Fetch the package using http protocol. Raises a PackageFetchError. 242 """ 243 # check to see if the source_url is reachable or not 244 self.run_http_test(source_url, os.path.dirname(dest_path)) 245 246 pkg_path = os.path.join(source_url, file_name) 247 try: 248 self._run_command('wget %s -O %s' % (pkg_path, dest_path)) 249 except error.CmdError, e: 250 raise PackageFetchError("Package - %s not found in %s: %s" 251 % (file_name, source_url, e)) 252 253 254 def run_http_test(self, source_url, dest_dir): 255 ''' 256 Run a simple 30 sec wget on source_url 257 just to see if it can be reachable or not. This avoids the need 258 for waiting for a 10min timeout. 259 ''' 260 dest_file_path = os.path.join(dest_dir, 'http_test_file') 261 262 BPM = BasePackageManager 263 error_msg = "HTTP test failed. Failed to contact" 264 # We should never get here unless the source_url starts with http:// 265 assert(source_url.startswith('http://')) 266 267 # Get the http server name from the URL 268 server_name = urlparse.urlparse(source_url)[1] 269 http_cmd = 'printf "GET / HTTP/1.0\n\n" | nc %s 80' % server_name 270 271 if server_name in BPM._repo_exception: 272 if BPM._repo_exception[server_name] == BPM.REPO_OK: 273 # This repository is fine. 
Simply return 274 return 275 else: 276 raise PackageFetchError("%s - %s : %s " 277 % (error_msg, server_name, 278 BPM._repo_exception[server_name])) 279 try: 280 try: 281 self._run_command(http_cmd, 282 _run_command_dargs={'timeout':30}) 283 BPM._repo_exception[server_name] = BPM.REPO_OK 284 finally: 285 self._run_command('rm -f %s' % dest_file_path) 286 except error.CmdError, e: 287 BPM._repo_exception[server_name] = e 288 raise PackageFetchError("%s - %s: %s " % (error_msg, 289 server_name, e)) 290 291 292 293 # TODO(aganti): Fix the bug with the current checksum logic where 294 # packages' checksums that are not present consistently in all the 295 # repositories are not handled properly. This is a corner case though 296 # but the ideal solution is to make the checksum file repository specific 297 # and then maintain it. 298 def upload_pkg(self, pkg_path, upload_path=None, update_checksum=False): 299 ''' 300 Uploads to a specified upload_path or to all the repos. 301 Also uploads the checksum file to all the repos. 302 pkg_path : The complete path to the package file 303 upload_path : the absolute path where the files are copied to. 304 if set to 'None' assumes 'all' repos 305 update_checksum : If set to False, the checksum file is not 306 going to be updated which happens by default. 307 This is necessary for custom 308 packages (like custom kernels and custom tests) 309 that get uploaded which do not need to be part of 310 the checksum file and bloat it. 
311 ''' 312 if update_checksum: 313 # get the packages' checksum file and update it with the current 314 # package's checksum 315 checksum_path = self._get_checksum_file_path() 316 self.update_checksum(pkg_path) 317 318 if upload_path: 319 upload_path_list = [upload_path] 320 elif len(self.upload_paths) > 0: 321 upload_path_list = self.upload_paths 322 else: 323 raise PackageUploadError("Invalid Upload Path specified") 324 325 # upload the package 326 for path in upload_path_list: 327 self.upload_pkg_file(pkg_path, path) 328 if update_checksum: 329 self.upload_pkg_file(checksum_path, path) 330 331 332 def upload_pkg_file(self, file_path, upload_path): 333 ''' 334 Upload a single file. Depending on the upload path, the appropriate 335 method for that protocol is called. Currently this simply copies the 336 file to the target directory (but can be extended for other protocols) 337 This assumes that the web server is running on the same machine where 338 the method is being called from. The upload_path's files are 339 basically served by that web server. 340 ''' 341 try: 342 shutil.copy(file_path, upload_path) 343 os.chmod(os.path.join(upload_path, 344 os.path.basename(file_path)), 0755) 345 except (IOError, os.error), why: 346 raise PackageUploadError("Upload of %s to %s failed: %s" 347 % (file_path, upload_path, why)) 348 349 350 def remove_pkg(self, pkg_name, remove_path=None, remove_checksum=False): 351 ''' 352 Remove the package from the specified remove_path 353 pkg_name : name of the package (ex: test-sleeptest.tar.bz2, 354 dep-gcc.tar.bz2) 355 remove_path : the location to remove the package from. 
356 357 ''' 358 if remove_path: 359 remove_path_list = [remove_path] 360 elif len(self.upload_paths) > 0: 361 remove_path_list = self.upload_paths 362 else: 363 raise PackageRemoveError("Invalid path to remove the pkg from") 364 365 checksum_path = self._get_checksum_file_path() 366 367 if remove_checksum: 368 self.remove_checksum(pkg_name) 369 370 # remove the package and upload the checksum file to the repos 371 for path in remove_path_list: 372 self.remove_pkg_file(pkg_name, path) 373 self.upload_pkg_file(checksum_path, path) 374 375 376 def remove_pkg_file(self, file_name, pkg_dir): 377 ''' 378 Remove the file named file_name from pkg_dir 379 ''' 380 try: 381 # Remove the file 382 os.remove(os.path.join(pkg_dir, file_name)) 383 except (IOError, os.error), why: 384 raise PackageRemoveError("Could not remove %s from %s: %s " 385 % (file_name, pkg_dir, why)) 386 387 388 def _get_checksum_file_path(self): 389 ''' 390 Return the complete path of the checksum file (assumed to be stored 391 in self.pkgmgr_dir 392 ''' 393 return os.path.join(self.pkgmgr_dir, CHECKSUM_FILE) 394 395 396 def _get_checksum_dict(self): 397 ''' 398 Fetch the checksum file if not already fetched. If the checksum file 399 cannot be fetched from the repos then a new file is created with 400 the current package's (specified in pkg_path) checksum value in it. 401 Populate the local checksum dictionary with the values read from 402 the checksum file. 403 The checksum file is assumed to be present in self.pkgmgr_dir 404 ''' 405 checksum_path = self._get_checksum_file_path() 406 if not self._checksum_dict: 407 # Fetch the checksum file 408 try: 409 try: 410 self._run_command("ls %s" % checksum_path) 411 except (error.CmdError, error.AutoservRunError): 412 # The packages checksum file does not exist locally. 413 # See if it is present in the repositories. 
414 self.fetch_pkg(CHECKSUM_FILE, checksum_path, 415 use_checksum=False) 416 except PackageFetchError, e: 417 # This should not happen whilst fetching a package..if a 418 # package is present in the repository, the corresponding 419 # checksum file should also be automatically present. This 420 # case happens only when a package 421 # is being uploaded and if it is the first package to be 422 # uploaded to the repos (hence no checksum file created yet) 423 # Return an empty dictionary in that case 424 return {} 425 426 # Read the checksum file into memory 427 checksum_file_contents = self._run_command('cat ' 428 + checksum_path).stdout 429 430 # Return {} if we have an empty checksum file present 431 if not checksum_file_contents.strip(): 432 return {} 433 434 # Parse the checksum file contents into self._checksum_dict 435 for line in checksum_file_contents.splitlines(): 436 checksum, package_name = line.split(None, 1) 437 self._checksum_dict[package_name] = checksum 438 439 return self._checksum_dict 440 441 442 def _save_checksum_dict(self, checksum_dict): 443 ''' 444 Save the checksum dictionary onto the checksum file. Update the 445 local _checksum_dict variable with this new set of values. 446 checksum_dict : New checksum dictionary 447 checksum_dir : The directory in which to store the checksum file to. 448 ''' 449 checksum_path = self._get_checksum_file_path() 450 self._checksum_dict = checksum_dict.copy() 451 checksum_contents = '\n'.join(checksum + ' ' + pkg_name 452 for pkg_name,checksum in 453 checksum_dict.iteritems()) 454 # Write the checksum file back to disk 455 self._run_command('echo "%s" > %s' % (checksum_contents, 456 checksum_path)) 457 458 459 def compute_checksum(self, pkg_path): 460 ''' 461 Compute the MD5 checksum for the package file and return it. 
462 pkg_path : The complete path for the package file 463 ''' 464 md5sum_output = self._run_command("md5sum %s " % pkg_path).stdout 465 return md5sum_output.split()[0] 466 467 468 def update_checksum(self, pkg_path): 469 ''' 470 Update the checksum of the package in the packages' checksum 471 file. This method is called whenever a package is fetched just 472 to be sure that the checksums in the local file are the latest. 473 pkg_path : The complete path to the package file. 474 ''' 475 # Compute the new checksum 476 new_checksum = self.compute_checksum(pkg_path) 477 checksum_dict = self._get_checksum_dict() 478 checksum_dict[os.path.basename(pkg_path)] = new_checksum 479 self._save_checksum_dict(checksum_dict) 480 481 482 def remove_checksum(self, pkg_name): 483 ''' 484 Remove the checksum of the package from the packages checksum file. 485 This method is called whenever a package is removed from the 486 repositories in order clean its corresponding checksum. 487 pkg_name : The name of the package to be removed 488 ''' 489 checksum_dict = self._get_checksum_dict() 490 if pkg_name in checksum_dict: 491 del checksum_dict[pkg_name] 492 self._save_checksum_dict(checksum_dict) 493 494 495 def compare_checksum(self, pkg_path, repo_url): 496 ''' 497 Calculate the checksum of the file specified in pkg_path and 498 compare it with the checksum in the checksum file 499 Return True if both match else return False. 
500 pkg_path : The full path to the package file for which the 501 checksum is being compared 502 repo_url : The URL to fetch the checksum from 503 ''' 504 checksum_dict = self._get_checksum_dict() 505 package_name = os.path.basename(pkg_path) 506 if not checksum_dict or package_name not in checksum_dict: 507 return False 508 509 repository_checksum = checksum_dict[package_name] 510 local_checksum = self.compute_checksum(pkg_path) 511 return (local_checksum == repository_checksum) 512 513 514 def tar_package(self, pkg_name, src_dir, dest_dir, exclude_string=None): 515 ''' 516 Create a tar.bz2 file with the name 'pkg_name' say test-blah.tar.bz2. 517 Excludes the directories specified in exclude_dirs while tarring 518 the source. Returns the tarball path. 519 ''' 520 tarball_path = os.path.join(dest_dir, pkg_name) 521 522 utils.system("tar -cvjf %s -C %s %s " 523 % (tarball_path, src_dir, exclude_string)) 524 525 return tarball_path 526 527 528 def untar_required(self, tarball_path, dest_dir): 529 ''' 530 Compare the checksum of the tarball_path with the .checksum file 531 in the dest_dir and return False if it matches. The untar 532 of the package happens only if the checksums do not match. 533 ''' 534 checksum_path = os.path.join(dest_dir, '.checksum') 535 try: 536 existing_checksum = self._run_command('cat ' + checksum_path).stdout 537 except (error.CmdError, error.AutoservRunError): 538 # If the .checksum file is not present (generally, this should 539 # not be the case) then return True so that the untar happens 540 return True 541 542 new_checksum = self.compute_checksum(tarball_path) 543 return (new_checksum.strip() != existing_checksum.strip()) 544 545 546 def untar_pkg(self, tarball_path, dest_dir): 547 ''' 548 Untar the package present in the tarball_path and put a 549 ".checksum" file in the dest_dir containing the checksum 550 of the tarball. 
This method 551 assumes that the package to be untarred is of the form 552 <name>.tar.bz2 553 ''' 554 self._run_command('tar xvjf %s -C %s' % (tarball_path, dest_dir)) 555 # Put the .checksum file in the install_dir to note 556 # where the package came from 557 pkg_checksum = self.compute_checksum(tarball_path) 558 pkg_checksum_path = os.path.join(dest_dir, 559 '.checksum') 560 self._run_command('echo "%s" > %s ' 561 % (pkg_checksum, pkg_checksum_path)) 562 563 564 def get_tarball_name(self, name, pkg_type): 565 return "%s-%s.tar.bz2" % (pkg_type, name) 566 567 568 def is_url(self, url): 569 """Return true if path looks like a URL""" 570 return url.startswith('http://') 571 572 573 def get_package_name(self, url, pkg_type): 574 ''' 575 Extract the group and test name for the url. This method is currently 576 used only for tests. 577 ''' 578 if pkg_type == 'test': 579 regex = '([^:]+://(.*)/([^/]*)$' 580 return self._get_package_name(url, regex) 581 else: 582 return ('', url) 583 584 585 def _get_package_name(self, url, regex): 586 if not self.is_url(url): 587 if url.endswith('.tar.bz2'): 588 testname = url.replace('.tar.bz2', '') 589 testname = re.sub(r'(\d*)\.', '', testname) 590 return (testname, testname) 591 else: 592 return ('', url) 593 594 match = re.match(regex, url) 595 if not match: 596 return ('', url) 597 group, filename = match.groups() 598 # Generate the group prefix. 599 group = re.sub(r'\W', '_', group) 600 # Drop the extension to get the raw test name. 
601 testname = re.sub(r'\.tar\.bz2', '', filename) 602 # Drop any random numbers at the end of the test name if any 603 testname = re.sub(r'\.(\d*)', '', testname) 604 return (group, testname) 605 606 607# site_packages.py may be non-existant or empty, make sure that an appropriate 608# SitePackage class is created nevertheless 609try: 610 from site_packages import SitePackageManager 611except ImportError: 612 class SitePackageManager(BasePackageManager): 613 pass 614 615class PackageManager(SitePackageManager): 616 pass 617