#!/usr/bin/env python
#
# Copyright 2009 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""A class to serve pages from zip files and use memcache for performance.

This contains a class and a function to create an anonymous instance of the
class to serve HTTP GET requests. Memcache is used to increase response speed
and lower processing cycles used in serving. Credit to Guido van Rossum and
his implementation of zipserve which served as a reference as I wrote this.

  MemcachedZipHandler: Class that serves request
  create_handler: method to create instance of MemcachedZipHandler
"""

__author__ = 'jmatt@google.com (Justin Mattson)'

import email.Utils
import logging
import mimetypes
import re
import sys
import time
import yaml
import zipfile

from google.appengine.api import memcache
from google.appengine.ext import webapp
from google.appengine.ext.webapp import util
from time import localtime, strftime


def create_handler(zip_files, max_age=None, public=None):
    """Factory method to create a MemcachedZipHandler subclass.

    Args:
      zip_files: A non-empty list of file names, or a list of lists whose
          first member is the file name and whose optional second member is
          the first file stored in that archive. See
          MemcachedZipHandler.MapFileToArchive for the list-of-lists format.
      max_age: The maximum client-side cache lifetime, in seconds. None keeps
          the MemcachedZipHandler default.
      public: Whether this should be declared public in the client-side
          cache. None keeps the MemcachedZipHandler default.

    Returns:
      A MemcachedZipHandler subclass bound to zip_files, for use as a webapp
      request handler.

    Raises:
      ValueError: if the zip_files argument is not a non-empty list
    """
    if not zip_files or not isinstance(zip_files, list):
        raise ValueError('File name arguments must be a list')

    # Normalise plain file names to the list-of-lists format. A new list is
    # built so the caller's argument is not modified.
    archives = []
    for entry in zip_files:
        if isinstance(entry, list):
            archives.append(entry)
        else:
            archives.append([entry])

    class HandlerWrapper(MemcachedZipHandler):
        """MemcachedZipHandler bound to the archives given to create_handler.

        webapp instantiates handler classes without arguments, so the
        configuration is captured here through the enclosing scope.
        """
        # Override the class-level cache settings only when requested, so the
        # values are visible to SetCachingHeaders.
        if max_age is not None:
            MAX_AGE = max_age
        if public is not None:
            PUBLIC = public

        def get(self, name):
            """Serve a GET request for the URL path 'name'."""
            self.zipfilenames = archives
            self.TrueGet(name)

    return HandlerWrapper


class MemcachedZipHandler(webapp.RequestHandler):
    """Handles get requests for a given URL.

    Serves a GET request from a series of zip files. As files are served they
    are put into memcache, which is much faster than retrieving them from the
    zip source file again. It also uses considerably fewer CPU cycles.
    """
    zipfile_cache = {}                 # class cache of source zip files
    MAX_AGE = 600                      # max client-side cache lifetime
    PUBLIC = True                      # public cache setting
    CACHE_PREFIX = 'cache://'          # memcache key prefix for actual URLs
    NEG_CACHE_PREFIX = 'noncache://'   # memcache key prefix for missing URLs
    REDIRECT_PREFIX = 'redirect://'    # memcache key prefix for redirect data
    REDIRECT_FILE = 'redirects.yaml'   # file that contains the redirect table
    REDIRECT_SRC = 'src'               # 'source' key of a redirect entry
    REDIRECT_DST = 'dst'               # 'destination' key of a redirect entry
    REDIRECT_TYPE = 'type'             # 'type' key of a redirect entry
    REDIRECT_TYPE_PERM = 'permanent'   # 'type' value that serves a 301
    REDIRECT_TYPE_TEMP = 'temporary'   # 'type' value that serves a 302
    intlString = 'intl/'
    validLangs = ['en', 'de', 'es', 'fr', 'it', 'ja', 'ko', 'ru', 'zh-CN',
                  'zh-TW']

    # Content types used when mimetypes.guess_type cannot identify a file,
    # keyed by file-name suffix.
    FALLBACK_CONTENT_TYPES = (
        ('.psd', 'application/octet-stream'),
        ('.svg', 'image/svg+xml'),
        ('.mp4', 'video/mp4'),
        ('.webm', 'video/webm'),
        ('.ogv', 'video/ogg'),
    )

    def TrueGet(self, reqUri):
        """The top-level entry point to serving requests.

        Called 'True' get because it does the work when called from the
        wrapper class' get method. Some logic is applied to the request to
        serve files from an intl/<lang>/... directory or fall through to the
        default language.

        Args:
          reqUri: URL requested (relative, without a leading slash)

        Returns:
          None
        """
        langName = 'en'
        resetLangCookie = False
        isValidIntl = False
        isStripped = False

        # Try to retrieve the user's lang pref from the cookie. If there is
        # no lang pref cookie in the request, the response will set one with
        # the default value of 'en'.
        try:
            langName = self.request.cookies['android_developer_pref_lang']
        except KeyError:
            resetLangCookie = True
        logging.info('==========================REQ INIT name [%s] langName [%s] resetLangCookie [%s]', reqUri, langName, resetLangCookie)

        # Prep for handling intl requests: split off and validate the
        # intl/<lang> prefix and keep the uri that follows it (contentUri).
        sections = reqUri.split("/", 2)
        isIntl = len(sections) > 2 and (sections[0] == "intl")
        if isIntl:
            urlLangName = sections[1]
            contentUri = sections[2]
            isValidIntl = urlLangName in self.validLangs
            logging.info(' Content URI is [%s]...', contentUri)
            if isValidIntl and (langName != urlLangName or langName == 'en'):
                # The lang code in the request differs from the cookie, or
                # the target lang is en: strip the intl/nn prefix. The
                # request is later redirected to the preferred-language url.
                reqUri = contentUri
                isStripped = True
                isValidIntl = False
                isIntl = False
        else:
            contentUri = reqUri

        # Apply manual redirects from redirects.yaml. This occurs before any
        # other mutations are performed, to avoid odd redirect behavior
        # (For example, a user may want to redirect a directory without
        # having /index.html appended.)
        if self.ProcessManualRedirects(contentUri, langName, isIntl):
            return

        # If the url references a directory or the domain itself, redirect
        # to its index.html and stop.
        if self.PreprocessUrl(reqUri, langName):
            return

        if self.isCleanUrl(reqUri, langName, isValidIntl, isStripped):
            # 'Clean' request: try to respond using the actual request url.
            if not self.CreateResponse(reqUri, langName, isValidIntl,
                                       resetLangCookie):
                # No such document in the intl/lang tree. Fall back to the
                # default tree, which serves the doc or a 404.
                self.CreateResponse(contentUri, langName, False,
                                    resetLangCookie)
        elif isIntl:
            # Invalid intl request: pass it through for processing so that
            # it gets a 404 as appropriate.
            self.CreateResponse(reqUri, langName, isValidIntl,
                                resetLangCookie)
        else:
            # Non-clean (usually non-intl) url that must be interpreted in
            # the context of the lang pref: redirect to the intl/lang url,
            # which is then handled as a clean url.
            self.RedirToIntl(reqUri, self.intlString, langName)

    def ProcessManualRedirects(self, contentUri, langName, isIntl):
        """Compute any manual redirects for a request and execute them.

        This allows content authors to manually define a set of regex rules
        which, when matched, will cause an HTTP redirect to be performed.

        Redirect rules are typically stored in a file named redirects.yaml.
        See the comments in that file for more information about formatting.

        Redirect computations are stored in memcache for performance.

        Note that international URIs are handled automatically, and are
        assumed to mirror redirects for non-intl requests.

        Args:
          contentUri: The relative URI (without leading slash) that was
              requested. This should NOT contain an intl-prefix, if
              otherwise present.
          langName: The requested language.
          isIntl: True if contentUri originally contained an intl prefix.

        Returns:
          boolean: True if a redirect has been set, False otherwise.

        Raises:
          ValueError: if the matching rule has an unknown redirect type.
        """
        memcache_key = self.REDIRECT_PREFIX + contentUri
        redirect_data = memcache.get(memcache_key)
        if redirect_data is None:
            logging.info('Redirect cache miss. Computing new redirect data.\n'
                         'Memcache Key: %s', memcache_key)
            redirect_data = self.ComputeManualRedirectUrl(contentUri)
            memcache.set(memcache_key, redirect_data)
        contentUri, redirectType = redirect_data[0], redirect_data[1]

        if redirectType is None:
            # No redirect necessary
            return False

        # If this is an international URL, prepend the intl path to
        # minimize the number of redirects.
        if isIntl:
            contentUri = '/%s%s%s' % (self.intlString, langName, contentUri)

        if redirectType == self.REDIRECT_TYPE_PERM:
            logging.info('Sending permanent redirect: %s', contentUri)
            self.redirect(contentUri, permanent=True)
            return True
        if redirectType == self.REDIRECT_TYPE_TEMP:
            logging.info('Sending temporary redirect: %s', contentUri)
            self.redirect(contentUri, permanent=False)
            return True

        logging.error('Invalid redirect type: %s', redirectType)
        raise ValueError('Invalid redirect type: %s' % redirectType)

    def ComputeManualRedirectUrl(self, uri):
        """Read redirects file and evaluate redirect rules for a given URI.

        Args:
          uri: The relative URI (without leading slash) for which redirect
              data should be computed. No special handling of intl URIs is
              performed at this level.

        Returns:
          tuple: The computed redirect data. This tuple has two parts:
            redirect_uri: The new URI that should be used. If no rule
                matches, this is the input uri with a leading slash added
                (or unchanged when the redirects file cannot be read).
            redirect_type: Either 'permanent' for an HTTP 301 redirect,
                'temporary' for an HTTP 302 redirect, or None if no redirect
                should be performed.

        Raises:
          ValueError: if a redirect rule cannot be evaluated.
        """
        # Redirects are defined in a file named redirects.yaml.
        try:
            f = open(self.REDIRECT_FILE)
            try:
                # NOTE(review): yaml.load can construct arbitrary objects.
                # The file is local to the app, but consider yaml.safe_load.
                data = yaml.load(f)
            finally:
                f.close()
        except IOError:
            e = sys.exc_info()[1]
            logging.warning('Error opening redirect file (%s): %s',
                            self.REDIRECT_FILE, e.strerror)
            return (uri, None)

        # The incoming path is missing a leading slash. However, many parts
        # of the redirect system require leading slashes to distinguish
        # between relative and absolute redirects, so add one here.
        uri = '/' + uri

        # Check to make sure we actually got an iterable list out of the file
        if data is None:
            logging.warning('Redirect file (%s) not valid YAML.',
                            self.REDIRECT_FILE)
        elif 'redirects' not in data:
            logging.warning('Redirect file (%s) not properly formatted -- no '
                            '\'redirects:\' header.', self.REDIRECT_FILE)
        elif hasattr(data['redirects'], '__iter__'):
            for redirect in data['redirects']:
                # Note: re.match only matches at the beginning of the
                # string, so every rule is implicitly anchored with '^'.
                try:
                    source_pattern = redirect[self.REDIRECT_SRC]
                    if re.match(source_pattern, uri):
                        redirect_uri = re.sub('^' + source_pattern,
                                              redirect[self.REDIRECT_DST],
                                              uri)
                        logging.info('Redirect rule matched.\n'
                                     'Rule: %s\n'
                                     'Src: %s\n'
                                     'Dst: %s',
                                     source_pattern, uri, redirect_uri)
                        # Redirects are permanent unless the rule says
                        # otherwise.
                        if self.REDIRECT_TYPE in redirect:
                            redirect_type = redirect[self.REDIRECT_TYPE]
                        else:
                            redirect_type = self.REDIRECT_TYPE_PERM
                        return (redirect_uri, redirect_type)
                except Exception:
                    e = sys.exc_info()[1]
                    raise ValueError('Error while processing redirect rule.\n'
                                     'Rule: %r\n'
                                     'Error: %s' % (redirect, e))
        # No redirect found, return URL unchanged
        return (uri, None)

    def isCleanUrl(self, name, langName, isValidIntl, isStripped):
        """Determine whether to pass an incoming url straight to processing.

        Args:
          name: The incoming URL
          langName: The user's preferred language code
          isValidIntl: Whether the URL carries a valid intl/<lang> prefix
          isStripped: Whether an intl/<lang> prefix was stripped from the URL

        Returns:
          boolean: Whether the URL should be sent straight to processing
        """
        if langName == 'en' and not isStripped:
            return True
        if isValidIntl or '.html' not in name:
            return True
        return not langName

    def PreprocessUrl(self, name, langName):
        """Any preprocessing work on the URL when it comes in.

        Put any work related to interpreting the incoming URL here. For
        example, this is used to redirect requests for a directory to the
        index.html file in that directory. Subclasses should override this
        method to do different preprocessing.

        Args:
          name: The incoming URL
          langName: The user's preferred language code (unused here)

        Returns:
          True if the request was redirected to '/index.html'.
          Otherwise False.
        """
        # A final path segment without a '.' is taken to be a directory.
        final_path_segment = name[name.rfind('/') + 1:]
        if final_path_segment and '.' not in final_path_segment:
            name = name + '/'

        # if this is a directory or the domain itself, redirect to
        # /index.html
        if not name or name.endswith('/'):
            self.redirect(''.join(['/', name, 'index.html']), False)
            return True
        return False

    def RedirToIntl(self, name, intlString, langName):
        """Redirect an incoming request to the appropriate intl uri.

        For non-en langName, builds the intl/lang string from a
        base (en) string and redirects (302) the request to look for
        a version of the file in langName. For en langName, simply
        redirects a stripped uri string (intl/nn removed).

        Args:
          name: The incoming, preprocessed URL
          intlString: The intl path prefix, e.g. 'intl/'
          langName: The language code to redirect to

        Returns:
          The lang-specific URL
        """
        if langName == 'en':
            builtIntlLangUri = name
        else:
            builtIntlLangUri = ''.join([intlString, langName, '/', name, '?',
                                        self.request.query_string])
        uri = '/' + builtIntlLangUri
        logging.info('-->REDIRECTING %s to %s', name, uri)
        self.redirect(uri, False)
        return uri

    def CreateResponse(self, name, langName, isValidIntl, resetLangCookie):
        """Process the url and form a response, if appropriate.

        Attempts to retrieve the requested file (name) from cache,
        negative cache, or store (zip) and form the response.
        For intl requests that are not found (in the localized tree),
        returns False rather than forming a response, so that
        the request can be retried with the base url (this is the
        fallthrough to default language).

        For requests that are found, forms the headers and
        adds the content to the response entity. If requested, also sets
        the language cookie so that the client language and response data
        remain harmonious.

        Args:
          name: The incoming, preprocessed URL
          langName: The language id. Used as necessary to reset the
              language cookie in the response.
          isValidIntl: Whether the request is for a language-specific url
          resetLangCookie: Whether the response should reset the
              language cookie to 'langName'

        Returns:
          True: A response (content or 404) was created for the request
          False: No response was created.
        """
        logging.info('PROCESSING %s langName [%s] isValidIntl [%s] resetLang [%s]',
                     name, langName, isValidIntl, resetLangCookie)
        resp_data = self.GetFromCache(name)
        if resp_data is None:
            logging.info(' Cache miss for %s', name)
            if self.GetFromNegativeCache(name) is not None:
                # known to be missing
                self.Write404Error()
                return True

            resp_data = self.GetFromStore(name)
            if resp_data is not None:
                self.StoreOrUpdateInCache(name, resp_data)
            elif isValidIntl:
                # couldn't find the intl doc. Let the caller fall through
                # to English.
                return False
            else:
                logging.info(' Adding %s to negative cache, serving 404',
                             name)
                self.StoreInNegativeCache(name)
                self.Write404Error()
                return True

        # found content from cache or store
        logging.info('FOUND CLEAN')
        if resetLangCookie:
            logging.info(' Resetting android_developer_pref_lang cookie to [%s]',
                         langName)
            ten_years = 60 * 60 * 24 * 365 * 10
            expireDate = time.mktime(localtime()) + ten_years
            self.response.headers.add_header(
                'Set-Cookie',
                'android_developer_pref_lang=%s; path=/; expires=%s' %
                (langName,
                 strftime("%a, %d %b %Y %H:%M:%S", localtime(expireDate))))

        mustRevalidate = False
        if '.html' in name:
            # revalidate html files -- workaround for cache inconsistencies
            # for negotiated responses
            mustRevalidate = True
            self.response.headers.add_header('Vary', 'Cookie')

        content_type = self._GuessContentType(name)
        if content_type:
            self.response.headers['Content-Type'] = content_type
            self.SetCachingHeaders(mustRevalidate)
            self.response.out.write(resp_data)
        else:
            # Preserves the historical behavior: content of unknown type is
            # not written to the response.
            logging.warning('No content type known for %s, body not served',
                            name)
        return True

    def _GuessContentType(self, name):
        """Return the content type to serve 'name' with, or None if unknown."""
        content_type, _ = mimetypes.guess_type(name)
        if content_type:
            return content_type
        if name == 'favicon.ico':
            return 'image/x-icon'
        for suffix, fallback_type in self.FALLBACK_CONTENT_TYPES:
            if name.endswith(suffix):
                return fallback_type
        return None

    def GetFromStore(self, file_path):
        """Retrieve file from zip files.

        Get the file from the source, it must not have been in the memcache.
        If possible, we'll use the zip file index to quickly locate where the
        file should be found. (See MapFileToArchive documentation for
        assumptions about file ordering.) If we don't have an index or don't
        find the file where the index says we should, look through all the
        zip files to find it.

        Args:
          file_path: the file that we're looking for

        Returns:
          The contents of the requested file, or None if no archive has it
        """
        archive_names = [entry[0] for entry in self.zipfilenames]

        # Try the archive the index points at first, then every other
        # archive in the configured order.
        indexed_archive = self.MapFileToArchive(file_path)
        if indexed_archive:
            archive_names = [indexed_archive] + [
                archive_name for archive_name in archive_names
                if archive_name != indexed_archive]

        for archive_name in archive_names:
            if not archive_name:
                continue
            zip_archive = self.LoadZipFile(archive_name)
            if zip_archive is None:
                continue
            try:
                resp_data = zip_archive.read(file_path)
            except (KeyError, RuntimeError):
                # we expect some lookups will fail, and that's okay, 404s
                # will deal with that
                continue
            if resp_data is not None:
                logging.info('%s read from %s', file_path, archive_name)
                return resp_data
        return None

    def LoadZipFile(self, zipfilename):
        """Convenience method to load zip file.

        Just a convenience method to load the zip file from the data store.
        This is useful if we ever want to change data stores and also as a
        means of dependency injection for testing. This method will look at
        our file cache first, and then load and cache the file if there's a
        cache miss

        Args:
          zipfilename: the name of the zip file to load

        Returns:
          The zip file requested, or None if there is an I/O error
        """
        zip_archive = self.zipfile_cache.get(zipfilename)
        if zip_archive is None:
            try:
                zip_archive = zipfile.ZipFile(zipfilename)
                self.zipfile_cache[zipfilename] = zip_archive
            except (IOError, RuntimeError):
                logging.error('Can\'t open zipfile %s, cause: %s' %
                              (zipfilename, sys.exc_info()[1]))
        return zip_archive

    def MapFileToArchive(self, file_path):
        """Given a file name, determine what archive it should be in.

        This method makes two critical assumptions.
        (1) The zip files passed as an argument to the handler, if
            concatenated in that same order, would result in a total
            ordering of all the files. See (2) for ordering type.
        (2) Upper case letters before lower case letters. The traversal of a
            directory tree is depth first. A parent directory's files are
            added before the files of any child directories

        Args:
          file_path: the file to be mapped to an archive

        Returns:
          The name of the archive where we expect the file to be, or None if
          the archive list carries no usable index
        """
        # Walk the archives last to first: the file belongs to the last
        # archive whose first file is not after it.
        for target in reversed(self.zipfilenames):
            if (len(target) > 1 and
                    self.CompareFilenames(target[1], file_path) >= 0):
                return target[0]
        return None

    def CompareFilenames(self, file1, file2):
        """Determines whether file1 is lexicographically 'before' file2.

        WARNING: This method assumes that paths are output in a depth-first,
        with parent directories' files stored before childs'

        We say that file1 is lexicographically before file2 if the last
        non-matching path segment of file1 is alphabetically before file2.

        Args:
          file1: the first file path
          file2: the second file path

        Returns:
          A positive number if file1 is before file2
          A negative number if file2 is before file1
          0 if filenames are the same
        """
        f1_segments = file1.split('/')
        f2_segments = file2.split('/')

        # skip the leading path segments the two paths share
        segment_ptr = 0
        shortest = min(len(f1_segments), len(f2_segments))
        while (segment_ptr < shortest and
               f1_segments[segment_ptr] == f2_segments[segment_ptr]):
            segment_ptr += 1

        if len(f1_segments) == len(f2_segments):
            # we fell off the end, the paths must be the same
            if segment_ptr == len(f1_segments):
                return 0
            return self._CompareSegments(f1_segments[segment_ptr],
                                         f2_segments[segment_ptr])

        # The number of segments differs. If the mismatch is at (or past)
        # the last segment of either path, the path with fewer segments is
        # first because files come before directories. '>=' also covers one
        # path being a complete prefix of the other.
        if (segment_ptr + 1 >= len(f1_segments) or
                segment_ptr + 1 >= len(f2_segments)):
            return len(f2_segments) - len(f1_segments)

        # otherwise just compare the directory names where they differ
        return self._CompareSegments(f1_segments[segment_ptr],
                                     f2_segments[segment_ptr])

    def _CompareSegments(self, segment1, segment2):
        """Return 1 if segment1 sorts before segment2, -1 if after, else 0."""
        if segment1 < segment2:
            return 1
        if segment1 > segment2:
            return -1
        return 0

    def SetCachingHeaders(self, revalidate):
        """Set the Cache-Control header for the response.

        Args:
          revalidate: Whether to add 'must-revalidate' to the header
        """
        cache_control = []
        if self.PUBLIC:
            cache_control.append('public')
        cache_control.append('max-age=%d' % self.MAX_AGE)
        if revalidate:
            cache_control.append('must-revalidate')
        self.response.headers['Cache-Control'] = ', '.join(cache_control)

    def GetFromCache(self, filename):
        """Get file from memcache, if available.

        Args:
          filename: The URL of the file to return

        Returns:
          The content of the file, or None if it is not cached
        """
        return memcache.get('%s%s' % (self.CACHE_PREFIX, filename))

    def StoreOrUpdateInCache(self, filename, data):
        """Store data in the cache.

        Store a piece of data in the memcache. Memcache has a maximum item
        size of 1*10^6 bytes. If the data is too large, fail, but log the
        failure. Future work will consider compressing the data before
        storing or chunking it

        Args:
          filename: the name of the file to store
          data: the data of the file

        Returns:
          None
        """
        try:
            memcache.set('%s%s' % (self.CACHE_PREFIX, filename), data)
        except ValueError:
            logging.warning('Data size too large to cache\n%s' %
                            sys.exc_info()[1])

    def Write404Error(self):
        """Output a simple 404 response."""
        self.error(404)
        self.response.out.write(
            ''.join(['<html><head><title>404: Not Found</title></head>',
                     '<body><b><h2>Error 404</h2><br/>',
                     'File not found</b></body></html>']))

    def StoreInNegativeCache(self, filename):
        """If a non-existent URL is accessed, cache this result as well.

        Future work should consider setting a maximum negative cache size to
        prevent it from negatively impacting the real cache.

        Args:
          filename: URL to add to negative cache

        Returns:
          None
        """
        memcache.add('%s%s' % (self.NEG_CACHE_PREFIX, filename), -1)

    def GetFromNegativeCache(self, filename):
        """Retrieve from negative cache.

        Args:
          filename: URL to retrieve

        Returns:
          The negative-cache marker if the URL is known to be missing,
          otherwise None.
        """
        return memcache.get('%s%s' % (self.NEG_CACHE_PREFIX, filename))


def main():
    """Run a WSGI application that routes requests to MemcachedZipHandler."""
    application = webapp.WSGIApplication([('/([^/]+)/(.*)',
                                           MemcachedZipHandler)])
    util.run_wsgi_app(application)


if __name__ == '__main__':
    main()