1#!/usr/bin/env python
2#
3# Copyright 2009 Google Inc.
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#   http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16#
17
18"""A class to serve pages from zip files and use memcache for performance.
19
20This contains a class and a function to create an anonymous instance of the
21class to serve HTTP GET requests. Memcache is used to increase response speed
22and lower processing cycles used in serving. Credit to Guido van Rossum and
23his implementation of zipserve which served as a reference as I wrote this.
24
25  MemcachedZipHandler: Class that serves request
26  create_handler: method to create instance of MemcachedZipHandler
27"""
28
29__author__ = 'jmatt@google.com (Justin Mattson)'
30
31import email.Utils
32import logging
33import mimetypes
34import re
35import sys
36import time
37import yaml
38import zipfile
39
40from google.appengine.api import memcache
41from google.appengine.ext import webapp
42from google.appengine.ext.webapp import util
43from time import localtime, strftime
44
def create_handler(zip_files, max_age=None, public=None):
  """Factory method to create a MemcachedZipHandler subclass bound to zip files.

  Args:
    zip_files: A non-empty list. Each item is either a zip file name or a
        list whose first member is the zip file name, optionally followed by
        file mapping data (see MemcachedZipHandler.MapFileToArchive). Bare
        names are wrapped into one-element lists. The caller's list is left
        untouched; a normalized copy is used instead.
    max_age: The maximum client-side cache lifetime. None keeps the
        MemcachedZipHandler.MAX_AGE default.
    public: Whether this should be declared public in the client-side cache.
        None keeps the MemcachedZipHandler.PUBLIC default.

  Returns:
    A MemcachedZipHandler subclass whose get() serves from zip_files.

  Raises:
    ValueError: if the zip_files argument is not a non-empty list
  """
  # An empty list is rejected as well: there would be nothing to serve from.
  if not zip_files or not isinstance(zip_files, list):
    raise ValueError('File name arguments must be a list')

  # Convert the plain-list format to the list-of-lists format.
  normalized = [entry if isinstance(entry, list) else [entry]
                for entry in zip_files]

  class HandlerWrapper(MemcachedZipHandler):
    """MemcachedZipHandler preconfigured with this factory's arguments.

    The configuration is carried as class attributes so that it is already
    in place when get() starts building the response.
    """
    zipfilenames = normalized
    # Only override the base class defaults when a value was supplied.
    if max_age is not None:
      MAX_AGE = max_age
    if public is not None:
      PUBLIC = public

    def get(self, name):
      self.TrueGet(name)

  return HandlerWrapper
86
87
class MemcachedZipHandler(webapp.RequestHandler):
  """Handles get requests for a given URL.

  Serves a GET request from a series of zip files. As files are served they are
  put into memcache, which is much faster than retrieving them from the zip
  source file again. It also uses considerably fewer CPU cycles.
  """
  zipfile_cache = {}                # class cache of source zip files, keyed
                                    #   by zip file name
  MAX_AGE = 600                     # max client-side cache lifetime
  PUBLIC = True                     # public cache setting
  CACHE_PREFIX = 'cache://'         # memcache key prefix for actual URLs
  NEG_CACHE_PREFIX = 'noncache://'  # memcache key prefix for non-existent URL
  REDIRECT_PREFIX = 'redirect://'   # memcache key prefix for redirect data
  REDIRECT_FILE = 'redirects.yaml'  # Name of file that contains redirect table
  REDIRECT_SRC = 'src'              # Name of the 'source' attribute for a
                                    #   redirect table entry
  REDIRECT_DST = 'dst'              # Name of the 'destination' attribute for
                                    #   a redirect table entry
  REDIRECT_TYPE = 'type'            # Name of the 'type' attribute for a
                                    #   redirect table entry
  REDIRECT_TYPE_PERM = 'permanent'  # Redirect 'type' string indicating a 301
                                    #   redirect should be served
  REDIRECT_TYPE_TEMP = 'temporary'  # Redirect 'type' string indicating a 302
                                    #   redirect should be served
  # Path prefix that introduces a localized URL (intl/<lang>/...)
  intlString = 'intl/'
  # Language codes accepted in the <lang> position of an intl URL
  validLangs = ['en', 'de', 'es', 'fr','it','ja','ko','ru','zh-CN','zh-TW']
114
115  def TrueGet(self, reqUri):
116    """The top-level entry point to serving requests.
117
118    Called 'True' get because it does the work when called from the wrapper
119    class' get method. Some logic is applied to the request to serve files
120    from an intl/<lang>/... directory or fall through to the default language.
121
122    Args:
123      name: URL requested
124
125    Returns:
126      None
127    """
128    langName = 'en'
129    resetLangCookie = False
130    urlLangName = None
131    retry = False
132    isValidIntl = False
133    isStripped = False
134
135    # Try to retrieve the user's lang pref from the cookie. If there is no
136    # lang pref cookie in the request, add set-cookie to the response with the
137    # default value of 'en'.
138    try:
139      langName = self.request.cookies['android_developer_pref_lang']
140    except KeyError:
141      resetLangCookie = True
142      #logging.info('==========================EXCEPTION: NO LANG COOKIE FOUND, USING [%s]', langName)
143    logging.info('==========================REQ INIT name [%s] langName [%s] resetLangCookie [%s]', reqUri, langName, resetLangCookie)
144
145    # Do some prep for handling intl requests. Parse the url and validate
146    # the intl/lang substring, extract the url lang code (urlLangName) and the
147    # the uri that follows the intl/lang substring(contentUri)
148    sections = reqUri.split("/", 2)
149    isIntl = len(sections) > 2 and (sections[0] == "intl")
150    if isIntl:
151      isValidIntl = sections[1] in self.validLangs
152      urlLangName = sections[1]
153      contentUri = sections[2]
154      logging.info('  Content URI is [%s]...', contentUri)
155      if isValidIntl:
156        if (langName != urlLangName) or (langName == 'en'):
157          # if the lang code in the request is different from that in
158          # the cookie, or if the target lang is en, strip the
159          # intl/nn substring. It will later be redirected to
160          # the user's preferred language url.
161          # logging.info('  Handling a MISMATCHED intl request')
162          reqUri = contentUri
163          isStripped = True
164          isValidIntl = False
165          isIntl = False
166          #logging.info('INTL PREP resetting langName to urlLangName [%s]', langName)
167        #else:
168        #  logging.info('INTL PREP no need to reset langName')
169    else:
170      contentUri = reqUri
171
172    # Apply manual redirects from redirects.yaml. This occurs before any
173    # other mutations are performed, to avoid odd redirect behavior
174    # (For example, a user may want to redirect a directory without having
175    # /index.html appended.)
176    did_redirect = self.ProcessManualRedirects(contentUri, langName, isIntl)
177    if did_redirect:
178      return
179
180    # Preprocess the req url. If it references a directory or the domain itself,
181    # append '/index.html' to the url and 302 redirect. Otherwise, continue
182    # processing the request below.
183    did_redirect = self.PreprocessUrl(reqUri, langName)
184    if did_redirect:
185      return
186
187    # Send for processing
188    if self.isCleanUrl(reqUri, langName, isValidIntl, isStripped):
189      # handle a 'clean' request.
190      # Try to form a response using the actual request url.
191      # logging.info('  Request being handled as clean: [%s]', name)
192      if not self.CreateResponse(reqUri, langName, isValidIntl, resetLangCookie):
193        # If CreateResponse returns False, there was no such document
194        # in the intl/lang tree. Before going to 404, see if there is an
195        # English-language version of the doc in the default
196        # default tree and return it, else go to 404.
197        self.CreateResponse(contentUri, langName, False, resetLangCookie)
198
199    elif isIntl:
200      # handle the case where we need to pass through an invalid intl req
201      # for processing (so as to get 404 as appropriate). This is needed
202      # because intl urls are passed through clean and retried in English,
203      # if necessary.
204      # logging.info('  Handling an invalid intl request...')
205      self.CreateResponse(reqUri, langName, isValidIntl, resetLangCookie)
206
207    else:
208      # handle the case where we have a non-clean url (usually a non-intl
209      # url) that we need to interpret in the context of any lang pref
210      # that is set. Prepend an intl/lang string to the request url and
211      # send it as a 302 redirect. After the redirect, the subsequent
212      # request will be handled as a clean url.
213      self.RedirToIntl(reqUri, self.intlString, langName)
214
215  def ProcessManualRedirects(self, contentUri, langName, isIntl):
216    """Compute any manual redirects for a request and execute them.
217
218    This allows content authors to manually define a set of regex rules which,
219    when matched, will cause an HTTP redirect to be performed.
220
221    Redirect rules are typically stored in a file named redirects.yaml. See the
222    comments in that file for more information about formatting.
223
224    Redirect computations are stored in memcache for performance.
225
226    Note that international URIs are handled automatically, and are assumed to
227    mirror redirects for non-intl requests.
228
229    Args:
230      contentUri: The relative URI (without leading slash) that was requested.
231        This should NOT contain an intl-prefix, if otherwise present.
232      langName: The requested language.
233      isIntl: True if contentUri originally contained an intl prefix.
234
235    Results:
236      boolean: True if a redirect has been set, False otherwise.
237    """
238    # Redirect data is stored in memcache for performance
239    memcache_key = self.REDIRECT_PREFIX + contentUri
240    redirect_data = memcache.get(memcache_key)
241    if redirect_data is None:
242      logging.info('Redirect cache miss. Computing new redirect data.\n'
243                   'Memcache Key: ' + memcache_key)
244      redirect_data = self.ComputeManualRedirectUrl(contentUri)
245      memcache.set(memcache_key, redirect_data)
246    contentUri = redirect_data[0]
247    redirectType = redirect_data[1]
248
249    # If this is an international URL, prepend intl path to minimize
250    # number of redirects
251    if isIntl:
252      contentUri = '/%s%s%s' % (self.intlString, langName, contentUri)
253
254    if redirectType is None:
255      # No redirect necessary
256      return False
257    elif redirectType == self.REDIRECT_TYPE_PERM:
258      logging.info('Sending permanent redirect: ' + contentUri);
259      self.redirect(contentUri, permanent=True)
260      return True
261    elif redirectType == self.REDIRECT_TYPE_TEMP:
262      logging.info('Sending temporary redirect: ' + contentUri);
263      self.redirect(contentUri, permanent=False)
264      return True
265    else:
266      # Invalid redirect type
267      logging.error('Invalid redirect type: %s', redirectType)
268      raise ('Invalid redirect type: %s', redirectType)
269
270  def ComputeManualRedirectUrl(self, uri):
271    """Read redirects file and evaluate redirect rules for a given URI.
272
273    Args:
274      uri: The relative URI (without leading slash) for which redirect data
275        should be computed. No special handling of intl URIs is pefromed
276        at this level.
277
278    Returns:
279      tuple: The computed redirect data. This tuple has two parts:
280        redirect_uri: The new URI that should be used. (If no redirect rule is
281          found, the original input to 'uri' will be returned.
282        redirect_type: Either 'permanent' for an HTTP 301 redirect, 'temporary'
283          for an HTTP 302 redirect, or None if no redirect should be performed.
284    """
285    # Redircts are defined in a file named redirects.yaml.
286    try:
287      f = open(self.REDIRECT_FILE)
288      data = yaml.load(f)
289      f.close()
290    except IOError, e:
291      logging.warning('Error opening redirect file (' + self.REDIRECT_FILE +
292                      '): ' + e.strerror)
293      return (uri, None)
294
295    # The incoming path is missing a leading slash. However, many parts of the
296    # redirect system require leading slashes to distinguish between relative
297    # and absolute redirects. So, to compensate for this, we'll add a leading
298    # slash here as well.
299    uri = '/' + uri
300
301    # Check to make sure we actually got an iterable list out of the YAML file
302    if data is None:
303      logging.warning('Redirect file (' + self.REDIRECT_FILE + ') not valid '
304                      'YAML.')
305    elif 'redirects' not in data:
306      logging.warning('Redirect file (' + self.REDIRECT_FILE + ') not '
307                      'properly formatted -- no \'redirects:\' header.')
308    elif hasattr(data['redirects'], '__iter__'):
309      # Iterate through redirect data, try to find a redirect that matches.
310      for redirect in data['redirects']:
311          # Note: re.search adds an implied '^' to the beginning of the regex
312          # This means that the regex must match from the beginning of the
313          # string.
314          try:
315            if re.match(redirect[self.REDIRECT_SRC], uri):
316              # Match found. Apply redirect rule.
317              redirect_uri = re.sub('^' + redirect[self.REDIRECT_SRC],
318                  redirect[self.REDIRECT_DST], uri)
319              logging.info('Redirect rule matched.\n'
320                             'Rule: %s\n'
321                             'Src: %s\n'
322                             'Dst: %s',
323                           redirect[self.REDIRECT_SRC], uri, redirect_uri)
324              if self.REDIRECT_TYPE in redirect:
325                redirect_type = redirect[self.REDIRECT_TYPE]
326              else:
327                # Default redirect type, if unspecified
328                redirect_type = self.REDIRECT_TYPE_PERM
329              return (redirect_uri, redirect_type)
330          except:
331            e = sys.exc_info()[1]
332            raise ('Error while processing redirect rule.\n'
333                     'Rule: %s\n'
334                     'Error: %s' % (redirect[self.REDIRECT_SRC], e))
335    # No redirect found, return URL unchanged
336    return (uri, None)
337
338  def isCleanUrl(self, name, langName, isValidIntl, isStripped):
339    """Determine whether to pass an incoming url straight to processing.
340
341       Args:
342         name: The incoming URL
343
344       Returns:
345         boolean: Whether the URL should be sent straight to processing
346    """
347    # logging.info('  >>>> isCleanUrl name [%s] langName [%s] isValidIntl [%s]', name, langName, isValidIntl)
348    if (langName == 'en' and not isStripped) or isValidIntl or not ('.html' in name) or (not isValidIntl and not langName):
349      return True
350
351  def PreprocessUrl(self, name, langName):
352    """Any preprocessing work on the URL when it comes in.
353
354    Put any work related to interpreting the incoming URL here. For example,
355    this is used to redirect requests for a directory to the index.html file
356    in that directory. Subclasses should override this method to do different
357    preprocessing.
358
359    Args:
360      name: The incoming URL
361
362    Returns:
363      True if the request was redirected to '/index.html'.
364      Otherewise False.
365    """
366
367    # determine if this is a request for a directory
368    final_path_segment = name
369    final_slash_offset = name.rfind('/')
370    if final_slash_offset != len(name) - 1:
371      final_path_segment = name[final_slash_offset + 1:]
372      if final_path_segment.find('.') == -1:
373        name = ''.join([name, '/'])
374
375    # if this is a directory or the domain itself, redirect to /index.html
376    if not name or (name[len(name) - 1:] == '/'):
377      uri = ''.join(['/', name, 'index.html'])
378      # logging.info('--->PREPROCESSING REDIRECT [%s] to [%s] with langName [%s]', name, uri, langName)
379      self.redirect(uri, False)
380      return True
381    else:
382      return False
383
384  def RedirToIntl(self, name, intlString, langName):
385    """Redirect an incoming request to the appropriate intl uri.
386
387       For non-en langName, builds the intl/lang string from a
388       base (en) string and redirects (302) the request to look for
389       a version of the file in langName. For en langName, simply
390       redirects a stripped uri string (intl/nn removed).
391
392    Args:
393      name: The incoming, preprocessed URL
394
395    Returns:
396      The lang-specific URL
397    """
398    if not (langName == 'en'):
399      builtIntlLangUri = ''.join([intlString, langName, '/', name, '?', self.request.query_string])
400    else:
401      builtIntlLangUri = name
402    uri = ''.join(['/', builtIntlLangUri])
403    logging.info('-->REDIRECTING %s to  %s', name, uri)
404    self.redirect(uri, False)
405    return uri
406
407  def CreateResponse(self, name, langName, isValidIntl, resetLangCookie):
408    """Process the url and form a response, if appropriate.
409
410       Attempts to retrieve the requested file (name) from cache,
411       negative cache, or store (zip) and form the response.
412       For intl requests that are not found (in the localized tree),
413       returns False rather than forming a response, so that
414       the request can be retried with the base url (this is the
415       fallthrough to default language).
416
417       For requests that are found, forms the headers and
418       adds the content to the response entity. If the request was
419       for an intl (localized) url, also resets the language cookie
420       to the language specified in the url if needed, to ensure that
421       the client language and response data remain harmonious.
422
423    Args:
424      name: The incoming, preprocessed URL
425      langName: The language id. Used as necessary to reset the
426                language cookie in the response.
427      isValidIntl: If present, indicates whether the request is
428                   for a language-specific url
429      resetLangCookie: Whether the response should reset the
430                       language cookie to 'langName'
431
432    Returns:
433      True: A response was successfully created for the request
434      False: No response was created.
435    """
436    # see if we have the page in the memcache
437    logging.info('PROCESSING %s langName [%s] isValidIntl [%s] resetLang [%s]',
438      name, langName, isValidIntl, resetLangCookie)
439    resp_data = self.GetFromCache(name)
440    if resp_data is None:
441      logging.info('  Cache miss for %s', name)
442      resp_data = self.GetFromNegativeCache(name)
443      if resp_data is None:
444        resp_data = self.GetFromStore(name)
445
446        # IF we have the file, put it in the memcache
447        # ELSE put it in the negative cache
448        if resp_data is not None:
449          self.StoreOrUpdateInCache(name, resp_data)
450        elif isValidIntl:
451          # couldn't find the intl doc. Try to fall through to English.
452          #logging.info('  Retrying with base uri...')
453          return False
454        else:
455          logging.info('  Adding %s to negative cache, serving 404', name)
456          self.StoreInNegativeCache(name)
457          self.Write404Error()
458          return True
459      else:
460        # found it in negative cache
461        self.Write404Error()
462        return True
463
464    # found content from cache or store
465    logging.info('FOUND CLEAN')
466    if resetLangCookie:
467      logging.info('  Resetting android_developer_pref_lang cookie to [%s]',
468      langName)
469      expireDate = time.mktime(localtime()) + 60 * 60 * 24 * 365 * 10
470      self.response.headers.add_header('Set-Cookie',
471      'android_developer_pref_lang=%s; path=/; expires=%s' %
472      (langName, strftime("%a, %d %b %Y %H:%M:%S", localtime(expireDate))))
473    mustRevalidate = False
474    if ('.html' in name):
475      # revalidate html files -- workaround for cache inconsistencies for
476      # negotiated responses
477      mustRevalidate = True
478      #logging.info('  Adding [Vary: Cookie] to response...')
479      self.response.headers.add_header('Vary', 'Cookie')
480    content_type, encoding = mimetypes.guess_type(name)
481    if content_type:
482      self.response.headers['Content-Type'] = content_type
483      self.SetCachingHeaders(mustRevalidate)
484      self.response.out.write(resp_data)
485    elif (name == 'favicon.ico'):
486      self.response.headers['Content-Type'] = 'image/x-icon'
487      self.SetCachingHeaders(mustRevalidate)
488      self.response.out.write(resp_data)
489    elif name.endswith('.psd'):
490      self.response.headers['Content-Type'] = 'application/octet-stream'
491      self.SetCachingHeaders(mustRevalidate)
492      self.response.out.write(resp_data)
493    elif name.endswith('.svg'):
494      self.response.headers['Content-Type'] = 'image/svg+xml'
495      self.SetCachingHeaders(mustRevalidate)
496      self.response.out.write(resp_data)
497    elif name.endswith('.mp4'):
498      self.response.headers['Content-Type'] = 'video/mp4'
499      self.SetCachingHeaders(mustRevalidate)
500      self.response.out.write(resp_data)
501    elif name.endswith('.webm'):
502      self.response.headers['Content-Type'] = 'video/webm'
503      self.SetCachingHeaders(mustRevalidate)
504      self.response.out.write(resp_data)
505    elif name.endswith('.ogv'):
506      self.response.headers['Content-Type'] = 'video/ogg'
507      self.SetCachingHeaders(mustRevalidate)
508      self.response.out.write(resp_data)
509    return True
510
511  def GetFromStore(self, file_path):
512    """Retrieve file from zip files.
513
514    Get the file from the source, it must not have been in the memcache. If
515    possible, we'll use the zip file index to quickly locate where the file
516    should be found. (See MapToFileArchive documentation for assumptions about
517    file ordering.) If we don't have an index or don't find the file where the
518    index says we should, look through all the zip files to find it.
519
520    Args:
521      file_path: the file that we're looking for
522
523    Returns:
524      The contents of the requested file
525    """
526    resp_data = None
527    file_itr = iter(self.zipfilenames)
528
529    # check the index, if we have one, to see what archive the file is in
530    archive_name = self.MapFileToArchive(file_path)
531    if not archive_name:
532      archive_name = file_itr.next()[0]
533
534    while resp_data is None and archive_name:
535      zip_archive = self.LoadZipFile(archive_name)
536      if zip_archive:
537
538        # we expect some lookups will fail, and that's okay, 404s will deal
539        # with that
540        try:
541          resp_data = zip_archive.read(file_path)
542        except (KeyError, RuntimeError), err:
543          # no op
544          x = False
545        if resp_data is not None:
546          logging.info('%s read from %s', file_path, archive_name)
547
548      try:
549        archive_name = file_itr.next()[0]
550      except (StopIteration), err:
551        archive_name = False
552
553    return resp_data
554
555  def LoadZipFile(self, zipfilename):
556    """Convenience method to load zip file.
557
558    Just a convenience method to load the zip file from the data store. This is
559    useful if we ever want to change data stores and also as a means of
560    dependency injection for testing. This method will look at our file cache
561    first, and then load and cache the file if there's a cache miss
562
563    Args:
564      zipfilename: the name of the zip file to load
565
566    Returns:
567      The zip file requested, or None if there is an I/O error
568    """
569    zip_archive = None
570    zip_archive = self.zipfile_cache.get(zipfilename)
571    if zip_archive is None:
572      try:
573        zip_archive = zipfile.ZipFile(zipfilename)
574        self.zipfile_cache[zipfilename] = zip_archive
575      except (IOError, RuntimeError), err:
576        logging.error('Can\'t open zipfile %s, cause: %s' % (zipfilename,
577                                                             err))
578    return zip_archive
579
580  def MapFileToArchive(self, file_path):
581    """Given a file name, determine what archive it should be in.
582
583    This method makes two critical assumptions.
584    (1) The zip files passed as an argument to the handler, if concatenated
585        in that same order, would result in a total ordering
586        of all the files. See (2) for ordering type.
587    (2) Upper case letters before lower case letters. The traversal of a
588        directory tree is depth first. A parent directory's files are added
589        before the files of any child directories
590
591    Args:
592      file_path: the file to be mapped to an archive
593
594    Returns:
595      The name of the archive where we expect the file to be
596    """
597    num_archives = len(self.zipfilenames)
598    while num_archives > 0:
599      target = self.zipfilenames[num_archives - 1]
600      if len(target) > 1:
601        if self.CompareFilenames(target[1], file_path) >= 0:
602          return target[0]
603      num_archives -= 1
604
605    return None
606
607  def CompareFilenames(self, file1, file2):
608    """Determines whether file1 is lexigraphically 'before' file2.
609
610    WARNING: This method assumes that paths are output in a depth-first,
611    with parent directories' files stored before childs'
612
613    We say that file1 is lexigraphically before file2 if the last non-matching
614    path segment of file1 is alphabetically before file2.
615
616    Args:
617      file1: the first file path
618      file2: the second file path
619
620    Returns:
621      A positive number if file1 is before file2
622      A negative number if file2 is before file1
623      0 if filenames are the same
624    """
625    f1_segments = file1.split('/')
626    f2_segments = file2.split('/')
627
628    segment_ptr = 0
629    while (segment_ptr < len(f1_segments) and
630           segment_ptr < len(f2_segments) and
631           f1_segments[segment_ptr] == f2_segments[segment_ptr]):
632      segment_ptr += 1
633
634    if len(f1_segments) == len(f2_segments):
635
636      # we fell off the end, the paths much be the same
637      if segment_ptr == len(f1_segments):
638        return 0
639
640      # we didn't fall of the end, compare the segments where they differ
641      if f1_segments[segment_ptr] < f2_segments[segment_ptr]:
642        return 1
643      elif f1_segments[segment_ptr] > f2_segments[segment_ptr]:
644        return -1
645      else:
646        return 0
647
648      # the number of segments differs, we either mismatched comparing
649      # directories, or comparing a file to a directory
650    else:
651
652      # IF we were looking at the last segment of one of the paths,
653      # the one with fewer segments is first because files come before
654      # directories
655      # ELSE we just need to compare directory names
656      if (segment_ptr + 1 == len(f1_segments) or
657          segment_ptr + 1 == len(f2_segments)):
658        return len(f2_segments) - len(f1_segments)
659      else:
660        if f1_segments[segment_ptr] < f2_segments[segment_ptr]:
661          return 1
662        elif f1_segments[segment_ptr] > f2_segments[segment_ptr]:
663          return -1
664        else:
665          return 0
666
667  def SetCachingHeaders(self, revalidate):
668    """Set caching headers for the request."""
669    max_age = self.MAX_AGE
670    #self.response.headers['Expires'] = email.Utils.formatdate(
671    #    time.time() + max_age, usegmt=True)
672    cache_control = []
673    if self.PUBLIC:
674      cache_control.append('public')
675    cache_control.append('max-age=%d' % max_age)
676    if revalidate:
677      cache_control.append('must-revalidate')
678    self.response.headers['Cache-Control'] = ', '.join(cache_control)
679
680  def GetFromCache(self, filename):
681    """Get file from memcache, if available.
682
683    Args:
684      filename: The URL of the file to return
685
686    Returns:
687      The content of the file
688    """
689    return memcache.get('%s%s' % (self.CACHE_PREFIX, filename))
690
691  def StoreOrUpdateInCache(self, filename, data):
692    """Store data in the cache.
693
694    Store a piece of data in the memcache. Memcache has a maximum item size of
695    1*10^6 bytes. If the data is too large, fail, but log the failure. Future
696    work will consider compressing the data before storing or chunking it
697
698    Args:
699      filename: the name of the file to store
700      data: the data of the file
701
702    Returns:
703      None
704    """
705    try:
706      if not memcache.add('%s%s' % (self.CACHE_PREFIX, filename), data):
707        memcache.replace('%s%s' % (self.CACHE_PREFIX, filename), data)
708    except (ValueError), err:
709      logging.warning('Data size too large to cache\n%s' % err)
710
711  def Write404Error(self):
712    """Ouptut a simple 404 response."""
713    self.error(404)
714    self.response.out.write(
715        ''.join(['<html><head><title>404: Not Found</title></head>',
716                 '<body><b><h2>Error 404</h2><br/>',
717                 'File not found</b></body></html>']))
718
719  def StoreInNegativeCache(self, filename):
720    """If a non-existant URL is accessed, cache this result as well.
721
722    Future work should consider setting a maximum negative cache size to
723    prevent it from from negatively impacting the real cache.
724
725    Args:
726      filename: URL to add ot negative cache
727
728    Returns:
729      None
730    """
731    memcache.add('%s%s' % (self.NEG_CACHE_PREFIX, filename), -1)
732
733  def GetFromNegativeCache(self, filename):
734    """Retrieve from negative cache.
735
736    Args:
737      filename: URL to retreive
738
739    Returns:
740      The file contents if present in the negative cache.
741    """
742    return memcache.get('%s%s' % (self.NEG_CACHE_PREFIX, filename))
743
def main():
  """Run a WSGI application that maps '/<segment>/<rest>' to the handler."""
  routes = [('/([^/]+)/(.*)', MemcachedZipHandler)]
  util.run_wsgi_app(webapp.WSGIApplication(routes))


if __name__ == '__main__':
  main()
752