1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3#
4# Copyright 2009 Google Inc.
5#
6# Licensed under the Apache License, Version 2.0 (the "License");
7# you may not use this file except in compliance with the License.
8# You may obtain a copy of the License at
9#
10#   http://www.apache.org/licenses/LICENSE-2.0
11#
12# Unless required by applicable law or agreed to in writing, software
13# distributed under the License is distributed on an "AS IS" BASIS,
14# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15# See the License for the specific language governing permissions and
16# limitations under the License.
17#
18
19"""A class to serve pages from zip files and use memcache for performance.
20
21This contains a class and a function to create an anonymous instance of the
22class to serve HTTP GET requests. Memcache is used to increase response speed
23and lower processing cycles used in serving. Credit to Guido van Rossum and
24his implementation of zipserve which served as a reference as I wrote this.
25
26  MemcachedZipHandler: Class that serves request
27  create_handler: method to create instance of MemcachedZipHandler
28"""
29
30__author__ = 'jmatt@google.com (Justin Mattson)'
31
32import email.Utils
33import logging
34import mimetypes
35import re
36import sys
37import time
38import yaml
39import zipfile
40
41from google.appengine.api import memcache
42from google.appengine.ext import webapp
43from google.appengine.ext.webapp import util
44from time import localtime, strftime
45
def create_handler(zip_files, max_age=None, public=None):
  """Factory method to create a MemcachedZipHandler bound to a set of zip files.

  Args:
    zip_files: A non-empty list of zip file names, or a list of lists whose
        first member is the zip file name. Plain names are wrapped into
        one-element lists; the caller's list is left untouched. See
        MemcachedZipHandler.MapFileToArchive for how a second member is used.
    max_age: The maximum client-side cache lifetime. None keeps the
        MemcachedZipHandler.MAX_AGE default.
    public: Whether this should be declared public in the client-side cache.
        None keeps the MemcachedZipHandler.PUBLIC default.

  Returns:
    A MemcachedZipHandler subclass whose get() serves from zip_files, for use
    with App Engine.

  Raises:
    ValueError: if the zip_files argument is empty or is not a list
  """
  if not zip_files or not isinstance(zip_files, list):
    raise ValueError('File name arguments must be a list')

  # Convert the plain-list format to the list-of-lists format. A new list is
  # built so that the argument passed in by the caller is not modified.
  normalized = []
  for entry in zip_files:
    if isinstance(entry, list):
      normalized.append(entry)
    else:
      normalized.append([entry])

  class HandlerWrapper(MemcachedZipHandler):
    """Binds the factory arguments to a MemcachedZipHandler subclass.

    The handler is given to the framework as a class, so the factory
    arguments are captured here by closure and applied on each request.
    """
    def get(self, name):
      self.zipfilenames = normalized
      # The overrides must be in place before TrueGet runs, because the
      # caching headers are computed while the response is being built.
      if max_age is not None:
        self.MAX_AGE = max_age
      if public is not None:
        self.PUBLIC = public
      self.TrueGet(name)

  return HandlerWrapper
87
88
class MemcachedZipHandler(webapp.RequestHandler):
  """Handles get requests for a given URL.

  Serves a GET request from a series of zip files. As files are served they are
  put into memcache, which is much faster than retrieving them from the zip
  source file again. It also uses considerably fewer CPU cycles.
  """
  zipfile_cache = {}                # class cache of source zip files
  MAX_AGE = 43200                   # max client-side cache lifetime, in seconds
  PUBLIC = True                     # public cache setting
  CACHE_PREFIX = 'cache://'         # memcache key prefix for actual URLs
  NEG_CACHE_PREFIX = 'noncache://'  # memcache key prefix for non-existent URLs
  REDIRECT_PREFIX = 'redirect://'   # memcache key prefix for redirect data
  REDIRECT_FILE = 'redirects.yaml'  # Name of file that contains redirect table
  REDIRECT_SRC = 'src'              # Name of the 'source' attribute for a
                                    #   redirect table entry
  REDIRECT_DST = 'dst'              # Name of the 'destination' attribute for
                                    #   a redirect table entry
  REDIRECT_TYPE = 'type'            # Name of the 'type' attribute for a
                                    #   redirect table entry
  REDIRECT_TYPE_PERM = 'permanent'  # Redirect 'type' string indicating a 301
                                    #   redirect should be served
  REDIRECT_TYPE_TEMP = 'temporary'  # Redirect 'type' string indicating a 302
                                    #   redirect should be served
  intlString = 'intl/'              # URL path prefix of localized requests
  # Language codes accepted as the path segment that follows intlString.
  validLangs = ['en', 'de', 'es', 'fr','it','ja','ko','ru','zh-CN','zh-cn','zh-TW','zh-tw']
115
116  def TrueGet(self, reqUri):
117    """The top-level entry point to serving requests.
118
119    Called 'True' get because it does the work when called from the wrapper
120    class' get method. Some logic is applied to the request to serve files
121    from an intl/<lang>/... directory or fall through to the default language.
122
123    Args:
124      name: URL requested
125
126    Returns:
127      None
128    """
129    langName = 'en'
130    resetLangCookie = False
131    urlLangName = None
132    retry = False
133    isValidIntl = False
134    isStripped = False
135
136    # Try to retrieve the user's lang pref from the cookie. If there is no
137    # lang pref cookie in the request, add set-cookie to the response with the
138    # default value of 'en'.
139    try:
140      langName = self.request.cookies['android_developer_pref_lang']
141    except KeyError:
142      resetLangCookie = True
143      #logging.info('==========================EXCEPTION: NO LANG COOKIE FOUND, USING [%s]', langName)
144    logging.info('==========================REQ INIT name [%s] langName [%s] resetLangCookie [%s]', reqUri, langName, resetLangCookie)
145
146    # Do some prep for handling intl requests. Parse the url and validate
147    # the intl/lang substring, extract the url lang code (urlLangName) and the
148    # the uri that follows the intl/lang substring(contentUri)
149    sections = reqUri.split("/", 2)
150    isIntl = len(sections) > 2 and (sections[0] == "intl")
151    if isIntl:
152      isValidIntl = sections[1] in self.validLangs
153      urlLangName = sections[1]
154      contentUri = sections[2]
155      logging.info('  Content URI is [%s]...', contentUri)
156      if isValidIntl:
157        if (langName != urlLangName) or (langName == 'en'):
158          # if the lang code in the request is different from that in
159          # the cookie, or if the target lang is en, strip the
160          # intl/nn substring. It will later be redirected to
161          # the user's preferred language url.
162          # logging.info('  Handling a MISMATCHED intl request')
163          reqUri = contentUri
164          isStripped = True
165          isValidIntl = False
166          isIntl = False
167          #logging.info('INTL PREP resetting langName to urlLangName [%s]', langName)
168        #else:
169        #  logging.info('INTL PREP no need to reset langName')
170    else:
171      contentUri = reqUri
172
173    # Apply manual redirects from redirects.yaml. This occurs before any
174    # other mutations are performed, to avoid odd redirect behavior
175    # (For example, a user may want to redirect a directory without having
176    # /index.html appended.)
177    did_redirect = self.ProcessManualRedirects(contentUri, langName, isIntl)
178    if did_redirect:
179      return
180
181    # Preprocess the req url. If it references a directory or the domain itself,
182    # append '/index.html' to the url and 302 redirect. Otherwise, continue
183    # processing the request below.
184    did_redirect = self.PreprocessUrl(reqUri, langName)
185    if did_redirect:
186      return
187
188    # Send for processing
189    if self.isCleanUrl(reqUri, langName, isValidIntl, isStripped):
190      # handle a 'clean' request.
191      # Try to form a response using the actual request url.
192      # logging.info('  Request being handled as clean: [%s]', name)
193      if not self.CreateResponse(reqUri, langName, isValidIntl, resetLangCookie):
194        # If CreateResponse returns False, there was no such document
195        # in the intl/lang tree. Before going to 404, see if there is an
196        # English-language version of the doc in the default
197        # default tree and return it, else go to 404.
198        self.CreateResponse(contentUri, langName, False, resetLangCookie)
199
200    elif isIntl:
201      # handle the case where we need to pass through an invalid intl req
202      # for processing (so as to get 404 as appropriate). This is needed
203      # because intl urls are passed through clean and retried in English,
204      # if necessary.
205      # logging.info('  Handling an invalid intl request...')
206      self.CreateResponse(reqUri, langName, isValidIntl, resetLangCookie)
207
208    else:
209      # handle the case where we have a non-clean url (usually a non-intl
210      # url) that we need to interpret in the context of any lang pref
211      # that is set. Prepend an intl/lang string to the request url and
212      # send it as a 302 redirect. After the redirect, the subsequent
213      # request will be handled as a clean url.
214      self.RedirToIntl(reqUri, self.intlString, langName)
215
216  def ProcessManualRedirects(self, contentUri, langName, isIntl):
217    """Compute any manual redirects for a request and execute them.
218
219    This allows content authors to manually define a set of regex rules which,
220    when matched, will cause an HTTP redirect to be performed.
221
222    Redirect rules are typically stored in a file named redirects.yaml. See the
223    comments in that file for more information about formatting.
224
225    Redirect computations are stored in memcache for performance.
226
227    Note that international URIs are handled automatically, and are assumed to
228    mirror redirects for non-intl requests.
229
230    Args:
231      contentUri: The relative URI (without leading slash) that was requested.
232        This should NOT contain an intl-prefix, if otherwise present.
233      langName: The requested language.
234      isIntl: True if contentUri originally contained an intl prefix.
235
236    Results:
237      boolean: True if a redirect has been set, False otherwise.
238    """
239    # Redirect data is stored in memcache for performance
240    memcache_key = self.REDIRECT_PREFIX + contentUri
241    redirect_data = memcache.get(memcache_key)
242    if redirect_data is None:
243      logging.info('Redirect cache miss. Computing new redirect data.\n'
244                   'Memcache Key: ' + memcache_key)
245      redirect_data = self.ComputeManualRedirectUrl(contentUri)
246      memcache.set(memcache_key, redirect_data)
247    contentUri = redirect_data[0]
248    redirectType = redirect_data[1]
249
250    # If this is an international URL, prepend intl path to minimize
251    # number of redirects
252    if isIntl:
253      contentUri = '/%s%s%s' % (self.intlString, langName, contentUri)
254
255    if redirectType is None:
256      # No redirect necessary
257      return False
258    elif redirectType == self.REDIRECT_TYPE_PERM:
259      logging.info('Sending permanent redirect: ' + contentUri);
260      self.redirect(contentUri, permanent=True)
261      return True
262    elif redirectType == self.REDIRECT_TYPE_TEMP:
263      logging.info('Sending temporary redirect: ' + contentUri);
264      self.redirect(contentUri, permanent=False)
265      return True
266    else:
267      # Invalid redirect type
268      logging.error('Invalid redirect type: %s', redirectType)
269      raise ('Invalid redirect type: %s', redirectType)
270
271  def ComputeManualRedirectUrl(self, uri):
272    """Read redirects file and evaluate redirect rules for a given URI.
273
274    Args:
275      uri: The relative URI (without leading slash) for which redirect data
276        should be computed. No special handling of intl URIs is pefromed
277        at this level.
278
279    Returns:
280      tuple: The computed redirect data. This tuple has two parts:
281        redirect_uri: The new URI that should be used. (If no redirect rule is
282          found, the original input to 'uri' will be returned.
283        redirect_type: Either 'permanent' for an HTTP 301 redirect, 'temporary'
284          for an HTTP 302 redirect, or None if no redirect should be performed.
285    """
286    # Redircts are defined in a file named redirects.yaml.
287    try:
288      f = open(self.REDIRECT_FILE)
289      data = yaml.load(f)
290      f.close()
291    except IOError, e:
292      logging.warning('Error opening redirect file (' + self.REDIRECT_FILE +
293                      '): ' + e.strerror)
294      return (uri, None)
295
296    # The incoming path is missing a leading slash. However, many parts of the
297    # redirect system require leading slashes to distinguish between relative
298    # and absolute redirects. So, to compensate for this, we'll add a leading
299    # slash here as well.
300    uri = '/' + uri
301
302    # Check to make sure we actually got an iterable list out of the YAML file
303    if data is None:
304      logging.warning('Redirect file (' + self.REDIRECT_FILE + ') not valid '
305                      'YAML.')
306    elif 'redirects' not in data:
307      logging.warning('Redirect file (' + self.REDIRECT_FILE + ') not '
308                      'properly formatted -- no \'redirects:\' header.')
309    elif hasattr(data['redirects'], '__iter__'):
310      # Iterate through redirect data, try to find a redirect that matches.
311      for redirect in data['redirects']:
312          # Note: re.search adds an implied '^' to the beginning of the regex
313          # This means that the regex must match from the beginning of the
314          # string.
315          try:
316            if re.match(redirect[self.REDIRECT_SRC], uri):
317              # Match found. Apply redirect rule.
318              redirect_uri = re.sub('^' + redirect[self.REDIRECT_SRC],
319                  redirect[self.REDIRECT_DST], uri)
320              logging.info('Redirect rule matched.\n'
321                             'Rule: %s\n'
322                             'Src: %s\n'
323                             'Dst: %s',
324                           redirect[self.REDIRECT_SRC], uri, redirect_uri)
325              if self.REDIRECT_TYPE in redirect:
326                redirect_type = redirect[self.REDIRECT_TYPE]
327              else:
328                # Default redirect type, if unspecified
329                redirect_type = self.REDIRECT_TYPE_PERM
330              return (redirect_uri, redirect_type)
331          except:
332            e = sys.exc_info()[1]
333            raise ('Error while processing redirect rule.\n'
334                     'Rule: %s\n'
335                     'Error: %s' % (redirect[self.REDIRECT_SRC], e))
336    # No redirect found, return URL unchanged
337    return (uri, None)
338
339  def isCleanUrl(self, name, langName, isValidIntl, isStripped):
340    """Determine whether to pass an incoming url straight to processing.
341
342       Args:
343         name: The incoming URL
344
345       Returns:
346         boolean: Whether the URL should be sent straight to processing
347    """
348    # logging.info('  >>>> isCleanUrl name [%s] langName [%s] isValidIntl [%s]', name, langName, isValidIntl)
349    if (langName == 'en' and not isStripped) or isValidIntl or not ('.html' in name) or (not isValidIntl and not langName):
350      return True
351
352  def PreprocessUrl(self, name, langName):
353    """Any preprocessing work on the URL when it comes in.
354
355    Put any work related to interpreting the incoming URL here. For example,
356    this is used to redirect requests for a directory to the index.html file
357    in that directory. Subclasses should override this method to do different
358    preprocessing.
359
360    Args:
361      name: The incoming URL
362
363    Returns:
364      True if the request was redirected to '/index.html'.
365      Otherewise False.
366    """
367
368    # determine if this is a request for a directory
369    final_path_segment = name
370    final_slash_offset = name.rfind('/')
371    if final_slash_offset != len(name) - 1:
372      final_path_segment = name[final_slash_offset + 1:]
373      if final_path_segment.find('.') == -1:
374        name = ''.join([name, '/'])
375
376    # if this is a directory or the domain itself, redirect to /index.html
377    if not name or (name[len(name) - 1:] == '/'):
378      uri = ''.join(['/', name, 'index.html'])
379      # logging.info('--->PREPROCESSING REDIRECT [%s] to [%s] with langName [%s]', name, uri, langName)
380      self.redirect(uri, False)
381      return True
382    else:
383      return False
384
385  def RedirToIntl(self, name, intlString, langName):
386    """Redirect an incoming request to the appropriate intl uri.
387
388       For non-en langName, builds the intl/lang string from a
389       base (en) string and redirects (302) the request to look for
390       a version of the file in langName. For en langName, simply
391       redirects a stripped uri string (intl/nn removed).
392
393    Args:
394      name: The incoming, preprocessed URL
395
396    Returns:
397      The lang-specific URL
398    """
399    if not (langName == 'en'):
400      builtIntlLangUri = ''.join([intlString, langName, '/', name, '?', self.request.query_string])
401    else:
402      builtIntlLangUri = name
403    uri = ''.join(['/', builtIntlLangUri])
404    logging.info('-->REDIRECTING %s to  %s', name, uri)
405    self.redirect(uri, False)
406    return uri
407
408  def CreateResponse(self, name, langName, isValidIntl, resetLangCookie):
409    """Process the url and form a response, if appropriate.
410
411       Attempts to retrieve the requested file (name) from cache,
412       negative cache, or store (zip) and form the response.
413       For intl requests that are not found (in the localized tree),
414       returns False rather than forming a response, so that
415       the request can be retried with the base url (this is the
416       fallthrough to default language).
417
418       For requests that are found, forms the headers and
419       adds the content to the response entity. If the request was
420       for an intl (localized) url, also resets the language cookie
421       to the language specified in the url if needed, to ensure that
422       the client language and response data remain harmonious.
423
424    Args:
425      name: The incoming, preprocessed URL
426      langName: The language id. Used as necessary to reset the
427                language cookie in the response.
428      isValidIntl: If present, indicates whether the request is
429                   for a language-specific url
430      resetLangCookie: Whether the response should reset the
431                       language cookie to 'langName'
432
433    Returns:
434      True: A response was successfully created for the request
435      False: No response was created.
436    """
437    # see if we have the page in the memcache
438    logging.info('PROCESSING %s langName [%s] isValidIntl [%s] resetLang [%s]',
439      name, langName, isValidIntl, resetLangCookie)
440    resp_data = self.GetFromCache(name)
441    if resp_data is None:
442      logging.info('  Cache miss for %s', name)
443      resp_data = self.GetFromNegativeCache(name)
444      if resp_data is None:
445        resp_data = self.GetFromStore(name)
446
447        # IF we have the file, put it in the memcache
448        # ELSE put it in the negative cache
449        if resp_data is not None:
450          self.StoreOrUpdateInCache(name, resp_data)
451        elif isValidIntl:
452          # couldn't find the intl doc. Try to fall through to English.
453          #logging.info('  Retrying with base uri...')
454          return False
455        else:
456          logging.info('  Adding %s to negative cache, serving 404', name)
457          self.StoreInNegativeCache(name)
458          self.Write404Error()
459          return True
460      else:
461        # found it in negative cache
462        self.Write404Error()
463        return True
464
465    # found content from cache or store
466    logging.info('FOUND CLEAN')
467    if resetLangCookie:
468      logging.info('  Resetting android_developer_pref_lang cookie to [%s]',
469      langName)
470      expireDate = time.mktime(localtime()) + 60 * 60 * 24 * 365 * 10
471      self.response.headers.add_header('Set-Cookie',
472      'android_developer_pref_lang=%s; path=/; expires=%s' %
473      (langName, strftime("%a, %d %b %Y %H:%M:%S", localtime(expireDate))))
474    mustRevalidate = False
475    if ('.html' in name):
476      # revalidate html files -- workaround for cache inconsistencies for
477      # negotiated responses
478      mustRevalidate = True
479      #logging.info('  Adding [Vary: Cookie] to response...')
480      self.response.headers.add_header('Vary', 'Cookie')
481    content_type, encoding = mimetypes.guess_type(name)
482    if content_type:
483      self.response.headers['Content-Type'] = content_type
484      self.SetCachingHeaders(mustRevalidate)
485      self.response.out.write(resp_data)
486    elif (name == 'favicon.ico'):
487      self.response.headers['Content-Type'] = 'image/x-icon'
488      self.SetCachingHeaders(mustRevalidate)
489      self.response.out.write(resp_data)
490    elif name.endswith('.psd'):
491      self.response.headers['Content-Type'] = 'application/octet-stream'
492      self.SetCachingHeaders(mustRevalidate)
493      self.response.out.write(resp_data)
494    elif name.endswith('.svg'):
495      self.response.headers['Content-Type'] = 'image/svg+xml'
496      self.SetCachingHeaders(mustRevalidate)
497      self.response.out.write(resp_data)
498    elif name.endswith('.mp4'):
499      self.response.headers['Content-Type'] = 'video/mp4'
500      self.SetCachingHeaders(mustRevalidate)
501      self.response.out.write(resp_data)
502    elif name.endswith('.webm'):
503      self.response.headers['Content-Type'] = 'video/webm'
504      self.SetCachingHeaders(mustRevalidate)
505      self.response.out.write(resp_data)
506    elif name.endswith('.ogv'):
507      self.response.headers['Content-Type'] = 'video/ogg'
508      self.SetCachingHeaders(mustRevalidate)
509      self.response.out.write(resp_data)
510    return True
511
512  def GetFromStore(self, file_path):
513    """Retrieve file from zip files.
514
515    Get the file from the source, it must not have been in the memcache. If
516    possible, we'll use the zip file index to quickly locate where the file
517    should be found. (See MapToFileArchive documentation for assumptions about
518    file ordering.) If we don't have an index or don't find the file where the
519    index says we should, look through all the zip files to find it.
520
521    Args:
522      file_path: the file that we're looking for
523
524    Returns:
525      The contents of the requested file
526    """
527    resp_data = None
528    file_itr = iter(self.zipfilenames)
529
530    # decode any escape characters in the URI
531    # Note: We are currenty just looking for '@' (%40)
532    file_path = file_path.replace('%40', '@')
533
534    # check the index, if we have one, to see what archive the file is in
535    archive_name = self.MapFileToArchive(file_path)
536    if not archive_name:
537      archive_name = file_itr.next()[0]
538
539    while resp_data is None and archive_name:
540      zip_archive = self.LoadZipFile(archive_name)
541      if zip_archive:
542
543        # we expect some lookups will fail, and that's okay, 404s will deal
544        # with that
545        try:
546          resp_data = zip_archive.read(file_path)
547        except (KeyError, RuntimeError), err:
548          # no op
549          x = False
550        if resp_data is not None:
551          logging.info('%s read from %s', file_path, archive_name)
552
553      try:
554        archive_name = file_itr.next()[0]
555      except (StopIteration), err:
556        archive_name = False
557
558    return resp_data
559
560  def LoadZipFile(self, zipfilename):
561    """Convenience method to load zip file.
562
563    Just a convenience method to load the zip file from the data store. This is
564    useful if we ever want to change data stores and also as a means of
565    dependency injection for testing. This method will look at our file cache
566    first, and then load and cache the file if there's a cache miss
567
568    Args:
569      zipfilename: the name of the zip file to load
570
571    Returns:
572      The zip file requested, or None if there is an I/O error
573    """
574    zip_archive = None
575    zip_archive = self.zipfile_cache.get(zipfilename)
576    if zip_archive is None:
577      try:
578        zip_archive = zipfile.ZipFile(zipfilename)
579        self.zipfile_cache[zipfilename] = zip_archive
580      except (IOError, RuntimeError), err:
581        logging.error('Can\'t open zipfile %s, cause: %s' % (zipfilename,
582                                                             err))
583    return zip_archive
584
585  def MapFileToArchive(self, file_path):
586    """Given a file name, determine what archive it should be in.
587
588    This method makes two critical assumptions.
589    (1) The zip files passed as an argument to the handler, if concatenated
590        in that same order, would result in a total ordering
591        of all the files. See (2) for ordering type.
592    (2) Upper case letters before lower case letters. The traversal of a
593        directory tree is depth first. A parent directory's files are added
594        before the files of any child directories
595
596    Args:
597      file_path: the file to be mapped to an archive
598
599    Returns:
600      The name of the archive where we expect the file to be
601    """
602    num_archives = len(self.zipfilenames)
603    while num_archives > 0:
604      target = self.zipfilenames[num_archives - 1]
605      if len(target) > 1:
606        if self.CompareFilenames(target[1], file_path) >= 0:
607          return target[0]
608      num_archives -= 1
609
610    return None
611
612  def CompareFilenames(self, file1, file2):
613    """Determines whether file1 is lexigraphically 'before' file2.
614
615    WARNING: This method assumes that paths are output in a depth-first,
616    with parent directories' files stored before childs'
617
618    We say that file1 is lexigraphically before file2 if the last non-matching
619    path segment of file1 is alphabetically before file2.
620
621    Args:
622      file1: the first file path
623      file2: the second file path
624
625    Returns:
626      A positive number if file1 is before file2
627      A negative number if file2 is before file1
628      0 if filenames are the same
629    """
630    f1_segments = file1.split('/')
631    f2_segments = file2.split('/')
632
633    segment_ptr = 0
634    while (segment_ptr < len(f1_segments) and
635           segment_ptr < len(f2_segments) and
636           f1_segments[segment_ptr] == f2_segments[segment_ptr]):
637      segment_ptr += 1
638
639    if len(f1_segments) == len(f2_segments):
640
641      # we fell off the end, the paths much be the same
642      if segment_ptr == len(f1_segments):
643        return 0
644
645      # we didn't fall of the end, compare the segments where they differ
646      if f1_segments[segment_ptr] < f2_segments[segment_ptr]:
647        return 1
648      elif f1_segments[segment_ptr] > f2_segments[segment_ptr]:
649        return -1
650      else:
651        return 0
652
653      # the number of segments differs, we either mismatched comparing
654      # directories, or comparing a file to a directory
655    else:
656
657      # IF we were looking at the last segment of one of the paths,
658      # the one with fewer segments is first because files come before
659      # directories
660      # ELSE we just need to compare directory names
661      if (segment_ptr + 1 == len(f1_segments) or
662          segment_ptr + 1 == len(f2_segments)):
663        return len(f2_segments) - len(f1_segments)
664      else:
665        if f1_segments[segment_ptr] < f2_segments[segment_ptr]:
666          return 1
667        elif f1_segments[segment_ptr] > f2_segments[segment_ptr]:
668          return -1
669        else:
670          return 0
671
672  def SetCachingHeaders(self, revalidate):
673    """Set caching headers for the request."""
674    max_age = self.MAX_AGE
675    #self.response.headers['Expires'] = email.Utils.formatdate(
676    #    time.time() + max_age, usegmt=True)
677    cache_control = []
678    if self.PUBLIC:
679      cache_control.append('public')
680    cache_control.append('max-age=%d' % max_age)
681    if revalidate:
682      cache_control.append('must-revalidate')
683    self.response.headers['Cache-Control'] = ', '.join(cache_control)
684
685  def GetFromCache(self, filename):
686    """Get file from memcache, if available.
687
688    Args:
689      filename: The URL of the file to return
690
691    Returns:
692      The content of the file
693    """
694    return memcache.get('%s%s' % (self.CACHE_PREFIX, filename))
695
696  def StoreOrUpdateInCache(self, filename, data):
697    """Store data in the cache.
698
699    Store a piece of data in the memcache. Memcache has a maximum item size of
700    1*10^6 bytes. If the data is too large, fail, but log the failure. Future
701    work will consider compressing the data before storing or chunking it
702
703    Args:
704      filename: the name of the file to store
705      data: the data of the file
706
707    Returns:
708      None
709    """
710    try:
711      if not memcache.add('%s%s' % (self.CACHE_PREFIX, filename), data):
712        memcache.replace('%s%s' % (self.CACHE_PREFIX, filename), data)
713    except (ValueError), err:
714      logging.warning('Data size too large to cache\n%s' % err)
715
716  def Write404Error(self):
717    """Ouptut a simple 404 response."""
718    self.error(404)
719    self.response.out.write(
720        ''.join(['<html><head><title>404: Not Found</title></head>',
721                 '<body><b><h2>Error 404</h2><br/>',
722                 'File not found</b></body></html>']))
723
724  def StoreInNegativeCache(self, filename):
725    """If a non-existant URL is accessed, cache this result as well.
726
727    Future work should consider setting a maximum negative cache size to
728    prevent it from from negatively impacting the real cache.
729
730    Args:
731      filename: URL to add ot negative cache
732
733    Returns:
734      None
735    """
736    memcache.add('%s%s' % (self.NEG_CACHE_PREFIX, filename), -1)
737
738  def GetFromNegativeCache(self, filename):
739    """Retrieve from negative cache.
740
741    Args:
742      filename: URL to retreive
743
744    Returns:
745      The file contents if present in the negative cache.
746    """
747    return memcache.get('%s%s' % (self.NEG_CACHE_PREFIX, filename))
748
def main():
  """Run a WSGI application that routes '/<segment>/<rest>' to the handler."""
  # NOTE(review): MemcachedZipHandler itself defines no get() and no
  # zipfilenames; presumably real deployments map URLs to the class returned
  # by create_handler() instead -- confirm.
  url_mapping = [('/([^/]+)/(.*)', MemcachedZipHandler)]
  application = webapp.WSGIApplication(url_mapping)
  util.run_wsgi_app(application)


if __name__ == '__main__':
  main()
757