10f1bc08d4cfcc34181b0b5cbf065c40f687bf740Torne (Richard Coles)# Copyright 2013 The Chromium Authors. All rights reserved.
20f1bc08d4cfcc34181b0b5cbf065c40f687bf740Torne (Richard Coles)# Use of this source code is governed by a BSD-style license that can be
30f1bc08d4cfcc34181b0b5cbf065c40f687bf740Torne (Richard Coles)# found in the LICENSE file.
40f1bc08d4cfcc34181b0b5cbf065c40f687bf740Torne (Richard Coles)
5c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdochimport logging
60f1bc08d4cfcc34181b0b5cbf065c40f687bf740Torne (Richard Coles)import mimetypes
75d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)import posixpath
8c5cede9ae108bb15f6b7a8aea21c7e1fefa2834cBen Murdochimport traceback
90f1bc08d4cfcc34181b0b5cbf065c40f687bf740Torne (Richard Coles)
100f1bc08d4cfcc34181b0b5cbf065c40f687bf740Torne (Richard Coles)from compiled_file_system import SingleFile
110f1bc08d4cfcc34181b0b5cbf065c40f687bf740Torne (Richard Coles)from directory_zipper import DirectoryZipper
12a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles)from docs_server_utils import ToUnicode
135d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)from file_system import FileNotFoundError
14effb81e5f8246d0db0270817048dc992db66e9fbBen Murdochfrom future import Future
155d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)from path_canonicalizer import PathCanonicalizer
160529e5d033099cbfc42635f6f6183833b09dff6eBen Murdochfrom path_util import AssertIsValid, IsDirectory, Join, ToDirectory
175d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)from special_paths import SITE_VERIFICATION_FILE
180f1bc08d4cfcc34181b0b5cbf065c40f687bf740Torne (Richard Coles)from third_party.handlebar import Handlebar
195d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)from third_party.markdown import markdown
205d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)
215d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)
225d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)_MIMETYPE_OVERRIDES = {
235d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  # SVG is not supported by mimetypes.guess_type on AppEngine.
245d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  '.svg': 'image/svg+xml',
255d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)}
260f1bc08d4cfcc34181b0b5cbf065c40f687bf740Torne (Richard Coles)
270f1bc08d4cfcc34181b0b5cbf065c40f687bf740Torne (Richard Coles)
class ContentAndType(object):
  '''Value object returned from ContentProvider.GetContentAndType.

  Holds three attributes, stored exactly as given:
    |content|       the content itself.
    |content_type|  the content type of |content|.
    |version|       the version associated with |content|.
  '''

  def __init__(self, content, content_type, version):
    (self.content,
     self.content_type,
     self.version) = (content, content_type, version)
360f1bc08d4cfcc34181b0b5cbf065c40f687bf740Torne (Richard Coles)
370f1bc08d4cfcc34181b0b5cbf065c40f687bf740Torne (Richard Coles)
class ContentProvider(object):
  '''Returns file contents correctly typed for their content-types (in the HTTP
  sense). Content-type is determined from Python's mimetype library which
  guesses based on the file extension, except for extensions listed in
  |_MIMETYPE_OVERRIDES| (which take precedence) and Markdown (.md) files,
  which are converted to HTML and reported as text/html.

  Typically the file contents will be either str (for binary content) or
  unicode (for text content). However, HTML files *may* be returned as
  Handlebar templates (if |supports_templates| is True on construction), in
  which case the caller will presumably want to Render them.

  Zip files are automatically created and returned for .zip file extensions if
  |supports_zip| is True.

  |default_extensions| is a list of file extensions which are queried when no
  file extension is given to GetCanonicalPath/GetContentAndType.  Typically
  this will include .html.
  '''

  def __init__(self,
               name,
               compiled_fs_factory,
               file_system,
               object_store_creator,
               default_extensions=(),
               supports_templates=False,
               supports_zip=False):
    # Public.
    self.name = name
    self.file_system = file_system
    # Private.
    self._content_cache = compiled_fs_factory.Create(file_system,
                                                     self._CompileContent,
                                                     ContentProvider)
    self._path_canonicalizer = PathCanonicalizer(file_system,
                                                 object_store_creator,
                                                 default_extensions)
    self._default_extensions = default_extensions
    self._supports_templates = supports_templates
    # |_directory_zipper| doubles as the "zip is supported" flag: it is None
    # unless |supports_zip| was requested.
    if supports_zip:
      self._directory_zipper = DirectoryZipper(compiled_fs_factory, file_system)
    else:
      self._directory_zipper = None

  @SingleFile
  def _CompileContent(self, path, text):
    '''Builds the ContentAndType for the file at |path| whose raw contents are
    |text|. |text| must not be None.

    - .md files are rendered to HTML with markdown and reported as text/html
      (wrapped in a Handlebar if templates are supported).
    - Files whose type cannot be guessed are returned untouched as text/plain.
    - text/html is converted with ToUnicode (and wrapped in a Handlebar if
      templates are supported).
    - Other text/* types, application/javascript and application/json are
      converted with ToUnicode.
    - Anything else is returned untouched.

    The version of the result is taken from file_system.Stat(path).
    '''
    assert text is not None, path
    _, ext = posixpath.splitext(path)
    mimetype = _MIMETYPE_OVERRIDES.get(ext, mimetypes.guess_type(path)[0])
    if ext == '.md':
      # See http://pythonhosted.org/Markdown/extensions
      # for details on "extensions=".
      content = markdown(ToUnicode(text),
                         extensions=('extra', 'headerid', 'sane_lists'))
      if self._supports_templates:
        content = Handlebar(content, name=path)
      mimetype = 'text/html'
    elif mimetype is None:
      content = text
      mimetype = 'text/plain'
    elif mimetype == 'text/html':
      content = ToUnicode(text)
      if self._supports_templates:
        content = Handlebar(content, name=path)
    elif (mimetype.startswith('text/') or
          mimetype in ('application/javascript', 'application/json')):
      content = ToUnicode(text)
    else:
      content = text
    return ContentAndType(content,
                          mimetype,
                          self.file_system.Stat(path).version)

  def GetCanonicalPath(self, path):
    '''Gets the canonical location of |path|. This class is tolerant of
    spelling errors and missing files that are in other directories, and this
    returns the correct/canonical path for those.

    For example, the canonical path of "browseraction" is probably
    "extensions/browserAction.html".

    Note that the canonical path is relative to this content provider i.e.
    given relative to |path|. It does not add the "serveFrom" prefix which
    would have been pulled out in ContentProviders, callers must do that
    themselves.
    '''
    AssertIsValid(path)
    base, ext = posixpath.splitext(path)
    if self._directory_zipper and ext == '.zip':
      # The canonical location of zip files is the canonical location of the
      # directory to zip + '.zip'.
      return self._path_canonicalizer.Canonicalize(base + '/').rstrip('/') + ext
    return self._path_canonicalizer.Canonicalize(path)

  def GetContentAndType(self, path):
    '''Returns the ContentAndType of the file at |path|. The result is
    deferred: callers resolve it by calling Get() on the returned object.

    If zip is supported and |path| ends in .zip, the directory named by |path|
    minus the extension is zipped and returned as application/zip with a
    version of None.

    If |path| has no extension, each of the default extensions is tried, then
    (if |path| is a directory) an "index" file with each of the default
    extensions. A directory without such an index file results in a
    FileNotFoundError being raised from the returned Future.
    '''
    AssertIsValid(path)
    base, ext = posixpath.splitext(path)

    # Check for a zip file first, if zip is enabled.
    if self._directory_zipper and ext == '.zip':
      zip_future = self._directory_zipper.Zip(ToDirectory(base))
      return Future(callback=
          lambda: ContentAndType(zip_future.Get(), 'application/zip', None))

    # If there is no file extension, look for a file with one of the default
    # extensions. If one cannot be found, check if the path is a directory.
    # If it is, then check for an index file with one of the default
    # extensions.
    if not ext:
      new_path = self._AddExt(path)
      # Add a trailing / to check if it is a directory and not a file with
      # no extension.
      if new_path is None and self.file_system.Exists(ToDirectory(path)).Get():
        new_path = self._AddExt(Join(path, 'index'))
        # If an index file wasn't found in this directory then we're never
        # going to find a file.
        if new_path is None:
          return FileNotFoundError.RaiseInFuture('"%s" is a directory' % path)
      if new_path is not None:
        path = new_path

    return self._content_cache.GetFromFile(path)

  def _AddExt(self, path):
    '''Tries to append each of the default file extensions to path and returns
    the first one that is an existing file, or None if there is no such file.
    '''
    for default_ext in self._default_extensions:
      if self.file_system.Exists(path + default_ext).Get():
        return path + default_ext
    return None

  def Cron(self):
    '''Requests the content of every file found by walking the file system
    from its root, plus the extension-less alias of every file that has one
    of the default extensions (other than the site verification file), and
    runs the path canonicalizer's Cron.

    Returns a Future which, when resolved, resolves each of those requests in
    turn. A request that fails with an Exception is logged together with its
    path and does not prevent the remaining requests from being resolved.
    '''
    futures = [('<path_canonicalizer>',  # semi-arbitrary string since there is
                                         # no path associated with this Future.
                self._path_canonicalizer.Cron())]
    for root, _, files in self.file_system.Walk(''):
      for f in files:
        futures.append((Join(root, f),
                        self.GetContentAndType(Join(root, f))))
        # Also cache the extension-less version of the file if needed.
        base, ext = posixpath.splitext(f)
        if f != SITE_VERIFICATION_FILE and ext in self._default_extensions:
          futures.append((Join(root, base),
                          self.GetContentAndType(Join(root, base))))
      # TODO(kalman): Cache .zip files for each directory (if supported).
    def resolve():
      for label, future in futures:
        try:
          future.Get()
        except Exception:
          # Deliberately best-effort: one bad file must not stop the rest from
          # being resolved. Only Exception is caught so that KeyboardInterrupt,
          # SystemExit and similar signals still propagate.
          logging.error('%s: %s', label, traceback.format_exc())
    return Future(callback=resolve)

  def __repr__(self):
    return 'ContentProvider of <%s>' % repr(self.file_system)
193