1# Copyright 2013 The Chromium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import logging
6import mimetypes
7import posixpath
8import traceback
9
10from compiled_file_system import SingleFile
11from directory_zipper import DirectoryZipper
12from docs_server_utils import ToUnicode
13from file_system import FileNotFoundError
14from future import Future
15from path_canonicalizer import PathCanonicalizer
16from path_util import AssertIsValid, IsDirectory, Join, ToDirectory
17from special_paths import SITE_VERIFICATION_FILE
18from third_party.handlebar import Handlebar
19from third_party.markdown import markdown
20
21
22_MIMETYPE_OVERRIDES = {
23  # SVG is not supported by mimetypes.guess_type on AppEngine.
24  '.svg': 'image/svg+xml',
25}
26
27
class ContentAndType(object):
  '''The value produced by ContentProvider.GetContentAndType: a plain record
  bundling some |content| with its |content_type| and a |version|.

  The three constructor arguments are stored, untouched, as attributes of the
  same names.
  '''

  def __init__(self, content, content_type, version):
    self.content, self.content_type, self.version = (
        content, content_type, version)
36
37
class ContentProvider(object):
  '''Returns file contents correctly typed for their content-types (in the HTTP
  sense). Content-type is determined from Python's mimetype library which
  guesses based on the file extension (subject to _MIMETYPE_OVERRIDES).

  Typically the file contents will be either str (for binary content) or
  unicode (for text content). However, HTML files *may* be returned as
  Handlebar templates (if |supports_templates| is True on construction), in
  which case the caller will presumably want to Render them. Markdown (.md)
  files are converted to HTML and are then treated like any other HTML file.

  Zip files are automatically created and returned for .zip file extensions if
  |supports_zip| is True.

  |default_extensions| is a list of file extensions which are queried when no
  file extension is given to GetCanonicalPath/GetContentAndType.  Typically
  this will include .html.
  '''

  def __init__(self,
               name,
               compiled_fs_factory,
               file_system,
               object_store_creator,
               default_extensions=(),
               supports_templates=False,
               supports_zip=False):
    '''Creates a ContentProvider called |name| which serves content out of
    |file_system|.

    |compiled_fs_factory| is used to create the cache of compiled content
    (and the directory zipper, if |supports_zip|); |object_store_creator| is
    handed to the PathCanonicalizer. See the class comment for
    |default_extensions|, |supports_templates| and |supports_zip|.
    '''
    # Public.
    self.name = name
    self.file_system = file_system
    # Private.
    self._content_cache = compiled_fs_factory.Create(file_system,
                                                     self._CompileContent,
                                                     ContentProvider)
    self._path_canonicalizer = PathCanonicalizer(file_system,
                                                 object_store_creator,
                                                 default_extensions)
    self._default_extensions = default_extensions
    self._supports_templates = supports_templates
    # |_directory_zipper| doubles as the "is zip supported" flag: it is None
    # when zip support is disabled.
    if supports_zip:
      self._directory_zipper = DirectoryZipper(compiled_fs_factory, file_system)
    else:
      self._directory_zipper = None

  @SingleFile
  def _CompileContent(self, path, text):
    '''Compilation function for |_content_cache|: converts the raw |text| of
    the file at |path| into a ContentAndType.

    The content-type is chosen from the file extension, and the content is
    converted accordingly:
      - .md files are rendered to HTML and served as text/html.
      - Files of unknown type are passed through as text/plain.
      - text/html files are converted to unicode.
      - Other text/* files, JavaScript and JSON are converted to unicode.
      - Everything else is passed through untouched.
    HTML (including rendered Markdown) is additionally wrapped in a Handlebar
    template if this provider supports templates.

    The version of the result is the version that |file_system| reports for
    |path|.
    '''
    assert text is not None, path
    _, ext = posixpath.splitext(path)
    mimetype = _MIMETYPE_OVERRIDES.get(ext, mimetypes.guess_type(path)[0])
    if ext == '.md':
      # See http://pythonhosted.org/Markdown/extensions
      # for details on "extensions=".
      content = markdown(ToUnicode(text),
                         extensions=('extra', 'headerid', 'sane_lists'))
      if self._supports_templates:
        content = Handlebar(content, name=path)
      mimetype = 'text/html'
    elif mimetype is None:
      content = text
      mimetype = 'text/plain'
    elif mimetype == 'text/html':
      content = ToUnicode(text)
      if self._supports_templates:
        content = Handlebar(content, name=path)
    elif (mimetype.startswith('text/') or
          mimetype in ('application/javascript', 'application/json')):
      content = ToUnicode(text)
    else:
      content = text
    return ContentAndType(content,
                          mimetype,
                          self.file_system.Stat(path).version)

  def GetCanonicalPath(self, path):
    '''Gets the canonical location of |path|. This class is tolerant of
    spelling errors and missing files that are in other directories, and this
    returns the correct/canonical path for those.

    For example, the canonical path of "browseraction" is probably
    "extensions/browserAction.html".

    Note that the canonical path is relative to this content provider i.e.
    given relative to |path|. It does not add the "serveFrom" prefix which
    would have been pulled out in ContentProviders, callers must do that
    themselves.
    '''
    AssertIsValid(path)
    base, ext = posixpath.splitext(path)
    if self._directory_zipper and ext == '.zip':
      # The canonical location of zip files is the canonical location of the
      # directory to zip + '.zip'.
      return self._path_canonicalizer.Canonicalize(base + '/').rstrip('/') + ext
    return self._path_canonicalizer.Canonicalize(path)

  def GetContentAndType(self, path):
    '''Returns a Future to the ContentAndType of the file at |path|; call
    Get() on the result to obtain the ContentAndType itself.

    If zip is supported and |path| ends in .zip, the content is a zip of the
    directory named by |path| without its extension, the content-type is
    application/zip, and the version is None.

    If |path| has no extension then the default extensions are tried, first
    on |path| itself and then, if |path| is a directory, on its index file.
    A directory without such an index file results in a FileNotFoundError
    raised in the returned Future.
    '''
    AssertIsValid(path)
    base, ext = posixpath.splitext(path)

    # Check for a zip file first, if zip is enabled.
    if self._directory_zipper and ext == '.zip':
      zip_future = self._directory_zipper.Zip(ToDirectory(base))
      return Future(callback=
          lambda: ContentAndType(zip_future.Get(), 'application/zip', None))

    # If there is no file extension, look for a file with one of the default
    # extensions. If one cannot be found, check if the path is a directory.
    # If it is, then check for an index file with one of the default
    # extensions.
    if not ext:
      new_path = self._AddExt(path)
      # Add a trailing / to check if it is a directory and not a file with
      # no extension.
      if new_path is None and self.file_system.Exists(ToDirectory(path)).Get():
        new_path = self._AddExt(Join(path, 'index'))
        # If an index file wasn't found in this directory then we're never
        # going to find a file.
        if new_path is None:
          return FileNotFoundError.RaiseInFuture('"%s" is a directory' % path)
      # If nothing was found, fall through with the original |path| and let
      # the content cache deal with it.
      if new_path is not None:
        path = new_path

    return self._content_cache.GetFromFile(path)

  def _AddExt(self, path):
    '''Tries to append each of the default file extensions to path and returns
    the first one that is an existing file, or None if there is no such file.
    '''
    for default_ext in self._default_extensions:
      if self.file_system.Exists(path + default_ext).Get():
        return path + default_ext
    return None

  def Cron(self):
    '''Requests the content of every file in |file_system| (walking from the
    root), plus the extension-less alias of every file which has one of the
    default extensions (other than the site verification file), plus the path
    canonicalizer's own Cron.

    Returns a Future which, when resolved, resolves each of those requests in
    turn. A request which fails is logged, along with its path, rather than
    aborting the remaining requests.
    '''
    futures = [('<path_canonicalizer>',  # semi-arbitrary string since there is
                                         # no path associated with this Future.
                self._path_canonicalizer.Cron())]
    for root, _, files in self.file_system.Walk(''):
      for f in files:
        futures.append((Join(root, f),
                        self.GetContentAndType(Join(root, f))))
        # Also cache the extension-less version of the file if needed.
        base, ext = posixpath.splitext(f)
        if f != SITE_VERIFICATION_FILE and ext in self._default_extensions:
          futures.append((Join(root, base),
                          self.GetContentAndType(Join(root, base))))
      # TODO(kalman): Cache .zip files for each directory (if supported).
    def resolve():
      for label, future in futures:
        try:
          future.Get()
        except Exception:
          # Best effort: one bad file shouldn't stop the rest from being
          # processed. Only Exception is caught so that interpreter-level
          # signals (KeyboardInterrupt, SystemExit and the like) still
          # propagate rather than being logged and ignored.
          logging.error('%s: %s', label, traceback.format_exc())
    return Future(callback=resolve)

  def __repr__(self):
    '''Describes this ContentProvider in terms of its underlying file system.
    '''
    return 'ContentProvider of <%s>' % repr(self.file_system)
193