# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import logging
import mimetypes
import posixpath
import traceback

from compiled_file_system import SingleFile
from directory_zipper import DirectoryZipper
from docs_server_utils import ToUnicode
from file_system import FileNotFoundError
from future import Future
from path_canonicalizer import PathCanonicalizer
from path_util import AssertIsValid, IsDirectory, Join, ToDirectory
from special_paths import SITE_VERIFICATION_FILE
from third_party.handlebar import Handlebar
from third_party.markdown import markdown


# Extension -> mimetype mappings which take precedence over whatever
# mimetypes.guess_type returns.
_MIMETYPE_OVERRIDES = {
  # SVG is not supported by mimetypes.guess_type on AppEngine.
  '.svg': 'image/svg+xml',
}


class ContentAndType(object):
  '''Return value from ContentProvider.GetContentAndType.

  Attributes:
    content: the (possibly compiled) content of the file.
    content_type: the mimetype string describing |content|.
    version: the version of the underlying file as reported by the file
        system's Stat, or None when there is no single backing file (as is
        the case for generated zip files).
  '''

  def __init__(self, content, content_type, version):
    self.content = content
    self.content_type = content_type
    self.version = version


class ContentProvider(object):
  '''Returns file contents correctly typed for their content-types (in the HTTP
  sense). Content-type is determined from Python's mimetype library which
  guesses based on the file extension.

  Typically the file contents will be either str (for binary content) or
  unicode (for text content). However, HTML files *may* be returned as
  Handlebar templates (if |supports_templates| is True on construction), in
  which case the caller will presumably want to Render them.

  Zip files are automatically created and returned for .zip file extensions if
  |supports_zip| is True.

  |default_extensions| is a list of file extensions which are queried when no
  file extension is given to GetCanonicalPath/GetContentAndType. Typically
  this will include .html.
  '''

  def __init__(self,
               name,
               compiled_fs_factory,
               file_system,
               object_store_creator,
               default_extensions=(),
               supports_templates=False,
               supports_zip=False):
    '''Creates a ContentProvider called |name| which serves content out of
    |file_system|.

    |compiled_fs_factory| is used to create the cache of compiled content
    (and the DirectoryZipper, when |supports_zip| is True);
    |object_store_creator| is handed to the PathCanonicalizer.
    '''
    # Public.
    self.name = name
    self.file_system = file_system
    # Private.
    self._content_cache = compiled_fs_factory.Create(file_system,
                                                     self._CompileContent,
                                                     ContentProvider)
    self._path_canonicalizer = PathCanonicalizer(file_system,
                                                 object_store_creator,
                                                 default_extensions)
    self._default_extensions = default_extensions
    self._supports_templates = supports_templates
    # A zipper of None means that zip support is disabled; the public methods
    # check for that before giving '.zip' paths any special treatment.
    if supports_zip:
      self._directory_zipper = DirectoryZipper(compiled_fs_factory, file_system)
    else:
      self._directory_zipper = None

  @SingleFile
  def _CompileContent(self, path, text):
    '''Converts the raw |text| of the file at |path| into a ContentAndType,
    choosing the representation of the content from the file's mimetype:

    - Markdown (.md) is rendered to HTML and served as text/html.
    - HTML is converted to unicode.
    - Other text/*, JavaScript and JSON content is converted to unicode.
    - Everything else, including files whose type cannot be guessed (served
      as text/plain), is passed through untouched.

    Markdown and HTML are additionally wrapped in a Handlebar template if
    templates are supported.
    '''
    assert text is not None, path
    _, ext = posixpath.splitext(path)
    mimetype = _MIMETYPE_OVERRIDES.get(ext, mimetypes.guess_type(path)[0])
    if ext == '.md':
      # See http://pythonhosted.org/Markdown/extensions
      # for details on "extensions=".
      content = markdown(ToUnicode(text),
                         extensions=('extra', 'headerid', 'sane_lists'))
      if self._supports_templates:
        content = Handlebar(content, name=path)
      mimetype = 'text/html'
    elif mimetype is None:
      content = text
      mimetype = 'text/plain'
    elif mimetype == 'text/html':
      content = ToUnicode(text)
      if self._supports_templates:
        content = Handlebar(content, name=path)
    elif (mimetype.startswith('text/') or
          mimetype in ('application/javascript', 'application/json')):
      content = ToUnicode(text)
    else:
      content = text
    return ContentAndType(content,
                          mimetype,
                          self.file_system.Stat(path).version)

  def GetCanonicalPath(self, path):
    '''Gets the canonical location of |path|. This class is tolerant of
    spelling errors and missing files that are in other directories, and this
    returns the correct/canonical path for those.

    For example, the canonical path of "browseraction" is probably
    "extensions/browserAction.html".

    Note that the canonical path is relative to this content provider i.e.
    given relative to |path|. It does not add the "serveFrom" prefix which
    would have been pulled out in ContentProviders, callers must do that
    themselves.
    '''
    AssertIsValid(path)
    base, ext = posixpath.splitext(path)
    if self._directory_zipper and ext == '.zip':
      # The canonical location of zip files is the canonical location of the
      # directory to zip + '.zip'.
      return self._path_canonicalizer.Canonicalize(base + '/').rstrip('/') + ext
    return self._path_canonicalizer.Canonicalize(path)

  def GetContentAndType(self, path):
    '''Returns the ContentAndType of the file at |path|.

    The result is wrapped in a Future on the zip and not-found paths below;
    otherwise it is whatever the content cache's GetFromFile returns
    (presumably also a Future - confirm against CompiledFileSystem). When
    |path| names a directory which has no index file with one of the default
    extensions, the result is produced by FileNotFoundError.RaiseInFuture.
    '''
    AssertIsValid(path)
    base, ext = posixpath.splitext(path)

    # Check for a zip file first, if zip is enabled.
    if self._directory_zipper and ext == '.zip':
      zip_future = self._directory_zipper.Zip(ToDirectory(base))
      return Future(callback=
          lambda: ContentAndType(zip_future.Get(), 'application/zip', None))

    # If there is no file extension, look for a file with one of the default
    # extensions. If one cannot be found, check if the path is a directory.
    # If it is, then check for an index file with one of the default
    # extensions.
    if not ext:
      new_path = self._AddExt(path)
      # Add a trailing / to check if it is a directory and not a file with
      # no extension.
      if new_path is None and self.file_system.Exists(ToDirectory(path)).Get():
        new_path = self._AddExt(Join(path, 'index'))
        # If an index file wasn't found in this directory then we're never
        # going to find a file.
        if new_path is None:
          return FileNotFoundError.RaiseInFuture('"%s" is a directory' % path)
      # When nothing matched, |path| is deliberately left untouched so that
      # genuinely extension-less files can still be looked up below.
      if new_path is not None:
        path = new_path

    return self._content_cache.GetFromFile(path)

  def _AddExt(self, path):
    '''Tries to append each of the default file extensions to path and returns
    the first one that is an existing file, or None if none of them exist.
    '''
    for default_ext in self._default_extensions:
      if self.file_system.Exists(path + default_ext).Get():
        return path + default_ext
    return None

  def Cron(self):
    '''Requests the content of every file in the file system (plus the
    extension-less alias of each file which has a default extension), and runs
    the path canonicalizer's own Cron.

    Returns a Future which, when resolved, resolves all of that work. The
    failure of any individual item is logged rather than raised, so that one
    bad file does not stop the remaining ones from being processed.
    '''
    futures = [('<path_canonicalizer>',  # semi-arbitrary string since there is
                                         # no path associated with this Future.
                self._path_canonicalizer.Cron())]
    for root, _, files in self.file_system.Walk(''):
      for f in files:
        file_path = Join(root, f)
        futures.append((file_path, self.GetContentAndType(file_path)))
        # Also cache the extension-less version of the file if needed.
        base, ext = posixpath.splitext(f)
        if f != SITE_VERIFICATION_FILE and ext in self._default_extensions:
          base_path = Join(root, base)
          futures.append((base_path, self.GetContentAndType(base_path)))
      # TODO(kalman): Cache .zip files for each directory (if supported).
    def resolve():
      for label, future in futures:
        try:
          future.Get()
        except Exception:
          # Deliberately best-effort: log and move on to the next item. Only
          # Exception is caught so that KeyboardInterrupt, SystemExit and
          # other BaseException-derived signals still propagate instead of
          # being swallowed here.
          logging.error('%s: %s', label, traceback.format_exc())
    return Future(callback=resolve)

  def __repr__(self):
    return 'ContentProvider of <%s>' % repr(self.file_system)