# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import logging
import mimetypes
import posixpath
import traceback

from compiled_file_system import SingleFile
from directory_zipper import DirectoryZipper
from docs_server_utils import ToUnicode
from file_system import FileNotFoundError
from future import Future
from path_canonicalizer import PathCanonicalizer
from path_util import AssertIsValid, IsDirectory, Join, ToDirectory
from special_paths import SITE_VERIFICATION_FILE
from third_party.handlebar import Handlebar
from third_party.markdown import markdown


# Extension -> mimetype mappings which take precedence over whatever
# mimetypes.guess_type reports.
_MIMETYPE_OVERRIDES = {
  # SVG is not supported by mimetypes.guess_type on AppEngine.
  '.svg': 'image/svg+xml',
}


class ContentAndType(object):
  '''Return value from ContentProvider.GetContentAndType.

  Plain value holder with three attributes:
  |content| is the (possibly compiled) file content, |content_type| is its
  mimetype string, and |version| is the version reported by the file system
  for the file (None for generated zip content).
  '''

  def __init__(self, content, content_type, version):
    self.content = content
    self.content_type = content_type
    self.version = version


class ContentProvider(object):
  '''Returns file contents correctly typed for their content-types (in the HTTP
  sense). Content-type is determined from Python's mimetype library which
  guesses based on the file extension.

  Typically the file contents will be either str (for binary content) or
  unicode (for text content). However, HTML files *may* be returned as
  Handlebar templates (if |supports_templates| is True on construction), in
  which case the caller will presumably want to Render them.

  Zip files are automatically created and returned for .zip file extensions if
  |supports_zip| is True.

  |default_extensions| is a list of file extensions which are queried when no
  file extension is given to GetCanonicalPath/GetContentAndType. Typically
  this will include .html.
  '''

  def __init__(self,
               name,
               compiled_fs_factory,
               file_system,
               object_store_creator,
               default_extensions=(),
               supports_templates=False,
               supports_zip=False):
    '''Creates a ContentProvider serving files out of |file_system|.

    |name| is stored as the public |name| attribute.
    |compiled_fs_factory| creates the content cache (and the directory zipper
    if |supports_zip| is True).
    |object_store_creator| and |default_extensions| are handed to the
    PathCanonicalizer used by GetCanonicalPath.
    |supports_templates| controls whether HTML (and rendered markdown) content
    is wrapped in a Handlebar template.
    '''
    # Public.
    self.name = name
    self.file_system = file_system
    # Private.
    self._content_cache = compiled_fs_factory.Create(file_system,
                                                     self._CompileContent,
                                                     ContentProvider)
    self._path_canonicalizer = PathCanonicalizer(file_system,
                                                 object_store_creator,
                                                 default_extensions)
    self._default_extensions = default_extensions
    self._supports_templates = supports_templates
    if supports_zip:
      self._directory_zipper = DirectoryZipper(compiled_fs_factory, file_system)
    else:
      # None doubles as the "zip is not supported" flag elsewhere in the class.
      self._directory_zipper = None

  @SingleFile
  def _CompileContent(self, path, text):
    '''Converts the raw |text| of the file at |path| into a ContentAndType.

    The conversion depends on the extension/mimetype of |path|:
      - .md files are rendered to HTML via markdown (and wrapped in a Handlebar
        template if templates are supported), and served as text/html.
      - Files whose mimetype cannot be determined are returned untouched and
        labelled text/plain.
      - text/html files are converted to unicode (and wrapped in a Handlebar
        template if templates are supported).
      - Other text/* files, JavaScript and JSON are converted to unicode.
      - Anything else is returned untouched with its guessed mimetype.

    The version of the result is whatever |file_system| Stats for |path|.
    '''
    assert text is not None, path
    _, ext = posixpath.splitext(path)
    mimetype = _MIMETYPE_OVERRIDES.get(ext, mimetypes.guess_type(path)[0])
    if ext == '.md':
      # See http://pythonhosted.org/Markdown/extensions
      # for details on "extensions=".
      content = markdown(ToUnicode(text),
                         extensions=('extra', 'headerid', 'sane_lists'))
      if self._supports_templates:
        content = Handlebar(content, name=path)
      # The markdown has been rendered, so it is served as HTML regardless of
      # what was guessed for the .md extension.
      mimetype = 'text/html'
    elif mimetype is None:
      content = text
      mimetype = 'text/plain'
    elif mimetype == 'text/html':
      content = ToUnicode(text)
      if self._supports_templates:
        content = Handlebar(content, name=path)
    elif (mimetype.startswith('text/') or
          mimetype in ('application/javascript', 'application/json')):
      content = ToUnicode(text)
    else:
      content = text
    return ContentAndType(content,
                          mimetype,
                          self.file_system.Stat(path).version)

  def GetCanonicalPath(self, path):
    '''Gets the canonical location of |path|. This class is tolerant of
    spelling errors and missing files that are in other directories, and this
    returns the correct/canonical path for those.

    For example, the canonical path of "browseraction" is probably
    "extensions/browserAction.html".

    Note that the canonical path is relative to this content provider i.e.
    given relative to |path|. It does not add the "serveFrom" prefix which
    would have been pulled out in ContentProviders, callers must do that
    themselves.
    '''
    AssertIsValid(path)
    base, ext = posixpath.splitext(path)
    if self._directory_zipper and ext == '.zip':
      # The canonical location of zip files is the canonical location of the
      # directory to zip + '.zip'.
      return self._path_canonicalizer.Canonicalize(base + '/').rstrip('/') + ext
    return self._path_canonicalizer.Canonicalize(path)

  def GetContentAndType(self, path):
    '''Returns a Future to the ContentAndType of the file at |path|.

    If zip is supported and |path| ends in .zip, the content is a zip of the
    directory named by |path| without its extension. If |path| has no
    extension, the default extensions are tried, first on |path| itself and
    then on an "index" file inside |path| if it is a directory. A directory
    without such an index file results in a FileNotFoundError raised in the
    returned Future.
    '''
    AssertIsValid(path)
    base, ext = posixpath.splitext(path)

    # Check for a zip file first, if zip is enabled.
    if self._directory_zipper and ext == '.zip':
      zip_future = self._directory_zipper.Zip(ToDirectory(base))
      return Future(callback=
          lambda: ContentAndType(zip_future.Get(), 'application/zip', None))

    # If there is no file extension, look for a file with one of the default
    # extensions. If one cannot be found, check if the path is a directory.
    # If it is, then check for an index file with one of the default
    # extensions.
    if not ext:
      new_path = self._AddExt(path)
      # Add a trailing / to check if it is a directory and not a file with
      # no extension.
      if new_path is None and self.file_system.Exists(ToDirectory(path)).Get():
        new_path = self._AddExt(Join(path, 'index'))
        # If an index file wasn't found in this directory then we're never
        # going to find a file.
        if new_path is None:
          return FileNotFoundError.RaiseInFuture('"%s" is a directory' % path)
      if new_path is not None:
        path = new_path

    return self._content_cache.GetFromFile(path)

  def _AddExt(self, path):
    '''Tries to append each of the default file extensions to path and returns
    the first one that is an existing file, or None if there is no such file.
    '''
    for default_ext in self._default_extensions:
      if self.file_system.Exists(path + default_ext).Get():
        return path + default_ext
    return None

  def Cron(self):
    '''Requests the content of every file in |file_system| (plus the
    extension-less alias of files with a default extension) and runs the path
    canonicalizer's Cron.

    Returns a Future which resolves each of those requests in turn. A failure
    to resolve one of them is logged together with its path, and does not
    prevent the remaining ones from being resolved.
    '''
    futures = [('<path_canonicalizer>',  # semi-arbitrary string since there is
                                         # no path associated with this Future.
                self._path_canonicalizer.Cron())]
    for root, _, files in self.file_system.Walk(''):
      for f in files:
        file_path = Join(root, f)
        futures.append((file_path, self.GetContentAndType(file_path)))
        # Also cache the extension-less version of the file if needed.
        base, ext = posixpath.splitext(f)
        if f != SITE_VERIFICATION_FILE and ext in self._default_extensions:
          base_path = Join(root, base)
          futures.append((base_path, self.GetContentAndType(base_path)))
      # TODO(kalman): Cache .zip files for each directory (if supported).
    def resolve():
      for label, future in futures:
        try:
          future.Get()
        except Exception:
          # Best effort: one bad file should not stop the rest from being
          # cached. Only Exception is caught so that KeyboardInterrupt,
          # SystemExit and friends still propagate.
          logging.error('%s: %s' % (label, traceback.format_exc()))
    return Future(callback=resolve)

  def __repr__(self):
    '''Identifies this provider by the repr of its underlying file system.'''
    return 'ContentProvider of <%s>' % repr(self.file_system)