subversion_file_system.py revision ca12bfac764ba476d6cd062bf1dde12cc64c3f40
# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import posixpath
import xml.dom.minidom as xml
from xml.parsers.expat import ExpatError

from appengine_url_fetcher import AppEngineUrlFetcher
from docs_server_utils import StringIdentity
from file_system import FileSystem, FileNotFoundError, StatInfo, ToUnicode
from future import Future
import svn_constants
import url_constants

class _AsyncFetchFuture(object):
  '''Future delegate that resolves a batch of asynchronous fetches.

  One fetch per path is started in the constructor via |fetcher.FetchAsync|;
  Get() then waits on each of them in turn and collects the results.
  '''
  def __init__(self, paths, fetcher, binary, args=None):
    '''|paths| are the paths to fetch with |fetcher|. If |args| is given it is
    appended to every path as a query string ("path?args"). |binary| controls
    whether file contents are returned raw or converted with ToUnicode.
    '''
    def apply_args(path):
      return path if args is None else '%s?%s' % (path, args)
    # A list of tuples of the form (path, Future).
    self._fetches = [(path, fetcher.FetchAsync(apply_args(path)))
                     for path in paths]
    self._value = {}
    self._binary = binary

  def _ListDir(self, directory):
    '''Parses |directory| (the fetched content of a directory path) as XML and
    returns the text of the first child of every <a> element, with one '..'
    entry removed if present.
    '''
    dom = xml.parseString(directory)
    files = [elem.childNodes[0].data for elem in dom.getElementsByTagName('a')]
    if '..' in files:
      files.remove('..')
    return files

  def Get(self):
    '''Waits for every fetch and returns a dict mapping each path to its value:
    a list of entry names for paths ending in '/', the ToUnicode'd content for
    non-binary reads, and the raw content otherwise.

    Raises FileNotFoundError if a fetch raises or comes back with a 404.
    '''
    for path, future in self._fetches:
      try:
        result = future.Get()
      except Exception as e:
        # Any failure to fetch is reported to callers as a missing file.
        raise FileNotFoundError(
            'Error when fetching %s for Get: %s' % (path, e))
      # NOTE(review): only 404 is treated as an error; the content of any other
      # status code is used as-is. Confirm this is intended.
      if result.status_code == 404:
        raise FileNotFoundError('Got 404 when fetching %s for Get' % path)
      elif path.endswith('/'):
        self._value[path] = self._ListDir(result.content)
      elif not self._binary:
        self._value[path] = ToUnicode(result.content)
      else:
        self._value[path] = result.content
    return self._value

class SubversionFileSystem(FileSystem):
  '''Class to fetch resources from src.chromium.org.
  '''
  @staticmethod
  def Create(branch='trunk', revision=None):
    '''Returns a SubversionFileSystem rooted at the extensions path of |branch|
    ('trunk', or the name of a directory under branches/), optionally pinned
    to |revision|.
    '''
    if branch == 'trunk':
      svn_path = 'trunk/src/%s' % svn_constants.EXTENSIONS_PATH
    else:
      svn_path = 'branches/%s/src/%s' % (branch, svn_constants.EXTENSIONS_PATH)
    return SubversionFileSystem(
        AppEngineUrlFetcher('%s/%s' % (url_constants.SVN_URL, svn_path)),
        AppEngineUrlFetcher('%s/%s' % (url_constants.VIEWVC_URL, svn_path)),
        svn_path,
        revision=revision)

  def __init__(self, file_fetcher, stat_fetcher, svn_path, revision=None):
    '''|file_fetcher| is used by Read, |stat_fetcher| by Stat. |svn_path| is
    only used to build the identity. If |revision| is not None, both Read and
    Stat request that revision.
    '''
    self._file_fetcher = file_fetcher
    self._stat_fetcher = stat_fetcher
    self._svn_path = svn_path
    self._revision = revision

  def Read(self, paths, binary=False):
    '''Starts fetching all of |paths| and returns a Future whose Get() yields
    a dict of path to content (see _AsyncFetchFuture.Get).
    '''
    args = None
    if self._revision is not None:
      # |_file_fetcher| gets from svn.chromium.org which uses p= for version.
      args = 'p=%s' % self._revision
    return Future(delegate=_AsyncFetchFuture(paths,
                                             self._file_fetcher,
                                             binary,
                                             args=args))

  def _ParseHTML(self, html):
    '''Unfortunately, the viewvc page has a stray </div> tag, so this takes care
    of all mismatched tags.

    Repeatedly drops the line that the parser complains about until |html|
    parses, then returns the DOM. Raises ExpatError if dropping the offending
    line makes no progress (for example, when |html| is empty), rather than
    retrying forever.
    '''
    while True:
      try:
        return xml.parseString(html)
      except ExpatError as e:
        stripped = '\n'.join(
            line for (i, line) in enumerate(html.split('\n'))
            if e.lineno != i + 1)
        if stripped == html:
          # Nothing was removed, so trying again would fail identically.
          raise
        html = stripped

  def _CreateStatInfo(self, html):
    '''Extracts version information from the viewvc directory listing |html|.

    Returns a StatInfo whose version is the highest version of any listed child
    and whose child versions map each listed name to its version, both as
    strings. Returns StatInfo('0', {}) if no table with usable rows is found.
    '''
    def inner_text(node):
      '''Like node.innerText in JS DOM, but strips surrounding whitespace.
      '''
      text = []
      if node.nodeValue:
        text.append(node.nodeValue)
      if hasattr(node, 'childNodes'):
        for child_node in node.childNodes:
          text.append(inner_text(child_node))
      return ''.join(text).strip()

    dom = self._ParseHTML(html)

    # Try all of the tables until we find the one that contains the data.
    for table in dom.getElementsByTagName('table'):
      # Within the table there is a list of files. However, there may be some
      # things beforehand; a header, "parent directory" list, etc. We will deal
      # with that below by being generous and just ignoring such rows.
      rows = table.getElementsByTagName('tr')
      child_versions = {}

      for row in rows:
        # Within each row there are probably 5 cells; name, version, age,
        # author, and last log entry. Maybe the columns will change; we're at
        # the mercy of viewvc, but this constant can be easily updated.
        elements = row.getElementsByTagName('td')
        if len(elements) != 5:
          continue
        # Only the first two cells (name and version) are needed.
        name_element, version_element = elements[:2]

        name = inner_text(name_element)  # note: will end in / for directories
        try:
          version = int(inner_text(version_element))
        except ValueError:
          continue
        child_versions[name] = version

      if not child_versions:
        continue

      # Parent version is max version of all children, since it's SVN.
      parent_version = max(child_versions.values())

      # All versions in StatInfo need to be strings.
      return StatInfo(str(parent_version),
                      dict((path, str(version))
                           for path, version in child_versions.items()))

    # Bleh, but, this data is so unreliable. There are actually some empty file
    # listings caused by git/svn/something not cleaning up empty dirs.
    return StatInfo('0', {})

  def Stat(self, path):
    '''Returns the StatInfo for |path|, read from the listing of the directory
    that contains it.

    For a path ending in '/' this is the StatInfo of the whole listing; for a
    file it is a StatInfo holding only that file's version. Raises
    FileNotFoundError if the listing fetch returns a 404 or the file does not
    appear in the listing.
    '''
    directory, filename = posixpath.split(path)
    directory += '/'
    if self._revision is not None:
      # |_stat_fetcher| uses viewvc which uses pathrev= for version.
      directory += '?pathrev=%s' % self._revision
    result = self._stat_fetcher.Fetch(directory)
    if result.status_code == 404:
      raise FileNotFoundError(
          'Got 404 when fetching %s from %s for Stat' % (path, directory))
    stat_info = self._CreateStatInfo(result.content)
    if path.endswith('/'):
      return stat_info
    if filename not in stat_info.child_versions:
      raise FileNotFoundError('%s was not in child versions' % filename)
    return StatInfo(stat_info.child_versions[filename])

  def GetIdentity(self):
    '''Returns a string identifying this file system, built from the class name
    and the svn path.
    '''
    # NOTE: no revision here, consider it just an implementation detail of the
    # file version that is handled by Stat.
    return '@'.join((self.__class__.__name__, StringIdentity(self._svn_path)))