# subversion_file_system.py revision 5821806d5e7f356e8fa4b058a389a808ea183019
# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import re
import xml.dom.minidom as xml
from xml.parsers.expat import ExpatError

import file_system
from future import Future

class _AsyncFetchFuture(object):
  """Future delegate that resolves a batch of asynchronous fetches.

  One fetch per path is started at construction time through
  |fetcher.FetchAsync|; Get() then waits on each of them in order.
  """
  def __init__(self, paths, fetcher, binary):
    # A list of tuples of the form (path, Future).
    self._fetches = [(path, fetcher.FetchAsync(path)) for path in paths]
    self._value = {}
    # Nothing in this file assigns a non-None value here; Get() raises it if
    # something else has set it.
    self._error = None
    self._binary = binary

  def _ListDir(self, directory):
    """Returns the text of every <a> element in |directory|, minus '..'.

    |directory| is parsed strictly with xml.dom.minidom, so it has to be
    well-formed, and every <a> is expected to start with a text node.
    """
    dom = xml.parseString(directory)
    files = [elem.childNodes[0].data for elem in dom.getElementsByTagName('a')]
    if '..' in files:
      files.remove('..')
    return files

  def Get(self):
    """Waits for all fetches and returns a dict of path -> fetched value.

    A path ending in '/' maps to the list produced by _ListDir. Any other
    path maps to the raw content when |binary| was set, and otherwise to
    file_system._ProcessFileData(content, path).

    Raises file_system.FileNotFoundError for the first path whose fetch came
    back with status code 404.
    """
    for path, future in self._fetches:
      result = future.Get()
      if result.status_code == 404:
        raise file_system.FileNotFoundError(path)
      if path.endswith('/'):
        self._value[path] = self._ListDir(result.content)
      elif self._binary:
        self._value[path] = result.content
      else:
        self._value[path] = file_system._ProcessFileData(result.content, path)
    if self._error is not None:
      raise self._error
    return self._value

class SubversionFileSystem(file_system.FileSystem):
  """Class to fetch resources from src.chromium.org.
  """
  def __init__(self, fetcher, stat_fetcher):
    # |fetcher| serves Read(); |stat_fetcher| serves Stat().
    self._fetcher = fetcher
    self._stat_fetcher = stat_fetcher

  def Read(self, paths, binary=False):
    """Returns a Future whose delegate fetches every path in |paths|."""
    return Future(delegate=_AsyncFetchFuture(paths, self._fetcher, binary))

  def _ParseHTML(self, html):
    """Unfortunately, the viewvc page has a stray </div> tag, so this takes care
    of all mismatched tags.

    Each time the parser rejects the markup, the line it reports is dropped
    and parsing is retried. If the reported line cannot be dropped (for
    example nothing is left to remove), the ExpatError is re-raised rather
    than retrying the same input forever.
    """
    lines = html.split('\n')
    while True:
      try:
        return xml.parseString('\n'.join(lines))
      except ExpatError as e:
        # ExpatError.lineno is 1-based.
        bad_index = e.lineno - 1
        if not 0 <= bad_index < len(lines):
          raise
        del lines[bad_index]

  def _CreateStatInfo(self, html):
    """Builds a file_system.StatInfo from the markup of a directory page.

    The directory revision is the text of the first <a> in the <td> that
    follows the <td> whose first child reads 'Directory revision:'. For each
    <a> with a 'name' attribute, the child's revision is the text of the
    first <strong> inside the next <a>; the key gets a trailing '/' unless
    that next <a> has 'file' in its title.
    """
    dom = self._ParseHTML(html)
    # Brace yourself, this is about to get ugly. The page returned from viewvc
    # was not the prettiest.
    tds = dom.getElementsByTagName('td')
    a_list = []
    found = False
    dir_revision = None
    for td in tds:
      if found:
        dir_revision = td.getElementsByTagName('a')[0].firstChild.nodeValue
        found = False
      a_list.extend(td.getElementsByTagName('a'))
      if (td.firstChild is not None and
          td.firstChild.nodeValue == 'Directory revision:'):
        found = True
    child_revisions = {}
    # Walk each anchor together with its successor; the last anchor has no
    # successor and is therefore never treated as a name anchor.
    for a, next_a in zip(a_list, a_list[1:]):
      name = a.getAttribute('name')
      if not name:
        continue
      rev = next_a.getElementsByTagName('strong')[0]
      if 'file' in next_a.getAttribute('title'):
        child_revisions[name] = rev.firstChild.nodeValue
      else:
        child_revisions[name + '/'] = rev.firstChild.nodeValue
    return file_system.StatInfo(dir_revision, child_revisions)

  def Stat(self, path):
    """Returns the StatInfo of the directory containing |path|.

    When |path| does not end in '/', the returned object's |version| is set
    to the revision recorded for that file name in |child_versions|.

    Raises file_system.FileNotFoundError if the directory fetch returns 404,
    or if the file name is missing from the directory's children.
    """
    # rpartition yields an empty directory for a path without any '/', so a
    # top-level file is looked up in the root listing ('/') instead of in a
    # directory named after the file itself.
    directory, _, filename = path.rpartition('/')
    result = self._stat_fetcher.Fetch(directory + '/')
    if result.status_code == 404:
      raise file_system.FileNotFoundError(path)
    stat_info = self._CreateStatInfo(result.content)
    if not path.endswith('/'):
      if filename not in stat_info.child_versions:
        raise file_system.FileNotFoundError(path)
      stat_info.version = stat_info.child_versions[filename]
    return stat_info