subversion_file_system.py revision 2a99a7e74a7f215066514fe81d2bfa6639d9eddd
1# Copyright (c) 2012 The Chromium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import re
6import xml.dom.minidom as xml
7from xml.parsers.expat import ExpatError
8
9import file_system
10from future import Future
11
12class _AsyncFetchFuture(object):
13  def __init__(self, paths, fetcher, binary):
14    # A list of tuples of the form (path, Future).
15    self._fetches = [(path, fetcher.FetchAsync(path)) for path in paths]
16    self._value = {}
17    self._error = None
18    self._binary = binary
19
20  def _ListDir(self, directory):
21    dom = xml.parseString(directory)
22    files = [elem.childNodes[0].data for elem in dom.getElementsByTagName('a')]
23    if '..' in files:
24      files.remove('..')
25    return files
26
27  def Get(self):
28    for path, future in self._fetches:
29      result = future.Get()
30      if result.status_code == 404:
31        raise file_system.FileNotFoundError(path)
32      elif path.endswith('/'):
33        self._value[path] = self._ListDir(result.content)
34      elif not self._binary:
35        self._value[path] = file_system._ToUnicode(result.content)
36      else:
37        self._value[path] = result.content
38    if self._error is not None:
39      raise self._error
40    return self._value
41
class SubversionFileSystem(file_system.FileSystem):
  """Class to fetch resources from src.chromium.org.

  Contents are read through |fetcher|; revision information is scraped from
  the viewvc pages returned by |stat_fetcher|.
  """
  def __init__(self, fetcher, stat_fetcher):
    """Stores the fetcher used by Read() and the one used by Stat()."""
    self._fetcher = fetcher
    self._stat_fetcher = stat_fetcher

  def Read(self, paths, binary=False):
    """Returns a Future whose delegate is an _AsyncFetchFuture for |paths|.

    |binary| is forwarded to the delegate unchanged.
    """
    return Future(delegate=_AsyncFetchFuture(paths, self._fetcher, binary))

  def _ParseHTML(self, html):
    """Parses |html| as XML, dropping lines that the parser rejects.

    Unfortunately, the viewvc page has a stray </div> tag. Whenever expat
    reports an error, the line it points at is removed and parsing is retried,
    which takes care of mismatched tags.

    Raises ExpatError if removing the reported line does not change the
    markup (for example empty input), since retrying could never succeed.
    """
    while True:
      try:
        return xml.parseString(html)
      except ExpatError as e:
        # Expat line numbers are 1-based.
        remaining = '\n'.join(
            line for (lineno, line) in enumerate(html.split('\n'), 1)
            if lineno != e.lineno)
        if remaining == html:
          # No progress is possible; retrying would loop forever.
          raise
        html = remaining

  def _CreateStatInfo(self, html):
    """Builds a file_system.StatInfo from a viewvc directory page.

    The directory revision is the text of the first <a> in the cell that
    follows the 'Directory revision:' cell. Child revisions are taken from
    every named anchor: the anchor after it supplies the revision (its
    <strong> text) and, through its title, whether the child is a file or a
    directory. Directory children are keyed with a trailing '/'.
    """
    dom = self._ParseHTML(html)
    # Brace yourself, this is about to get ugly. The page returned from viewvc
    # was not the prettiest.
    anchors = []
    dir_revision = None
    # True while the previous cell was the 'Directory revision:' label.
    after_label = False
    for td in dom.getElementsByTagName('td'):
      if after_label:
        dir_revision = td.getElementsByTagName('a')[0].firstChild.nodeValue
        after_label = False
      anchors.extend(td.getElementsByTagName('a'))
      if (td.firstChild is not None and
          td.firstChild.nodeValue == 'Directory revision:'):
        after_label = True
    child_revisions = {}
    # Walk adjacent anchor pairs; the last anchor has no successor and is
    # therefore never treated as a name.
    for a, next_a in zip(anchors, anchors[1:]):
      name = a.getAttribute('name')
      if not name:
        continue
      revision = next_a.getElementsByTagName('strong')[0].firstChild.nodeValue
      if 'file' in next_a.getAttribute('title'):
        child_revisions[name] = revision
      else:
        child_revisions[name + '/'] = revision
    return file_system.StatInfo(dir_revision, child_revisions)

  def Stat(self, path):
    """Returns the StatInfo of |path|.

    The listing of the directory containing |path| is fetched and parsed. For
    a directory path (ending in '/') the result is returned as is; for a file
    path the StatInfo's version is replaced by the revision of that file. A
    path without any '/' is looked up in the root listing.

    Raises file_system.FileNotFoundError if the listing fetch returns a 404 or
    the file is not among the listed children.
    """
    # rpartition yields an empty directory for a path without '/', so the
    # root listing is fetched rather than '<filename>/'.
    directory, _, filename = path.rpartition('/')
    result = self._stat_fetcher.Fetch(directory + '/')
    if result.status_code == 404:
      raise file_system.FileNotFoundError(path)
    stat_info = self._CreateStatInfo(result.content)
    if not path.endswith('/'):
      if filename not in stat_info.child_versions:
        raise file_system.FileNotFoundError(path)
      stat_info.version = stat_info.child_versions[filename]
    return stat_info
105