subversion_file_system.py revision ca12bfac764ba476d6cd062bf1dde12cc64c3f40
1# Copyright (c) 2012 The Chromium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import posixpath
6import xml.dom.minidom as xml
7from xml.parsers.expat import ExpatError
8
9from appengine_url_fetcher import AppEngineUrlFetcher
10from docs_server_utils import StringIdentity
11from file_system import FileSystem, FileNotFoundError, StatInfo, ToUnicode
12from future import Future
13import svn_constants
14import url_constants
15
class _AsyncFetchFuture(object):
  '''Future delegate that resolves a batch of asynchronous fetches.

  One fetch per path is started as soon as the object is constructed; Get()
  then waits on each fetch in turn and builds a dict keyed by the path.
  '''

  def __init__(self, paths, fetcher, binary, args=None):
    '''Starts one asynchronous fetch for every entry of |paths|.

    paths: iterable of paths to fetch. Get() treats a path ending in '/' as a
        directory listing.
    fetcher: object whose FetchAsync(url) returns an object with a Get()
        method.
    binary: if true, Get() returns file content exactly as fetched instead of
        passing it through ToUnicode.
    args: optional query string; when given, each path is fetched as
        'path?args'.
    '''
    def apply_args(path):
      return path if args is None else '%s?%s' % (path, args)
    # A list of tuples of the form (path, Future). The tuple keeps the bare
    # path (without |args|) because that is the key Get() reports results by.
    self._fetches = [(path, fetcher.FetchAsync(apply_args(path)))
                     for path in paths]
    self._value = {}
    self._binary = binary

  def _ListDir(self, directory):
    '''Returns the text of every <a> element in a directory listing.

    directory: markup of the listing; it must parse as XML.

    '..' entries are dropped. Anchors without leading character data (for
    example an empty <a name="x"></a>, or one whose first child is another
    element) are skipped rather than raising.
    '''
    dom = xml.parseString(directory)
    files = []
    for elem in dom.getElementsByTagName('a'):
      # firstChild is None for an empty anchor, and element children have no
      # |data| attribute; either way there is no name to report.
      name = getattr(elem.firstChild, 'data', None)
      if name is None or name == '..':
        continue
      files.append(name)
    return files

  def Get(self):
    '''Waits for all fetches and returns a dict mapping path to its result.

    Paths ending in '/' map to the list produced by _ListDir; every other path
    maps to the fetched content, converted with ToUnicode unless |binary| was
    set.

    Raises FileNotFoundError if a fetch raises or responds with status 404.
    '''
    for path, future in self._fetches:
      try:
        result = future.Get()
      except Exception as e:
        # Any failure of the underlying fetch is surfaced to callers as a
        # missing file, with the original error kept in the message.
        raise FileNotFoundError(
            'Error when fetching %s for Get: %s' % (path, e))
      if result.status_code == 404:
        raise FileNotFoundError('Got 404 when fetching %s for Get' % path)
      elif path.endswith('/'):
        self._value[path] = self._ListDir(result.content)
      elif not self._binary:
        self._value[path] = ToUnicode(result.content)
      else:
        self._value[path] = result.content
    return self._value
52
class SubversionFileSystem(FileSystem):
  '''Class to fetch resources from src.chromium.org.

  File contents are read through |file_fetcher|; version information is
  scraped from the HTML directory listings returned by |stat_fetcher|.
  '''
  @staticmethod
  def Create(branch='trunk', revision=None):
    '''Returns a SubversionFileSystem rooted at the extensions path of |branch|.

    branch: 'trunk', or the name of a directory under branches/.
    revision: optional revision that Read and Stat requests are pinned to.
    '''
    if branch == 'trunk':
      svn_path = 'trunk/src/%s' % svn_constants.EXTENSIONS_PATH
    else:
      svn_path = 'branches/%s/src/%s' % (branch, svn_constants.EXTENSIONS_PATH)
    return SubversionFileSystem(
        AppEngineUrlFetcher('%s/%s' % (url_constants.SVN_URL, svn_path)),
        AppEngineUrlFetcher('%s/%s' % (url_constants.VIEWVC_URL, svn_path)),
        svn_path,
        revision=revision)

  def __init__(self, file_fetcher, stat_fetcher, svn_path, revision=None):
    '''Stores the fetchers and location this file system works against.

    file_fetcher: fetcher used by Read; must provide FetchAsync(url).
    stat_fetcher: fetcher used by Stat; must provide Fetch(url).
    svn_path: repository path, used by GetIdentity.
    revision: optional revision added to every request; None adds nothing.
    '''
    self._file_fetcher = file_fetcher
    self._stat_fetcher = stat_fetcher
    self._svn_path = svn_path
    self._revision = revision

  def Read(self, paths, binary=False):
    '''Starts fetching |paths| and returns a Future for the results.

    The Future delegates to an _AsyncFetchFuture, whose Get() returns a dict
    mapping each path to its content (or to a list of names for paths ending
    in '/').
    '''
    args = None
    if self._revision is not None:
      # |fetcher| gets from svn.chromium.org which uses p= for version.
      args = 'p=%s' % self._revision
    return Future(delegate=_AsyncFetchFuture(paths,
                                             self._file_fetcher,
                                             binary,
                                             args=args))

  def _ParseHTML(self, html):
    '''Unfortunately, the viewvc page has a stray </div> tag, so this takes care
    of all mismatched tags.

    Each time parsing fails, the line the parser complained about is removed
    and parsing is retried. Raises ExpatError if the document cannot be
    repaired that way, i.e. when it is empty or the reported line does not
    exist.
    '''
    while True:
      try:
        return xml.parseString(html)
      except ExpatError as e:
        lines = html.split('\n')
        # Dropping a line only helps if it removes something. An empty
        # document or an out-of-range line number would retry the exact same
        # input forever, so give up with the parser's own error instead.
        if not html or not 1 <= e.lineno <= len(lines):
          raise
        # ExpatError line numbers are 1-based.
        del lines[e.lineno - 1]
        html = '\n'.join(lines)

  def _CreateStatInfo(self, html):
    '''Builds a StatInfo from the HTML of a directory listing page.

    The first table containing at least one row with a numeric version wins.
    Its rows become the child versions and the largest of them becomes the
    version of the directory itself. If no table qualifies the result is
    StatInfo('0', {}).
    '''
    def inner_text(node):
      '''Like node.innerText in JS DOM, but strips surrounding whitespace.
      '''
      text = []
      if node.nodeValue:
        text.append(node.nodeValue)
      if hasattr(node, 'childNodes'):
        for child_node in node.childNodes:
          text.append(inner_text(child_node))
      return ''.join(text).strip()

    dom = self._ParseHTML(html)

    # Try all of the tables until we find the one that contains the data.
    for table in dom.getElementsByTagName('table'):
      # Within the table there is a list of files. However, there may be some
      # things beforehand; a header, "parent directory" list, etc. We will deal
      # with that below by being generous and just ignoring such rows.
      rows = table.getElementsByTagName('tr')
      child_versions = {}

      for row in rows:
        # Within each row there are probably 5 cells; name, version, age,
        # author, and last log entry. Maybe the columns will change; we're at
        # the mercy of viewvc, but this constant can be easily updated.
        elements = row.getElementsByTagName('td')
        if len(elements) != 5:
          continue
        # Only the first two cells (name and version) are needed.
        name_element, version_element = elements[0], elements[1]

        name = inner_text(name_element)  # note: will end in / for directories
        try:
          version = int(inner_text(version_element))
        except ValueError:
          # Not a file row (e.g. a header); ignore it.
          continue
        child_versions[name] = version

      if not child_versions:
        continue

      # Parent version is max version of all children, since it's SVN.
      parent_version = max(child_versions.values())

      # All versions in StatInfo need to be strings.
      return StatInfo(str(parent_version),
                      dict((path, str(version))
                           for path, version in child_versions.items()))

    # Bleh, but, this data is so unreliable. There are actually some empty file
    # listings caused by git/svn/something not cleaning up empty dirs.
    return StatInfo('0', {})

  def Stat(self, path):
    '''Returns version information for |path|.

    The listing of the directory containing |path| is fetched and parsed. For
    a path ending in '/' the StatInfo of the whole listing is returned; for
    any other path a StatInfo holding just that file's version is returned.

    Raises FileNotFoundError if the listing request responds with 404 or the
    file is not part of the listing.
    '''
    directory, filename = posixpath.split(path)
    directory += '/'
    if self._revision is not None:
      # |stat_fetch| uses viewvc which uses pathrev= for version.
      directory += '?pathrev=%s' % self._revision
    result = self._stat_fetcher.Fetch(directory)
    if result.status_code == 404:
      raise FileNotFoundError(
          'Got 404 when fetching %s from %s for Stat' % (path, directory))
    stat_info = self._CreateStatInfo(result.content)
    if path.endswith('/'):
      return stat_info
    if filename not in stat_info.child_versions:
      raise FileNotFoundError('%s was not in child versions' % filename)
    return StatInfo(stat_info.child_versions[filename])

  def GetIdentity(self):
    '''Returns a string made of the class name and the identity of the svn
    path, joined with '@'.
    '''
    # NOTE: no revision here, consider it just an implementation detail of the
    # file version that is handled by Stat.
    return '@'.join((self.__class__.__name__, StringIdentity(self._svn_path)))
169