subversion_file_system.py revision effb81e5f8246d0db0270817048dc992db66e9fb
1# Copyright (c) 2012 The Chromium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import posixpath
6import traceback
7import xml.dom.minidom as xml
8from xml.parsers.expat import ExpatError
9
10from appengine_url_fetcher import AppEngineUrlFetcher
11from appengine_wrappers import IsDownloadError
12from docs_server_utils import StringIdentity
13from file_system import (
14    FileNotFoundError, FileSystem, FileSystemError, StatInfo)
15from future import Future
16import url_constants
17
18
def _ParseHTML(html):
  '''Parses |html| into a minidom document, tolerating mismatched tags.

  Unfortunately, the viewvc page has a stray </div> tag, so whenever the XML
  parser rejects the input, the line it complained about is dropped and the
  parse is retried.

  Raises ExpatError if no parseable document remains, i.e. once every line has
  been dropped or the parser reports an error on a line that does not exist.
  '''
  while True:
    try:
      return xml.parseString(html)
    except ExpatError as e:
      lines = html.split('\n')
      # Give up when there is nothing left to drop; retrying with unchanged
      # input would otherwise never terminate.
      if not html or not 1 <= e.lineno <= len(lines):
        raise
      # ExpatError line numbers are 1-based.
      del lines[e.lineno - 1]
      html = '\n'.join(lines)
29
def _InnerText(node):
  '''Returns the text held by |node| and its descendants, like node.innerText
  in JS DOM, except that surrounding whitespace is stripped. The stripping
  happens at every level of the recursion, so each child's contribution is
  trimmed before the pieces are concatenated.
  '''
  own_text = node.nodeValue or ''
  descendant_text = ''.join(
      _InnerText(child) for child in getattr(node, 'childNodes', ()))
  return (own_text + descendant_text).strip()
40
def _CreateStatInfo(html):
  '''Builds a StatInfo from the HTML of a viewvc directory listing.

  The directory's own revision becomes the StatInfo's first argument, and the
  revisions of the listed entries (as strings, keyed by name) its second.

  Raises FileSystemError if the "Directory revision:" row does not look as
  expected or appears more than once, and ValueError if the directory revision
  is not an integer.
  '''
  parent_version = None
  child_versions = {}

  # Try all of the tables until we find the ones that contain the data (the
  # directory and file versions are in different tables).
  for table in _ParseHTML(html).getElementsByTagName('table'):
    # Within the table there is a list of files. However, there may be some
    # things beforehand; a header, "parent directory" list, etc. We will deal
    # with that below by being generous and just ignoring such rows.
    for row in table.getElementsByTagName('tr'):
      cells = row.getElementsByTagName('td')

      # The version of the directory will eventually appear in the soup of
      # table rows, like this:
      #
      # <tr>
      #   <td>Directory revision:</td>
      #   <td><a href=... title="Revision 214692">214692</a> (of...)</td>
      # </tr>
      #
      # So look out for that.
      if len(cells) == 2 and _InnerText(cells[0]) == 'Directory revision:':
        links = cells[1].getElementsByTagName('a')
        if len(links) != 2:
          raise FileSystemError(
              ('ViewVC assumption invalid: directory '
               'revision content did not have 2 <a> '
               ' elements, instead %s') % _InnerText(cells[1]))
        this_parent_version = _InnerText(links[0])
        int(this_parent_version)  # sanity check
        if parent_version is not None:
          # The message is parenthesized so that % formats the whole string;
          # % binds tighter than +, so formatting only the last fragment with
          # both values would raise TypeError instead of this error.
          raise FileSystemError(
              ('There was already a parent version %s, and '
               ' we just found a second at %s') %
              (parent_version, this_parent_version))
        parent_version = this_parent_version

      # The version of each file is a list of rows with 5 cells: name, version,
      # age, author, and last log entry. Maybe the columns will change; we're
      # at the mercy of viewvc, but this constant can be easily updated.
      if len(cells) != 5:
        continue
      name_element, version_element = cells[0], cells[1]

      name = _InnerText(name_element)  # note: will end in / for directories
      try:
        version = int(_InnerText(version_element))
      except ValueError:
        # Not a revision number (e.g. a header row); ignore the row.
        continue
      child_versions[name] = str(version)

    if parent_version and child_versions:
      break

  return StatInfo(parent_version, child_versions)
97
def _GetAsyncFetchCallback(paths, fetcher, args=None):
  '''Starts an asynchronous fetch of every path in |paths| through |fetcher|
  and returns a callable which waits for the fetches and returns a dict
  mapping each path to what was fetched for it.

  If |args| is given it is appended to each fetched path as a query string.
  Paths ending in '/' map to the list of link texts found in the fetched page
  (without '..'); all other paths map to the fetched content itself.

  The returned callable raises FileNotFoundError for a 404 response or a
  download error, and FileSystemError for any other failure or non-200 status.
  '''
  def with_query(path):
    if args is None:
      return path
    return '%s?%s' % (path, args)

  def parse_listing(listing):
    anchors = xml.parseString(listing).getElementsByTagName('a')
    names = [anchor.childNodes[0].data for anchor in anchors]
    if '..' in names:
      names.remove('..')
    return names

  # All fetches are kicked off right away; each entry is a (path, Future)
  # pair which is only waited on once the returned callable runs.
  pending = []
  for path in paths:
    pending.append((path, fetcher.FetchAsync(with_query(path))))

  def resolve():
    contents = {}
    for path, future in pending:
      try:
        response = future.Get()
      except Exception as e:
        if IsDownloadError(e):
          exc_type = FileNotFoundError
        else:
          exc_type = FileSystemError
        raise exc_type('%s fetching %s for Get: %s' %
                       (type(e).__name__, path, traceback.format_exc()))
      status = response.status_code
      if status == 404:
        raise FileNotFoundError('Got 404 when fetching %s for Get, content %s' %
            (path, response.content))
      if status != 200:
        raise FileSystemError('Got %s when fetching %s for Get, content %s' %
            (status, path, response.content))
      body = response.content
      contents[path] = parse_listing(body) if path.endswith('/') else body
    return contents

  return resolve
134
class SubversionFileSystem(FileSystem):
  '''FileSystem which reads file contents through one URL fetcher and looks up
  versions through a second one pointed at the matching viewvc listing.
  '''
  @staticmethod
  def Create(branch='trunk', revision=None):
    '''Creates a SubversionFileSystem for |branch|, optionally pinned to
    |revision|, with fetchers rooted at url_constants.SVN_URL and
    url_constants.VIEWVC_URL.
    '''
    svn_path = 'trunk/src' if branch == 'trunk' else 'branches/%s/src' % branch
    file_fetcher = AppEngineUrlFetcher(
        '%s/%s' % (url_constants.SVN_URL, svn_path))
    stat_fetcher = AppEngineUrlFetcher(
        '%s/%s' % (url_constants.VIEWVC_URL, svn_path))
    return SubversionFileSystem(file_fetcher,
                                stat_fetcher,
                                svn_path,
                                revision=revision)

  def __init__(self, file_fetcher, stat_fetcher, svn_path, revision=None):
    self._file_fetcher = file_fetcher
    self._stat_fetcher = stat_fetcher
    self._svn_path = svn_path
    self._revision = revision

  def Read(self, paths):
    '''Returns a Future resolving to a dict of path -> content for |paths|.
    '''
    # The file fetcher gets from svn.chromium.org which uses p= for version.
    args = None if self._revision is None else 'p=%s' % self._revision
    fetch_callback = _GetAsyncFetchCallback(paths,
                                            self._file_fetcher,
                                            args=args)
    return Future(callback=fetch_callback)

  def Refresh(self):
    '''Nothing to refresh; returns an already resolved Future.
    '''
    return Future(value=())

  def Stat(self, path):
    '''Returns the StatInfo for |path|, derived from the viewvc listing of the
    directory containing it. For '' and paths ending in '/' this is the
    StatInfo of the whole directory; otherwise it only carries the version of
    the named file.

    Raises FileNotFoundError if the listing cannot be fetched or does not
    mention the file, and FileSystemError on other fetch errors or when the
    directory version is missing from the listing.
    '''
    directory, filename = posixpath.split(path)
    if self._revision is not None:
      # The stat fetcher uses viewvc which uses pathrev= for version.
      directory = '%s?pathrev=%s' % (directory, self._revision)

    try:
      result = self._stat_fetcher.Fetch(directory)
    except Exception as e:
      if IsDownloadError(e):
        exc_type = FileNotFoundError
      else:
        exc_type = FileSystemError
      raise exc_type('%s fetching %s for Stat: %s' %
                     (type(e).__name__, path, traceback.format_exc()))

    status = result.status_code
    if status == 404:
      raise FileNotFoundError('Got 404 when fetching %s for Stat, content %s' %
          (path, result.content))
    if status != 200:
      # NOTE(review): every other non-200 status is reported as
      # FileNotFoundError too, whereas _GetAsyncFetchCallback raises
      # FileSystemError for the same condition - confirm this is intended.
      raise FileNotFoundError('Got %s when fetching %s for Stat, content %s' %
          (status, path, result.content))

    stat_info = _CreateStatInfo(result.content)
    if stat_info.version is None:
      raise FileSystemError('Failed to find version of dir %s' % directory)

    if path == '' or path.endswith('/'):
      # A directory was asked for, so the whole listing is the answer.
      return stat_info
    if filename in stat_info.child_versions:
      return StatInfo(stat_info.child_versions[filename])
    raise FileNotFoundError(
        '%s from %s was not in child versions for Stat' % (filename, path))

  def GetIdentity(self):
    # NOTE: no revision here, since it would mess up the caching of reads. It
    # probably doesn't matter since all the caching classes will use the result
    # of Stat to decide whether to re-read - and Stat has a ceiling of the
    # revision - so when the revision changes, so might Stat. That is enough.
    class_name = self.__class__.__name__
    path_identity = StringIdentity(self._svn_path)
    return '@'.join((class_name, path_identity))
204