# Copyright (c) 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
403b57e008b61dfcb1fbad3aea950ae0e001748b0Torne (Richard Coles)
503b57e008b61dfcb1fbad3aea950ae0e001748b0Torne (Richard Coles)import xml.dom.minidom as minidom
603b57e008b61dfcb1fbad3aea950ae0e001748b0Torne (Richard Coles)from xml.parsers.expat import ExpatError
703b57e008b61dfcb1fbad3aea950ae0e001748b0Torne (Richard Coles)
803b57e008b61dfcb1fbad3aea950ae0e001748b0Torne (Richard Coles)import crash_utils
903b57e008b61dfcb1fbad3aea950ae0e001748b0Torne (Richard Coles)from repository_parser_interface import ParserInterface
1003b57e008b61dfcb1fbad3aea950ae0e001748b0Torne (Richard Coles)
1103b57e008b61dfcb1fbad3aea950ae0e001748b0Torne (Richard Coles)
# A well-formed linediff page in src.chromium.org should contain exactly six
# tables: a table with the revision number, a table with the actual diff, a
# table with the dropdown menu, a table with the legend, a border table and a
# table containing page information.
NUM_TABLES_IN_LINEDIFF_PAGE = 6
# Each row with linediff info should contain three <td>s: one for the changed
# line number, and two for the line contents before/after the change.
NUM_TDS_IN_LINEDIFF_PAGE = 3
2003b57e008b61dfcb1fbad3aea950ae0e001748b0Torne (Richard Coles)
2103b57e008b61dfcb1fbad3aea950ae0e001748b0Torne (Richard Coles)
class SVNParser(ParserInterface):
  """Parser for SVN repository using chromium.org, for components in config.

  Attributes:
    component_to_urls_map: A map from component to its url templates. The
        templates read by this class are 'changelog_url', 'revision_url',
        'diff_url' and 'blame_url'.
  """

  def __init__(self, url_map):
    """Stores the per-component url templates.

    Args:
      url_map: A map from component to a dictionary of url templates.
    """
    self.component_to_urls_map = url_map

  @staticmethod
  def _GetNodeText(node):
    """Returns all text found in |node| and its descendants.

    Args:
      node: A minidom node, or None.

    Returns:
      The concatenated text, or an empty string if |node| is None or does not
      contain any text.
    """
    if node is None:
      return ''
    if node.nodeType in (minidom.Node.TEXT_NODE,
                         minidom.Node.CDATA_SECTION_NODE):
      return node.nodeValue
    return ''.join(SVNParser._GetNodeText(child) for child in node.childNodes)

  @staticmethod
  def _GetFirstElementText(parent, tag_name):
    """Returns the text of the first <tag_name> under |parent|, '' if none."""
    elements = parent.getElementsByTagName(tag_name)
    if not elements:
      return ''
    return SVNParser._GetNodeText(elements[0].firstChild)

  def ParseChangelog(self, component, range_start, range_end):
    """Parses the changelog of a component for the given revision range.

    Args:
      component: The component to look up in component_to_urls_map.
      range_start: Start of the revision range.
      range_end: End of the revision range.

    Returns:
      A tuple (revision_map, file_to_revision_map). revision_map maps a
      revision number (int) to a dictionary with the keys 'author', 'message'
      and 'url'. file_to_revision_map maps a file path (with a leading
      '/trunk/' removed) to a list of (revision number, change type) tuples.
      Both maps are empty if the component is not in the config, if no data
      is retrieved, or if the retrieved data is not well-formed XML.
    """
    revision_map = {}
    file_to_revision_map = {}

    # Check if the current component is supported by reading the components
    # parsed from config file. If it is not, fail.
    url_map = self.component_to_urls_map.get(component)
    if not url_map:
      return (revision_map, file_to_revision_map)

    # Retrieve data from the url, return empty maps if fails.
    revision_range_str = '%s:%s' % (range_start, range_end)
    url = url_map['changelog_url'] % revision_range_str
    response = crash_utils.GetDataFromURL(url)
    if not response:
      return (revision_map, file_to_revision_map)

    # Parse xml out of the returned string. If it fails, return empty maps.
    try:
      xml_revisions = minidom.parseString(response)
    except ExpatError:
      return (revision_map, file_to_revision_map)

    for revision in xml_revisions.getElementsByTagName('logentry'):
      # An entry without a usable revision number cannot be keyed, skip it.
      try:
        revision_number = int(revision.getAttribute('revision'))
      except ValueError:
        continue

      # Iterate through the changed paths in the CL.
      paths = revision.getElementsByTagName('paths')
      if paths:
        for changed_path in paths[0].getElementsByTagName('path'):
          # Get path and file change type from the xml.
          file_path = self._GetNodeText(changed_path.firstChild)
          if not file_path:
            continue
          file_change_type = changed_path.getAttribute('action')

          if file_path.startswith('/trunk/'):
            file_path = file_path[len('/trunk/'):]

          # Add file to the map.
          file_to_revision_map.setdefault(file_path, []).append(
              (revision_number, file_change_type))

      # A missing author or an empty commit message is recorded as ''
      # instead of aborting the whole changelog.
      revision_map[revision_number] = {
          'author': self._GetFirstElementText(revision, 'author'),
          'message': self._GetFirstElementText(revision, 'msg'),
          'url': url_map['revision_url'] % revision_number,
      }

    return (revision_map, file_to_revision_map)

  def ParseLineDiff(self, path, component, file_change_type, revision_number):
    """Parses the linediff page of a file at the given revision.

    Args:
      path: Path of the file, substituted into the 'diff_url' template.
      component: The component to look up in component_to_urls_map.
      file_change_type: Change type of the file in this revision. 'A' (added)
          is treated as if the file has no changed lines.
      revision_number: The revision to diff against its predecessor (int).

    Returns:
      A tuple (url, changed_line_numbers, changed_line_contents).
      (None, None, None) is returned if the component is not in the config.
      If the file is added, no data is retrieved, or the page is not in the
      expected format, url is the revision url and both lists are empty.
      Otherwise url is the diff url and the lists hold the line number (int)
      and the stripped new content of each changed line.
    """
    changed_line_numbers = []
    changed_line_contents = []

    url_map = self.component_to_urls_map.get(component)
    if not url_map:
      return (None, None, None)

    # If the file is added (not modified), treat it as if it is not changed.
    backup_url = url_map['revision_url'] % revision_number
    if file_change_type == 'A':
      return (backup_url, changed_line_numbers, changed_line_contents)

    # Retrieve data from the url. If no data is retrieved, return empty lists.
    url = url_map['diff_url'] % (path, revision_number - 1,
                                 revision_number, revision_number)
    data = crash_utils.GetDataFromURL(url)
    if not data:
      return (backup_url, changed_line_numbers, changed_line_contents)

    # A page that is not well-formed is handled like any other broken page.
    try:
      line_diff_html = minidom.parseString(data)
    except ExpatError:
      return (backup_url, changed_line_numbers, changed_line_contents)

    # If there are not NUM_TABLES tables in the html page, there should be an
    # error in the html page.
    tables = line_diff_html.getElementsByTagName('table')
    if len(tables) != NUM_TABLES_IN_LINEDIFF_PAGE:
      return (backup_url, changed_line_numbers, changed_line_contents)

    # Diff content is in the second table. Each line of the diff content
    # is in <tr>.
    trs = tables[1].getElementsByTagName('tr')
    prefix_len = len('vc_diff_')

    # Filter trs so that it only contains diff chunk with contents.
    filtered_trs = []
    for tr in trs:
      tr_class = tr.getAttribute('class')
      if tr_class:
        tr_class = tr_class[prefix_len:]

        # Do not have to add header.
        if tr_class in ('header', 'chunk_header'):
          continue

        # If the class of tr is empty, this page does not have any change.
        if tr_class == 'empty':
          return (backup_url, changed_line_numbers, changed_line_contents)

      filtered_trs.append(tr)

    # Iterate through filtered trs, and grab line diff information.
    for tr in filtered_trs:
      tds = tr.getElementsByTagName('td')

      # If there aren't 3 tds, this row should not contain line diff.
      if len(tds) != NUM_TDS_IN_LINEDIFF_PAGE:
        continue

      # If line number information is not a number in a hyperlink, ignore
      # this row.
      anchors = tds[0].getElementsByTagName('a')
      if not anchors:
        continue
      try:
        line_number = int(self._GetNodeText(anchors[0].firstChild))
      except ValueError:
        continue

      left_diff_type = tds[1].getAttribute('class')[prefix_len:]
      right_diff_type = tds[2].getAttribute('class')[prefix_len:]

      # Treat the line as modified only if the two sides have different types
      # or both have type 'change', and if the change is not a deletion.
      is_modified = (left_diff_type != right_diff_type or
                     left_diff_type == 'change')
      is_deletion = (left_diff_type == 'remove' and
                     right_diff_type == 'empty')
      if is_modified and not is_deletion:
        changed_line_numbers.append(line_number)
        # An empty cell yields an empty line content.
        changed_line_contents.append(
            self._GetNodeText(tds[2].firstChild).strip())

    return (url, changed_line_numbers, changed_line_contents)

  def ParseBlameInfo(self, component, file_path, line, revision):
    """Parses the blame page of a file and returns the info for one line.

    Args:
      component: The component to look up in component_to_urls_map.
      file_path: Path of the file, substituted into the 'blame_url' template.
      line: Index of the <tr> in the blame page holding the wanted line.
          NOTE(review): presumably the line number in the file, with the
          first <tr> being a header row -- confirm against the caller.
      revision: The revision at which the file is blamed.

    Returns:
      A tuple (line_content, revision, author, revision_url, message)
      describing the last change of the line, where revision is the text
      found in the blame page and message is '' if the commit message could
      not be retrieved. None is returned if the component is not in the
      config, if no data is retrieved, or if the page is not in the expected
      format.
    """
    url_map = self.component_to_urls_map.get(component)
    if not url_map:
      return None

    # Retrieve blame data from url, return None if fails.
    url = url_map['blame_url'] % (file_path, revision, revision)
    data = crash_utils.GetDataFromURL(url)
    if not data:
      return None

    try:
      blame_html = minidom.parseString(data)
    except ExpatError:
      return None

    # If the returned html page is an exception page, return None.
    titles = blame_html.getElementsByTagName('title')
    if titles and (
        self._GetNodeText(titles[0].firstChild) == 'ViewVC Exception'):
      return None

    # Each of the blame result is in <tr>. A negative index would silently
    # select a row counted from the end of the page, so reject it as well.
    blame_results = blame_html.getElementsByTagName('tr')
    if line < 0 or line >= len(blame_results):
      return None

    # There must be 4 <td> for each <tr>. If not, this page is wrong.
    tds = blame_results[line].getElementsByTagName('td')
    if len(tds) != 4:
      return None

    # The last <td> has the line content, possibly split up by <span>s.
    # Combine those to get the string of the line. If it has nothing, the
    # line is empty.
    line_content = self._GetNodeText(tds[3]).strip()

    # If the current line has the same author/revision as the previous lines,
    # the result is not shown. Propagate up until we find the line with info,
    # and give up when running out of rows with author and revision cells.
    row = line
    while not tds[1].firstChild:
      row -= 1
      if row < 0:
        return None
      tds = blame_results[row].getElementsByTagName('td')
      if len(tds) < 3:
        return None
    author = self._GetNodeText(tds[1].firstChild)

    # Revision can either be in hyperlink or plain text.
    anchors = tds[2].getElementsByTagName('a')
    revision_node = anchors[0] if anchors else tds[2]
    blamed_revision = self._GetNodeText(revision_node.firstChild)
    try:
      revision_number = int(blamed_revision)
    except ValueError:
      return None

    # The changelog lookup may come back empty (e.g. no data retrieved). The
    # blame info is still returned in that case, with an empty message.
    (revision_info, _) = self.ParseChangelog(
        component, blamed_revision, blamed_revision)
    message = revision_info.get(revision_number, {}).get('message', '')

    # Return the parsed information.
    revision_url = url_map['revision_url'] % revision_number
    return (line_content, blamed_revision, author, revision_url, message)
251