# Copyright (c) 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import xml.dom.minidom as minidom
from xml.parsers.expat import ExpatError

import crash_utils
from repository_parser_interface import ParserInterface


# This number is 6 because each linediff page in src.chromium.org should
# contain the following tables: table with revision number, table with actual
# diff, table with dropdown menu, table with legend, a border table and a table
# containing page information.
NUM_TABLES_IN_LINEDIFF_PAGE = 6
# Each of the linediff info should contain 3 tds, one for changed line number,
# and two for line contents before/after.
NUM_TDS_IN_LINEDIFF_PAGE = 3
# Each row of the blame page is expected to contain 4 tds; the code below reads
# the author from the second, the revision from the third and the line content
# from the fourth.
NUM_TDS_IN_BLAME_PAGE = 4


def _GetFirstChildValue(node, default=''):
  """Returns the nodeValue of node's first child.

  Args:
    node: A minidom node, or None.
    default: Value returned when the node is None, has no children, or its
        first child carries no nodeValue (e.g. it is an element node).

  Returns:
    The first child's nodeValue, or default.
  """
  if node is None or node.firstChild is None:
    return default
  value = node.firstChild.nodeValue
  if value is None:
    return default
  return value


class SVNParser(ParserInterface):
  """Parser for SVN repository using chromium.org, for components in config.

  Attributes:
    component_to_urls_map: A map from component to the urls, where urls are
        for changelog, revision, line diff and annotation. The methods below
        read the keys 'changelog_url', 'revision_url', 'diff_url' and
        'blame_url', each of which is used as a %-format template.
  """

  def __init__(self, url_map):
    self.component_to_urls_map = url_map

  def ParseChangelog(self, component, range_start, range_end):
    """Parses the SVN changelog of a component for a revision range.

    Args:
      component: Key into component_to_urls_map.
      range_start: First revision of the range.
      range_end: Last revision of the range.

    Returns:
      A tuple (revision_map, file_to_revision_map). revision_map maps a
      revision number (int) to a dict with keys 'author', 'message' and 'url'.
      file_to_revision_map maps a file path (with a leading '/trunk/'
      stripped) to a list of (revision_number, file_change_type) tuples.
      Both maps are empty if the component is unknown, no data is retrieved,
      or the response is not well-formed XML.
    """
    file_to_revision_map = {}
    revision_map = {}

    # Check if the current component is supported by reading the components
    # parsed from config file. If it is not, fail.
    url_map = self.component_to_urls_map.get(component)
    if not url_map:
      return (revision_map, file_to_revision_map)

    # Retrieve data from the url, return empty map if fails.
    revision_range_str = '%s:%s' % (range_start, range_end)
    url = url_map['changelog_url'] % revision_range_str
    response = crash_utils.GetDataFromURL(url)
    if not response:
      return (revision_map, file_to_revision_map)

    # Parse xml out of the returned string. If it fails, return empty map.
    try:
      xml_revisions = minidom.parseString(response)
    except ExpatError:
      return (revision_map, file_to_revision_map)

    # Iterate through the returned XML object.
    for revision in xml_revisions.getElementsByTagName('logentry'):
      # Get the revision number from xml. An entry without a usable revision
      # number cannot be keyed in either map, so skip it.
      try:
        revision_number = int(revision.getAttribute('revision'))
      except ValueError:
        continue

      # Create new revision object for each of the revision.
      revision_object = {}

      # Set author of the CL. A missing or empty <author> yields ''.
      authors = revision.getElementsByTagName('author')
      revision_object['author'] = _GetFirstChildValue(
          authors[0] if authors else None)

      # Iterate through the changed paths in the CL.
      paths = revision.getElementsByTagName('paths')
      if paths:
        for changed_path in paths[0].getElementsByTagName('path'):
          # Get path and file change type from the xml.
          file_path = _GetFirstChildValue(changed_path)
          if not file_path:
            continue
          file_change_type = changed_path.getAttribute('action')

          if file_path.startswith('/trunk/'):
            file_path = file_path[len('/trunk/'):]

          # Add file to the map.
          file_to_revision_map.setdefault(file_path, []).append(
              (revision_number, file_change_type))

      # Set commit message of the CL. An empty commit message has no text
      # node under <msg>, so fall back to ''.
      messages = revision.getElementsByTagName('msg')
      revision_object['message'] = _GetFirstChildValue(
          messages[0] if messages else None)

      # Set url of this CL.
      revision_object['url'] = url_map['revision_url'] % revision_number

      # Add this CL to the revision map.
      revision_map[revision_number] = revision_object

    return (revision_map, file_to_revision_map)

  def ParseLineDiff(self, path, component, file_change_type, revision_number):
    """Parses the line diff page of a file at a given revision.

    Args:
      path: Path of the file, substituted into the 'diff_url' template.
      component: Key into component_to_urls_map.
      file_change_type: Change type of the file; 'A' (added) is treated as if
          nothing changed.
      revision_number: Revision (int) whose diff against revision_number - 1
          is requested.

    Returns:
      (None, None, None) if the component is unknown. Otherwise a tuple
      (url, changed_line_numbers, changed_line_contents). When the diff cannot
      be retrieved or parsed, or the page reports no change, url is the
      revision url and both lists are empty; otherwise url is the diff url.
    """
    changed_line_numbers = []
    changed_line_contents = []

    url_map = self.component_to_urls_map.get(component)
    if not url_map:
      return (None, None, None)

    # If the file is added (not modified), treat it as if it is not changed.
    backup_url = url_map['revision_url'] % revision_number
    if file_change_type == 'A':
      return (backup_url, changed_line_numbers, changed_line_contents)

    # Retrieve data from the url. If no data is retrieved, return empty lists.
    url = url_map['diff_url'] % (path, revision_number - 1,
                                 revision_number, revision_number)
    data = crash_utils.GetDataFromURL(url)
    if not data:
      return (backup_url, changed_line_numbers, changed_line_contents)

    # The page has to be well-formed for minidom; treat a parse failure the
    # same way as a page with an unexpected layout.
    try:
      line_diff_html = minidom.parseString(data)
    except ExpatError:
      return (backup_url, changed_line_numbers, changed_line_contents)

    tables = line_diff_html.getElementsByTagName('table')
    # If there are not NUM_TABLES tables in the html page, there should be an
    # error in the html page.
    if len(tables) != NUM_TABLES_IN_LINEDIFF_PAGE:
      return (backup_url, changed_line_numbers, changed_line_contents)

    # Diff content is in the second table. Each line of the diff content
    # is in <tr>.
    trs = tables[1].getElementsByTagName('tr')
    # Class names are compared after dropping their first len('vc_diff_')
    # characters; the prefix itself is not verified.
    prefix_len = len('vc_diff_')

    # Filter trs so that it only contains diff chunk with contents. This is
    # done in a separate pass so that an 'empty' row anywhere in the table
    # results in empty lists being returned.
    filtered_trs = []
    for tr in trs:
      tr_class = tr.getAttribute('class')

      # Check for the classes of the <tr>s.
      if tr_class:
        tr_class = tr_class[prefix_len:]

      # Do not have to add header.
      if tr_class in ('header', 'chunk_header'):
        continue

      # If the class of tr is empty, this page does not have any change.
      if tr_class == 'empty':
        return (backup_url, changed_line_numbers, changed_line_contents)

      filtered_trs.append(tr)

    # Iterate through filtered trs, and grab line diff information.
    for tr in filtered_trs:
      tds = tr.getElementsByTagName('td')

      # If there aren't 3 tds, this line should not contain line diff.
      if len(tds) != NUM_TDS_IN_LINEDIFF_PAGE:
        continue

      # If line number information is not in hyperlink, or is not a number,
      # ignore this line.
      anchors = tds[0].getElementsByTagName('a')
      if not anchors:
        continue
      try:
        line_num = int(_GetFirstChildValue(anchors[0]))
      except ValueError:
        continue

      left_diff_type = tds[1].getAttribute('class')[prefix_len:]
      right_diff_type = tds[2].getAttribute('class')[prefix_len:]

      # Treat the line as modified only if both left and right diff has type
      # changed or both have different change type, and if the change is not
      # deletion.
      is_modified = (left_diff_type != right_diff_type or
                     left_diff_type == 'change')
      is_deletion = left_diff_type == 'remove' and right_diff_type == 'empty'
      if not is_modified or is_deletion:
        continue

      # The new content is the text of the right cell's first child; it is ''
      # when the cell is empty or its first child is not a text node.
      new_line = _GetFirstChildValue(tds[2])
      changed_line_numbers.append(line_num)
      changed_line_contents.append(new_line.strip())

    return (url, changed_line_numbers, changed_line_contents)

  def ParseBlameInfo(self, component, file_path, line, revision):
    """Parses the blame (annotation) page for one line of a file.

    Args:
      component: Key into component_to_urls_map.
      file_path: Path of the file, substituted into the 'blame_url' template.
      line: Index of the <tr> in the blame page that describes the line.
          NOTE(review): presumably a 1-based line number with a header row at
          index 0 -- confirm against the caller.
      revision: Revision at which the file is annotated.

    Returns:
      None if the component is unknown, the page cannot be retrieved or
      parsed, or the page does not have the expected layout. Otherwise a tuple
      (line_content, revision, author, revision_url, message), where revision
      is the revision that last changed the line, as text. message is '' if
      the changelog for that revision could not be retrieved.
    """
    url_map = self.component_to_urls_map.get(component)
    if not url_map:
      return None

    # Retrieve blame data from url, return None if fails.
    url = url_map['blame_url'] % (file_path, revision, revision)
    data = crash_utils.GetDataFromURL(url)
    if not data:
      return None

    try:
      blame_html = minidom.parseString(data)
    except ExpatError:
      return None

    # If the returned html page is an exception page, return None.
    titles = blame_html.getElementsByTagName('title')
    if titles and _GetFirstChildValue(titles[0]) == 'ViewVC Exception':
      return None

    # Each of the blame result is in <tr>.
    blame_results = blame_html.getElementsByTagName('tr')
    try:
      blame_result = blame_results[line]
    except IndexError:
      return None

    # There must be 4 <td> for each <tr>. If not, this page is wrong.
    tds = blame_result.getElementsByTagName('td')
    if len(tds) != NUM_TDS_IN_BLAME_PAGE:
      return None

    # The fourth <td> has the line content, separated by <span>s. Combine
    # those to get a string of changed line. If it has nothing, the line
    # is empty.
    content_parts = []
    for content in tds[3].childNodes:
      if content.nodeType == minidom.Node.TEXT_NODE:
        content_parts.append(content.nodeValue)
      else:
        content_parts.append(_GetFirstChildValue(content))
    line_content = ''.join(content_parts).strip()

    # If the current line has the same author/revision as the previous lines,
    # the result is not shown. Propagate up until we find the line with info.
    # The index is normalized to be non-negative so that walking up stops at
    # the first row instead of wrapping around to the end of the page.
    row_index = line if line >= 0 else line + len(blame_results)
    while not tds[1].firstChild:
      row_index -= 1
      if row_index < 0:
        return None
      tds = blame_results[row_index].getElementsByTagName('td')
      if len(tds) != NUM_TDS_IN_BLAME_PAGE:
        return None
    author = tds[1].firstChild.nodeValue

    # Revision can either be in hyperlink or plain text.
    anchors = tds[2].getElementsByTagName('a')
    blamed_revision = _GetFirstChildValue(anchors[0] if anchors else tds[2])
    try:
      blamed_revision_number = int(blamed_revision)
    except ValueError:
      return None

    # Look up the commit message. The changelog lookup returns an empty map on
    # failure, in which case the message is left empty.
    (revision_info, _) = self.ParseChangelog(
        component, blamed_revision, blamed_revision)
    message = revision_info.get(blamed_revision_number, {}).get('message', '')

    # Return the parsed information.
    revision_url = url_map['revision_url'] % blamed_revision_number
    return (line_content, blamed_revision, author, revision_url, message)