# Copyright (c) 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
403b57e008b61dfcb1fbad3aea950ae0e001748b0Torne (Richard Coles)
503b57e008b61dfcb1fbad3aea950ae0e001748b0Torne (Richard Coles)import base64
603b57e008b61dfcb1fbad3aea950ae0e001748b0Torne (Richard Coles)import xml.dom.minidom as minidom
703b57e008b61dfcb1fbad3aea950ae0e001748b0Torne (Richard Coles)from xml.parsers.expat import ExpatError
803b57e008b61dfcb1fbad3aea950ae0e001748b0Torne (Richard Coles)
903b57e008b61dfcb1fbad3aea950ae0e001748b0Torne (Richard Coles)import crash_utils
1003b57e008b61dfcb1fbad3aea950ae0e001748b0Torne (Richard Coles)from repository_parser_interface import ParserInterface
1103b57e008b61dfcb1fbad3aea950ae0e001748b0Torne (Richard Coles)
# Maps a lower-case file action name to a single-letter file change code.
# NOTE(review): nothing in this part of the file reads the table; confirm
# whether other modules import it before removing it.
FILE_CHANGE_TYPE_MAP = dict(
    add='A',
    copy='C',
    delete='D',
    modify='M',
    rename='R',
)
191320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
201320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
211320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccidef _ConvertToFileChangeType(file_action):
221320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  # TODO(stgao): verify impact on code that checks the file change type.
231320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci  return file_action[0].upper()
241320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci
2503b57e008b61dfcb1fbad3aea950ae0e001748b0Torne (Richard Coles)
class GitParser(ParserInterface):
  """Parser for Git repository in googlesource.

  Attributes:
    component_to_url_map: A map from component path to its repository name,
                          regression, etc. (the parsed_deps constructor
                          argument).
    url_parts_map: A map from url type to its url parts. These parts are
                   appended to the base url to form different urls.
  """

  def __init__(self, parsed_deps, url_parts_map):
    """Stores the parsed DEPS information and the url templates.

    Args:
      parsed_deps: A map from component path to a dict that holds at least
                   the 'repository' base url of the component.
      url_parts_map: A map holding the 'changelog_url', 'revision_url',
                     'diff_url' and 'blame_url' format strings.
    """
    self.component_to_url_map = parsed_deps
    self.url_parts_map = url_parts_map

  def ParseChangelog(self, component_path, range_start, range_end):
    """Parses the changelog of a component between two revisions.

    The html changelog page is tried first. If it is not well-formed xml, or
    does not have the expected layout, the JSON changelog is used instead.

    Args:
      component_path: Key into component_to_url_map naming the component.
      range_start: Start of the revision range.
      range_end: End of the revision range.

    Returns:
      A tuple (revision_map, file_to_revision_map). revision_map maps a git
      hash to a dict with 'author', 'message' and 'url'.
      file_to_revision_map maps a file path to a list of
      (git hash, file change type) tuples. Both are empty if the changelog
      page cannot be retrieved.
    """
    file_to_revision_map = {}
    revision_map = {}
    base_url = self.component_to_url_map[component_path]['repository']
    changelog_url = base_url + self.url_parts_map['changelog_url']
    revision_url = base_url + self.url_parts_map['revision_url']

    # Retrieve data from the url, return empty maps if it fails. Html url is
    # a url where the changelog can be parsed from html.
    url = changelog_url % (range_start, range_end)
    html_url = url + '?pretty=fuller'
    response = crash_utils.GetDataFromURL(html_url)
    if not response:
      return (revision_map, file_to_revision_map)

    # Parse xml out of the returned string. If it fails, try parsing
    # from JSON objects.
    try:
      dom = minidom.parseString(response)
    except ExpatError:
      self.ParseChangelogFromJSON(range_start, range_end, changelog_url,
                                  revision_url, revision_map,
                                  file_to_revision_map)
      return (revision_map, file_to_revision_map)

    # The revision information is in the third div through the second to
    # last one.
    divs = dom.getElementsByTagName('div')[2:-1]
    pres = dom.getElementsByTagName('pre')
    uls = dom.getElementsByTagName('ul')

    # Divs, pres and uls each contain revision information for one CL, so
    # they should have the same length. Otherwise fall back to JSON.
    if not divs or len(divs) != len(pres) or len(pres) != len(uls):
      self.ParseChangelogFromJSON(range_start, range_end, changelog_url,
                                  revision_url, revision_map,
                                  file_to_revision_map)
      return (revision_map, file_to_revision_map)

    # Iterate through divs and parse revisions.
    for (div, pre, ul) in zip(divs, pres, uls):
      # Create new revision object for each revision.
      revision = {}

      # There must be three <tr>s. If not, this entry is skipped.
      trs = div.getElementsByTagName('tr')
      if len(trs) != 3:
        continue

      # Retrieve git hash.
      githash = trs[0].getElementsByTagName('a')[0].firstChild.nodeValue

      # Retrieve and set author; anything from the first '<' on is dropped.
      author = trs[1].getElementsByTagName(
          'td')[0].firstChild.nodeValue.split('<')[0]
      revision['author'] = author

      # Retrieve and set message.
      revision['message'] = pre.firstChild.nodeValue

      # Set url of this CL.
      revision_url_part = self.url_parts_map['revision_url'] % githash
      revision['url'] = base_url + revision_url_part

      # Go through changed files, they are in li.
      for li in ul.getElementsByTagName('li'):
        # Retrieve path and action of the changed file.
        file_path = li.getElementsByTagName('a')[0].firstChild.nodeValue
        file_change_type = li.getElementsByTagName('span')[
            0].getAttribute('class')

        # Normalize file action so that it is same as SVN parser.
        file_change_type = _ConvertToFileChangeType(file_change_type)

        # Add the changed file to the map.
        file_to_revision_map.setdefault(file_path, []).append(
            (githash, file_change_type))

      # Add this revision object to the map.
      revision_map[githash] = revision

    # Parse one revision for the start range, because googlesource does not
    # include the start of the range.
    self.ParseRevision(revision_url, range_start, revision_map,
                       file_to_revision_map)

    return (revision_map, file_to_revision_map)

  def ParseChangelogFromJSON(self, range_start, range_end, changelog_url,
                             revision_url, revision_map, file_to_revision_map):
    """Parses changelog by going over the JSON file.

    Results are added to revision_map and file_to_revision_map in place;
    nothing is returned.

    Args:
      range_start: Starting range of the regression.
      range_end: Ending range of the regression.
      changelog_url: The url to retrieve changelog from.
      revision_url: The url to retrieve individual revision from.
      revision_map: A map from a git hash number to its revision information.
      file_to_revision_map: A map from file to a git hash in which it occurs.
    """
    # Compute URLs from given range, and retrieve changelog. Stop if it fails.
    changelog_url %= (range_start, range_end)
    json_url = changelog_url + '?format=json'
    response = crash_utils.GetDataFromURL(json_url)
    if not response:
      return

    # Parse changelog from the returned object. The returned string should
    # start with ")]}'\n", so start from the 6th character.
    revisions = crash_utils.LoadJSON(response[5:])
    if not revisions:
      return

    # Parse individual revision in the log.
    for revision in revisions['log']:
      githash = revision['commit']
      self.ParseRevision(revision_url, githash, revision_map,
                         file_to_revision_map)

    # Parse the revision with range_start, because googlesource ignores
    # that one.
    self.ParseRevision(revision_url, range_start, revision_map,
                       file_to_revision_map)

  def ParseRevision(self, revision_url, githash, revision_map,
                    file_to_revision_map):
    """Fetches one revision as JSON and records it in the given maps.

    Results are added to revision_map and file_to_revision_map in place;
    nothing is returned. The maps are left untouched if the revision cannot
    be retrieved or decoded.

    Args:
      revision_url: Format string that yields the revision url when given a
                    git hash.
      githash: The revision to retrieve.
      revision_map: A map from a git hash number to its revision information.
      file_to_revision_map: A map from file path to a list of
                            (git hash, file change type) tuples.
    """
    # Retrieve data from the URL, return if it fails.
    url = revision_url % githash
    response = crash_utils.GetDataFromURL(url + '?format=json')
    if not response:
      return

    # Load JSON object from the string, skipping the 5 character prefix that
    # precedes the JSON payload. If it fails, terminate the function.
    json_revision = crash_utils.LoadJSON(response[5:])
    if not json_revision:
      return

    # Use the hash reported by the server as the key.
    githash = json_revision['commit']

    # Set author, message and URL of this CL.
    revision = {
        'author': json_revision['author']['name'],
        'message': json_revision['message'],
        'url': url,
    }

    # Iterate through the changed files.
    for diff in json_revision['tree_diff']:
      file_path = diff['new_path']
      # A deleted file has no new path (it is reported as /dev/null), so
      # record the deletion under the path the file used to have.
      if file_path == '/dev/null' and diff.get('old_path'):
        file_path = diff['old_path']

      # Normalize file action so that it fits with svn_repository_parser.
      file_change_type = _ConvertToFileChangeType(diff['type'])

      # Add the file to the map.
      file_to_revision_map.setdefault(file_path, []).append(
          (githash, file_change_type))

    # Add this CL to the map.
    revision_map[githash] = revision

  def ParseLineDiff(self, path, component, file_change_type, githash):
    """Finds the lines of a file that were added or modified by a revision.

    Args:
      path: Path of the file within the component's repository.
      component: Key into component_to_url_map naming the component.
      file_change_type: Single-letter change code of the file in githash.
      githash: The revision whose diff is inspected.

    Returns:
      A tuple (url, changed_line_numbers, changed_line_contents). The two
      lists are parallel: line numbers in the new version of the file and the
      text of those lines without the leading '+'. If the file was added,
      copied or renamed, or the diff cannot be retrieved, the url is the
      revision url and both lists are empty; otherwise it is the diff url.
    """
    changed_line_numbers = []
    changed_line_contents = []
    base_url = self.component_to_url_map[component]['repository']
    backup_url = (base_url + self.url_parts_map['revision_url']) % githash

    # If the file is added (not modified), treat it as if it is not changed.
    if file_change_type in ('A', 'C', 'R'):
      # TODO(stgao): Maybe return whole file change for Add, Rename, and Copy?
      return (backup_url, changed_line_numbers, changed_line_contents)

    # Retrieve the diff data from URL, and if it fails, return empty lines.
    url = (base_url + self.url_parts_map['diff_url']) % (githash, path)
    data = crash_utils.GetDataFromURL(url + '?format=text')
    if not data:
      return (backup_url, changed_line_numbers, changed_line_contents)

    # Iterate through the lines in diff. Current line stays at -1 until the
    # first chunk header has been seen, which skips the file header lines.
    current_line = -1
    for line in base64.b64decode(data).splitlines():
      # b64decode yields bytes on Python 3; compare text with text.
      if not isinstance(line, str):
        line = line.decode('utf-8', 'replace')

      # Only trailing whitespace is dropped: the first column is the diff
      # marker and must be kept to classify the line correctly.
      line = line.rstrip()

      # If line starts with @@, a new chunk starts. The start line of the new
      # file follows the '+'; the ',count' part is optional.
      if line.startswith('@@'):
        new_range = line.split('+')[1].split(',')[0]
        current_line = int(new_range.split()[0])

      # Not in a chunk yet, or a "\ No newline at end of file" marker, which
      # is not a line of the file.
      elif current_line == -1 or line.startswith('\\'):
        continue

      else:
        # If line is either added or modified.
        if line.startswith('+'):
          changed_line_numbers.append(current_line)
          changed_line_contents.append(line[1:])

        # Do not increment current line if the change is 'delete'.
        if not line.startswith('-'):
          current_line += 1

    # Return url without '?format=text'.
    return (url, changed_line_numbers, changed_line_contents)

  def ParseBlameInfo(self, component, file_path, line, revision):
    """Finds the revision that last changed a given line of a file.

    Args:
      component: Key into component_to_url_map naming the component.
      file_path: Path of the file within the component's repository.
      line: Line number to look up.
      revision: The revision at which the file is blamed.

    Returns:
      A tuple (content, revision, author, revision_url, message) describing
      the revision that the line is attributed to. content is always None,
      and message is None if the revision's changelog cannot be retrieved.
      Returns None if the blame data cannot be retrieved or no region covers
      the line.
    """
    base_url = self.component_to_url_map[component]['repository']

    # Retrieve blame JSON file from googlesource. If it fails, return None.
    url_part = self.url_parts_map['blame_url'] % (revision, file_path)
    blame_url = base_url + url_part
    json_string = crash_utils.GetDataFromURL(blame_url)
    if not json_string:
      return None

    # Parse JSON object from the string. The returned string should
    # start with ")]}'\n", so start from the 6th character.
    annotation = crash_utils.LoadJSON(json_string[5:])
    if not annotation:
      return None

    # Go through the regions, which is a list of consecutive lines with same
    # author/revision.
    for region in annotation['regions']:
      start = region['start']
      count = region['count']

      # Skip regions that do not contain the line we want the blame info of.
      if not start <= line < start + count:
        continue

      blamed_revision = region['commit']
      author = region['author']['name']
      revision_url_parts = self.url_parts_map['revision_url'] % blamed_revision
      revision_url = base_url + revision_url_parts
      # TODO(jeun): Add a way to get content from JSON object.
      content = None

      # The commit message is not part of the blame data, so look it up. The
      # map is empty when the lookup fails; report no message in that case.
      (revision_info, _) = self.ParseChangelog(
          component, blamed_revision, blamed_revision)
      message = revision_info.get(blamed_revision, {}).get('message')
      return (content, blamed_revision, author, revision_url, message)

    # Return none if the region does not exist.
    return None
290