# Copyright (C) 2009 Google Inc. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
#     * Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""WebKit's Python module for interacting with patches."""

import logging
import re

_log = logging.getLogger("webkitpy.common.checkout.diff_parser")


# FIXME: This is broken. We should compile our regexps up-front
# instead of using a custom cache.
# Maps a pattern (as passed to match()) to its compiled regexp object.
_regexp_compile_cache = {}


# FIXME: This function should be removed.
def match(pattern, string):
    """Matches the string with the pattern, caching the compiled regexp.

    Args:
      pattern: The regular expression to apply.  It is compiled on first
               use and stored in _regexp_compile_cache under this key.
      string: The text to test.

    Returns:
      The match object produced by matching the pattern at the beginning
      of the string, or None when it does not match.
    """
    compiled = _regexp_compile_cache.get(pattern)
    if compiled is None:
        # First time this pattern is seen: compile it and remember it.
        compiled = _regexp_compile_cache[pattern] = re.compile(pattern)
    return compiled.match(string)


# FIXME: This belongs on DiffParser (e.g. as to_svn_diff()).
# Table of (compiled pattern, conversion) pairs used to rewrite git diff
# header lines as their SVN equivalents.  It is built once at import time
# rather than on every call.
# FIXME: This list should be a class member on DiffParser.
_GIT_TO_SVN_CONVERSIONS = (
    (re.compile(r"^diff --git \w/(.+) \w/(?P<FilePath>.+)"),
     lambda matched: "Index: " + matched.group('FilePath') + "\n"),
    (re.compile(r"^new file.*"),
     lambda matched: "\n"),
    # git abbreviates object names to 7 hex digits by default but may print
    # more, up to the full 40, so accept the whole range.
    (re.compile(r"^index [0-9a-f]{7,40}\.\.[0-9a-f]{7,40} [0-9]{6}"),
     lambda matched: "===================================================================\n"),
    (re.compile(r"^--- \w/(?P<FilePath>.+)"),
     lambda matched: "--- " + matched.group('FilePath') + "\n"),
    (re.compile(r"^\+\+\+ \w/(?P<FilePath>.+)"),
     lambda matched: "+++ " + matched.group('FilePath') + "\n"),
)


def git_diff_to_svn_diff(line):
    """Converts a git formatted diff line to a svn formatted line.

    Args:
      line: A string representing a line of the diff.

    Returns:
      The SVN-style replacement (terminated with a newline) when the line
      is one of the recognized git header lines; otherwise the line itself,
      unchanged.
    """
    for pattern, conversion in _GIT_TO_SVN_CONVERSIONS:
        matched = pattern.match(line)
        if matched:
            # The first matching pattern wins.
            return conversion(matched)
    return line
# Patterns used while parsing, compiled once at import time.
_GIT_DIFF_HEADER = re.compile(r"^diff --git \w/")
_FILE_DECLARATION = re.compile(r"^Index: (?P<FilePath>.+)")
_CHUNK_HEADER = re.compile(
    r"^@@ -(?P<OldStartLine>\d+)(,\d+)? \+(?P<NewStartLine>\d+)(,\d+)? @@")


def _identity(line):
    """Returns the diff line unchanged (no format conversion needed)."""
    return line


# FIXME: This method belongs on DiffParser
def get_diff_converter(first_diff_line):
    """Gets a converter function of diff lines.

    Args:
      first_diff_line: The first filename line of a diff file.
                       If this line is git formatted, we'll return a
                       converter from git to SVN.

    Returns:
      A function taking one diff line and returning the line to parse:
      git_diff_to_svn_diff for git formatted input, otherwise a function
      that returns its argument unchanged.
    """
    if _GIT_DIFF_HEADER.match(first_diff_line):
        return git_diff_to_svn_diff
    return _identity


# Parser states.
_INITIAL_STATE = 1  # No file declaration has been seen yet.
_DECLARED_FILE_PATH = 2  # A file was declared; waiting for a chunk header.
_PROCESSING_CHUNK = 3  # Inside a chunk, reading its lines.


class DiffFile(object):
    """Contains the information for one file in a patch.

    The field "lines" is a list which contains tuples in this format:
      (deleted_line_number, new_line_number, line_string)
    If deleted_line_number is zero, it means this line is newly added.
    If new_line_number is zero, it means this line is deleted.
    """
    # FIXME: Tuples generally grow into classes.  We should consider
    # adding a DiffLine object.

    def __init__(self, filename):
        self.filename = filename
        self.lines = []

    def added_or_modified_line_numbers(self):
        """Returns the new line numbers of all lines added by the patch."""
        # This logic was moved from patchreader.py, but may not be
        # the right API for this object long-term.
        return [new_line_number
                for deleted_line_number, new_line_number, _ in self.lines
                if not deleted_line_number]

    def add_new_line(self, line_number, line):
        """Records a line that only exists in the new version of the file."""
        self.lines.append((0, line_number, line))

    def add_deleted_line(self, line_number, line):
        """Records a line that only exists in the old version of the file."""
        self.lines.append((line_number, 0, line))

    def add_unchanged_line(self, deleted_line_number, new_line_number, line):
        """Records a context line present in both versions of the file."""
        self.lines.append((deleted_line_number, new_line_number, line))


# If this is going to be called DiffParser, it should be a re-useable parser.
# Otherwise we should rename it to ParsedDiff or just Diff.
class DiffParser(object):
    """A parser for a patch file.

    The field "files" is a dict whose key is the filename and value is
    a DiffFile object.
    """

    def __init__(self, diff_input):
        """Parses a diff.

        Args:
          diff_input: An iterable object yielding the lines of the diff.
        """
        self.files = self._parse_into_diff_files(diff_input)

    def _parse_into_diff_files(self, diff_input):
        """Builds the filename -> DiffFile dict from the lines of a diff.

        Args:
          diff_input: An iterable object yielding the lines of the diff.

        Returns:
          A dict mapping each declared filename to its DiffFile.
        """
        files = {}
        state = _INITIAL_STATE
        current_file = None
        old_diff_line = None
        new_diff_line = None
        transform_line = _identity
        for line in diff_input:
            line = line.rstrip("\n")
            if state == _INITIAL_STATE:
                # Until the first file is declared, keep detecting the diff
                # format; afterwards the last converter chosen stays in use.
                transform_line = get_diff_converter(line)
            line = transform_line(line)

            file_declaration = _FILE_DECLARATION.match(line)
            if file_declaration:
                filename = file_declaration.group('FilePath')
                current_file = DiffFile(filename)
                files[filename] = current_file
                state = _DECLARED_FILE_PATH
                continue

            lines_changed = _CHUNK_HEADER.match(line)
            if lines_changed:
                if current_file is None:
                    # There is no file to attach this chunk to.  Report it
                    # and skip it instead of entering chunk state, which
                    # would fail on the chunk's first line.
                    _log.error('Unexpected line change without file path '
                               'declaration: %r', line)
                    continue
                old_diff_line = int(lines_changed.group('OldStartLine'))
                new_diff_line = int(lines_changed.group('NewStartLine'))
                state = _PROCESSING_CHUNK
                continue

            if state == _PROCESSING_CHUNK:
                old_diff_line, new_diff_line = self._add_chunk_line(
                    current_file, line, old_diff_line, new_diff_line)
        return files

    @staticmethod
    def _add_chunk_line(diff_file, line, old_diff_line, new_diff_line):
        """Records one chunk line on diff_file.

        Args:
          diff_file: The DiffFile the current chunk belongs to.
          line: The chunk line, including its leading marker character.
          old_diff_line: Line number the next old-side line will have.
          new_diff_line: Line number the next new-side line will have.

        Returns:
          The (old_diff_line, new_diff_line) pair to use for the next line.
        """
        if line.startswith('+'):
            diff_file.add_new_line(new_diff_line, line[1:])
            new_diff_line += 1
        elif line.startswith('-'):
            diff_file.add_deleted_line(old_diff_line, line[1:])
            old_diff_line += 1
        elif line.startswith(' '):
            diff_file.add_unchanged_line(old_diff_line, new_diff_line, line[1:])
            old_diff_line += 1
            new_diff_line += 1
        elif line == '\\ No newline at end of file':
            # Nothing to do.  We may still have some added lines.
            pass
        else:
            _log.error('Unexpected diff format when parsing a '
                       'chunk: %r', line)
        return old_diff_line, new_diff_line