# Copyright (C) 2009 Google Inc. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#    * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#    * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
#    * Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

"""WebKit's Python module for interacting with patches."""

import logging
import re

# Module-level logger; the diff parser reports malformed input through it.
_log = logging.getLogger("webkitpy.common.checkout.diff_parser")


# FIXME: This is broken. We should compile our regexps up-front
# instead of using a custom cache.
# Maps a pattern string to its compiled regular expression object.
_regexp_compile_cache = {}


# FIXME: This function should be removed.
def match(pattern, string):
    """Matches the string with the pattern, caching the compiled regexp.

    Args:
      pattern: A regular expression pattern string.
      string: The string to test; matching starts at its beginning.

    Returns:
      The match object, or None if the string does not match.
    """
    # A single lookup on the common (already cached) path; the pattern is
    # compiled and stored only the first time it is seen.
    try:
        compiled = _regexp_compile_cache[pattern]
    except KeyError:
        compiled = _regexp_compile_cache[pattern] = re.compile(pattern)
    return compiled.match(string)


# FIXME: This list should be a class member on DiffParser.
# Conversion table, compiled once at import time.  Each entry pairs a pattern
# for a git-formatted line with a function that builds the svn-formatted
# replacement from the match object.  Entries are tried in order.
_GIT_TO_SVN_CONVERSIONS = (
    (re.compile(r"^diff --git \w/(.+) \w/(?P<FilePath>.+)"),
     lambda matched: "Index: " + matched.group('FilePath') + "\n"),
    (re.compile(r"^new file.*"),
     lambda matched: "\n"),
    # Accept abbreviated (7 or more characters) as well as full 40-character
    # object names; git does not always abbreviate to exactly 7.
    (re.compile(r"^index [0-9a-f]{7,40}\.\.[0-9a-f]{7,40} [0-9]{6}"),
     lambda matched: "===================================================================\n"),
    (re.compile(r"^--- \w/(?P<FilePath>.+)"),
     lambda matched: "--- " + matched.group('FilePath') + "\n"),
    (re.compile(r"^\+\+\+ \w/(?P<FilePath>.+)"),
     lambda matched: "+++ " + matched.group('FilePath') + "\n"),
)


# FIXME: This belongs on DiffParser (e.g. as to_svn_diff()).
def git_diff_to_svn_diff(line):
    """Converts a git formatted diff line to a svn formatted line.

    Args:
      line: A string representing a line of the diff.

    Returns:
      The svn-formatted replacement (terminated by a newline) when the line
      is one of the recognized git header lines; otherwise the line itself,
      unchanged.
    """
    for compiled_pattern, conversion in _GIT_TO_SVN_CONVERSIONS:
        matched = compiled_pattern.match(line)
        if matched:
            return conversion(matched)
    return line


# Matches the "diff --git" header line of a git-formatted diff.  Compiled
# once up-front rather than on demand.
_GIT_DIFF_HEADER_RE = re.compile(r"^diff --git \w/")


# FIXME: This method belongs on DiffParser
def get_diff_converter(first_diff_line):
    """Gets a converter function of diff lines.

    Args:
      first_diff_line: The first filename line of a diff file.
                       If this line is git formatted, we'll return a
                       converter from git to SVN.

    Returns:
      A function taking one diff line and returning the line to parse:
      git_diff_to_svn_diff for git formatted input, otherwise a function
      that returns its argument unchanged.
    """
    if _GIT_DIFF_HEADER_RE.match(first_diff_line):
        return git_diff_to_svn_diff
    return lambda line: line


# States of the line-by-line state machine used by DiffParser.
# No "Index: ..." file declaration has been seen yet.
_INITIAL_STATE = 1
# An "Index: ..." line was seen; no "@@" chunk header for that file yet.
_DECLARED_FILE_PATH = 2
# A "@@ ... @@" chunk header was seen; following lines are chunk content.
_PROCESSING_CHUNK = 3


class DiffFile(object):
    """Holds the parsed lines of a single file within a patch.

    "lines" is a list of tuples shaped as
       (deleted_line_number, new_line_number, line_string)
    A zero deleted_line_number marks a newly added line; a zero
    new_line_number marks a deleted line.
    """
    # FIXME: Tuples generally grow into classes.  We should consider
    # adding a DiffLine object.

    def __init__(self, filename):
        self.filename = filename
        self.lines = []

    def added_or_modified_line_numbers(self):
        """Returns the new line numbers of every entry without an old number."""
        # This logic was moved from patchreader.py, but may not be
        # the right API for this object long-term.
        numbers = []
        for entry in self.lines:
            if entry[0]:
                # The line exists in the old file: deleted or unchanged.
                continue
            numbers.append(entry[1])
        return numbers

    def add_new_line(self, line_number, line):
        """Records a line present only in the new version of the file."""
        entry = (0, line_number, line)
        self.lines.append(entry)

    def add_deleted_line(self, line_number, line):
        """Records a line present only in the old version of the file."""
        entry = (line_number, 0, line)
        self.lines.append(entry)

    def add_unchanged_line(self, deleted_line_number, new_line_number, line):
        """Records a context line together with its old and new numbers."""
        entry = (deleted_line_number, new_line_number, line)
        self.lines.append(entry)


# If this is going to be called DiffParser, it should be a re-useable parser.
# Otherwise we should rename it to ParsedDiff or just Diff.
class DiffParser(object):
    """A parser for a patch file.

    The field "files" is a dict whose key is the filename and value is
    a DiffFile object.
    """

    def __init__(self, diff_input):
        """Parses a diff.

        Args:
          diff_input: An iterable object yielding the lines of the diff.
        """
        self.files = self._parse_into_diff_files(diff_input)

    # FIXME: This function is way too long and needs to be broken up.
    def _parse_into_diff_files(self, diff_input):
        """Runs the parsing state machine over the lines of a diff.

        Args:
          diff_input: An iterable object yielding the lines of the diff.

        Returns:
          A dict mapping each declared filename to its DiffFile.  If the
          same filename is declared more than once, the last declaration
          replaces the earlier ones.
        """
        files = {}
        state = _INITIAL_STATE
        current_file = None
        old_diff_line = None
        new_diff_line = None
        for line in diff_input:
            line = line.rstrip("\n")
            # Until the first file declaration is found, keep probing each
            # line to decide whether the input is git or svn formatted.
            if state == _INITIAL_STATE:
                transform_line = get_diff_converter(line)
            line = transform_line(line)

            file_declaration = match(r"^Index: (?P<FilePath>.+)", line)
            if file_declaration:
                filename = file_declaration.group('FilePath')
                current_file = DiffFile(filename)
                files[filename] = current_file
                state = _DECLARED_FILE_PATH
                continue

            lines_changed = match(r"^@@ -(?P<OldStartLine>\d+)(,\d+)? \+(?P<NewStartLine>\d+)(,\d+)? @@", line)
            if lines_changed:
                if state not in (_DECLARED_FILE_PATH, _PROCESSING_CHUNK):
                    _log.error('Unexpected line change without file path '
                               'declaration: %r', line)
                    # There is no DiffFile to record this chunk into, so
                    # skip it instead of entering the chunk state and
                    # failing on current_file being None.
                    continue
                old_diff_line = int(lines_changed.group('OldStartLine'))
                new_diff_line = int(lines_changed.group('NewStartLine'))
                state = _PROCESSING_CHUNK
                continue

            if state == _PROCESSING_CHUNK:
                if line.startswith('+'):
                    current_file.add_new_line(new_diff_line, line[1:])
                    new_diff_line += 1
                elif line.startswith('-'):
                    current_file.add_deleted_line(old_diff_line, line[1:])
                    old_diff_line += 1
                elif line.startswith(' '):
                    current_file.add_unchanged_line(old_diff_line, new_diff_line, line[1:])
                    old_diff_line += 1
                    new_diff_line += 1
                elif line == '\\ No newline at end of file':
                    # Nothing to do.  We may still have some added lines.
                    pass
                else:
                    _log.error('Unexpected diff format when parsing a '
                               'chunk: %r', line)
        return files
187