1# Copyright (c) 2009, 2010, 2011 Google Inc. All rights reserved.
2# Copyright (c) 2009 Apple Inc. All rights reserved.
3#
4# Redistribution and use in source and binary forms, with or without
5# modification, are permitted provided that the following conditions are
6# met:
7#
8#     * Redistributions of source code must retain the above copyright
9# notice, this list of conditions and the following disclaimer.
10#     * Redistributions in binary form must reproduce the above
11# copyright notice, this list of conditions and the following disclaimer
12# in the documentation and/or other materials provided with the
13# distribution.
14#     * Neither the name of Google Inc. nor the names of its
15# contributors may be used to endorse or promote products derived from
16# this software without specific prior written permission.
17#
18# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30import datetime
31import logging
32import os
33import re
34
35from webkitpy.common.checkout.scm.scm import SCM
36from webkitpy.common.memoized import memoized
37from webkitpy.common.system.executive import Executive, ScriptError
38
39_log = logging.getLogger(__name__)
40
41
class AmbiguousCommitError(Exception):
    """Error recording how many local commits exist and whether the working directory is dirty.

    Attributes:
        num_local_commits: the value passed to the constructor, unchanged.
        has_working_directory_changes: the value passed to the constructor, unchanged.
    """

    def __init__(self, num_local_commits, has_working_directory_changes):
        # Choose the wording by truthiness instead of indexing a two-element
        # list with the flag, so any truthy/falsy value (not only True/False
        # or 1/0) produces a message rather than an IndexError/TypeError.
        if has_working_directory_changes:
            working_directory_state = "not clean"
        else:
            working_directory_state = "clean"
        Exception.__init__(self, "Found %s local commits and the working directory is %s" % (
            num_local_commits, working_directory_state))
        self.num_local_commits = num_local_commits
        self.has_working_directory_changes = has_working_directory_changes
48
49
class Git(SCM):
    """SCM implementation that drives the ``git`` command-line executable.

    Several helpers used below (``_run``, ``_filesystem``, ``_executive``,
    ``checkout_root``, ``absolute_path``, ``_run_status_and_extract_filenames``,
    ``_head_svn_revision``) are not defined in this class; they are presumably
    inherited from SCM.
    """

    # Git doesn't appear to document error codes, but seems to return
    # 1 or 128, mostly.
    ERROR_FILE_IS_MISSING = 128

    executable_name = 'git'

    def __init__(self, cwd, **kwargs):
        """Forward ``cwd`` and all keyword arguments unchanged to SCM.__init__."""
        SCM.__init__(self, cwd, **kwargs)

    def _run_git(self, command_args, **kwargs):
        """Run ``git`` with ``command_args`` via self._run and return its result.

        The command runs in self.checkout_root unless the caller passes ``cwd``.
        All other keyword arguments are forwarded to self._run untouched.
        """
        full_command_args = [self.executable_name] + command_args
        # Read self.checkout_root only when no cwd was given; find_checkout_root()
        # passes an explicit cwd, presumably because checkout_root may not be
        # set yet at that point.
        if 'cwd' not in kwargs:
            kwargs['cwd'] = self.checkout_root
        return self._run(full_command_args, **kwargs)

    @classmethod
    def in_working_directory(cls, path, executive=None):
        """Return True if ``git rev-parse --is-inside-work-tree`` run in ``path`` prints "true".

        Returns False when launching git raises OSError.
        """
        try:
            executive = executive or Executive()
            output = executive.run_command(
                [cls.executable_name, 'rev-parse', '--is-inside-work-tree'],
                cwd=path, error_handler=Executive.ignore_error)
            return output.rstrip() == "true"
        except OSError:
            # The Windows bots seem to throw a WindowsError when git isn't installed.
            return False

    def find_checkout_root(self, path):
        """Return the top-level directory of the checkout containing ``path``.

        Falls back to "./" when ``path`` is empty or None.
        """
        # "git rev-parse --show-cdup" would be another way to get to the root
        start_dir = path or "./"
        checkout_root = self._run_git(['rev-parse', '--show-toplevel'], cwd=start_dir).strip()
        if not self._filesystem.isabs(checkout_root):  # Sometimes git returns relative paths
            # Join against the directory git actually ran in, so a None/empty
            # path does not reach filesystem.join().
            checkout_root = self._filesystem.join(start_dir, checkout_root)
        return checkout_root

    @classmethod
    def read_git_config(cls, key, cwd=None, executive=None):
        """Return the output of ``git config --get-all <key>`` with trailing newlines removed."""
        # FIXME: This should probably use cwd=self.checkout_root.
        # Pass --get-all for cases where the config has multiple values
        # Pass the cwd if provided so that we can handle the case of running webkit-patch outside of the working directory.
        # FIXME: This should use an Executive.
        executive = executive or Executive()
        return executive.run_command([cls.executable_name, "config", "--get-all", key], error_handler=Executive.ignore_error, cwd=cwd).rstrip('\n')

    def _discard_local_commits(self):
        """Hard-reset the current branch to the remote branch ref."""
        self._run_git(['reset', '--hard', self._remote_branch_ref()])

    def _local_commits(self, ref='HEAD'):
        """Return the one-line log entries of ``<ref>...<remote branch ref>`` as a list of lines."""
        return self._run_git(['log', '--pretty=oneline', ref + '...' + self._remote_branch_ref()]).splitlines()

    def _rebase_in_progress(self):
        """Return whether ``.git/rebase-apply`` exists under the checkout."""
        return self._filesystem.exists(self.absolute_path(self._filesystem.join('.git', 'rebase-apply')))

    def has_working_directory_changes(self):
        """Return True if ``git diff HEAD --name-only`` lists any file."""
        return self._run_git(['diff', 'HEAD', '--no-renames', '--name-only']) != ""

    def _discard_working_directory_changes(self):
        """Hard-reset to HEAD and abort any rebase that is in progress."""
        # Could run git clean here too, but that wouldn't match subversion
        self._run_git(['reset', 'HEAD', '--hard'])
        # Aborting rebase even though this does not match subversion
        if self._rebase_in_progress():
            self._run_git(['rebase', '--abort'])

    def status_command(self):
        """Return the argv list used to list changed files with their status letters."""
        # git status returns non-zero when there are changes, so we use git diff --name-status HEAD instead.
        # No file contents printed, thus utf-8 autodecoding in self.run is fine.
        return [self.executable_name, "diff", "--name-status", "--no-renames", "HEAD"]

    def _status_regexp(self, expected_types):
        """Return a regexp matching a ``<status letter>\\t<filename>`` line for the given status letters."""
        return '^(?P<status>[%s])\t(?P<filename>.+)$' % expected_types

    def add_list(self, paths, return_exit_code=False, recurse=True):
        """Run ``git add`` on ``paths``. ``recurse`` is accepted but not used here."""
        return self._run_git(["add"] + paths, return_exit_code=return_exit_code)

    def delete_list(self, paths):
        """Run ``git rm -f`` on ``paths``."""
        return self._run_git(["rm", "-f"] + paths)

    def move(self, origin, destination):
        """Run ``git mv -f origin destination``."""
        return self._run_git(["mv", "-f", origin, destination])

    def exists(self, path):
        """Return True unless ``git show HEAD:<path>`` exits with ERROR_FILE_IS_MISSING."""
        return_code = self._run_git(["show", "HEAD:%s" % path], return_exit_code=True, decode_output=False)
        return return_code != self.ERROR_FILE_IS_MISSING

    def _branch_from_ref(self, ref):
        """Return ``ref`` with every occurrence of 'refs/heads/' removed."""
        return ref.replace('refs/heads/', '')

    def current_branch(self):
        """Return the branch name HEAD symbolically points at."""
        head_ref = self._run_git(['symbolic-ref', '-q', 'HEAD']).strip()
        return self._branch_from_ref(head_ref)

    def _upstream_branch(self):
        """Return the ``branch.<current>.merge`` config value as a branch name (may be empty)."""
        current_branch = self.current_branch()
        merge_ref = self.read_git_config('branch.%s.merge' % current_branch, cwd=self.checkout_root, executive=self._executive).strip()
        return self._branch_from_ref(merge_ref)

    def _merge_base(self, git_commit=None):
        """Translate ``git_commit`` into the revision argument passed to ``git diff``.

        - No ``git_commit``: the merge base of the remote branch and HEAD.
        - ``UPSTREAM`` inside ``git_commit`` is replaced by the upstream branch name.
        - A value ending in ``....`` is returned without those four dots.
        - A value without ``..`` becomes ``<commit>^..<commit>``.

        Raises:
            ScriptError: if ``UPSTREAM`` is used but no upstream branch is configured.
        """
        if not git_commit:
            return self._remote_merge_base()

        # Rewrite UPSTREAM to the upstream branch
        if 'UPSTREAM' in git_commit:
            upstream = self._upstream_branch()
            if not upstream:
                raise ScriptError(message='No upstream/tracking branch set.')
            git_commit = git_commit.replace('UPSTREAM', upstream)

        # Special-case <refname>.... to include working copy changes, e.g., 'HEAD....' shows only the diffs from HEAD.
        if git_commit.endswith('....'):
            return git_commit[:-4]

        if '..' not in git_commit:
            git_commit = git_commit + "^.." + git_commit
        return git_commit

    def changed_files(self, git_commit=None):
        """Return the filenames with status A, D or M in the diff for ``git_commit``."""
        # FIXME: --diff-filter could be used to avoid the "extract_filenames" step.
        status_command = [self.executable_name, 'diff', '-r', '--name-status', "--no-renames", "--no-ext-diff", "--full-index", self._merge_base(git_commit)]
        # FIXME: I'm not sure we're returning the same set of files that SVN.changed_files is.
        # Added (A), Copied (C), Deleted (D), Modified (M), Renamed (R)
        return self._run_status_and_extract_filenames(status_command, self._status_regexp("ADM"))

    def _added_files(self):
        """Return filenames reported with status 'A' by status_command()."""
        return self._run_status_and_extract_filenames(self.status_command(), self._status_regexp("A"))

    def _deleted_files(self):
        """Return filenames reported with status 'D' by status_command()."""
        return self._run_status_and_extract_filenames(self.status_command(), self._status_regexp("D"))

    @staticmethod
    def supports_local_commits():
        """Return True: this SCM supports local commits."""
        return True

    def display_name(self):
        """Return the display name of this SCM, "git"."""
        return "git"

    def most_recent_log_matching(self, grep_str, path):
        """Return the ``git log -1`` output (ISO dates) for the newest commit whose message matches ``grep_str``."""
        # We use '--grep=' + foo rather than '--grep', foo because
        # git 1.7.0.4 (and earlier) didn't support the separate arg.
        return self._run_git(['log', '-1', '--grep=' + grep_str, '--date=iso', self.find_checkout_root(path)])

    def svn_revision(self, path):
        """Return the SVN revision from the newest ``git-svn-id:`` log line, or "" if none is found."""
        git_log = self.most_recent_log_matching('git-svn-id:', path)
        match = re.search(r"^\s*git-svn-id:.*@(?P<svn_revision>\d+)\ ", git_log, re.MULTILINE)
        if not match:
            return ""
        return str(match.group('svn_revision'))

    def timestamp_of_revision(self, path, revision):
        """Return the commit time of SVN ``revision`` as a UTC ``YYYY-MM-DDTHH:MM:SSZ`` string.

        Returns "" when the log output contains no parsable ``Date:`` line.
        """
        git_log = self.most_recent_log_matching('git-svn-id:.*@%s' % revision, path)
        match = re.search(r"^Date:\s*(\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2}) ([+-])(\d{2})(\d{2})$", git_log, re.MULTILINE)
        if not match:
            return ""

        # Manually modify the timezone since Git doesn't have an option to show it in UTC.
        # Git also truncates milliseconds but we're going to ignore that for now.
        year, month, day, hour, minute, second = [int(match.group(i)) for i in range(1, 7)]
        local_time = datetime.datetime(year, month, day, hour, minute, second, 0)

        # The sign applies to the whole offset: "-0330" is minus (3h 30m), so
        # the minutes must carry the sign as well as the hours.
        sign = 1 if match.group(7) == '+' else -1
        utc_offset = datetime.timedelta(hours=sign * int(match.group(8)), minutes=sign * int(match.group(9)))
        utc_time = local_time - utc_offset
        return utc_time.strftime('%Y-%m-%dT%H:%M:%SZ')

    def _prepend_svn_revision(self, diff):
        """Return ``diff`` prefixed with a "Subversion Revision:" line when a head revision is known."""
        revision = self._head_svn_revision()
        if not revision:
            return diff

        return "Subversion Revision: " + revision + '\n' + diff

    def create_patch(self, git_commit=None, changed_files=None):
        """Returns a byte array (str()) representing the patch file.
        Patch files are effectively binary since they may contain
        files of multiple different encodings."""

        # Put code changes at the top of the patch and layout tests
        # at the bottom, this makes for easier reviewing.
        config_path = self._filesystem.dirname(self._filesystem.path_to_module('webkitpy.common.config'))
        order_file = self._filesystem.join(config_path, 'orderfile')

        command = [self.executable_name, 'diff', '--binary', '--no-color', "--no-ext-diff", "--full-index", "--no-renames"]
        # Only pass -O when the order file exists; an empty-string argument
        # would otherwise be handed to git as if it were a revision.
        if self._filesystem.exists(order_file):
            command.append("-O%s" % order_file)
        command += [self._merge_base(git_commit), "--"]
        if changed_files:
            command += changed_files
        return self._prepend_svn_revision(self._run(command, decode_output=False, cwd=self.checkout_root))

    @memoized
    def svn_revision_from_git_commit(self, git_commit):
        """Return the SVN revision (int) for ``git_commit``, or None if the output is not an integer."""
        # git svn find-rev always exits 0, even when the revision or commit is not found.
        try:
            return int(self._run_git(['svn', 'find-rev', git_commit]).rstrip())
        except ValueError:
            return None

    def checkout_branch(self, name):
        """Quietly check out the branch ``name``."""
        self._run_git(['checkout', '-q', name])

    def create_clean_branch(self, name):
        """Quietly create and check out branch ``name`` starting at the remote branch ref."""
        self._run_git(['checkout', '-q', '-b', name, self._remote_branch_ref()])

    def blame(self, path):
        """Return the output of ``git blame <path>``."""
        return self._run_git(['blame', path])

    # Git-specific methods:
    def _branch_ref_exists(self, branch_ref):
        """Return True if ``git show-ref --verify`` exits 0 for ``branch_ref``."""
        return self._run_git(['show-ref', '--quiet', '--verify', branch_ref], return_exit_code=True) == 0

    def delete_branch(self, branch_name):
        """Force-delete the local branch ``branch_name`` if it exists."""
        if self._branch_ref_exists('refs/heads/' + branch_name):
            self._run_git(['branch', '-D', branch_name])

    def _remote_merge_base(self):
        """Return the merge base of the remote branch ref and HEAD."""
        return self._run_git(['merge-base', self._remote_branch_ref(), 'HEAD']).strip()

    def _remote_branch_ref(self):
        """Return the remote ref to diff against.

        Uses the part after ':' of the first ``svn-remote.svn.fetch`` config
        value; when that config is absent, falls back to
        ``refs/remotes/origin/master``.

        Raises:
            ScriptError: if neither the config value nor the fallback ref exists.
        """
        # Use references so that we can avoid collisions, e.g. we don't want to operate on refs/heads/trunk if it exists.
        remote_branch_refs = self.read_git_config('svn-remote.svn.fetch', cwd=self.checkout_root, executive=self._executive)
        if not remote_branch_refs:
            remote_master_ref = 'refs/remotes/origin/master'
            if not self._branch_ref_exists(remote_master_ref):
                raise ScriptError(message="Can't find a branch to diff against. svn-remote.svn.fetch is not in the git config and %s does not exist" % remote_master_ref)
            return remote_master_ref

        # FIXME: What's the right behavior when there are multiple svn-remotes listed?
        # For now, just use the first one.
        first_remote_branch_ref = remote_branch_refs.split('\n')[0]
        return first_remote_branch_ref.split(':')[1]

    def commit_locally_with_message(self, message, commit_all_working_directory_changes=True):
        """Commit with ``message`` fed on stdin; adds ``--all`` unless told otherwise."""
        command = ['commit', '-F', '-']
        if commit_all_working_directory_changes:
            command.insert(1, '--all')
        self._run_git(command, input=message)

    # These methods are git specific and are meant to provide support for the Git oriented workflow
    # that Blink is moving towards, hence there are no equivalent methods in the SVN class.

    def pull(self):
        """Run ``git pull``."""
        self._run_git(['pull'])

    def latest_git_commit(self):
        """Return the full hash of the most recent commit in the log."""
        return self._run_git(['log', '-1', '--format=%H']).strip()

    def git_commits_since(self, commit):
        """Return the hashes in ``<commit>..master``, oldest first."""
        return self._run_git(['log', commit + '..master', '--format=%H', '--reverse']).split()

    def git_commit_detail(self, commit, format=None):
        """Return ``git log -1`` output for ``commit``, optionally with a ``--format`` string."""
        args = ['log', '-1', commit]
        if format:
            args.append('--format=' + format)
        return self._run_git(args)

    def _branch_tracking_remote_master(self):
        """Return the local branch that ``git remote show origin -n`` reports as merging with remote master.

        Raises:
            ScriptError: if no such branch is listed.
        """
        origin_info = self._run_git(['remote', 'show', 'origin', '-n'])
        match = re.search(r"^\s*(?P<branch_name>\S+)\s+merges with remote master$", origin_info, re.MULTILINE)
        if not match:
            raise ScriptError(message="Unable to find local branch tracking origin/master.")
        branch = str(match.group("branch_name"))
        return self._branch_from_ref(self._run_git(['rev-parse', '--symbolic-full-name', branch]).strip())

    def is_cleanly_tracking_remote_master(self):
        """Return True if there are no working-directory changes, the current
        branch is the one tracking remote master, and it has no local commits."""
        if self.has_working_directory_changes():
            return False
        current_branch = self.current_branch()
        # Resolve the tracking branch once; each lookup runs two git commands.
        tracking_branch = self._branch_tracking_remote_master()
        if current_branch != tracking_branch:
            return False
        return not self._local_commits(tracking_branch)

    def ensure_cleanly_tracking_remote_master(self):
        """Discard working-directory changes, check out the branch tracking remote master, and drop local commits."""
        self._discard_working_directory_changes()
        self._run_git(['checkout', '-q', self._branch_tracking_remote_master()])
        self._discard_local_commits()