1# Copyright (c) 2009, Google Inc. All rights reserved.
2# Copyright (c) 2009 Apple Inc. All rights reserved.
3#
4# Redistribution and use in source and binary forms, with or without
5# modification, are permitted provided that the following conditions are
6# met:
7#
8#     * Redistributions of source code must retain the above copyright
9# notice, this list of conditions and the following disclaimer.
10#     * Redistributions in binary form must reproduce the above
11# copyright notice, this list of conditions and the following disclaimer
12# in the documentation and/or other materials provided with the
13# distribution.
14#     * Neither the name of Google Inc. nor the names of its
15# contributors may be used to endorse or promote products derived from
16# this software without specific prior written permission.
17#
18# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29#
30# Python module for interacting with an SCM system (like SVN or Git)
31
32import logging
33import os
34import re
35import sys
36import shutil
37
38from webkitpy.common.memoized import memoized
39from webkitpy.common.system.deprecated_logging import error, log
40from webkitpy.common.system.executive import Executive, run_command, ScriptError
41from webkitpy.common.system import ospath
42
43
def find_checkout_root():
    """Return the checkout root reported by the default SCM object.

    The SCM object comes from default_scm(); its ``checkout_root`` attribute
    is returned, or None when no SCM object could be determined.

    """
    scm = default_scm()
    return scm.checkout_root if scm else None
55
56
def default_scm(patch_directories=None):
    """Return the SCM object for the CWD, falling back to this module's directory.

    The current working directory is probed first. When it is not inside a
    checkout, the directory containing this module is probed instead and a
    message is logged. When neither probe succeeds, error() is called and
    the (falsy) probe result is returned.

    """
    current_directory = os.getcwd()
    scm = detect_scm_system(current_directory, patch_directories)
    if scm:
        return scm

    # Fall back to the checkout (if any) that contains this module.
    module_directory = os.path.dirname(os.path.abspath(__file__))
    scm = detect_scm_system(module_directory, patch_directories)
    if not scm:
        error("FATAL: Failed to determine the SCM system for either %s or %s" % (current_directory, module_directory))
    else:
        log("The current directory (%s) is not a WebKit checkout, using %s" % (current_directory, scm.checkout_root))
    return scm
76
77
def detect_scm_system(path, patch_directories=None):
    """Return an SVN or Git object for path, or None if it is in neither.

    SVN is probed before Git. An empty patch_directories list is treated
    the same as None; patch_directories is only forwarded to SVN.
    """
    root = os.path.abspath(path)
    # An empty list means "not specified".
    directories = None if patch_directories == [] else patch_directories

    if SVN.in_working_directory(root):
        return SVN(cwd=root, patch_directories=directories)
    elif Git.in_working_directory(root):
        return Git(cwd=root)
    return None
91
92
def first_non_empty_line_after_index(lines, index=0):
    """Return the position of the first non-blank entry of lines at or after index.

    A line is blank when it is empty or contains only whitespace. When every
    remaining line is blank, the position one past the last inspected line
    is returned.
    """
    blank_line = re.compile(r"^\s*$")
    remaining = lines[index:]
    for offset, candidate in enumerate(remaining):
        if not blank_line.match(candidate):
            return index + offset
    return index + len(remaining)
101
102
class CommitMessage:
    """A commit message held as a list of lines with leading blank lines dropped."""

    def __init__(self, message):
        """Store message (a sequence of lines), skipping any leading blank lines."""
        self.message_lines = message[first_non_empty_line_after_index(message, 0):]

    def body(self, lstrip=False):
        """Return everything after the first line, newline-terminated.

        Blank lines directly following the first line are skipped. When
        lstrip is true, leading whitespace is removed from every body line.
        """
        lines = self.message_lines[first_non_empty_line_after_index(self.message_lines, 1):]
        if lstrip:
            lines = [line.lstrip() for line in lines]
        return "\n".join(lines) + "\n"

    def description(self, lstrip=False, strip_url=False):
        """Return the first line of the message.

        Args:
            lstrip: remove leading whitespace from the line.
            strip_url: remove a leading "<...> " prefix while keeping any
                whitespace that preceded it.
        """
        line = self.message_lines[0]
        if lstrip:
            line = line.lstrip()
        if strip_url:
            # The replacement must be a raw string: a plain "\1" is the
            # control character \x01, not a reference to group 1.
            line = re.sub(r"^(\s*)<.+> ", r"\1", line)
        return line

    def message(self):
        """Return the whole message joined with newlines, newline-terminated."""
        return "\n".join(self.message_lines) + "\n"
123
124
class CheckoutNeedsUpdate(ScriptError):
    """ScriptError raised by commit_error_handler for an out-of-date checkout."""

    def __init__(self, script_args, exit_code, output, cwd):
        # Forward every detail of the failed command to ScriptError.
        details = {
            "script_args": script_args,
            "exit_code": exit_code,
            "output": output,
            "cwd": cwd,
        }
        ScriptError.__init__(self, **details)
128
129
def commit_error_handler(error):
    """Error handler for commit commands.

    Raises CheckoutNeedsUpdate when the command output mentions
    "resource out of date"; otherwise defers to the Executive default handler.
    """
    out_of_date = re.search("resource out of date", error.output)
    if not out_of_date:
        Executive.default_error_handler(error)
        return
    raise CheckoutNeedsUpdate(script_args=error.script_args, exit_code=error.exit_code, output=error.output, cwd=error.cwd)
134
135
class AuthenticationError(Exception):
    """Exception carrying the server host and a prompt_for_password flag."""

    def __init__(self, server_host, prompt_for_password=False):
        self.server_host, self.prompt_for_password = server_host, prompt_for_password
140
141
class AmbiguousCommitError(Exception):
    """Exception carrying the local commit count and working-directory cleanliness."""

    def __init__(self, num_local_commits, working_directory_is_clean):
        self.num_local_commits, self.working_directory_is_clean = (
            num_local_commits, working_directory_is_clean)
146
147
148# SCM methods are expected to return paths relative to self.checkout_root.
class SCM:
    """Base class listing the operations every SCM backend provides.

    Methods whose body calls _subclass_must_implement() raise
    NotImplementedError until a subclass overrides them. SCM methods are
    expected to return paths relative to self.checkout_root.
    """

    def __init__(self, cwd, executive=None):
        """Record cwd, locate the checkout root and set up the command runner.

        Args:
            cwd: directory this SCM object operates from.
            executive: object used to run commands; a new Executive is
                created when omitted.
        """
        self.cwd = cwd
        self.checkout_root = self.find_checkout_root(self.cwd)
        self.dryrun = False
        self._executive = executive or Executive()

    # A wrapper used by subclasses to create processes.
    def run(self, args, cwd=None, input=None, error_handler=None, return_exit_code=False, return_stderr=True, decode_output=True):
        """Run args through the executive, forwarding every option unchanged."""
        # FIXME: We should set cwd appropriately.
        return self._executive.run_command(args,
                           cwd=cwd,
                           input=input,
                           error_handler=error_handler,
                           return_exit_code=return_exit_code,
                           return_stderr=return_stderr,
                           decode_output=decode_output)

    # SCM always returns repository relative path, but sometimes we need
    # absolute paths to pass to rm, etc.
    def absolute_path(self, repository_relative_path):
        """Join repository_relative_path onto the checkout root."""
        return os.path.join(self.checkout_root, repository_relative_path)

    # FIXME: This belongs in Checkout, not SCM.
    def scripts_directory(self):
        """Return the Tools/Scripts directory under the checkout root."""
        return os.path.join(self.checkout_root, "Tools", "Scripts")

    # FIXME: This belongs in Checkout, not SCM.
    def script_path(self, script_name):
        """Return the path of script_name inside scripts_directory()."""
        return os.path.join(self.scripts_directory(), script_name)

    def ensure_clean_working_directory(self, force_clean):
        """Clean the working directory if it is dirty and force_clean is set.

        Raises:
            ScriptError: the working directory has modifications and
                force_clean is false (the status output is printed first).
        """
        if self.working_directory_is_clean():
            return
        if not force_clean:
            # FIXME: Shouldn't this use cwd=self.checkout_root?
            # Parenthesized so the line parses as both a Python 2 print
            # statement and a Python 3 function call.
            print(self.run(self.status_command(), error_handler=Executive.ignore_error))
            raise ScriptError(message="Working directory has modifications, pass --force-clean or --no-clean to continue.")
        log("Cleaning working directory")
        self.clean_working_directory()

    def ensure_no_local_commits(self, force):
        """Discard local commits; report an error first when force is false.

        Does nothing when local commits are unsupported or none exist.
        """
        if not self.supports_local_commits():
            return
        commits = self.local_commits()
        if not commits:
            return
        if not force:
            error("Working directory has local commits, pass --force-clean to continue.")
        self.discard_local_commits()

    def run_status_and_extract_filenames(self, status_command, status_regexp):
        """Run status_command and return the 'filename' group of each matching line.

        status_regexp must define a named group called 'filename'; lines
        that do not match are skipped.
        """
        filenames = []
        # We run with cwd=self.checkout_root so that returned-paths are root-relative.
        for line in self.run(status_command, cwd=self.checkout_root).splitlines():
            match = re.search(status_regexp, line)
            if not match:
                continue
            # status = match.group('status')
            filenames.append(match.group('filename'))
        return filenames

    def strip_r_from_svn_revision(self, svn_revision):
        """Return the digits of an "r123"-style revision, or the input unchanged."""
        match = re.match(r"^r(?P<svn_revision>\d+)", unicode(svn_revision))
        if match:
            return match.group('svn_revision')
        return svn_revision

    def svn_revision_from_commit_text(self, commit_text):
        """Extract the revision from commit_text using commit_success_regexp().

        NOTE(review): when commit_text does not match, match is None and
        this raises AttributeError; left as-is for existing callers.
        """
        match = re.search(self.commit_success_regexp(), commit_text, re.MULTILINE)
        return match.group('svn_revision')

    @staticmethod
    def _subclass_must_implement():
        raise NotImplementedError("subclasses must implement")

    @staticmethod
    def in_working_directory(path):
        SCM._subclass_must_implement()

    @staticmethod
    def find_checkout_root(path):
        SCM._subclass_must_implement()

    @staticmethod
    def commit_success_regexp():
        SCM._subclass_must_implement()

    def working_directory_is_clean(self):
        self._subclass_must_implement()

    def clean_working_directory(self):
        self._subclass_must_implement()

    def status_command(self):
        self._subclass_must_implement()

    def add(self, path, return_exit_code=False):
        self._subclass_must_implement()

    def delete(self, path):
        self._subclass_must_implement()

    def changed_files(self, git_commit=None):
        self._subclass_must_implement()

    def changed_files_for_revision(self, revision):
        self._subclass_must_implement()

    def revisions_changing_file(self, path, limit=5):
        self._subclass_must_implement()

    def added_files(self):
        self._subclass_must_implement()

    def conflicted_files(self):
        self._subclass_must_implement()

    def display_name(self):
        self._subclass_must_implement()

    def create_patch(self, git_commit=None, changed_files=None):
        self._subclass_must_implement()

    def committer_email_for_revision(self, revision):
        self._subclass_must_implement()

    def contents_at_revision(self, path, revision):
        self._subclass_must_implement()

    def diff_for_revision(self, revision):
        self._subclass_must_implement()

    def diff_for_file(self, path, log=None):
        self._subclass_must_implement()

    def show_head(self, path):
        self._subclass_must_implement()

    def apply_reverse_diff(self, revision):
        self._subclass_must_implement()

    def revert_files(self, file_paths):
        self._subclass_must_implement()

    def commit_with_message(self, message, username=None, password=None, git_commit=None, force_squash=False, changed_files=None):
        self._subclass_must_implement()

    def svn_commit_log(self, svn_revision):
        self._subclass_must_implement()

    def last_svn_commit_log(self):
        self._subclass_must_implement()

    # Subclasses must indicate if they support local commits,
    # but the SCM baseclass will only call local_commits methods when this is true.
    @staticmethod
    def supports_local_commits():
        SCM._subclass_must_implement()

    # Takes self so that calling it on an instance raises NotImplementedError
    # (like its siblings) rather than a TypeError about argument count.
    def remote_merge_base(self):
        self._subclass_must_implement()

    def commit_locally_with_message(self, message):
        error("Your source control manager does not support local commits.")

    def discard_local_commits(self):
        pass

    def local_commits(self):
        return []
321
322
323# A mixin class that represents common functionality for SVN and Git-SVN.
class SVNRepository:
    """Mixin with functionality shared by SVN and Git-SVN.

    The class it is mixed into must provide a run() method.
    """

    def has_authorization_for_realm(self, realm, home_directory=os.getenv("HOME")):
        """Return True if a file under ~/.subversion mentions realm.

        Args:
            realm: text passed to grep as the pattern to look for.
            home_directory: directory containing ".subversion"; defaults to
                the HOME environment variable read at import time.

        Returns:
            False when home_directory is unset or has no ".subversion"
            directory, or when no existing file matched; True otherwise.
        """
        # Assumes find and grep are installed.
        # HOME may be unset, in which case the default is None.
        if not home_directory or not os.path.isdir(os.path.join(home_directory, ".subversion")):
            return False
        find_args = ["find", ".subversion", "-type", "f", "-exec", "grep", "-q", realm, "{}", ";", "-print"]
        find_output = self.run(find_args, cwd=home_directory, error_handler=Executive.ignore_error).rstrip()
        # find prints one path per matching file, so check each line
        # rather than treating the whole output as a single path.
        return any(os.path.isfile(os.path.join(home_directory, candidate))
                   for candidate in find_output.splitlines() if candidate)
332
333
class SVN(SCM, SVNRepository):
    """SCM implementation that drives the "svn" command-line client."""

    # FIXME: These belong in common.config.urls
    svn_server_host = "svn.webkit.org"
    svn_server_realm = "<http://svn.webkit.org:80> Mac OS Forge"

    def __init__(self, cwd, patch_directories, executive=None):
        """Create an SVN object for the checkout containing cwd.

        Args:
            cwd: directory inside the checkout.
            patch_directories: directories passed to "svn status" by
                changed_files(); None means cwd relative to the checkout root.
            executive: optional command runner, forwarded to SCM.

        Raises:
            ScriptError: patch_directories is an empty list.
        """
        SCM.__init__(self, cwd, executive)
        self._bogus_dir = None
        self._delete_bogus_dir = False
        if patch_directories == []:
            # FIXME: ScriptError is for Executive, this should probably be a normal Exception.
            raise ScriptError(message='Empty list of patch directories passed to SCM.__init__')
        elif patch_directories is None:
            self._patch_directories = [ospath.relpath(cwd, self.checkout_root)]
        else:
            self._patch_directories = patch_directories

    @staticmethod
    def in_working_directory(path):
        """Return True if path contains a ".svn" directory."""
        return os.path.isdir(os.path.join(path, '.svn'))

    @classmethod
    def find_uuid(cls, path):
        """Return the "Repository UUID" for path, or None outside a working directory."""
        if not cls.in_working_directory(path):
            return None
        return cls.value_from_svn_info(path, 'Repository UUID')

    @classmethod
    def value_from_svn_info(cls, path, field_name):
        """Return the value of field_name from "svn info path".

        Raises:
            ScriptError: the output has no "field_name: value" line.
        """
        svn_info_args = ['svn', 'info', path]
        info_output = run_command(svn_info_args).rstrip()
        match = re.search("^%s: (?P<value>.+)$" % field_name, info_output, re.MULTILINE)
        if not match:
            raise ScriptError(script_args=svn_info_args, message='svn info did not contain a %s.' % field_name)
        return match.group('value')

    @staticmethod
    def find_checkout_root(path):
        """Return the topmost directory that shares path's repository UUID.

        Returns path itself when it is not in a working directory, and None
        when the filesystem root is reached without the UUID changing.
        """
        uuid = SVN.find_uuid(path)
        # If |path| is not in a working directory, we're supposed to return |path|.
        if not uuid:
            return path
        # Search up the directory hierarchy until we find a different UUID.
        last_path = None
        while True:
            if uuid != SVN.find_uuid(path):
                return last_path
            last_path = path
            path = os.path.dirname(path)
            # dirname() of the filesystem root is the root itself.
            if last_path == path:
                return None

    @staticmethod
    def commit_success_regexp():
        return r"^Committed revision (?P<svn_revision>\d+)\.$"

    @memoized
    def svn_version(self):
        """Return the output of "svn --version --quiet"."""
        return self.run(['svn', '--version', '--quiet'])

    def working_directory_is_clean(self):
        """Return True if "svn diff" at the checkout root prints nothing."""
        return self.run(["svn", "diff"], cwd=self.checkout_root, decode_output=False) == ""

    def clean_working_directory(self):
        """Run svn cleanup, revert everything and delete files that were added."""
        # Make sure there are no locks lying around from a previously aborted svn invocation.
        # This is slightly dangerous, as it's possible the user is running another svn process
        # on this checkout at the same time.  However, it's much more likely that we're running
        # under windows and svn just sucks (or the user interrupted svn and it failed to clean up).
        self.run(["svn", "cleanup"], cwd=self.checkout_root)

        # svn revert -R is not as awesome as git reset --hard.
        # It will leave added files around, causing later svn update
        # calls to fail on the bots.  We make this mirror git reset --hard
        # by deleting any added files as well.
        # added_files() returns directories for SVN too; walking the paths in
        # reverse sorted order removes a directory's contents before the
        # directory itself.
        added_files = sorted(self.added_files(), reverse=True)
        self.run(["svn", "revert", "-R", "."], cwd=self.checkout_root)
        for path in added_files:
            # This is robust against cwd != self.checkout_root
            absolute_path = self.absolute_path(path)
            # Completely lame that there is no easy way to remove both types with one call.
            # Test the absolute path: |path| is relative to the checkout
            # root, which is not necessarily the process's cwd.
            if os.path.isdir(absolute_path):
                os.rmdir(absolute_path)
            else:
                os.remove(absolute_path)

    def status_command(self):
        return ['svn', 'status']

    def _status_regexp(self, expected_types):
        """Return a regexp for "svn status" lines whose status is in expected_types.

        The number of columns skipped before the filename depends on the
        svn version: 6 from svn 1.6 on, 5 before that.
        """
        version = self.svn_version()
        # Compare numerically; as strings "1.10" would sort below "1.6".
        parsed = re.match(r"\s*(\d+)\.(\d+)", version)
        if parsed:
            is_1_6_or_newer = (int(parsed.group(1)), int(parsed.group(2))) >= (1, 6)
        else:
            # Unrecognized version output: keep the historical comparison.
            is_1_6_or_newer = version > "1.6"
        field_count = 6 if is_1_6_or_newer else 5
        return "^(?P<status>[%s]).{%s} (?P<filename>.+)$" % (expected_types, field_count)

    def _add_parent_directories(self, path):
        """Does 'svn add' to the path and its parents."""
        if self.in_working_directory(path):
            return
        dirname = os.path.dirname(path)
        # We have a parent directory - ensure it is added first.
        if dirname != path:
            self._add_parent_directories(dirname)
        self.add(path)

    def add(self, path, return_exit_code=False):
        """Run "svn add" on path after adding its parent directories."""
        self._add_parent_directories(os.path.dirname(os.path.abspath(path)))
        return self.run(["svn", "add", path], return_exit_code=return_exit_code)

    def delete(self, path):
        """Run "svn delete --force" on path from its parent directory."""
        parent, base = os.path.split(os.path.abspath(path))
        return self.run(["svn", "delete", "--force", base], cwd=parent)

    def changed_files(self, git_commit=None):
        """Return changed files under the patch directories; git_commit is unused."""
        status_command = ["svn", "status"]
        status_command.extend(self._patch_directories)
        # ACDMR: Added, Conflicted, Deleted, Modified or Replaced
        return self.run_status_and_extract_filenames(status_command, self._status_regexp("ACDMR"))

    def changed_files_for_revision(self, revision):
        # As far as I can tell svn diff --summarize output looks just like svn status output.
        # No file contents printed, thus utf-8 auto-decoding in self.run is fine.
        status_command = ["svn", "diff", "--summarize", "-c", revision]
        return self.run_status_and_extract_filenames(status_command, self._status_regexp("ACDMR"))

    def revisions_changing_file(self, path, limit=5):
        """Return up to limit revision numbers (ints) from "svn log" for path."""
        revisions = []
        # svn log will exit(1) (and thus self.run will raise) if the path does not exist.
        log_command = ['svn', 'log', '--quiet', '--limit=%s' % limit, path]
        for line in self.run(log_command, cwd=self.checkout_root).splitlines():
            match = re.search(r'^r(?P<revision>\d+) ', line)
            if not match:
                continue
            revisions.append(int(match.group('revision')))
        return revisions

    def conflicted_files(self):
        return self.run_status_and_extract_filenames(self.status_command(), self._status_regexp("C"))

    def added_files(self):
        return self.run_status_and_extract_filenames(self.status_command(), self._status_regexp("A"))

    def deleted_files(self):
        return self.run_status_and_extract_filenames(self.status_command(), self._status_regexp("D"))

    @staticmethod
    def supports_local_commits():
        return False

    def display_name(self):
        return "svn"

    # FIXME: This method should be on Checkout.
    def create_patch(self, git_commit=None, changed_files=None):
        """Returns a byte array (str()) representing the patch file.
        Patch files are effectively binary since they may contain
        files of multiple different encodings."""
        if changed_files == []:
            return ""
        elif changed_files is None:
            changed_files = []
        return self.run([self.script_path("svn-create-patch")] + changed_files,
            cwd=self.checkout_root, return_stderr=False,
            decode_output=False)

    def committer_email_for_revision(self, revision):
        return self.run(["svn", "propget", "svn:author", "--revprop", "-r", revision]).rstrip()

    def contents_at_revision(self, path, revision):
        """Returns a byte array (str()) containing the contents
        of path @ revision in the repository."""
        remote_path = "%s/%s" % (self._repository_url(), path)
        return self.run(["svn", "cat", "-r", revision, remote_path], decode_output=False)

    def diff_for_revision(self, revision):
        # FIXME: This should probably use cwd=self.checkout_root
        return self.run(['svn', 'diff', '-c', revision])

    def _bogus_dir_name(self):
        """Return the path used for the temporary svn config directory."""
        if sys.platform.startswith("win"):
            # Imported here because the module does not import tempfile.
            import tempfile
            parent_dir = tempfile.gettempdir()
        else:
            parent_dir = sys.path[0]  # tempdir is not secure.
        return os.path.join(parent_dir, "temp_svn_config")

    def _setup_bogus_dir(self, log):
        """Create the temporary config dir if missing, remembering whether we did."""
        self._bogus_dir = self._bogus_dir_name()
        if not os.path.exists(self._bogus_dir):
            os.mkdir(self._bogus_dir)
            self._delete_bogus_dir = True
        else:
            self._delete_bogus_dir = False
        if log:
            log.debug('  Html: temp config dir: "%s".', self._bogus_dir)

    def _teardown_bogus_dir(self, log):
        """Remove the temporary config dir, but only if _setup_bogus_dir created it."""
        if self._delete_bogus_dir:
            shutil.rmtree(self._bogus_dir, True)
            if log:
                log.debug('  Html: removed temp config dir: "%s".', self._bogus_dir)
        self._bogus_dir = None

    def diff_for_file(self, path, log=None):
        """Return "svn diff" output for path, run with a temporary --config-dir."""
        self._setup_bogus_dir(log)
        try:
            args = ['svn', 'diff']
            if self._bogus_dir:
                args += ['--config-dir', self._bogus_dir]
            args.append(path)
            return self.run(args)
        finally:
            self._teardown_bogus_dir(log)

    def show_head(self, path):
        return self.run(['svn', 'cat', '-r', 'BASE', path], decode_output=False)

    def _repository_url(self):
        return self.value_from_svn_info(self.checkout_root, 'URL')

    def apply_reverse_diff(self, revision):
        """Merge the inverse of revision into the working copy."""
        # '-c -revision' applies the inverse diff of 'revision'
        svn_merge_args = ['svn', 'merge', '--non-interactive', '-c', '-%s' % revision, self._repository_url()]
        log("WARNING: svn merge has been known to take more than 10 minutes to complete.  It is recommended you use git for rollouts.")
        log("Running '%s'" % " ".join(svn_merge_args))
        # FIXME: Should this use cwd=self.checkout_root?
        self.run(svn_merge_args)

    def revert_files(self, file_paths):
        # FIXME: This should probably use cwd=self.checkout_root.
        self.run(['svn', 'revert'] + file_paths)

    def commit_with_message(self, message, username=None, password=None, git_commit=None, force_squash=False, changed_files=None):
        """Run "svn commit" with message and return the command output.

        In dry-run mode nothing is committed and a fake success message is
        returned instead.

        Raises:
            AuthenticationError: no username was given and no stored
                authorization exists for the server realm.
        """
        # git-commit and force are not used by SVN.
        svn_commit_args = ["svn", "commit"]

        if not username and not self.has_authorization_for_realm(self.svn_server_realm):
            raise AuthenticationError(self.svn_server_host)
        if username:
            svn_commit_args.extend(["--username", username])

        svn_commit_args.extend(["-m", message])

        if changed_files:
            svn_commit_args.extend(changed_files)

        if self.dryrun:
            _log = logging.getLogger("webkitpy.common.system")
            _log.debug('Would run SVN command: "' + " ".join(svn_commit_args) + '"')

            # Return a string which looks like a commit so that things which parse this output will succeed.
            return "Dry run, no commit.\nCommitted revision 0."

        return self.run(svn_commit_args, cwd=self.checkout_root, error_handler=commit_error_handler)

    def svn_commit_log(self, svn_revision):
        svn_revision = self.strip_r_from_svn_revision(svn_revision)
        return self.run(['svn', 'log', '--non-interactive', '--revision', svn_revision])

    def last_svn_commit_log(self):
        # BASE is the checkout revision, HEAD is the remote repository revision
        # http://svnbook.red-bean.com/en/1.0/ch03s03.html
        return self.svn_commit_log('BASE')

    def propset(self, pname, pvalue, path):
        """Run "svn pset pname pvalue" on path from its parent directory."""
        directory, base = os.path.split(path)
        return self.run(['svn', 'pset', pname, pvalue, base], cwd=directory)

    def propget(self, pname, path):
        """Return property pname of path, utf-8 encoded, trailing newlines stripped."""
        directory, base = os.path.split(path)
        return self.run(['svn', 'pget', pname, base], cwd=directory).encode('utf-8').rstrip("\n")
602
603
604# All git-specific logic should go here.
605class Git(SCM, SVNRepository):
606    def __init__(self, cwd, executive=None):
607        SCM.__init__(self, cwd, executive)
608        self._check_git_architecture()
609
610    def _machine_is_64bit(self):
611        import platform
612        # This only is tested on Mac.
613        if not platform.mac_ver()[0]:
614            return False
615
616        # platform.architecture()[0] can be '64bit' even if the machine is 32bit:
617        # http://mail.python.org/pipermail/pythonmac-sig/2009-September/021648.html
618        # Use the sysctl command to find out what the processor actually supports.
619        return self.run(['sysctl', '-n', 'hw.cpu64bit_capable']).rstrip() == '1'
620
621    def _executable_is_64bit(self, path):
622        # Again, platform.architecture() fails us.  On my machine
623        # git_bits = platform.architecture(executable=git_path, bits='default')[0]
624        # git_bits is just 'default', meaning the call failed.
625        file_output = self.run(['file', path])
626        return re.search('x86_64', file_output)
627
628    def _check_git_architecture(self):
629        if not self._machine_is_64bit():
630            return
631
632        # We could path-search entirely in python or with
633        # which.py (http://code.google.com/p/which), but this is easier:
634        git_path = self.run(['which', 'git']).rstrip()
635        if self._executable_is_64bit(git_path):
636            return
637
638        webkit_dev_thead_url = "https://lists.webkit.org/pipermail/webkit-dev/2010-December/015249.html"
639        log("Warning: This machine is 64-bit, but the git binary (%s) does not support 64-bit.\nInstall a 64-bit git for better performance, see:\n%s\n" % (git_path, webkit_dev_thead_url))
640
641    @classmethod
642    def in_working_directory(cls, path):
643        return run_command(['git', 'rev-parse', '--is-inside-work-tree'], cwd=path, error_handler=Executive.ignore_error).rstrip() == "true"
644
645    @classmethod
646    def find_checkout_root(cls, path):
647        # "git rev-parse --show-cdup" would be another way to get to the root
648        (checkout_root, dot_git) = os.path.split(run_command(['git', 'rev-parse', '--git-dir'], cwd=(path or "./")))
649        # If we were using 2.6 # checkout_root = os.path.relpath(checkout_root, path)
650        if not os.path.isabs(checkout_root): # Sometimes git returns relative paths
651            checkout_root = os.path.join(path, checkout_root)
652        return checkout_root
653
654    @classmethod
655    def to_object_name(cls, filepath):
656        root_end_with_slash = os.path.join(cls.find_checkout_root(os.path.dirname(filepath)), '')
657        return filepath.replace(root_end_with_slash, '')
658
659    @classmethod
660    def read_git_config(cls, key):
661        # FIXME: This should probably use cwd=self.checkout_root.
662        # Pass --get-all for cases where the config has multiple values
663        return run_command(["git", "config", "--get-all", key],
664            error_handler=Executive.ignore_error).rstrip('\n')
665
666    @staticmethod
667    def commit_success_regexp():
668        return "^Committed r(?P<svn_revision>\d+)$"
669
670    def discard_local_commits(self):
671        # FIXME: This should probably use cwd=self.checkout_root
672        self.run(['git', 'reset', '--hard', self.remote_branch_ref()])
673
674    def local_commits(self):
675        # FIXME: This should probably use cwd=self.checkout_root
676        return self.run(['git', 'log', '--pretty=oneline', 'HEAD...' + self.remote_branch_ref()]).splitlines()
677
678    def rebase_in_progress(self):
679        return os.path.exists(os.path.join(self.checkout_root, '.git/rebase-apply'))
680
681    def working_directory_is_clean(self):
682        # FIXME: This should probably use cwd=self.checkout_root
683        return self.run(['git', 'diff', 'HEAD', '--name-only']) == ""
684
685    def clean_working_directory(self):
686        # FIXME: These should probably use cwd=self.checkout_root.
687        # Could run git clean here too, but that wouldn't match working_directory_is_clean
688        self.run(['git', 'reset', '--hard', 'HEAD'])
689        # Aborting rebase even though this does not match working_directory_is_clean
690        if self.rebase_in_progress():
691            self.run(['git', 'rebase', '--abort'])
692
693    def status_command(self):
694        # git status returns non-zero when there are changes, so we use git diff name --name-status HEAD instead.
695        # No file contents printed, thus utf-8 autodecoding in self.run is fine.
696        return ["git", "diff", "--name-status", "HEAD"]
697
698    def _status_regexp(self, expected_types):
699        return '^(?P<status>[%s])\t(?P<filename>.+)$' % expected_types
700
701    def add(self, path, return_exit_code=False):
702        return self.run(["git", "add", path], return_exit_code=return_exit_code)
703
704    def delete(self, path):
705        return self.run(["git", "rm", "-f", path])
706
707    def merge_base(self, git_commit):
708        if git_commit:
709            # Special-case HEAD.. to mean working-copy changes only.
710            if git_commit.upper() == 'HEAD..':
711                return 'HEAD'
712
713            if '..' not in git_commit:
714                git_commit = git_commit + "^.." + git_commit
715            return git_commit
716
717        return self.remote_merge_base()
718
719    def changed_files(self, git_commit=None):
720        # FIXME: --diff-filter could be used to avoid the "extract_filenames" step.
721        status_command = ['git', 'diff', '-r', '--name-status', '-C', '-M', "--no-ext-diff", "--full-index", self.merge_base(git_commit)]
722        # FIXME: I'm not sure we're returning the same set of files that SVN.changed_files is.
723        # Added (A), Copied (C), Deleted (D), Modified (M), Renamed (R)
724        return self.run_status_and_extract_filenames(status_command, self._status_regexp("ADM"))
725
726    def _changes_files_for_commit(self, git_commit):
727        # --pretty="format:" makes git show not print the commit log header,
728        changed_files = self.run(["git", "show", "--pretty=format:", "--name-only", git_commit]).splitlines()
729        # instead it just prints a blank line at the top, so we skip the blank line:
730        return changed_files[1:]
731
732    def changed_files_for_revision(self, revision):
733        commit_id = self.git_commit_from_svn_revision(revision)
734        return self._changes_files_for_commit(commit_id)
735
736    def revisions_changing_file(self, path, limit=5):
737        # git rev-list head --remove-empty --limit=5 -- path would be equivalent.
738        commit_ids = self.run(["git", "log", "--remove-empty", "--pretty=format:%H", "-%s" % limit, "--", path]).splitlines()
739        return filter(lambda revision: revision, map(self.svn_revision_from_git_commit, commit_ids))
740
741    def conflicted_files(self):
742        # We do not need to pass decode_output for this diff command
743        # as we're passing --name-status which does not output any data.
744        status_command = ['git', 'diff', '--name-status', '-C', '-M', '--diff-filter=U']
745        return self.run_status_and_extract_filenames(status_command, self._status_regexp("U"))
746
747    def added_files(self):
748        return self.run_status_and_extract_filenames(self.status_command(), self._status_regexp("A"))
749
750    def deleted_files(self):
751        return self.run_status_and_extract_filenames(self.status_command(), self._status_regexp("D"))
752
753    @staticmethod
754    def supports_local_commits():
755        return True
756
757    def display_name(self):
758        return "git"
759
760    def prepend_svn_revision(self, diff):
761        git_log = self.run(['git', 'log', '-25'])
762        match = re.search("^\s*git-svn-id:.*@(?P<svn_revision>\d+)\ ", git_log, re.MULTILINE)
763        if not match:
764            return diff
765
766        return "Subversion Revision: " + str(match.group('svn_revision')) + '\n' + diff
767
768    def create_patch(self, git_commit=None, changed_files=None):
769        """Returns a byte array (str()) representing the patch file.
770        Patch files are effectively binary since they may contain
771        files of multiple different encodings."""
772        command = ['git', 'diff', '--binary', "--no-ext-diff", "--full-index", "-M", self.merge_base(git_commit), "--"]
773        if changed_files:
774            command += changed_files
775        return self.prepend_svn_revision(self.run(command, decode_output=False, cwd=self.checkout_root))
776
777    def _run_git_svn_find_rev(self, arg):
778        # git svn find-rev always exits 0, even when the revision or commit is not found.
779        return self.run(['git', 'svn', 'find-rev', arg], cwd=self.checkout_root).rstrip()
780
781    def _string_to_int_or_none(self, string):
782        try:
783            return int(string)
784        except ValueError, e:
785            return None
786
787    @memoized
788    def git_commit_from_svn_revision(self, svn_revision):
789        git_commit = self._run_git_svn_find_rev('r%s' % svn_revision)
790        if not git_commit:
791            # FIXME: Alternatively we could offer to update the checkout? Or return None?
792            raise ScriptError(message='Failed to find git commit for revision %s, your checkout likely needs an update.' % svn_revision)
793        return git_commit
794
795    @memoized
796    def svn_revision_from_git_commit(self, git_commit):
797        svn_revision = self._run_git_svn_find_rev(git_commit)
798        return self._string_to_int_or_none(svn_revision)
799
800    def contents_at_revision(self, path, revision):
801        """Returns a byte array (str()) containing the contents
802        of path @ revision in the repository."""
803        return self.run(["git", "show", "%s:%s" % (self.git_commit_from_svn_revision(revision), path)], decode_output=False)
804
805    def diff_for_revision(self, revision):
806        git_commit = self.git_commit_from_svn_revision(revision)
807        return self.create_patch(git_commit)
808
809    def diff_for_file(self, path, log=None):
810        return self.run(['git', 'diff', 'HEAD', '--', path])
811
812    def show_head(self, path):
813        return self.run(['git', 'show', 'HEAD:' + self.to_object_name(path)], decode_output=False)
814
815    def committer_email_for_revision(self, revision):
816        git_commit = self.git_commit_from_svn_revision(revision)
817        committer_email = self.run(["git", "log", "-1", "--pretty=format:%ce", git_commit])
818        # Git adds an extra @repository_hash to the end of every committer email, remove it:
819        return committer_email.rsplit("@", 1)[0]
820
821    def apply_reverse_diff(self, revision):
822        # Assume the revision is an svn revision.
823        git_commit = self.git_commit_from_svn_revision(revision)
824        # I think this will always fail due to ChangeLogs.
825        self.run(['git', 'revert', '--no-commit', git_commit], error_handler=Executive.ignore_error)
826
827    def revert_files(self, file_paths):
828        self.run(['git', 'checkout', 'HEAD'] + file_paths)
829
830    def _assert_can_squash(self, working_directory_is_clean):
831        squash = Git.read_git_config('webkit-patch.commit-should-always-squash')
832        should_squash = squash and squash.lower() == "true"
833
834        if not should_squash:
835            # Only warn if there are actually multiple commits to squash.
836            num_local_commits = len(self.local_commits())
837            if num_local_commits > 1 or (num_local_commits > 0 and not working_directory_is_clean):
838                raise AmbiguousCommitError(num_local_commits, working_directory_is_clean)
839
840    def commit_with_message(self, message, username=None, password=None, git_commit=None, force_squash=False, changed_files=None):
841        # Username is ignored during Git commits.
842        working_directory_is_clean = self.working_directory_is_clean()
843
844        if git_commit:
845            # Special-case HEAD.. to mean working-copy changes only.
846            if git_commit.upper() == 'HEAD..':
847                if working_directory_is_clean:
848                    raise ScriptError(message="The working copy is not modified. --git-commit=HEAD.. only commits working copy changes.")
849                self.commit_locally_with_message(message)
850                return self._commit_on_branch(message, 'HEAD', username=username, password=password)
851
852            # Need working directory changes to be committed so we can checkout the merge branch.
853            if not working_directory_is_clean:
854                # FIXME: webkit-patch land will modify the ChangeLogs to correct the reviewer.
855                # That will modify the working-copy and cause us to hit this error.
856                # The ChangeLog modification could be made to modify the existing local commit.
857                raise ScriptError(message="Working copy is modified. Cannot commit individual git_commits.")
858            return self._commit_on_branch(message, git_commit, username=username, password=password)
859
860        if not force_squash:
861            self._assert_can_squash(working_directory_is_clean)
862        self.run(['git', 'reset', '--soft', self.remote_merge_base()])
863        self.commit_locally_with_message(message)
864        return self.push_local_commits_to_server(username=username, password=password)
865
866    def _commit_on_branch(self, message, git_commit, username=None, password=None):
867        branch_ref = self.run(['git', 'symbolic-ref', 'HEAD']).strip()
868        branch_name = branch_ref.replace('refs/heads/', '')
869        commit_ids = self.commit_ids_from_commitish_arguments([git_commit])
870
871        # We want to squash all this branch's commits into one commit with the proper description.
872        # We do this by doing a "merge --squash" into a new commit branch, then dcommitting that.
873        MERGE_BRANCH_NAME = 'webkit-patch-land'
874        self.delete_branch(MERGE_BRANCH_NAME)
875
876        # We might be in a directory that's present in this branch but not in the
877        # trunk.  Move up to the top of the tree so that git commands that expect a
878        # valid CWD won't fail after we check out the merge branch.
879        os.chdir(self.checkout_root)
880
881        # Stuff our change into the merge branch.
882        # We wrap in a try...finally block so if anything goes wrong, we clean up the branches.
883        commit_succeeded = True
884        try:
885            self.run(['git', 'checkout', '-q', '-b', MERGE_BRANCH_NAME, self.remote_branch_ref()])
886
887            for commit in commit_ids:
888                # We're on a different branch now, so convert "head" to the branch name.
889                commit = re.sub(r'(?i)head', branch_name, commit)
890                # FIXME: Once changed_files and create_patch are modified to separately handle each
891                # commit in a commit range, commit each cherry pick so they'll get dcommitted separately.
892                self.run(['git', 'cherry-pick', '--no-commit', commit])
893
894            self.run(['git', 'commit', '-m', message])
895            output = self.push_local_commits_to_server(username=username, password=password)
896        except Exception, e:
897            log("COMMIT FAILED: " + str(e))
898            output = "Commit failed."
899            commit_succeeded = False
900        finally:
901            # And then swap back to the original branch and clean up.
902            self.clean_working_directory()
903            self.run(['git', 'checkout', '-q', branch_name])
904            self.delete_branch(MERGE_BRANCH_NAME)
905
906        return output
907
908    def svn_commit_log(self, svn_revision):
909        svn_revision = self.strip_r_from_svn_revision(svn_revision)
910        return self.run(['git', 'svn', 'log', '-r', svn_revision])
911
912    def last_svn_commit_log(self):
913        return self.run(['git', 'svn', 'log', '--limit=1'])
914
915    # Git-specific methods:
916    def _branch_ref_exists(self, branch_ref):
917        return self.run(['git', 'show-ref', '--quiet', '--verify', branch_ref], return_exit_code=True) == 0
918
919    def delete_branch(self, branch_name):
920        if self._branch_ref_exists('refs/heads/' + branch_name):
921            self.run(['git', 'branch', '-D', branch_name])
922
923    def remote_merge_base(self):
924        return self.run(['git', 'merge-base', self.remote_branch_ref(), 'HEAD']).strip()
925
926    def remote_branch_ref(self):
927        # Use references so that we can avoid collisions, e.g. we don't want to operate on refs/heads/trunk if it exists.
928        remote_branch_refs = Git.read_git_config('svn-remote.svn.fetch')
929        if not remote_branch_refs:
930            remote_master_ref = 'refs/remotes/origin/master'
931            if not self._branch_ref_exists(remote_master_ref):
932                raise ScriptError(message="Can't find a branch to diff against. svn-remote.svn.fetch is not in the git config and %s does not exist" % remote_master_ref)
933            return remote_master_ref
934
935        # FIXME: What's the right behavior when there are multiple svn-remotes listed?
936        # For now, just use the first one.
937        first_remote_branch_ref = remote_branch_refs.split('\n')[0]
938        return first_remote_branch_ref.split(':')[1]
939
940    def commit_locally_with_message(self, message):
941        self.run(['git', 'commit', '--all', '-F', '-'], input=message)
942
943    def push_local_commits_to_server(self, username=None, password=None):
944        dcommit_command = ['git', 'svn', 'dcommit']
945        if self.dryrun:
946            dcommit_command.append('--dry-run')
947        if not self.has_authorization_for_realm(SVN.svn_server_realm):
948            raise AuthenticationError(SVN.svn_server_host, prompt_for_password=True)
949        if username:
950            dcommit_command.extend(["--username", username])
951        output = self.run(dcommit_command, error_handler=commit_error_handler, input=password)
952        # Return a string which looks like a commit so that things which parse this output will succeed.
953        if self.dryrun:
954            output += "\nCommitted r0"
955        return output
956
957    # This function supports the following argument formats:
958    # no args : rev-list trunk..HEAD
959    # A..B    : rev-list A..B
960    # A...B   : error!
961    # A B     : [A, B]  (different from git diff, which would use "rev-list A..B")
962    def commit_ids_from_commitish_arguments(self, args):
963        if not len(args):
964            args.append('%s..HEAD' % self.remote_branch_ref())
965
966        commit_ids = []
967        for commitish in args:
968            if '...' in commitish:
969                raise ScriptError(message="'...' is not supported (found in '%s'). Did you mean '..'?" % commitish)
970            elif '..' in commitish:
971                commit_ids += reversed(self.run(['git', 'rev-list', commitish]).splitlines())
972            else:
973                # Turn single commits or branch or tag names into commit ids.
974                commit_ids += self.run(['git', 'rev-parse', '--revs-only', commitish]).splitlines()
975        return commit_ids
976
977    def commit_message_for_local_commit(self, commit_id):
978        commit_lines = self.run(['git', 'cat-file', 'commit', commit_id]).splitlines()
979
980        # Skip the git headers.
981        first_line_after_headers = 0
982        for line in commit_lines:
983            first_line_after_headers += 1
984            if line == "":
985                break
986        return CommitMessage(commit_lines[first_line_after_headers:])
987
988    def files_changed_summary_for_commit(self, commit_id):
989        return self.run(['git', 'diff-tree', '--shortstat', '--no-commit-id', commit_id])
990