revision_control.py revision 5f6b233a9406b4b6b78c6b47c1e6fefa123ccd5e
1"""
2Module with abstraction layers to revision control systems.
3
4With this library, autotest developers can handle source code checkouts and
5updates on both client as well as server code.
6"""
7
8import os, warnings, logging
9import error, utils
10from autotest_lib.client.bin import os_dep
11
12
class RevisionControlError(Exception):
    """Root of the exception hierarchy defined in this module."""
15
16
class GitError(RevisionControlError):
    """Generic git failure; parent of the more specific git errors."""
19
20
class GitCloneError(GitError):
    """Signals that a git clone operation failed."""
23
24
class GitFetchError(GitError):
    """Signals that a git fetch operation failed."""
27
28
class GitPullError(GitError):
    """Signals that a git pull operation failed."""
31
32
class GitResetError(GitError):
    """Signals that a git reset operation failed."""
35
36
class GitCommitError(GitError):
    """Signals that a git commit operation failed."""
39
40
class GitRepo(object):
    """
    This class represents a git repo.

    It is used to pull down a local copy of a git repo, check whether the
    local repo is up-to-date and, if it is not, update it.
    """

    def __init__(self, repodir, giturl=None, weburl=None, abs_work_tree=None):
        """
        Initialize the repository wrapper.

        @param repodir: destination repo directory.
        @param giturl: master repo git url. Optional; operations that read
            it (clone, pull, fetch) raise ValueError when it is missing.
        @param weburl: deprecated and otherwise ignored; passing a value
            only triggers a DeprecationWarning.
        @param abs_work_tree: work tree of the git repo. In the
            absence of a work tree git manipulations will occur
            in the current working directory for non bare repos.
            In such repos the --git-dir option should point to
            the .git directory and --work-tree should point to
            the repos working tree.
        @raises ValueError: if repodir is None.

        Note: this class assumes non-bare repositories, i.e. ones where
        the git data lives in a '.git' directory underneath repodir.
        """
        if repodir is None:
            raise ValueError('You must provide a path that will hold the '
                             'git repository')
        self.repodir = utils.sh_escape(repodir)
        self._giturl = giturl
        if weburl is not None:
            warnings.warn("Param weburl: You are no longer required to provide "
                          "a web URL for your git repos", DeprecationWarning)

        # path to .git dir
        self.gitpath = utils.sh_escape(os.path.join(self.repodir, '.git'))

        # Find git base command. If not found, this will throw an exception
        self.git_base_cmd = os_dep.command('git')
        self.work_tree = abs_work_tree

        # default to same remote path as local
        self._build = os.path.dirname(self.repodir)


    @property
    def giturl(self):
        """
        A giturl is necessary to perform certain actions (clone, pull, fetch)
        but not others (like diff).

        @raises ValueError: if the object was constructed without a git URL.
        """
        if self._giturl is None:
            raise ValueError('Unsupported operation -- this object was not '
                             'constructed with a git URL.')
        return self._giturl


    def gen_git_cmd_base(self):
        """
        Build the git command prefix shared by every git invocation.

        The prefix is not stored; it is rebuilt on each call from
        git_base_cmd, gitpath and (when set) work_tree, so temporary
        changes to work_tree (see clone) take effect immediately.

        @return: Command prefix string, e.g. 'git --git-dir=<gitpath>'.
        """
        # base git command , pointing to gitpath git dir
        gitcmdbase = '%s --git-dir=%s' % (self.git_base_cmd,
                                          self.gitpath)
        if self.work_tree:
            gitcmdbase += ' --work-tree=%s' % self.work_tree
        return gitcmdbase


    def _run(self, command, timeout=None, ignore_status=False):
        """
        Auxiliary function to run a command, with proper shell escaping.

        @param command: Full command line to execute.
        @param timeout: Timeout to run the command.
        @param ignore_status: Whether we should suppress error.CmdError
                exceptions if the command did return exit code !=0 (True), or
                not suppress them (False).
        @return: Whatever utils.run returns for the command.
        """
        return utils.run(utils.sh_escape(command), timeout, ignore_status)


    def gitcmd(self, cmd, ignore_status=False, error_class=None,
               error_msg=None):
        """
        Wrapper for a git command.

        @param cmd: Git subcommand (ex 'clone').
        @param ignore_status: If True, ignore the CmdError raised by the
                underlying command runner. NB: Passing in an error_class
                implies ignore_status=True.
        @param error_class: Optional error class to log and raise in case
                the command exits with a non-zero status. Must be a
                (sub)type of GitError.
        @param error_msg: When passed with error_class, used as a friendly
                error message; otherwise the command's stderr is used.
        @return: The result object of the command run (exit_status, stdout
                and stderr are read from it in this class).
        @raises error_class: if given and the command exits non-zero.
        """
        # TODO(pprabhu) Get rid of the ignore_status argument.
        # Now that we support raising custom errors, we always want to get a
        # return code from the command execution, instead of an exception.
        ignore_status = ignore_status or error_class is not None
        cmd = '%s %s' % (self.gen_git_cmd_base(), cmd)
        rv = self._run(cmd, ignore_status=ignore_status)
        if rv.exit_status != 0 and error_class is not None:
            logging.error('git command failed: %s: %s',
                          cmd, error_msg if error_msg is not None else '')
            logging.error(rv.stderr)
            raise error_class(error_msg if error_msg is not None
                              else rv.stderr)

        return rv


    def clone(self):
        """
        Clones a repo using giturl and repodir.

        Since we're cloning the master repo we don't have a work tree yet,
        make sure the getter of the gitcmd doesn't think we do by setting
        work_tree to None. The previous value is restored afterwards, even
        when the clone fails.

        @raises GitCloneError: if cloning the master repo fails.
        """
        logging.info('Cloning git repo %s', self.giturl)
        cmd = 'clone %s %s ' % (self.giturl, self.repodir)
        abs_work_tree = self.work_tree
        self.work_tree = None
        try:
            rv = self.gitcmd(cmd, True)
            if rv.exit_status != 0:
                logging.error(rv.stderr)
                raise GitCloneError('Failed to clone git url', rv)
            logging.info(rv.stdout)
        finally:
            self.work_tree = abs_work_tree


    def pull(self, rebase=False):
        """
        Pulls into repodir using giturl.

        @param rebase: If true forces git pull to perform a rebase instead of a
                        merge.
        @raises GitPullError: if pulling from giturl fails.
        """
        logging.info('Updating git repo %s', self.giturl)
        cmd = 'pull '
        if rebase:
            cmd += '--rebase '
        cmd += self.giturl

        rv = self.gitcmd(cmd, True)
        if rv.exit_status != 0:
            logging.error(rv.stderr)
            e_msg = 'Failed to pull git repo data'
            raise GitPullError(e_msg, rv)


    def commit(self, msg='default'):
        """
        Commit changes to repo with the supplied commit msg.

        @param msg: A message that goes with the commit.
        @raises GitCommitError: if the commit command fails.
        """
        # NOTE(review): msg is interpolated into the command line unquoted;
        # presumably multi-word messages must be quoted by the caller --
        # confirm against utils.sh_escape / utils.run.
        # ignore_status=True so that a failure reaches the check below
        # instead of surfacing as the runner's own exception.
        rv = self.gitcmd('commit -a -m %s' % msg, True)
        if rv.exit_status != 0:
            logging.error(rv.stderr)
            raise GitCommitError('Unable to commit', rv)


    def reset(self, branch_or_sha):
        """
        Reset repo to the given branch or git sha.

        @param branch_or_sha: Name of a local or remote branch or git sha.

        @raises GitResetError if operation fails.
        """
        self.gitcmd('reset --hard %s' % branch_or_sha,
                    error_class=GitResetError,
                    error_msg='Failed to reset to %s' % branch_or_sha)


    def reset_head(self):
        """
        Reset repo to HEAD@{0} by running git reset --hard HEAD.

        TODO(pprabhu): cleanup. Use reset.

        @raises GitResetError: if we fail to reset HEAD.
        """
        logging.info('Resetting head on repo %s', self.repodir)
        # ignore_status=True so that a failure is reported as the
        # documented GitResetError rather than by the command runner.
        rv = self.gitcmd('reset --hard HEAD', True)
        if rv.exit_status != 0:
            logging.error(rv.stderr)
            e_msg = 'Failed to reset HEAD'
            raise GitResetError(e_msg, rv)


    def fetch_remote(self):
        """
        Fetches all files from the remote but doesn't reset head.

        @raises GitFetchError: if we fail to fetch all files from giturl.
        """
        logging.info('fetching from repo %s', self.giturl)
        # ignore_status=True so that a failure is reported as the
        # documented GitFetchError rather than by the command runner.
        rv = self.gitcmd('fetch --all', True)
        if rv.exit_status != 0:
            logging.error(rv.stderr)
            e_msg = 'Failed to fetch from %s' % self.giturl
            raise GitFetchError(e_msg, rv)


    def reinit_repo_at(self, remote_branch):
        """
        Does all it can to ensure that the repo is at remote_branch.

        This will try to be nice and detect any local changes and bail early.
        OTOH, if it finishes successfully, it'll blow away anything and
        everything so that local repo reflects the upstream branch requested.

        @param remote_branch: Branch name on 'origin' to reset to.
        @raises GitError: if the checkout is dirty or any step fails (the
                fetch and reset steps raise the GitFetchError and
                GitResetError subclasses).
        """
        if not self.is_repo_initialized():
            self.clone()

        # Play nice. Detect any local changes and bail.
        # Re-stat all files before comparing index. This is needed for
        # diff-index to work properly in cases when the stat info on files is
        # stale. (e.g., you just untarred the whole git folder that you got from
        # Alice)
        self.gitcmd('update-index --refresh -q',
                    error_class=GitError,
                    error_msg='Failed to refresh index.')
        # 'diff-index --quiet' reports differences through its exit status
        # (1 == differences found) and prints nothing, so inspect the status
        # ourselves instead of treating every non-zero exit as a failure.
        rv = self.gitcmd('diff-index --quiet HEAD --', ignore_status=True)
        if rv.exit_status not in (0, 1):
            logging.error(rv.stderr)
            raise GitError('Failed to check for local changes.')
        if rv.exit_status == 1 or rv.stdout:
            logging.error(rv.stdout)
            e_msg = 'Local checkout dirty. (%s)'
            raise GitError(e_msg % rv.stdout)

        # Play the bad cop. Destroy everything in your path.
        # Don't trust the existing repo setup at all (so don't trust the current
        # config, current branches / remotes etc).
        self.gitcmd('config remote.origin.url %s' % self.giturl,
                    error_class=GitError,
                    error_msg='Failed to set origin.')
        self.gitcmd('checkout -f',
                    error_class=GitError,
                    error_msg='Failed to checkout.')
        self.gitcmd('clean -qxdf',
                    error_class=GitError,
                    error_msg='Failed to clean.')
        self.fetch_remote()
        self.reset('origin/%s' % remote_branch)


    def get(self, **kwargs):
        """
        Make sure an up-to-date copy of the repo at 'giturl' exists in
        'repodir': clone it if it is not initialized yet, pull if it is
        out of date, otherwise leave it alone. Afterwards
        self.source_material records where the source lives.

        @param kwargs: Accepted for compatibility; not used by this method.
        """
        if not self.is_repo_initialized():
            # this is your first time ...
            self.clone()
        elif self.is_out_of_date():
            # existing repo that is behind the remote, update it
            self.pull()
        else:
            logging.info('repo up-to-date')

        # remember where the source is
        self.source_material = self.repodir


    def get_local_head(self):
        """
        Get the top commit hash of the current local git branch.

        @return: Top commit hash of local git branch
        """
        cmd = 'log --pretty=format:"%H" -1'
        l_head_cmd = self.gitcmd(cmd)
        return l_head_cmd.stdout.strip()


    def get_remote_head(self):
        """
        Get the top commit hash of the current remote git branch.

        @return: Top commit hash of remote git branch
        """
        # NOTE(review): presumably 'git remote show' prints exactly one
        # remote name here; with several remotes the output spans multiple
        # lines -- confirm.
        cmd1 = 'remote show'
        origin_name_cmd = self.gitcmd(cmd1)
        cmd2 = 'log --pretty=format:"%H" -1 ' + origin_name_cmd.stdout.strip()
        r_head_cmd = self.gitcmd(cmd2)
        return r_head_cmd.stdout.strip()


    def is_out_of_date(self):
        """
        Return whether this branch is out of date with regards to remote branch.

        @return: True if the local head differs from the remote head (the
                branch is outdated), False if they match.
        """
        return self.get_local_head() != self.get_remote_head()


    def is_repo_initialized(self):
        """
        Return whether the git repo was already initialized.

        Counts objects in .git directory, since these will exist even if the
        repo is empty. Assumes non-bare repositories like the rest of this
        file.

        @return: True if the repo is initialized.
        """
        rv = self.gitcmd('count-objects', True)
        return rv.exit_status == 0


    def get_latest_commit_hash(self):
        """
        Get the commit hash of the latest commit in the repo.

        We don't raise an exception if no commit hash was found as
        this could be an empty repository. The caller should notice this
        method's return value and raise one appropriately.

        @return: The raw output of 'rev-list -n 1 --all' (the latest commit
                hash if anything has been committed), or None if the
                command fails.
        """
        cmd = 'rev-list -n 1 --all'
        rv = self.gitcmd(cmd, True)
        if rv.exit_status == 0:
            return rv.stdout
        return None


    def is_repo_empty(self):
        """
        Checks for empty but initialized repos.

        eg: we clone an empty master repo, then don't pull
        after the master commits.

        @return True if the repo has no commits.
        """
        return not self.get_latest_commit_hash()


    def get_revision(self):
        """
        Return current HEAD commit id.

        Fetches the repo first (see get) if it is not initialized yet.

        @raises error.CmdError: if the HEAD revision cannot be resolved.
        """
        if not self.is_repo_initialized():
            self.get()

        cmd = 'rev-parse --verify HEAD'
        gitlog = self.gitcmd(cmd, True)
        if gitlog.exit_status != 0:
            logging.error(gitlog.stderr)
            raise error.CmdError('Failed to find git sha1 revision', gitlog)
        return gitlog.stdout.strip('\n')


    def checkout(self, remote, local=None):
        """
        Check out the git commit id, branch, or tag given by remote.

        Optionally give the local branch name as local.

        @param remote: Commit id, branch or tag to check out (a string).
        @param local: Optional name of a new local branch to create from
                remote (runs 'checkout -b local remote').
        @raises error.CmdError: if the checkout command fails.
        @note: For git checkout tag git version >= 1.5.0 is required
        """
        if not self.is_repo_initialized():
            self.get()

        assert(isinstance(remote, basestring))
        if local:
            cmd = 'checkout -b %s %s' % (local, remote)
        else:
            cmd = 'checkout %s' % (remote)
        gitlog = self.gitcmd(cmd, True)
        if gitlog.exit_status != 0:
            logging.error(gitlog.stderr)
            raise error.CmdError('Failed to checkout git branch', gitlog)
        logging.info(gitlog.stdout)


    def get_branch(self, all=False, remote_tracking=False):
        """
        Show the branches.

        @param all: List both remote-tracking branches and local branches (True)
                or only the local ones (False).
        @param remote_tracking: Lists the remote-tracking branches.
        @return: With all or remote_tracking set, the raw branch listing;
                otherwise only the name of the current branch (the line
                git marks with '*').
        @raises error.CmdError: if the branch command fails.
        """
        if not self.is_repo_initialized():
            self.get()

        cmd = 'branch --no-color'
        if all:
            cmd = " ".join([cmd, "-a"])
        if remote_tracking:
            cmd = " ".join([cmd, "-r"])

        gitlog = self.gitcmd(cmd, True)
        if gitlog.exit_status != 0:
            logging.error(gitlog.stderr)
            raise error.CmdError('Failed to get git branch', gitlog)
        if all or remote_tracking:
            return gitlog.stdout.strip('\n')

        # The current branch is printed as '* <name>'; drop the marker.
        current = [b[2:] for b in gitlog.stdout.split('\n')
                   if b.startswith('*')]
        return current[0]
482