1## @file
2#  Check a patch for various format issues
3#
4#  Copyright (c) 2015, Intel Corporation. All rights reserved.<BR>
5#
6#  This program and the accompanying materials are licensed and made
7#  available under the terms and conditions of the BSD License which
8#  accompanies this distribution. The full text of the license may be
9#  found at http://opensource.org/licenses/bsd-license.php
10#
11#  THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS"
12#  BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER
13#  EXPRESS OR IMPLIED.
14#
15
16from __future__ import print_function
17
# Version string reported by the --version command line option.
VersionNumber = '0.1'
# Copyright notice; also passed to argparse as the program description.
__copyright__ = "Copyright (c) 2015, Intel Corporation  All rights reserved."
20
21import email
22import argparse
23import os
24import re
25import subprocess
26import sys
27
class Verbose:
    """Output verbosity levels, ordered from quietest to most detailed.

    ``level`` holds the currently selected verbosity; it starts at NORMAL.
    """

    SILENT = 0
    ONELINE = 1
    NORMAL = 2

    level = NORMAL
31
class CommitMessageCheck:
    """Validate the subject and body of a git commit message.

    All checks run from the constructor.  Afterwards ``ok`` is True only
    if none of them reported an error through error().
    """

    url = 'https://github.com/tianocore/tianocore.github.io/wiki/Commit-Message-Format'

    def __init__(self, subject, message):
        """Run every check on *subject* and *message*.

        When both arguments are None the message is reported as missing
        and no further check is attempted.
        """
        self.ok = True

        if subject is None and message is None:
            self.error('Commit message is missing!')
            return

        self.subject = subject
        self.msg = message

        # The order matters: the summary must come after all the checks.
        for step in (self.check_contributed_under,
                     self.check_signed_off_by,
                     self.check_misc_signatures,
                     self.check_overall_format,
                     self.report_message_result):
            step()

    def report_message_result(self):
        """Print the final verdict; prints nothing below NORMAL verbosity."""
        if Verbose.level < Verbose.NORMAL:
            return
        if self.ok:
            print('The commit message format passed all checks.')
        else:
            # Point the user at the format description.
            print(self.url)

    def error(self, *err):
        """Mark the message as bad and print the lines given in *err*.

        A heading is printed ahead of the first error unless the
        verbosity is ONELINE or lower.  The first line of *err* is
        printed as a bullet, the remaining lines are indented under it.
        """
        first_failure = self.ok
        self.ok = False
        if first_failure and Verbose.level > Verbose.ONELINE:
            print('The commit message format is not valid:')
        if Verbose.level < Verbose.NORMAL:
            return
        for index, text in enumerate(err):
            print(' *' if index == 0 else '  ', text)

    def check_contributed_under(self):
        """Require the Contributed-under line somewhere in the message."""
        required = 'Contributed-under: TianoCore Contribution Agreement 1.0'
        if required not in self.msg:
            self.error('Missing Contributed-under! (Note: this must be ' +
                       'added by the code contributor!)')

    @staticmethod
    def make_signature_re(sig, re_input=False):
        """Compile a regex matching a '<sig>: <value>' line.

        Unless *re_input* is true, *sig* is plain text in which every '-'
        may also appear as one or more dashes or whitespace characters.
        The compiled pattern ignores case and works per line; its groups
        are the tag, the whitespace before the colon, the whitespace
        after the colon, and the value.
        """
        tag_pattern = sig if re_input else sig.replace('-', r'[-\s]+')
        re_str = ''.join((r'^(?P<tag>',
                          tag_pattern,
                          r')(\s*):(\s*)(?P<value>\S.*?)(?:\s*)$'))
        try:
            return re.compile(re_str, re.MULTILINE|re.IGNORECASE)
        except Exception:
            # Show the offending pattern, then let the error propagate.
            print("Tried to compile re:", re_str)
            raise

    # Matches one line of the signature block: either 'Tag: value' or a
    # bracketed '[updater: note]' remark.
    sig_block_re = \
        re.compile(r'''^
                        (?: (?P<tag>[^:]+) \s* : \s*
                            (?P<value>\S.*?) )
                            |
                        (?: \[ (?P<updater>[^:]+) \s* : \s*
                               (?P<note>.+?) \s* \] )
                    \s* $''',
                   re.VERBOSE | re.MULTILINE)

    def find_signatures(self, sig):
        """Find and validate every *sig* signature line in the message.

        '-by' is appended to *sig* unless it already ends with it or is
        'Cc'.  Spelling, spacing around the colon and the email address
        of each match are checked.  Returns the list of matches as
        (tag, space-before-colon, space-after-colon, value) tuples.
        """
        if sig != 'Cc' and not sig.endswith('-by'):
            sig += '-by'

        matches = self.make_signature_re(sig).findall(self.msg)

        # Spelling problems for all matches are reported first.
        for tag, _, _, _ in matches:
            if tag != sig:
                self.error("'" + tag + "' should be '" + sig + "'")

        for _, before_colon, after_colon, value in matches:
            if before_colon != '':
                self.error('There should be no spaces between ' + sig +
                           " and the ':'")
            if after_colon != ' ':
                self.error("There should be a space after '" + sig + ":'")

            self.check_email_address(value)

        return matches

    # Splits 'Name <address>' into name, separating whitespace and address.
    email_re1 = re.compile(r'(?:\s*)(.*?)(\s*)<(.+)>\s*$',
                           re.MULTILINE|re.IGNORECASE)

    def check_email_address(self, email):
        """Check that *email* looks like 'Name <address>'.

        Reports a missing name, an unquoted name containing a comma, a
        missing space before '<' and a space inside the address.
        """
        email = email.strip()
        parsed = self.email_re1.match(email)
        if parsed is None:
            self.error("Email format is invalid: " + email.strip())
            return

        name = parsed.group(1).strip()
        separator = parsed.group(2)
        address = parsed.group(3)

        if not name:
            self.error("Name is not provided with email address: " +
                       email)
        elif ',' in name:
            quoted = len(name) > 2 and name[0] == '"' and name[-1] == '"'
            if not quoted:
                self.error('Add quotes (") around name with a comma: ' +
                           name)

        if separator == '':
            self.error("There should be a space between the name and " +
                       "email address: " + email)

        if ' ' in address:
            self.error("The email address cannot contain a space: " +
                       address)

    def check_signed_off_by(self):
        """Require at least one well-formed Signed-off-by line."""
        if 'Signed-off-by' not in self.msg:
            self.error('Missing Signed-off-by! (Note: this must be ' +
                       'added by the code contributor!)')
            return

        if not self.find_signatures('Signed-off'):
            self.error('Invalid Signed-off-by format!')

    # Signature kinds validated by check_misc_signatures().
    sig_types = (
        'Reviewed',
        'Reported',
        'Tested',
        'Suggested',
        'Acked',
        'Cc'
        )

    def check_misc_signatures(self):
        """Validate the format of every optional signature kind."""
        for kind in self.sig_types:
            self.find_signatures(kind)

    def check_overall_format(self):
        """Check line lengths and the position of the signature block.

        The message is examined as: subject line, one blank line, then
        the lines of the body.
        """
        # splitlines() drops the line terminators of the body, so the two
        # synthesized leading lines always use a plain '\n'.
        eol = '\n'
        lines = [self.subject + eol, eol] + self.msg.splitlines()

        # The subject is measured together with its appended terminator.
        subject_line = lines[0]
        if len(subject_line) > 76:
            self.error('First line of commit message (subject line) ' +
                       'is too long.')

        if len(subject_line.strip()) == 0:
            self.error('First line of commit message (subject line) ' +
                       'is empty.')

        if lines[1].strip() != '':
            self.error('Second line of commit message should be ' +
                       'empty.')

        # Body lines are numbered from 3.  A long line consisting of a
        # single word, or a git-svn-id line, is tolerated.
        for number, text in enumerate(lines[2:], 3):
            if len(text) <= 76:
                continue
            if len(text.split()) > 1 and not text.startswith('git-svn-id:'):
                self.error('Line %d of commit message is too long.' % number)

        # Walk backwards from the last line (the subject is excluded):
        # the signature block has to be preceded by an empty line.
        last_sig_line = None
        for line in reversed(lines[1:]):
            if self.sig_block_re.match(line) is not None:
                last_sig_line = line.strip()
                continue
            if line.strip() == '':
                pass
            elif last_sig_line is not None:
                err2 = 'Add empty line before "%s"?' % last_sig_line
                self.error('The line before the signature block ' +
                           'should be empty', err2)
            else:
                self.error('The signature block was not found')
            break
233
# Parser states used by GitDiffCheck: waiting for a 'diff --git' line,
# inside the per-file header, and inside the hunks (or binary payload).
(START, PRE_PATCH, PATCH) = range(3)

class GitDiffCheck:
    """Checks the contents of a git diff.

    The diff is walked line by line with a three-state machine (START,
    PRE_PATCH, PATCH).  Every added line of a text hunk is checked for
    its line ending, tab characters and trailing whitespace; the payload
    of a 'GIT binary patch' is skipped.

    After construction ``ok`` is False if any error was reported, and
    ``format_ok`` is False if the diff itself could not be parsed (which
    also stops the walk).
    """

    def __init__(self, diff):
        """Parse and check *diff*, a string holding the git diff text."""
        self.ok = True
        self.format_ok = True
        self.lines = diff.splitlines(True)
        self.count = len(self.lines)
        self.line_num = 0
        self.state = START
        # Give the per-file attributes defined values before any line is
        # parsed; they are refreshed for every file in the diff.
        self.binary = False
        self.set_filename(None)
        while self.line_num < self.count and self.format_ok:
            line_num = self.line_num
            self.run()
            # Every step has to consume at least one line.
            assert(self.line_num > line_num)
        self.report_message_result()

    def report_message_result(self):
        """Print the success message; prints nothing below NORMAL verbosity."""
        if Verbose.level < Verbose.NORMAL:
            return
        if self.ok:
            print('The code passed all checks.')

    def run(self):
        """Process the line at ``self.line_num`` and advance past it."""
        line = self.lines[self.line_num]

        # A new 'diff --git' line always starts the next file.
        if self.state in (PRE_PATCH, PATCH):
            if line.startswith('diff --git'):
                self.state = START
        if self.state == PATCH:
            if line.startswith('@@ '):
                self.state = PRE_PATCH
            elif (not self.binary and
                  len(line) >= 1 and line[0] not in ' -+' and
                  not line.startswith(r'\ No newline ')):
                # A text hunk line that is not context/added/removed ends
                # the patch; no further diff may follow it.  This must not
                # trigger inside a binary patch, whose 'literal'/'delta'
                # and payload lines do not start with ' ', '-' or '+'.
                for later in self.lines[self.line_num + 1:]:
                    if later.startswith('diff --git'):
                        self.format_error('diff found after end of patch')
                        break
                self.line_num = self.count
                return

        if self.state == START:
            if line.startswith('diff --git'):
                self.state = PRE_PATCH
                self.set_filename(None)
            elif len(line.rstrip()) != 0:
                self.format_error("didn't find diff command")
            self.line_num += 1
        elif self.state == PRE_PATCH:
            if line.startswith('+++ b/'):
                self.set_filename(line[6:].rstrip())
            if line.startswith('@@ '):
                self.state = PATCH
                self.binary = False
            elif line.startswith('GIT binary patch'):
                self.state = PATCH
                self.binary = True
            else:
                ok = False
                for pfx in self.pre_patch_prefixes:
                    if line.startswith(pfx):
                        ok = True
                if not ok:
                    self.format_error("didn't find diff hunk marker (@@)")
            self.line_num += 1
        elif self.state == PATCH:
            if self.binary:
                # Binary payload is not text; nothing to check.
                pass
            elif line.startswith('-'):
                pass
            elif line.startswith('+'):
                self.check_added_line(line[1:])
            elif line.startswith(r'\ No newline '):
                pass
            elif not line.startswith(' '):
                self.format_error("unexpected patch line")
            self.line_num += 1

    # Line prefixes accepted between 'diff --git' and the first hunk.
    pre_patch_prefixes = (
        '--- ',
        '+++ ',
        'index ',
        'new file ',
        'deleted file ',
        'old mode ',
        'new mode ',
        'similarity index ',
        'rename ',
        'Binary files ',
        )

    # Line endings recognized by check_added_line(), tried in this order.
    line_endings = ('\r\n', '\n\r', '\n', '\r')

    def set_filename(self, filename):
        """Remember the file being patched and whether it needs CRLF.

        CRLF is required for every file except those ending in '.sh';
        it is also required while the filename is not known (None).
        """
        self.hunk_filename = filename
        if filename:
            self.force_crlf = not filename.endswith('.sh')
        else:
            self.force_crlf = True

    def added_line_error(self, msg, line):
        """Report *msg* together with the file name (if known) and *line*."""
        lines = [ msg ]
        if self.hunk_filename is not None:
            lines.append('File: ' + self.hunk_filename)
        lines.append('Line: ' + line)

        self.error(*lines)

    def check_added_line(self, line):
        """Check one added line (without its leading '+') for style issues."""
        eol = ''
        # Each known ending is tried in order against what is left of the
        # line; the last one that matched is the one reported.
        for an_eol in self.line_endings:
            if line.endswith(an_eol):
                eol = an_eol
                line = line[:-len(eol)]

        stripped = line.rstrip()

        if self.force_crlf and eol != '\r\n':
            self.added_line_error('Line ending (%s) is not CRLF' % repr(eol),
                                  line)
        if '\t' in line:
            self.added_line_error('Tab character used', line)
        if len(stripped) < len(line):
            self.added_line_error('Trailing whitespace found', line)

    split_diff_re = re.compile(r'''
                                   (?P<cmd>
                                       ^ diff \s+ --git \s+ a/.+ \s+ b/.+ $
                                   )
                                   (?P<index>
                                       ^ index \s+ .+ $
                                   )
                               ''',
                               re.IGNORECASE | re.VERBOSE | re.MULTILINE)

    def format_error(self, err):
        """Report a diff parsing problem at the current line and stop parsing."""
        self.format_ok = False
        err = 'Patch format error: ' + err
        err2 = 'Line: ' + self.lines[self.line_num].rstrip()
        self.error(err, err2)

    def error(self, *err):
        """Mark the diff as bad and print the lines given in *err*.

        A heading is printed ahead of the first error unless the
        verbosity is ONELINE or lower.  The first line of *err* is
        printed as a bullet, the remaining lines are indented under it.
        """
        if self.ok and Verbose.level > Verbose.ONELINE:
            print('Code format is not valid:')
        self.ok = False
        if Verbose.level < Verbose.NORMAL:
            return
        for index, text in enumerate(err):
            print(' *' if index == 0 else '  ', text)
387
class CheckOnePatch:
    """Checks the contents of a git email formatted patch.

    Various checks are performed on both the commit message and the
    patch content.  After construction ``ok`` is True only if both the
    commit message check and the diff check (when a diff is present)
    succeeded.
    """

    def __init__(self, name, patch):
        """Split *patch* into its pieces and check each of them.

        *name* identifies the patch in the one-line summary that is
        printed when the verbosity is ONELINE.
        """
        self.patch = patch
        self.find_patch_pieces()

        msg_check = CommitMessageCheck(self.commit_subject, self.commit_msg)
        msg_ok = msg_check.ok

        diff_ok = True
        if self.diff is not None:
            diff_check = GitDiffCheck(self.diff)
            diff_ok = diff_check.ok

        self.ok = msg_ok and diff_ok

        if Verbose.level == Verbose.ONELINE:
            if self.ok:
                result = 'ok'
            else:
                failed = []
                if not msg_ok:
                    failed.append('commit message')
                if not diff_ok:
                    failed.append('diff content')
                result = 'bad ' + ' and '.join(failed)
            print(name, result)


    # Locates the first 'diff --git a/... b/...' line of the patch body.
    git_diff_re = re.compile(r'''
                                 ^ diff \s+ --git \s+ a/.+ \s+ b/.+ $
                             ''',
                             re.IGNORECASE | re.VERBOSE | re.MULTILINE)

    # Separates the commit message from the '---' diffstat section.
    stat_re = \
        re.compile(r'''
                       (?P<commit_message> [\s\S\r\n]* )
                       (?P<stat>
                           ^ --- $ [\r\n]+
                           (?: ^ \s+ .+ \s+ \| \s+ \d+ \s+ \+* \-*
                               $ [\r\n]+ )+
                           [\s\S\r\n]+
                       )
                   ''',
                   re.IGNORECASE | re.VERBOSE | re.MULTILINE)

    def find_patch_pieces(self):
        """Split ``self.patch`` into subject, prefix, message, stat and diff.

        Sets ``commit_msg``, ``stat``, ``commit_subject``,
        ``commit_prefix`` and ``diff``; a piece that is not present is
        left as None.  Text that starts with 'diff --git' is taken as a
        bare diff without any commit message.  Otherwise the text is
        parsed as an email; a missing Subject header yields an empty
        subject.
        """
        if sys.version_info < (3, 0):
            patch = self.patch.encode('ascii', 'ignore')
        else:
            patch = self.patch

        self.commit_msg = None
        self.stat = None
        self.commit_subject = None
        self.commit_prefix = None
        self.diff = None

        if patch.startswith('diff --git'):
            self.diff = patch
            return

        pmail = email.message_from_string(patch)
        parts = list(pmail.walk())
        # Only single-part, plain text emails are supported.
        assert(len(parts) == 1)
        assert(parts[0].get_content_type() == 'text/plain')
        content = parts[0].get_payload(decode=True).decode('utf-8', 'ignore')

        # Everything from the first 'diff --git' line on is the diff.
        mo = self.git_diff_re.search(content)
        if mo is not None:
            self.diff = content[mo.start():]
            content = content[:mo.start()]

        mo = self.stat_re.search(content)
        if mo is None:
            self.commit_msg = content
        else:
            self.stat = mo.group('stat')
            self.commit_msg = mo.group('commit_message')

        subject = pmail['subject']
        if subject is None:
            # No Subject header at all.  Use an empty subject so that the
            # commit message check can report it, rather than failing
            # here with an AttributeError.
            subject = ''
        # Undo header folding: the subject has to be a single line.
        subject = subject.replace('\r\n', '')
        self.commit_subject = subject.replace('\n', '')

        # Strip a leading '[PATCH ...]' style prefix from the subject.
        pfx_start = self.commit_subject.find('[')
        if pfx_start >= 0:
            pfx_end = self.commit_subject.find(']')
            if pfx_end > pfx_start:
                self.commit_prefix = self.commit_subject[pfx_start + 1 : pfx_end]
                self.commit_subject = self.commit_subject[pfx_end + 1 :].lstrip()
482
483
class CheckGitCommits:
    """Reads patches from git based on the specified git revision range.

    The patches are read from git, and then checked.  After construction
    ``ok`` is True only if every git command succeeded and every commit
    passed its checks.
    """

    def __init__(self, rev_spec, max_count):
        """Check the commits selected by *rev_spec*.

        *max_count* limits the number of commits that are read; None
        means no limit.
        """
        # Must be set before the first git call: run_git() clears it when
        # git reports a failure.
        self.ok = True
        commits = self.read_commit_list_from_git(rev_spec, max_count)
        if len(commits) == 1 and Verbose.level > Verbose.ONELINE:
            # Show the revision the way the user spelled it.
            commits = [ rev_spec ]
        blank_line = False
        for commit in commits:
            if Verbose.level > Verbose.ONELINE:
                # Separate the reports of consecutive commits.
                if blank_line:
                    print()
                else:
                    blank_line = True
                print('Checking git commit:', commit)
            patch = self.read_patch_from_git(commit)
            if not patch:
                # git produced nothing to check for this commit.
                self.ok = False
                continue
            self.ok &= CheckOnePatch(commit, patch).ok

    def read_commit_list_from_git(self, rev_spec, max_count):
        """Return the abbreviated commit ids selected by *rev_spec*.

        The list is empty when git fails.
        """
        # Run git to get the commit list
        cmd = [ 'rev-list', '--abbrev-commit', '--no-walk' ]
        if max_count is not None:
            cmd.append('--max-count=' + str(max_count))
        cmd.append(rev_spec)
        out = self.run_git(*cmd)
        return out.split()

    def read_patch_from_git(self, commit):
        """Return *commit* as an email formatted patch ('' when git fails)."""
        # Run git to get the commit patch
        return self.run_git('show', '--pretty=email', commit)

    def run_git(self, *args):
        """Run 'git' with *args* and return its decoded output.

        stderr is merged into the output.  When git exits with a
        non-zero status its output is a diagnostic rather than data, so
        it is printed to stderr (unless the verbosity is SILENT),
        ``ok`` is cleared and an empty string is returned.
        """
        cmd = [ 'git' ]
        cmd += args
        p = subprocess.Popen(cmd,
                     stdout=subprocess.PIPE,
                     stderr=subprocess.STDOUT)
        out = p.communicate()[0].decode('utf-8', 'ignore')
        if p.returncode != 0:
            self.ok = False
            if Verbose.level > Verbose.SILENT:
                print('git command failed:', ' '.join(cmd), file=sys.stderr)
                print(out.rstrip(), file=sys.stderr)
            return ''
        return out
526
class CheckOnePatchFile:
    """Performs a patch check for a single file.

    stdin is used when the filename is '-'.  After construction ``ok``
    holds the result of checking the patch.
    """

    def __init__(self, patch_filename):
        """Read the patch from *patch_filename* (or stdin) and check it."""
        if patch_filename == '-':
            patch = sys.stdin.read()
            patch_filename = 'stdin'
        else:
            # The context manager closes the file even if reading fails.
            with open(patch_filename, 'rb') as f:
                patch = f.read().decode('utf-8', 'ignore')
        if Verbose.level > Verbose.ONELINE:
            print('Checking patch file:', patch_filename)
        self.ok = CheckOnePatch(patch_filename, patch).ok
544
class CheckOneArg:
    """Checks whatever a single command line argument refers to.

    '-' (stdin) and names of existing paths are checked as patch files;
    anything else is handed to git as a revision specification.
    """

    def __init__(self, param, max_count=None):
        """Check *param*; *max_count* is only used for git revisions."""
        self.ok = True
        is_patch_file = param == '-' or os.path.exists(param)
        checker = (CheckOnePatchFile(param) if is_patch_file
                   else CheckGitCommits(param, max_count))
        self.ok = checker.ok
559
class PatchCheckApp:
    """Command line front end: checks every patch named by the arguments.

    After construction ``retval`` is 0 when all checks passed and -1
    otherwise.
    """

    def __init__(self):
        """Parse the command line and check each argument in turn."""
        self.parse_options()

        # Without any argument the HEAD commit is checked.
        patches = self.args.patches or [ 'HEAD' ]

        self.ok = True
        self.count = None
        for patch in patches:
            self.process_one_arg(patch)

        # A trailing '-<count>' with nothing after it applies to HEAD.
        if self.count is not None:
            self.process_one_arg('HEAD')

        self.retval = 0 if self.ok else -1

    def process_one_arg(self, arg):
        """Handle one argument.

        An argument of the form '-<integer>' is not checked itself; the
        integer is stored as the commit count for the next argument.
        Any other argument is checked with the pending count, which is
        then cleared.
        """
        looks_like_count = len(arg) >= 2 and arg[0] == '-'
        if looks_like_count:
            try:
                self.count = int(arg[1:])
            except ValueError:
                # Not a number after all: treat it as a normal argument.
                pass
            else:
                return
        self.ok &= CheckOneArg(arg, self.count).ok
        self.count = None

    def parse_options(self):
        """Parse sys.argv into ``self.args`` and set the verbosity level."""
        parser = argparse.ArgumentParser(description=__copyright__)
        parser.add_argument('--version', action='version',
                            version='%(prog)s ' + VersionNumber)
        parser.add_argument('patches', nargs='*',
                            help='[patch file | git rev list]')

        # --oneline and --silent cannot be combined.
        verbosity = parser.add_mutually_exclusive_group()
        verbosity.add_argument("--oneline",
                               action="store_true",
                               help="Print one result per line")
        verbosity.add_argument("--silent",
                               action="store_true",
                               help="Print nothing")

        self.args = parser.parse_args()
        if self.args.silent:
            Verbose.level = Verbose.SILENT
        elif self.args.oneline:
            Verbose.level = Verbose.ONELINE
611
# Run the checker when executed as a script.  The process exit status is
# PatchCheckApp.retval: 0 when every check passed, -1 otherwise.
if __name__ == "__main__":
    sys.exit(PatchCheckApp().retval)
614