audk/BaseTools/Scripts/PatchCheck.py

608 lines
20 KiB
Python
Raw Normal View History

BaseTools/Scripts: Add PatchCheck.py script This script can be used to check some expected rules for EDK II patches. It only works on git formatted patches. It checks both the commit message and the lines that are added in the patch diff. In the commit message it verifies line lengths, signature formats, and the Contributed-under tag. In the patch, it checks that line endings are CRLF for all files that don't have a .sh extension. It verifies that no trailing whitespace is present and that tab characters are not used. Patch contributors should use this script prior to submitting their patches. Package maintainers can also use it to verify incoming patches. It can also be run by specifying a git revision list, so actual patch files are not always required. For example, to checkout this last 5 patches in your git branch you can run: python PatchCheck.py HEAD~5.. Or, a shortcut (like git log): python PatchCheck.py -5 The --oneline option works similar to git log --oneline. The --silent option enables silent operation. The script supports python 2.7 and python 3. Contributed-under: TianoCore Contribution Agreement 1.0 Signed-off-by: Jordan Justen <jordan.l.justen@intel.com> Cc: Erik Bjorge <erik.c.bjorge@intel.com> Cc: Yonghong Zhu <yonghong.zhu@intel.com> Cc: Liming Gao <liming.gao@intel.com> Reviewed-by: Leif Lindholm <leif.lindholm@linaro.org> Reviewed-by: Erik Bjorge <erik.c.bjorge@intel.com> Reviewed-by: Liming Gao <liming.gao@intel.com> git-svn-id: https://svn.code.sf.net/p/edk2/code/trunk/edk2@18652 6f19259b-4bc3-4df7-8a09-765794883524
2015-10-22 08:01:02 +02:00
## @file
# Check a patch for various format issues
#
# Copyright (c) 2015, Intel Corporation. All rights reserved.<BR>
#
# This program and the accompanying materials are licensed and made
# available under the terms and conditions of the BSD License which
# accompanies this distribution. The full text of the license may be
# found at http://opensource.org/licenses/bsd-license.php
#
# THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS"
# BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER
# EXPRESS OR IMPLIED.
#
from __future__ import print_function
VersionNumber = '0.1'
__copyright__ = "Copyright (c) 2015, Intel Corporation All rights reserved."
import email
import argparse
import os
import re
import subprocess
import sys
class Verbose:
SILENT, ONELINE, NORMAL = range(3)
level = NORMAL
class CommitMessageCheck:
"""Checks the contents of a git commit message."""
def __init__(self, subject, message):
self.ok = True
if subject is None and message is None:
self.error('Commit message is missing!')
return
self.subject = subject
self.msg = message
self.check_contributed_under()
self.check_signed_off_by()
self.check_misc_signatures()
self.check_overall_format()
self.report_message_result()
url = 'https://github.com/tianocore/tianocore.github.io/wiki/Commit-Message-Format'
def report_message_result(self):
if Verbose.level < Verbose.NORMAL:
return
if self.ok:
# All checks passed
return_code = 0
print('The commit message format passed all checks.')
else:
return_code = 1
if not self.ok:
print(self.url)
def error(self, *err):
if self.ok and Verbose.level > Verbose.ONELINE:
print('The commit message format is not valid:')
self.ok = False
if Verbose.level < Verbose.NORMAL:
return
count = 0
for line in err:
prefix = (' *', ' ')[count > 0]
print(prefix, line)
count += 1
def check_contributed_under(self):
cu_msg='Contributed-under: TianoCore Contribution Agreement 1.0'
if self.msg.find(cu_msg) < 0:
self.error('Missing Contributed-under! (Note: this must be ' +
'added by the code contributor!)')
@staticmethod
def make_signature_re(sig, re_input=False):
if re_input:
sub_re = sig
else:
sub_re = sig.replace('-', r'[-\s]+')
re_str = (r'^(?P<tag>' + sub_re +
r')(\s*):(\s*)(?P<value>\S.*?)(?:\s*)$')
try:
return re.compile(re_str, re.MULTILINE|re.IGNORECASE)
except Exception:
print("Tried to compile re:", re_str)
raise
sig_block_re = \
re.compile(r'''^
(?: (?P<tag>[^:]+) \s* : \s*
(?P<value>\S.*?) )
|
(?: \[ (?P<updater>[^:]+) \s* : \s*
(?P<note>.+?) \s* \] )
\s* $''',
re.VERBOSE | re.MULTILINE)
def find_signatures(self, sig):
if not sig.endswith('-by') and sig != 'Cc':
sig += '-by'
regex = self.make_signature_re(sig)
sigs = regex.findall(self.msg)
bad_case_sigs = filter(lambda m: m[0] != sig, sigs)
for s in bad_case_sigs:
self.error("'" +s[0] + "' should be '" + sig + "'")
for s in sigs:
if s[1] != '':
self.error('There should be no spaces between ' + sig +
" and the ':'")
if s[2] != ' ':
self.error("There should be a space after '" + sig + ":'")
self.check_email_address(s[3])
return sigs
email_re1 = re.compile(r'(?:\s*)(.*?)(\s*)<(.+)>\s*$',
re.MULTILINE|re.IGNORECASE)
def check_email_address(self, email):
email = email.strip()
mo = self.email_re1.match(email)
if mo is None:
self.error("Email format is invalid: " + email.strip())
return
name = mo.group(1).strip()
if name == '':
self.error("Name is not provided with email address: " +
email)
else:
quoted = len(name) > 2 and name[0] == '"' and name[-1] == '"'
if name.find(',') >= 0 and not quoted:
self.error('Add quotes (") around name with a comma: ' +
name)
if mo.group(2) == '':
self.error("There should be a space between the name and " +
"email address: " + email)
if mo.group(3).find(' ') >= 0:
self.error("The email address cannot contain a space: " +
mo.group(3))
def check_signed_off_by(self):
sob='Signed-off-by'
if self.msg.find(sob) < 0:
self.error('Missing Signed-off-by! (Note: this must be ' +
'added by the code contributor!)')
return
sobs = self.find_signatures('Signed-off')
if len(sobs) == 0:
self.error('Invalid Signed-off-by format!')
return
sig_types = (
'Reviewed',
'Reported',
'Tested',
'Suggested',
'Acked',
'Cc'
)
def check_misc_signatures(self):
for sig in self.sig_types:
self.find_signatures(sig)
def check_overall_format(self):
lines = self.msg.splitlines()
if len(lines) >= 1 and lines[0].endswith('\r\n'):
empty_line = '\r\n'
else:
empty_line = '\n'
lines.insert(0, empty_line)
lines.insert(0, self.subject + empty_line)
count = len(lines)
if count <= 0:
self.error('Empty commit message!')
return
if count >= 1 and len(lines[0]) > 76:
self.error('First line of commit message (subject line) ' +
'is too long.')
if count >= 1 and len(lines[0].strip()) == 0:
self.error('First line of commit message (subject line) ' +
'is empty.')
if count >= 2 and lines[1].strip() != '':
self.error('Second line of commit message should be ' +
'empty.')
for i in range(2, count):
if (len(lines[i]) > 76 and
len(lines[i].split()) > 1 and
not lines[i].startswith('git-svn-id:')):
self.error('Line %d of commit message is too long.' % (i + 1))
last_sig_line = None
for i in range(count - 1, 0, -1):
line = lines[i]
mo = self.sig_block_re.match(line)
if mo is None:
if line.strip() == '':
break
elif last_sig_line is not None:
err2 = 'Add empty line before "%s"?' % last_sig_line
self.error('The line before the signature block ' +
'should be empty', err2)
else:
self.error('The signature block was not found')
break
last_sig_line = line.strip()
(START, PRE_PATCH, PATCH) = range(3)
class GitDiffCheck:
"""Checks the contents of a git diff."""
def __init__(self, diff):
self.ok = True
self.format_ok = True
self.lines = diff.splitlines(True)
self.count = len(self.lines)
self.line_num = 0
self.state = START
while self.line_num < self.count and self.format_ok:
line_num = self.line_num
self.run()
assert(self.line_num > line_num)
self.report_message_result()
def report_message_result(self):
if Verbose.level < Verbose.NORMAL:
return
if self.ok:
print('The code passed all checks.')
def run(self):
line = self.lines[self.line_num]
if self.state in (PRE_PATCH, PATCH):
if line.startswith('diff --git'):
self.state = START
if self.state == PATCH:
if line.startswith('@@ '):
self.state = PRE_PATCH
elif len(line) >= 1 and line[0] not in ' -+' and \
not line.startswith(r'\ No newline '):
for line in self.lines[self.line_num + 1:]:
if line.startswith('diff --git'):
self.format_error('diff found after end of patch')
break
self.line_num = self.count
return
if self.state == START:
if line.startswith('diff --git'):
self.state = PRE_PATCH
self.set_filename(None)
elif len(line.rstrip()) != 0:
self.format_error("didn't find diff command")
self.line_num += 1
elif self.state == PRE_PATCH:
if line.startswith('+++ b/'):
self.set_filename(line[6:].rstrip())
if line.startswith('@@ '):
self.state = PATCH
else:
ok = False
for pfx in self.pre_patch_prefixes:
if line.startswith(pfx):
ok = True
if not ok:
self.format_error("didn't find diff hunk marker (@@)")
self.line_num += 1
elif self.state == PATCH:
if line.startswith('-'):
pass
elif line.startswith('+'):
self.check_added_line(line[1:])
elif line.startswith(r'\ No newline '):
pass
elif not line.startswith(' '):
self.format_error("unexpected patch line")
self.line_num += 1
pre_patch_prefixes = (
'--- ',
'+++ ',
'index ',
'new file ',
'deleted file ',
'old mode ',
'new mode ',
'similarity index ',
'rename ',
'Binary files ',
)
line_endings = ('\r\n', '\n\r', '\n', '\r')
def set_filename(self, filename):
self.hunk_filename = filename
if filename:
self.force_crlf = not filename.endswith('.sh')
else:
self.force_crlf = True
def added_line_error(self, msg, line):
lines = [ msg ]
if self.hunk_filename is not None:
lines.append('File: ' + self.hunk_filename)
lines.append('Line: ' + line)
self.error(*lines)
def check_added_line(self, line):
eol = ''
for an_eol in self.line_endings:
if line.endswith(an_eol):
eol = an_eol
line = line[:-len(eol)]
stripped = line.rstrip()
if self.force_crlf and eol != '\r\n':
self.added_line_error('Line ending (%s) is not CRLF' % repr(eol),
line)
if '\t' in line:
self.added_line_error('Tab character used', line)
if len(stripped) < len(line):
self.added_line_error('Trailing whitespace found', line)
split_diff_re = re.compile(r'''
(?P<cmd>
^ diff \s+ --git \s+ a/.+ \s+ b/.+ $
)
(?P<index>
^ index \s+ .+ $
)
''',
re.IGNORECASE | re.VERBOSE | re.MULTILINE)
def format_error(self, err):
self.format_ok = False
err = 'Patch format error: ' + err
err2 = 'Line: ' + self.lines[self.line_num].rstrip()
self.error(err, err2)
def error(self, *err):
if self.ok and Verbose.level > Verbose.ONELINE:
print('Code format is not valid:')
self.ok = False
if Verbose.level < Verbose.NORMAL:
return
count = 0
for line in err:
prefix = (' *', ' ')[count > 0]
print(prefix, line)
count += 1
class CheckOnePatch:
"""Checks the contents of a git email formatted patch.
Various checks are performed on both the commit message and the
patch content.
"""
def __init__(self, name, patch):
self.patch = patch
self.find_patch_pieces()
msg_check = CommitMessageCheck(self.commit_subject, self.commit_msg)
msg_ok = msg_check.ok
diff_ok = True
if self.diff is not None:
diff_check = GitDiffCheck(self.diff)
diff_ok = diff_check.ok
self.ok = msg_ok and diff_ok
if Verbose.level == Verbose.ONELINE:
if self.ok:
result = 'ok'
else:
result = list()
if not msg_ok:
result.append('commit message')
if not diff_ok:
result.append('diff content')
result = 'bad ' + ' and '.join(result)
print(name, result)
git_diff_re = re.compile(r'''
^ diff \s+ --git \s+ a/.+ \s+ b/.+ $
''',
re.IGNORECASE | re.VERBOSE | re.MULTILINE)
stat_re = \
re.compile(r'''
(?P<commit_message> [\s\S\r\n]* )
(?P<stat>
^ --- $ [\r\n]+
(?: ^ \s+ .+ \s+ \| \s+ \d+ \s+ \+* \-*
$ [\r\n]+ )+
[\s\S\r\n]+
)
''',
re.IGNORECASE | re.VERBOSE | re.MULTILINE)
def find_patch_pieces(self):
if sys.version_info < (3, 0):
patch = self.patch.encode('ascii', 'ignore')
else:
patch = self.patch
self.commit_msg = None
self.stat = None
self.commit_subject = None
self.commit_prefix = None
self.diff = None
if patch.startswith('diff --git'):
self.diff = patch
return
pmail = email.message_from_string(patch)
parts = list(pmail.walk())
assert(len(parts) == 1)
assert(parts[0].get_content_type() == 'text/plain')
content = parts[0].get_payload(decode=True).decode('utf-8', 'ignore')
mo = self.git_diff_re.search(content)
if mo is not None:
self.diff = content[mo.start():]
content = content[:mo.start()]
mo = self.stat_re.search(content)
if mo is None:
self.commit_msg = content
else:
self.stat = mo.group('stat')
self.commit_msg = mo.group('commit_message')
self.commit_subject = pmail['subject'].replace('\r\n', '')
self.commit_subject = self.commit_subject.replace('\n', '')
pfx_start = self.commit_subject.find('[')
if pfx_start >= 0:
pfx_end = self.commit_subject.find(']')
if pfx_end > pfx_start:
self.commit_prefix = self.commit_subject[pfx_start + 1 : pfx_end]
self.commit_subject = self.commit_subject[pfx_end + 1 :].lstrip()
class CheckGitCommits:
"""Reads patches from git based on the specified git revision range.
The patches are read from git, and then checked.
"""
def __init__(self, rev_spec, max_count):
commits = self.read_commit_list_from_git(rev_spec, max_count)
if len(commits) == 1 and Verbose.level > Verbose.ONELINE:
commits = [ rev_spec ]
self.ok = True
blank_line = False
for commit in commits:
if Verbose.level > Verbose.ONELINE:
if blank_line:
print()
else:
blank_line = True
print('Checking git commit:', commit)
patch = self.read_patch_from_git(commit)
self.ok &= CheckOnePatch(commit, patch).ok
def read_commit_list_from_git(self, rev_spec, max_count):
# Run git to get the commit patch
cmd = [ 'rev-list', '--abbrev-commit', '--no-walk' ]
if max_count is not None:
cmd.append('--max-count=' + str(max_count))
cmd.append(rev_spec)
out = self.run_git(*cmd)
return out.split()
def read_patch_from_git(self, commit):
# Run git to get the commit patch
return self.run_git('show', '--pretty=email', commit)
def run_git(self, *args):
cmd = [ 'git' ]
cmd += args
p = subprocess.Popen(cmd,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT)
return p.communicate()[0].decode('utf-8', 'ignore')
class CheckOnePatchFile:
"""Performs a patch check for a single file.
stdin is used when the filename is '-'.
"""
def __init__(self, patch_filename):
if patch_filename == '-':
patch = sys.stdin.read()
patch_filename = 'stdin'
else:
f = open(patch_filename, 'rb')
patch = f.read().decode('utf-8', 'ignore')
f.close()
if Verbose.level > Verbose.ONELINE:
print('Checking patch file:', patch_filename)
self.ok = CheckOnePatch(patch_filename, patch).ok
class CheckOneArg:
"""Performs a patch check for a single command line argument.
The argument will be handed off to a file or git-commit based
checker.
"""
def __init__(self, param, max_count=None):
self.ok = True
if param == '-' or os.path.exists(param):
checker = CheckOnePatchFile(param)
else:
checker = CheckGitCommits(param, max_count)
self.ok = checker.ok
class PatchCheckApp:
"""Checks patches based on the command line arguments."""
def __init__(self):
self.parse_options()
patches = self.args.patches
if len(patches) == 0:
patches = [ 'HEAD' ]
self.ok = True
self.count = None
for patch in patches:
self.process_one_arg(patch)
if self.count is not None:
self.process_one_arg('HEAD')
if self.ok:
self.retval = 0
else:
self.retval = -1
def process_one_arg(self, arg):
if len(arg) >= 2 and arg[0] == '-':
try:
self.count = int(arg[1:])
return
except ValueError:
pass
self.ok &= CheckOneArg(arg, self.count).ok
self.count = None
def parse_options(self):
parser = argparse.ArgumentParser(description=__copyright__)
parser.add_argument('--version', action='version',
version='%(prog)s ' + VersionNumber)
parser.add_argument('patches', nargs='*',
help='[patch file | git rev list]')
group = parser.add_mutually_exclusive_group()
group.add_argument("--oneline",
action="store_true",
help="Print one result per line")
group.add_argument("--silent",
action="store_true",
help="Print nothing")
self.args = parser.parse_args()
if self.args.oneline:
Verbose.level = Verbose.ONELINE
if self.args.silent:
Verbose.level = Verbose.SILENT
if __name__ == "__main__":
sys.exit(PatchCheckApp().retval)