1#!/usr/bin/env python 2 3# Copyright (c) 2009, Google Inc. All rights reserved. 4# 5# Redistribution and use in source and binary forms, with or without 6# modification, are permitted provided that the following conditions are 7# met: 8# 9# * Redistributions of source code must retain the above copyright 10# notice, this list of conditions and the following disclaimer. 11# * Redistributions in binary form must reproduce the above 12# copyright notice, this list of conditions and the following disclaimer 13# in the documentation and/or other materials provided with the 14# distribution. 15# * Neither the name of Google Inc. nor the names of its 16# contributors may be used to endorse or promote products derived from 17# this software without specific prior written permission. 18# 19# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30# 31# Checks Python's known list of committers against lists.webkit.org and SVN history. 32 33 34import os 35import subprocess 36import re 37import urllib2 38from datetime import date, datetime, timedelta 39from webkitpy.common.config.committers import CommitterList 40from webkitpy.common.system.deprecated_logging import log, error 41from webkitpy.common.checkout.scm import Git 42 43# WebKit includes a built copy of BeautifulSoup in Scripts/webkitpy 44# so this import should always succeed. 45from webkitpy.thirdparty.BeautifulSoup import BeautifulSoup 46 47def print_list_if_non_empty(title, list_to_print): 48 if not list_to_print: 49 return 50 print # Newline before the list 51 print title 52 for item in list_to_print: 53 print item 54 55class CommitterListFromMailingList: 56 committers_list_url = "http://lists.webkit.org/mailman/roster.cgi/webkit-committers" 57 reviewers_list_url = "http://lists.webkit.org/mailman/roster.cgi/webkit-reviewers" 58 59 def _fetch_emails_from_page(self, url): 60 page = urllib2.urlopen(url) 61 soup = BeautifulSoup(page) 62 63 emails = [] 64 # Grab the cells in the first column (which happens to be the bug ids). 65 for email_item in soup('li'): 66 email_link = email_item.find("a") 67 email = email_link.string.replace(" at ", "@") # The email is obfuscated using " at " instead of "@". 68 emails.append(email) 69 return emails 70 71 @staticmethod 72 def _commiters_not_found_in_email_list(committers, emails): 73 missing_from_mailing_list = [] 74 for committer in committers: 75 for email in committer.emails: 76 if email in emails: 77 break 78 else: 79 missing_from_mailing_list.append(committer) 80 return missing_from_mailing_list 81 82 @staticmethod 83 def _emails_not_found_in_committer_list(committers, emails): 84 email_to_committer_map = {} 85 for committer in committers: 86 for email in committer.emails: 87 email_to_committer_map[email] = committer 88 89 return filter(lambda email: not email_to_committer_map.get(email), emails) 90 91 def check_for_emails_missing_from_list(self, committer_list): 92 committer_emails = self._fetch_emails_from_page(self.committers_list_url) 93 list_name = "webkit-committers@lists.webkit.org" 94 95 missing_from_mailing_list = self._commiters_not_found_in_email_list(committer_list.committers(), committer_emails) 96 print_list_if_non_empty("Committers missing from %s:" % list_name, missing_from_mailing_list) 97 98 users_missing_from_committers = self._emails_not_found_in_committer_list(committer_list.committers(), committer_emails) 99 print_list_if_non_empty("Subcribers to %s missing from committer.py:" % list_name, users_missing_from_committers) 100 101 102 reviewer_emails = self._fetch_emails_from_page(self.reviewers_list_url) 103 list_name = "webkit-reviewers@lists.webkit.org" 104 105 missing_from_mailing_list = self._commiters_not_found_in_email_list(committer_list.reviewers(), reviewer_emails) 106 print_list_if_non_empty("Reviewers missing from %s:" % list_name, missing_from_mailing_list) 107 108 missing_from_reviewers = self._emails_not_found_in_committer_list(committer_list.reviewers(), reviewer_emails) 109 print_list_if_non_empty("Subcribers to %s missing from reviewers in committer.py:" % list_name, missing_from_reviewers) 110 111 missing_from_committers = self._emails_not_found_in_committer_list(committer_list.committers(), reviewer_emails) 112 print_list_if_non_empty("Subcribers to %s completely missing from committers.py" % list_name, missing_from_committers) 113 114 115class CommitterListFromGit: 116 login_to_email_address = { 117 'aliceli1' : 'alice.liu@apple.com', 118 'bdash' : 'mrowe@apple.com', 119 'bdibello' : 'bdibello@apple.com', # Bruce DiBello, only 4 commits: r10023, r9548, r9538, r9535 120 'cblu' : 'cblu@apple.com', 121 'cpeterse' : 'cpetersen@apple.com', 122 'eseidel' : 'eric@webkit.org', 123 'gdennis' : 'gdennis@webkit.org', 124 'goldsmit' : 'goldsmit@apple.com', # Debbie Goldsmith, only one commit r8839 125 'gramps' : 'gramps@apple.com', 126 'honeycutt' : 'jhoneycutt@apple.com', 127 'jdevalk' : 'joost@webkit.org', 128 'jens' : 'jens@apple.com', 129 'justing' : 'justin.garcia@apple.com', 130 'kali' : 'kali@apple.com', # Christy Warren, did BIDI work, 5 commits: r8815, r8802, r8801, r8791, r8773, r8603 131 'kjk' : 'kkowalczyk@gmail.com', 132 'kmccullo' : 'kmccullough@apple.com', 133 'kocienda' : 'kocienda@apple.com', 134 'lamadio' : 'lamadio@apple.com', # Lou Amadio, only 2 commits: r17949 and r17783 135 'lars' : 'lars@kde.org', 136 'lweintraub' : 'lweintraub@apple.com', 137 'lypanov' : 'lypanov@kde.org', 138 'mhay' : 'mhay@apple.com', # Mike Hay, 3 commits: r3813, r2552, r2548 139 'ouch' : 'ouch@apple.com', # John Louch 140 'pyeh' : 'patti@apple.com', # Patti Yeh, did VoiceOver work in WebKit 141 'rjw' : 'rjw@apple.com', 142 'seangies' : 'seangies@apple.com', # Sean Gies?, only 5 commits: r16600, r16592, r16511, r16489, r16484 143 'sheridan' : 'sheridan@apple.com', # Shelly Sheridan 144 'thatcher' : 'timothy@apple.com', 145 'tomernic' : 'timo@apple.com', 146 'trey' : 'trey@usa.net', 147 'tristan' : 'tristan@apple.com', 148 'vicki' : 'vicki@apple.com', 149 'voas' : 'voas@apple.com', # Ed Voas, did some Carbon work in WebKit 150 'zack' : 'zack@kde.org', 151 'zimmermann' : 'zimmermann@webkit.org', 152 } 153 154 def __init__(self): 155 self._last_commit_time_by_author_cache = {} 156 157 def _fetch_authors_and_last_commit_time_from_git_log(self): 158 last_commit_dates = {} 159 git_log_args = ['git', 'log', '--reverse', '--pretty=format:%ae %at'] 160 process = subprocess.Popen(git_log_args, stdout=subprocess.PIPE) 161 162 # eric@webkit.org@268f45cc-cd09-0410-ab3c-d52691b4dbfc 1257090899 163 line_regexp = re.compile("^(?P<author>.+)@\S+ (?P<timestamp>\d+)$") 164 while True: 165 output_line = process.stdout.readline() 166 if output_line == '' and process.poll() != None: 167 return last_commit_dates 168 169 match_result = line_regexp.match(output_line) 170 if not match_result: 171 error("Failed to match line: %s" % output_line) 172 last_commit_dates[match_result.group('author')] = float(match_result.group('timestamp')) 173 174 def _fill_in_emails_for_old_logins(self): 175 authors_missing_email = filter(lambda author: author.find('@') == -1, self._last_commit_time_by_author_cache) 176 authors_with_email = filter(lambda author: author.find('@') != -1, self._last_commit_time_by_author_cache) 177 prefixes_of_authors_with_email = map(lambda author: author.split('@')[0], authors_with_email) 178 179 for author in authors_missing_email: 180 # First check to see if we have a manual mapping from login to email. 181 author_email = self.login_to_email_address.get(author) 182 183 # Most old logins like 'darin' are now just 'darin@apple.com', so check for a prefix match if a manual mapping was not found. 184 if not author_email and author in prefixes_of_authors_with_email: 185 author_email_index = prefixes_of_authors_with_email.index(author) 186 author_email = authors_with_email[author_email_index] 187 188 if not author_email: 189 # No known email mapping, likely not an active committer. We could log here. 190 continue 191 192 # log("%s -> %s" % (author, author_email)) # For sanity checking. 193 no_email_commit_time = self._last_commit_time_by_author_cache.get(author) 194 email_commit_time = self._last_commit_time_by_author_cache.get(author_email) 195 # We compare the timestamps for extra sanity even though we could assume commits before email address were used for login are always going to be older. 196 if not email_commit_time or email_commit_time < no_email_commit_time: 197 self._last_commit_time_by_author_cache[author_email] = no_email_commit_time 198 del self._last_commit_time_by_author_cache[author] 199 200 def _last_commit_by_author(self): 201 if not self._last_commit_time_by_author_cache: 202 self._last_commit_time_by_author_cache = self._fetch_authors_and_last_commit_time_from_git_log() 203 self._fill_in_emails_for_old_logins() 204 del self._last_commit_time_by_author_cache['(no author)'] # The initial svn import isn't very useful. 205 return self._last_commit_time_by_author_cache 206 207 @staticmethod 208 def _print_three_column_row(widths, values): 209 print "%s%s%s" % (values[0].ljust(widths[0]), values[1].ljust(widths[1]), values[2]) 210 211 def print_possibly_expired_committers(self, committer_list): 212 authors_and_last_commits = self._last_commit_by_author().items() 213 authors_and_last_commits.sort(lambda a,b: cmp(a[1], b[1]), reverse=True) 214 committer_cuttof = date.today() - timedelta(days=365) 215 column_widths = [13, 25] 216 print 217 print "Committers who have not committed within one year:" 218 self._print_three_column_row(column_widths, ("Last Commit", "Committer Email", "Committer Record")) 219 for (author, last_commit) in authors_and_last_commits: 220 last_commit_date = date.fromtimestamp(last_commit) 221 if committer_cuttof > last_commit_date: 222 committer_record = committer_list.committer_by_email(author) 223 self._print_three_column_row(column_widths, (str(last_commit_date), author, committer_record)) 224 225 def print_committers_missing_from_committer_list(self, committer_list): 226 missing_from_committers_py = [] 227 last_commit_time_by_author = self._last_commit_by_author() 228 for author in last_commit_time_by_author: 229 if not committer_list.committer_by_email(author): 230 missing_from_committers_py.append(author) 231 232 never_committed = [] 233 for committer in committer_list.committers(): 234 for email in committer.emails: 235 if last_commit_time_by_author.get(email): 236 break 237 else: 238 never_committed.append(committer) 239 240 print_list_if_non_empty("Historical committers missing from committer.py:", missing_from_committers_py) 241 print_list_if_non_empty("Committers in committer.py who have never committed:", never_committed) 242 243 244def main(): 245 committer_list = CommitterList() 246 CommitterListFromMailingList().check_for_emails_missing_from_list(committer_list) 247 248 if not Git.in_working_directory("."): 249 print """\n\nWARNING: validate-committer-lists requires a git checkout. 250The following checks are disabled: 251 - List of committers ordered by last commit 252 - List of historical committers missing from committers.py 253""" 254 return 1 255 svn_committer_list = CommitterListFromGit() 256 svn_committer_list.print_possibly_expired_committers(committer_list) 257 svn_committer_list.print_committers_missing_from_committer_list(committer_list) 258 259if __name__ == "__main__": 260 main() 261