-
Notifications
You must be signed in to change notification settings - Fork 3.8k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
scripts/release-notes.py: derive author aliases from AUTHORS
Previously, the release-notes script contained an explicit list of contributors to the repo with their name aliases, to disambiguate entries in the git log while constructing the release notes report. The explicit list was problematic in numerous ways, foremost because it was often out of date and incomplete, and also because it couldn't distinguish different folk with the same name but different addresses. Luckily, Git has a standard solution for this problem, called a "mailmap" file. The format of this file is standardized and documented in the man page git-check-mailmap(1). To simplify, this file format has one line per _person_, and each line can contain multiple name/mail combinations for that person. Meanwhile, the CockroachDB repository also maintains a file `AUTHORS` at the top of the source tree. This also contains a list of contributors with names and e-mails, although so far this file was not maintained to conform exactly to Git's mailmap format. This commit bridges the two things as follows: - it updates the `AUTHORS` file to conform to the mailmap format. This removes duplicate entries, and spells out Git commit/author aliases alongside the primary name of a person. - it simplifies the release-notes script to use AUTHORS as its input database. Release note: None
- Loading branch information
Showing
11 changed files
with
301 additions
and
193 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -37,112 +37,112 @@ | |
from optparse import OptionParser | ||
from git.repo.fun import name_to_object | ||
from git.util import Stats | ||
import os.path | ||
|
||
### Global behavior constants ### | ||
|
||
# minimum sha length to disambiguate | ||
shamin = 9 | ||
|
||
# FIXME(knz): This probably needs to use the .mailmap. | ||
author_aliases = { | ||
'changangela': "Angela Chang", | ||
'dianasaur323': "Diana Hsieh", | ||
'kena': "Raphael 'kena' Poss", | ||
'vivekmenezes': "Vivek Menezes", | ||
'Darin':"Darin Peshev", | ||
'RaduBerinde': "Radu Berinde", | ||
'Andy Kimball': "Andrew Kimball", | ||
'marc': "Marc Berhault", | ||
'Lauren': "Lauren Hirata", | ||
'lhirata' : "Lauren Hirata", | ||
'Emmanuel': "Emmanuel Sales", | ||
'MBerhault': "Marc Berhault", | ||
'Nate': "Nathaniel Stewart", | ||
'a6802739': "Song Hao", | ||
'Abhemailk [email protected]': "Abhishek Madan", | ||
'rytaft': "Rebecca Taft", | ||
'songhao': "Song Hao", | ||
'solongordon': "Solon Gordon", | ||
'tim-o': "Tim O'Brien", | ||
'Tyler314': "Tyler Roberts", | ||
'Amruta': "Amruta Ranade", | ||
'yuzefovich': "Yahor Yuzefovich", | ||
'madhavsuresh': "Madhav Suresh", | ||
'Richard Loveland': "Rich Loveland", | ||
} | ||
|
||
# FIXME(knz): This too. | ||
crdb_folk = set([ | ||
"Abhishek Madan", | ||
"Alex Robinson", | ||
"Alfonso Subiotto Marqués", | ||
"Amruta Ranade", | ||
"Andrei Matei", | ||
"Andrew Couch", | ||
"Andrew Kimball", | ||
"Andrew Werner", | ||
"Andrew Kryczka", | ||
"Andy Woods", | ||
"Aditya Maru", | ||
"Angela Chang", | ||
"Arjun Narayan", | ||
"Ben Darnell", | ||
"Bilal Akhtar", | ||
"Bob Vawter", | ||
"Bram Gruneir", | ||
"Celia La", | ||
"Daniel Harrison", | ||
"David Taylor", | ||
"Darin Peshev", | ||
"Diana Hsieh", | ||
"Emmanuel Sales", | ||
"Erik Trinh", | ||
"George Utsin", | ||
"Jesse Seldess", | ||
"Jessica Edwards", | ||
"Joseph Lowinske", | ||
"Joey Pereira", | ||
"Jordan Lewis", | ||
"Justin Jaffray", | ||
"Jeffrey Xiao", | ||
"Ken Liu", | ||
"Kendra Curtis", | ||
"Kuan Luo", | ||
"Lauren Hirata", | ||
"Lucy Zhang", | ||
"Madhav Suresh", | ||
"Marc Berhault", | ||
"Masha Schneider", | ||
"Matt Jibson", | ||
"Matt Tracy", | ||
"Nathan VanBenschoten", | ||
"Nathaniel Stewart", | ||
"Nikhil Benesch", | ||
"Paul Bardea", | ||
"Pete Vilter", | ||
"Peter Mattis", | ||
"Radu Berinde", | ||
"Rafi Shamim", | ||
"Raphael 'kena' Poss", | ||
"Rebecca Taft", | ||
"Rich Loveland", | ||
"Richard Wu", | ||
"Ridwan Sharif", | ||
"Rohan Yadav", | ||
"Roland Crosby", | ||
"Sean Loiselle", | ||
"Solon Gordon", | ||
"Spencer Kimball", | ||
"Tamir Duberstein", | ||
"Tim O'Brien", | ||
"Tobias Schottdorf", | ||
"Tyler Roberts", | ||
"Will Cross", | ||
"Victor Chen", | ||
"Vivek Menezes", | ||
"Yahor Yuzefovich", | ||
]) | ||
|
||
# Basic mailmap functionality using the AUTHORS file.

# mmre matches one whole AUTHORS line: a primary name, the primary address
# in angle brackets, then zero or more trailing "alias <addr>" pairs.
mmre = re.compile(r'^(?P<name>.*?)\s+<(?P<addr>[^>]*)>(?P<aliases>(?:[^<]*<[^>]*>)*)$')
# mmare picks the individual "alias <addr>" pairs out of the text captured
# by the `aliases` group of mmre; the alias part may be empty.
mmare = re.compile('(?P<alias>[^<]*)<(?P<addr>[^>]*)>')

# Persons whose address contains "@cockroachlabs.com".
crdb_folk = set()


class P:
    """A person known to the script: a primary name/address plus aliases.

    Instances compare for equality and hash by identity, so they can be
    stored in sets; ordering is by (name, email).
    """

    def __init__(self, name, addr):
        self.name = name
        self.email = addr
        # Every (name, address) pair this person is known under; the
        # primary identity always comes first.
        self.aliases = [(name, addr)]
        self.crdb = '@cockroachlabs.com' in addr
        if self.crdb:
            crdb_folk.add(self)

    def __repr__(self):
        return "%s <%s>" % (self.name, self.email)

    def __lt__(self, other):
        return (self.name, self.email) < (other.name, other.email)


# Lookup indexes over the known persons:
# (name, addr) -> P, addr -> [P, ...] and name -> [P, ...].
mmap_bycanon = {}
mmap_byaddr = {}
mmap_byname = {}


def define_person(name, addr):
    """Register a new person under the primary identity (name, addr).

    Returns the new P, or None (after printing a warning on stderr) when
    that exact (name, addr) pair is already registered.
    """
    canon = (name, addr)
    # Check for duplicates *before* constructing P: the constructor adds
    # team members to crdb_folk, and a rejected duplicate must not leave a
    # phantom entry behind in that set.
    if canon in mmap_bycanon:
        print('warning: duplicate person %r, ignoring' % (canon,), file=sys.stderr)
        return None
    p = P(name, addr)
    mmap_bycanon[canon] = p
    mmap_byaddr.setdefault(addr, []).append(p)
    mmap_byname.setdefault(name, []).append(p)
    return p
|
||
# Populate the mailmap indexes from the AUTHORS file in the current
# directory. Each usable line has the shape
#   Primary Name <primary@addr> [Alias Name] <alias@addr> ...
# Lines that start with '#' or do not match mmre are skipped.
if not os.path.exists('AUTHORS'):
    print('warning: AUTHORS missing in current directory.', file=sys.stderr)
    print('Maybe use "cd" to navigate to the working tree root.', file=sys.stderr)
else:
    # AUTHORS contains non-ASCII names, so do not depend on the locale's
    # default encoding.
    with open('AUTHORS', 'r', encoding='utf-8') as f:
        for line in f:
            # mmre is anchored at both ends; strip so that stray leading or
            # trailing whitespace does not silently discard the entry.
            line = line.strip()
            if line.startswith('#'):
                continue
            m = mmre.match(line)
            if m is None:
                continue
            p = define_person(m.group('name'), m.group('addr'))
            if p is None:
                continue
            # A person counts as a team member if *any* address on the
            # line, primary or alias, is a cockroachlabs.com one.
            p.crdb = '@cockroachlabs.com' in line
            if p.crdb:
                crdb_folk.add(p)
            aliases = mmare.findall(m.group('aliases'))
            for alias, addr in aliases:
                name = alias.strip()
                # The address is indexed before the duplicate check below,
                # so it resolves to this person even if the alias pair
                # itself ends up rejected.
                byaddr = mmap_byaddr.setdefault(addr, [])
                if p not in byaddr:
                    byaddr.append(p)
                # An alias with no name reuses the primary name.
                if name == '':
                    name = p.name
                canon = (name, addr)
                if canon in mmap_bycanon:
                    print('warning: duplicate alias %r, ignoring' % (canon,), file=sys.stderr)
                    continue
                mmap_bycanon[canon] = p
                p.aliases.append(canon)
                byname = mmap_byname.setdefault(name, [])
                if p not in byname:
                    byname.append(p)
|
||
def lookup_person(name, email):
    """Return the main identity for one of a person's names or addresses.

    Resolution order: an exact (name, email) match in the mailmap, then the
    email address alone, then the name alone. If the first index that knows
    the value maps it to several people, a warning is printed on stderr and
    (name, email) is registered as a new person instead. A pair unknown to
    every index is also registered as a new person.
    """
    exact = mmap_bycanon.get((name, email))
    if exact is not None:
        # lucky case.
        return exact

    # Name+email didn't work: fall back to the address, then to the name.
    fallbacks = (
        (mmap_byaddr, email, 'and addr'),
        (mmap_byname, name, 'and name'),
    )
    for index, value, label in fallbacks:
        if value not in index:
            continue
        candidates = index[value]
        if len(candidates) > 1:
            print('warning: no direct name match for', (name, email),
                  label, value, 'is ambiguous,',
                  'keeping as-is', file=sys.stderr)
            return define_person(name, email)
        return candidates[0]

    # Neither the address nor the name is known.
    return define_person(name, email)
|
||
# Section titles for release notes. | ||
relnotetitles = { | ||
|
@@ -492,15 +492,16 @@ def get_direct_history(firstCommit, lastCommit): | |
|
||
def collect_authors(commit): | ||
authors = set() | ||
author = author_aliases.get(commit.author.name, commit.author.name) | ||
if author != 'GitHub': | ||
author = lookup_person(commit.author.name, commit.author.email) | ||
if author.name != 'GitHub': | ||
authors.add(author) | ||
author = author_aliases.get(commit.committer.name, commit.committer.name) | ||
if author != 'GitHub': | ||
author = lookup_person(commit.committer.name, commit.committer.email) | ||
if author.name != 'GitHub': | ||
authors.add(author) | ||
for m in coauthor.finditer(commit.message): | ||
aname = m.group('name').strip() | ||
author = author_aliases.get(aname, aname) | ||
amail = m.group('email').strip() | ||
author = lookup_person(aname, amail) | ||
authors.add(author) | ||
return authors | ||
|
||
|
@@ -526,7 +527,7 @@ def process_release_notes(pr, title, commit): | |
|
||
|
||
def makeitem(pr, prtitle, sha, authors): | ||
return {'authors': ', '.join(sorted(authors)), | ||
return {'authors': authors, | ||
'sha': sha, | ||
'pr': pr, | ||
'title': prtitle, | ||
|
@@ -676,9 +677,11 @@ def collect_item(pr, prtitle, sha, ncommits, authors, stats, prts): | |
'date': datetime.date.fromtimestamp(prts).isoformat(), | ||
}) | ||
|
||
history = per_group_history.get(item['authors'], []) | ||
history.append(item) | ||
per_group_history[item['authors']] = history | ||
al = item['authors'] | ||
k = str(sorted(al)) | ||
history = per_group_history.get(k, (al, [])) | ||
history[1].append(item) | ||
per_group_history[k] = history | ||
|
||
|
||
def analyze_standalone_commit(commit): | ||
|
@@ -729,15 +732,12 @@ def analyze_standalone_commit(commit): | |
firsttime_contributors = [] | ||
for a in individual_authors: | ||
# Find all aliases known for this person | ||
aliases = [a] | ||
for alias, name in author_aliases.items(): | ||
if name == a: | ||
aliases.append(alias) | ||
aliases = a.aliases | ||
# Collect the history for every alias | ||
hist = b'' | ||
for al in aliases: | ||
spin() | ||
c = subprocess.run(["git", "log", "--author=%s" % al, options.from_commit, '-n', '1'], stdout=subprocess.PIPE, check=True) | ||
c = subprocess.run(["git", "log", "--author=%s <%s>" % al, options.from_commit, '-n', '1'], stdout=subprocess.PIPE, check=True) | ||
hist += c.stdout | ||
if len(hist) == 0: | ||
# No commit from that author older than the first commit | ||
|
@@ -851,7 +851,7 @@ def renderlinks(item): | |
print("#### Changes without release note annotation") | ||
print() | ||
for item in missing_release_notes: | ||
authors = item['authors'] | ||
authors = ', '.join(str(x) for x in sorted(item['authors'])) | ||
print("- [%(pr)s][%(pr)s] [%(sha)s][%(sha)s] %(title)s" % item, "(%s)" % authors) | ||
seenshas.add(item['sha']) | ||
seenprs.add(item['pr']) | ||
|
@@ -878,10 +878,10 @@ def renderlinks(item): | |
print("We would like to thank the following contributors from the CockroachDB community:") | ||
print() | ||
for person in notified_authors: | ||
print("-", person, end='') | ||
print("-", person.name, end='') | ||
if person in firsttime_contributors: | ||
annot = "" | ||
if person in crdb_folk: | ||
if person.crdb: | ||
annot = ", CockroachDB team member" | ||
print(" (first-time contributor%s)" % annot, end='') | ||
print() | ||
|
@@ -897,9 +897,9 @@ def renderlinks(item): | |
fmt = " - %(date)s [%(pr)-6s][%(pr)-6s] (+%(insertions)4d -%(deletions)4d ~%(lines)4d/%(files)2d) %(title)s" | ||
|
||
for group in allgroups: | ||
items = per_group_history[group] | ||
print("- %s:" % group) | ||
al, items = per_group_history[group] | ||
items.sort(key=lambda x:x[sortkey],reverse=not revsort) | ||
print("- %s:" % ', '.join(a.name for a in sorted(al))) | ||
for item in items: | ||
print(fmt % item, end='') | ||
if not hideshas: | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,7 +6,7 @@ | |
|
||
#### Changes without release note annotation | ||
|
||
- [#unknown][#unknown] [e3a1f2c94][e3a1f2c94] master update (test1) | ||
- [#unknown][#unknown] [e3a1f2c94][e3a1f2c94] master update (test1 <[email protected]>) | ||
|
||
### Doc updates | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,7 +7,7 @@ | |
|
||
#### Changes without release note annotation | ||
|
||
- [#unknown][#unknown] [f872999e8][f872999e8] master update (test2) | ||
- [#unknown][#unknown] [f872999e8][f872999e8] master update (test2 <[email protected]>) | ||
|
||
### Doc updates | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,7 +9,7 @@ | |
|
||
#### Changes without release note annotation | ||
|
||
- [#unknown][#unknown] [4f4329fdc][4f4329fdc] master update (test3) | ||
- [#unknown][#unknown] [4f4329fdc][4f4329fdc] master update (test3 <[email protected]>) | ||
|
||
### Doc updates | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,7 +6,7 @@ | |
|
||
#### Changes without release note annotation | ||
|
||
- [#2][#2] [8156afc96][8156afc96] PR title in need of release note (test5) | ||
- [#2][#2] [8156afc96][8156afc96] PR title in need of release note (test5 <[email protected]>) | ||
|
||
### Doc updates | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
* 801acad030ef8c14766f72609287d323075af29b Merge pull request #200 from foo/bar | ||
|\ | ||
| * 4ba203f0af12e361707c20fa4254fe58f9c3a86e merge pr canary | ||
|/ | ||
* 20f736f8b7421f226d15111e2377a8e069972bf0 Merge #2 | ||
|\ | ||
| * fc02f2ab9ac0c1924c479fa601065235579a605e feature A2 | ||
|/ | ||
* ac02f9cf6095d6430c609812730a34fe254f5e77 Merge pull request #100 from foo/bar | ||
|\ | ||
| * 1fa346db29169f9d1c6f229e34e1e0b0af687af7 merge pr canary | ||
|/ | ||
* 931a977579e2e0f0efee763fa289bd2b3162755b Merge #1 | ||
|\ | ||
| * f76e64ed372d84433ac4c5e0a06e3dfe34a80686 feature A1 | ||
|/ | ||
* a1dec56519bfa4d850e2771bd9880d2dcdb715aa update AUTHORS | ||
* 15d3108780a91a71af56bdaeb020dbdf650fd1ef initial |
Oops, something went wrong.