Skip to content

Commit

Permalink
Add --analyze-stacks option
Browse files Browse the repository at this point in the history
It uses `git rev-list` to print, for each author, the total number of stacks
and their average, mean and max depth.

At the time of submission, here are the top 10 ghstack users of pytorch:
```
ezyang has 462 stacks max depth is 15 avg depth is 1.70 mean is 1
awgu has 240 stacks max depth is 28 avg depth is 4.30 mean is 1
peterbell10 has 146 stacks max depth is 7 avg depth is 1.84 mean is 1
zou3519 has 128 stacks max depth is 7 avg depth is 1.98 mean is 1
jerryzh168 has 113 stacks max depth is 16 avg depth is 1.45 mean is 1
bdhirsh has 111 stacks max depth is 7 avg depth is 1.85 mean is 2
wconstab has 108 stacks max depth is 7 avg depth is 2.15 mean is 1
SherlockNoMad has 99 stacks max depth is 4 avg depth is 1.24 mean is 1
zasdfgbnm has 80 stacks max depth is 11 avg depth is 2.52 mean is 6
desertfire has 73 stacks max depth is 3 avg depth is 1.14 mean is 1
```
  • Loading branch information
malfet committed Feb 28, 2023
1 parent ae69de5 commit 7fc7dfe
Showing 1 changed file with 38 additions and 2 deletions.
40 changes: 38 additions & 2 deletions analytics/github_analyze.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,9 +161,12 @@ def __init__(self, path, remote='upstream'):
self.repo_dir = path
self.remote = remote

def _run_git_cmd(self, *args) -> str:
    """Run ``git -C <self.repo_dir> <args...>`` and return the command output.

    The command line is handed to ``_check_output`` as a list of arguments;
    whatever that helper returns is passed straight back to the caller.
    """
    command = ['git', '-C', self.repo_dir, *args]
    return _check_output(command)

def _run_git_log(self, revision_range) -> List[GitCommit]:
log = _check_output(['git', '-C', self.repo_dir, 'log',
'--format=fuller', '--date=unix', revision_range, '--', '.']).split("\n")
log = self._run_git_cmd('log', '--format=fuller',
'--date=unix', revision_range, '--', '.').split("\n")
rc: List[GitCommit] = []
cur_msg: List[str] = []
for line in log:
Expand All @@ -179,6 +182,18 @@ def _run_git_log(self, revision_range) -> List[GitCommit]:
def get_commit_list(self, from_ref, to_ref) -> List[GitCommit]:
    """Return the commits ``_run_git_log`` reports for ``from_ref..to_ref``.

    Both refs are qualified with ``self.remote`` before the revision range
    is built, e.g. ``upstream/<from_ref>..upstream/<to_ref>``.
    """
    range_start = f"{self.remote}/{from_ref}"
    range_end = f"{self.remote}/{to_ref}"
    return self._run_git_log(f"{range_start}..{range_end}")

def get_ghstack_orig_branches(self) -> List[str]:
    """List remote branches matching ``<remote>/gh/*/orig``.

    Runs ``git branch --remotes --list <pattern>`` and returns one entry per
    non-blank output line, with surrounding whitespace removed.  An empty
    list is returned when git prints nothing (no matching branch).
    """
    pattern = f"{self.remote}/gh/*/orig"
    output = self._run_git_cmd("branch", "--remotes", "--list", pattern)
    # Splitting empty output would produce a single "" entry, which callers
    # would then treat as a branch name; drop blank lines instead.
    stripped_lines = (line.strip() for line in output.splitlines())
    return [name for name in stripped_lines if name]

def show_ref(self, ref) -> str:
    """Return the first space-delimited field of ``git show-ref <ref>`` output.

    The whole output is returned unchanged when it contains no space.
    """
    output = self._run_git_cmd("show-ref", ref)
    first_field, _, _ = output.partition(" ")
    return first_field

def merge_base(self, ref1, ref2) -> str:
    """Return the whitespace-stripped output of ``git merge-base ref1 ref2``."""
    output = self._run_git_cmd("merge-base", ref1, ref2)
    return output.strip()

def rev_list(self, ref):
    """Return the entries printed by ``git rev-list <remote>/master..<ref>``.

    The output is split on whitespace, so the result is a list of strings
    (empty when the command prints nothing).
    """
    revision_range = "{}/master..{}".format(self.remote, ref)
    output = self._run_git_cmd("rev-list", revision_range)
    return output.split()


def build_commit_dict(commits: List[GitCommit]) -> Dict[str, GitCommit]:
rc = {}
Expand Down Expand Up @@ -358,6 +373,22 @@ def get_commits_dict(x, y):
print(f'{html_url};{issue["title"]};{state}')


def analyze_stacks(repo: "GitRepo") -> None:
    """Print per-author ghstack statistics for ``repo``.

    For every branch returned by ``repo.get_ghstack_orig_branches()`` the
    stack depth is the number of entries ``repo.rev_list(branch)`` yields.
    Depths are grouped by author and one line per author is printed, authors
    with the most stacks first: stack count, max depth, average depth and
    median depth (upper middle value for an even number of stacks).

    Branches are queried on up to 10 worker threads.  ``tqdm`` is used for a
    progress bar when it is installed; otherwise a plain thread pool from the
    standard library does the same work without progress output.
    """
    from collections import defaultdict

    # An empty name has no author component and would raise IndexError below.
    branches = [branch for branch in repo.get_ghstack_orig_branches() if branch]
    if not branches:
        return

    def _branch_with_commits(branch):
        return branch, repo.rev_list(branch)

    try:
        from tqdm.contrib.concurrent import thread_map
    except ImportError:
        from concurrent.futures import ThreadPoolExecutor
        with ThreadPoolExecutor(max_workers=10) as pool:
            results = list(pool.map(_branch_with_commits, branches))
    else:
        results = thread_map(_branch_with_commits, branches, max_workers=10)

    stacks_by_author = defaultdict(list)
    for branch, rv_commits in results:
        # Presumably branches look like <remote>/gh/<author>/<id>/orig, so the
        # third path component is the author -- verify against the remote.
        author = branch.split("/")[2]
        stacks_by_author[author].append(len(rv_commits))

    by_stack_count = sorted(
        stacks_by_author.items(), key=lambda item: len(item[1]), reverse=True
    )
    for author, depths in by_stack_count:
        if len(depths) == 1:
            print(f"{author} has 1 stack of depth {depths[0]}")
            continue
        # The middle element is only a median once the depths are sorted;
        # indexing the unsorted list returned an arbitrary stack's depth.
        median = sorted(depths)[len(depths) // 2]
        print(
            f"{author} has {len(depths)} stacks max depth is {max(depths)} "
            f"avg depth is {sum(depths)/len(depths):.2f} median is {median}"
        )


def parse_arguments():
from argparse import ArgumentParser
parser = ArgumentParser(description="Print GitHub repo stats")
Expand All @@ -375,6 +406,7 @@ def parse_arguments():
parser.add_argument("--print-reverts", action="store_true")
parser.add_argument("--contributor-stats", action="store_true")
parser.add_argument("--missing-in-branch", action="store_true")
parser.add_argument("--analyze-stacks", action="store_true")
return parser.parse_args()


Expand All @@ -392,6 +424,10 @@ def main():

repo = GitRepo(args.repo_path, remote)

if args.analyze_stacks:
analyze_stacks(repo)
return

if args.missing_in_branch:
# Use milestone idx or search it along milestone titles
try:
Expand Down

0 comments on commit 7fc7dfe

Please sign in to comment.