From 0c397e8e269e839e8909e002e449e66892cdacb3 Mon Sep 17 00:00:00 2001 From: Kevin Schoedel <kpschoedel@google.com> Date: Wed, 29 Sep 2021 16:11:47 -0400 Subject: [PATCH] Generate github comments from size reports (small scale) #### Problem The current bloat report requires preserving large binaries, and has trouble matching tree parents for comparison. In a24a6c308a4 (PR #9331), many workflows started uploading small json artifacts containing size information for their builds, but these have not yet been used to generate reports. #### Change overview This change adds a run of `scripts/tools/memory/gh_report.py` to the periodic bloat check workflow, with tight rate limiting in case there are unexpected problems. #### Testing Manually ran offline with various data sets, and minimally on the live repository. --- .github/workflows/bloat_check.yaml | 13 +++++++ scripts/tools/memory/gh_report.py | 60 +++++++++++++++++++++++++++++- 2 files changed, 71 insertions(+), 2 deletions(-) diff --git a/.github/workflows/bloat_check.yaml b/.github/workflows/bloat_check.yaml index 9487abddb7a7b5..e6a967967d015f 100644 --- a/.github/workflows/bloat_check.yaml +++ b/.github/workflows/bloat_check.yaml @@ -41,3 +41,16 @@ jobs: scripts/helpers/bloat_check.py \ --github-repository project-chip/connectedhomeip \ --github-api-token "${{ secrets.GITHUB_TOKEN }}" + + - name: Report2 + run: | + scripts/tools/memory/gh_report.py \ + --verbose \ + --report-increases 1 \ + --report-pr \ + --github-comment \ + --github-limit-artifact-pages 5 \ + --github-limit-artifacts 50 \ + --github-limit-comments 2 \ + --github-repository project-chip/connectedhomeip \ + --github-api-token "${{ secrets.GITHUB_TOKEN }}" diff --git a/scripts/tools/memory/gh_report.py b/scripts/tools/memory/gh_report.py index 7f395a684b0540..3271566badf8b1 100755 --- a/scripts/tools/memory/gh_report.py +++ b/scripts/tools/memory/gh_report.py @@ -73,6 +73,34 @@ 'alias': ['--keep'], }, }, + 'github.dryrun-comment': { + 'help': 'Dry 
run for sending output as github PR comments', + 'default': False, + }, + 'github.limit-comments': { + 'help': 'Send no more than COUNT comments', + 'metavar': 'COUNT', + 'default': 0, + 'argparse': { + 'type': int, + }, + }, + 'github.limit-artifacts': { + 'help': 'Download no more than COUNT artifacts', + 'metavar': 'COUNT', + 'default': 0, + 'argparse': { + 'type': int, + }, + }, + 'github.limit-artifact-pages': { + 'help': 'Examine no more than COUNT pages of artifacts', + 'metavar': 'COUNT', + 'default': 0, + 'argparse': { + 'type': int, + }, + }, Config.group_map('report'): { 'group': 'output' }, @@ -208,10 +236,14 @@ def add_sizes_from_github(self): if not self.gh: return + artifact_limit = self.config['github.limit-artifacts'] + artifact_pages = self.config['github.limit-artifact-pages'] + # Size artifacts have names of the form # Size,{group},{pr},{commit_hash},{parent_hash} # Record them keyed by group and commit_hash to match them up # after we have the entire list. + page = 0 size_artifacts: Dict[str, Dict[str, fastcore.basics.AttrDict]] = {} for i in ghapi.all.paged(self.gh.actions.list_artifacts_for_repo): if not i.artifacts: @@ -226,6 +258,10 @@ def add_sizes_from_github(self): if group not in size_artifacts: size_artifacts[group] = {} size_artifacts[group][commit] = a + page += 1 + logging.debug('Artifact list page %d of %d', page, artifact_pages) + if artifact_pages and page >= artifact_pages: + break # Determine required size artifacts. required_artifact_ids: set[int] = set() @@ -236,6 +272,9 @@ def add_sizes_from_github(self): if report.parent not in group_reports: logging.info(' No match for %s', report.name) continue + if (artifact_limit + and len(required_artifact_ids) >= artifact_limit): + continue # We have size information for both this report and its # parent, so ensure that both artifacts are downloaded. 
parent = group_reports[report.parent] @@ -291,7 +330,7 @@ def delete_stale_builds(self, build_ids: Iterable[int]): self.commit() def delete_artifact(self, artifact_id: int): - if self.gh and artifact_id not in self.deleted_artifacts: + if self.gh and artifact_id and artifact_id not in self.deleted_artifacts: self.deleted_artifacts.add(artifact_id) self.gh.actions.delete_artifact(artifact_id) @@ -450,6 +489,15 @@ def gh_send_change_report(db: SizeDatabase, df: pd.DataFrame, text = md.getvalue() md.close() + if existing_comment_id: + comment = f'updating comment {existing_comment_id}' + else: + comment = 'as new comment' + logging.info('%s for %s %s', df.attrs['title'], summary, comment) + + if db.config['github.dryrun-comment']: + return False + try: if existing_comment_id: db.gh.issues.update_comment(existing_comment_id, text) @@ -464,6 +512,12 @@ def report_matching_commits(db: SizeDatabase) -> Dict[str, pd.DataFrame]: """Report on all new comparable commits.""" if not (db.config['report.pr'] or db.config['report.push']): return {} + + comment_count = 0 + comment_limit = db.config['github.limit-comments'] + comment_enabled = (db.config['github.comment'] + or db.config['github.dryrun-comment']) + dfs = {} for pr, commit, parent in db.select_matching_commits().fetchall(): if not db.config['report.pr' if pr else 'report.push']: @@ -483,7 +537,9 @@ def report_matching_commits(db: SizeDatabase) -> Dict[str, pd.DataFrame]: f'Increases above {threshold:.1f}% from {commit} to {parent}') dfs[tdf.attrs['name']] = tdf - if pr and db.config['github.comment']: + if (pr and comment_enabled + and (comment_limit == 0 or comment_limit > comment_count)): + comment_count += 1 if gh_send_change_report(db, df, tdf): # Mark the originating builds, and remove the originating # artifacts, so that they don't generate duplicate report