From 0c397e8e269e839e8909e002e449e66892cdacb3 Mon Sep 17 00:00:00 2001
From: Kevin Schoedel <kpschoedel@google.com>
Date: Wed, 29 Sep 2021 16:11:47 -0400
Subject: [PATCH] Generate github comments from size reports (small scale)

#### Problem

The current bloat report requires preserving large binaries, and has
trouble matching tree parents for comparison.

In a24a6c308a4 (PR #9331), many workflows started uploading small json
artifacts contains size information for their builds, but these have not
yet been used to generate reports.

#### Change overview

This change adds a run of `scripts/tools/memory/gh_report.py` to the
periodic bloat check workflow, with tight rate limiting in case there
are unexpected problems.

#### Testing

Manually run offline with various data sets, and minimally on the live
repository.
---
 .github/workflows/bloat_check.yaml | 13 +++++++
 scripts/tools/memory/gh_report.py  | 60 +++++++++++++++++++++++++++++-
 2 files changed, 71 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/bloat_check.yaml b/.github/workflows/bloat_check.yaml
index 9487abddb7a7b5..e6a967967d015f 100644
--- a/.github/workflows/bloat_check.yaml
+++ b/.github/workflows/bloat_check.yaml
@@ -41,3 +41,16 @@ jobs:
                   scripts/helpers/bloat_check.py                        \
                        --github-repository project-chip/connectedhomeip \
                        --github-api-token "${{ secrets.GITHUB_TOKEN }}"
+
+            - name: Report2
+              run: |
+                  scripts/tools/memory/gh_report.py \
+                    --verbose \
+                    --report-increases 1 \
+                    --report-pr \
+                    --github-comment \
+                    --github-limit-artifact-pages 5 \
+                    --github-limit-artifacts 50 \
+                    --github-limit-comments 2 \
+                    --github-repository project-chip/connectedhomeip \
+                    --github-api-token "${{ secrets.GITHUB_TOKEN }}"
diff --git a/scripts/tools/memory/gh_report.py b/scripts/tools/memory/gh_report.py
index 7f395a684b0540..3271566badf8b1 100755
--- a/scripts/tools/memory/gh_report.py
+++ b/scripts/tools/memory/gh_report.py
@@ -73,6 +73,34 @@
             'alias': ['--keep'],
         },
     },
+    'github.dryrun-comment': {
+        'help': 'Dry run for sending output as github PR comments',
+        'default': False,
+    },
+    'github.limit-comments': {
+        'help': 'Send no more than COUNT comments',
+        'metavar': 'COUNT',
+        'default': 0,
+        'argparse': {
+            'type': int,
+        },
+    },
+    'github.limit-artifacts': {
+        'help': 'Download no more than COUNT artifacts',
+        'metavar': 'COUNT',
+        'default': 0,
+        'argparse': {
+            'type': int,
+        },
+    },
+    'github.limit-artifact-pages': {
+        'help': 'Examine no more than COUNT pages of artifacts',
+        'metavar': 'COUNT',
+        'default': 0,
+        'argparse': {
+            'type': int,
+        },
+    },
     Config.group_map('report'): {
         'group': 'output'
     },
@@ -208,10 +236,14 @@ def add_sizes_from_github(self):
         if not self.gh:
             return
 
+        artifact_limit = self.config['github.limit-artifacts']
+        artifact_pages = self.config['github.limit-artifact-pages']
+
         # Size artifacts have names of the form
         #   Size,{group},{pr},{commit_hash},{parent_hash}
         # Record them keyed by group and commit_hash to match them up
         # after we have the entire list.
+        page = 0
         size_artifacts: Dict[str, Dict[str, fastcore.basics.AttrDict]] = {}
         for i in ghapi.all.paged(self.gh.actions.list_artifacts_for_repo):
             if not i.artifacts:
@@ -226,6 +258,10 @@ def add_sizes_from_github(self):
                     if group not in size_artifacts:
                         size_artifacts[group] = {}
                     size_artifacts[group][commit] = a
+            page += 1
+            logging.debug('Artifact list page %d of %d', page, artifact_pages)
+            if artifact_pages and page >= artifact_pages:
+                break
 
         # Determine required size artifacts.
         required_artifact_ids: set[int] = set()
@@ -236,6 +272,9 @@ def add_sizes_from_github(self):
                     if report.parent not in group_reports:
                         logging.info('  No match for %s', report.name)
                         continue
+                    if (artifact_limit
+                            and len(required_artifact_ids) >= artifact_limit):
+                        continue
                     # We have size information for both this report and its
                     # parent, so ensure that both artifacts are downloaded.
                     parent = group_reports[report.parent]
@@ -291,7 +330,7 @@ def delete_stale_builds(self, build_ids: Iterable[int]):
         self.commit()
 
     def delete_artifact(self, artifact_id: int):
-        if self.gh and artifact_id not in self.deleted_artifacts:
+        if self.gh and artifact_id and artifact_id not in self.deleted_artifacts:
             self.deleted_artifacts.add(artifact_id)
             self.gh.actions.delete_artifact(artifact_id)
 
@@ -450,6 +489,15 @@ def gh_send_change_report(db: SizeDatabase, df: pd.DataFrame,
     text = md.getvalue()
     md.close()
 
+    if existing_comment_id:
+        comment = f'updating comment {existing_comment_id}'
+    else:
+        comment = 'as new comment'
+    logging.info('%s for %s %s', df.attrs['title'], summary, comment)
+
+    if db.config['github.dryrun-comment']:
+        return False
+
     try:
         if existing_comment_id:
             db.gh.issues.update_comment(existing_comment_id, text)
@@ -464,6 +512,12 @@ def report_matching_commits(db: SizeDatabase) -> Dict[str, pd.DataFrame]:
     """Report on all new comparable commits."""
     if not (db.config['report.pr'] or db.config['report.push']):
         return {}
+
+    comment_count = 0
+    comment_limit = db.config['github.limit-comments']
+    comment_enabled = (db.config['github.comment']
+                       or db.config['github.dryrun-comment'])
+
     dfs = {}
     for pr, commit, parent in db.select_matching_commits().fetchall():
         if not db.config['report.pr' if pr else 'report.push']:
@@ -483,7 +537,9 @@ def report_matching_commits(db: SizeDatabase) -> Dict[str, pd.DataFrame]:
                 f'Increases above {threshold:.1f}% from {commit} to {parent}')
             dfs[tdf.attrs['name']] = tdf
 
-        if pr and db.config['github.comment']:
+        if (pr and comment_enabled
+                and (comment_limit == 0 or comment_limit > comment_count)):
+            comment_count += 1
             if gh_send_change_report(db, df, tdf):
                 # Mark the originating builds, and remove the originating
                 # artifacts, so that they don't generate duplicate report