From 323571581c11193d98beabe60aa8f3668712dd02 Mon Sep 17 00:00:00 2001
From: Kevin Schoedel <67607049+kpschoedel@users.noreply.github.com>
Date: Tue, 26 Oct 2021 13:36:55 -0400
Subject: [PATCH] Size reports: Improved comments (#10982)
#### Problem
It's hard to pick out actual size changes, beyond those called out in
the large-increase tables.
Report information can be distributed across multiple collapsed tables,
since the generator often runs before all builds are complete.
#### Change overview
Read back any existing comment and merge in new reports.
Comments now contain:
1. An open table highlighting large increases, if any.
2. A collapsed table of all increases, if any.
3. A collapsed table of all decreases, if any.
4. A collapsed table of all data.
#### Testing
Tested with manual runs:
https://github.com/project-chip/connectedhomeip/pull/10979#issuecomment-951981745
---
scripts/tools/memory/gh_report.py | 218 ++++++++++++++++------
scripts/tools/memory/memdf/util/sqlite.py | 16 +-
2 files changed, 170 insertions(+), 64 deletions(-)
diff --git a/scripts/tools/memory/gh_report.py b/scripts/tools/memory/gh_report.py
index e4ea4eabb3d3b6..44644f9220bf2d 100755
--- a/scripts/tools/memory/gh_report.py
+++ b/scripts/tools/memory/gh_report.py
@@ -22,6 +22,7 @@
import logging
import os
import os.path
+import re
import sqlite3
import sys
import zipfile
@@ -101,6 +102,14 @@
'type': int,
},
},
+ 'github.limit-pr': {
+ 'help': 'Report only on PR, if present.',
+ 'metavar': 'PR',
+ 'default': 0,
+ 'argparse': {
+ 'type': int,
+ },
+ },
Config.group_map('report'): {
'group': 'output'
},
@@ -194,8 +203,11 @@ def add_sizes(self, **kwargs):
bd = {k: kwargs[k] for k in ('hash', 'parent', 'time')}
cd = {k: kwargs.get(k, 0) for k in ('pr', 'artifact', 'commented')}
build = self.store_and_return_id('build', thing_id=thing, **bd, **cd)
- for d in kwargs['sizes']:
- self.store('size', build_id=build, **d)
+ if build is None:
+ logging.error('Failed to store %s %s %s', thing, bd, cd)
+ else:
+ for d in kwargs['sizes']:
+ self.store('size', build_id=build, **d)
def add_sizes_from_json(self, s: Union[bytes, str], origin: Dict):
"""Add sizes from a JSON size report."""
@@ -331,9 +343,14 @@ def delete_stale_builds(self, build_ids: Iterable[int]):
self.commit()
def delete_artifact(self, artifact_id: int):
- if self.gh and artifact_id and artifact_id not in self.deleted_artifacts:
+ if (self.gh and artifact_id
+ and artifact_id not in self.deleted_artifacts):
self.deleted_artifacts.add(artifact_id)
- self.gh.actions.delete_artifact(artifact_id)
+ try:
+ self.gh.actions.delete_artifact(artifact_id)
+ except Exception:
+ # During manual testing we sometimes lose the race against CI.
+ logging.error('Failed to delete artifact %d', artifact_id)
def delete_stale_artifacts(self, stale_artifacts: Iterable[int]):
if not self.config['github.keep']:
@@ -454,11 +471,16 @@ def changes_for_commit(db: SizeDatabase, pr: int, commit: str,
return df
-def gh_send_change_report(db: SizeDatabase, df: pd.DataFrame,
- tdf: pd.DataFrame) -> bool:
+comment_format_re = re.compile(r"^<!--fmt:(\d+)-->")
+
+
+def gh_send_change_report(db: SizeDatabase, df: pd.DataFrame) -> bool:
"""Send a change report as a github comment."""
+
if not db.gh:
return False
+
+ # Look for an existing comment for this change.
pr = df.attrs['pr']
# Check the most recent commit on the PR, so that we don't comment on
@@ -475,54 +497,32 @@ def gh_send_change_report(db: SizeDatabase, df: pd.DataFrame,
# Check for an existing size report comment. If one exists, we'll add
# the new report to it.
title = df.attrs['title']
- existing_comment_id = 0
+ existing_comment = None
+ existing_comment_format = 0
for comment in gh_get_comments_for_pr(db.gh, pr):
- if comment.body.partition('\n')[0] == df.attrs['title']:
- existing_comment_id = comment.id
- title = comment.body
+ comment_parts = comment.body.partition('\n')
+ if comment_parts[0].strip() == title:
+ existing_comment = comment
+ if m := comment_format_re.match(comment_parts[2]):
+ existing_comment_format = int(m.group(1))
break
- md = io.StringIO()
- md.write(title)
- md.write('\n')
-
- if tdf is not None and not tdf.empty:
- md.write(f'\n**{tdf.attrs["title"]}:**\n\n')
- memdf.report.write_df(db.config,
- tdf,
- md,
- 'pipe',
- hierify=True,
- title=False,
- tabulate={'floatfmt': '5.1f'})
-
- count = len(df.attrs['things'])
- platforms = ', '.join(sorted(list(set(df['platform']))))
- summary = f'{count} build{"" if count == 1 else "s"} (for {platforms})'
-    md.write(f'\n\n<details><summary>{summary}</summary>\n\n')
- memdf.report.write_df(db.config,
- df,
- md,
- 'pipe',
- hierify=True,
- title=False,
- tabulate={'floatfmt': '5.1f'})
-    md.write('\n</details>\n')
- text = md.getvalue()
- md.close()
+ if existing_comment_format != 1:
+ existing_comment = None
+ text = gh_comment_v1(db, df, existing_comment)
- if existing_comment_id:
- comment = f'updating comment {existing_comment_id}'
- else:
- comment = 'as new comment'
- logging.info('SCR: %s for %s %s', df.attrs['title'], summary, comment)
+ logging.info(
+ 'SCR: %s %s', df.attrs['title'],
+ f'updating comment {existing_comment.id}'
+ if existing_comment else 'as new comment')
if db.config['github.dryrun-comment']:
+ logging.debug('%s', text)
return False
try:
- if existing_comment_id:
- db.gh.issues.update_comment(existing_comment_id, text)
+ if existing_comment:
+ db.gh.issues.update_comment(existing_comment.id, text)
else:
db.gh.issues.create_comment(pr, text)
return True
@@ -530,6 +530,114 @@ def gh_send_change_report(db: SizeDatabase, df: pd.DataFrame,
return False
+def gh_comment_v1(db: SizeDatabase, df: pd.DataFrame, existing_comment) -> str:
+ """Format a github comment."""
+
+ if existing_comment:
+ df = v1_comment_merge(df, existing_comment)
+
+ threshold_df = None
+ increase_df = df[df['change'] > 0]
+ if increase_df.empty:
+ increase_df = None
+ elif threshold := db.config['report.increases']:
+ threshold_df = df[df['% change'] > threshold]
+ if threshold_df.empty:
+ threshold_df = None
+ decrease_df = df[df['change'] < 0]
+ if decrease_df.empty:
+ decrease_df = None
+
+ with io.StringIO() as md:
+ md.write(df.attrs['title'])
+        md.write('\n<!--fmt:1-->\n\n')
+
+ if threshold_df is not None:
+ md.write(f'**Increases above {threshold:.2g}%:**\n\n')
+ md.write('\n\n')
+ v1_comment_write_df(db, threshold_df, md)
+
+ if increase_df is not None:
+ summary = v1_comment_summary(increase_df)
+            md.write('<details>\n')
+            md.write(f'<summary>Increases ({summary})</summary>\n')
+ md.write('\n\n')
+ v1_comment_write_df(db, increase_df, md)
+            md.write('</details>\n\n')
+
+ if decrease_df is not None:
+ summary = v1_comment_summary(decrease_df)
+            md.write('<details>\n')
+            md.write(f'<summary>Decreases ({summary})</summary>\n')
+ md.write('\n\n')
+ v1_comment_write_df(db, decrease_df, md)
+            md.write('</details>\n\n')
+
+ summary = v1_comment_summary(df)
+        md.write('<details>\n')
+        md.write(f'<summary>Full report ({summary})</summary>\n')
+ md.write('\n\n')
+ v1_comment_write_df(db, df, md)
+        md.write('\n</details>\n')
+
+ return md.getvalue()
+
+
+def v1_comment_merge(df: pd.DataFrame, comment) -> pd.DataFrame:
+ with io.StringIO(comment.body) as body:
+ for line in body:
+        if line.startswith('<summary>'):
+ body.readline() # Blank line before table.
+ header, rows = read_hierified(body)
+ break
+ logging.debug('REC: read %d rows', len(rows))
+ df = df.append(pd.DataFrame(data=rows, columns=header).astype(df.dtypes))
+ return df.sort_values(
+ by=['platform', 'target', 'config', 'section']).drop_duplicates()
+
+
+def read_hierified(f):
+ """Read a markdown table in ‘hierified’ format."""
+
+ line = f.readline()
+ header = tuple((s.strip() for s in line.split('|')[1:-1]))
+
+ _ = f.readline() # The line under the header.
+
+ rows = []
+ for line in f:
+ line = line.strip()
+ if not line:
+ break
+ row = []
+ columns = line.split('|')
+ for i in range(0, len(header)):
+ column = columns[i + 1].strip()
+ if not column:
+ column = rows[-1][i]
+ row.append(column)
+ rows.append(tuple(row))
+
+ return (header, rows)
+
+
+def v1_comment_write_df(db: SizeDatabase, df: pd.DataFrame,
+ out: memdf.report.OutputOption):
+ memdf.report.write_df(db.config,
+ df,
+ out,
+ 'pipe',
+ hierify=True,
+ title=False,
+ tabulate={'floatfmt': '5.1f'})
+
+
+def v1_comment_summary(df: pd.DataFrame) -> str:
+ count = df[['platform', 'target', 'config']].drop_duplicates().shape[0]
+ platforms = ', '.join(sorted(list(set(df['platform']))))
+ return f'{count} build{"" if count == 1 else "s"} for {platforms}'
+
+
def report_matching_commits(db: SizeDatabase) -> Dict[str, pd.DataFrame]:
"""Report on all new comparable commits."""
if not (db.config['report.pr'] or db.config['report.push']):
@@ -542,30 +650,24 @@ def report_matching_commits(db: SizeDatabase) -> Dict[str, pd.DataFrame]:
report_push = db.config['report.push']
report_pr = db.config['report.pr']
+ only_pr = db.config['github.limit-pr']
dfs = {}
for pr, commit, parent in db.select_matching_commits().fetchall():
if not (report_pr if pr else report_push):
continue
+ # Github doesn't have a way to fetch artifacts associated with a
+ # particular PR. For testing purposes, filter to a single PR here.
+ if only_pr and pr != only_pr:
+ continue
+
df = changes_for_commit(db, pr, commit, parent)
dfs[df.attrs['name']] = df
- if threshold := db.config['report.increases']:
- tdf = df[df['% change'] > threshold]
- else:
- tdf = None
- if tdf is not None and not tdf.empty:
- commit = df.attrs['commit']
- parent = df.attrs['parent']
- tdf.attrs['name'] = f'L,{commit},{parent}'
- tdf.attrs['title'] = (
- f'Increases above {threshold:.1f}% from {parent} to {commit}')
- dfs[tdf.attrs['name']] = tdf
-
if (pr and comment_enabled
and (comment_limit == 0 or comment_limit > comment_count)):
- if gh_send_change_report(db, df, tdf):
+ if gh_send_change_report(db, df):
# Mark the originating builds, and remove the originating
# artifacts, so that they don't generate duplicate report
# comments.
diff --git a/scripts/tools/memory/memdf/util/sqlite.py b/scripts/tools/memory/memdf/util/sqlite.py
index eba40f1818de8c..94b6f9b33a2f81 100644
--- a/scripts/tools/memory/memdf/util/sqlite.py
+++ b/scripts/tools/memory/memdf/util/sqlite.py
@@ -97,14 +97,18 @@ def store(self, table: str, **kwargs):
self.connection().execute(q, v)
def get_matching(self, table: str, columns: List[str], **kwargs):
- return self.connection().execute(
- f"SELECT {','.join(columns)} FROM {table}"
- f" WHERE {'=? AND '.join(kwargs.keys())}=?",
- list(kwargs.values()))
+ q = (f"SELECT {','.join(columns)} FROM {table}"
+ f" WHERE {'=? AND '.join(kwargs.keys())}=?")
+ v = list(kwargs.values())
+ return self.connection().execute(q, v)
def get_matching_id(self, table: str, **kwargs):
- return self.get_matching(table, ['id'], **kwargs).fetchone()[0]
+ cur = self.get_matching(table, ['id'], **kwargs)
+ row = cur.fetchone()
+ if row:
+ return row[0]
+ return None
- def store_and_return_id(self, table: str, **kwargs) -> int:
+ def store_and_return_id(self, table: str, **kwargs) -> Optional[int]:
self.store(table, **kwargs)
return self.get_matching_id(table, **kwargs)