From 323571581c11193d98beabe60aa8f3668712dd02 Mon Sep 17 00:00:00 2001 From: Kevin Schoedel <67607049+kpschoedel@users.noreply.github.com> Date: Tue, 26 Oct 2021 13:36:55 -0400 Subject: [PATCH] Size reports: Improved comments (#10982) #### Problem It's hard to pick out actual size changes, beyond those called out in the large-increase tables. Report information can be distributed across multiple collapsed tables, since the generator often runs before all builds are complete. #### Change overview Read back any existing comment and merge in new reports. Comments now contain: 1. An open table highlighting large increases, if any. 2. A collapsed table of all increases, if any. 3. A collapsed table of all decreases, if any. 4. A collapsed table of all data. #### Testing Tested with manual runs: https://github.com/project-chip/connectedhomeip/pull/10979#issuecomment-951981745 --- scripts/tools/memory/gh_report.py | 218 ++++++++++++++++------ scripts/tools/memory/memdf/util/sqlite.py | 16 +- 2 files changed, 170 insertions(+), 64 deletions(-) diff --git a/scripts/tools/memory/gh_report.py b/scripts/tools/memory/gh_report.py index e4ea4eabb3d3b6..44644f9220bf2d 100755 --- a/scripts/tools/memory/gh_report.py +++ b/scripts/tools/memory/gh_report.py @@ -22,6 +22,7 @@ import logging import os import os.path +import re import sqlite3 import sys import zipfile @@ -101,6 +102,14 @@ 'type': int, }, }, + 'github.limit-pr': { + 'help': 'Report only on PR, if present.', + 'metavar': 'PR', + 'default': 0, + 'argparse': { + 'type': int, + }, + }, Config.group_map('report'): { 'group': 'output' }, @@ -194,8 +203,11 @@ def add_sizes(self, **kwargs): bd = {k: kwargs[k] for k in ('hash', 'parent', 'time')} cd = {k: kwargs.get(k, 0) for k in ('pr', 'artifact', 'commented')} build = self.store_and_return_id('build', thing_id=thing, **bd, **cd) - for d in kwargs['sizes']: - self.store('size', build_id=build, **d) + if build is None: + logging.error('Failed to store %s %s %s', thing, bd, cd) + else: + for d in kwargs['sizes']: + self.store('size', build_id=build, **d) def add_sizes_from_json(self, s: Union[bytes, str], origin: Dict): """Add sizes from a JSON size report.""" @@ -331,9 +343,14 @@ def delete_stale_builds(self, build_ids: Iterable[int]): self.commit() def delete_artifact(self, artifact_id: int): - if self.gh and artifact_id and artifact_id not in self.deleted_artifacts: + if (self.gh and artifact_id + and artifact_id not in self.deleted_artifacts): self.deleted_artifacts.add(artifact_id) - self.gh.actions.delete_artifact(artifact_id) + try: + self.gh.actions.delete_artifact(artifact_id) + except Exception: + # During manual testing we sometimes lose the race against CI. + logging.error('Failed to delete artifact %d', artifact_id) def delete_stale_artifacts(self, stale_artifacts: Iterable[int]): if not self.config['github.keep']: @@ -454,11 +471,16 @@ def changes_for_commit(db: SizeDatabase, pr: int, commit: str, return df -def gh_send_change_report(db: SizeDatabase, df: pd.DataFrame, - tdf: pd.DataFrame) -> bool: +comment_format_re = re.compile(r"^") + + +def gh_send_change_report(db: SizeDatabase, df: pd.DataFrame) -> bool: """Send a change report as a github comment.""" + if not db.gh: return False + + # Look for an existing comment for this change. pr = df.attrs['pr'] # Check the most recent commit on the PR, so that we don't comment on @@ -475,54 +497,32 @@ def gh_send_change_report(db: SizeDatabase, df: pd.DataFrame, # Check for an existing size report comment. If one exists, we'll add # the new report to it. title = df.attrs['title'] - existing_comment_id = 0 + existing_comment = None + existing_comment_format = 0 for comment in gh_get_comments_for_pr(db.gh, pr): - if comment.body.partition('\n')[0] == df.attrs['title']: - existing_comment_id = comment.id - title = comment.body + comment_parts = comment.body.partition('\n') + if comment_parts[0].strip() == title: + existing_comment = comment + if m := comment_format_re.match(comment_parts[2]): + existing_comment_format = int(m.group(1)) break - md = io.StringIO() - md.write(title) - md.write('\n') - - if tdf is not None and not tdf.empty: - md.write(f'\n**{tdf.attrs["title"]}:**\n\n') - memdf.report.write_df(db.config, - tdf, - md, - 'pipe', - hierify=True, - title=False, - tabulate={'floatfmt': '5.1f'}) - - count = len(df.attrs['things']) - platforms = ', '.join(sorted(list(set(df['platform'])))) - summary = f'{count} build{"" if count == 1 else "s"} (for {platforms})' - md.write(f'\n
\n{summary}\n\n') - memdf.report.write_df(db.config, - df, - md, - 'pipe', - hierify=True, - title=False, - tabulate={'floatfmt': '5.1f'}) - md.write('\n
\n') - text = md.getvalue() - md.close() + if existing_comment_format != 1: + existing_comment = None + text = gh_comment_v1(db, df, existing_comment) - if existing_comment_id: - comment = f'updating comment {existing_comment_id}' - else: - comment = 'as new comment' - logging.info('SCR: %s for %s %s', df.attrs['title'], summary, comment) + logging.info( + 'SCR: %s %s', df.attrs['title'], + f'updating comment {existing_comment.id}' + if existing_comment else 'as new comment') if db.config['github.dryrun-comment']: + logging.debug('%s', text) return False try: - if existing_comment_id: - db.gh.issues.update_comment(existing_comment_id, text) + if existing_comment: + db.gh.issues.update_comment(existing_comment.id, text) else: db.gh.issues.create_comment(pr, text) return True @@ -530,6 +530,114 @@ def gh_send_change_report(db: SizeDatabase, df: pd.DataFrame, return False +def gh_comment_v1(db: SizeDatabase, df: pd.DataFrame, existing_comment) -> str: + """Format a github comment.""" + + if existing_comment: + df = v1_comment_merge(df, existing_comment) + + threshold_df = None + increase_df = df[df['change'] > 0] + if increase_df.empty: + increase_df = None + elif threshold := db.config['report.increases']: + threshold_df = df[df['% change'] > threshold] + if threshold_df.empty: + threshold_df = None + decrease_df = df[df['change'] < 0] + if decrease_df.empty: + decrease_df = None + + with io.StringIO() as md: + md.write(df.attrs['title']) + md.write('\n\n\n') + + if threshold_df is not None: + md.write(f'**Increases above {threshold:.2g}%:**\n\n') + md.write('\n\n') + v1_comment_write_df(db, threshold_df, md) + + if increase_df is not None: + summary = v1_comment_summary(increase_df) + md.write('
\n') + md.write(f'Increases ({summary})\n') + md.write('\n\n') + v1_comment_write_df(db, increase_df, md) + md.write('
\n\n') + + if decrease_df is not None: + summary = v1_comment_summary(decrease_df) + md.write('
\n') + md.write(f'Decreases ({summary})\n') + md.write('\n\n') + v1_comment_write_df(db, decrease_df, md) + md.write('
\n\n') + + summary = v1_comment_summary(df) + md.write('
\n') + md.write(f'Full report ({summary})\n') + md.write('\n\n') + v1_comment_write_df(db, df, md) + md.write('\n
\n') + + return md.getvalue() + + +def v1_comment_merge(df: pd.DataFrame, comment) -> pd.DataFrame: + with io.StringIO(comment.body) as body: + for line in body: + if line.startswith(''): + body.readline() # Blank line before table. + header, rows = read_hierified(body) + break + logging.debug('REC: read %d rows', len(rows)) + df = df.append(pd.DataFrame(data=rows, columns=header).astype(df.dtypes)) + return df.sort_values( + by=['platform', 'target', 'config', 'section']).drop_duplicates() + + +def read_hierified(f): + """Read a markdown table in ‘hierified’ format.""" + + line = f.readline() + header = tuple((s.strip() for s in line.split('|')[1:-1])) + + _ = f.readline() # The line under the header. + + rows = [] + for line in f: + line = line.strip() + if not line: + break + row = [] + columns = line.split('|') + for i in range(0, len(header)): + column = columns[i + 1].strip() + if not column: + column = rows[-1][i] + row.append(column) + rows.append(tuple(row)) + + return (header, rows) + + +def v1_comment_write_df(db: SizeDatabase, df: pd.DataFrame, + out: memdf.report.OutputOption): + memdf.report.write_df(db.config, + df, + out, + 'pipe', + hierify=True, + title=False, + tabulate={'floatfmt': '5.1f'}) + + +def v1_comment_summary(df: pd.DataFrame) -> str: + count = df[['platform', 'target', 'config']].drop_duplicates().shape[0] + platforms = ', '.join(sorted(list(set(df['platform'])))) + return f'{count} build{"" if count == 1 else "s"} for {platforms}' + + def report_matching_commits(db: SizeDatabase) -> Dict[str, pd.DataFrame]: """Report on all new comparable commits.""" if not (db.config['report.pr'] or db.config['report.push']): @@ -542,30 +650,24 @@ def report_matching_commits(db: SizeDatabase) -> Dict[str, pd.DataFrame]: report_push = db.config['report.push'] report_pr = db.config['report.pr'] + only_pr = db.config['github.limit-pr'] dfs = {} for pr, commit, parent in db.select_matching_commits().fetchall(): if not (report_pr if pr else report_push): continue + # Github doesn't have a way to fetch artifacts associated with a + # particular PR. For testing purposes, filter to a single PR here. + if only_pr and pr != only_pr: + continue + df = changes_for_commit(db, pr, commit, parent) dfs[df.attrs['name']] = df - if threshold := db.config['report.increases']: - tdf = df[df['% change'] > threshold] - else: - tdf = None - if tdf is not None and not tdf.empty: - commit = df.attrs['commit'] - parent = df.attrs['parent'] - tdf.attrs['name'] = f'L,{commit},{parent}' - tdf.attrs['title'] = ( - f'Increases above {threshold:.1f}% from {parent} to {commit}') - dfs[tdf.attrs['name']] = tdf - if (pr and comment_enabled and (comment_limit == 0 or comment_limit > comment_count)): - if gh_send_change_report(db, df, tdf): + if gh_send_change_report(db, df): # Mark the originating builds, and remove the originating # artifacts, so that they don't generate duplicate report # comments. diff --git a/scripts/tools/memory/memdf/util/sqlite.py b/scripts/tools/memory/memdf/util/sqlite.py index eba40f1818de8c..94b6f9b33a2f81 100644 --- a/scripts/tools/memory/memdf/util/sqlite.py +++ b/scripts/tools/memory/memdf/util/sqlite.py @@ -97,14 +97,18 @@ def store(self, table: str, **kwargs): self.connection().execute(q, v) def get_matching(self, table: str, columns: List[str], **kwargs): - return self.connection().execute( - f"SELECT {','.join(columns)} FROM {table}" - f" WHERE {'=? AND '.join(kwargs.keys())}=?", - list(kwargs.values())) + q = (f"SELECT {','.join(columns)} FROM {table}" + f" WHERE {'=? AND '.join(kwargs.keys())}=?") + v = list(kwargs.values()) + return self.connection().execute(q, v) def get_matching_id(self, table: str, **kwargs): - return self.get_matching(table, ['id'], **kwargs).fetchone()[0] + cur = self.get_matching(table, ['id'], **kwargs) + row = cur.fetchone() + if row: + return row[0] + return None - def store_and_return_id(self, table: str, **kwargs) -> int: + def store_and_return_id(self, table: str, **kwargs) -> Optional[int]: self.store(table, **kwargs) return self.get_matching_id(table, **kwargs)