diff --git a/.github/.wordlist.txt b/.github/.wordlist.txt
index 9ac4419af442c9..0a930013cb9509 100644
--- a/.github/.wordlist.txt
+++ b/.github/.wordlist.txt
@@ -455,6 +455,9 @@ GetDeviceInfo
GetDns
GetIP
getstarted
+GH
+gh
+ghp
githubusercontent
gitignore
glibc
@@ -848,6 +851,7 @@ PyEval
PyFunction
pylint
PyObject
+pypi
PyRun
pytest
QEMU
@@ -958,6 +962,7 @@ SiLabs
SiliconLabs
SimpleFileExFlags
SimpleLink
+sizedb
sl
SLAAC
SLTB
@@ -1041,6 +1046,7 @@ testws
texinfo
textboxes
TFT
+ThIsIsNoTMyReAlGiThUbToKeNSoDoNoTtRy
threadOperationalDataset
ThreadStackManager
ThreadStackManagerImpl
@@ -1052,6 +1058,7 @@ TLV
tmp
tngvndl
TODO
+toJson
tokenized
toolchain
toolchains
diff --git a/scripts/tools/memory/.pylintrc b/scripts/tools/memory/.pylintrc
index 6b77df256b40d1..413c27f80e068c 100644
--- a/scripts/tools/memory/.pylintrc
+++ b/scripts/tools/memory/.pylintrc
@@ -1,7 +1,7 @@
[BASIC]
-disable=too-few-public-methods,bad-whitespace
+disable=too-few-public-methods,bad-whitespace,broad-except
-no-docstring-rgx=main
+no-docstring-rgx=main|__init__
docstring-min-length=5
min-public-methods=1
max-args=7
diff --git a/scripts/tools/memory/README-GitHub-CI.md b/scripts/tools/memory/README-GitHub-CI.md
new file mode 100644
index 00000000000000..d2171e26eb9050
--- /dev/null
+++ b/scripts/tools/memory/README-GitHub-CI.md
@@ -0,0 +1,180 @@
+# Scripts for GitHub CI
+
+The `gh_*.py` scripts work together to produce size comparisons for PRs.
+
+## Reports on Pull Requests
+
+The scripts' results are presented as comments on PRs.
+
+**Note** that a comment may be updated by the scripts as CI run results become
+available.
+
+**Note** that the scripts will not create a comment for a commit if there is
+already a newer commit in the PR.
+
+A size report comment consists of a title followed by one to four tables. A
+title looks like:
+
+> PR #12345678: Size comparison from `base-SHA` to `pr-SHA`
+
+The first table, if present, lists items whose size increased by more than a
+configurable percentage.
+
+The next table, if present, lists all items that have increased in size.
+
+The next table, if present, lists all items that have decreased in size.
+
+The final table, always present, lists all items.
+
+## Usage in CI
+
+The original intent was to have a tool that would run after a build in CI, add
+its sizes to a central database, and immediately report on size changes from the
+parent commit in the database. Unfortunately, GitHub provides no practical place
+to store and share such a database between workflow actions. Instead, the
+process is split; builds in CI record size information in the form of GitHub
+[artifacts](https://docs.github.com/en/actions/advanced-guides/storing-workflow-data-as-artifacts),
+and a later step reads these artifacts to generate reports.
+
+### 1. Build workflows
+
+#### gh_sizes_environment.py
+
+The `gh_sizes_environment.py` script should be run once in each workflow that
+records sizes, _after_ checkout and _before_ any use of `gh_sizes.py`. It takes
+a single argument, a JSON dictionary of the `github` context, and sets up the
+environment variables (e.g. `GH_EVENT_PR`) used later to name size artifacts.
+Typically run as:
+
+```
+ steps:
+ - name: Checkout
+ uses: actions/checkout@v2
+ with:
+ submodules: true
+
+ - name: Set up environment for size reports
+ if: ${{ !env.ACT }}
+ env:
+ GH_CONTEXT: ${{ toJson(github) }}
+ run: scripts/tools/memory/gh_sizes_environment.py "${GH_CONTEXT}"
+```
+
+#### gh_sizes.py
+
+The `gh_sizes.py` script runs on a built binary (executable or library) and
+produces a JSON file containing size information.
+
+Usage: `gh_sizes.py` _platform_ _config_ _target_ _binary_ [_output_]
+
+Where _platform_ is the platform name, corresponding to a config file in
+`scripts/tools/memory/platform/`.
+
+Where _config_ is a configuration identification string. This has no fixed
+meaning, but is intended to describe a build variation, e.g. a particular target
+board or debug vs release.
+
+Where _target_ is a readable name for the build artifact, identifying it in
+reports.
+
+Where _binary_ is the input build artifact.
+
+Where _output_ is the name for the output JSON file, or a directory for it, in
+which case the name will be _platform_`-`_config_`-`_target_`-sizes.json`
+(e.g. `linux-arm64-thermostat-no-ble-sizes.json` for the example below).
+
+Example:
+
+```
+ scripts/tools/memory/gh_sizes.py \
+ linux arm64 thermostat-no-ble \
+ out/linux-arm64-thermostat-no-ble/thermostat-app \
+ /tmp/bloat_reports/
+```
+
+#### Upload artifacts
+
+The JSON files generated by `gh_sizes.py` must be uploaded with an artifact name
+of a very specific form in order to be processed correctly.
+
+Example artifact name:
+
+```
+Size,Linux-Examples,${{ env.GH_EVENT_PR }},${{ env.GH_EVENT_HASH }},${{ env.GH_EVENT_PARENT }},${{ github.event_name }}
+```
+
+Other builds must replace `Linux-Examples` with a label unique to the workflow,
+but otherwise use the form exactly.
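+
+For reference, the upload step itself can use the standard
+`actions/upload-artifact` action. A minimal sketch, in which the
+`Linux-Examples` label, the report path, and the `GH_EVENT_*` environment
+variables (set by the earlier environment step) are illustrative:
+
+```
+      - name: Upload size reports
+        uses: actions/upload-artifact@v2
+        if: ${{ !env.ACT }}
+        with:
+          name: Size,Linux-Examples,${{ env.GH_EVENT_PR }},${{ env.GH_EVENT_HASH }},${{ env.GH_EVENT_PARENT }},${{ github.event_name }}
+          path: /tmp/bloat_reports/
+```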
+
+### 2. Reporting workflow
+
+Run a periodic workflow calling `gh_report.py` to generate PR comments. This
+script has full `--help`, but normal use is probably best illustrated by an
+example:
+
+```
+ scripts/tools/memory/gh_report.py \
+ --verbose \
+ --report-increases 0.2 \
+ --report-pr \
+ --github-comment \
+ --github-limit-artifact-pages 50 \
+ --github-limit-artifacts 500 \
+ --github-limit-comments 20 \
+ --github-repository project-chip/connectedhomeip \
+ --github-api-token "${{ secrets.GITHUB_TOKEN }}"
+```
+
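+The enclosing workflow needs little more than a periodic trigger (and,
+optionally, manual dispatch). A minimal sketch, with an illustrative cadence:
+
+```
+on:
+  schedule:
+    - cron: "*/15 * * * *"
+  workflow_dispatch:
+```
+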
+Notably, the `--report-increases` flag provides a _percent growth_ threshold for
+calling out ‘large’ increases in GitHub comments.
+
+When this script successfully posts a comment on a GitHub PR, it removes the
+corresponding PR artifact(s) so that a future run will not process it again and
+post the same comment. Only PR artifacts are removed, not push (trunk)
+artifacts, since those may be used as a comparison base by many different PRs.
+
+## Using a database
+
+It can be useful to keep a permanent record of build sizes.
+
+### Updating the database: `gh_db_load.py`
+
+To update an SQLite file of trunk commit sizes, periodically run:
+
+```
+ gh_db_load.py \
+ --repo project-chip/connectedhomeip \
+ --token ghp_ThIsIsNoTMyReAlGiThUbToKeNSoDoNoTtRy \
+ --db /path/to/database
+```
+
+Those interested in only a single platform can add the `--github-label` option,
+providing the same name as in the size artifact name after `Size,` (e.g.
+`Linux-Examples` in the upload example above).
+
+See `--help` for additional options.
+
+_Note_: Transient 4xx and 5xx errors from GitHub's API are very common. Run
+`gh_db_load.py` frequently enough to give it several attempts before the
+relevant artifacts expire.
+
+### Querying the database: `gh_db_query.py`
+
+While the database can of course be used directly, the `gh_db_query.py` script
+provides a handful of common queries.
+
+Note that this script (like others that show tables) has an `--output-format`
+option offering (among others) CSV, several JSON formats, and any text format
+provided by [tabulate](https://pypi.org/project/tabulate/).
+
+Two notable options:
+
+- `--query-build-sizes PLATFORM,CONFIG,TARGET` lists sizes for all builds of
+ the given kind, with a column for each section.
+- `--query-section-changes PLATFORM,CONFIG,TARGET,SECTION` lists changes for
+ the given section. The `--report-increases PERCENT` option limits this to
+ changes over a given threshold (as is done for PR comments).
+
+(To find out which PLATFORM, CONFIG, TARGET, and SECTION values exist, use
+`--query-platforms`, then `--query-platform-targets=PLATFORM` and
+`--query-platform-sections=PLATFORM`.)
+
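+For example, to dump the per-section size history of one build as TSV (the
+database path and build names here are illustrative):
+
+```
+    gh_db_query.py \
+        --db /path/to/database \
+        --output-format tsv \
+        --query-build-sizes linux,arm64,thermostat-no-ble
+```
+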
+See `--help` for additional options.
diff --git a/scripts/tools/memory/README.md b/scripts/tools/memory/README.md
index 9ccd4a2560813e..3bde9607ce165c 100644
--- a/scripts/tools/memory/README.md
+++ b/scripts/tools/memory/README.md
@@ -41,14 +41,15 @@ The following options are common to _most_ of the scripts, where applicable:
- `--output-format` _FORMAT_, `--to` _FORMAT_, `-t` _FORMAT_ Output format.
One of:
- `text` — Plain text tables, in a single file.
- - `csv` — Comma-separated tables (in several files).
- - `tsv` — Tab-separated tables (in several files).
+ - `csv` — Comma-separated tables (in several files, if not stdout).
+ - `tsv` — Tab-separated tables (in several files, if not stdout).
- `json_split` — JSON - see Pandas documentation for details.
- `json_records` — JSON - see Pandas documentation for details.
- `json_index` — JSON - see Pandas documentation for details.
- `json_columns` — JSON - see Pandas documentation for details.
- `json_values` — JSON - see Pandas documentation for details.
- `json_table` — JSON - see Pandas documentation for details.
+ - Any format provided by [tabulate](https://pypi.org/project/tabulate/).
- `--report-limit` _BYTES_, `--limit` _BYTES_ Limit display to items above the
given size. Suffixes (e.g. `K`) are accepted.
- `--report-by` _GROUP_, `--by` _GROUP_ Reporting group. One of:
diff --git a/scripts/tools/memory/gh_db_load.py b/scripts/tools/memory/gh_db_load.py
new file mode 100755
index 00000000000000..9cb69bea5c2b7e
--- /dev/null
+++ b/scripts/tools/memory/gh_db_load.py
@@ -0,0 +1,113 @@
+#!/usr/bin/env python3
+#
+# Copyright (c) 2021 Project CHIP Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+"""Fetch data from GitHub size artifacts."""
+
+import io
+import logging
+import sys
+
+import memdf.sizedb
+import memdf.util.config
+import memdf.util.markdown
+import memdf.util.sqlite
+from memdf.util.github import Gh
+from memdf import Config, ConfigDescription
+
+GITHUB_CONFIG: ConfigDescription = {
+ Config.group_def('github'): {
+ 'title': 'github options',
+ },
+ 'github.event': {
+ 'help': 'Download only event type(s) (default ‘push’)',
+ 'metavar': 'EVENT',
+ 'default': [],
+ 'argparse': {
+ 'alias': ['--event']
+ },
+ },
+ 'github.limit-artifacts': {
+ 'help': 'Download no more than COUNT artifacts',
+ 'metavar': 'COUNT',
+ 'default': 0,
+ 'argparse': {
+ 'type': int,
+ },
+ },
+ 'github.label': {
+ 'help': 'Download artifacts for one label only',
+ 'metavar': 'LABEL',
+ 'default': '',
+ },
+}
+
+
+def main(argv):
+ status = 0
+ try:
+ sqlite_config = memdf.util.sqlite.CONFIG
+ sqlite_config['database.file']['argparse']['required'] = True
+
+ config = Config().init({
+ **memdf.util.config.CONFIG,
+ **memdf.util.github.CONFIG,
+ **sqlite_config,
+ **GITHUB_CONFIG,
+ })
+ config.argparse.add_argument('inputs', metavar='FILE', nargs='*')
+ config.parse(argv)
+
+ db = memdf.sizedb.SizeDatabase(config['database.file']).open()
+
+ if gh := Gh(config):
+
+ artifact_limit = config['github.limit-artifacts']
+ artifacts_added = 0
+ events = config['github.event']
+ if not events:
+ events = ['push']
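+        # Scan the size artifacts on GitHub, skipping events that were not
+        # requested and artifacts already recorded in the database, until the
+        # optional artifact limit is reached.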
+ for a in gh.get_size_artifacts(label=config['github.label']):
+ if events and a.event not in events:
+ logging.debug('Skipping %s artifact %d', a.event, a.id)
+ continue
+ cur = db.execute('SELECT id FROM build WHERE artifact = ?',
+ (a.id,))
+ if cur.fetchone():
+ logging.debug('Skipping known artifact %d', a.id)
+ continue
+ blob = gh.download_artifact(a.id)
+ if blob:
+ logging.info('Adding artifact %d %s %s %s %s',
+ a.id, a.commit[:12], a.pr, a.event, a.group)
+ db.add_sizes_from_zipfile(io.BytesIO(blob),
+ {'artifact': a.id})
+ db.commit()
+ artifacts_added += 1
+ if artifact_limit and artifact_limit <= artifacts_added:
+ break
+
+ for filename in config['args.inputs']:
+ db.add_sizes_from_file(filename)
+ db.commit()
+
+ except Exception as exception:
+ raise exception
+
+ return status
+
+
+if __name__ == '__main__':
+ sys.exit(main(sys.argv))
diff --git a/scripts/tools/memory/gh_db_query.py b/scripts/tools/memory/gh_db_query.py
new file mode 100755
index 00000000000000..e42262e567ba7f
--- /dev/null
+++ b/scripts/tools/memory/gh_db_query.py
@@ -0,0 +1,443 @@
+#!/usr/bin/env python3
+#
+# Copyright (c) 2022 Project CHIP Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+"""Common queries on a size database."""
+
+import datetime
+import logging
+import sys
+
+from typing import cast, Dict, List, Mapping, Optional, Tuple
+
+import pandas as pd # type: ignore
+
+import memdf.report
+import memdf.util.config
+import memdf.util.sqlite
+from memdf.sizedb import SizeDatabase
+from memdf import Config
+
+
+QUERY_CONFIG = {
+ Config.group_map('query'): {
+ 'group': 'output'
+ },
+ 'report.increases': {
+ 'help': 'Highlight large increases',
+ 'metavar': 'PERCENT',
+ 'default': 0.0,
+ 'argparse': {
+ 'alias': ['--threshold'],
+ 'type': float,
+ },
+ },
+ 'query.where': {
+ 'help': 'SQL filter',
+ 'metavar': 'SQL-EXPR',
+ 'default': '',
+ 'argparse': {
+ 'alias': ['--where'],
+ },
+ },
+ 'query.order': {
+ 'help': 'sort order',
+ 'metavar': 'COLUMN[,COLUMN]*',
+ 'default': '',
+ 'argparse': {
+ 'alias': ['--order'],
+ },
+ },
+ 'query.limit': {
+ 'help': 'limit result size',
+ 'metavar': 'ROWS',
+ 'default': 0,
+ 'argparse': {
+ 'alias': ['--limit'],
+ },
+ },
+}
+
+
+def argsplit(metavar: str, value: str) -> Tuple[Optional[Tuple], Dict]:
+ """Given comma-separated metavar and values, match them up."""
+ values = tuple(value.split(','))
+ names = metavar.split(',')
+ if len(names) < len(values):
+ logging.error('Too many values for %s', metavar)
+ return (None, {})
+ if len(names) > len(values):
+ logging.error('Missing %s for %s', ','.join(names[len(values):]),
+ metavar)
+ return (None, {})
+ return (values, dict(zip(names, values)))
+
+
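+# The ‘canned’ query options added to QUERY_CONFIG below each carry an 'sql'
+# entry. postprocess_canned_sql_option() collects any that were supplied on
+# the command line into config['queries'], and make_query() fills in their
+# optional {where}, {order}, and {limit} placeholders from the generic
+# --where/--order/--limit options before main() runs the query.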
+def postprocess_canned_sql_option(config: Config, key: str,
+ info: Mapping) -> None:
+ """Record information from simple SQL query options in one place."""
+ value = config[key]
+ if not value:
+ return
+ title = info['sql']['title']
+ if isinstance(value, str):
+ metavar = info.get('metavar', 'VALUE')
+ if ',' in metavar:
+ values, args = argsplit(metavar, value)
+ if not values:
+ return
+ else:
+ values = (value,)
+ args = {metavar: value}
+ title = title.format(**args)
+ else:
+ values = tuple()
+
+ if config['queries'] is None:
+ config['queries'] = []
+ cast(list, config['queries']).append((title, key, values, info))
+
+
+def make_query(config: Config, info: Mapping) -> str:
+ """Construct an SQL query string for a simple SQL query option."""
+ args = {'where': '', 'order': '', 'limit': ''}
+ if where := config.get('query.where'):
+ if kw := info['sql'].get('where'):
+ args['where'] = f'{kw} {where}'
+ if order := (config.get('query.order') or info['sql'].get('order')):
+ args['order'] = f'ORDER BY {order}'
+ if limit := config.get('query.limit'):
+ args['limit'] = f'LIMIT {limit}'
+ return info['sql']['query'].format(**args)
+
+
+def postprocess_df_time(_config: Config, df: pd.DataFrame) -> pd.DataFrame:
+ """Convert a DataFrame ‘time’ column from Unix timestamp to ISO."""
+ df['time'] = df['time'].map(lambda t: datetime.datetime.utcfromtimestamp(t)
+ .isoformat())
+ return df
+
+
+def postprocess_df_changes(config: Config, df: pd.DataFrame) -> pd.DataFrame:
+ """Given ‘parent_size’and ‘commit_size’ columns, add change columns."""
+ df['change'] = df.apply(lambda row: row.commit_size - row.parent_size,
+ axis=1)
+ df['% change'] = df.apply(lambda row: SizeDatabase.percent_change(
+ row.parent_size, row.commit_size),
+ axis=1)
+ if threshold := config['report.increases']:
+ df = df[df['% change'] > threshold]
+ return df
+
+
+QUERY_CONFIG |= {
+ 'query.platforms': {
+ 'help': 'List known platforms',
+ 'default': False,
+ 'postprocess': postprocess_canned_sql_option,
+ 'sql': {
+ 'title': 'Platforms',
+ 'query': '''
+ SELECT DISTINCT platform FROM thing {where} {order} {limit}
+ ''',
+ 'where': 'WHERE',
+ 'order': 'platform',
+ },
+ 'argparse': {
+ 'alias': ['--platforms'],
+ },
+ },
+ 'query.platform-targets': {
+ 'help': 'List known targets for the given platform',
+ 'metavar': 'PLATFORM',
+ 'default': '',
+ 'postprocess': postprocess_canned_sql_option,
+ 'sql': {
+ 'title': 'Platform Targets',
+ 'query': '''
+ SELECT DISTINCT platform, config, target
+ FROM thing
+ WHERE platform=? {where}
+ {order} {limit}
+ ''',
+ 'where': 'AND',
+ 'order': 'platform, config, target',
+ },
+ 'argparse': {
+ 'alias': ['--platform-targets'],
+ },
+ },
+ 'query.platform-sections': {
+ 'help': 'List known sections for the given platform',
+ 'metavar': 'PLATFORM',
+ 'default': '',
+ 'postprocess': postprocess_canned_sql_option,
+ 'sql': {
+ 'title': 'Platform Sections',
+ 'query': '''
+ SELECT DISTINCT platform, s.name AS section
+ FROM thing t
+ INNER JOIN build b ON t.id == b.thing_id
+ INNER JOIN size s ON b.id == s.build_id
+ WHERE platform=? {where}
+ {order} {limit}
+ ''',
+ 'where': 'AND',
+ 'order': 'platform, section',
+ },
+ 'argparse': {
+ 'alias': ['--platform-sections'],
+ },
+ },
+ 'query.section-sizes': {
+ 'help': 'List size data for a given build section',
+ 'metavar': 'PLATFORM,CONFIG,TARGET,SECTION',
+ 'default': '',
+ 'postprocess': postprocess_canned_sql_option,
+ 'sql': {
+ 'title': 'Sizes for {PLATFORM} {CONFIG} {TARGET} {SECTION}',
+ 'query': '''
+ SELECT DISTINCT time, hash, pr, size
+ FROM build b
+ INNER JOIN size s ON b.id == s.build_id
+ WHERE b.thing_id == (SELECT id FROM thing
+ WHERE platform == ?
+ AND config == ?
+ AND target == ?)
+ AND name == ?
+ {where}
+ {order} {limit}
+ ''',
+ 'where': 'AND',
+ 'order': 'time',
+ 'postprocess': [postprocess_df_time],
+ },
+ },
+ 'query.section-changes': {
+ 'help': 'List size changes for a given build section',
+ 'metavar': 'PLATFORM,CONFIG,TARGET,SECTION',
+ 'default': '',
+ 'postprocess': postprocess_canned_sql_option,
+ 'sql': {
+ 'title': 'Changes for {PLATFORM} {CONFIG} {TARGET} {SECTION}',
+ 'query': '''
+ WITH builds (bid, pid, time, pr, hash) AS (
+ SELECT DISTINCT b.id, p.id, b.time, b.pr, b.hash
+ FROM build b
+ INNER JOIN build p
+ ON p.hash = b.parent AND p.thing_id == b.thing_id
+ WHERE b.thing_id == (SELECT id FROM thing
+ WHERE platform == ?
+ AND config == ?
+ AND target == ?)
+ )
+ SELECT DISTINCT
+ time, hash, pr,
+ ps.size as parent_size,
+ bs.size as commit_size
+ FROM builds
+ INNER JOIN size bs ON builds.bid == bs.build_id
+ INNER JOIN size ps ON builds.pid == ps.build_id
+ WHERE bs.name == ? AND ps.name == bs.name
+ {where}
+ {order} {limit}
+ ''',
+ 'where': 'AND',
+ 'order': 'time',
+ 'postprocess': [postprocess_df_time, postprocess_df_changes],
+ },
+ },
+ 'query.all-changes': {
+ 'help': 'List all size changes',
+ 'default': False,
+ 'postprocess': postprocess_canned_sql_option,
+ 'sql': {
+ 'title': 'Size Changes',
+ 'query': '''
+ WITH
+ builds (bid, pid, time, pr, hash, thing_id) AS (
+ SELECT DISTINCT b.id, p.id, b.time, b.pr, b.hash, b.thing_id
+ FROM build b
+ INNER JOIN build p
+ ON p.hash = b.parent AND p.thing_id == b.thing_id
+ ),
+ changes (bid, tid, name, parent_size, commit_size, change) AS (
+ SELECT DISTINCT
+ bs.build_id,
+ thing_id,
+ bs.name,
+ ps.size as parent_size,
+ bs.size as commit_size,
+ bs.size - ps.size as change
+ FROM builds
+ INNER JOIN size bs ON builds.bid == bs.build_id
+ INNER JOIN size ps ON builds.pid == ps.build_id
+ WHERE bs.name == ps.name
+ )
+ SELECT
+ time, hash,
+ platform, config, target, name,
+ parent_size, commit_size, change
+ FROM changes
+ INNER JOIN build ON bid == build.id
+ INNER JOIN thing ON tid == thing.id
+ {where} {order} {limit}
+ ''',
+ 'where': 'AND',
+ 'order': 'time',
+ 'postprocess': [postprocess_df_time, postprocess_df_changes],
+ },
+ },
+ 'query.build-sizes': {
+ # SQLite doesn't have PIVOT so we have to script this.
+ 'help': 'List size changes for a given build',
+ 'metavar': 'PLATFORM,CONFIG,TARGET',
+ 'default': '',
+ },
+}
+
+
+def get_build_sections(db: SizeDatabase, build: str) -> Optional[Tuple]:
+ """Split a build arg and get its thing_id and sections."""
+ values, args = argsplit('PLATFORM,CONFIG,TARGET', build)
+ if not values:
+ return None
+
+ platform = args['PLATFORM']
+ pconfig = args['CONFIG']
+ ptarget = args['TARGET']
+ thing_id = db.select_thing_id(platform, pconfig, ptarget)
+ if not thing_id:
+ logging.error('No match for %s,%s,%s', platform, pconfig, ptarget)
+ return None
+
+ sections = db.select_sections_for_thing(thing_id)
+ if not sections:
+ logging.warning('No sections for %s,%s,%s', platform, pconfig, ptarget)
+ return None
+
+ return (platform, pconfig, ptarget, thing_id, sections)
+
+
+def make_build_sizes_query(config: Config, thing_id: str,
+ sections: List[str]) -> Tuple[List[str], str]:
+ """Construct and SQL query for all section sizes for a given thing."""
+ # SQLite doesn't have PIVOT so we need to construct a query with
+ # a column for each section.
+ columns = ['time', 'hash', 'pr']
+ cols = ', '.join(columns)
+ joins = ''
+ where = f' WHERE b.thing_id == {thing_id}'
+ for i, s in enumerate(sections):
+ columns.append(s)
+ cols += f', s{i}.size AS s{i}z'
+ joins += f' INNER JOIN size s{i} ON b.id == s{i}.build_id'
+ where += f' AND s{i}.name == "{s}"'
+ if qw := config['query.where']:
+ where += f' AND {qw}'
+ query = f'''SELECT {cols}
+ FROM build b
+ {joins}
+ {where}
+ ORDER BY {config.get('query.order') or 'time'}'''
+ if limit := config['query.limit']:
+ query += f' LIMIT {limit}'
+ return (columns, query)
+
+
+def query_build_sizes(config: Config, db: SizeDatabase,
+ build: str) -> Optional[pd.DataFrame]:
+ """Get all sizes for the given build."""
+ t = get_build_sections(db, build)
+ if not t:
+ return None
+ platform, pconfig, ptarget, thing_id, sections = t
+
+ columns, query = make_build_sizes_query(config, thing_id, sections)
+ logging.debug('Query: %s', query)
+
+ cur = db.execute(query)
+ rows = cur.fetchall()
+ if rows:
+ df = pd.DataFrame(rows, columns=columns)
+ df.attrs = {
+ 'name': f'qbs-{build}',
+ 'title': f'Sizes for {platform} {pconfig} {ptarget}',
+ }
+ return postprocess_df_time(config, df)
+
+ return None
+
+
+def main(argv):
+ status = 0
+ try:
+ cfg = {
+ **memdf.util.config.CONFIG,
+ **memdf.util.sqlite.CONFIG,
+ **memdf.report.OUTPUT_CONFIG,
+ **QUERY_CONFIG,
+ }
+ cfg['database.file']['argparse']['required'] = True
+
+ config = Config().init(cfg)
+ config.parse(argv)
+
+ db = SizeDatabase(config['database.file'], writable=False)
+ db.open()
+
+ dfs = {}
+
+ q = 0
+ for title, key, values, info in config.get('queries', []):
+ q += 1
+ query = make_query(config, info)
+ logging.debug('Option: %s', key)
+ logging.debug('Title: %s', title)
+ logging.debug('Query: %s', query.strip())
+ logging.debug('With: %s', values)
+ cur = db.execute(query, values)
+ columns = [i[0] for i in cur.description]
+ rows = cur.fetchall()
+ if rows:
+ df = pd.DataFrame(rows, columns=columns)
+ df.attrs = {'name': f'query{q}', 'title': title}
+ for f in info['sql'].get('postprocess', []):
+ df = f(config, df)
+ dfs[df.attrs['name']] = df
+
+ if build := config['query.build-sizes']:
+ q += 1
+ if (df := query_build_sizes(config, db, build)) is not None:
+ dfs[df.attrs['name']] = df
+
+ if q == 0:
+ config.argparse.print_help()
+ return 1
+
+ memdf.report.write_dfs(config,
+ dfs,
+ hierify=config['hierify'],
+ title=True,
+ floatfmt='5.1f')
+
+ except Exception as exception:
+ raise exception
+
+ return status
+
+
+if __name__ == '__main__':
+ sys.exit(main(sys.argv))
diff --git a/scripts/tools/memory/gh_report.py b/scripts/tools/memory/gh_report.py
index 7a52052c4287a5..608fe9c5edbf9a 100755
--- a/scripts/tools/memory/gh_report.py
+++ b/scripts/tools/memory/gh_report.py
@@ -17,48 +17,39 @@
"""Generate reports from size artifacts."""
import io
-import itertools
-import json
import logging
-import os
-import os.path
import re
-import sqlite3
import sys
-import zipfile
-from pathlib import Path
-from typing import Dict, IO, Iterable, Optional, Union
+from typing import Dict
-import dateutil # type: ignore
import fastcore # type: ignore
-import ghapi.all # type: ignore
import pandas as pd # type: ignore
import memdf.report
+import memdf.sizedb
import memdf.util.config
+import memdf.util.markdown
import memdf.util.sqlite
+from memdf.util.github import Gh
from memdf import Config, ConfigDescription
-GITHUB_CONFIG: ConfigDescription = {
- Config.group_def('github'): {
- 'title': 'github options',
+DB_CONFIG: ConfigDescription = {
+ Config.group_def('database'): {
+ 'title': 'database options',
},
- 'github.token': {
- 'help': 'Github API token, or "SKIP" to suppress connecting to github',
- 'metavar': 'TOKEN',
- 'default': '',
+ 'database.readonly': {
+ 'help': 'Open database read only',
+ 'default': False,
'argparse': {
- 'alias': ['--github-api-token', '--token'],
+ 'alias': ['--db-readonly'],
},
},
- 'github.repository': {
- 'help': 'Github repostiory',
- 'metavar': 'OWNER/REPO',
- 'default': '',
- 'argparse': {
- 'alias': ['--repo'],
- },
+}
+
+GITHUB_CONFIG: ConfigDescription = {
+ Config.group_def('github'): {
+ 'title': 'github options',
},
'github.comment': {
'help': 'Send output as github PR comments',
@@ -67,17 +58,6 @@
'alias': ['--comment'],
},
},
- 'github.keep': {
- 'help': 'Leave PR artifacts after commenting',
- 'default': False,
- 'argparse': {
- 'alias': ['--keep'],
- },
- },
- 'github.dryrun-comment': {
- 'help': 'Dry run for sending output as github PR comments',
- 'default': False,
- },
'github.limit-comments': {
'help': 'Send no more than COUNT comments',
'metavar': 'COUNT',
@@ -94,14 +74,6 @@
'type': int,
},
},
- 'github.limit-artifact-pages': {
- 'help': 'Examine no more than COUNT pages of artifacts',
- 'metavar': 'COUNT',
- 'default': 0,
- 'argparse': {
- 'type': int,
- },
- },
'github.limit-pr': {
'help': 'Report only on PR, if present.',
'metavar': 'PR',
@@ -110,6 +82,9 @@
'type': int,
},
},
+}
+
+REPORT_CONFIG: ConfigDescription = {
Config.group_map('report'): {
'group': 'output'
},
@@ -127,13 +102,6 @@
'alias': ['--push']
},
},
- 'report.query': {
- 'help': 'Run an SQL query',
- 'default': [],
- 'argparse': {
- 'alias': ['--query', '--sql']
- },
- },
'report.increases': {
'help': 'Highlight large increases',
'metavar': 'PERCENT',
@@ -146,151 +114,33 @@
}
-class SizeDatabase(memdf.util.sqlite.Database):
- """A database for recording and comparing size reports."""
- on_open = ["PRAGMA foreign_keys = ON", "PRAGMA encoding = 'UTF-8'"]
- on_writable = [
- """
- -- A ‘thing’ identifies the kind of built object.
- -- Builds of the same thing are comparable.
- CREATE TABLE IF NOT EXISTS thing (
- id INTEGER PRIMARY KEY,
- platform TEXT NOT NULL, -- Build platform
- config TEXT NOT NULL, -- Build configuration discriminator
- target TEXT NOT NULL, -- Build target
- UNIQUE(platform, config, target)
- )
- """, """
- -- A ‘build’ identifies a built instance of a thing at some point.
- CREATE TABLE IF NOT EXISTS build (
- id INTEGER PRIMARY KEY,
- thing_id INTEGER REFERENCES thing(id),
- hash TEXT NOT NULL, -- Commit hash
- parent TEXT NOT NULL, -- Parent commit hash
- pr INTEGER DEFAULT 0, -- Github PR number
- time INTEGER NOT NULL, -- Unix-epoch timestamp
- artifact INTEGER DEFAULT 0, -- Github artifact ID
- commented INTEGER DEFAULT 0, -- 1 if recorded in a GH comment
- ref TEXT, -- Target git ref
- event TEXT, -- Github build trigger event
- UNIQUE(thing_id, hash, parent, pr, time, artifact)
- )
- """, """
- -- A ‘size’ entry gives the size of a section for a particular build.
- CREATE TABLE IF NOT EXISTS size (
- build_id INTEGER REFERENCES build(id),
- name TEXT NOT NULL, -- Section name
- size INTEGER NOT NULL, -- Section size in bytes
- PRIMARY KEY (build_id, name)
- )
- """
- ]
+class SizeContext:
+ """Generate reports from size artifacts."""
+
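+    # Matches the format-version marker at the start of the body of a
+    # previously posted size comment; group 1 is the format version.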
+ comment_format_re = re.compile(r"^")
def __init__(self, config: Config):
- super().__init__(config['database.file'])
self.config = config
- self.gh = gh_open(config)
- self.deleted_artifacts: set[int] = set()
-
- def add_sizes(self, **kwargs):
- """
- Add a size report to the database.
-
- The incoming arguments must contain the required non-ID column names
- from ‘thing’ and ‘build’ tables, plus a 'sizes' entry that is a
- sequence of mappings containing 'name' and 'size'.
- """
- td = {k: kwargs[k] for k in ('platform', 'config', 'target')}
- thing = self.store_and_return_id('thing', **td)
- bd = {k: kwargs[k] for k in ('hash', 'parent', 'time', 'event')}
- if 'ref' in kwargs:
- bd['ref'] = kwargs['ref']
- cd = {k: kwargs.get(k, 0) for k in ('pr', 'artifact', 'commented')}
- build = self.store_and_return_id('build', thing_id=thing, **bd, **cd)
- if build is None:
- logging.error('Failed to store %s %s %s', thing, bd, cd)
- else:
- for d in kwargs['sizes']:
- self.store('size', build_id=build, **d)
-
- def add_sizes_from_json(self, s: Union[bytes, str], origin: Dict):
- """Add sizes from a JSON size report."""
- r = origin.copy()
- r.update(json.loads(s))
- r['sizes'] = []
- # Add section sizes.
- for i in r['frames'].get('section', []):
- r['sizes'].append({'name': i['section'], 'size': i['size']})
- # Add segment sizes.
- for i in r['frames'].get('wr', []):
- r['sizes'].append({
- 'name': ('(read only)', '(read/write)')[int(i['wr'])],
- 'size': i['size']
- })
- self.add_sizes(**r)
-
- def add_sizes_from_zipfile(self, f: Union[IO, Path], origin: Dict):
- """Add size reports from a zip."""
- with zipfile.ZipFile(f, 'r') as zip_file:
- for i in zip_file.namelist():
- if i.endswith('-sizes.json'):
- origin['member'] = i
- with zip_file.open(i) as member:
- self.add_sizes_from_json(member.read(), origin)
-
- def add_sizes_from_file(self, filename: str):
- """Add size reports from a file."""
- origin = {'file': filename}
- path = Path(filename)
- if path.suffix == '.json':
- logging.info('ASJ: reading JSON %s', path)
- with open(path) as f:
- self.add_sizes_from_json(f.read(), origin)
- elif path.suffix == '.zip':
- logging.info('ASZ: reading ZIP %s', path)
- self.add_sizes_from_zipfile(path, origin)
- else:
- logging.warning('Unknown file type "%s" ignored', filename)
+ self.gh = Gh(config)
+ db_file = config.get('database.file', ':memory:')
+ self.db = memdf.sizedb.SizeDatabase(db_file,
+ not config['database.readonly'])
+ self.db.open()
def add_sizes_from_github(self):
"""Read size report artifacts from github."""
- if not self.gh:
- return
- artifact_limit = self.config['github.limit-artifacts']
- artifact_pages = self.config['github.limit-artifact-pages']
-
- # Size artifacts have names of the form:
- # Size,{group},{pr},{commit_hash},{parent_hash}[,{event}]
- # Record them keyed by group and commit_hash to match them up
+ # Record size artifacts keyed by group and commit_hash to match them up
# after we have the entire list.
- page = 0
size_artifacts: Dict[str, Dict[str, fastcore.basics.AttrDict]] = {}
- for i in ghapi.all.paged(self.gh.actions.list_artifacts_for_repo, 100):
- if not i.artifacts:
- break
- for a in i.artifacts:
- if a.name.startswith('Size,') and a.name.count(',') >= 4:
- _, group, pr, commit, parent, *etc = a.name.split(',')
- a.parent = parent
- a.pr = pr
- a.created_at = dateutil.parser.isoparse(a.created_at)
- # Old artifact names don't include the event.
- if etc:
- event = etc[0]
- else:
- event = 'push' if pr == '0' else 'pull_request'
- a.event = event
- if group not in size_artifacts:
- size_artifacts[group] = {}
- size_artifacts[group][commit] = a
- logging.debug('ASG: artifact %d %s', a.id, a.name)
- page += 1
- logging.debug('ASP: artifact page %d of %d', page, artifact_pages)
- if artifact_pages and page >= artifact_pages:
- break
+ for a in self.gh.get_size_artifacts():
+ if a.group not in size_artifacts:
+ size_artifacts[a.group] = {}
+ size_artifacts[a.group][a.commit] = a
+ logging.debug('ASG: artifact %d %s', a.id, a.name)
# Determine required size artifacts.
+ artifact_limit = self.config['github.limit-artifacts']
required_artifact_ids: set[int] = set()
for group, group_reports in size_artifacts.items():
logging.debug('ASG: group %s', group)
@@ -313,419 +163,223 @@ def add_sizes_from_github(self):
# Download and add required artifacts.
for i in required_artifact_ids:
- logging.debug('ASD: download artifact %d', i)
- try:
- blob = self.gh.actions.download_artifact(i, 'zip')
- except Exception as e:
- blob = None
- logging.error('Failed to download artifact %d: %s', i, e)
+ blob = self.gh.download_artifact(i)
if blob:
- self.add_sizes_from_zipfile(io.BytesIO(blob), {'artifact': i})
+ self.db.add_sizes_from_zipfile(io.BytesIO(blob),
+ {'artifact': i})
def read_inputs(self):
"""Read size report from github and/or local files."""
- self.add_sizes_from_github()
+ if self.gh:
+ self.add_sizes_from_github()
for filename in self.config['args.inputs']:
- self.add_sizes_from_file(filename)
- self.commit()
-
- def select_matching_commits(self):
- """Find matching builds, where one's commit is the other's parent."""
- return self.execute('''
- SELECT DISTINCT
- c.event as event,
- c.pr AS pr,
- c.hash AS hash,
- p.hash AS parent
- FROM build c
- INNER JOIN build p ON p.hash = c.parent
- WHERE c.commented = 0
- ORDER BY c.time DESC, c.pr, c.hash, p.hash
- ''')
-
- def set_commented(self, build_ids: Iterable[int]):
- """Set the commented flag for the given builds."""
- if not build_ids:
- return
- for build_id in build_ids:
- self.execute('UPDATE build SET commented = 1 WHERE id = ?',
- (build_id, ))
- self.commit()
-
- def delete_stale_builds(self, build_ids: Iterable[int]):
- """Delete stale builds."""
- if not build_ids:
- return
- for build_id in build_ids:
- logging.info('DSB: deleting obsolete build %d', build_id)
- self.execute('DELETE FROM size WHERE build_id = ?', (build_id, ))
- self.execute('DELETE FROM build WHERE id = ?', (build_id, ))
- self.commit()
-
- def delete_artifact(self, artifact_id: int):
- if (self.gh and artifact_id
- and artifact_id not in self.deleted_artifacts):
- self.deleted_artifacts.add(artifact_id)
- try:
- self.gh.actions.delete_artifact(artifact_id)
- except Exception:
- # During manual testing we sometimes lose the race against CI.
- logging.error('Failed to delete artifact %d', artifact_id)
-
- def delete_stale_artifacts(self, stale_artifacts: Iterable[int]):
- if not self.config['github.keep']:
- for artifact_id in stale_artifacts:
- logging.info('DSA: deleting obsolete artifact %d', artifact_id)
- self.delete_artifact(artifact_id)
-
- def should_report(self, event: Optional[str] = None) -> bool:
- """Return true if reporting is enabled for the event."""
- if event is None:
+ self.db.add_sizes_from_file(filename)
+ self.db.commit()
+ return self
+
+ def should_report(self, event: str = '') -> bool:
+ """Return true if reporting is enabled for the action event."""
+ if not event:
return self.config['report.pr'] or self.config['report.push']
if event == 'pull_request':
return self.config['report.pr']
return self.config['report.push']
-
-def gh_open(config: Config) -> Optional[ghapi.core.GhApi]:
- """Return a GhApi, if so configured."""
- gh: Optional[ghapi.core.GhApi] = None
- if config['github.repository']:
- owner, repo = config.get('github.repository').split('/', 1)
- config.put('github.owner', owner)
- config.put('github.repo', repo)
- if not config['github.token']:
- config['github.token'] = os.environ.get('GITHUB_TOKEN')
- if not config['github.token']:
- logging.error('Missing --github-token')
- return None
- token = config['github.token']
- if token != 'SKIP':
- gh = ghapi.all.GhApi(owner=owner,
- repo=repo,
- token=config['github.token'])
- return gh
-
-
-def gh_get_comments_for_pr(gh: ghapi.core.GhApi, pr: int):
- return itertools.chain.from_iterable(
- ghapi.all.paged(gh.issues.list_comments, pr))
-
-
-def gh_get_commits_for_pr(gh: ghapi.core.GhApi, pr: int):
- return itertools.chain.from_iterable(
- ghapi.all.paged(gh.pulls.list_commits, pr))
-
-
-def percent_change(a: int, b: int) -> float:
- if a == 0:
- return 0.0 if b == 0 else float('inf')
- return 100. * (b - a) / a
-
-
-def changes_for_commit(db: SizeDatabase, pr: int, commit: str,
- parent: str) -> pd.DataFrame:
- """Return a DataFrame with size changes between the given commits."""
- cur = db.execute(
- '''
- SELECT DISTINCT
- t.id AS thing,
- cb.artifact AS artifact,
- pb.id AS parent_build,
- cb.id AS commit_build,
- t.platform, t.config, t.target,
- cs.name AS name,
- ps.size AS parent_size,
- cs.size AS commit_size,
- cs.size - ps.size AS change,
- cb.time AS time
- FROM thing t
- INNER JOIN build cb ON cb.thing_id = t.id
- INNER JOIN build pb ON pb.thing_id = t.id AND pb.hash = cb.parent
- INNER JOIN size cs ON cs.build_id = cb.id
- INNER JOIN size ps ON ps.build_id = pb.id AND cs.name = ps.name
- WHERE cb.hash = ? AND pb.hash = ?
- ORDER BY t.platform, t.config, t.target,
- cs.name, cb.time DESC, pb.time DESC
- ''', (commit, parent))
-
- keep = ('platform', 'target', 'config', 'name', 'parent_size',
- 'commit_size', 'change')
- things: set[int] = set()
- artifacts: set[int] = set()
- builds: set[int] = set()
- stale_builds: set[int] = set()
- stale_artifacts: set[int] = set()
- previous: Optional[sqlite3.Row] = None
- rows = []
-
- for row in cur.fetchall():
- row = sqlite3.Row(cur, row)
- things.add(row['thing'])
- if (previous is not None and row['thing'] == previous['thing']
- and row['name'] == previous['name']):
- # This is duplicate build, older because we sort descending,
- # presumably from a partial workflow re-run.
- if row['parent_build'] != previous['parent_build']:
- stale_builds.add(row['parent_build'])
- if row['commit_build'] != previous['commit_build']:
- stale_builds.add(row['commit_build'])
- stale_artifacts.add(row['artifact'])
- else:
- previous = row
- new = [row[k] for k in keep]
- new.append(percent_change(row['parent_size'], row['commit_size']))
- rows.append(new)
- artifacts.add(row['artifact'])
- builds.add(row['commit_build'])
-
- db.delete_stale_builds(stale_builds)
- db.delete_stale_artifacts(stale_artifacts)
-
- df = pd.DataFrame(rows,
- columns=('platform', 'target', 'config', 'section',
- parent[:8], commit[:8], 'change', '% change'))
- df.attrs = {
- 'name': f'{pr},{parent},{commit}',
- 'title': (f'PR #{pr}: ' if pr else '') +
- f'Size comparison from {parent} to {commit}',
- 'things': things,
- 'builds': builds,
- 'artifacts': artifacts,
- 'pr': pr,
- 'commit': commit,
- 'parent': parent,
- }
- return df
-
-
-comment_format_re = re.compile(r"^")
-
-
-def gh_send_change_report(db: SizeDatabase, df: pd.DataFrame) -> bool:
- """Send a change report as a github comment."""
-
- if not db.gh:
- return False
-
- # Look for an existing comment for this change.
- pr = df.attrs['pr']
-
- # Check the most recent commit on the PR, so that we don't comment on
- # builds that are already outdated.
- commit = df.attrs['commit']
- commits = sorted(
- gh_get_commits_for_pr(db.gh, pr),
- key=lambda c: f'{c.commit.committer.date}{c.commit.author.date}',
- reverse=True)
- if commits and commit != commits[0].sha:
- logging.info('SCS: PR #%s: not commenting for stale %s; newest is %s',
- pr, commit, commits[0].sha)
- # Return True so that the obsolete artifacts get removed.
- return True
-
- # Check for an existing size report comment. If one exists, we'll add
- # the new report to it.
- title = df.attrs['title']
- existing_comment = None
- existing_comment_format = 0
- for comment in gh_get_comments_for_pr(db.gh, pr):
- comment_parts = comment.body.partition('\n')
- if comment_parts[0].strip() == title:
- existing_comment = comment
- if m := comment_format_re.match(comment_parts[2]):
- existing_comment_format = int(m.group(1))
- break
-
- if existing_comment_format != 1:
+ def get_existing_comment(self, pr: int, title: str):
+ """Check for an existing comment."""
existing_comment = None
- text = gh_comment_v1(db, df, existing_comment)
+ existing_comment_format = 0
+ for comment in self.gh.get_comments_for_pr(pr):
+ comment_parts = comment.body.partition('\n')
+ if comment_parts[0].strip() == title:
+ existing_comment = comment
+ if m := self.comment_format_re.match(comment_parts[2]):
+ existing_comment_format = int(m.group(1))
+ break
+ return (existing_comment, existing_comment_format)
+
+ def get_newest_commit(self, pr: int) -> str:
+ """Get the hash of the most recent commit on the PR."""
+ commits = sorted(
+ self.gh.get_commits_for_pr(pr),
+ key=lambda c: f'{c.commit.committer.date}{c.commit.author.date}',
+ reverse=True)
+ return commits[0].sha if commits else ''
+
+ def post_change_report(self, df: pd.DataFrame) -> bool:
+ """Send a change report as a github comment."""
+ if not self.gh:
+ return False
+ pr = df.attrs['pr']
+
+ # Check for an existing size report comment. If one exists, we'll add
+ # the new information to it.
+ existing_comment, existing_comment_format = self.get_existing_comment(
+ pr, df.attrs['title'])
+
+ if not existing_comment:
+ # Check the most recent commit on the PR, so that we don't comment
+ # for commits that are already outdated.
+ commit = df.attrs['commit']
+ latest = self.get_newest_commit(pr)
+ if commit != latest:
+ logging.info(
+ 'SCS: PR #%s: not commenting for stale %s; newest is %s',
+ pr, commit, latest)
+ # Return True so that the obsolete artifacts get removed.
+ return True
+
+ if existing_comment_format == 1:
+ df = V1Comment.merge(df, existing_comment)
+ else:
+ existing_comment = None
+ text = V1Comment.format(self.config, df)
- logging.info(
- 'SCR: %s %s', df.attrs['title'],
- f'updating comment {existing_comment.id}'
- if existing_comment else 'as new comment')
+ if existing_comment:
+ return self.gh.update_comment(existing_comment.id, text)
+ return self.gh.create_comment(pr, text)
- if db.config['github.dryrun-comment']:
- logging.debug('%s', text)
- return False
+ def report_matching_commits(self) -> Dict[str, pd.DataFrame]:
+ """Report on all new comparable commits."""
+ if not self.should_report():
+ return {}
- try:
- if existing_comment:
- db.gh.issues.update_comment(existing_comment.id, text)
- else:
- db.gh.issues.create_comment(pr, text)
- return True
- except Exception:
- return False
-
-
-def gh_comment_v1(db: SizeDatabase, df: pd.DataFrame, existing_comment) -> str:
- """Format a github comment."""
-
- if existing_comment:
- df = v1_comment_merge(df, existing_comment)
-
- threshold_df = None
- increase_df = df[df['change'] > 0]
- if increase_df.empty:
- increase_df = None
- elif threshold := db.config['report.increases']:
- threshold_df = df[df['% change'] > threshold]
- if threshold_df.empty:
- threshold_df = None
- decrease_df = df[df['change'] < 0]
- if decrease_df.empty:
- decrease_df = None
-
- with io.StringIO() as md:
- md.write(df.attrs['title'])
- md.write('\n\n\n')
-
- if threshold_df is not None:
- md.write(f'**Increases above {threshold:.2g}%:**\n\n')
- md.write('\n\n')
- v1_comment_write_df(db, threshold_df, md)
-
- if increase_df is not None:
- summary = v1_comment_summary(increase_df)
-            md.write('<details>\n')
-            md.write(f'<summary>Increases ({summary})</summary>\n')
-            md.write('\n\n')
-            v1_comment_write_df(db, increase_df, md)
-            md.write('</details>\n\n')
+ comment_count = 0
+ comment_limit = self.config['github.limit-comments']
+ comment_enabled = (self.config['github.comment']
+ or self.config['github.dryrun-comment'])
- if decrease_df is not None:
- summary = v1_comment_summary(decrease_df)
-            md.write('<details>\n')
-            md.write(f'<summary>Decreases ({summary})</summary>\n')
-            md.write('\n\n')
-            v1_comment_write_df(db, decrease_df, md)
-            md.write('</details>\n\n')
-
- summary = v1_comment_summary(df)
-        md.write('<details>\n')
-        md.write(f'<summary>Full report ({summary})</summary>\n')
-        md.write('\n\n')
-        v1_comment_write_df(db, df, md)
-        md.write('\n</details>\n')
-
- return md.getvalue()
-
-
-def v1_comment_merge(df: pd.DataFrame, comment) -> pd.DataFrame:
- with io.StringIO(comment.body) as body:
- for line in body:
-            if line.startswith('<summary>Full report'):
- body.readline() # Blank line before table.
- header, rows = read_hierified(body)
- break
- logging.debug('REC: read %d rows', len(rows))
- df = df.append(pd.DataFrame(data=rows, columns=header).astype(df.dtypes))
- return df.sort_values(
- by=['platform', 'target', 'config', 'section']).drop_duplicates()
-
-
-def read_hierified(f):
- """Read a markdown table in ‘hierified’ format."""
-
- line = f.readline()
- header = tuple((s.strip() for s in line.split('|')[1:-1]))
-
- _ = f.readline() # The line under the header.
-
- rows = []
- for line in f:
- line = line.strip()
- if not line:
- break
- row = []
- columns = line.split('|')
- for i in range(0, len(header)):
- column = columns[i + 1].strip()
- if not column:
- column = rows[-1][i]
- row.append(column)
- rows.append(tuple(row))
-
- return (header, rows)
-
-
-def v1_comment_write_df(db: SizeDatabase, df: pd.DataFrame,
- out: memdf.report.OutputOption):
- memdf.report.write_df(db.config,
- df,
- out,
- 'pipe',
- hierify=True,
- title=False,
- floatfmt='5.1f')
-
-
-def v1_comment_summary(df: pd.DataFrame) -> str:
- count = df[['platform', 'target', 'config']].drop_duplicates().shape[0]
- platforms = ', '.join(sorted(list(set(df['platform']))))
- return f'{count} build{"" if count == 1 else "s"} for {platforms}'
-
-
-def report_matching_commits(db: SizeDatabase) -> Dict[str, pd.DataFrame]:
- """Report on all new comparable commits."""
- if not db.should_report():
- return {}
-
- comment_count = 0
- comment_limit = db.config['github.limit-comments']
- comment_enabled = (db.config['github.comment']
- or db.config['github.dryrun-comment'])
-
- only_pr = db.config['github.limit-pr']
-
- dfs = {}
- for event, pr, commit, parent in db.select_matching_commits().fetchall():
- if not db.should_report(event):
- continue
-
- # Github doesn't have a way to fetch artifacts associated with a
- # particular PR. For testing purposes, filter to a single PR here.
- if only_pr and pr != only_pr:
- continue
-
- df = changes_for_commit(db, pr, commit, parent)
- if df.empty:
- # Matching commits had no new matching builds.
- continue
- dfs[df.attrs['name']] = df
-
- if (event == 'pull_request' and comment_enabled
- and (comment_limit == 0 or comment_limit > comment_count)):
- if gh_send_change_report(db, df):
- # Mark the originating builds, and remove the originating
- # artifacts, so that they don't generate duplicate report
- # comments.
- db.set_commented(df.attrs['builds'])
- if not db.config['github.keep']:
- for artifact_id in df.attrs['artifacts']:
- logging.info('RMC: deleting artifact %d', artifact_id)
- db.delete_artifact(artifact_id)
- comment_count += 1
- return dfs
-
-
-def report_queries(db: SizeDatabase) -> Dict[str, pd.DataFrame]:
- """Perform any requested SQL queries."""
- dfs = {}
- q = 0
- for query in db.config['report.query']:
- q += 1
- cur = db.execute(query)
- columns = [i[0] for i in cur.description]
- rows = cur.fetchall()
- if rows:
- df = pd.DataFrame(rows, columns=columns)
- df.attrs = {'name': f'query{q}', 'title': query}
+ only_pr = self.config['github.limit-pr']
+
+ dfs = {}
+ commits = self.db.select_matching_commits()
+ for event, pr, commit, parent in commits.fetchall():
+ if not self.should_report(event):
+ continue
+
+ # Github doesn't have a way to fetch artifacts associated with a
+ # particular PR. For testing purposes, filter to a single PR here.
+ if only_pr and pr != only_pr:
+ continue
+
+ changes = self.db.select_changes(parent, commit)
+
+ self.db.delete_builds(changes.stale_builds)
+ self.gh.delete_artifacts(changes.stale_artifacts)
+
+ if not changes.rows:
+ # Matching commits had no new matching builds.
+ continue
+
+ df = pd.DataFrame(changes.rows, columns=changes.columns)
+ df.attrs = {
+ 'name': f'{pr},{parent},{commit}',
+ 'title': (f'PR #{pr}: ' if pr else '') +
+ f'Size comparison from {parent} to {commit}',
+ 'things': changes.things,
+ 'builds': changes.builds,
+ 'artifacts': changes.artifacts,
+ 'pr': pr,
+ 'commit': commit,
+ 'parent': parent,
+ }
dfs[df.attrs['name']] = df
- db.commit()
- return dfs
+
+ if (event == 'pull_request' and comment_enabled
+ and (comment_limit == 0 or comment_limit > comment_count)):
+ if self.post_change_report(df):
+ # Mark the originating builds, and remove the originating
+ # artifacts, so that they don't generate duplicate report
+ # comments.
+ self.db.set_commented(df.attrs['builds'])
+ self.gh.delete_artifacts(df.attrs['artifacts'])
+ comment_count += 1
+ return dfs
+
+
+class V1Comment:
+ """Format of a GitHub comment."""
+
+ @staticmethod
+ def format(config: Config, df: pd.DataFrame):
+ """Format a GitHub comment."""
+
+ threshold_df = None
+ increase_df = df[df['change'] > 0]
+ if increase_df.empty:
+ increase_df = None
+ elif threshold := config['report.increases']:
+ threshold_df = df[df['% change'] > threshold]
+ if threshold_df.empty:
+ threshold_df = None
+ decrease_df = df[df['change'] < 0]
+ if decrease_df.empty:
+ decrease_df = None
+
+ with io.StringIO() as md:
+ md.write(df.attrs['title'])
+ md.write('\n\n\n')
+
+ if threshold_df is not None:
+ md.write(f'**Increases above {threshold:.2g}%:**\n\n')
+ md.write('\n\n')
+ V1Comment.write_df(config, threshold_df, md)
+
+ if increase_df is not None:
+ summary = V1Comment.summary(increase_df)
+                md.write('<details>\n')
+                md.write(f'<summary>Increases ({summary})</summary>\n')
+                md.write('\n\n')
+                V1Comment.write_df(config, increase_df, md)
+                md.write('</details>\n\n')
+
+ if decrease_df is not None:
+ summary = V1Comment.summary(decrease_df)
+                md.write('<details>\n')
+                md.write(f'<summary>Decreases ({summary})</summary>\n')
+                md.write('\n\n')
+                V1Comment.write_df(config, decrease_df, md)
+                md.write('</details>\n\n')
+
+ summary = V1Comment.summary(df)
+            md.write('<details>\n')
+            md.write(f'<summary>Full report ({summary})</summary>\n')
+            md.write('\n\n')
+            V1Comment.write_df(config, df, md)
+            md.write('\n</details>\n')
+
+ return md.getvalue()
+
+ @staticmethod
+ def summary(df: pd.DataFrame) -> str:
+ count = df[['platform', 'target', 'config']].drop_duplicates().shape[0]
+ platforms = ', '.join(sorted(list(set(df['platform']))))
+ return f'{count} build{"" if count == 1 else "s"} for {platforms}'
+
+ @staticmethod
+ def write_df(config: Config, df: pd.DataFrame,
+ out: memdf.report.OutputOption):
+ memdf.report.write_df(config,
+ df,
+ out,
+ 'pipe',
+ hierify=True,
+ title=False,
+ floatfmt='5.1f')
+
+ @staticmethod
+ def merge(df: pd.DataFrame, comment) -> pd.DataFrame:
+ """Merge an existing comment into the DataFrame."""
+ with io.StringIO(comment.body) as body:
+ for line in body:
+                if line.startswith('<summary>Full report'):
+ body.readline() # Blank line before table.
+ cols, rows = memdf.util.markdown.read_hierified(body)
+ break
+ logging.debug('REC: read %d rows', len(rows))
+ df = df.append(pd.DataFrame(data=rows, columns=cols).astype(df.dtypes))
+ return df.sort_values(
+ by=['platform', 'target', 'config', 'section']).drop_duplicates()
def main(argv):
@@ -733,18 +387,19 @@ def main(argv):
try:
config = Config().init({
**memdf.util.config.CONFIG,
+ **memdf.util.github.CONFIG,
**memdf.util.sqlite.CONFIG,
**memdf.report.OUTPUT_CONFIG,
**GITHUB_CONFIG,
+ **DB_CONFIG,
+ **REPORT_CONFIG,
})
config.argparse.add_argument('inputs', metavar='FILE', nargs='*')
config.parse(argv)
- dfs = {}
- with SizeDatabase(config) as db:
- db.read_inputs()
- dfs.update(report_matching_commits(db))
- dfs.update(report_queries(db))
+ szc = SizeContext(config)
+ szc.read_inputs()
+ dfs = szc.report_matching_commits()
memdf.report.write_dfs(config,
dfs,
diff --git a/scripts/tools/memory/memdf/README.md b/scripts/tools/memory/memdf/README.md
index 6741da33162378..83628c7676e25a 100644
--- a/scripts/tools/memory/memdf/README.md
+++ b/scripts/tools/memory/memdf/README.md
@@ -1,17 +1,25 @@
This package contains routines to collect, aggregate, and report memory
usage, using Pandas `DataFrame` as the primary representation.
-- memdf.df — `DataFrame` utilities, in particular definitions of columns and
- types for the main uses of data frames.
-- memdf.name — Names for synthetic symbols, etc.
- memdf.collect — Helpers to read memory information from various sources
(e.g. executables) according to command line options.
-- memdf.select — Helpers to select relevant subsets of data frames according
- to command line or configured options.
+- memdf.df — `DataFrame` utilities, in particular definitions of columns and
+ types for the main uses of data frames.
+- memdf.name — Names for synthetic symbols, etc. Individual readers are
+ located under memdf.collector.
- memdf.report — Helpers to write data frames in various formats according to
command line or configured options.
+- memdf.select — Helpers to select relevant subsets of data frames according
+ to command line or configured options.
+- memdf.sizedb — Helpers for a database of size information.
+
+Modules under memdf.util are not specifically tied to memory usage.
+
- memdf.util.config — `Config` utility class for managing command line or
other options according to a declarative description.
+- memdf.util.github — Utilities for communicating with GitHub.
+- memdf.util.markdown — Utilities for manipulating Markdown text.
- memdf.util.nd — Nested dictionary utilities, used by `Config`.
- memdf.util.pretty — Pretty-printed logging utility functions.
+- memdf.util.sqlite — Utilities for connecting to a sqlite3 database.
- memdf.util.subprocess — Utilities for executing external commands.
diff --git a/scripts/tools/memory/memdf/collect.py b/scripts/tools/memory/memdf/collect.py
index b04c7b87d2b63c..0575b967fe4dd7 100644
--- a/scripts/tools/memory/memdf/collect.py
+++ b/scripts/tools/memory/memdf/collect.py
@@ -62,7 +62,9 @@
**memdf.collector.elftools.CONFIG,
**memdf.collector.readelf.CONFIG,
'collect.method': {
- 'help': 'Method of input processing',
+ 'help':
+ 'Method of input processing: one of'
+ ' elftools, readelf, bloaty, csv, tsv, su.',
'metavar': 'METHOD',
'choices': ['elftools', 'readelf', 'bloaty', 'csv', 'tsv', 'su'],
'default': 'elftools',
diff --git a/scripts/tools/memory/memdf/report.py b/scripts/tools/memory/memdf/report.py
index ad5e391492dcc0..bda23163842bd8 100644
--- a/scripts/tools/memory/memdf/report.py
+++ b/scripts/tools/memory/memdf/report.py
@@ -69,7 +69,7 @@
}
-def postprocess_report_by(config: Config, key: str) -> None:
+def postprocess_report_by(config: Config, key: str, info: Mapping) -> None:
"""For --report-by=region, select all sections."""
assert key == 'report.by'
if config.get(key) == 'region':
@@ -147,7 +147,8 @@ def hierify(df: pd.DataFrame) -> pd.DataFrame:
}
-def postprocess_output_metadata(config: Config, key: str) -> None:
+def postprocess_output_metadata(config: Config, key: str,
+ info: Mapping) -> None:
"""For --output-metadata=KEY:VALUE list, convert to dictionary."""
assert key == 'output.metadata'
metadata = {}
@@ -466,7 +467,7 @@ def __init__(self,
'title': 'output options',
},
'output.format': {
- 'help': 'Output format',
+ 'help': f'Output format: one of {", ".join(WRITERS)}.',
'metavar': 'FORMAT',
'default': 'simple',
'choices': list(WRITERS.keys()),
diff --git a/scripts/tools/memory/memdf/select.py b/scripts/tools/memory/memdf/select.py
index 77a3d3f05b4d1e..251b1be5642c9a 100644
--- a/scripts/tools/memory/memdf/select.py
+++ b/scripts/tools/memory/memdf/select.py
@@ -23,7 +23,7 @@
from memdf import Config, ConfigDescription, DF
-from typing import Optional
+from typing import Mapping, Optional
def split_size(config: Config, key: str) -> None:
@@ -51,7 +51,7 @@ def get_limit(config: Config, column: str, name: str) -> int:
return config.getl([column, 'limit', name], config.get('report.limit', 0))
-def postprocess_selections(config: Config, key: str) -> None:
+def postprocess_selections(config: Config, key: str, info: Mapping) -> None:
"""Resolve select/ignore command options."""
split_size(config, key)
choice, select = key.split('.')
diff --git a/scripts/tools/memory/memdf/sizedb.py b/scripts/tools/memory/memdf/sizedb.py
new file mode 100644
index 00000000000000..7e59caedd042e6
--- /dev/null
+++ b/scripts/tools/memory/memdf/sizedb.py
@@ -0,0 +1,254 @@
+#
+# Copyright (c) 2021 Project CHIP Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+"""Sqlite3 database of binary sizes over time."""
+
+import collections
+import json
+import logging
+import sqlite3
+import zipfile
+
+from pathlib import Path
+from typing import Dict, IO, Iterable, List, Optional, Union
+
+import memdf.util.sqlite
+
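+# Result bundle returned by SizeDatabase.select_changes(): the report header
+# (columns) and data rows, plus the sets of thing, build, and artifact ids
+# involved and of those that are stale (superseded by a newer re-run).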
+ChangeInfo = collections.namedtuple('ChangeInfo', [
+ 'columns', 'rows', 'things', 'builds', 'stale_builds', 'artifacts',
+ 'stale_artifacts'
+])
+
+
+class SizeDatabase(memdf.util.sqlite.Database):
+ """A database for recording and comparing size reports."""
+ on_open = ["PRAGMA foreign_keys = ON", "PRAGMA encoding = 'UTF-8'"]
+ on_writable = [
+ """
+ -- A ‘thing’ identifies the kind of built object.
+ -- Builds of the same thing are comparable.
+ CREATE TABLE IF NOT EXISTS thing (
+ id INTEGER PRIMARY KEY,
+ platform TEXT NOT NULL, -- Build platform
+ config TEXT NOT NULL, -- Build configuration discriminator
+ target TEXT NOT NULL, -- Build target
+ UNIQUE(platform, config, target)
+ )
+ """, """
+ -- A ‘build’ identifies a built instance of a thing at some point.
+ CREATE TABLE IF NOT EXISTS build (
+ id INTEGER PRIMARY KEY,
+ thing_id INTEGER REFERENCES thing(id),
+ hash TEXT NOT NULL, -- Commit hash
+ parent TEXT NOT NULL, -- Parent commit hash
+ pr INTEGER DEFAULT 0, -- Github PR number
+ time INTEGER NOT NULL, -- Unix-epoch timestamp
+ artifact INTEGER DEFAULT 0, -- Github artifact ID
+ commented INTEGER DEFAULT 0, -- 1 if recorded in a GH comment
+ ref TEXT, -- Target git ref
+ event TEXT, -- Github build trigger event
+ UNIQUE(thing_id, hash, parent, pr, time, artifact)
+ )
+ """, """
+ -- A ‘size’ entry gives the size of a section for a particular build.
+ CREATE TABLE IF NOT EXISTS size (
+ build_id INTEGER REFERENCES build(id),
+ name TEXT NOT NULL, -- Section name
+ size INTEGER NOT NULL, -- Section size in bytes
+ PRIMARY KEY (build_id, name)
+ )
+ """
+ ]
+
+ def __init__(self, filename: str, writable: bool = True):
+ super().__init__(filename, writable)
+
+ def add_sizes(self, **kwargs):
+ """
+ Add a size report to the database.
+
+ The incoming arguments must contain the required non-ID column names
+ from ‘thing’ and ‘build’ tables, plus a 'sizes' entry that is a
+ sequence of mappings containing 'name' and 'size'.
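+
+ A minimal illustrative call (all field values hypothetical):
+
+ db.add_sizes(platform='linux', config='arm64', target='light-app',
+ hash='c0ffee', parent='decade', time=1634000000,
+ event='pull_request', pr=12345,
+ sizes=[{'name': '.text', 'size': 125000},
+ {'name': '.bss', 'size': 8192}])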
+ """
+ td = {k: kwargs[k] for k in ('platform', 'config', 'target')}
+ thing = self.store_and_return_id('thing', **td)
+ bd = {k: kwargs[k] for k in ('hash', 'parent', 'time', 'event')}
+ if 'ref' in kwargs:
+ bd['ref'] = kwargs['ref']
+ cd = {k: kwargs.get(k, 0) for k in ('pr', 'artifact', 'commented')}
+ build = self.store_and_return_id('build', thing_id=thing, **bd, **cd)
+ if build is None:
+ logging.error('Failed to store %s %s %s', thing, bd, cd)
+ else:
+ for d in kwargs['sizes']:
+ self.store('size', build_id=build, **d)
+
+ def add_sizes_from_json(self, s: Union[bytes, str], origin: Dict):
+ """Add sizes from a JSON size report."""
+ r = origin.copy()
+ r.update(json.loads(s))
+ r['sizes'] = []
+ # Add section sizes.
+ for i in r['frames'].get('section', []):
+ r['sizes'].append({'name': i['section'], 'size': i['size']})
+ # Add segment sizes.
+ for i in r['frames'].get('wr', []):
+ r['sizes'].append({
+ 'name': ('(read only)', '(read/write)')[int(i['wr'])],
+ 'size':
+ i['size']
+ })
+ self.add_sizes(**r)
+
+ def add_sizes_from_zipfile(self, f: Union[IO, Path], origin: Dict):
+ """Add size reports from a zip."""
+ with zipfile.ZipFile(f, 'r') as zip_file:
+ for i in zip_file.namelist():
+ if i.endswith('-sizes.json'):
+ origin['member'] = i
+ with zip_file.open(i) as member:
+ self.add_sizes_from_json(member.read(), origin)
+
+ def add_sizes_from_file(self, filename: str):
+ """Add size reports from a file."""
+ origin = {'file': filename}
+ path = Path(filename)
+ if path.suffix == '.json':
+ logging.info('ASJ: reading JSON %s', path)
+ with open(path, encoding='utf-8') as f:
+ self.add_sizes_from_json(f.read(), origin)
+ elif path.suffix == '.zip':
+ logging.info('ASZ: reading ZIP %s', path)
+ self.add_sizes_from_zipfile(path, origin)
+ else:
+ logging.warning('Unknown file type "%s" ignored', filename)
+
+ def select_thing_id(self, platform: str, config: str,
+ target: str) -> Optional[str]:
+ cur = self.execute(
+ 'SELECT id FROM thing WHERE platform=? AND config=? AND target=?',
+ (platform, config, target))
+ row = cur.fetchone()
+ return row[0] if row else None
+
+ def select_sections_for_thing(self, thing: str) -> List[str]:
+ cur = self.execute(
+ '''
+ SELECT DISTINCT name FROM size WHERE build_id IN (
+ SELECT DISTINCT id FROM build WHERE thing_id == ?)
+ ORDER BY name
+ ''', (thing,))
+ return [row[0] for row in cur.fetchall()]
+
+ def select_matching_commits(self):
+ """Find matching builds, where one's commit is the other's parent."""
+ return self.execute('''
+ SELECT DISTINCT
+ c.event as event,
+ c.pr AS pr,
+ c.hash AS hash,
+ p.hash AS parent
+ FROM build c
+ INNER JOIN build p ON p.hash = c.parent
+ WHERE c.commented = 0
+ ORDER BY c.time DESC, c.pr, c.hash, p.hash
+ ''')
+
+ def select_changes(self, parent: str, commit: str) -> ChangeInfo:
+ """Returns size changes between the given commits."""
+ cur = self.execute(
+ '''
+ SELECT DISTINCT
+ t.id AS thing,
+ cb.artifact AS artifact,
+ pb.id AS parent_build,
+ cb.id AS commit_build,
+ t.platform, t.config, t.target,
+ cs.name AS name,
+ ps.size AS parent_size,
+ cs.size AS commit_size,
+ cb.time AS time
+ FROM thing t
+ INNER JOIN build cb ON cb.thing_id = t.id
+ INNER JOIN build pb ON pb.thing_id = t.id AND pb.hash = cb.parent
+ INNER JOIN size cs ON cs.build_id = cb.id
+ INNER JOIN size ps ON ps.build_id = pb.id AND cs.name = ps.name
+ WHERE cb.hash = ? AND pb.hash = ?
+ ORDER BY t.platform, t.config, t.target,
+ cs.name, cb.time DESC, pb.time DESC
+ ''', (commit, parent))
+
+ keep = ('platform', 'target', 'config', 'name', 'parent_size',
+ 'commit_size')
+ things: set[int] = set()
+ artifacts: set[int] = set()
+ builds: set[int] = set()
+ stale_builds: set[int] = set()
+ stale_artifacts: set[int] = set()
+ previous: Optional[sqlite3.Row] = None
+ rows = []
+
+ for row in cur.fetchall():
+ row = sqlite3.Row(cur, row)
+ things.add(row['thing'])
+ if (previous is not None and row['thing'] == previous['thing']
+ and row['name'] == previous['name']):
+ # This is a duplicate build, older because we sort descending,
+ # presumably from a partial workflow re-run.
+ if row['parent_build'] != previous['parent_build']:
+ stale_builds.add(row['parent_build'])
+ if row['commit_build'] != previous['commit_build']:
+ stale_builds.add(row['commit_build'])
+ stale_artifacts.add(row['artifact'])
+ else:
+ previous = row
+ new = [row[k] for k in keep]
+ parent_size = row['parent_size']
+ commit_size = row['commit_size']
+ new.append(commit_size - parent_size)
+ new.append(self.percent_change(parent_size, commit_size))
+ rows.append(new)
+ artifacts.add(row['artifact'])
+ builds.add(row['commit_build'])
+
+ return ChangeInfo(('platform', 'target', 'config', 'section',
+ parent[:8], commit[:8], 'change', '% change'), rows,
+ things, builds, stale_builds, artifacts,
+ stale_artifacts)
+
+ def set_commented(self, build_ids: Iterable[int]):
+ """Set the commented flag for the given builds."""
+ if not build_ids:
+ return
+ for build_id in build_ids:
+ self.execute('UPDATE build SET commented = 1 WHERE id = ?',
+ (build_id, ))
+ self.commit()
+
+ def delete_builds(self, build_ids: Iterable[int]):
+ """Delete the given builds."""
+ if not build_ids:
+ return
+ for build_id in build_ids:
+ self.execute('DELETE FROM size WHERE build_id = ?', (build_id, ))
+ self.execute('DELETE FROM build WHERE id = ?', (build_id, ))
+ self.commit()
+
+ @staticmethod
+ def percent_change(a: int, b: int) -> float:
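+ # e.g. percent_change(1000, 1010) == 1.0; a zero base gives 0.0 or inf.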
+ if a == 0:
+ return 0.0 if b == 0 else float('inf')
+ return 100. * (b - a) / a
diff --git a/scripts/tools/memory/memdf/util/config.py b/scripts/tools/memory/memdf/util/config.py
index 20f1434a236233..f8294ad8b037a5 100644
--- a/scripts/tools/memory/memdf/util/config.py
+++ b/scripts/tools/memory/memdf/util/config.py
@@ -41,8 +41,8 @@
# supplied as keyword arguments to `argparse.add_argument()`,
# except for:
# 'alias': list of alternate command line option names
-# 'postprocess': a callable invoked after argument parsing with two
-# arguments: the config and the key
+# 'postprocess': a callable invoked after argument parsing with three
+# arguments: the config, the key, and the description entry.
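+# For example (an illustrative, hypothetical callable):
+# def check(config: Config, key: str, info: Mapping) -> None: ...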
#
# Special keys can be used to control argument parser groups. By default any
# configuration key containing a ‘.’ belongs to a group determined by the
@@ -157,6 +157,7 @@ def init_args(self, desc: ConfigDescription, *args, **kwargs) -> 'Config':
arg_info = arg_info.copy()
name = arg_info.pop('argument', '--' + key.replace('.', '-'))
names = [name] + arg_info.pop('alias', [])
+ info['names'] = names
for k in ['metavar', 'choices']:
if k in info:
arg_info[k] = info[k]
@@ -171,7 +172,7 @@ def init_args(self, desc: ConfigDescription, *args, **kwargs) -> 'Config':
elif isinstance(default, int) and 'metavar' not in info:
arg_info['action'] = 'count'
if postprocess := info.get('postprocess'):
- self.postprocess_args[key] = postprocess
+ self.postprocess_args[key] = (postprocess, info)
group: Optional[str] = info.get('group')
if group is None and (e := key.find('.')) > 0:
@@ -226,10 +227,6 @@ def parse(self, argv: Sequence[str]) -> 'Config':
key = 'args.' + dest
self.put(key, value)
- # Postprocess config.
- for key, action in self.postprocess_args.items():
- action(self, key)
-
# Configure logging.
if self.get('log-level') is None:
verbose = self.get('verbose', 0)
@@ -242,6 +239,11 @@ def parse(self, argv: Sequence[str]) -> 'Config':
logging.basicConfig(level=self.get('log-level'),
format=self.get('log-format'))
+ # Postprocess config.
+ for key, postprocess in self.postprocess_args.items():
+ action, info = postprocess
+ action(self, key, info)
+
memdf.util.pretty.debug(self.d)
return self
@@ -292,8 +294,8 @@ def __call__(self, parser, namespace, values, option_string=None):
# Config description of options shared by all tools.
CONFIG: ConfigDescription = {
'log-level': {
- 'help': 'Set logging level',
- 'metavar': 'LEVEL',
+ 'help':
+ 'Set logging level: one of critical, error, warning, info, debug.',
'default': None,
'choices': ['critical', 'error', 'warning', 'info', 'debug'],
},
diff --git a/scripts/tools/memory/memdf/util/github.py b/scripts/tools/memory/memdf/util/github.py
new file mode 100644
index 00000000000000..bcd14c597701a2
--- /dev/null
+++ b/scripts/tools/memory/memdf/util/github.py
@@ -0,0 +1,237 @@
+#
+# Copyright (c) 2022 Project CHIP Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+"""Utility wrapper for GitHub operations."""
+
+import itertools
+import logging
+import os
+
+from typing import Iterable, Mapping, Optional
+
+import dateutil # type: ignore
+import dateutil.parser # type: ignore
+import ghapi.all # type: ignore
+
+from memdf import Config, ConfigDescription
+
+
+def postprocess_config(config: Config, _key: str, _info: Mapping) -> None:
+ """Postprocess --github-repository."""
+ if config['github.repository']:
+ owner, repo = config.get('github.repository').split('/', 1)
+ config.put('github.owner', owner)
+ config.put('github.repo', repo)
+ if not config['github.token']:
+ config['github.token'] = os.environ.get('GITHUB_TOKEN')
+ if not config['github.token']:
+ logging.error('Missing --github-token')
+
+
+CONFIG: ConfigDescription = {
+ Config.group_def('github'): {
+ 'title': 'github options',
+ },
+ 'github.token': {
+ 'help': 'Github API token, or "SKIP" to suppress connecting to github',
+ 'metavar': 'TOKEN',
+ 'default': '',
+ 'argparse': {
+ 'alias': ['--github-api-token', '--token'],
+ },
+ },
+ 'github.repository': {
+ 'help': 'Github repository',
+ 'metavar': 'OWNER/REPO',
+ 'default': '',
+ 'argparse': {
+ 'alias': ['--repo'],
+ },
+ 'postprocess': postprocess_config,
+ },
+ 'github.dryrun-comment': {
+ 'help': "Don't actually post comments",
+ 'default': False,
+ },
+ 'github.keep': {
+ 'help': "Don't remove PR artifacts",
+ 'default': False,
+ 'argparse': {
+ 'alias': ['--keep'],
+ },
+ },
+ 'github.limit-artifact-pages': {
+ 'help': 'Examine no more than COUNT pages of artifacts',
+ 'metavar': 'COUNT',
+ 'default': 0,
+ 'argparse': {
+ 'type': int,
+ },
+ },
+}
+
+
+class Gh:
+ """Utility wrapper for GitHub operations."""
+
+ def __init__(self, config: Config):
+ self.config = config
+ self.ghapi: Optional[ghapi.all.GhApi] = None
+ self.deleted_artifacts: set[int] = set()
+
+ owner = config['github.owner']
+ repo = config['github.repo']
+ token = config['github.token']
+ if owner and repo and token and token != 'SKIP':
+ self.ghapi = ghapi.all.GhApi(owner=owner, repo=repo, token=token)
+
+ def __bool__(self):
+ return self.ghapi is not None
+
+ def get_comments_for_pr(self, pr: int):
+ """Iterate PR comments."""
+ assert self.ghapi
+ try:
+ return itertools.chain.from_iterable(
+ ghapi.all.paged(self.ghapi.issues.list_comments, pr))
+ except Exception as e:
+ logging.error('Failed to get comments for PR #%d: %s', pr, e)
+ return []
+
+ def get_commits_for_pr(self, pr: int):
+ """Iterate PR commits."""
+ assert self.ghapi
+ try:
+ return itertools.chain.from_iterable(
+ ghapi.all.paged(self.ghapi.pulls.list_commits, pr))
+ except Exception as e:
+ logging.error('Failed to get commits for PR #%d: %s', pr, e)
+ return []
+
+ def get_artifacts(self, page_limit: int = -1, per_page: int = -1):
+ """Iterate artifact descriptions."""
+ if page_limit < 0:
+ page_limit = self.config['github.limit-artifact-pages']
+ if per_page < 0:
+ per_page = self.config['github.artifacts-per-page'] or 100
+
+ assert self.ghapi
+ try:
+ page = 0
+ for i in ghapi.all.paged(
+ self.ghapi.actions.list_artifacts_for_repo,
+ per_page):
+ if not i.artifacts:
+ break
+ for a in i.artifacts:
+ yield a
+ page += 1
+ logging.debug('ASP: artifact page %d of %d', page, page_limit)
+ if page_limit and page >= page_limit:
+ break
+ except Exception as e:
+ logging.error('Failed to get artifact list: %s', e)
+
+ def get_size_artifacts(self,
+ page_limit: int = -1,
+ per_page: int = -1,
+ label: str = ''):
+ """Iterate size artifact descriptions."""
+ for a in self.get_artifacts(page_limit, per_page):
+ # Size artifacts have names of the form:
+ # Size,{group},{pr},{commit_hash},{parent_hash}[,{event}]
+ # This information is added to the attribute record from GitHub.
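+ # For example (hypothetical values):
+ # Size,esp32-m5stack,12345,c0ffee12,decade34,pull_request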
+ if a.name.startswith('Size,') and a.name.count(',') >= 4:
+ _, group, pr, commit, parent, *etc = a.name.split(',')
+ if label and group != label:
+ continue
+ a.group = group
+ a.commit = commit
+ a.parent = parent
+ a.pr = pr
+ a.created_at = dateutil.parser.isoparse(a.created_at)
+ # Old artifact names don't include the event.
+ if etc:
+ event = etc[0]
+ else:
+ event = 'push' if pr == '0' else 'pull_request'
+ a.event = event
+ yield a
+
+ def download_artifact(self, artifact_id: int):
+ """Download a GitHub artifact, returning a binary zip object."""
+ logging.debug('Downloading artifact %d', artifact_id)
+ try:
+ assert self.ghapi
+ return self.ghapi.actions.download_artifact(artifact_id, 'zip')
+ except Exception as e:
+ logging.error('Failed to download artifact %d: %s', artifact_id, e)
+ return None
+
+ def delete_artifact(self, artifact_id: int) -> bool:
+ """Delete a GitHub artifact."""
+ if not artifact_id or artifact_id in self.deleted_artifacts:
+ return True
+ self.deleted_artifacts.add(artifact_id)
+
+ if self.config['github.keep']:
+ logging.info('Suppressed deleting artifact %d', artifact_id)
+ return False
+
+ try:
+ assert self.ghapi
+ logging.info('Deleting artifact %d', artifact_id)
+ self.ghapi.actions.delete_artifact(artifact_id)
+ return True
+ except Exception as e:
+ # During manual testing we sometimes lose the race against CI.
+ logging.error('Failed to delete artifact %d: %s', artifact_id, e)
+ return False
+
+ def delete_artifacts(self, artifacts: Iterable[int]):
+ for artifact_id in artifacts:
+ self.delete_artifact(artifact_id)
+
+ def create_comment(self, issue_id: int, text: str) -> bool:
+ """Create a GitHub comment."""
+ if self.config['github.dryrun-comment']:
+ logging.info('Suppressed creating comment on #%d', issue_id)
+ logging.debug('%s', text)
+ return False
+
+ assert self.ghapi
+ logging.info('Creating comment on #%d', issue_id)
+ try:
+ self.ghapi.issues.create_comment(issue_id, text)
+ return True
+ except Exception as e:
+ logging.error('Failed to create comment on #%d: %s', issue_id, e)
+ return False
+
+ def update_comment(self, comment_id: int, text: str) -> bool:
+ """Update a GitHub comment."""
+ if self.config['github.dryrun-comment']:
+ logging.info('Suppressed updating comment #%d', comment_id)
+ logging.debug('%s', text)
+ return False
+
+ logging.info('Updating comment #%d', comment_id)
+ try:
+ assert self.ghapi
+ self.ghapi.issues.update_comment(comment_id, text)
+ return True
+ except Exception as e:
+ logging.error('Failed to update comment %d: %s', comment_id, e)
+ return False
diff --git a/scripts/tools/memory/memdf/util/markdown.py b/scripts/tools/memory/memdf/util/markdown.py
new file mode 100644
index 00000000000000..52aea03e86db91
--- /dev/null
+++ b/scripts/tools/memory/memdf/util/markdown.py
@@ -0,0 +1,41 @@
+#
+# Copyright (c) 2021 Project CHIP Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+"""Markdown utilities."""
+
+
+def read_hierified(f):
+ """Read a markdown table in ‘hierified’ format."""
+
+ line = f.readline()
+ header = tuple((s.strip() for s in line.split('|')[1:-1]))
+
+ _ = f.readline() # The line under the header.
+
+ rows = []
+ for line in f:
+ line = line.strip()
+ if not line:
+ break
+ row = []
+ columns = line.split('|')
+ for i in range(0, len(header)):
+ column = columns[i + 1].strip()
+ if not column:
+ column = rows[-1][i]
+ row.append(column)
+ rows.append(tuple(row))
+
+ return (header, rows)
diff --git a/scripts/tools/memory/memdf/util/sqlite.py b/scripts/tools/memory/memdf/util/sqlite.py
index 94b6f9b33a2f81..c0d825ac98f860 100644
--- a/scripts/tools/memory/memdf/util/sqlite.py
+++ b/scripts/tools/memory/memdf/util/sqlite.py
@@ -20,6 +20,8 @@
from typing import List, Optional
+import pandas as pd # type: ignore
+
from memdf import Config, ConfigDescription
CONFIG: ConfigDescription = {
@@ -29,7 +31,7 @@
'database.file': {
'help': 'Sqlite3 file',
'metavar': 'FILENAME',
- 'default': ':memory:',
+ 'default': None,
'argparse': {
'alias': ['--db'],
},
@@ -112,3 +114,12 @@ def get_matching_id(self, table: str, **kwargs):
def store_and_return_id(self, table: str, **kwargs) -> Optional[int]:
self.store(table, **kwargs)
return self.get_matching_id(table, **kwargs)
+
+ def data_frame(self, query, parameters=None) -> pd.DataFrame:
+ """Return the results of a query as a DataFrame."""
+ cur = self.execute(query, parameters)
+ columns = [i[0] for i in cur.description]
+ df = pd.DataFrame(cur.fetchall(), columns=columns)
+ self.commit()
+ df.attrs = {'title': query}
+ return df