diff --git a/.github/.wordlist.txt b/.github/.wordlist.txt index 9ac4419af442c9..0a930013cb9509 100644 --- a/.github/.wordlist.txt +++ b/.github/.wordlist.txt @@ -455,6 +455,9 @@ GetDeviceInfo GetDns GetIP getstarted +GH +gh +ghp githubusercontent gitignore glibc @@ -848,6 +851,7 @@ PyEval PyFunction pylint PyObject +pypi PyRun pytest QEMU @@ -958,6 +962,7 @@ SiLabs SiliconLabs SimpleFileExFlags SimpleLink +sizedb sl SLAAC SLTB @@ -1041,6 +1046,7 @@ testws texinfo textboxes TFT +ThIsIsNoTMyReAlGiThUbToKeNSoDoNoTtRy threadOperationalDataset ThreadStackManager ThreadStackManagerImpl @@ -1052,6 +1058,7 @@ TLV tmp tngvndl TODO +toJson tokenized toolchain toolchains diff --git a/scripts/tools/memory/.pylintrc b/scripts/tools/memory/.pylintrc index 6b77df256b40d1..413c27f80e068c 100644 --- a/scripts/tools/memory/.pylintrc +++ b/scripts/tools/memory/.pylintrc @@ -1,7 +1,7 @@ [BASIC] -disable=too-few-public-methods,bad-whitespace +disable=too-few-public-methods,bad-whitespace,broad-except -no-docstring-rgx=main +no-docstring-rgx=main|__init__ docstring-min-length=5 min-public-methods=1 max-args=7 diff --git a/scripts/tools/memory/README-GitHub-CI.md b/scripts/tools/memory/README-GitHub-CI.md new file mode 100644 index 00000000000000..d2171e26eb9050 --- /dev/null +++ b/scripts/tools/memory/README-GitHub-CI.md @@ -0,0 +1,180 @@ +# Scripts for GitHub CI + +A set of `gh_*.py` scripts work together to produce size comparisons for PRs. + +## Reports on Pull Requests + +The scripts' results are presented as comments on PRs. + +**Note** that a comment may be updated by the scripts as CI run results become +available. + +**Note** that the scripts will not create a comment for a commit if there is +already a newer commit in the PR. + +A size report comment consists of a title followed by one to four tables. A +title looks like: + +> PR #12345678: Size comparison from `base-SHA` to `pr-SHA` + +The first table, if present, lists items with a large increase, according to a +configurable threshold. + +The next table, if present, lists all items that have increased in size. + +The next table, if present, lists all items that have decreased in size. + +The final table, always present, lists all items. + +## Usage in CI + +The original intent was to have a tool that would run after a build in CI, add +its sizes to a central database, and immediately report on size changes from the +parent commit in the database. Unfortunately, GitHub provides no practical place +to store and share such a database between workflow actions. Instead, the +process is split; builds in CI record size information in the form of GitHub +[artifacts](https://docs.github.com/en/actions/advanced-guides/storing-workflow-data-as-artifacts), +and a later step reads these artifacts to generate reports. + +### 1. Build workflows + +#### gh_sizes_environment.py + +The `gh_sizes_environment.py` script should be run once in each workflow that +records sizes, _after_ checkout and _before_ any use of `gh_sizes.py` It takes a +single argument, a JSON dictionary of the `github` context. Typically run as: + +``` + steps: + - name: Checkout + uses: actions/checkout@v2 + with: + submodules: true + + - name: Set up environment for size reports + if: ${{ !env.ACT }} + env: + GH_CONTEXT: ${{ toJson(github) }} + run: scripts/tools/memory/gh_sizes_environment.py "${GH_CONTEXT}" +``` + +#### gh_sizes.py + +The `gh_sizes.py` script runs on a built binary (executable or library) and +produces a JSON file containing size information. 
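+
+The exact JSON layout is owned by `gh_sizes.py`; as a rough sketch, the fields
+that `memdf/sizedb.py` later reads back out of such a report look like the
+following (all values here are placeholders, and real reports may carry
+additional frames):
+
+```
+{
+  "platform": "linux", "config": "arm64", "target": "thermostat-no-ble",
+  "time": 1637012345, "event": "push", "pr": 0,
+  "hash": "<commit SHA>", "parent": "<parent commit SHA>",
+  "frames": {
+    "section": [{"section": ".text", "size": 1234}],
+    "wr": [{"wr": 0, "size": 1234}]
+  }
+}
+```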
+ +Usage: `gh_sizes.py` _platform_ _config_ _target_ _binary_ [_output_] + +Where _platform_ is the platform name, corresponding to a config file in +`scripts/tools/memory/platform/`. + +Where _config_ is a configuration identification string. This has no fixed +meaning, but is intended to describe a build variation, e.g. a particular target +board or debug vs release. + +Where _target_ is a readable name for the build artifact, identifying it in +reports. + +Where _binary_ is the input build artifact. + +Where _output_ is the name for the output JSON file, or a directory for it, in +which case the name will be +_platform_`-`_config_name_`-`_target_name_`-sizes.json`. + +Example: + +``` + scripts/tools/memory/gh_sizes.py \ + linux arm64 thermostat-no-ble \ + out/linux-arm64-thermostat-no-ble/thermostat-app \ + /tmp/bloat_reports/ +``` + +#### Upload artifacts + +The JSON files generated by `gh_sizes.py` must be uploaded with an artifact name +of a very specific form in order to be processed correctly. + +Example: + +``` +Size,Linux-Examples,${{ env.GH_EVENT_PR }},${{ env.GH_EVENT_HASH }},${{ env.GH_EVENT_PARENT }},${{ github.event_name }} +``` + +Other builds must replace `Linux-Examples` with a label unique to the workflow, +but otherwise use the form exactly. + +### 2. Reporting workflow + +Run a periodic workflow calling `gh_report.py` to generate PR comments. This +script has full `--help`, but normal use is probably best illustrated by an +example: + +``` + scripts/tools/memory/gh_report.py \ + --verbose \ + --report-increases 0.2 \ + --report-pr \ + --github-comment \ + --github-limit-artifact-pages 50 \ + --github-limit-artifacts 500 \ + --github-limit-comments 20 \ + --github-repository project-chip/connectedhomeip \ + --github-api-token "${{ secrets.GITHUB_TOKEN }}" +``` + +Notably, the `--report-increases` flag provides a _percent growth_ threshold for +calling out ‘large’ increases in GitHub comments. + +When this script successfully posts a comment on a GitHub PR, it removes the +corresponding PR artifact(s) so that a future run will not process it again and +post the same comment. Only PR artifacts are removed, not push (trunk) +artifacts, since those may be used as a comparison base by many different PRs. + +## Using a database + +It can be useful to keep a permanent record of build sizes. + +### Updating the database: `gh_db_load.py` + +To update an SQLite file of trunk commit sizes, periodically run: + +``` + gh_db_load.py \ + --repo project-chip/connectedhomeip \ + --token ghp_ThIsIsNoTMyReAlGiThUbToKeNSoDoNoTtRy \ + --db /path/to/database +``` + +Those interested in only a single platform can add the `--github-label` option, +providing the same name as in the size artifact name after `Size,` (e.g. +`Linux-Examples` in the upload example above). + +See `--help` for additional options. + +_Note_: Transient 4xx and 5xx errors from GitHub's API are very common. Run +`gh_db_load.py` frequently enough to give it several attempts before the +relevant artifacts expire. + +### Querying the database: `gh_db_query.py` + +While the database can of course be used directly, the `gh_db_query.py` script +provides a handful of common queries. + +Note that this script (like others that show tables) has an `--output-format` +option offering (among others) CSV, several JSON formats, and any text format +provided by [tabulate](https://pypi.org/project/tabulate/). 
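+
+For example, a hypothetical invocation listing the known platforms as CSV
+(the database path is a placeholder):
+
+```
+    gh_db_query.py --db /path/to/database --query-platforms --to csv
+```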
+ +Two notable options: + +- `--query-build-sizes PLATFORM,CONFIG,TARGET` lists sizes for all builds of + the given kind, with a column for each section. +- `--query-section-changes PLATFORM,CONFIG,TARGET,SECTION` lists changes for + the given section. The `--report-increases PERCENT` option limits this to + changes over a given threshold (as is done for PR comments). + +(To find out what PLATFORM, CONFIG, TARGET, and SECTION exist: +`--query-platforms`, then `--query-platform-targets=PLATFORM` and +`--query-platform-sections=PLATFORM`.) + +See `--help` for additional options. diff --git a/scripts/tools/memory/README.md b/scripts/tools/memory/README.md index 9ccd4a2560813e..3bde9607ce165c 100644 --- a/scripts/tools/memory/README.md +++ b/scripts/tools/memory/README.md @@ -41,14 +41,15 @@ The following options are common to _most_ of the scripts, where applicable: - `--output-format` _FORMAT_, `--to` _FORMAT_, `-t` _FORMAT_ Output format. One of: - `text` — Plain text tables, in a single file. - - `csv` — Comma-separated tables (in several files). - - `tsv` — Tab-separated tables (in several files). + - `csv` — Comma-separated tables (in several files, if not stdout). + - `tsv` — Tab-separated tables (in several files, if not stdout). - `json_split` — JSON - see Pandas documentation for details. - `json_records` — JSON - see Pandas documentation for details. - `json_index` — JSON - see Pandas documentation for details. - `json_columns` — JSON - see Pandas documentation for details. - `json_values` — JSON - see Pandas documentation for details. - `json_table` — JSON - see Pandas documentation for details. + - Any format provided by [tabulate](https://pypi.org/project/tabulate/). - `--report-limit` _BYTES_, `--limit` _BYTES_ Limit display to items above the given size. Suffixes (e.g. `K`) are accepted. - `--report-by` _GROUP_, `--by` _GROUP_ Reporting group. One of: diff --git a/scripts/tools/memory/gh_db_load.py b/scripts/tools/memory/gh_db_load.py new file mode 100755 index 00000000000000..9cb69bea5c2b7e --- /dev/null +++ b/scripts/tools/memory/gh_db_load.py @@ -0,0 +1,113 @@ +#!/usr/bin/env python3 +# +# Copyright (c) 2021 Project CHIP Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +"""Fetch data from GitHub size artifacts.""" + +import io +import logging +import sys + +import memdf.sizedb +import memdf.util.config +import memdf.util.markdown +import memdf.util.sqlite +from memdf.util.github import Gh +from memdf import Config, ConfigDescription + +GITHUB_CONFIG: ConfigDescription = { + Config.group_def('github'): { + 'title': 'github options', + }, + 'github.event': { + 'help': 'Download only event type(s) (default ‘push’)', + 'metavar': 'EVENT', + 'default': [], + 'argparse': { + 'alias': ['--event'] + }, + }, + 'github.limit-artifacts': { + 'help': 'Download no more than COUNT artifacts', + 'metavar': 'COUNT', + 'default': 0, + 'argparse': { + 'type': int, + }, + }, + 'github.label': { + 'help': 'Download artifacts for one label only', + 'metavar': 'LABEL', + 'default': '', + }, +} + + +def main(argv): + status = 0 + try: + sqlite_config = memdf.util.sqlite.CONFIG + sqlite_config['database.file']['argparse']['required'] = True + + config = Config().init({ + **memdf.util.config.CONFIG, + **memdf.util.github.CONFIG, + **sqlite_config, + **GITHUB_CONFIG, + }) + config.argparse.add_argument('inputs', metavar='FILE', nargs='*') + config.parse(argv) + + db = memdf.sizedb.SizeDatabase(config['database.file']).open() + + if gh := Gh(config): + + artifact_limit = config['github.limit-artifacts'] + artifacts_added = 0 + events = config['github.event'] + if not events: + events = ['push'] + for a in gh.get_size_artifacts(label=config['github.label']): + if events and a.event not in events: + logging.debug('Skipping %s artifact %d', a.event, a.id) + continue + cur = db.execute('SELECT id FROM build WHERE artifact = ?', + (a.id,)) + if cur.fetchone(): + logging.debug('Skipping known artifact %d', a.id) + continue + blob = gh.download_artifact(a.id) + if blob: + logging.info('Adding artifact %d %s %s %s %s', + a.id, a.commit[:12], a.pr, a.event, a.group) + db.add_sizes_from_zipfile(io.BytesIO(blob), + {'artifact': a.id}) + db.commit() + artifacts_added += 1 + if artifact_limit and artifact_limit <= artifacts_added: + break + + for filename in config['args.inputs']: + db.add_sizes_from_file(filename) + db.commit() + + except Exception as exception: + raise exception + + return status + + +if __name__ == '__main__': + sys.exit(main(sys.argv)) diff --git a/scripts/tools/memory/gh_db_query.py b/scripts/tools/memory/gh_db_query.py new file mode 100755 index 00000000000000..e42262e567ba7f --- /dev/null +++ b/scripts/tools/memory/gh_db_query.py @@ -0,0 +1,443 @@ +#!/usr/bin/env python3 +# +# Copyright (c) 2022 Project CHIP Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +"""Common queries on a size database.""" + +import datetime +import logging +import sys + +from typing import cast, Dict, List, Mapping, Optional, Tuple + +import pandas as pd # type: ignore + +import memdf.report +import memdf.util.config +import memdf.util.sqlite +from memdf.sizedb import SizeDatabase +from memdf import Config + + +QUERY_CONFIG = { + Config.group_map('query'): { + 'group': 'output' + }, + 'report.increases': { + 'help': 'Highlight large increases', + 'metavar': 'PERCENT', + 'default': 0.0, + 'argparse': { + 'alias': ['--threshold'], + 'type': float, + }, + }, + 'query.where': { + 'help': 'SQL filter', + 'metavar': 'SQL-EXPR', + 'default': '', + 'argparse': { + 'alias': ['--where'], + }, + }, + 'query.order': { + 'help': 'sort order', + 'metavar': 'COLUMN[,COLUMN]*', + 'default': '', + 'argparse': { + 'alias': ['--order'], + }, + }, + 'query.limit': { + 'help': 'limit result size', + 'metavar': 'ROWS', + 'default': 0, + 'argparse': { + 'alias': ['--limit'], + }, + }, +} + + +def argsplit(metavar: str, value: str) -> Tuple[Optional[Tuple], Dict]: + """Given comma-separated metavar and values, match them up.""" + values = tuple(value.split(',')) + names = metavar.split(',') + if len(names) < len(values): + logging.error('Too many values for %s', metavar) + return (None, {}) + if len(names) > len(values): + logging.error('Missing %s for %s', ','.join(names[len(values):]), + metavar) + return (None, {}) + return (values, dict(zip(names, values))) + + +def postprocess_canned_sql_option(config: Config, key: str, + info: Mapping) -> None: + """Record information from simple SQL query options in one place.""" + value = config[key] + if not value: + return + title = info['sql']['title'] + if isinstance(value, str): + metavar = info.get('metavar', 'VALUE') + if ',' in metavar: + values, args = argsplit(metavar, value) + if not values: + return + else: + values = (value,) + args = {metavar: value} + title = title.format(**args) + else: + values = tuple() + + if config['queries'] is None: + config['queries'] = [] + cast(list, config['queries']).append((title, key, values, info)) + + +def make_query(config: Config, info: Mapping) -> str: + """Construct an SQL query string for a simple SQL query option.""" + args = {'where': '', 'order': '', 'limit': ''} + if where := config.get('query.where'): + if kw := info['sql'].get('where'): + args['where'] = f'{kw} {where}' + if order := (config.get('query.order') or info['sql'].get('order')): + args['order'] = f'ORDER BY {order}' + if limit := config.get('query.limit'): + args['limit'] = f'LIMIT {limit}' + return info['sql']['query'].format(**args) + + +def postprocess_df_time(_config: Config, df: pd.DataFrame) -> pd.DataFrame: + """Convert a DataFrame ‘time’ column from Unix timestamp to ISO.""" + df['time'] = df['time'].map(lambda t: datetime.datetime.utcfromtimestamp(t) + .isoformat()) + return df + + +def postprocess_df_changes(config: Config, df: pd.DataFrame) -> pd.DataFrame: + """Given ‘parent_size’and ‘commit_size’ columns, add change columns.""" + df['change'] = df.apply(lambda row: row.commit_size - row.parent_size, + axis=1) + df['% change'] = df.apply(lambda row: SizeDatabase.percent_change( + row.parent_size, row.commit_size), + axis=1) + if threshold := config['report.increases']: + df = df[df['% change'] > threshold] + return df + + +QUERY_CONFIG |= { + 'query.platforms': { + 'help': 'List known platforms', + 'default': False, + 'postprocess': postprocess_canned_sql_option, + 'sql': { + 'title': 'Platforms', + 'query': ''' + 
SELECT DISTINCT platform FROM thing {where} {order} {limit} + ''', + 'where': 'WHERE', + 'order': 'platform', + }, + 'argparse': { + 'alias': ['--platforms'], + }, + }, + 'query.platform-targets': { + 'help': 'List known targets for the given platform', + 'metavar': 'PLATFORM', + 'default': '', + 'postprocess': postprocess_canned_sql_option, + 'sql': { + 'title': 'Platform Targets', + 'query': ''' + SELECT DISTINCT platform, config, target + FROM thing + WHERE platform=? {where} + {order} {limit} + ''', + 'where': 'AND', + 'order': 'platform, config, target', + }, + 'argparse': { + 'alias': ['--platform-targets'], + }, + }, + 'query.platform-sections': { + 'help': 'List known sections for the given platform', + 'metavar': 'PLATFORM', + 'default': '', + 'postprocess': postprocess_canned_sql_option, + 'sql': { + 'title': 'Platform Sections', + 'query': ''' + SELECT DISTINCT platform, s.name AS section + FROM thing t + INNER JOIN build b ON t.id == b.thing_id + INNER JOIN size s ON b.id == s.build_id + WHERE platform=? {where} + {order} {limit} + ''', + 'where': 'AND', + 'order': 'platform, section', + }, + 'argparse': { + 'alias': ['--platform-sections'], + }, + }, + 'query.section-sizes': { + 'help': 'List size data for a given build section', + 'metavar': 'PLATFORM,CONFIG,TARGET,SECTION', + 'default': '', + 'postprocess': postprocess_canned_sql_option, + 'sql': { + 'title': 'Sizes for {PLATFORM} {CONFIG} {TARGET} {SECTION}', + 'query': ''' + SELECT DISTINCT time, hash, pr, size + FROM build b + INNER JOIN size s ON b.id == s.build_id + WHERE b.thing_id == (SELECT id FROM thing + WHERE platform == ? + AND config == ? + AND target == ?) + AND name == ? + {where} + {order} {limit} + ''', + 'where': 'AND', + 'order': 'time', + 'postprocess': [postprocess_df_time], + }, + }, + 'query.section-changes': { + 'help': 'List size changes for a given build section', + 'metavar': 'PLATFORM,CONFIG,TARGET,SECTION', + 'default': '', + 'postprocess': postprocess_canned_sql_option, + 'sql': { + 'title': 'Changes for {PLATFORM} {CONFIG} {TARGET} {SECTION}', + 'query': ''' + WITH builds (bid, pid, time, pr, hash) AS ( + SELECT DISTINCT b.id, p.id, b.time, b.pr, b.hash + FROM build b + INNER JOIN build p + ON p.hash = b.parent AND p.thing_id == b.thing_id + WHERE b.thing_id == (SELECT id FROM thing + WHERE platform == ? + AND config == ? + AND target == ?) + ) + SELECT DISTINCT + time, hash, pr, + ps.size as parent_size, + bs.size as commit_size + FROM builds + INNER JOIN size bs ON builds.bid == bs.build_id + INNER JOIN size ps ON builds.pid == ps.build_id + WHERE bs.name == ? 
AND ps.name == bs.name + {where} + {order} {limit} + ''', + 'where': 'AND', + 'order': 'time', + 'postprocess': [postprocess_df_time, postprocess_df_changes], + }, + }, + 'query.all-changes': { + 'help': 'List all size changes', + 'default': False, + 'postprocess': postprocess_canned_sql_option, + 'sql': { + 'title': 'Size Changes', + 'query': ''' + WITH + builds (bid, pid, time, pr, hash, thing_id) AS ( + SELECT DISTINCT b.id, p.id, b.time, b.pr, b.hash, b.thing_id + FROM build b + INNER JOIN build p + ON p.hash = b.parent AND p.thing_id == b.thing_id + ), + changes (bid, tid, name, parent_size, commit_size, change) AS ( + SELECT DISTINCT + bs.build_id, + thing_id, + bs.name, + ps.size as parent_size, + bs.size as commit_size, + bs.size - ps.size as change + FROM builds + INNER JOIN size bs ON builds.bid == bs.build_id + INNER JOIN size ps ON builds.pid == ps.build_id + WHERE bs.name == ps.name + ) + SELECT + time, hash, + platform, config, target, name, + parent_size, commit_size, change + FROM changes + INNER JOIN build ON bid == build.id + INNER JOIN thing ON tid == thing.id + {where} {order} {limit} + ''', + 'where': 'AND', + 'order': 'time', + 'postprocess': [postprocess_df_time, postprocess_df_changes], + }, + }, + 'query.build-sizes': { + # SQLite doesn't have PIVOT so we have to script this. + 'help': 'List size changes for a given build', + 'metavar': 'PLATFORM,CONFIG,TARGET', + 'default': '', + }, +} + + +def get_build_sections(db: SizeDatabase, build: str) -> Optional[Tuple]: + """Split a build arg and get its thing_id and sections.""" + values, args = argsplit('PLATFORM,CONFIG,TARGET', build) + if not values: + return None + + platform = args['PLATFORM'] + pconfig = args['CONFIG'] + ptarget = args['TARGET'] + thing_id = db.select_thing_id(platform, pconfig, ptarget) + if not thing_id: + logging.error('No match for %s,%s,%s', platform, pconfig, ptarget) + return None + + sections = db.select_sections_for_thing(thing_id) + if not sections: + logging.warning('No sections for %s,%s,%s', platform, pconfig, ptarget) + return None + + return (platform, pconfig, ptarget, thing_id, sections) + + +def make_build_sizes_query(config: Config, thing_id: str, + sections: List[str]) -> Tuple[List[str], str]: + """Construct and SQL query for all section sizes for a given thing.""" + # SQLite doesn't have PIVOT so we need to construct a query with + # a column for each section. 
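+    # For illustration, given thing_id 7 and sections ['.text', '.bss'], the
+    # code below assembles roughly this SQL (names and values hypothetical):
+    #   SELECT time, hash, pr, s0.size AS s0z, s1.size AS s1z
+    #   FROM build b
+    #   INNER JOIN size s0 ON b.id == s0.build_id
+    #   INNER JOIN size s1 ON b.id == s1.build_id
+    #   WHERE b.thing_id == 7 AND s0.name == ".text" AND s1.name == ".bss"
+    #   ORDER BY time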
+ columns = ['time', 'hash', 'pr'] + cols = ', '.join(columns) + joins = '' + where = f' WHERE b.thing_id == {thing_id}' + for i, s in enumerate(sections): + columns.append(s) + cols += f', s{i}.size AS s{i}z' + joins += f' INNER JOIN size s{i} ON b.id == s{i}.build_id' + where += f' AND s{i}.name == "{s}"' + if qw := config['query.where']: + where += f' AND {qw}' + query = f'''SELECT {cols} + FROM build b + {joins} + {where} + ORDER BY {config.get('query.order') or 'time'}''' + if limit := config['query.limit']: + query += f' LIMIT {limit}' + return (columns, query) + + +def query_build_sizes(config: Config, db: SizeDatabase, + build: str) -> Optional[pd.DataFrame]: + """Get all sizes for the given build.""" + t = get_build_sections(db, build) + if not t: + return None + platform, pconfig, ptarget, thing_id, sections = t + + columns, query = make_build_sizes_query(config, thing_id, sections) + logging.debug('Query: %s', query) + + cur = db.execute(query) + rows = cur.fetchall() + if rows: + df = pd.DataFrame(rows, columns=columns) + df.attrs = { + 'name': f'qbs-{build}', + 'title': f'Sizes for {platform} {pconfig} {ptarget}', + } + return postprocess_df_time(config, df) + + return None + + +def main(argv): + status = 0 + try: + cfg = { + **memdf.util.config.CONFIG, + **memdf.util.sqlite.CONFIG, + **memdf.report.OUTPUT_CONFIG, + **QUERY_CONFIG, + } + cfg['database.file']['argparse']['required'] = True + + config = Config().init(cfg) + config.parse(argv) + + db = SizeDatabase(config['database.file'], writable=False) + db.open() + + dfs = {} + + q = 0 + for title, key, values, info in config.get('queries', []): + q += 1 + query = make_query(config, info) + logging.debug('Option: %s', key) + logging.debug('Title: %s', title) + logging.debug('Query: %s', query.strip()) + logging.debug('With: %s', values) + cur = db.execute(query, values) + columns = [i[0] for i in cur.description] + rows = cur.fetchall() + if rows: + df = pd.DataFrame(rows, columns=columns) + df.attrs = {'name': f'query{q}', 'title': title} + for f in info['sql'].get('postprocess', []): + df = f(config, df) + dfs[df.attrs['name']] = df + + if build := config['query.build-sizes']: + q += 1 + if (df := query_build_sizes(config, db, build)) is not None: + dfs[df.attrs['name']] = df + + if q == 0: + config.argparse.print_help() + return 1 + + memdf.report.write_dfs(config, + dfs, + hierify=config['hierify'], + title=True, + floatfmt='5.1f') + + except Exception as exception: + raise exception + + return status + + +if __name__ == '__main__': + sys.exit(main(sys.argv)) diff --git a/scripts/tools/memory/gh_report.py b/scripts/tools/memory/gh_report.py index 7a52052c4287a5..608fe9c5edbf9a 100755 --- a/scripts/tools/memory/gh_report.py +++ b/scripts/tools/memory/gh_report.py @@ -17,48 +17,39 @@ """Generate reports from size artifacts.""" import io -import itertools -import json import logging -import os -import os.path import re -import sqlite3 import sys -import zipfile -from pathlib import Path -from typing import Dict, IO, Iterable, Optional, Union +from typing import Dict -import dateutil # type: ignore import fastcore # type: ignore -import ghapi.all # type: ignore import pandas as pd # type: ignore import memdf.report +import memdf.sizedb import memdf.util.config +import memdf.util.markdown import memdf.util.sqlite +from memdf.util.github import Gh from memdf import Config, ConfigDescription -GITHUB_CONFIG: ConfigDescription = { - Config.group_def('github'): { - 'title': 'github options', +DB_CONFIG: ConfigDescription = { + 
Config.group_def('database'): { + 'title': 'database options', }, - 'github.token': { - 'help': 'Github API token, or "SKIP" to suppress connecting to github', - 'metavar': 'TOKEN', - 'default': '', + 'database.readonly': { + 'help': 'Open database read only', + 'default': False, 'argparse': { - 'alias': ['--github-api-token', '--token'], + 'alias': ['--db-readonly'], }, }, - 'github.repository': { - 'help': 'Github repostiory', - 'metavar': 'OWNER/REPO', - 'default': '', - 'argparse': { - 'alias': ['--repo'], - }, +} + +GITHUB_CONFIG: ConfigDescription = { + Config.group_def('github'): { + 'title': 'github options', }, 'github.comment': { 'help': 'Send output as github PR comments', @@ -67,17 +58,6 @@ 'alias': ['--comment'], }, }, - 'github.keep': { - 'help': 'Leave PR artifacts after commenting', - 'default': False, - 'argparse': { - 'alias': ['--keep'], - }, - }, - 'github.dryrun-comment': { - 'help': 'Dry run for sending output as github PR comments', - 'default': False, - }, 'github.limit-comments': { 'help': 'Send no more than COUNT comments', 'metavar': 'COUNT', @@ -94,14 +74,6 @@ 'type': int, }, }, - 'github.limit-artifact-pages': { - 'help': 'Examine no more than COUNT pages of artifacts', - 'metavar': 'COUNT', - 'default': 0, - 'argparse': { - 'type': int, - }, - }, 'github.limit-pr': { 'help': 'Report only on PR, if present.', 'metavar': 'PR', @@ -110,6 +82,9 @@ 'type': int, }, }, +} + +REPORT_CONFIG: ConfigDescription = { Config.group_map('report'): { 'group': 'output' }, @@ -127,13 +102,6 @@ 'alias': ['--push'] }, }, - 'report.query': { - 'help': 'Run an SQL query', - 'default': [], - 'argparse': { - 'alias': ['--query', '--sql'] - }, - }, 'report.increases': { 'help': 'Highlight large increases', 'metavar': 'PERCENT', @@ -146,151 +114,33 @@ } -class SizeDatabase(memdf.util.sqlite.Database): - """A database for recording and comparing size reports.""" - on_open = ["PRAGMA foreign_keys = ON", "PRAGMA encoding = 'UTF-8'"] - on_writable = [ - """ - -- A ‘thing’ identifies the kind of built object. - -- Builds of the same thing are comparable. - CREATE TABLE IF NOT EXISTS thing ( - id INTEGER PRIMARY KEY, - platform TEXT NOT NULL, -- Build platform - config TEXT NOT NULL, -- Build configuration discriminator - target TEXT NOT NULL, -- Build target - UNIQUE(platform, config, target) - ) - """, """ - -- A ‘build’ identifies a built instance of a thing at some point. - CREATE TABLE IF NOT EXISTS build ( - id INTEGER PRIMARY KEY, - thing_id INTEGER REFERENCES thing(id), - hash TEXT NOT NULL, -- Commit hash - parent TEXT NOT NULL, -- Parent commit hash - pr INTEGER DEFAULT 0, -- Github PR number - time INTEGER NOT NULL, -- Unix-epoch timestamp - artifact INTEGER DEFAULT 0, -- Github artifact ID - commented INTEGER DEFAULT 0, -- 1 if recorded in a GH comment - ref TEXT, -- Target git ref - event TEXT, -- Github build trigger event - UNIQUE(thing_id, hash, parent, pr, time, artifact) - ) - """, """ - -- A ‘size’ entry gives the size of a section for a particular build. 
- CREATE TABLE IF NOT EXISTS size ( - build_id INTEGER REFERENCES build(id), - name TEXT NOT NULL, -- Section name - size INTEGER NOT NULL, -- Section size in bytes - PRIMARY KEY (build_id, name) - ) - """ - ] +class SizeContext: + """Generate reports from size artifacts.""" + + comment_format_re = re.compile(r"^") def __init__(self, config: Config): - super().__init__(config['database.file']) self.config = config - self.gh = gh_open(config) - self.deleted_artifacts: set[int] = set() - - def add_sizes(self, **kwargs): - """ - Add a size report to the database. - - The incoming arguments must contain the required non-ID column names - from ‘thing’ and ‘build’ tables, plus a 'sizes' entry that is a - sequence of mappings containing 'name' and 'size'. - """ - td = {k: kwargs[k] for k in ('platform', 'config', 'target')} - thing = self.store_and_return_id('thing', **td) - bd = {k: kwargs[k] for k in ('hash', 'parent', 'time', 'event')} - if 'ref' in kwargs: - bd['ref'] = kwargs['ref'] - cd = {k: kwargs.get(k, 0) for k in ('pr', 'artifact', 'commented')} - build = self.store_and_return_id('build', thing_id=thing, **bd, **cd) - if build is None: - logging.error('Failed to store %s %s %s', thing, bd, cd) - else: - for d in kwargs['sizes']: - self.store('size', build_id=build, **d) - - def add_sizes_from_json(self, s: Union[bytes, str], origin: Dict): - """Add sizes from a JSON size report.""" - r = origin.copy() - r.update(json.loads(s)) - r['sizes'] = [] - # Add section sizes. - for i in r['frames'].get('section', []): - r['sizes'].append({'name': i['section'], 'size': i['size']}) - # Add segment sizes. - for i in r['frames'].get('wr', []): - r['sizes'].append({ - 'name': ('(read only)', '(read/write)')[int(i['wr'])], - 'size': i['size'] - }) - self.add_sizes(**r) - - def add_sizes_from_zipfile(self, f: Union[IO, Path], origin: Dict): - """Add size reports from a zip.""" - with zipfile.ZipFile(f, 'r') as zip_file: - for i in zip_file.namelist(): - if i.endswith('-sizes.json'): - origin['member'] = i - with zip_file.open(i) as member: - self.add_sizes_from_json(member.read(), origin) - - def add_sizes_from_file(self, filename: str): - """Add size reports from a file.""" - origin = {'file': filename} - path = Path(filename) - if path.suffix == '.json': - logging.info('ASJ: reading JSON %s', path) - with open(path) as f: - self.add_sizes_from_json(f.read(), origin) - elif path.suffix == '.zip': - logging.info('ASZ: reading ZIP %s', path) - self.add_sizes_from_zipfile(path, origin) - else: - logging.warning('Unknown file type "%s" ignored', filename) + self.gh = Gh(config) + db_file = config.get('database.file', ':memory:') + self.db = memdf.sizedb.SizeDatabase(db_file, + not config['database.readonly']) + self.db.open() def add_sizes_from_github(self): """Read size report artifacts from github.""" - if not self.gh: - return - artifact_limit = self.config['github.limit-artifacts'] - artifact_pages = self.config['github.limit-artifact-pages'] - - # Size artifacts have names of the form: - # Size,{group},{pr},{commit_hash},{parent_hash}[,{event}] - # Record them keyed by group and commit_hash to match them up + # Record size artifacts keyed by group and commit_hash to match them up # after we have the entire list. 
- page = 0 size_artifacts: Dict[str, Dict[str, fastcore.basics.AttrDict]] = {} - for i in ghapi.all.paged(self.gh.actions.list_artifacts_for_repo, 100): - if not i.artifacts: - break - for a in i.artifacts: - if a.name.startswith('Size,') and a.name.count(',') >= 4: - _, group, pr, commit, parent, *etc = a.name.split(',') - a.parent = parent - a.pr = pr - a.created_at = dateutil.parser.isoparse(a.created_at) - # Old artifact names don't include the event. - if etc: - event = etc[0] - else: - event = 'push' if pr == '0' else 'pull_request' - a.event = event - if group not in size_artifacts: - size_artifacts[group] = {} - size_artifacts[group][commit] = a - logging.debug('ASG: artifact %d %s', a.id, a.name) - page += 1 - logging.debug('ASP: artifact page %d of %d', page, artifact_pages) - if artifact_pages and page >= artifact_pages: - break + for a in self.gh.get_size_artifacts(): + if a.group not in size_artifacts: + size_artifacts[a.group] = {} + size_artifacts[a.group][a.commit] = a + logging.debug('ASG: artifact %d %s', a.id, a.name) # Determine required size artifacts. + artifact_limit = self.config['github.limit-artifacts'] required_artifact_ids: set[int] = set() for group, group_reports in size_artifacts.items(): logging.debug('ASG: group %s', group) @@ -313,419 +163,223 @@ def add_sizes_from_github(self): # Download and add required artifacts. for i in required_artifact_ids: - logging.debug('ASD: download artifact %d', i) - try: - blob = self.gh.actions.download_artifact(i, 'zip') - except Exception as e: - blob = None - logging.error('Failed to download artifact %d: %s', i, e) + blob = self.gh.download_artifact(i) if blob: - self.add_sizes_from_zipfile(io.BytesIO(blob), {'artifact': i}) + self.db.add_sizes_from_zipfile(io.BytesIO(blob), + {'artifact': i}) def read_inputs(self): """Read size report from github and/or local files.""" - self.add_sizes_from_github() + if self.gh: + self.add_sizes_from_github() for filename in self.config['args.inputs']: - self.add_sizes_from_file(filename) - self.commit() - - def select_matching_commits(self): - """Find matching builds, where one's commit is the other's parent.""" - return self.execute(''' - SELECT DISTINCT - c.event as event, - c.pr AS pr, - c.hash AS hash, - p.hash AS parent - FROM build c - INNER JOIN build p ON p.hash = c.parent - WHERE c.commented = 0 - ORDER BY c.time DESC, c.pr, c.hash, p.hash - ''') - - def set_commented(self, build_ids: Iterable[int]): - """Set the commented flag for the given builds.""" - if not build_ids: - return - for build_id in build_ids: - self.execute('UPDATE build SET commented = 1 WHERE id = ?', - (build_id, )) - self.commit() - - def delete_stale_builds(self, build_ids: Iterable[int]): - """Delete stale builds.""" - if not build_ids: - return - for build_id in build_ids: - logging.info('DSB: deleting obsolete build %d', build_id) - self.execute('DELETE FROM size WHERE build_id = ?', (build_id, )) - self.execute('DELETE FROM build WHERE id = ?', (build_id, )) - self.commit() - - def delete_artifact(self, artifact_id: int): - if (self.gh and artifact_id - and artifact_id not in self.deleted_artifacts): - self.deleted_artifacts.add(artifact_id) - try: - self.gh.actions.delete_artifact(artifact_id) - except Exception: - # During manual testing we sometimes lose the race against CI. 
- logging.error('Failed to delete artifact %d', artifact_id) - - def delete_stale_artifacts(self, stale_artifacts: Iterable[int]): - if not self.config['github.keep']: - for artifact_id in stale_artifacts: - logging.info('DSA: deleting obsolete artifact %d', artifact_id) - self.delete_artifact(artifact_id) - - def should_report(self, event: Optional[str] = None) -> bool: - """Return true if reporting is enabled for the event.""" - if event is None: + self.db.add_sizes_from_file(filename) + self.db.commit() + return self + + def should_report(self, event: str = '') -> bool: + """Return true if reporting is enabled for the action event.""" + if not event: return self.config['report.pr'] or self.config['report.push'] if event == 'pull_request': return self.config['report.pr'] return self.config['report.push'] - -def gh_open(config: Config) -> Optional[ghapi.core.GhApi]: - """Return a GhApi, if so configured.""" - gh: Optional[ghapi.core.GhApi] = None - if config['github.repository']: - owner, repo = config.get('github.repository').split('/', 1) - config.put('github.owner', owner) - config.put('github.repo', repo) - if not config['github.token']: - config['github.token'] = os.environ.get('GITHUB_TOKEN') - if not config['github.token']: - logging.error('Missing --github-token') - return None - token = config['github.token'] - if token != 'SKIP': - gh = ghapi.all.GhApi(owner=owner, - repo=repo, - token=config['github.token']) - return gh - - -def gh_get_comments_for_pr(gh: ghapi.core.GhApi, pr: int): - return itertools.chain.from_iterable( - ghapi.all.paged(gh.issues.list_comments, pr)) - - -def gh_get_commits_for_pr(gh: ghapi.core.GhApi, pr: int): - return itertools.chain.from_iterable( - ghapi.all.paged(gh.pulls.list_commits, pr)) - - -def percent_change(a: int, b: int) -> float: - if a == 0: - return 0.0 if b == 0 else float('inf') - return 100. * (b - a) / a - - -def changes_for_commit(db: SizeDatabase, pr: int, commit: str, - parent: str) -> pd.DataFrame: - """Return a DataFrame with size changes between the given commits.""" - cur = db.execute( - ''' - SELECT DISTINCT - t.id AS thing, - cb.artifact AS artifact, - pb.id AS parent_build, - cb.id AS commit_build, - t.platform, t.config, t.target, - cs.name AS name, - ps.size AS parent_size, - cs.size AS commit_size, - cs.size - ps.size AS change, - cb.time AS time - FROM thing t - INNER JOIN build cb ON cb.thing_id = t.id - INNER JOIN build pb ON pb.thing_id = t.id AND pb.hash = cb.parent - INNER JOIN size cs ON cs.build_id = cb.id - INNER JOIN size ps ON ps.build_id = pb.id AND cs.name = ps.name - WHERE cb.hash = ? AND pb.hash = ? - ORDER BY t.platform, t.config, t.target, - cs.name, cb.time DESC, pb.time DESC - ''', (commit, parent)) - - keep = ('platform', 'target', 'config', 'name', 'parent_size', - 'commit_size', 'change') - things: set[int] = set() - artifacts: set[int] = set() - builds: set[int] = set() - stale_builds: set[int] = set() - stale_artifacts: set[int] = set() - previous: Optional[sqlite3.Row] = None - rows = [] - - for row in cur.fetchall(): - row = sqlite3.Row(cur, row) - things.add(row['thing']) - if (previous is not None and row['thing'] == previous['thing'] - and row['name'] == previous['name']): - # This is duplicate build, older because we sort descending, - # presumably from a partial workflow re-run. 
- if row['parent_build'] != previous['parent_build']: - stale_builds.add(row['parent_build']) - if row['commit_build'] != previous['commit_build']: - stale_builds.add(row['commit_build']) - stale_artifacts.add(row['artifact']) - else: - previous = row - new = [row[k] for k in keep] - new.append(percent_change(row['parent_size'], row['commit_size'])) - rows.append(new) - artifacts.add(row['artifact']) - builds.add(row['commit_build']) - - db.delete_stale_builds(stale_builds) - db.delete_stale_artifacts(stale_artifacts) - - df = pd.DataFrame(rows, - columns=('platform', 'target', 'config', 'section', - parent[:8], commit[:8], 'change', '% change')) - df.attrs = { - 'name': f'{pr},{parent},{commit}', - 'title': (f'PR #{pr}: ' if pr else '') + - f'Size comparison from {parent} to {commit}', - 'things': things, - 'builds': builds, - 'artifacts': artifacts, - 'pr': pr, - 'commit': commit, - 'parent': parent, - } - return df - - -comment_format_re = re.compile(r"^") - - -def gh_send_change_report(db: SizeDatabase, df: pd.DataFrame) -> bool: - """Send a change report as a github comment.""" - - if not db.gh: - return False - - # Look for an existing comment for this change. - pr = df.attrs['pr'] - - # Check the most recent commit on the PR, so that we don't comment on - # builds that are already outdated. - commit = df.attrs['commit'] - commits = sorted( - gh_get_commits_for_pr(db.gh, pr), - key=lambda c: f'{c.commit.committer.date}{c.commit.author.date}', - reverse=True) - if commits and commit != commits[0].sha: - logging.info('SCS: PR #%s: not commenting for stale %s; newest is %s', - pr, commit, commits[0].sha) - # Return True so that the obsolete artifacts get removed. - return True - - # Check for an existing size report comment. If one exists, we'll add - # the new report to it. - title = df.attrs['title'] - existing_comment = None - existing_comment_format = 0 - for comment in gh_get_comments_for_pr(db.gh, pr): - comment_parts = comment.body.partition('\n') - if comment_parts[0].strip() == title: - existing_comment = comment - if m := comment_format_re.match(comment_parts[2]): - existing_comment_format = int(m.group(1)) - break - - if existing_comment_format != 1: + def get_existing_comment(self, pr: int, title: str): + """Check for an existing comment.""" existing_comment = None - text = gh_comment_v1(db, df, existing_comment) + existing_comment_format = 0 + for comment in self.gh.get_comments_for_pr(pr): + comment_parts = comment.body.partition('\n') + if comment_parts[0].strip() == title: + existing_comment = comment + if m := self.comment_format_re.match(comment_parts[2]): + existing_comment_format = int(m.group(1)) + break + return (existing_comment, existing_comment_format) + + def get_newest_commit(self, pr: int) -> str: + """Get the hash of the most recent commit on the PR.""" + commits = sorted( + self.gh.get_commits_for_pr(pr), + key=lambda c: f'{c.commit.committer.date}{c.commit.author.date}', + reverse=True) + return commits[0].sha if commits else '' + + def post_change_report(self, df: pd.DataFrame) -> bool: + """Send a change report as a github comment.""" + if not self.gh: + return False + pr = df.attrs['pr'] + + # Check for an existing size report comment. If one exists, we'll add + # the new information to it. + existing_comment, existing_comment_format = self.get_existing_comment( + pr, df.attrs['title']) + + if not existing_comment: + # Check the most recent commit on the PR, so that we don't comment + # for commits that are already outdated. 
+ commit = df.attrs['commit'] + latest = self.get_newest_commit(pr) + if commit != latest: + logging.info( + 'SCS: PR #%s: not commenting for stale %s; newest is %s', + pr, commit, latest) + # Return True so that the obsolete artifacts get removed. + return True + + if existing_comment_format == 1: + df = V1Comment.merge(df, existing_comment) + else: + existing_comment = None + text = V1Comment.format(self.config, df) - logging.info( - 'SCR: %s %s', df.attrs['title'], - f'updating comment {existing_comment.id}' - if existing_comment else 'as new comment') + if existing_comment: + return self.gh.update_comment(existing_comment.id, text) + return self.gh.create_comment(pr, text) - if db.config['github.dryrun-comment']: - logging.debug('%s', text) - return False + def report_matching_commits(self) -> Dict[str, pd.DataFrame]: + """Report on all new comparable commits.""" + if not self.should_report(): + return {} - try: - if existing_comment: - db.gh.issues.update_comment(existing_comment.id, text) - else: - db.gh.issues.create_comment(pr, text) - return True - except Exception: - return False - - -def gh_comment_v1(db: SizeDatabase, df: pd.DataFrame, existing_comment) -> str: - """Format a github comment.""" - - if existing_comment: - df = v1_comment_merge(df, existing_comment) - - threshold_df = None - increase_df = df[df['change'] > 0] - if increase_df.empty: - increase_df = None - elif threshold := db.config['report.increases']: - threshold_df = df[df['% change'] > threshold] - if threshold_df.empty: - threshold_df = None - decrease_df = df[df['change'] < 0] - if decrease_df.empty: - decrease_df = None - - with io.StringIO() as md: - md.write(df.attrs['title']) - md.write('\n\n\n') - - if threshold_df is not None: - md.write(f'**Increases above {threshold:.2g}%:**\n\n') - md.write('\n\n') - v1_comment_write_df(db, threshold_df, md) - - if increase_df is not None: - summary = v1_comment_summary(increase_df) - md.write('
<details>\n') - md.write(f'<summary>Increases ({summary})</summary>\n') - md.write('\n\n') - v1_comment_write_df(db, increase_df, md) - md.write('</details>
\n\n') + comment_count = 0 + comment_limit = self.config['github.limit-comments'] + comment_enabled = (self.config['github.comment'] + or self.config['github.dryrun-comment']) - if decrease_df is not None: - summary = v1_comment_summary(decrease_df) - md.write('
<details>\n') - md.write(f'<summary>Decreases ({summary})</summary>\n') - md.write('\n\n') - v1_comment_write_df(db, decrease_df, md) - md.write('</details>
\n\n') - - summary = v1_comment_summary(df) - md.write('
<details>\n') - md.write(f'<summary>Full report ({summary})</summary>\n') - md.write('\n\n') - v1_comment_write_df(db, df, md) - md.write('\n</details>
\n') - - return md.getvalue() - - -def v1_comment_merge(df: pd.DataFrame, comment) -> pd.DataFrame: - with io.StringIO(comment.body) as body: - for line in body: - if line.startswith(''): - body.readline() # Blank line before table. - header, rows = read_hierified(body) - break - logging.debug('REC: read %d rows', len(rows)) - df = df.append(pd.DataFrame(data=rows, columns=header).astype(df.dtypes)) - return df.sort_values( - by=['platform', 'target', 'config', 'section']).drop_duplicates() - - -def read_hierified(f): - """Read a markdown table in ‘hierified’ format.""" - - line = f.readline() - header = tuple((s.strip() for s in line.split('|')[1:-1])) - - _ = f.readline() # The line under the header. - - rows = [] - for line in f: - line = line.strip() - if not line: - break - row = [] - columns = line.split('|') - for i in range(0, len(header)): - column = columns[i + 1].strip() - if not column: - column = rows[-1][i] - row.append(column) - rows.append(tuple(row)) - - return (header, rows) - - -def v1_comment_write_df(db: SizeDatabase, df: pd.DataFrame, - out: memdf.report.OutputOption): - memdf.report.write_df(db.config, - df, - out, - 'pipe', - hierify=True, - title=False, - floatfmt='5.1f') - - -def v1_comment_summary(df: pd.DataFrame) -> str: - count = df[['platform', 'target', 'config']].drop_duplicates().shape[0] - platforms = ', '.join(sorted(list(set(df['platform'])))) - return f'{count} build{"" if count == 1 else "s"} for {platforms}' - - -def report_matching_commits(db: SizeDatabase) -> Dict[str, pd.DataFrame]: - """Report on all new comparable commits.""" - if not db.should_report(): - return {} - - comment_count = 0 - comment_limit = db.config['github.limit-comments'] - comment_enabled = (db.config['github.comment'] - or db.config['github.dryrun-comment']) - - only_pr = db.config['github.limit-pr'] - - dfs = {} - for event, pr, commit, parent in db.select_matching_commits().fetchall(): - if not db.should_report(event): - continue - - # Github doesn't have a way to fetch artifacts associated with a - # particular PR. For testing purposes, filter to a single PR here. - if only_pr and pr != only_pr: - continue - - df = changes_for_commit(db, pr, commit, parent) - if df.empty: - # Matching commits had no new matching builds. - continue - dfs[df.attrs['name']] = df - - if (event == 'pull_request' and comment_enabled - and (comment_limit == 0 or comment_limit > comment_count)): - if gh_send_change_report(db, df): - # Mark the originating builds, and remove the originating - # artifacts, so that they don't generate duplicate report - # comments. - db.set_commented(df.attrs['builds']) - if not db.config['github.keep']: - for artifact_id in df.attrs['artifacts']: - logging.info('RMC: deleting artifact %d', artifact_id) - db.delete_artifact(artifact_id) - comment_count += 1 - return dfs - - -def report_queries(db: SizeDatabase) -> Dict[str, pd.DataFrame]: - """Perform any requested SQL queries.""" - dfs = {} - q = 0 - for query in db.config['report.query']: - q += 1 - cur = db.execute(query) - columns = [i[0] for i in cur.description] - rows = cur.fetchall() - if rows: - df = pd.DataFrame(rows, columns=columns) - df.attrs = {'name': f'query{q}', 'title': query} + only_pr = self.config['github.limit-pr'] + + dfs = {} + commits = self.db.select_matching_commits() + for event, pr, commit, parent in commits.fetchall(): + if not self.should_report(event): + continue + + # Github doesn't have a way to fetch artifacts associated with a + # particular PR. 
For testing purposes, filter to a single PR here. + if only_pr and pr != only_pr: + continue + + changes = self.db.select_changes(parent, commit) + + self.db.delete_builds(changes.stale_builds) + self.gh.delete_artifacts(changes.stale_artifacts) + + if not changes.rows: + # Matching commits had no new matching builds. + continue + + df = pd.DataFrame(changes.rows, columns=changes.columns) + df.attrs = { + 'name': f'{pr},{parent},{commit}', + 'title': (f'PR #{pr}: ' if pr else '') + + f'Size comparison from {parent} to {commit}', + 'things': changes.things, + 'builds': changes.builds, + 'artifacts': changes.artifacts, + 'pr': pr, + 'commit': commit, + 'parent': parent, + } dfs[df.attrs['name']] = df - db.commit() - return dfs + + if (event == 'pull_request' and comment_enabled + and (comment_limit == 0 or comment_limit > comment_count)): + if self.post_change_report(df): + # Mark the originating builds, and remove the originating + # artifacts, so that they don't generate duplicate report + # comments. + self.db.set_commented(df.attrs['builds']) + self.gh.delete_artifacts(df.attrs['artifacts']) + comment_count += 1 + return dfs + + +class V1Comment: + """Format of a GitHub comment.""" + + @staticmethod + def format(config: Config, df: pd.DataFrame): + """Format a GitHub comment.""" + + threshold_df = None + increase_df = df[df['change'] > 0] + if increase_df.empty: + increase_df = None + elif threshold := config['report.increases']: + threshold_df = df[df['% change'] > threshold] + if threshold_df.empty: + threshold_df = None + decrease_df = df[df['change'] < 0] + if decrease_df.empty: + decrease_df = None + + with io.StringIO() as md: + md.write(df.attrs['title']) + md.write('\n\n\n') + + if threshold_df is not None: + md.write(f'**Increases above {threshold:.2g}%:**\n\n') + md.write('\n\n') + V1Comment.write_df(config, threshold_df, md) + + if increase_df is not None: + summary = V1Comment.summary(increase_df) + md.write('
<details>\n') + md.write(f'<summary>Increases ({summary})</summary>\n') + md.write('\n\n') + V1Comment.write_df(config, increase_df, md) + md.write('</details>
\n\n') + + if decrease_df is not None: + summary = V1Comment.summary(decrease_df) + md.write('
<details>\n') + md.write(f'<summary>Decreases ({summary})</summary>\n') + md.write('\n\n') + V1Comment.write_df(config, decrease_df, md) + md.write('</details>
\n\n') + + summary = V1Comment.summary(df) + md.write('
<details>\n') + md.write(f'<summary>Full report ({summary})</summary>\n') + md.write('\n\n') + V1Comment.write_df(config, df, md) + md.write('\n</details>
\n') + + return md.getvalue() + + @staticmethod + def summary(df: pd.DataFrame) -> str: + count = df[['platform', 'target', 'config']].drop_duplicates().shape[0] + platforms = ', '.join(sorted(list(set(df['platform'])))) + return f'{count} build{"" if count == 1 else "s"} for {platforms}' + + @staticmethod + def write_df(config: Config, df: pd.DataFrame, + out: memdf.report.OutputOption): + memdf.report.write_df(config, + df, + out, + 'pipe', + hierify=True, + title=False, + floatfmt='5.1f') + + @staticmethod + def merge(df: pd.DataFrame, comment) -> pd.DataFrame: + """Merge an existing comment into the DataFrame.""" + with io.StringIO(comment.body) as body: + for line in body: + if line.startswith(''): + body.readline() # Blank line before table. + cols, rows = memdf.util.markdown.read_hierified(body) + break + logging.debug('REC: read %d rows', len(rows)) + df = df.append(pd.DataFrame(data=rows, columns=cols).astype(df.dtypes)) + return df.sort_values( + by=['platform', 'target', 'config', 'section']).drop_duplicates() def main(argv): @@ -733,18 +387,19 @@ def main(argv): try: config = Config().init({ **memdf.util.config.CONFIG, + **memdf.util.github.CONFIG, **memdf.util.sqlite.CONFIG, **memdf.report.OUTPUT_CONFIG, **GITHUB_CONFIG, + **DB_CONFIG, + **REPORT_CONFIG, }) config.argparse.add_argument('inputs', metavar='FILE', nargs='*') config.parse(argv) - dfs = {} - with SizeDatabase(config) as db: - db.read_inputs() - dfs.update(report_matching_commits(db)) - dfs.update(report_queries(db)) + szc = SizeContext(config) + szc.read_inputs() + dfs = szc.report_matching_commits() memdf.report.write_dfs(config, dfs, diff --git a/scripts/tools/memory/memdf/README.md b/scripts/tools/memory/memdf/README.md index 6741da33162378..83628c7676e25a 100644 --- a/scripts/tools/memory/memdf/README.md +++ b/scripts/tools/memory/memdf/README.md @@ -1,17 +1,25 @@ This package contains routines to to collect, aggregate, and report memory usage, using Pandas `DataFrame` as the primary representation. -- memdf.df — `DataFrame` utilities, in particular definitions of columns and - types for the main uses of data frames. -- memdf.name — Names for synthetic symbols, etc. - memdf.collect — Helpers to read memory information from various sources (e.g. executables) according to command line options. -- memdf.select — Helpers to select relevant subsets of data frames according - to command line or configured options. +- memdf.df — `DataFrame` utilities, in particular definitions of columns and + types for the main uses of data frames. +- memdf.name — Names for synthetic symbols, etc. Individual readers are + located under memdf.collector. - memdf.report — Helpers to write data frames in various formats according to command line or configured options. +- memdf.select — Helpers to select relevant subsets of data frames according + to command line or configured options. +- memdf.sizedb — Helpers for a database of size information. + +Modules under memdf.util are not specifically tied to memory usage. + - memdf.util.config — `Config` utility class for managing command line or other options according to a declarative description. +- memdf.util.github — Utilities for communicating with GitHub. +- memdf.util.markdown — Utilities for manipulating Markdown text. - memdf.util.nd — Nested dictionary utilities, used by `Config`. - memdf.util.pretty — Pretty-printed logging utility functions. +- memdf.util.sqlite - Utilities for connecting to a sqlite3 database. - memdf.util.subprocess — Utilities for executing external commands. 
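+
+All of the scripts above use the declarative `Config` pattern from
+`memdf.util.config`: options are described as a dictionary and read back by
+dotted key after parsing. A minimal sketch (the `example.*` key is
+hypothetical, purely for illustration):
+
+```
+from memdf import Config
+import memdf.util.config
+
+EXAMPLE_CONFIG = {
+    Config.group_def('example'): {'title': 'example options'},
+    'example.limit': {
+        'help': 'Process no more than COUNT items',
+        'metavar': 'COUNT',
+        'default': 0,
+        'argparse': {'type': int},
+    },
+}
+
+def main(argv):
+    config = Config().init({**memdf.util.config.CONFIG, **EXAMPLE_CONFIG})
+    config.parse(argv)
+    return config['example.limit']  # options are read back by dotted key
+```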
diff --git a/scripts/tools/memory/memdf/collect.py b/scripts/tools/memory/memdf/collect.py index b04c7b87d2b63c..0575b967fe4dd7 100644 --- a/scripts/tools/memory/memdf/collect.py +++ b/scripts/tools/memory/memdf/collect.py @@ -62,7 +62,9 @@ **memdf.collector.elftools.CONFIG, **memdf.collector.readelf.CONFIG, 'collect.method': { - 'help': 'Method of input processing', + 'help': + 'Method of input processing: one of' + ' elftools, readelf, bloaty, csv, tsv, su.', 'metavar': 'METHOD', 'choices': ['elftools', 'readelf', 'bloaty', 'csv', 'tsv', 'su'], 'default': 'elftools', diff --git a/scripts/tools/memory/memdf/report.py b/scripts/tools/memory/memdf/report.py index ad5e391492dcc0..bda23163842bd8 100644 --- a/scripts/tools/memory/memdf/report.py +++ b/scripts/tools/memory/memdf/report.py @@ -69,7 +69,7 @@ } -def postprocess_report_by(config: Config, key: str) -> None: +def postprocess_report_by(config: Config, key: str, info: Mapping) -> None: """For --report-by=region, select all sections.""" assert key == 'report.by' if config.get(key) == 'region': @@ -147,7 +147,8 @@ def hierify(df: pd.DataFrame) -> pd.DataFrame: } -def postprocess_output_metadata(config: Config, key: str) -> None: +def postprocess_output_metadata(config: Config, key: str, + info: Mapping) -> None: """For --output-metadata=KEY:VALUE list, convert to dictionary.""" assert key == 'output.metadata' metadata = {} @@ -466,7 +467,7 @@ def __init__(self, 'title': 'output options', }, 'output.format': { - 'help': 'Output format', + 'help': f'Output format: one of {", ".join(WRITERS)}.', 'metavar': 'FORMAT', 'default': 'simple', 'choices': list(WRITERS.keys()), diff --git a/scripts/tools/memory/memdf/select.py b/scripts/tools/memory/memdf/select.py index 77a3d3f05b4d1e..251b1be5642c9a 100644 --- a/scripts/tools/memory/memdf/select.py +++ b/scripts/tools/memory/memdf/select.py @@ -23,7 +23,7 @@ from memdf import Config, ConfigDescription, DF -from typing import Optional +from typing import Mapping, Optional def split_size(config: Config, key: str) -> None: @@ -51,7 +51,7 @@ def get_limit(config: Config, column: str, name: str) -> int: return config.getl([column, 'limit', name], config.get('report.limit', 0)) -def postprocess_selections(config: Config, key: str) -> None: +def postprocess_selections(config: Config, key: str, info: Mapping) -> None: """Resolve select/ignore command options.""" split_size(config, key) choice, select = key.split('.') diff --git a/scripts/tools/memory/memdf/sizedb.py b/scripts/tools/memory/memdf/sizedb.py new file mode 100644 index 00000000000000..7e59caedd042e6 --- /dev/null +++ b/scripts/tools/memory/memdf/sizedb.py @@ -0,0 +1,254 @@ +# +# Copyright (c) 2021 Project CHIP Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +"""Sqlite3 database of binary sizes over time.""" + +import collections +import json +import logging +import sqlite3 +import zipfile + +from pathlib import Path +from typing import Dict, IO, Iterable, List, Optional, Union + +import memdf.util.sqlite + +ChangeInfo = collections.namedtuple('ChangeInfo', [ + 'columns', 'rows', 'things', 'builds', 'stale_builds', 'artifacts', + 'stale_artifacts' +]) + + +class SizeDatabase(memdf.util.sqlite.Database): + """A database for recording and comparing size reports.""" + on_open = ["PRAGMA foreign_keys = ON", "PRAGMA encoding = 'UTF-8'"] + on_writable = [ + """ + -- A ‘thing’ identifies the kind of built object. + -- Builds of the same thing are comparable. + CREATE TABLE IF NOT EXISTS thing ( + id INTEGER PRIMARY KEY, + platform TEXT NOT NULL, -- Build platform + config TEXT NOT NULL, -- Build configuration discriminator + target TEXT NOT NULL, -- Build target + UNIQUE(platform, config, target) + ) + """, """ + -- A ‘build’ identifies a built instance of a thing at some point. + CREATE TABLE IF NOT EXISTS build ( + id INTEGER PRIMARY KEY, + thing_id INTEGER REFERENCES thing(id), + hash TEXT NOT NULL, -- Commit hash + parent TEXT NOT NULL, -- Parent commit hash + pr INTEGER DEFAULT 0, -- Github PR number + time INTEGER NOT NULL, -- Unix-epoch timestamp + artifact INTEGER DEFAULT 0, -- Github artifact ID + commented INTEGER DEFAULT 0, -- 1 if recorded in a GH comment + ref TEXT, -- Target git ref + event TEXT, -- Github build trigger event + UNIQUE(thing_id, hash, parent, pr, time, artifact) + ) + """, """ + -- A ‘size’ entry gives the size of a section for a particular build. + CREATE TABLE IF NOT EXISTS size ( + build_id INTEGER REFERENCES build(id), + name TEXT NOT NULL, -- Section name + size INTEGER NOT NULL, -- Section size in bytes + PRIMARY KEY (build_id, name) + ) + """ + ] + + def __init__(self, filename: str, writable: bool = True): + super().__init__(filename, writable) + + def add_sizes(self, **kwargs): + """ + Add a size report to the database. + + The incoming arguments must contain the required non-ID column names + from ‘thing’ and ‘build’ tables, plus a 'sizes' entry that is a + sequence of mappings containing 'name' and 'size'. + """ + td = {k: kwargs[k] for k in ('platform', 'config', 'target')} + thing = self.store_and_return_id('thing', **td) + bd = {k: kwargs[k] for k in ('hash', 'parent', 'time', 'event')} + if 'ref' in kwargs: + bd['ref'] = kwargs['ref'] + cd = {k: kwargs.get(k, 0) for k in ('pr', 'artifact', 'commented')} + build = self.store_and_return_id('build', thing_id=thing, **bd, **cd) + if build is None: + logging.error('Failed to store %s %s %s', thing, bd, cd) + else: + for d in kwargs['sizes']: + self.store('size', build_id=build, **d) + + def add_sizes_from_json(self, s: Union[bytes, str], origin: Dict): + """Add sizes from a JSON size report.""" + r = origin.copy() + r.update(json.loads(s)) + r['sizes'] = [] + # Add section sizes. + for i in r['frames'].get('section', []): + r['sizes'].append({'name': i['section'], 'size': i['size']}) + # Add segment sizes. 
+ for i in r['frames'].get('wr', []): + r['sizes'].append({ + 'name': ('(read only)', '(read/write)')[int(i['wr'])], + 'size': + i['size'] + }) + self.add_sizes(**r) + + def add_sizes_from_zipfile(self, f: Union[IO, Path], origin: Dict): + """Add size reports from a zip.""" + with zipfile.ZipFile(f, 'r') as zip_file: + for i in zip_file.namelist(): + if i.endswith('-sizes.json'): + origin['member'] = i + with zip_file.open(i) as member: + self.add_sizes_from_json(member.read(), origin) + + def add_sizes_from_file(self, filename: str): + """Add size reports from a file.""" + origin = {'file': filename} + path = Path(filename) + if path.suffix == '.json': + logging.info('ASJ: reading JSON %s', path) + with open(path, encoding='utf-8') as f: + self.add_sizes_from_json(f.read(), origin) + elif path.suffix == '.zip': + logging.info('ASZ: reading ZIP %s', path) + self.add_sizes_from_zipfile(path, origin) + else: + logging.warning('Unknown file type "%s" ignored', filename) + + def select_thing_id(self, platform: str, config: str, + target: str) -> Optional[str]: + cur = self.execute( + 'SELECT id FROM thing WHERE platform=? AND config=? AND target=?', + (platform, config, target)) + row = cur.fetchone() + return row[0] if row else None + + def select_sections_for_thing(self, thing: str) -> List[str]: + cur = self.execute( + ''' + SELECT DISTINCT name FROM size WHERE build_id = ( + SELECT DISTINCT id FROM build WHERE thing_id == ?) + ORDER BY name + ''', (thing,)) + return [row[0] for row in cur.fetchall()] + + def select_matching_commits(self): + """Find matching builds, where one's commit is the other's parent.""" + return self.execute(''' + SELECT DISTINCT + c.event as event, + c.pr AS pr, + c.hash AS hash, + p.hash AS parent + FROM build c + INNER JOIN build p ON p.hash = c.parent + WHERE c.commented = 0 + ORDER BY c.time DESC, c.pr, c.hash, p.hash + ''') + + def select_changes(self, parent: str, commit: str) -> ChangeInfo: + """Returns size changes between the given commits.""" + cur = self.execute( + ''' + SELECT DISTINCT + t.id AS thing, + cb.artifact AS artifact, + pb.id AS parent_build, + cb.id AS commit_build, + t.platform, t.config, t.target, + cs.name AS name, + ps.size AS parent_size, + cs.size AS commit_size, + cb.time AS time + FROM thing t + INNER JOIN build cb ON cb.thing_id = t.id + INNER JOIN build pb ON pb.thing_id = t.id AND pb.hash = cb.parent + INNER JOIN size cs ON cs.build_id = cb.id + INNER JOIN size ps ON ps.build_id = pb.id AND cs.name = ps.name + WHERE cb.hash = ? AND pb.hash = ? + ORDER BY t.platform, t.config, t.target, + cs.name, cb.time DESC, pb.time DESC + ''', (commit, parent)) + + keep = ('platform', 'target', 'config', 'name', 'parent_size', + 'commit_size') + things: set[int] = set() + artifacts: set[int] = set() + builds: set[int] = set() + stale_builds: set[int] = set() + stale_artifacts: set[int] = set() + previous: Optional[sqlite3.Row] = None + rows = [] + + for row in cur.fetchall(): + row = sqlite3.Row(cur, row) + things.add(row['thing']) + if (previous is not None and row['thing'] == previous['thing'] + and row['name'] == previous['name']): + # This is duplicate build, older because we sort descending, + # presumably from a partial workflow re-run. 
+ if row['parent_build'] != previous['parent_build']: + stale_builds.add(row['parent_build']) + if row['commit_build'] != previous['commit_build']: + stale_builds.add(row['commit_build']) + stale_artifacts.add(row['artifact']) + else: + previous = row + new = [row[k] for k in keep] + parent_size = row['parent_size'] + commit_size = row['commit_size'] + new.append(commit_size - parent_size) + new.append(self.percent_change(parent_size, commit_size)) + rows.append(new) + artifacts.add(row['artifact']) + builds.add(row['commit_build']) + + return ChangeInfo(('platform', 'target', 'config', 'section', + parent[:8], commit[:8], 'change', '% change'), rows, + things, builds, stale_builds, artifacts, + stale_artifacts) + + def set_commented(self, build_ids: Iterable[int]): + """Set the commented flag for the given builds.""" + if not build_ids: + return + for build_id in build_ids: + self.execute('UPDATE build SET commented = 1 WHERE id = ?', + (build_id, )) + self.commit() + + def delete_builds(self, build_ids: Iterable[int]): + """Delete the given builds.""" + if not build_ids: + return + for build_id in build_ids: + self.execute('DELETE FROM size WHERE build_id = ?', (build_id, )) + self.execute('DELETE FROM build WHERE id = ?', (build_id, )) + self.commit() + + @staticmethod + def percent_change(a: int, b: int) -> float: + if a == 0: + return 0.0 if b == 0 else float('inf') + return 100. * (b - a) / a diff --git a/scripts/tools/memory/memdf/util/config.py b/scripts/tools/memory/memdf/util/config.py index 20f1434a236233..f8294ad8b037a5 100644 --- a/scripts/tools/memory/memdf/util/config.py +++ b/scripts/tools/memory/memdf/util/config.py @@ -41,8 +41,8 @@ # supplied as keyword arguments to `argparse.add_argument()`, # except for: # 'alias': list of alternate command line option names -# 'postprocess': a callable invoked after argument parsing with two -# arguments: the config and the key +# 'postprocess': a callable invoked after argument parsing with three +# arguments: the config, the key, and the description entry. # # Special keys can be used to control argument parser groups. By default any # configuration key containing a ‘.’ belongs to a group determined by the @@ -157,6 +157,7 @@ def init_args(self, desc: ConfigDescription, *args, **kwargs) -> 'Config': arg_info = arg_info.copy() name = arg_info.pop('argument', '--' + key.replace('.', '-')) names = [name] + arg_info.pop('alias', []) + info['names'] = names for k in ['metavar', 'choices']: if k in info: arg_info[k] = info[k] @@ -171,7 +172,7 @@ def init_args(self, desc: ConfigDescription, *args, **kwargs) -> 'Config': elif isinstance(default, int) and 'metavar' not in info: arg_info['action'] = 'count' if postprocess := info.get('postprocess'): - self.postprocess_args[key] = postprocess + self.postprocess_args[key] = (postprocess, info) group: Optional[str] = info.get('group') if group is None and (e := key.find('.')) > 0: @@ -226,10 +227,6 @@ def parse(self, argv: Sequence[str]) -> 'Config': key = 'args.' + dest self.put(key, value) - # Postprocess config. - for key, action in self.postprocess_args.items(): - action(self, key) - # Configure logging. if self.get('log-level') is None: verbose = self.get('verbose', 0) @@ -242,6 +239,11 @@ def parse(self, argv: Sequence[str]) -> 'Config': logging.basicConfig(level=self.get('log-level'), format=self.get('log-format')) + # Postprocess config. 
+ for key, postprocess in self.postprocess_args.items(): + action, info = postprocess + action(self, key, info) + memdf.util.pretty.debug(self.d) return self @@ -292,8 +294,8 @@ def __call__(self, parser, namespace, values, option_string=None): # Config description of options shared by all tools. CONFIG: ConfigDescription = { 'log-level': { - 'help': 'Set logging level', - 'metavar': 'LEVEL', + 'help': + 'Set logging level: one of critical, error, warning, info, debug.', 'default': None, 'choices': ['critical', 'error', 'warning', 'info', 'debug'], }, diff --git a/scripts/tools/memory/memdf/util/github.py b/scripts/tools/memory/memdf/util/github.py new file mode 100644 index 00000000000000..bcd14c597701a2 --- /dev/null +++ b/scripts/tools/memory/memdf/util/github.py @@ -0,0 +1,237 @@ +# +# Copyright (c) 2022 Project CHIP Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Utility wrapper for GitHub operations.""" + +import itertools +import logging +import os + +from typing import Iterable, Mapping, Optional + +import dateutil # type: ignore +import dateutil.parser # type: ignore +import ghapi.all # type: ignore + +from memdf import Config, ConfigDescription + + +def postprocess_config(config: Config, _key: str, _info: Mapping) -> None: + """Postprocess --github-repository.""" + if config['github.repository']: + owner, repo = config.get('github.repository').split('/', 1) + config.put('github.owner', owner) + config.put('github.repo', repo) + if not config['github.token']: + config['github.token'] = os.environ.get('GITHUB_TOKEN') + if not config['github.token']: + logging.error('Missing --github-token') + + +CONFIG: ConfigDescription = { + Config.group_def('github'): { + 'title': 'github options', + }, + 'github.token': { + 'help': 'Github API token, or "SKIP" to suppress connecting to github', + 'metavar': 'TOKEN', + 'default': '', + 'argparse': { + 'alias': ['--github-api-token', '--token'], + }, + }, + 'github.repository': { + 'help': 'Github repostiory', + 'metavar': 'OWNER/REPO', + 'default': '', + 'argparse': { + 'alias': ['--repo'], + }, + 'postprocess': postprocess_config, + }, + 'github.dryrun-comment': { + 'help': "Don't actually post comments", + 'default': False, + }, + 'github.keep': { + 'help': "Don't remove PR artifacts", + 'default': False, + 'argparse': { + 'alias': ['--keep'], + }, + }, + 'github.limit-artifact-pages': { + 'help': 'Examine no more than COUNT pages of artifacts', + 'metavar': 'COUNT', + 'default': 0, + 'argparse': { + 'type': int, + }, + }, +} + + +class Gh: + """Utility wrapper for GitHub operations.""" + + def __init__(self, config: Config): + self.config = config + self.ghapi: Optional[ghapi.all.GhApi] = None + self.deleted_artifacts: set[int] = set() + + owner = config['github.owner'] + repo = config['github.repo'] + token = config['github.token'] + if owner and repo and token and token != 'SKIP': + self.ghapi = ghapi.all.GhApi(owner=owner, repo=repo, token=token) + + def __bool__(self): + return self.ghapi is not None + + def 
get_comments_for_pr(self, pr: int): + """Iterate PR comments.""" + assert self.ghapi + try: + return itertools.chain.from_iterable( + ghapi.all.paged(self.ghapi.issues.list_comments, pr)) + except Exception as e: + logging.error('Failed to get comments for PR #%d: %s', pr, e) + return [] + + def get_commits_for_pr(self, pr: int): + """Iterate PR commits.""" + assert self.ghapi + try: + return itertools.chain.from_iterable( + ghapi.all.paged(self.ghapi.pulls.list_commits, pr)) + except Exception as e: + logging.error('Failed to get commits for PR #%d: %s', pr, e) + return [] + + def get_artifacts(self, page_limit: int = -1, per_page: int = -1): + """Iterate artifact descriptions.""" + if page_limit < 0: + page_limit = self.config['github.limit-artifact-pages'] + if per_page < 0: + per_page = self.config['github.artifacts-per-page'] or 100 + + assert self.ghapi + try: + page = 0 + for i in ghapi.all.paged( + self.ghapi.actions.list_artifacts_for_repo, + per_page): + if not i.artifacts: + break + for a in i.artifacts: + yield a + page += 1 + logging.debug('ASP: artifact page %d of %d', page, page_limit) + if page_limit and page >= page_limit: + break + except Exception as e: + logging.error('Failed to get artifact list: %s', e) + + def get_size_artifacts(self, + page_limit: int = -1, + per_page: int = -1, + label: str = ''): + """Iterate size artifact descriptions.""" + for a in self.get_artifacts(page_limit, per_page): + # Size artifacts have names of the form: + # Size,{group},{pr},{commit_hash},{parent_hash}[,{event}] + # This information is added to the attribute record from GitHub. + if a.name.startswith('Size,') and a.name.count(',') >= 4: + _, group, pr, commit, parent, *etc = a.name.split(',') + if label and group != label: + continue + a.group = group + a.commit = commit + a.parent = parent + a.pr = pr + a.created_at = dateutil.parser.isoparse(a.created_at) + # Old artifact names don't include the event. + if etc: + event = etc[0] + else: + event = 'push' if pr == '0' else 'pull_request' + a.event = event + yield a + + def download_artifact(self, artifact_id: int): + """Download a GitHub artifact, returning a binary zip object.""" + logging.debug('Downloading artifact %d', artifact_id) + try: + assert self.ghapi + return self.ghapi.actions.download_artifact(artifact_id, 'zip') + except Exception as e: + logging.error('Failed to download artifact %d: %s', artifact_id, e) + return None + + def delete_artifact(self, artifact_id: int) -> bool: + """Delete a GitHub artifact.""" + if not artifact_id or artifact_id in self.deleted_artifacts: + return True + self.deleted_artifacts.add(artifact_id) + + if self.config['github.keep']: + logging.info('Suppressed deleting artifact %d', artifact_id) + return False + + try: + assert self.ghapi + logging.info('Deleting artifact %d', artifact_id) + self.ghapi.actions.delete_artifact(artifact_id) + return True + except Exception as e: + # During manual testing we sometimes lose the race against CI. 
+ logging.error('Failed to delete artifact %d: %s', artifact_id, e) + return False + + def delete_artifacts(self, artifacts: Iterable[int]): + for artifact_id in artifacts: + self.delete_artifact(artifact_id) + + def create_comment(self, issue_id: int, text: str) -> bool: + """Create a GitHub comment.""" + if self.config['github.dryrun-comment']: + logging.info('Suppressed creating comment on #%d', issue_id) + logging.debug('%s', text) + return False + + assert self.ghapi + logging.info('Creating comment on #%d', issue_id) + try: + self.ghapi.issues.create_comment(issue_id, text) + return True + except Exception as e: + logging.error('Failed to created comment on #%d: %s', issue_id, e) + return False + + def update_comment(self, comment_id: int, text: str) -> bool: + """Update a GitHub comment.""" + if self.config['github.dryrun-comment']: + logging.info('Suppressed updating comment #%d', comment_id) + logging.debug('%s', text) + return False + + logging.info('Updating comment #%d', comment_id) + try: + assert self.ghapi + self.ghapi.issues.update_comment(comment_id, text) + return True + except Exception as e: + logging.error('Failed to update comment %d: %s', comment_id, e) + return False diff --git a/scripts/tools/memory/memdf/util/markdown.py b/scripts/tools/memory/memdf/util/markdown.py new file mode 100644 index 00000000000000..52aea03e86db91 --- /dev/null +++ b/scripts/tools/memory/memdf/util/markdown.py @@ -0,0 +1,41 @@ +# +# Copyright (c) 2021 Project CHIP Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Markdown utilities.""" + + +def read_hierified(f): + """Read a markdown table in ‘hierified’ format.""" + + line = f.readline() + header = tuple((s.strip() for s in line.split('|')[1:-1])) + + _ = f.readline() # The line under the header. 
+ + rows = [] + for line in f: + line = line.strip() + if not line: + break + row = [] + columns = line.split('|') + for i in range(0, len(header)): + column = columns[i + 1].strip() + if not column: + column = rows[-1][i] + row.append(column) + rows.append(tuple(row)) + + return (header, rows) diff --git a/scripts/tools/memory/memdf/util/sqlite.py b/scripts/tools/memory/memdf/util/sqlite.py index 94b6f9b33a2f81..c0d825ac98f860 100644 --- a/scripts/tools/memory/memdf/util/sqlite.py +++ b/scripts/tools/memory/memdf/util/sqlite.py @@ -20,6 +20,8 @@ from typing import List, Optional +import pandas as pd # type: ignore + from memdf import Config, ConfigDescription CONFIG: ConfigDescription = { @@ -29,7 +31,7 @@ 'database.file': { 'help': 'Sqlite3 file', 'metavar': 'FILENAME', - 'default': ':memory:', + 'default': None, 'argparse': { 'alias': ['--db'], }, @@ -112,3 +114,12 @@ def get_matching_id(self, table: str, **kwargs): def store_and_return_id(self, table: str, **kwargs) -> Optional[int]: self.store(table, **kwargs) return self.get_matching_id(table, **kwargs) + + def data_frame(self, query, parameters=None) -> pd.DataFrame: + """Return the results of a query as a DataFrame.""" + cur = self.execute(query, parameters) + columns = [i[0] for i in cur.description] + df = pd.DataFrame(cur.fetchall(), columns=columns) + self.commit() + df.attrs = {'title': query} + return df
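
The pieces above are easiest to follow end to end. Below is a minimal sketch of driving `memdf/sizedb.py` directly, in the way `gh_db_load.py` and `gh_report.py` do: load a `*-sizes.json` report produced by `gh_sizes.py`, pair up builds where one commit is the other's parent, and print the size changes. The database path is illustrative, and the `open()` call is an assumption about the inherited `memdf.util.sqlite.Database` class, which is not part of this change.

```
from memdf.sizedb import SizeDatabase

db = SizeDatabase('/tmp/sizes.db', writable=True)
db.open()  # Assumed from the base Database class; not shown in this diff.

# A `.zip` of CI artifacts also works, via add_sizes_from_zipfile().
db.add_sizes_from_file(
    '/tmp/bloat_reports/linux-arm64-thermostat-no-ble-sizes.json')

# Pair up builds where one commit is the other's parent, then diff them.
for event, pr, commit, parent in db.select_matching_commits():
    changes = db.select_changes(parent, commit)
    # ChangeInfo.columns is the header; each row holds (platform, target,
    # config, section, parent size, commit size, change, % change).
    for row in changes.rows:
        print(dict(zip(changes.columns, row)))
```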
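
The `Gh` wrapper in `memdf/util/github.py` can feed that database from CI artifacts. This is a rough sketch only: it assumes `config` is a memdf `Config` already populated from `memdf.util.github.CONFIG` and command-line arguments (so `github.owner`, `github.repo`, and `github.token` are set), that the artifact records carry an `id` field, and that `download_artifact()` returns the zip as bytes. The `Linux-Examples` label matches the artifact-name convention described in the CI README.

```
import io

from memdf.util.github import Gh

gh = Gh(config)  # `config` is assumed to be an initialized memdf Config.
if gh:  # Falsy when no usable repository/token was configured.
    for artifact in gh.get_size_artifacts(label='Linux-Examples'):
        # Each record is annotated with group, pr, commit, parent, and
        # event, parsed from the `Size,...` artifact name.
        blob = gh.download_artifact(artifact.id)
        if blob:
            db.add_sizes_from_zipfile(io.BytesIO(blob),
                                      {'artifact': artifact.id})
```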
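
The new `Database.data_frame()` helper is the natural bridge from such a database into the tabular reporting pipeline: it runs a query, names the columns from the cursor description, and stashes the query text in `DataFrame.attrs['title']`. Against the schema above it can be used along these lines (the query itself is only an illustration):

```
df = db.data_frame(
    'SELECT t.platform, t.config, t.target, COUNT(b.id) AS builds'
    '  FROM thing t JOIN build b ON b.thing_id = t.id'
    '  GROUP BY t.id')
print(df.attrs['title'])
print(df.to_string(index=False))
```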
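
Finally, `read_hierified()` in `memdf/util/markdown.py` reads back the ‘hierified’ tables the report writers produce, where blank cells on continuation rows inherit their value from the row above. A self-contained example, with table contents invented for illustration:

```
import io

from memdf.util.markdown import read_hierified

table = io.StringIO(
    '| platform | target     | section | size |\n'
    '| -------- | ---------- | ------- | ---- |\n'
    '| linux    | thermostat | .text   | 100  |\n'
    '|          |            | .bss    | 20   |\n')

header, rows = read_hierified(table)
assert header == ('platform', 'target', 'section', 'size')
# Blank cells were filled in from the row above:
assert rows[1] == ('linux', 'thermostat', '.bss', '20')
```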