From 3f53a9246f444ef1df1aa53d9f3d1537f3210475 Mon Sep 17 00:00:00 2001 From: Andreas Motl Date: Fri, 1 Dec 2023 14:13:58 +0100 Subject: [PATCH] cratedb-wtf: Add `cratedb-wtf` diagnostics program --- .gitignore | 2 + CHANGES.md | 1 + cratedb_toolkit/cli.py | 2 + cratedb_toolkit/util/cli.py | 8 +- cratedb_toolkit/util/data.py | 15 +- cratedb_toolkit/util/platform.py | 56 +++ cratedb_toolkit/util/service.py | 23 + cratedb_toolkit/wtf/README.md | 47 ++ cratedb_toolkit/wtf/__init__.py | 0 cratedb_toolkit/wtf/backlog.md | 34 ++ cratedb_toolkit/wtf/cli.py | 188 ++++++++ cratedb_toolkit/wtf/core.py | 81 ++++ cratedb_toolkit/wtf/http.py | 41 ++ cratedb_toolkit/wtf/library.py | 587 +++++++++++++++++++++++++ cratedb_toolkit/wtf/model.py | 86 ++++ cratedb_toolkit/wtf/query_collector.py | 247 +++++++++++ cratedb_toolkit/wtf/util.py | 21 + pyproject.toml | 5 +- tests/retention/test_cli.py | 2 +- tests/wtf/__init__.py | 0 tests/wtf/test_cli.py | 111 +++++ 21 files changed, 1551 insertions(+), 6 deletions(-) create mode 100644 cratedb_toolkit/util/platform.py create mode 100644 cratedb_toolkit/util/service.py create mode 100644 cratedb_toolkit/wtf/README.md create mode 100644 cratedb_toolkit/wtf/__init__.py create mode 100644 cratedb_toolkit/wtf/backlog.md create mode 100644 cratedb_toolkit/wtf/cli.py create mode 100644 cratedb_toolkit/wtf/core.py create mode 100644 cratedb_toolkit/wtf/http.py create mode 100644 cratedb_toolkit/wtf/library.py create mode 100644 cratedb_toolkit/wtf/model.py create mode 100644 cratedb_toolkit/wtf/query_collector.py create mode 100644 cratedb_toolkit/wtf/util.py create mode 100644 tests/wtf/__init__.py create mode 100644 tests/wtf/test_cli.py diff --git a/.gitignore b/.gitignore index efc0473d..c228796a 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,5 @@ __pycache__ dist .coverage* coverage.xml +/foo +/tmp diff --git a/CHANGES.md b/CHANGES.md index 1a80dc53..16b6d75e 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -14,6 +14,7 @@ - MongoDB: Improve UX by using `ctk load table mongodb://...` - load table: Refactor to use more OO - Add `examples/cloud_import.py` +- Add `cratedb-wtf` diagnostics program ## 2023/11/06 v0.0.2 diff --git a/cratedb_toolkit/cli.py b/cratedb_toolkit/cli.py index 26c0ef65..9315a701 100644 --- a/cratedb_toolkit/cli.py +++ b/cratedb_toolkit/cli.py @@ -7,6 +7,7 @@ from .io.cli import cli as io_cli from .job.cli import cli_list_jobs from .shell.cli import cli as shell_cli +from .wtf.cli import cli as wtf_cli @click.group(cls=ClickAliasedGroup) # type: ignore[arg-type] @@ -21,4 +22,5 @@ def cli(ctx: click.Context, verbose: bool, debug: bool): cli.add_command(cloud_cli, name="cluster") cli.add_command(io_cli, name="load") cli.add_command(shell_cli, name="shell") +cli.add_command(wtf_cli, name="wtf") cli.add_command(cli_list_jobs) diff --git a/cratedb_toolkit/util/cli.py b/cratedb_toolkit/util/cli.py index 9ebcb3ab..f5956dea 100644 --- a/cratedb_toolkit/util/cli.py +++ b/cratedb_toolkit/util/cli.py @@ -72,13 +72,15 @@ def boot_with_dburi(): return dburi -def make_command(cli, name, helpfun=None, aliases=None): +def make_command(cli, name, help=None, aliases=None): # noqa: A002 """ Convenience shortcut for creating a subcommand. 
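+
+    The `help` argument accepts either a plain string, or a callable whose
+    docstring is used verbatim as the help text.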
""" kwargs = {} - if helpfun: - kwargs["help"] = docstring_format_verbatim(helpfun.__doc__) + if isinstance(help, str): + kwargs["help"] = help + elif callable(help): + kwargs["help"] = docstring_format_verbatim(help.__doc__) return cli.command( name, context_settings={"max_content_width": 120}, diff --git a/cratedb_toolkit/util/data.py b/cratedb_toolkit/util/data.py index 3a4d67d1..62cdbb33 100644 --- a/cratedb_toolkit/util/data.py +++ b/cratedb_toolkit/util/data.py @@ -1,3 +1,4 @@ +import datetime as dt import json import sys import typing as t @@ -7,7 +8,7 @@ def jd(data: t.Any): """ Pretty-print JSON with indentation. """ - print(json.dumps(data, indent=2), file=sys.stdout) # noqa: T201 + print(json.dumps(data, indent=2, cls=JSONEncoderPlus), file=sys.stdout) # noqa: T201 def str_contains(haystack, *needles): @@ -16,3 +17,15 @@ def str_contains(haystack, *needles): """ haystack = str(haystack) return any(needle in haystack for needle in needles) + + +class JSONEncoderPlus(json.JSONEncoder): + """ + https://stackoverflow.com/a/27058505 + """ + + def default(self, o): + if isinstance(o, dt.datetime): + return o.isoformat() + + return json.JSONEncoder.default(self, o) diff --git a/cratedb_toolkit/util/platform.py b/cratedb_toolkit/util/platform.py new file mode 100644 index 00000000..0c588dfd --- /dev/null +++ b/cratedb_toolkit/util/platform.py @@ -0,0 +1,56 @@ +import io +import json +from contextlib import redirect_stdout + + +class PlatformInfo: + @staticmethod + def application(): + import platform + + from cratedb_toolkit import __appname__, __version__ + + data = {} + + data["platform"] = platform.platform() + data["version"] = __version__ + data["name"] = __appname__ + return data + + @staticmethod + def libraries(): + data = {} + + # SQLAlchemy + from importlib.metadata import entry_points + + try: + import sqlalchemy.dialects.plugins + import sqlalchemy.dialects.registry + + data["sqlalchemy"] = { + "dialects_builtin": list(sqlalchemy.dialects.registry.impls.keys()), + "dialects_3rdparty": [dialect.name for dialect in entry_points(group="sqlalchemy.dialects")], # type: ignore[attr-defined,call-arg] + "plugins": list(sqlalchemy.dialects.plugins.impls.keys()), + } + except Exception: # noqa: S110 + pass + + # pandas + try: + import pandas + + buffer = io.StringIO() + with redirect_stdout(buffer): + pandas.show_versions(as_json=True) + buffer.seek(0) + data["pandas"] = json.load(buffer) + except Exception: # noqa: S110 + pass + + # fsspec + import fsspec + + data["fsspec"] = {"protocols": fsspec.available_protocols(), "compressions": fsspec.available_compressions()} + + return data diff --git a/cratedb_toolkit/util/service.py b/cratedb_toolkit/util/service.py new file mode 100644 index 00000000..a26c70b4 --- /dev/null +++ b/cratedb_toolkit/util/service.py @@ -0,0 +1,23 @@ +# Copyright (c) 2021-2023, Crate.io Inc. +# Distributed under the terms of the AGPLv3 license, see LICENSE. 
+import logging
+import typing as t
+
+from cratedb_toolkit.util.common import setup_logging
+
+logger = logging.getLogger(__name__)
+
+
+def start_service(app: str, listen_address: t.Union[str, None] = None, reload: bool = False):  # pragma: no cover
+    setup_logging()
+    from uvicorn import run
+
+    if listen_address is None:
+        listen_address = "127.0.0.1:4242"
+
+    host, port = listen_address.split(":")
+    port_int = int(port)
+
+    logger.info(f"Starting HTTP web service on http://{listen_address}")
+
+    run(app=app, host=host, port=port_int, reload=reload)
diff --git a/cratedb_toolkit/wtf/README.md b/cratedb_toolkit/wtf/README.md
new file mode 100644
index 00000000..f06a4774
--- /dev/null
+++ b/cratedb_toolkit/wtf/README.md
@@ -0,0 +1,47 @@
+# cratedb-wtf
+
+A diagnostics utility in the spirit of [git-wtf], [grafana-wtf], and [pip.wtf].
+It is still work in progress, but already usable.
+
+
+## Synopsis
+
+Define the CrateDB database cluster address.
+```shell
+export CRATEDB_SQLALCHEMY_URL=crate://localhost/
+```
+
+Display system and database cluster information.
+```shell
+cratedb-wtf info
+```
+
+Display database cluster log messages.
+```shell
+cratedb-wtf logs
+```
+
+Display job / query statistics.
+```shell
+cratedb-wtf job-statistics quick
+cratedb-wtf job-statistics collect
+cratedb-wtf job-statistics view
+```
+
+
+## HTTP API
+
+Expose collected status information.
+```shell
+cratedb-wtf --debug serve --reload
+```
+Consume collected status information via HTTP.
+```shell
+http http://127.0.0.1:4242/info/all
+```
+
+
+[git-wtf]: http://thrawn01.org/posts/2014/03/03/git-wtf/
+[grafana-wtf]: https://github.com/panodata/grafana-wtf
+[pip.wtf]: https://github.com/sabslikesobs/pip.wtf
diff --git a/cratedb_toolkit/wtf/__init__.py b/cratedb_toolkit/wtf/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/cratedb_toolkit/wtf/backlog.md b/cratedb_toolkit/wtf/backlog.md
new file mode 100644
index 00000000..9c616cfa
--- /dev/null
+++ b/cratedb_toolkit/wtf/backlog.md
@@ -0,0 +1,34 @@
+# cratedb-wtf backlog
+
+## Iteration +1
+- Expose collected data via Glances-like UI
+- Experimental UI using Grafana Scenes
+
+## Iteration +2
+- Make `cratedb-wtf logs` also optionally consider `sys.` tables.
+- cratedb-wtf explore table|shard|partition|node
+- High-level analysis, evaluating a set of threshold rules
+- Network diagnostics?
+
+## Iteration +3
+- Make it work with CrateDB Cloud.
+  ```
+  ctk cluster info
+  ctk cluster health
+  ctk cluster logs --slow-queries
+  ```
+
+## Done
+- Make it work
+- Proper marshalling of timestamp values (ISO 8601)
+- Expose collected data via HTTP API
+  ```
+  cratedb-wtf serve
+  ```
+- Provide `scrub` option also via HTTP
+- Complete collected queries and code snippets
+- Harvest queries from Admin UI, crash, crate-admin-alt
+- Harvest queries from experts
+  - https://tools.cr8.net/grafana/d/RkpNJx84z/cratedb-jobs-log?orgId=1&refresh=5m&var-datasource=crate-production
+  - https://tools.cr8.net/grafana/d/RkpNJx84z/cratedb-jobs-log?orgId=1&refresh=5m&var-datasource=crate-production&viewPanel=44
+- Add `description` and `unit` fields to each `InfoElement` definition
diff --git a/cratedb_toolkit/wtf/cli.py b/cratedb_toolkit/wtf/cli.py
new file mode 100644
index 00000000..5b2761db
--- /dev/null
+++ b/cratedb_toolkit/wtf/cli.py
@@ -0,0 +1,188 @@
+# Copyright (c) 2021-2023, Crate.io Inc.
+# Distributed under the terms of the AGPLv3 license, see LICENSE.
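+"""
+Command-line interface for `cratedb-wtf`.
+
+A minimal usage sketch, mirroring the synopsis in the README:
+
+    export CRATEDB_SQLALCHEMY_URL=crate://localhost/
+    cratedb-wtf info
+    cratedb-wtf logs
+"""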
+import logging +import os +import sys +import typing as t +import urllib.parse + +import click +from click_aliases import ClickAliasedGroup + +from cratedb_toolkit.util import DatabaseAdapter +from cratedb_toolkit.util.cli import ( + boot_click, + make_command, +) +from cratedb_toolkit.util.data import jd +from cratedb_toolkit.wtf.core import InfoContainer, JobInfoContainer, LogContainer + +logger = logging.getLogger(__name__) + + +def help_info(): + """ + Database cluster and system information. + + Synopsis + ======== + + export CRATEDB_SQLALCHEMY_URL=crate://localhost/ + cratedb-wtf info + + """ # noqa: E501 + + +def help_logs(): + """ + Database cluster logs. + + Synopsis + ======== + + export CRATEDB_SQLALCHEMY_URL=crate://localhost/ + cratedb-wtf logs + + """ # noqa: E501 + + +def help_statistics(): + """ + Database cluster job / query statistics. + + Synopsis + ======== + + export CRATEDB_SQLALCHEMY_URL=crate://localhost/ + cratedb-wtf job-statistics quick + cratedb-wtf job-statistics collect + cratedb-wtf job-statistics view + + """ # noqa: E501 + + +def help_serve(): + """ + Start HTTP service to expose collected information. + + Synopsis + ======== + + export CRATEDB_SQLALCHEMY_URL=crate://localhost/ + cratedb-wtf serve + + """ # noqa: E501 + + +cratedb_sqlalchemy_option = click.option( + "--cratedb-sqlalchemy-url", envvar="CRATEDB_SQLALCHEMY_URL", type=str, required=False, help="CrateDB SQLAlchemy URL" +) + + +@click.group(cls=ClickAliasedGroup) # type: ignore[arg-type] +@cratedb_sqlalchemy_option +@click.option("--verbose", is_flag=True, required=False, help="Turn on logging") +@click.option("--debug", is_flag=True, required=False, help="Turn on logging with debug level") +@click.option("--scrub", envvar="SCRUB", is_flag=True, required=False, help="Blank out identifiable information") +@click.version_option() +@click.pass_context +def cli(ctx: click.Context, cratedb_sqlalchemy_url: str, verbose: bool, debug: bool, scrub: bool): + """ + Diagnostics and informational utilities. + """ + if not cratedb_sqlalchemy_url: + logger.error("Unable to operate without database address") + sys.exit(1) + ctx.meta.update({"cratedb_sqlalchemy_url": cratedb_sqlalchemy_url, "scrub": scrub}) + return boot_click(ctx, verbose, debug) + + +@make_command(cli, "info", help_info) +@click.pass_context +def info(ctx: click.Context): + cratedb_sqlalchemy_url = ctx.meta["cratedb_sqlalchemy_url"] + scrub = ctx.meta.get("scrub", False) + adapter = DatabaseAdapter(dburi=cratedb_sqlalchemy_url) + sample = InfoContainer(adapter=adapter, scrub=scrub) + jd(sample.to_dict()) + + +@make_command(cli, "logs", help_logs) +@click.pass_context +def logs(ctx: click.Context): + cratedb_sqlalchemy_url = ctx.meta["cratedb_sqlalchemy_url"] + scrub = ctx.meta.get("scrub", False) + adapter = DatabaseAdapter(dburi=cratedb_sqlalchemy_url) + sample = LogContainer(adapter=adapter, scrub=scrub) + jd(sample.to_dict()) + + +@make_command(cli, "job-info", "Display information about jobs / queries.") +@click.pass_context +def job_information(ctx: click.Context): + """ + Display ad hoc job information. + """ + cratedb_sqlalchemy_url = ctx.meta["cratedb_sqlalchemy_url"] + scrub = ctx.meta.get("scrub", False) + adapter = DatabaseAdapter(dburi=cratedb_sqlalchemy_url) + sample = JobInfoContainer(adapter=adapter, scrub=scrub) + jd(sample.to_dict()) + + +@click.group(cls=ClickAliasedGroup) # type: ignore[arg-type] +@click.pass_context +def job_statistics(ctx: click.Context): + """ + Collect and display statistics about jobs / queries. 
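+
+    Synopsis
+    ========
+
+        cratedb-wtf job-statistics collect
+        cratedb-wtf job-statistics view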
+ """ + pass + + +cli.add_command(job_statistics, name="job-statistics", aliases=["jobstats"]) + + +@make_command(job_statistics, "collect", "Collect queries from sys.jobs_log.") +@click.pass_context +def job_statistics_collect(ctx: click.Context): + """ + Run jobs_log collector. + + # TODO: Forward `cratedb_sqlalchemy_url` properly. + """ + import cratedb_toolkit.wtf.query_collector + + cratedb_toolkit.wtf.query_collector.main() + + +@make_command(job_statistics, "view", "View job statistics about collected queries.") +@click.pass_context +def job_statistics_view(ctx: click.Context): + """ + View job statistics about collected queries. + + # TODO: Forward `cratedb_sqlalchemy_url` properly. + """ + cratedb_sqlalchemy_url = ctx.meta["cratedb_sqlalchemy_url"] + url = urllib.parse.urlparse(cratedb_sqlalchemy_url) + hostname = f"{url.hostname}:{url.port or 4200}" + os.environ["HOSTNAME"] = hostname + + import cratedb_toolkit.wtf.query_collector + + cratedb_toolkit.wtf.query_collector.init() + + response: t.Dict = {"meta": {}, "data": {}} + response["meta"]["remark"] = "WIP! This is a work in progress. The output format will change." + response["data"]["stats"] = cratedb_toolkit.wtf.query_collector.read_stats() + jd(response) + + +@make_command(cli, "serve", help_serve) +@click.option("--listen", type=click.STRING, default=None, help="HTTP server listen address") +@click.option("--reload", is_flag=True, help="Dynamically reload changed files") +@click.pass_context +def serve(ctx: click.Context, listen: str, reload: bool): + from cratedb_toolkit.wtf.http import start + + start(listen, reload=reload) diff --git a/cratedb_toolkit/wtf/core.py b/cratedb_toolkit/wtf/core.py new file mode 100644 index 00000000..a69fcb0b --- /dev/null +++ b/cratedb_toolkit/wtf/core.py @@ -0,0 +1,81 @@ +# Copyright (c) 2021-2024, Crate.io Inc. +# Distributed under the terms of the AGPLv3 license, see LICENSE. +import boltons.ecoutils + +from cratedb_toolkit.util.platform import PlatformInfo +from cratedb_toolkit.wtf.library import Library +from cratedb_toolkit.wtf.model import InfoContainerBase + + +class InfoContainer(InfoContainerBase): + def register_builtins(self): + self.elements.add( + # General cluster health information. + Library.Health.cluster_name, + Library.Health.nodes_count, + Library.Health.nodes_list, + Library.Health.table_health, + Library.Health.backups_recent, + # Shard / node / partition allocation and rebalancing information. + Library.Shards.allocation, + Library.Shards.table_allocation, + Library.Shards.node_shard_distribution, + Library.Shards.table_shard_count, + Library.Shards.rebalancing_progress, + Library.Shards.rebalancing_status, + Library.Shards.not_started, + Library.Shards.not_started_count, + Library.Shards.max_checkpoint_delta, + Library.Shards.total_count, + Library.Shards.translog_uncommitted, + Library.Shards.translog_uncommitted_size, + ) + + def to_dict(self, data=None): + return super().to_dict(data={"system": self.system(), "database": self.database()}) + + def system(self): + data = {} + data["remark"] = ( + "This section includes system information about the machine running CrateDB " + 'Toolkit, effectively about the "compute" domain.' 
+ ) + data["application"] = PlatformInfo.application() + data["eco"] = boltons.ecoutils.get_profile(scrub=self.scrub) + # data["libraries"] = PlatformInfo.libraries() # noqa: ERA001 + return data + + def database(self): + data = {} + data["remark"] = ( + "This section includes system and other diagnostics information about the CrateDB " + 'database cluster, effectively about the "storage" domain.' + ) + for element in self.elements.items: + data[element.name] = self.evaluate_element(element) + return data + + +class LogContainer(InfoContainerBase): + def register_builtins(self): + self.elements.add( + Library.Logs.user_queries_latest, + ) + + +class JobInfoContainer(InfoContainerBase): + def register_builtins(self): + self.elements.add( + Library.JobInfo.age_range, + Library.JobInfo.by_user, + Library.JobInfo.duration_buckets, + Library.JobInfo.duration_percentiles, + Library.JobInfo.history100, + Library.JobInfo.history_count, + Library.JobInfo.performance15min, + Library.JobInfo.running, + Library.JobInfo.running_count, + Library.JobInfo.top100_count, + Library.JobInfo.top100_duration_individual, + Library.JobInfo.top100_duration_total, + ) diff --git a/cratedb_toolkit/wtf/http.py b/cratedb_toolkit/wtf/http.py new file mode 100644 index 00000000..1d70a4a7 --- /dev/null +++ b/cratedb_toolkit/wtf/http.py @@ -0,0 +1,41 @@ +# Copyright (c) 2021-2023, Crate.io Inc. +# Distributed under the terms of the AGPLv3 license, see LICENSE. +import logging +import os +import typing as t +from functools import lru_cache + +from fastapi import Depends, FastAPI, HTTPException + +from cratedb_toolkit.util import DatabaseAdapter +from cratedb_toolkit.util.service import start_service +from cratedb_toolkit.wtf.core import InfoContainer +from cratedb_toolkit.wtf.util import get_baseinfo + +logger = logging.getLogger(__name__) + +app = FastAPI() + + +@lru_cache +def database_adapter() -> DatabaseAdapter: + # TODO: return config.Settings() + cratedb_sqlalchemy_url = os.environ["CRATEDB_SQLALCHEMY_URL"] + return DatabaseAdapter(dburi=cratedb_sqlalchemy_url) + + +@app.get("/") +def read_root(): + return get_baseinfo() + + +@app.get("/info/{category}") +def info(category: str, adapter: t.Annotated[DatabaseAdapter, Depends(database_adapter)], scrub: bool = False): # type: ignore[name-defined] + if category != "all": + raise HTTPException(status_code=404, detail="Info category not found") + sample = InfoContainer(adapter=adapter, scrub=scrub) + return sample.to_dict() + + +def start(listen_address: t.Union[str, None] = None, reload: bool = False): # pragma: no cover + start_service(app="cratedb_toolkit.wtf.http:app") diff --git a/cratedb_toolkit/wtf/library.py b/cratedb_toolkit/wtf/library.py new file mode 100644 index 00000000..a2167bbc --- /dev/null +++ b/cratedb_toolkit/wtf/library.py @@ -0,0 +1,587 @@ +# Copyright (c) 2021-2024, Crate.io Inc. +# Distributed under the terms of the AGPLv3 license, see LICENSE. +from cratedb_toolkit.wtf.model import InfoElement, LogElement +from cratedb_toolkit.wtf.util import get_single_value + + +class Library: + """ + A collection of SQL queries and utilities suitable for diagnostics on CrateDB. + + Credits to the many authors and contributors of CrateDB diagnostics utilities, + dashboards, and cheat sheets. + + Acknowledgements: Baurzhan Sakhariev, Eduardo Legatti, Georg Traar, Hernan + Cianfagna, Ivan Sanchez Valencia, Karyn Silva de Azevedo, Niklas Schmidtmer, + Walter Behmann. 
+ + References: + - https://community.cratedb.com/t/similar-elasticsearch-commands/1455/4 + - CrateDB Admin UI. + - CrateDB Grafana General Diagnostics Dashboard. + - Debugging CrateDB - Queries Cheat Sheet. + """ + + class Health: + """ + CrateDB health check queries. + """ + + backups_recent = InfoElement( + name="backups_recent", + label="Recent Backups", + sql=""" + SELECT repository, name, finished, state + FROM sys.snapshots + ORDER BY finished DESC + LIMIT 10; + """, + description="Most recent 10 backups", + ) + + cluster_name = InfoElement( + name="cluster_name", + label="Cluster name", + sql=r"SELECT name FROM sys.cluster;", + transform=get_single_value("name"), + ) + + nodes_count = InfoElement( + name="cluster_nodes_count", + label="Total number of cluster nodes", + sql=r"SELECT COUNT(id) AS count FROM sys.nodes;", + transform=get_single_value("count"), + ) + nodes_list = InfoElement( + name="cluster_nodes_list", + label="Cluster Nodes", + sql="SELECT * FROM sys.nodes ORDER BY hostname;", + description="Telemetry information for all cluster nodes.", + ) + table_health = InfoElement( + name="table_health", + label="Table Health", + sql="SELECT health, COUNT(*) AS table_count FROM sys.health GROUP BY health;", + description="Table health short summary", + ) + + class JobInfo: + """ + Information distilled from `sys.jobs_log` and `sys.jobs`. + """ + + age_range = InfoElement( + name="age_range", + label="Query age range", + description="Timestamps of first and last job", + sql=""" + SELECT + MIN(started) AS "first_job", + MAX(started) AS "last_job" + FROM sys.jobs_log; + """, + ) + by_user = InfoElement( + name="by_user", + label="Queries by user", + sql=r""" + SELECT + username, + COUNT(username) AS count + FROM sys.jobs_log + GROUP BY username + ORDER BY count DESC; + """, + description="Total number of queries per user.", + ) + + duration_buckets = InfoElement( + name="duration_buckets", + label="Query Duration Distribution (Buckets)", + sql=""" + WITH dur AS ( + SELECT + ended-started::LONG AS duration + FROM sys.jobs_log + ), + pct AS ( + SELECT + [0.25,0.5,0.75,0.99,0.999,1] pct_in, + percentile(duration,[0.25,0.5,0.75,0.99,0.999,1]) as pct, + count(*) cnt + FROM dur + ) + SELECT + UNNEST(pct_in) * 100 AS bucket, + cnt - CEIL(UNNEST(pct_in) * cnt) AS count, + CEIL(UNNEST(pct)) duration + ---cnt + FROM pct; + """, + description="Distribution of query durations, bucketed.", + ) + duration_percentiles = InfoElement( + name="duration_percentiles", + label="Query Duration Distribution (Percentiles)", + sql=""" + SELECT + min(ended-started::LONG) AS min, + percentile(ended-started::LONG, 0.50) AS p50, + percentile(ended-started::LONG, 0.90) AS p90, + percentile(ended-started::LONG, 0.99) AS p99, + MAX(ended-started::LONG) AS max + FROM + sys.jobs_log + LIMIT 50; + """, + description="Distribution of query durations, percentiles.", + ) + history100 = InfoElement( + name="history", + label="Query History", + sql=""" + SELECT + started AS "time", + stmt, + (ended::LONG - started::LONG) AS duration, + username + FROM sys.jobs_log + WHERE stmt NOT ILIKE '%snapshot%' + ORDER BY time DESC + LIMIT 100; + """, + transform=lambda x: list(reversed(x)), + description="Statements and durations of the 100 recent queries / jobs.", + ) + history_count = InfoElement( + name="history_count", + label="Query History Count", + sql=""" + SELECT + COUNT(*) AS job_count + FROM + sys.jobs_log; + """, + transform=get_single_value("job_count"), + description="Total number of queries on this node.", + ) + 
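+        # Note: a `transform` callable, such as `get_single_value("job_count")` above,
+        # post-processes the fetched records; here it plucks `records[0]["job_count"]`,
+        # turning a single-row result into a scalar.
+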
+        performance15min = InfoElement(
+            name="performance15min",
+            label="Query performance 15min",
+            sql=r"""
+                SELECT
+                    CURRENT_TIMESTAMP AS last_timestamp,
+                    (ended / 10000) * 10000 + 5000 AS ended_time,
+                    COUNT(*) / 10.0 AS qps,
+                    TRUNC(AVG(ended::BIGINT - started::BIGINT), 2) AS duration,
+                    UPPER(regexp_matches(stmt,'^\s*(\w+).*')[1]) AS query_type
+                FROM
+                    sys.jobs_log
+                WHERE
+                    ended > now() - ('15 minutes')::INTERVAL
+                GROUP BY 1, 2, 5
+                ORDER BY ended_time ASC;
+            """,
+            description="The query performance within the last 15 minutes, including two metrics: "
+            "queries per second, and query speed (ms).",
+        )
+        running = InfoElement(
+            name="running",
+            label="Currently Running Queries",
+            sql="""
+                SELECT
+                    started AS "time",
+                    stmt,
+                    (CURRENT_TIMESTAMP::LONG - started::LONG) AS duration,
+                    username
+                FROM sys.jobs
+                WHERE stmt NOT ILIKE '%snapshot%'
+                ORDER BY time;
+            """,
+            description="Statements and durations of currently running queries / jobs.",
+        )
+        running_count = InfoElement(
+            name="running_count",
+            label="Number of running queries",
+            sql="""
+                SELECT
+                    COUNT(*) AS job_count
+                FROM
+                    sys.jobs;
+            """,
+            transform=get_single_value("job_count"),
+            description="Total number of currently running queries.",
+        )
+        top100_count = InfoElement(
+            name="top100_count",
+            label="Query frequency",
+            description="The 100 most frequent queries.",
+            sql="""
+                SELECT
+                    stmt,
+                    COUNT(stmt) AS stmt_count,
+                    MAX((ended::LONG - started::LONG)) AS max_duration,
+                    MIN((ended::LONG - started::LONG)) AS min_duration,
+                    AVG((ended::LONG - started::LONG)) AS avg_duration,
+                    PERCENTILE((ended::LONG - started::LONG), 0.99) AS p99
+                FROM sys.jobs_log
+                GROUP BY stmt
+                ORDER BY stmt_count DESC
+                LIMIT 100;
+            """,
+        )
+        top100_duration_individual = InfoElement(
+            name="top100_duration_individual",
+            label="Individual Query Duration",
+            description="The 100 queries by individual duration.",
+            sql="""
+                SELECT
+                    (ended::LONG - started::LONG) AS duration,
+                    stmt
+                FROM sys.jobs_log
+                ORDER BY duration DESC
+                LIMIT 100;
+            """,
+            unit="ms",
+        )
+        top100_duration_total = InfoElement(
+            name="top100_duration_total",
+            label="Total Query Duration",
+            description="The 100 queries by total duration.",
+            sql="""
+                SELECT
+                    SUM(ended::LONG - started::LONG) AS total_duration,
+                    stmt,
+                    COUNT(stmt) AS stmt_count
+                FROM sys.jobs_log
+                GROUP BY stmt
+                ORDER BY total_duration DESC
+                LIMIT 100;
+            """,
+            unit="ms",
+        )
+
+    class Logs:
+        """
+        Access `sys.jobs_log` for logging purposes.
+        """
+
+        # TODO: Implement `tail` in one way or another. -- https://stackoverflow.com/q/4714975
+        # SELECT * FROM sys.jobs_log OFFSET -10;
+        # SELECT * FROM sys.jobs_log OFFSET (SELECT count(*) FROM sys.jobs_log)-10;
+        # https://cratedb.com/docs/crate/reference/en/latest/general/builtins/scalar-functions.html#to-char-expression-format-string
+        # https://cratedb.com/docs/crate/reference/en/latest/general/builtins/scalar-functions.html#date-format-format-string-timezone-timestamp
+        user_queries_latest = LogElement(
+            name="user_queries_latest",
+            label="Latest User Queries",
+            sql=r"""
+                SELECT
+                    DATE_FORMAT('%Y-%m-%dT%H:%i:%s.%f', started) AS started,
+                    DATE_FORMAT('%Y-%m-%dT%H:%i:%s.%f', ended) AS ended,
+                    classification, stmt, username, node
+                FROM
+                    sys.jobs_log
+                WHERE
+                    stmt NOT LIKE '%sys.%' AND
+                    stmt NOT LIKE '%information_schema.%'
+                ORDER BY ended DESC
+                LIMIT {limit};
+            """,
+        )
+
+    class Replication:
+        """
+        Information about logical replication.
+ """ + + # https://github.com/crate/crate/blob/master/docs/admin/logical-replication.rst#monitoring + subscriptions = """ + SELECT s.subname, s.subpublications, sr.srrelid::text, sr.srsubstate, sr.srsubstate_reason + FROM pg_subscription s + JOIN pg_subscription_rel sr ON s.oid = sr.srsubid + ORDER BY s.subname; + """ + + class Resources: + """ + About system resources. + """ + + # TODO: Needs templating. + column_cardinality = """ + SELECT tablename, attname, n_distinct + FROM pg_stats + WHERE schemaname = '...' + AND tablename IN (...) + AND attname IN (...); + """ + + file_descriptors = """ + SELECT + name AS node_name, + process['open_file_descriptors'] AS "open_file_descriptors", + process['max_open_file_descriptors'] AS max_open_file_descriptors + FROM sys.nodes + ORDER BY node_name; + """ + + heap_usage = """ + SELECT + name AS node_name, + heap['used'] / heap['max']::DOUBLE AS heap_used + FROM sys.nodes + ORDER BY node_name; + """ + + tcp_connections = """ + SELECT + name AS node_name, + connections + FROM sys.nodes + ORDER BY node_name; + """ + + # TODO: Why "14"? Is it about only getting information about the `write` thread pool? + thread_pools = """ + SELECT + name AS node_name, + thread_pools[14]['queue'], + thread_pools[14]['active'], + thread_pools[14]['threads'] + FROM sys.nodes + ORDER BY node_name; + """ + + class Settings: + """ + Reflect cluster settings. + """ + + info = """ + SELECT + name, + master_node, + settings['cluster']['routing']['allocation']['cluster_concurrent_rebalance'] + AS cluster_concurrent_rebalance, + settings['indices']['recovery']['max_bytes_per_sec'] AS max_bytes_per_sec + FROM sys.cluster + LIMIT 1; + """ + + class Shards: + """ + Information about shard / node / table / partition allocation and rebalancing. + """ + + # https://cratedb.com/docs/crate/reference/en/latest/admin/system-information.html#example + # TODO: Needs templating. + for_table = """ + SELECT + schema_name, + table_name, + id, + partition_ident, + num_docs, + primary, + relocating_node, + routing_state, + state, + orphan_partition + FROM sys.shards + WHERE schema_name = '{schema_name}' AND table_name = '{table_name}'; + """ + + # Identify the location of the shards for each partition. + # TODO: Needs templating. 
+ location_for_partition = """ + SELECT table_partitions.table_schema, + table_partitions.table_name, + table_partitions.values[{partition_column}]::TIMESTAMP, + shards.primary, + shards.node['name'] + FROM sys.shards + JOIN information_schema.table_partitions ON shards.partition_ident=table_partitions.partition_ident + WHERE table_partitions.table_name = {table_name} + ORDER BY 1,2,3,4,5; + """ + + allocation = InfoElement( + name="shard_allocation", + sql=""" + SELECT + IF(s.primary = TRUE, 'primary', 'replica') AS shard_type, + COALESCE(shards, 0) AS shards + FROM + UNNEST([true, false]) s(primary) + LEFT JOIN ( + SELECT primary, COUNT(*) AS shards + FROM sys.allocations + WHERE current_state != 'STARTED' + GROUP BY 1 + ) a ON s.primary = a.primary; + """, + label="Shard Allocation", + description="Support identifying issues with shard allocation.", + ) + + max_checkpoint_delta = InfoElement( + name="max_checkpoint_delta", + sql=""" + SELECT + COALESCE(MAX(seq_no_stats['local_checkpoint'] - seq_no_stats['global_checkpoint']), 0) + AS max_checkpoint_delta + FROM sys.shards; + """, + transform=get_single_value("max_checkpoint_delta"), + label="Delta between local and global checkpoint", + description="If the delta between the local and global checkpoint is significantly large, " + "shard replication might have stalled or slowed down.", + ) + + # data-hot-2 262 + # data-hot-1 146 + node_shard_distribution = InfoElement( + name="node_shard_distribution", + label="Shard Distribution", + sql=""" + SELECT + node['name'] AS node_name, + COUNT(*) AS num_shards + FROM sys.shards + WHERE primary = true + GROUP BY node_name; + """, + description="Shard distribution across nodes.", + ) + + not_started = InfoElement( + name="shard_not_started", + label="Shards not started", + sql=""" + SELECT * + FROM sys.allocations + WHERE current_state != 'STARTED'; + """, + description="Information about shards which have not been started.", + ) + not_started_count = InfoElement( + name="shard_not_started_count", + label="Number of shards not started", + description="Total number of shards which have not been started.", + sql=""" + SELECT COUNT(*) AS not_started_count + FROM sys.allocations + WHERE current_state != 'STARTED'; + """, + transform=get_single_value("not_started_count"), + ) + + rebalancing_progress = InfoElement( + name="shard_rebalancing_progress", + label="Shard Rebalancing Progress", + sql=""" + SELECT + table_name, + schema_name, + recovery['stage'] AS recovery_stage, + AVG(recovery['size']['percent']) AS progress, + COUNT(*) AS count + FROM + sys.shards + GROUP BY table_name, schema_name, recovery_stage; + """, + description="Information about rebalancing progress.", + ) + + rebalancing_status = InfoElement( + name="shard_rebalancing_status", + label="Shard Rebalancing Status", + sql=""" + SELECT node['name'], id, recovery['stage'], recovery['size']['percent'], routing_state, state + FROM sys.shards + WHERE routing_state IN ('INITIALIZING', 'RELOCATING') + ORDER BY id; + """, + description="Information about rebalancing activities.", + ) + + table_allocation = InfoElement( + name="table_allocation", + label="Table Allocations", + sql=""" + SELECT + table_schema, table_name, node_id, shard_id, partition_ident, current_state, decisions, explanation + FROM + sys.allocations; + """, + description="Table allocation across nodes, shards, and partitions.", + ) + + table_allocation_special = InfoElement( + name="table_allocation_special", + label="Table Allocations Special", + sql=""" + SELECT 
+                    decisions[2]['node_name'] AS node_name, COUNT(*) AS table_count
+                FROM sys.allocations
+                GROUP BY decisions[2]['node_name'];
+            """,
+            description="Table allocation, grouped by the node name referenced in the second allocation decision.",
+        )
+
+        table_shard_count = InfoElement(
+            name="table_shard_count",
+            label="Table Shard Count",
+            sql="""
+                SELECT
+                    table_schema,
+                    table_name,
+                    SUM(number_of_shards) AS num_shards
+                FROM
+                    information_schema.table_partitions
+                WHERE
+                    closed = false
+                GROUP BY table_schema, table_name;
+            """,
+            description="Total number of shards per table.",
+        )
+
+        total_count = InfoElement(
+            name="shard_total_count",
+            label="Number of shards",
+            description="Total number of shards.",
+            sql="""
+                SELECT COUNT(*) AS shard_count
+                FROM sys.shards;
+            """,
+            transform=get_single_value("shard_count"),
+        )
+
+        # TODO: Are both `translog_uncommitted` items sensible?
+        translog_uncommitted = InfoElement(
+            name="translog_uncommitted",
+            label="Uncommitted Translog",
+            description="Check if translogs are committed properly by comparing the "
+            "`flush_threshold_size` with the `uncommitted_size` of a shard.",
+            sql="""
+                SELECT
+                    sh.table_name,
+                    sh.partition_ident,
+                    SUM(sh.translog_stats['uncommitted_size']) / POWER(1024, 3) AS "translog_uncommitted_in_gib"
+                FROM information_schema.table_partitions tp
+                JOIN sys.shards sh USING (table_name, partition_ident)
+                WHERE sh.translog_stats['uncommitted_size'] > settings['translog']['flush_threshold_size']
+                GROUP BY 1, 2
+                ORDER BY 3 DESC;
+            """,
+        )
+        translog_uncommitted_size = InfoElement(
+            name="translog_uncommitted_size",
+            label="Total uncommitted translog size",
+            description="A large number of uncommitted translog operations can indicate issues with shard replication.",
+            sql="""
+                SELECT COALESCE(SUM(translog_stats['uncommitted_size']), 0) AS translog_uncommitted_size
+                FROM sys.shards;
+            """,
+            transform=get_single_value("translog_uncommitted_size"),
+            unit="bytes",
+        )
diff --git a/cratedb_toolkit/wtf/model.py b/cratedb_toolkit/wtf/model.py
new file mode 100644
index 00000000..4f1515c0
--- /dev/null
+++ b/cratedb_toolkit/wtf/model.py
@@ -0,0 +1,86 @@
+# Copyright (c) 2021-2023, Crate.io Inc.
+# Distributed under the terms of the AGPLv3 license, see LICENSE.
+import dataclasses
+import typing as t
+from abc import abstractmethod
+
+from cratedb_toolkit.util import DatabaseAdapter
+from cratedb_toolkit.wtf.util import get_baseinfo
+
+
+@dataclasses.dataclass
+class InfoElement:
+    name: str
+    label: str
+    sql: str
+    description: t.Union[str, None] = None
+    transform: t.Union[t.Callable, None] = None
+    unit: t.Union[str, None] = None
+
+    def to_dict(self):
+        data = dataclasses.asdict(self)
+        data["sql"] = data["sql"].strip()
+        data["transform"] = str(data["transform"])
+        return data
+
+
+@dataclasses.dataclass
+class LogElement(InfoElement):
+    limit: int = 100
+
+
+@dataclasses.dataclass
+class ElementStore:
+    items: t.List[InfoElement] = dataclasses.field(default_factory=list)
+    index: t.Dict[str, InfoElement] = dataclasses.field(default_factory=dict)
+
+    def add(self, *elements: InfoElement):
+        for element in elements:
+            # Reject duplicates before registering the element anywhere,
+            # so that `items` and `index` always stay in sync.
+            if element.name in self.index:
+                raise KeyError(f"Duplicate element name: {element.name}")
+            self.items.append(element)
+            self.index[element.name] = element
+
+
+class InfoContainerBase:
+    def __init__(self, adapter: DatabaseAdapter, scrub: bool = False):
+        self.adapter = adapter
+        self.scrub = scrub
+        self.elements = ElementStore()
+        self.register_builtins()
+
+    @abstractmethod
+    def register_builtins(self):
+        raise NotImplementedError("Method needs to be implemented by child class")
+
+    def metadata(self):
+        data = {}
+        data.update(get_baseinfo())
+        data["elements"] = {}
+        for element in self.elements.items:
+            data["elements"][element.name] = element.to_dict()
+        return data
+
+    def evaluate_element(self, element: InfoElement):
+        sql = element.sql
+        if isinstance(element, LogElement):
+            sql = sql.format(limit=element.limit)
+        results = self.adapter.run_sql(sql, records=True)
+        if element.transform is not None:
+            results = element.transform(results)
+        return results
+
+    def to_dict(self, data=None):
+        if data is None:
+            data = self.render()
+        return {"meta": self.metadata(), "data": data}
+
+    def render(self):
+        data = {}
+        for element in self.elements.items:
+            data[element.name] = self.evaluate_element(element)
+        return data
+
+    # FIXME
+    def by_table(self, schema: str, table: str):
+        pass
diff --git a/cratedb_toolkit/wtf/query_collector.py b/cratedb_toolkit/wtf/query_collector.py
new file mode 100644
index 00000000..47255192
--- /dev/null
+++ b/cratedb_toolkit/wtf/query_collector.py
@@ -0,0 +1,247 @@
+# Copyright (c) 2021-2023, Crate.io Inc.
+# Distributed under the terms of the AGPLv3 license, see LICENSE.
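+
+# Usage sketch (standalone; connection details come from the environment
+# variables below, the values shown here are assumptions):
+#
+#   export HOSTNAME=localhost:4200
+#   export INTERVAL=10
+#   python -m cratedb_toolkit.wtf.query_collector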
+ +# ruff: noqa: S608 +import json +import logging +import os +import time +from uuid import uuid4 + +import urllib3 +from crate import client + +logger = logging.getLogger(__name__) + +host = os.getenv("HOSTNAME", "localhost:4200") +username = os.getenv("USERNAME", "crate") +password = os.getenv("PASSWORD", "") +interval = float(os.getenv("INTERVAL", 10)) +stmt_log_table = os.getenv("STMT_TABLE", "stats.statement_log") +last_exec_table = os.getenv("LAST_EXEC_TABLE", "stats.last_execution") +last_execution_ts = 0 +sys_jobs_log = {} +bucket_list = [10, 50, 100, 500, 1000, 2000, 5000, 10000, 15000, 20000] +bucket_dict = { + "10": 0, + "50": 0, + "100": 0, + "500": 0, + "1000": 0, + "2000": 0, + "5000": 0, + "10000": 0, + "15000": 0, + "20000": 0, + "INF": 0, +} + + +urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) +conn = client.connect(host, username=username, password=password) +cursor = conn.cursor() +last_scrape = int(time.time() * 1000) - (interval * 60000) + +TRACING = False + + +def init(): + stmt = ( + f"CREATE TABLE IF NOT EXISTS {stmt_log_table} " + f"(id TEXT, stmt TEXT, calls INT, bucket OBJECT, last_used TIMESTAMP, " + f"username TEXT, query_type TEXT, avg_duration FLOAT, nodes ARRAY(TEXT))" + ) + cursor.execute(stmt) + stmt = f"SELECT id, stmt, calls, bucket, username, query_type, avg_duration, nodes, last_used FROM {stmt_log_table}" + cursor.execute(stmt) + init_stmts(cursor.fetchall()) + stmt = f"CREATE TABLE IF NOT EXISTS {last_exec_table} (last_execution TIMESTAMP)" + cursor.execute(stmt) + stmt = f"SELECT last_execution FROM {last_exec_table}" + cursor.execute(stmt) + init_last_execution(cursor.fetchall()) + + +def init_last_execution(last_execution): + global last_execution_ts + if len(last_execution) == 0: + last_execution_ts = 0 + stmt = f"INSERT INTO {last_exec_table} (last_execution) VALUES (?)" + cursor.execute(stmt, (0,)) + else: + last_execution_ts = last_execution[0][0] + + +def init_stmts(stmts): + for stmt in stmts: + stmt_id = stmt[0] + stmt_column = stmt[1] + calls = stmt[2] + bucket = stmt[3] + user = stmt[4] + stmt_type = stmt[5] + avg_duration = stmt[6] + nodes = stmt[7] + last_used = stmt[8] + + if stmt_column not in sys_jobs_log: + sys_jobs_log[stmt_column] = { + "id": stmt_id, + "size": 0, + "info": [], + "calls": calls, + "bucket": bucket, + "user": user, + "type": stmt_type, + "avg_duration": avg_duration, + "nodes": nodes, + "last_used": last_used, + "in_db": True, + "changed": False, + } + + +def write_stats_to_db(): + logger.info("Writing statistics to database") + write_query_stmt = ( + f"INSERT INTO {stmt_log_table} " + f"(id, stmt, calls, bucket, username, query_type, avg_duration, nodes, last_used) " + f"VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)" + ) + update_query_stmt = ( + f"UPDATE {stmt_log_table} " + f"SET calls = ?, avg_duration = ?, nodes = ?, bucket = ?, last_used = ? " + f"WHERE id = ?" 
+ ) + write_params = [] + for key in sys_jobs_log.keys(): + if not sys_jobs_log[key]["in_db"]: + write_params.append( + [ + sys_jobs_log[key]["id"], + key, + sys_jobs_log[key]["calls"], + sys_jobs_log[key]["bucket"], + sys_jobs_log[key]["user"], + sys_jobs_log[key]["type"], + sys_jobs_log[key]["avg_duration"], + sys_jobs_log[key]["nodes"], + sys_jobs_log[key]["last_used"], + ] + ) + sys_jobs_log[key]["in_db"] = True + sys_jobs_log[key]["changed"] = False + elif sys_jobs_log[key]["changed"]: + cursor.execute( + update_query_stmt, + ( + sys_jobs_log[key]["calls"], + sys_jobs_log[key]["avg_duration"], + sys_jobs_log[key]["nodes"], + sys_jobs_log[key]["bucket"], + sys_jobs_log[key]["last_used"], + sys_jobs_log[key]["id"], + ), + ) + sys_jobs_log[key]["changed"] = False + if len(write_params) > 0: + cursor.executemany(write_query_stmt, write_params) + + stmt = f"UPDATE {last_exec_table} SET last_execution = ?" + cursor.execute(stmt, (last_scrape,)) + + +def read_stats(): + stmt = ( + f"SELECT id, stmt, calls, avg_duration, bucket, username, query_type, nodes, last_used " + f"FROM {stmt_log_table} ORDER BY calls DESC, avg_duration DESC;" + ) + cursor.execute(stmt) + init_stmts(cursor.fetchall()) + return sys_jobs_log + + +def assign_to_bucket(bucket, duration): + found = False + for element in bucket_list: + if duration < element: + found = True + bucket[str(element)] += 1 + break + if not found: + bucket["INF"] += 1 + + return bucket + + +def update_statistics(query_results): + global sys_jobs_log + for result in query_results: + started = result[0] + ended = result[1] + classification = result[2] + stmt = result[3] + user = result[4] + node = json.dumps(result[5]) + + duration = ended - started + if stmt not in sys_jobs_log: + sys_jobs_log[stmt] = { + "id": str(uuid4()), + "calls": 0, + "bucket": dict(bucket_dict), + "user": user, + "type": classification["type"], + "avg_duration": duration, + "in_db": False, + "last_used": started, + "nodes": [], + "changed": True, + } + sys_jobs_log[stmt]["changed"] = True + sys_jobs_log[stmt]["avg_duration"] = (sys_jobs_log[stmt]["avg_duration"] + duration) / 2 + sys_jobs_log[stmt]["bucket"] = assign_to_bucket(sys_jobs_log[stmt]["bucket"], duration) + sys_jobs_log[stmt]["last_used"] = started + sys_jobs_log[stmt]["calls"] += 1 + sys_jobs_log[stmt]["nodes"].append(node) + sys_jobs_log[stmt]["nodes"] = list(set(sys_jobs_log[stmt]["nodes"])) # only save unique nodes + if TRACING: + logger.info(f"Updated statistics: {sys_jobs_log}") + + +def scrape_db(): + global last_scrape + logger.info("Reading sys.jobs_log") + next_scrape = int(time.time() * 1000) + stmt = ( + f"SELECT " + f"started, ended, classification, stmt, username, node " + f"FROM sys.jobs_log " + f"WHERE " + f"stmt NOT LIKE '%sys.%' AND " + f"stmt NOT LIKE '%information_schema.%' " + f"AND ended BETWEEN {last_scrape} AND {next_scrape} " + f"ORDER BY ended DESC" + ) + + cursor.execute(stmt) + result = cursor.fetchall() + update_statistics(result) + last_scrape = next_scrape + + +def run(): + scrape_db() + write_stats_to_db() + + +def main(): + init() + while True: + run() + logger.info(f"Sleeping for {interval} seconds") + time.sleep(interval) + + +if __name__ == "__main__": + main() diff --git a/cratedb_toolkit/wtf/util.py b/cratedb_toolkit/wtf/util.py new file mode 100644 index 00000000..983863f2 --- /dev/null +++ b/cratedb_toolkit/wtf/util.py @@ -0,0 +1,21 @@ +# Copyright (c) 2021-2023, Crate.io Inc. +# Distributed under the terms of the AGPLv3 license, see LICENSE. 
+import datetime as dt +import functools as ft +import typing as t + +from boltons.iterutils import get_path + +from cratedb_toolkit import __appname__, __version__ + + +def get_baseinfo(): + data: t.Dict[str, t.Union[str, dt.datetime]] = {} + data["system_time"] = dt.datetime.now() + data["application_name"] = __appname__ + data["application_version"] = __version__ + return data + + +def get_single_value(column_name: str): + return ft.partial(get_path, path=(0, column_name)) diff --git a/pyproject.toml b/pyproject.toml index 814bb780..66f71c82 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -92,10 +92,12 @@ dependencies = [ "crash", "crate[sqlalchemy]>=0.34", "croud==1.8", + "fastapi<0.105", 'importlib-metadata; python_version <= "3.7"', "python-dotenv<2", "sqlalchemy", "sqlparse<0.5", + "uvicorn<0.25", ] [project.optional-dependencies] all = [ @@ -114,7 +116,7 @@ influxdb = [ ] io = [ "cr8", - "dask>=2020,<=2023.11.0", + "dask<=2023.11.0,>=2020", "pandas<3,>=1", ] mongodb = [ @@ -147,6 +149,7 @@ repository = "https://github.com/crate-workbench/cratedb-toolkit" [project.scripts] cratedb-retention = "cratedb_toolkit.retention.cli:cli" cratedb-toolkit = "cratedb_toolkit.cli:cli" +cratedb-wtf = "cratedb_toolkit.wtf.cli:cli" ctk = "cratedb_toolkit.cli:cli" migr8 = "cratedb_toolkit.io.mongodb.cli:main" diff --git a/tests/retention/test_cli.py b/tests/retention/test_cli.py index b100e50a..01b7487c 100644 --- a/tests/retention/test_cli.py +++ b/tests/retention/test_cli.py @@ -61,7 +61,7 @@ def test_setup_verbose(caplog, cratedb, settings): assert result.exit_code == 0 assert cratedb.database.table_exists(settings.policy_table.fullname) is True - assert 3 <= len(caplog.records) <= 10 + assert 3 <= len(caplog.records) <= 15 def test_setup_dryrun(caplog, cratedb, settings): diff --git a/tests/wtf/__init__.py b/tests/wtf/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/wtf/test_cli.py b/tests/wtf/test_cli.py new file mode 100644 index 00000000..b9620cd2 --- /dev/null +++ b/tests/wtf/test_cli.py @@ -0,0 +1,111 @@ +import json + +from boltons.iterutils import get_path +from click.testing import CliRunner + +from cratedb_toolkit.wtf.cli import cli + + +def test_wtf_cli_info(cratedb): + """ + Verify `cratedb-wtf info`. + """ + + # Invoke command. + runner = CliRunner(env={"CRATEDB_SQLALCHEMY_URL": cratedb.database.dburi}) + result = runner.invoke( + cli, + args="info", + catch_exceptions=False, + ) + assert result.exit_code == 0 + + # Verify outcome. + info = json.loads(result.output) + assert "meta" in info + assert "data" in info + + system_keys = list(get_path(info, ("data", "system")).keys()) + database_keys = list(get_path(info, ("data", "database")).keys()) + assert system_keys == [ + "remark", + "application", + "eco", + # "libraries", + ] + assert "cluster_name" in database_keys + assert "cluster_nodes_count" in database_keys + + +def test_wtf_cli_logs(cratedb): + """ + Verify `cratedb-wtf logs`. + """ + + # Invoke command. + runner = CliRunner(env={"CRATEDB_SQLALCHEMY_URL": cratedb.database.dburi}) + result = runner.invoke( + cli, + args="logs", + catch_exceptions=False, + ) + assert result.exit_code == 0 + + # Verify outcome. + info = json.loads(result.output) + assert "meta" in info + assert "data" in info + + data_keys = list(info["data"].keys()) + assert "user_queries_latest" in data_keys + assert len(info["data"]["user_queries_latest"]) > 3 + + +def test_wtf_cli_job_info(cratedb): + """ + Verify `cratedb-wtf job-info`. + """ + + # Invoke command. 
+ runner = CliRunner(env={"CRATEDB_SQLALCHEMY_URL": cratedb.database.dburi}) + result = runner.invoke( + cli, + args="job-info", + catch_exceptions=False, + ) + assert result.exit_code == 0 + + # Verify outcome. + info = json.loads(result.output) + assert "meta" in info + assert "data" in info + + data_keys = list(info["data"].keys()) + assert "by_user" in data_keys + assert "top100_count" in data_keys + assert "top100_duration_individual" in data_keys + assert "top100_duration_total" in data_keys + assert "performance15min" in data_keys + + +def test_wtf_cli_statistics_view(cratedb): + """ + Verify `cratedb-wtf job-statistics view`. + """ + + # Invoke command. + runner = CliRunner(env={"CRATEDB_SQLALCHEMY_URL": cratedb.database.dburi}) + result = runner.invoke( + cli, + args="job-statistics view", + catch_exceptions=False, + ) + assert result.exit_code == 0 + + # Verify outcome. + info = json.loads(result.output) + assert "meta" in info + assert "data" in info + + data_keys = list(info["data"].keys()) + assert "stats" in data_keys