diff --git a/cratedb_toolkit/cfr/backlog.md b/cratedb_toolkit/cfr/backlog.md index 75fb78a7..c71f517a 100644 --- a/cratedb_toolkit/cfr/backlog.md +++ b/cratedb_toolkit/cfr/backlog.md @@ -1,15 +1,21 @@ # CFR Backlog ## Iteration +1 -- sys-export: Does the program need capabilities to **LIMIT** cardinality - on `sys-export` operations, for example, when they are super large? -- sys-import: Accept target database schema. -- Combine with `ctk wtf info` +- Software tests - Converge output into tar archive +- Combine with `ctk wtf info` + - On sys-export, add it to the CFR package + - After sys-import, use it to access the imported data ## Iteration +2 -- Cluster name muss in `cfr//sys/`, für multi-tenancy operations. +- sys-export: Does the program need capabilities to **LIMIT** cardinality + on `sys-export` operations, for example, when they are super large? +- sys-import: Accept target database schema. ## Iteration +3 - Wie komme ich ans `crate.yaml`? - Wie komme ich an die Logfiles? `docker log`? +- Use OpenTelemetry traces in one way or another? + +## Done +- Cluster name muss in `cfr//sys/`, für multi-tenancy operations. diff --git a/cratedb_toolkit/cfr/systable.py b/cratedb_toolkit/cfr/systable.py index 18b0962e..9fb122a4 100644 --- a/cratedb_toolkit/cfr/systable.py +++ b/cratedb_toolkit/cfr/systable.py @@ -107,6 +107,7 @@ def read_table(self, tablename: str) -> pl.DataFrame: return pl.read_database( query=sql, # noqa: S608 connection=self.adapter.engine, + infer_schema_length=1000, ) def dump_table(self, frame: pl.DataFrame, file: t.Union[t.TextIO, None] = None): @@ -132,6 +133,7 @@ def save(self) -> Path: path_data.mkdir(parents=True, exist_ok=True) table_count = 0 for tablename in tqdm(system_tables, disable=None): + logger.debug(f"Exporting table: {tablename}") if tablename in SystemTableKnowledge.REFLECTION_BLOCKLIST: continue diff --git a/cratedb_toolkit/wtf/backlog.md b/cratedb_toolkit/wtf/backlog.md index cd6fc7e2..9f0c82e2 100644 --- a/cratedb_toolkit/wtf/backlog.md +++ b/cratedb_toolkit/wtf/backlog.md @@ -10,6 +10,8 @@ - High-level analysis, evaluating a set of threshold rules - High-level summary reports with heuristics support - Network diagnostics? +- Provide a GUI? + https://github.com/davep/pispy ## Iteration +3 - Make it work with CrateDB Cloud. diff --git a/doc/backlog.md b/doc/backlog.md index 4bfaead6..1a25a5d9 100644 --- a/doc/backlog.md +++ b/doc/backlog.md @@ -32,6 +32,18 @@ - Store `CRATEDB_CLOUD_CLUSTER_ID` into `cratedb_toolkit.constants` - Cloud Tests: Verify file uploads - Docs: Add examples in more languages: Java, JavaScript, Lua, PHP +- Docs: + - https://pypi.org/project/red-panda/ + - https://pypi.org/project/redpanda/ + https://github.com/amancevice/redpanda + - https://pypi.org/project/alyeska/ +- Kafka: + - https://github.com/bakdata/streams-bootstrap + - https://pypi.org/project/kashpy/ +- CFR/WTF + - https://github.com/peekjef72/sql_exporter +- Migrate / I/O adapter + - https://community.cratedb.com/t/migrating-from-postgresql-or-timescale-to-cratedb/620 ## Iteration +2.5 - Retention: Improve retention subsystem CLI API. diff --git a/tests/cfr/assets/sys-operations.jsonl b/tests/cfr/assets/sys-operations.jsonl new file mode 100644 index 00000000..f220dbcf --- /dev/null +++ b/tests/cfr/assets/sys-operations.jsonl @@ -0,0 +1 @@ +{"id":"0","job_id":"3cd98282-50f6-c25d-c69e-90eeea6d7afc","name":"collect","node":{"name":"Testa del Rutor","id":"sy7vpr9mSzS4RMwKJTxWkA"},"started":1713399434586,"used_bytes":0} diff --git a/tests/cfr/assets/sys-operations.sql b/tests/cfr/assets/sys-operations.sql new file mode 100644 index 00000000..580624a9 --- /dev/null +++ b/tests/cfr/assets/sys-operations.sql @@ -0,0 +1,8 @@ +CREATE TABLE IF NOT EXISTS "sys-operations" ( + id STRING, + job_id STRING, + name STRING, + node OBJECT, + started TIMESTAMP, + used_bytes LONG +); diff --git a/tests/cfr/test_cli.py b/tests/cfr/test_cli.py index fc60705d..a282af29 100644 --- a/tests/cfr/test_cli.py +++ b/tests/cfr/test_cli.py @@ -1,5 +1,14 @@ import json import re +import shutil +import sys + +import tests + +if sys.version_info < (3, 9): + from importlib_resources import files +else: + from importlib.resources import files from pathlib import Path from click.testing import CliRunner @@ -38,3 +47,64 @@ def test_cfr_cli_export(cratedb, tmp_path, caplog): assert len(schema_files) >= 19 assert len(data_files) >= 19 + + +def test_cfr_cli_import(cratedb, tmp_path, caplog): + """ + Verify `ctk cfr sys-import` works. + """ + + # Blank database canvas. + imported_system_tables = [ + "sys-allocations", + "sys-checks", + "sys-cluster", + "sys-health", + "sys-jobs", + "sys-jobs_log", + "sys-jobs_metrics", + "sys-node_checks", + "sys-nodes", + "sys-operations", + "sys-operations_log", + "sys-privileges", + "sys-repositories", + "sys-roles", + "sys-segments", + "sys-shards", + "sys-snapshot_restore", + "sys-snapshots", + "sys-users", + ] + cratedb.reset(imported_system_tables) + + # Provision filesystem to look like a fake `sys-export` trace. + assets_path = files(tests.cfr) / "assets" + sys_operations_schema = assets_path / "sys-operations.sql" + sys_operations_data = assets_path / "sys-operations.jsonl" + schema_path = tmp_path / "schema" + data_path = tmp_path / "data" + schema_path.mkdir() + data_path.mkdir() + shutil.copy(sys_operations_schema, schema_path) + shutil.copy(sys_operations_data, data_path) + + # Invoke command. + runner = CliRunner(env={"CRATEDB_SQLALCHEMY_URL": cratedb.database.dburi, "CFR_SOURCE": str(tmp_path)}) + result = runner.invoke( + cli, + args="--debug sys-import", + catch_exceptions=False, + ) + assert result.exit_code == 0 + + # Verify log output. + assert "Importing system tables from" in caplog.text + assert re.search(r"Successfully imported \d+ system tables", caplog.text), "Log message missing" + + # Verify outcome. + results = cratedb.database.run_sql("SHOW TABLES", records=True) + assert results == [{"table_name": "sys-operations"}] + + cratedb.database.run_sql('REFRESH TABLE "sys-operations"') + assert cratedb.database.count_records("sys-operations") == 1