From 5eeb37c3e80ccc3c96861c856cab6681c05667f9 Mon Sep 17 00:00:00 2001 From: Tom Ward <tomward@fmail.co.uk> Date: Fri, 27 Sep 2024 23:48:28 +0100 Subject: [PATCH 01/13] Test the string utils --- tests/lib/test_string_utils.py | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/tests/lib/test_string_utils.py b/tests/lib/test_string_utils.py index 2ba7961d..4200a73f 100644 --- a/tests/lib/test_string_utils.py +++ b/tests/lib/test_string_utils.py @@ -1,4 +1,20 @@ -from jobrunner.lib.string_utils import project_name_from_url +import pytest + +from jobrunner.lib.string_utils import project_name_from_url, slugify, tabulate + + +@pytest.mark.parametrize( + "input_string,slug", + [ + ("string", "string"), + ("neko猫", "neko"), + ("string!@#$%^&**()", "string"), + ("string_______string-------string string", "string-string-string-string"), + ("__string__", "string"), + ], +) +def test_slugify(input_string, slug): + assert slugify(input_string) == slug def test_project_name_from_url(): @@ -6,3 +22,18 @@ def test_project_name_from_url(): assert project_name_from_url("https://github.com/opensafely/test2/") == "test2" assert project_name_from_url("/some/local/path/test3/") == "test3" assert project_name_from_url("C:\\some\\windows\\path\\test4\\") == "test4" + + +@pytest.mark.parametrize( + "rows,formatted_output", + [ + ([], ""), + ([["one", "two"], ["three", "four"]], "one two \nthree four"), + ( + [["verylongword", "b"], ["yeahyeahyeah", "猫猫猫"]], + "verylongword b \nyeahyeahyeah 猫猫猫", + ), + ], +) +def test_tabulate(rows, formatted_output): + assert tabulate(rows) == formatted_output From e882b2282cb5de7e65baa3c43f3a4062fff7bfcd Mon Sep 17 00:00:00 2001 From: Tom Ward <tomward@fmail.co.uk> Date: Sat, 28 Sep 2024 00:09:36 +0100 Subject: [PATCH 02/13] Add some nocovers --- jobrunner/job_executor.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/jobrunner/job_executor.py b/jobrunner/job_executor.py index f9f7d730..93126f9f 100644 --- a/jobrunner/job_executor.py +++ b/jobrunner/job_executor.py @@ -298,26 +298,26 @@ def delete_files(self, workspace: str, privacy: Privacy, paths: [str]) -> List[s class NullExecutorAPI(ExecutorAPI): """Null implementation of ExecutorAPI.""" - def prepare(self, job_definition): + def prepare(self, job_definition): # pragma: nocover raise NotImplementedError - def execute(self, job_definition): + def execute(self, job_definition): # pragma: nocover raise NotImplementedError - def finalize(self, job_definition): + def finalize(self, job_definition): # pragma: nocover raise NotImplementedError - def terminate(self, job_definition): + def terminate(self, job_definition): # pragma: nocover raise NotImplementedError - def get_status(self, job_definition): + def get_status(self, job_definition): # pragma: nocover raise NotImplementedError - def get_results(self, job_definition): + def get_results(self, job_definition): # pragma: nocover raise NotImplementedError - def cleanup(self, job_definition): + def cleanup(self, job_definition): # pragma: nocover raise NotImplementedError - def delete_files(self, workspace, privacy, paths): + def delete_files(self, workspace, privacy, paths): # pragma: nocover raise NotImplementedError From 754813f5da7f23372822307b2973925ea5dbea69 Mon Sep 17 00:00:00 2001 From: Tom Ward <tomward@fmail.co.uk> Date: Sat, 28 Sep 2024 23:52:07 +0100 Subject: [PATCH 03/13] Some database tests --- tests/lib/test_database.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tests/lib/test_database.py b/tests/lib/test_database.py index 5620aa5c..8c7d002e 100644 --- a/tests/lib/test_database.py +++ b/tests/lib/test_database.py @@ -6,8 +6,10 @@ from jobrunner.lib.database import ( CONNECTION_CACHE, MigrationNeeded, + count_where, ensure_db, ensure_valid_db, + exists_where, find_one, get_connection, insert, @@ -56,6 +58,26 @@ def test_update_excluding_a_field(tmp_work_dir): assert j.commit == "commit-of-glory" +def test_exists_where(tmp_work_dir): + insert(Job(id="foo123", state=State.PENDING)) + insert(Job(id="foo124", state=State.RUNNING)) + insert(Job(id="foo125", state=State.FAILED)) + job_state_exists = exists_where(Job, state__in=[State.PENDING, State.FAILED]) + assert job_state_exists is True + job_id_exists = exists_where(Job, id="foo124") + assert job_id_exists is True + + +def test_count_where(tmp_work_dir): + insert(Job(id="foo123", state=State.PENDING)) + insert(Job(id="foo124", state=State.RUNNING)) + insert(Job(id="foo125", state=State.FAILED)) + jobs_in_states = count_where(Job, state__in=[State.PENDING, State.FAILED]) + assert jobs_in_states == 2 + jobs_with_id = count_where(Job, id="foo124") + assert jobs_with_id == 1 + + def test_select_values(tmp_work_dir): insert(Job(id="foo123", state=State.PENDING)) insert(Job(id="foo124", state=State.RUNNING)) From 8aed4da1c03a7b31ae2db01ecb59471f518faf78 Mon Sep 17 00:00:00 2001 From: Tom Ward <tomward@fmail.co.uk> Date: Sat, 28 Sep 2024 23:52:39 +0100 Subject: [PATCH 04/13] Another database test --- tests/lib/test_database.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/tests/lib/test_database.py b/tests/lib/test_database.py index 8c7d002e..d0d912f7 100644 --- a/tests/lib/test_database.py +++ b/tests/lib/test_database.py @@ -14,6 +14,7 @@ get_connection, insert, migrate_db, + query_params_to_sql, select_values, update, ) @@ -226,3 +227,28 @@ def test_ensure_valid_db(tmp_path): # does not raise when all is well conn.execute("PRAGMA user_version=1") ensure_valid_db(db, {1: "should not run"}) + + +@pytest.mark.parametrize( + "params,expected_sql_string,expected_sql_values", + [ + ({}, "1 = 1", []), + ({"doubutsu": "neko"}, '"doubutsu" = ?', ["neko"]), + ({"doubutsu__like": "ne%"}, '"doubutsu" LIKE ?', ["ne%"]), + ( + {"doubutsu__in": ["neko", "kitsune", "nezumi"]}, + '"doubutsu" IN (?, ?, ?)', + ["neko", "kitsune", "nezumi"], + ), + ( + {"namae": "rosa", "doubutsu__in": ["neko"]}, + '"namae" = ? AND "doubutsu" IN (?)', + ["rosa", "neko"], + ), + ({"state": State.RUNNING}, '"state" = ?', ['running']), + ], +) +def test_query_params_to_sql(params, expected_sql_string, expected_sql_values): + sql_string, sql_values = query_params_to_sql(params) + assert sql_string == expected_sql_string + assert sql_values == expected_sql_values From 8c405797aa4e9f777c2637d57403b1b6c710dab3 Mon Sep 17 00:00:00 2001 From: Tom Ward <tomward@fmail.co.uk> Date: Sat, 28 Sep 2024 23:53:12 +0100 Subject: [PATCH 05/13] Reuse database filename default codeblock * this allows us to easily increase pytest code coverage --- jobrunner/lib/database.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/jobrunner/lib/database.py b/jobrunner/lib/database.py index e26e20c4..5a595cef 100644 --- a/jobrunner/lib/database.py +++ b/jobrunner/lib/database.py @@ -155,6 +155,12 @@ def transaction(): return conn +def filename_or_get_default(filename=None): + if filename is None: + filename = config.DATABASE_FILE + return filename + + def get_connection(filename=None): """Return the current configured connection.""" # The caching below means we get the same connection to the database every @@ -162,8 +168,7 @@ def get_connection(filename=None): # implement transaction support without having to explicitly pass round a # connection object. This is done on a per-thread basis to avoid potential # threading issues. - if filename is None: - filename = config.DATABASE_FILE + filename = filename_or_get_default(filename) # Looks icky but is documented `threading.local` usage cache = CONNECTION_CACHE.__dict__ @@ -208,8 +213,7 @@ def ensure_valid_db(filename=None, migrations=MIGRATIONS): # we store migrations in models, so make sure this has been imported to collect them import jobrunner.models # noqa: F401 - if filename is None: - filename = config.DATABASE_FILE + filename = filename_or_get_default(filename) db_type, db_exists = db_status(filename) if db_type == "file" and not db_exists: @@ -234,8 +238,7 @@ def ensure_db(filename=None, migrations=MIGRATIONS, verbose=False): # we store migrations in models, so make sure this has been imported to collect them import jobrunner.models # noqa: F401 - if filename is None: - filename = config.DATABASE_FILE + filename = filename_or_get_default(filename) db_type, db_exists = db_status(filename) From 92512f38e16e0484d4422bcb44b9c871e3cb2322 Mon Sep 17 00:00:00 2001 From: Tom Ward <tomward@fmail.co.uk> Date: Sat, 28 Sep 2024 23:53:29 +0100 Subject: [PATCH 06/13] Refactor query_params_to_sql * move the empty branch to the start of the function --- jobrunner/lib/database.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/jobrunner/lib/database.py b/jobrunner/lib/database.py index 5a595cef..d94685e4 100644 --- a/jobrunner/lib/database.py +++ b/jobrunner/lib/database.py @@ -296,8 +296,12 @@ def query_params_to_sql(params): All parameters are implicitly ANDed together, and there's a bit of magic to handle `field__in=list_of_values` queries, LIKE queries and Enum classes. """ + if not params: + return "1 = 1", [] + parts = [] values = [] + for key, value in params.items(): if key.endswith("__in"): field = key[:-4] @@ -311,11 +315,11 @@ def query_params_to_sql(params): else: parts.append(f"{escape(key)} = ?") values.append(value) + # Bit of a hack: convert any Enum instances to their values so we can use # them in querying values = [v.value if isinstance(v, Enum) else v for v in values] - if not parts: - parts = ["1 = 1"] + return " AND ".join(parts), values From edeae263affbe399ddc38c3f70f734140db964cc Mon Sep 17 00:00:00 2001 From: Tom Ward <tomward@fmail.co.uk> Date: Sun, 29 Sep 2024 00:10:28 +0100 Subject: [PATCH 07/13] Test upsert --- tests/lib/test_database.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/tests/lib/test_database.py b/tests/lib/test_database.py index d0d912f7..62b800c9 100644 --- a/tests/lib/test_database.py +++ b/tests/lib/test_database.py @@ -17,6 +17,7 @@ query_params_to_sql, select_values, update, + upsert, ) from jobrunner.models import Job, State @@ -48,6 +49,20 @@ def test_update(tmp_work_dir): assert find_one(Job, id="foo123").action == "bar" +def test_upsert_insert(tmp_work_dir): + job = Job(id="foo123", action="bar") + upsert(job) + assert find_one(Job, id="foo123").action == "bar" + + +def test_upsert_update(tmp_work_dir): + job = Job(id="foo123", action="foo") + insert(job) + job.action = "bar" + upsert(job) + assert find_one(Job, id="foo123").action == "bar" + + def test_update_excluding_a_field(tmp_work_dir): job = Job(id="foo123", action="foo", commit="commit-of-glory") insert(job) @@ -245,7 +260,7 @@ def test_ensure_valid_db(tmp_path): '"namae" = ? AND "doubutsu" IN (?)', ["rosa", "neko"], ), - ({"state": State.RUNNING}, '"state" = ?', ['running']), + ({"state": State.RUNNING}, '"state" = ?', ["running"]), ], ) def test_query_params_to_sql(params, expected_sql_string, expected_sql_values): From b126de6ec8d3282675453ff45a13ff46e3cb1f0e Mon Sep 17 00:00:00 2001 From: Tom Ward <tomward@fmail.co.uk> Date: Sun, 29 Sep 2024 00:10:43 +0100 Subject: [PATCH 08/13] Refactor upsert to reuse insert --- jobrunner/lib/database.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/jobrunner/lib/database.py b/jobrunner/lib/database.py index d94685e4..cb326c39 100644 --- a/jobrunner/lib/database.py +++ b/jobrunner/lib/database.py @@ -42,27 +42,31 @@ def migration(version, sql): MIGRATIONS[version] = sql -def insert(item): +def generate_insert_sql(item): table = item.__tablename__ fields = dataclasses.fields(item) columns = ", ".join(escape(field.name) for field in fields) placeholders = ", ".join(["?"] * len(fields)) sql = f"INSERT INTO {escape(table)} ({columns}) VALUES({placeholders})" + return sql, fields + + +def insert(item): + sql, fields = generate_insert_sql(item) + get_connection().execute(sql, encode_field_values(fields, item)) def upsert(item): assert item.id - table = item.__tablename__ - fields = dataclasses.fields(item) - columns = ", ".join(escape(field.name) for field in fields) - placeholders = ", ".join(["?"] * len(fields)) + insert_sql, fields = generate_insert_sql(item) + updates = ", ".join(f"{escape(field.name)} = ?" for field in fields) # Note: technically we update the id on conflict with this approach, which - # is unessecary, but it does not hurt and simplifies updates and params + # is unnecessary, but it does not hurt and simplifies updates and params # parts of the query. sql = f""" - INSERT INTO {escape(table)} ({columns}) VALUES({placeholders}) + {insert_sql} ON CONFLICT(id) DO UPDATE SET {updates} """ params = encode_field_values(fields, item) From ac9210e4db4690ae35b76f9b0bd32f45ce4b2b4d Mon Sep 17 00:00:00 2001 From: Tom Ward <tomward@fmail.co.uk> Date: Sun, 29 Sep 2024 00:22:31 +0100 Subject: [PATCH 09/13] add test for insert helper function --- tests/lib/test_database.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tests/lib/test_database.py b/tests/lib/test_database.py index 62b800c9..e197969b 100644 --- a/tests/lib/test_database.py +++ b/tests/lib/test_database.py @@ -11,6 +11,7 @@ ensure_valid_db, exists_where, find_one, + generate_insert_sql, get_connection, insert, migrate_db, @@ -41,6 +42,16 @@ def test_basic_roundtrip(tmp_work_dir): assert job.output_spec == j.output_spec +def test_generate_insert_sql(tmp_work_dir): + job = Job(id="foo123", action="foo") + sql, _ = generate_insert_sql(job) + + assert ( + sql + == 'INSERT INTO "job" ("id", "job_request_id", "state", "repo_url", "commit", "workspace", "database_name", "action", "action_repo_url", "action_commit", "requires_outputs_from", "wait_for_job_ids", "run_command", "image_id", "output_spec", "outputs", "unmatched_outputs", "status_message", "status_code", "cancelled", "created_at", "updated_at", "started_at", "completed_at", "status_code_updated_at", "trace_context", "level4_excluded_files", "requires_db") VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)' + ) + + def test_update(tmp_work_dir): job = Job(id="foo123", action="foo") insert(job) From 8d27ef3121b9b71cef3d0ca91b22805780f76629 Mon Sep 17 00:00:00 2001 From: Tom Ward <tomward@fmail.co.uk> Date: Sun, 29 Sep 2024 00:31:47 +0100 Subject: [PATCH 10/13] remove synonym function from coverage --- jobrunner/lib/database.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jobrunner/lib/database.py b/jobrunner/lib/database.py index cb326c39..10c99c08 100644 --- a/jobrunner/lib/database.py +++ b/jobrunner/lib/database.py @@ -106,7 +106,7 @@ def find_where(itemclass, **query_params): return [itemclass(*decode_field_values(fields, row)) for row in cursor] -def find_all(itemclass): +def find_all(itemclass): # pragma: nocover return find_where(itemclass) From 2d59bc47714080e56d9a4bb0ebfa89e26faca33b Mon Sep 17 00:00:00 2001 From: Tom Ward <tomward@fmail.co.uk> Date: Sun, 29 Sep 2024 00:32:42 +0100 Subject: [PATCH 11/13] Test transactions --- tests/lib/test_database.py | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/tests/lib/test_database.py b/tests/lib/test_database.py index e197969b..4479c1fe 100644 --- a/tests/lib/test_database.py +++ b/tests/lib/test_database.py @@ -17,6 +17,7 @@ migrate_db, query_params_to_sql, select_values, + transaction, update, upsert, ) @@ -42,6 +43,38 @@ def test_basic_roundtrip(tmp_work_dir): assert job.output_spec == j.output_spec +def test_insert_in_transaction_success(tmp_work_dir): + job = Job( + id="foo123", + job_request_id="bar123", + state=State.RUNNING, + output_spec={"hello": [1, 2, 3]}, + ) + + with transaction(): + insert(job) + j = find_one(Job, job_request_id__in=["bar123", "baz123"]) + assert job.id == j.id + assert job.output_spec == j.output_spec + + +def test_insert_in_transaction_fail(tmp_work_dir): + job = Job( + id="foo123", + job_request_id="bar123", + state=State.RUNNING, + output_spec={"hello": [1, 2, 3]}, + ) + + with transaction(): + insert(job) + conn = get_connection() + conn.execute("ROLLBACK") + + with pytest.raises(ValueError): + find_one(Job, job_request_id__in=["bar123", "baz123"]) + + def test_generate_insert_sql(tmp_work_dir): job = Job(id="foo123", action="foo") sql, _ = generate_insert_sql(job) From 762ac7146f76af3bfdfb2f1307e1ca52e3269235 Mon Sep 17 00:00:00 2001 From: Tom Ward <tomward@fmail.co.uk> Date: Sun, 29 Sep 2024 08:25:57 +0100 Subject: [PATCH 12/13] Test empty sync response --- tests/test_sync.py | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/tests/test_sync.py b/tests/test_sync.py index f60e5f0a..10bb75ec 100644 --- a/tests/test_sync.py +++ b/tests/test_sync.py @@ -3,7 +3,8 @@ from responses import matchers from jobrunner import config, queries, sync -from jobrunner.models import JobRequest +from jobrunner.lib.database import find_where +from jobrunner.models import Job, JobRequest from tests.factories import job_factory, metrics_factory @@ -152,3 +153,24 @@ def test_session_request_flags(db, responses): # if this works, our expected request was generated sync.api_get("path", params={"backend": "test"}) + + +def test_sync_empty_response(db, monkeypatch, requests_mock): + monkeypatch.setattr( + "jobrunner.config.JOB_SERVER_ENDPOINT", "http://testserver/api/v2/" + ) + requests_mock.get( + "http://testserver/api/v2/job-requests/?backend=expectations", + json={ + "results": [], + }, + ) + sync.sync() + + # verify we did not post back to job-server + assert requests_mock.last_request.text is None + assert requests_mock.last_request.method == "GET" + + # also that we did not create any jobs + jobs = find_where(Job) + assert jobs == [] From 3141f1e6787bb434ab2b615188f0c486da8a058d Mon Sep 17 00:00:00 2001 From: Tom Ward <tomward@fmail.co.uk> Date: Thu, 10 Oct 2024 15:18:35 +0100 Subject: [PATCH 13/13] Bump target coverage to 84% --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index bc86b6af..a9faeb88 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -45,7 +45,7 @@ omit = [ ] [tool.coverage.report] -fail_under = 82 +fail_under = 84 show_missing = true skip_covered = true