From f1e9f8e44d2bd1e95e7c71d4d598291ca490654e Mon Sep 17 00:00:00 2001 From: Jack Sundberg Date: Thu, 6 Jul 2023 14:02:23 -0400 Subject: [PATCH 1/2] fix sqlite filtering bug --- docs/parameters.md | 6 ++++++ src/simmate/command_line/database.py | 5 ++--- src/simmate/configuration/django/settings.py | 8 ++++++++ src/simmate/database/base_data_types/base.py | 14 ++++++++++++-- src/simmate/engine/execution/executor.py | 16 ++++++++++++++++ src/simmate/engine/workflow.py | 11 +++++++++-- 6 files changed, 53 insertions(+), 7 deletions(-) diff --git a/docs/parameters.md b/docs/parameters.md index af6e4c5f8..3a2b47e3a 100644 --- a/docs/parameters.md +++ b/docs/parameters.md @@ -1086,6 +1086,12 @@ When submitting workflows via the `run_cloud` command, tags are 'labels' that he - my-tag-02 ``` +!!! danger + Filter tags does not always work as expected in SQLite3 because a worker with + `my-tag` will incorrectly grab jobs like `my-tag-01` and `my-tag-02`. This + issue is known by both [Django](https://docs.djangoproject.com/en/4.2/ref/databases/#substring-matching-and-case-sensitivity) and [SQLite3](https://www.sqlite.org/faq.html#q18). Simmate addresses this issue by requiring all + tags to be 7 characters long AND fully lowercase when using SQLite3. + -------------------------- ## temperature_end diff --git a/src/simmate/command_line/database.py b/src/simmate/command_line/database.py index 68748a997..5a6ce5918 100644 --- a/src/simmate/command_line/database.py +++ b/src/simmate/command_line/database.py @@ -31,7 +31,7 @@ def reset(confirm_delete: bool = False, use_prebuilt: bool = None): - `--use-prebuilt` and `--no-use-prebuilt`: automatically say yes/no to a prebuilt database. This only applies if you are using sqlite. """ - from simmate.configuration.django.settings import DATABASES + from simmate.configuration.django.settings import DATABASE_BACKEND, DATABASES database_name = str(DATABASES["default"]["NAME"]) print(f"\nUsing {database_name}") @@ -47,8 +47,7 @@ def reset(confirm_delete: bool = False, use_prebuilt: bool = None): # if the user has a SQLite3 backend, ask if they'd like to use a prebuilt # database to begin - using_sqlite = DATABASES["default"]["ENGINE"] == "django.db.backends.sqlite3" - if using_sqlite and use_prebuilt is None: + if DATABASE_BACKEND == "sqlite3" and use_prebuilt is None: use_prebuilt = typer.confirm( "\nIt looks like you are using the default database backend (sqlite3). \n" "Would you like to use a prebuilt-database with all third-party data " diff --git a/src/simmate/configuration/django/settings.py b/src/simmate/configuration/django/settings.py index bab8dc73d..d11124160 100644 --- a/src/simmate/configuration/django/settings.py +++ b/src/simmate/configuration/django/settings.py @@ -137,6 +137,14 @@ } } +# As an extra, we keep a DATABASE_BACKEND variable for backend-specific methods +if DATABASES["default"]["ENGINE"] == "django.db.backends.sqlite3": + DATABASE_BACKEND = "sqlite3" +elif DATABASES["default"]["ENGINE"] == "django.db.backends.postgresql": + DATABASE_BACKEND = "postgresql" +else: + DATABASE_BACKEND = "unknown" + # -------------------------------------------------------------------------------------- # INSTALLED APPS diff --git a/src/simmate/database/base_data_types/base.py b/src/simmate/database/base_data_types/base.py index 3b6f0af8e..b59e80fa2 100644 --- a/src/simmate/database/base_data_types/base.py +++ b/src/simmate/database/base_data_types/base.py @@ -27,6 +27,8 @@ from django_pandas.io import read_frame from rich.progress import track +from simmate.configuration.django.settings import DATABASE_BACKEND + # The "as table_column" line does NOTHING but rename a module. # I have this because I want to use "table_column.CharField(...)" instead # of "models.CharField(...)" in my Models. This let's beginners read my @@ -195,15 +197,23 @@ def to_archive(self, filename: Path | str = None): def filter_by_tags(self, tags: list[str]): """ - A utility filter() method that + A utility filter() method that helps query the 'tags' column of a table. + + NOTE: Pay close attention to filtering when using the SQLite3 backend + as Django warns about unexpected substring matching: + https://docs.djangoproject.com/en/4.2/ref/databases/#substring-matching-and-case-sensitivity """ if tags: new_query = self for tag in tags: - new_query = new_query.filter(tags__icontains=tag) + if DATABASE_BACKEND == "postgresql": + new_query = new_query.filter(tags__contains=tag) + elif DATABASE_BACKEND == "sqlite3": + new_query = new_query.filter(tags__icontains=tag) else: new_query = self.filter(tags=[]) + return new_query diff --git a/src/simmate/engine/execution/executor.py b/src/simmate/engine/execution/executor.py index b4f1a22ae..76c46d7be 100644 --- a/src/simmate/engine/execution/executor.py +++ b/src/simmate/engine/execution/executor.py @@ -7,6 +7,7 @@ from django.utils import timezone from rich import print +from simmate.configuration.django.settings import DATABASE_BACKEND from simmate.engine.execution.database import WorkItem @@ -45,6 +46,21 @@ def submit( # The *args and **kwargs input separates args into a tuple and kwargs into # a dictionary for me, which makes their storage very easy! + # BUG-FIX: sqlite can't filter tags properly so we add a rule that + # all tags must have the same number of characters AND be all lower-case. + # Issue is discussed at https://github.com/jacksund/simmate/issues/475 + # Django discusses this issue in their docs as well: + # https://docs.djangoproject.com/en/4.2/ref/databases/#substring-matching-and-case-sensitivity + if tags and DATABASE_BACKEND == "sqlite3": + for tag in tags: + if len(tag) != 7 or tag.lower() != tag: + raise Exception( + "All tags must be 7 characters long AND all lowercase " + "when using SQLite3 (the default database backend). " + "This is to avoid unexpected behavior/bugs. " + "Read the `tags` parameter docs for more information." + ) + # make the WorkItem where all of the provided inputs are pickled and # save the workitem to the database. # Pickling is just converting them to a byte string format diff --git a/src/simmate/engine/workflow.py b/src/simmate/engine/workflow.py index 2b4cb870c..7f55df383 100644 --- a/src/simmate/engine/workflow.py +++ b/src/simmate/engine/workflow.py @@ -13,6 +13,7 @@ from django.utils import timezone import simmate +from simmate.configuration.django.settings import DATABASE_BACKEND from simmate.database.base_data_types import Calculation from simmate.engine.execution import SimmateExecutor, WorkItem from simmate.utilities import ( @@ -311,7 +312,7 @@ def _run_full( @classmethod def run_cloud( cls, - tags: list[str] = None, + tags: list[str] = [], **kwargs, ): """ @@ -348,9 +349,15 @@ def run_cloud( # them before submission to the queue. parameters_serialized = cls._serialize_parameters(**kwargs_cleaned) + # If tags were not provided, we add some default ones. Note, however, + # that SQLite3 limits the default tag to just "simmate". The parameter + # docs for `tags` explains this bug with SQLite + if not tags: + tags = cls.tags if DATABASE_BACKEND != "sqlite3" else ["simmate"] + state = SimmateExecutor.submit( cls._run_full, # should this be the run method...? - tags=tags or cls.tags, + tags=tags, **parameters_serialized, ) From 3e2da7025bc2d1d9a24c058f9d49ea4da014d4a4 Mon Sep 17 00:00:00 2001 From: Jack Sundberg Date: Thu, 6 Jul 2023 14:06:12 -0400 Subject: [PATCH 2/2] update changelog --- docs/change_log.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/change_log.md b/docs/change_log.md index 5cfe14355..f8adf5a64 100644 --- a/docs/change_log.md +++ b/docs/change_log.md @@ -68,7 +68,9 @@ There is one key exception to the rules above -- and that is with `MAJOR`=0 rele **Fixes** --> -- no new changes have been merged into the `main` branch yet +**Fixes** + +- fix bug where workers incorrectly grab substring tag matches (e.g. a worker submited with the tag `ex` would incorrectly grab jobs like `ex-01` or `ex-02`) --------------------------------------------------------------------------------