Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix sqlite filtering bug 🦋 #476

Merged
merged 2 commits into from
Jul 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion docs/change_log.md
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,9 @@ There is one key exception to the rules above -- and that is with `MAJOR`=0 rele
**Fixes**
-->

- no new changes have been merged into the `main` branch yet
**Fixes**

- fix bug where workers incorrectly grab substring tag matches (e.g. a worker submitted with the tag `ex` would incorrectly grab jobs like `ex-01` or `ex-02`)

--------------------------------------------------------------------------------

Expand Down
6 changes: 6 additions & 0 deletions docs/parameters.md
Original file line number Diff line number Diff line change
Expand Up @@ -1086,6 +1086,12 @@ When submitting workflows via the `run_cloud` command, tags are 'labels' that he
- my-tag-02
```

!!! danger
Filtering by tags does not always work as expected in SQLite3 because a worker with
`my-tag` will incorrectly grab jobs like `my-tag-01` and `my-tag-02`. This
issue is documented by both [Django](https://docs.djangoproject.com/en/4.2/ref/databases/#substring-matching-and-case-sensitivity) and [SQLite3](https://www.sqlite.org/faq.html#q18). Simmate addresses this issue by requiring all
tags to be exactly 7 characters long AND fully lowercase when using SQLite3.

--------------------------

## temperature_end
Expand Down
5 changes: 2 additions & 3 deletions src/simmate/command_line/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def reset(confirm_delete: bool = False, use_prebuilt: bool = None):
- `--use-prebuilt` and `--no-use-prebuilt`: automatically say yes/no to a
prebuilt database. This only applies if you are using sqlite.
"""
from simmate.configuration.django.settings import DATABASES
from simmate.configuration.django.settings import DATABASE_BACKEND, DATABASES

database_name = str(DATABASES["default"]["NAME"])
print(f"\nUsing {database_name}")
Expand All @@ -47,8 +47,7 @@ def reset(confirm_delete: bool = False, use_prebuilt: bool = None):

# if the user has a SQLite3 backend, ask if they'd like to use a prebuilt
# database to begin
using_sqlite = DATABASES["default"]["ENGINE"] == "django.db.backends.sqlite3"
if using_sqlite and use_prebuilt is None:
if DATABASE_BACKEND == "sqlite3" and use_prebuilt is None:
use_prebuilt = typer.confirm(
"\nIt looks like you are using the default database backend (sqlite3). \n"
"Would you like to use a prebuilt-database with all third-party data "
Expand Down
8 changes: 8 additions & 0 deletions src/simmate/configuration/django/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,14 @@
}
}

# As an extra, we keep a DATABASE_BACKEND variable for backend-specific methods
if DATABASES["default"]["ENGINE"] == "django.db.backends.sqlite3":
DATABASE_BACKEND = "sqlite3"
elif DATABASES["default"]["ENGINE"] == "django.db.backends.postgresql":
DATABASE_BACKEND = "postgresql"
else:
DATABASE_BACKEND = "unknown"

# --------------------------------------------------------------------------------------

# INSTALLED APPS
Expand Down
14 changes: 12 additions & 2 deletions src/simmate/database/base_data_types/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@
from django_pandas.io import read_frame
from rich.progress import track

from simmate.configuration.django.settings import DATABASE_BACKEND

# The "as table_column" line does NOTHING but rename a module.
# I have this because I want to use "table_column.CharField(...)" instead
# of "models.CharField(...)" in my Models. This lets beginners read my
Expand Down Expand Up @@ -195,15 +197,23 @@ def to_archive(self, filename: Path | str = None):

def filter_by_tags(self, tags: list[str]):
"""
A utility filter() method that
A utility filter() method that helps query the 'tags' column of a table.

NOTE: Pay close attention to filtering when using the SQLite3 backend
as Django warns about unexpected substring matching:
https://docs.djangoproject.com/en/4.2/ref/databases/#substring-matching-and-case-sensitivity
"""

if tags:
new_query = self
for tag in tags:
new_query = new_query.filter(tags__icontains=tag)
if DATABASE_BACKEND == "postgresql":
new_query = new_query.filter(tags__contains=tag)
elif DATABASE_BACKEND == "sqlite3":
new_query = new_query.filter(tags__icontains=tag)
else:
new_query = self.filter(tags=[])

return new_query


Expand Down
16 changes: 16 additions & 0 deletions src/simmate/engine/execution/executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from django.utils import timezone
from rich import print

from simmate.configuration.django.settings import DATABASE_BACKEND
from simmate.engine.execution.database import WorkItem


Expand Down Expand Up @@ -45,6 +46,21 @@ def submit(
# The *args and **kwargs input separates args into a tuple and kwargs into
# a dictionary for me, which makes their storage very easy!

# BUG-FIX: sqlite can't filter tags properly so we add a rule that
# all tags must have the same number of characters AND be all lower-case.
# Issue is discussed at https://github.com/jacksund/simmate/issues/475
# Django discusses this issue in their docs as well:
# https://docs.djangoproject.com/en/4.2/ref/databases/#substring-matching-and-case-sensitivity
if tags and DATABASE_BACKEND == "sqlite3":
for tag in tags:
if len(tag) != 7 or tag.lower() != tag:
raise Exception(
"All tags must be 7 characters long AND all lowercase "
"when using SQLite3 (the default database backend). "
"This is to avoid unexpected behavior/bugs. "
"Read the `tags` parameter docs for more information."
)

# make the WorkItem where all of the provided inputs are pickled and
# save the workitem to the database.
# Pickling is just converting them to a byte string format
Expand Down
11 changes: 9 additions & 2 deletions src/simmate/engine/workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from django.utils import timezone

import simmate
from simmate.configuration.django.settings import DATABASE_BACKEND
from simmate.database.base_data_types import Calculation
from simmate.engine.execution import SimmateExecutor, WorkItem
from simmate.utilities import (
Expand Down Expand Up @@ -311,7 +312,7 @@ def _run_full(
@classmethod
def run_cloud(
cls,
tags: list[str] = None,
tags: list[str] = [],
**kwargs,
):
"""
Expand Down Expand Up @@ -348,9 +349,15 @@ def run_cloud(
# them before submission to the queue.
parameters_serialized = cls._serialize_parameters(**kwargs_cleaned)

# If tags were not provided, we add some default ones. Note, however,
# that SQLite3 limits the default tag to just "simmate". The parameter
# docs for `tags` explain this bug with SQLite
if not tags:
tags = cls.tags if DATABASE_BACKEND != "sqlite3" else ["simmate"]

state = SimmateExecutor.submit(
cls._run_full, # should this be the run method...?
tags=tags or cls.tags,
tags=tags,
**parameters_serialized,
)

Expand Down