-
-
Notifications
You must be signed in to change notification settings - Fork 784
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: search tokenization, handling of quoted literal search, and pos…
…tgres fuzziness (#2351) * Creating postgres migration script and starting to set up to detect database * non-working placeholders for postgres pg_tgrm * First draft of some indexes * non-working commit of postgres indexing * Further non-working edits to db-centric fuzzy search * update alembic for extensions * More non-working setup * Move db type check to init_db * fix typo in db name check * Add sqlite token search and postgres full text search * reorder search to hit exact matches faster * Add settings and docs for POSTGRES_LANGUAGE (full text search) * Use user-specified POSTGRES_LANGUAGE in search * fix fuzzy search typo * Remove full text search and instead order by trigram match * cleaner adding of indices, remove fulltext * Cleanup old import of getting app settings * Fix typo in index * Fix some alembic fuzzy typos * Remove diagnostic printing from alembic migration * Fix mixed up commutator for trigram operator and relax criteria * forgot to remove query debug * sort only on name * token and fuzzy search tests * Refactor recipe search test to avoid rare random string cross-matches. * Add ability to quote parts of search for exact match * Remove internal punctuation, unless it's quoted for literal search * Add tests for special character removal and literal search * Remove the outer double quotes from searches, but leave internal single quotes alone. * Update tests to avoid intra-test name collisions * Fixing leftovers highlighted by lint * cleanup linting and mypy errors * Fix test cross-matching on dirty db (leftovers from bulk import) * forgot to cleanup something when debugging mypy errors * re-order pg_trgm loading in postgres * address comments
- Loading branch information
Showing
7 changed files
with
304 additions
and
43 deletions.
There are no files selected for viewing
89 changes: 89 additions & 0 deletions
89
alembic/versions/2023-04-13-06.47.04_b3dbb554ba53_postgres_fuzzy_search.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
"""postgres fuzzy search | ||
Revision ID: b3dbb554ba53 | ||
Revises: 38514b39a824 | ||
Create Date: 2023-04-13 06:47:04.617131 | ||
""" | ||
import sqlalchemy as sa | ||
|
||
import mealie.db.migration_types | ||
from alembic import op | ||
import alembic.context as context | ||
from mealie.core.config import get_app_settings | ||
|
||
# revision identifiers, used by Alembic. | ||
revision = "b3dbb554ba53" | ||
down_revision = "38514b39a824" | ||
branch_labels = None | ||
depends_on = None | ||
|
||
|
||
def get_db_type():
    """Return the dialect name of the database this migration is running against."""
    migration_context = op.get_context()
    return migration_context.dialect.name
|
||
|
||
def setup_postgres_trigrams():
    """Enable the ``pg_trgm`` extension and create the GIN trigram indexes.

    One index is created per normalized text column on ``recipes`` and
    ``recipes_ingredients``, each using the ``gin_trgm_ops`` operator class.
    """
    op.execute("CREATE EXTENSION IF NOT EXISTS pg_trgm;")

    # (index name, table, indexed column), listed in creation order.
    trigram_indexes = (
        ("ix_recipes_name_normalized_gin", "recipes", "name_normalized"),
        ("ix_recipes_description_normalized_gin", "recipes", "description_normalized"),
        ("ix_recipes_ingredients_note_normalized_gin", "recipes_ingredients", "note_normalized"),
        (
            "ix_recipes_ingredients_original_text_normalized_gin",
            "recipes_ingredients",
            "original_text_normalized",
        ),
    )

    for index_name, table, column in trigram_indexes:
        op.create_index(
            index_name,
            table_name=table,
            columns=[column],
            unique=False,
            postgresql_using="gin",
            postgresql_ops={column: "gin_trgm_ops"},
        )
|
||
|
||
def remove_postgres_trigrams():
    """Drop the GIN trigram indexes and then the ``pg_trgm`` extension.

    The indexes are dropped first: they use the ``gin_trgm_ops`` operator
    class that the extension provides, so dropping the extension while they
    still exist is rejected by PostgreSQL unless ``CASCADE`` is used.
    """
    # Table names must match the ones the indexes were created on.
    op.drop_index("ix_recipes_name_normalized_gin", table_name="recipes")
    op.drop_index("ix_recipes_description_normalized_gin", table_name="recipes")
    op.drop_index("ix_recipes_ingredients_note_normalized_gin", table_name="recipes_ingredients")
    op.drop_index("ix_recipes_ingredients_original_text_normalized_gin", table_name="recipes_ingredients")
    op.execute("DROP EXTENSION IF EXISTS pg_trgm;")


def upgrade():
    """Create the trigram extension and indexes; a no-op on non-PostgreSQL databases."""
    if get_db_type() == "postgresql":
        setup_postgres_trigrams()


def downgrade():
    """Remove the trigram indexes and extension; a no-op on non-PostgreSQL databases."""
    # The dialect name is "postgresql" (the same value ``upgrade`` checks);
    # comparing against "postgres" meant the downgrade never ran.
    if get_db_type() == "postgresql":
        remove_postgres_trigrams()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.