From 9ae872cb38998f69f8502e2c70ab991765e12d2d Mon Sep 17 00:00:00 2001 From: Kalyan Dutia Date: Tue, 17 Sep 2024 15:14:48 +0100 Subject: [PATCH 1/6] enable dynamic summary in schema --- tests/local_vespa/test_app/schemas/document_passage.sd | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/local_vespa/test_app/schemas/document_passage.sd b/tests/local_vespa/test_app/schemas/document_passage.sd index e279cd0..a2a9a59 100644 --- a/tests/local_vespa/test_app/schemas/document_passage.sd +++ b/tests/local_vespa/test_app/schemas/document_passage.sd @@ -29,6 +29,7 @@ schema document_passage { field text_block type string { indexing: attribute | index | summary index: enable-bm25 + summary: dynamic } field text_embedding type tensor(x[768]) { From a0d2559b53e4bd10851e86d6c239a31d15952a99 Mon Sep 17 00:00:00 2001 From: Kalyan Dutia Date: Tue, 17 Sep 2024 15:15:33 +0100 Subject: [PATCH 2/6] CLI to run search queries --- poetry.lock | 22 ++++---- pyproject.toml | 6 +++ src/cpr_sdk/cli/run_search_query.py | 80 +++++++++++++++++++++++++++++ 3 files changed, 97 insertions(+), 11 deletions(-) create mode 100644 src/cpr_sdk/cli/run_search_query.py diff --git a/poetry.lock b/poetry.lock index 3022a6c..87d0897 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1487,7 +1487,7 @@ test = ["hypothesis", "pytest", "readme-renderer"] name = "markdown-it-py" version = "3.0.0" description = "Python port of markdown-it. Markdown parsing, done right!" 
-optional = true +optional = false python-versions = ">=3.8" files = [ {file = "markdown-it-py-3.0.0.tar.gz", hash = "sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb"}, @@ -1580,7 +1580,7 @@ files = [ name = "mdurl" version = "0.1.2" description = "Markdown URL utilities" -optional = true +optional = false python-versions = ">=3.7" files = [ {file = "mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8"}, @@ -2170,9 +2170,9 @@ files = [ [package.dependencies] numpy = [ - {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, {version = ">=1.22.4", markers = "python_version < \"3.11\""}, {version = ">=1.23.2", markers = "python_version == \"3.11\""}, + {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, ] python-dateutil = ">=2.8.2" pytz = ">=2020.1" @@ -2598,8 +2598,8 @@ files = [ annotated-types = ">=0.6.0" pydantic-core = "2.23.3" typing-extensions = [ - {version = ">=4.12.2", markers = "python_version >= \"3.13\""}, {version = ">=4.6.1", markers = "python_version < \"3.13\""}, + {version = ">=4.12.2", markers = "python_version >= \"3.13\""}, ] [package.extras] @@ -2711,7 +2711,7 @@ typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0" name = "pygments" version = "2.18.0" description = "Pygments is a syntax highlighting package written in Python." 
-optional = true +optional = false python-versions = ">=3.8" files = [ {file = "pygments-2.18.0-py3-none-any.whl", hash = "sha256:b8e6aca0523f3ab76fee51799c488e38782ac06eafcf95e7ba832985c8e7b13a"}, @@ -3213,13 +3213,13 @@ tests = ["coverage (>=6.0.0)", "flake8", "mypy", "pytest (>=7.0.0)", "pytest-asy [[package]] name = "rich" -version = "13.8.0" +version = "13.8.1" description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal" -optional = true +optional = false python-versions = ">=3.7.0" files = [ - {file = "rich-13.8.0-py3-none-any.whl", hash = "sha256:2e85306a063b9492dffc86278197a60cbece75bcb766022f3436f567cae11bdc"}, - {file = "rich-13.8.0.tar.gz", hash = "sha256:a5ac1f1cd448ade0d59cc3356f7db7a7ccda2c8cbae9c7a90c28ff463d3e91f4"}, + {file = "rich-13.8.1-py3-none-any.whl", hash = "sha256:1760a3c0848469b97b558fc61c85233e3dafb69c7a071b4d60c38099d3cd4c06"}, + {file = "rich-13.8.1.tar.gz", hash = "sha256:8260cda28e3db6bf04d2d1ef4dbc03ba80a824c88b0e7668a0f23126a424844a"}, ] [package.dependencies] @@ -4156,7 +4156,7 @@ files = [ name = "typer" version = "0.12.5" description = "Typer, build great CLIs. Easy to code. Based on Python type hints." 
-optional = true +optional = false python-versions = ">=3.7" files = [ {file = "typer-0.12.5-py3-none-any.whl", hash = "sha256:62fe4e471711b147e3365034133904df3e235698399bc4de2b36c8579298d52b"}, @@ -4707,4 +4707,4 @@ vespa = ["pyvespa", "pyyaml", "sentence-transformers", "torch"] [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "5a69522e0af674a66555a1bc1df88d6b5f2921e849f71781db6e7779fa10b8f8" +content-hash = "4865844ac4bddb76b66d4068f477a17119d2f886f3e0df9accb9bb3d3db6dac7" diff --git a/pyproject.toml b/pyproject.toml index ff3c1e1..6e5cde0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -60,6 +60,12 @@ black = "^24.2.0" moto = { extras = ["s3"], version = "^5.0.13" } pytest-dotenv = "^0.5.2" +[tool.poetry.group.cli] +optional = true + +[tool.poetry.group.cli.dependencies] +typer = "^0.12.5" +rich = "^13.8.1" [tool.pytest.ini_options] addopts = "-p no:cacheprovider" diff --git a/src/cpr_sdk/cli/run_search_query.py b/src/cpr_sdk/cli/run_search_query.py new file mode 100644 index 0000000..9ce6fcd --- /dev/null +++ b/src/cpr_sdk/cli/run_search_query.py @@ -0,0 +1,80 @@ +import json +from rich.console import Console +from rich.table import Table +from rich import print_json +from rich import print as rprint +import typer +from src.cpr_sdk.search_adaptors import VespaSearchAdapter +from src.cpr_sdk.models.search import SearchParameters +from tests.conftest import VESPA_TEST_SEARCH_URL + + +def main( + instance_url: str = VESPA_TEST_SEARCH_URL, + exact_match: bool = False, + limit: int = 10, +): + """Run a search query with different rank profiles.""" + console = Console() + search_adapter = VespaSearchAdapter(instance_url) + + while True: + query = input("Enter your search query (or 'q' to quit): ") + if query.lower() == "q": + break + + search_parameters = SearchParameters( + query_string=query, exact_match=exact_match, limit=limit + ) + search_response = search_adapter.search(search_parameters) + + for family in search_response.families: 
+ family_data = family.hits[0].model_dump() + console.rule( + title=f"{family_data['family_name']} ({family_data['family_geography']} ,{family_data['family_import_id']})" + ) + print_json( + json.dumps( + { + k: v + for k, v in family_data.items() + if not k.startswith("text_block") and "metadata" not in k + }, + default=str, + ) + ) + + # There's some typing weirdness going on here: + # hasattr(family.hits[0], 'text_blocks') can be False, but + # family.hits[0].text_block exists + try: + rprint("Text blocks:") + table = Table(title="Hits Table") + + # Add columns to the table + table.add_column( + "Text Block ID", justify="right", style="cyan", no_wrap=True + ) + table.add_column("Text Block", style="magenta") + + # Add rows to the table + for hit in family.hits: + try: + table.add_row(f"{hit.text_block_id}", f"{hit.text_block}") # type: ignore + except Exception: + pass + + # Print the table + console.print(table) + + if family == search_response.families[-1]: + print("No more families to show.") + break + except AttributeError: + print("No text blocks found.") + + input("Press any key to show next family") + + +if __name__ == "__main__": + typer.run(main) From 8c061889b143335998d9cbf4d5fccc34efd9170d Mon Sep 17 00:00:00 2001 From: Kalyan Dutia Date: Tue, 17 Sep 2024 17:05:19 +0100 Subject: [PATCH 3/6] update poetry.lock --- poetry.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/poetry.lock b/poetry.lock index 87d0897..095a623 100644 --- a/poetry.lock +++ b/poetry.lock @@ -4707,4 +4707,4 @@ vespa = ["pyvespa", "pyyaml", "sentence-transformers", "torch"] [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "4865844ac4bddb76b66d4068f477a17119d2f886f3e0df9accb9bb3d3db6dac7" +content-hash = "4858af3797ee77908bc209a09a317c000300ed612e87d7113b0953b0a96d563f" From 1a363f54d2e92621dc45326d9992f642d4772855 Mon Sep 17 00:00:00 2001 From: Kalyan Dutia Date: Wed, 18 Sep 2024 15:57:23 +0100 Subject: [PATCH 4/6] run poetry.lock --- 
poetry.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/poetry.lock b/poetry.lock index d15e1cb..a946347 100644 --- a/poetry.lock +++ b/poetry.lock @@ -3784,9 +3784,9 @@ type = ["pytest-mypy"] [extras] spacy = ["spacy"] -vespa = ["pyvespa", "pyyaml", "sentence-transformers"] +vespa = ["pyvespa", "pyyaml"] [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "4858af3797ee77908bc209a09a317c000300ed612e87d7113b0953b0a96d563f" +content-hash = "edfe8501ab3ed88c1da822c9a043db85f94476a21352feb7df9225533a6909de" From bc1a983e5fe49b802cd64cd67f4e08e8cb56d9b3 Mon Sep 17 00:00:00 2001 From: Kalyan Dutia Date: Wed, 18 Sep 2024 15:58:11 +0100 Subject: [PATCH 5/6] remove dynamic summary feature --- tests/local_vespa/test_app/schemas/document_passage.sd | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/local_vespa/test_app/schemas/document_passage.sd b/tests/local_vespa/test_app/schemas/document_passage.sd index dfbcd58..8f2f3df 100644 --- a/tests/local_vespa/test_app/schemas/document_passage.sd +++ b/tests/local_vespa/test_app/schemas/document_passage.sd @@ -29,7 +29,6 @@ schema document_passage { field text_block type string { indexing: attribute | index | summary index: enable-bm25 - summary: dynamic } field text_embedding type tensor(x[768]) { From 141038dea36549afee4f861d15ffa6380c65e0b4 Mon Sep 17 00:00:00 2001 From: Jesse Claven Date: Thu, 19 Sep 2024 11:51:53 +0100 Subject: [PATCH 6/6] feat(cli): Add script entrypoint This builds on the new CLI [2], to make it easier to use. I've also removed left-over dependencies that are no longer needed [1]. 
[1] https://github.com/climatepolicyradar/cpr-sdk/pull/107 [2] https://github.com/climatepolicyradar/cpr-sdk/pull/105 --- README.md | 11 ++++++++++- poetry.lock | 2 +- pyproject.toml | 9 ++++++++- src/cpr_sdk/cli/run_search_query.py | 5 ++++- src/cpr_sdk/version.py | 2 +- 5 files changed, 24 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 4b761f2..eafd7ac 100644 --- a/README.md +++ b/README.md @@ -211,6 +211,15 @@ adaptor.get_by_id(document_id="id:YOUR_NAMESPACE:YOUR_SCHEMA_NAME::SOME_DOCUMENT All of the above search functionality assumes that a valid set of vespa credentials is available in `~/.vespa`, or in a directory supplied to the `VespaSearchAdapter` constructor directly. See [the docs](docs/vespa-auth.md) for more information on how vespa expects credentials. +# CLI + +There is a simple CLI provided. + +```bash +poetry install --extras "vespa" +poetry run cpr +``` + # Test setup Some tests rely on a local running instance of vespa. @@ -247,4 +256,4 @@ make vespa_dev_down - Merge. - Tag a release manually in github with a version that matches the latest on main that you just merged. - In CI/CD we will check that the latest release matches the versions defined in code. -- Check in `pypi`. \ No newline at end of file +- Check in `pypi`. 
diff --git a/poetry.lock b/poetry.lock index a946347..2178356 100644 --- a/poetry.lock +++ b/poetry.lock @@ -3789,4 +3789,4 @@ vespa = ["pyvespa", "pyyaml"] [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "edfe8501ab3ed88c1da822c9a043db85f94476a21352feb7df9225533a6909de" +content-hash = "a150bcde5a281144e73bf5062ecda986396743221ca278cb7bb7d8744dafbf4b" diff --git a/pyproject.toml b/pyproject.toml index 4827e57..c6f5326 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,6 +19,9 @@ classifiers = [ license = "LICENSE" +[tool.poetry.scripts] +cpr = "src.cpr_sdk.cli.run_search_query:app" + [project.urls] Homepage = "https://github.com/climatepolicyradar/cpr-sdk" Repository = "https://github.com/climatepolicyradar/cpr-sdk" @@ -44,7 +47,7 @@ poetry = "^1.8.3" flatten-dict = "^0.4.2" [tool.poetry.extras] -vespa = ["pyvespa", "pyyaml", "sentence-transformers", "torch"] +vespa = ["pyvespa", "pyyaml"] spacy = ["spacy"] [tool.poetry.group.dev] @@ -115,3 +118,7 @@ line-length = 88 [tool.ruff.per-file-ignores] "__init__.py" = ["F401"] "tests/*" = ["E501"] + +[tool.pyright] +stubPath = "" +reportMissingImports = false diff --git a/src/cpr_sdk/cli/run_search_query.py b/src/cpr_sdk/cli/run_search_query.py index 9ce6fcd..10023f0 100644 --- a/src/cpr_sdk/cli/run_search_query.py +++ b/src/cpr_sdk/cli/run_search_query.py @@ -8,7 +8,10 @@ from src.cpr_sdk.models.search import SearchParameters from tests.conftest import VESPA_TEST_SEARCH_URL +app = typer.Typer() + +@app.callback() def main( instance_url: str = VESPA_TEST_SEARCH_URL, exact_match: bool = False, @@ -77,4 +80,4 @@ def main( if __name__ == "__main__": - typer.run(main) + app() diff --git a/src/cpr_sdk/version.py b/src/cpr_sdk/version.py index 3fba0a6..6402b1a 100644 --- a/src/cpr_sdk/version.py +++ b/src/cpr_sdk/version.py @@ -1,6 +1,6 @@ _MAJOR = "1" _MINOR = "6" -_PATCH = "1" +_PATCH = "2" _SUFFIX = "" VERSION_SHORT = "{0}.{1}".format(_MAJOR, _MINOR)