Skip to content

Commit

Permalink
Merge branch 'main' into dependabot/pip/matplotlib-3.10.0
Browse files Browse the repository at this point in the history
  • Loading branch information
SebastianNiehusAA authored Dec 17, 2024
2 parents 8c418f0 + 61d0660 commit 5cb274a
Show file tree
Hide file tree
Showing 7 changed files with 97 additions and 12 deletions.
5 changes: 4 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,13 @@
- Add `create_project` bool to `StudioClient.__init__()` to enable users to automatically create their Studio projects
- Add a progress bar to the `Runner` to be able to track the `Run`
- Add `StudioClient.submit_benchmark_lineages` function and include it in `StudioClient.submit_benchmark_execution`

#### DocumentIndexClient
- Add method `DocumentIndexClient.chunks()` for retrieving all text chunks of a document.
- Add metadata filter `FilterOps.IS_NULL`, which allows filtering fields based on whether their value is null.

### Fixes
...
- The Document Index `SearchQuery` now correctly allows searches with a negative `min_score`.

### Deprecations
...
Expand Down
6 changes: 3 additions & 3 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion src/documentation/elo_qa_eval.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -448,7 +448,7 @@
"outputs": [],
"source": [
"newly_added_models = [\n",
" Llama3InstructModel(name=\"llama-3.1-70b-instruct\", client=aa_client),\n",
" Llama3InstructModel(name=\"llama-3.3-70b-instruct\", client=aa_client),\n",
"]\n",
"\n",
"for model in newly_added_models:\n",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,7 @@ class FilterOps(Enum):
BEFORE = "before"
AT_OR_BEFORE = "at_or_before"
EQUAL_TO = "equal_to"
IS_NULL = "is_null"


class FilterField(BaseModel):
Expand Down Expand Up @@ -293,16 +294,15 @@ class SearchQuery(BaseModel):
query: Actual text to be searched with.
max_results: Max number of search results to be retrieved by the query.
Must be larger than 0.
min_score: Filter out results with a similarity score below this value.
Must be between 0 and 1.
For searches on hybrid indexes, the Document Index applies the min_score
to the semantic results before fusion of result sets. As fusion re-scores results,
min_score: Filter out results with a similarity score below this value. Must be between
-1 and 1. For searches on hybrid indexes, the Document Index applies the min_score to
the semantic results before fusion of result sets. As fusion re-scores results,
returned scores may exceed this value.
"""

query: str
max_results: int = Field(ge=0, default=1)
min_score: float = Field(ge=0.0, le=1.0, default=0.0)
min_score: float = Field(ge=-1.0, le=1.0, default=0.0)
filters: Optional[list[Filters]] = None


Expand Down
4 changes: 2 additions & 2 deletions src/intelligence_layer/core/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,7 @@ def __init__(
)
if name not in [model["name"] for model in self._client.models()]:
warnings.warn(
"The provided model is not a recommended model for this model class."
"The provided model is not a recommended model for this model class. "
"Make sure that the model you have selected is suited to be use for the prompt template used in this model class."
)
self._complete: Task[CompleteInput, CompleteOutput] = _Complete(
Expand Down Expand Up @@ -414,7 +414,7 @@ def __init__(
) -> None:
if name not in self.RECOMMENDED_MODELS or name == "":
warnings.warn(
"The provided model is not a recommended model for this model class."
"The provided model is not a recommended model for this model class. "
"Make sure that the model you have selected is suited to be use for the prompt template used in this model class."
)
super().__init__(name, client)
Expand Down
7 changes: 7 additions & 0 deletions tests/conftest_document_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,7 @@ def document_contents_with_metadata() -> list[DocumentContents]:

metadata_1: JsonSerializable = {
"string-field": "example_string_1",
"option-field": None,
"integer-field": 123,
"float-field": 123.45,
"boolean-field": True,
Expand All @@ -168,6 +169,7 @@ def document_contents_with_metadata() -> list[DocumentContents]:

metadata_2: JsonSerializable = {
"string-field": "example_string_2",
"option-field": "example_string_2",
"integer-field": 456,
"float-field": 678.90,
"boolean-field": False,
Expand All @@ -178,6 +180,7 @@ def document_contents_with_metadata() -> list[DocumentContents]:

metadata_3: JsonSerializable = {
"string-field": "example_string_3",
"option-field": "example_string_3",
"integer-field": 789,
"float-field": 101112.13,
"boolean-field": True,
Expand Down Expand Up @@ -237,6 +240,10 @@ def filter_index_configs(
"field-name": "string-field",
"field-type": "string",
},
random_identifier(): {
"field-name": "option-field",
"field-type": "string",
},
random_identifier(): {
"field-name": "integer-field",
"field-type": "integer",
Expand Down
75 changes: 75 additions & 0 deletions tests/connectors/document_index/test_document_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -408,6 +408,81 @@ def search() -> None:
search()


def test_search_with_null_filter(
    document_index: DocumentIndexClient,
    read_only_populated_collection: tuple[CollectionPath, IndexPath],
) -> None:
    """Search with a "with" filter using ``FilterOps.IS_NULL`` on "option-field".

    The search is expected to return exactly one result, whose document name
    is "document-0".
    """
    # Build the query bottom-up: field criterion -> filter group -> query.
    null_criterion = FilterField(
        field_name="option-field",
        field_value=True,
        criteria=FilterOps.IS_NULL,
    )
    with_null = Filters(filter_type="with", fields=[null_criterion])
    search_query = SearchQuery(
        query="Pemberton",
        max_results=10,
        min_score=0.5,
        filters=[with_null],
    )

    # NOTE(review): `retry` is defined elsewhere; presumably it re-runs the
    # body on failure to tolerate indexing latency - confirm.
    @retry
    def search() -> None:
        collection_path, index_path = read_only_populated_collection
        hits = document_index.search(collection_path, index_path.index, search_query)
        assert len(hits) == 1
        only_hit = hits[0]
        assert only_hit.document_path.document_name == "document-0"

    search()


def test_search_with_null_filter_without(
    document_index: DocumentIndexClient,
    read_only_populated_collection: tuple[CollectionPath, IndexPath],
) -> None:
    """Search with a "without" filter using ``FilterOps.IS_NULL`` on "option-field".

    The search is expected to return exactly two results, named "document-1"
    and "document-2" (order is not checked).
    """
    # Build the query bottom-up: field criterion -> filter group -> query.
    null_criterion = FilterField(
        field_name="option-field",
        field_value=True,
        criteria=FilterOps.IS_NULL,
    )
    without_null = Filters(filter_type="without", fields=[null_criterion])
    search_query = SearchQuery(
        query="Pemberton",
        max_results=10,
        min_score=0.5,
        filters=[without_null],
    )

    # NOTE(review): `retry` is defined elsewhere; presumably it re-runs the
    # body on failure to tolerate indexing latency - confirm.
    @retry
    def search() -> None:
        collection_path, index_path = read_only_populated_collection
        hits = document_index.search(collection_path, index_path.index, search_query)
        assert len(hits) == 2
        expected_names = {"document-1", "document-2"}
        found_names = {hit.document_path.document_name for hit in hits}
        assert found_names == expected_names

    search()


def test_search_with_integer_filter(
document_index: DocumentIndexClient,
read_only_populated_collection: tuple[CollectionPath, IndexPath],
Expand Down

0 comments on commit 5cb274a

Please sign in to comment.