Skip to content

Commit

Permalink
fix: fix sparse embedding errors in hybrid retriever
Browse files Browse the repository at this point in the history
  • Loading branch information
asajatovic committed Dec 18, 2024
1 parent 51638e1 commit b91e180
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ class HybridQdrantInMemoryRetriever(QdrantInMemoryRetriever):
retriever_type: The type of retriever to be instantiated. Should be `ASYMMETRIC` for most query-document retrieval use cases; `SYMMETRIC` is optimized
for similar document retrieval. Defaults to `ASYMMETRIC`.
distance_metric: The distance metric to be used for vector comparison. Defaults to `Distance.COSINE`.
sparse_model_name: The name of the sparse embedding model from `fastembed` to be used. Defaults to `"bm25"`.
sparse_model_name: The name of the sparse embedding model from `fastembed` to be used. Defaults to `"Qdrant/bm25"`.
max_workers: The maximum number of workers to use for concurrent processing. Defaults to 10.
Example:
Expand All @@ -56,7 +56,7 @@ def __init__(
threshold: float = 0.0,
retriever_type: RetrieverType = RetrieverType.ASYMMETRIC,
distance_metric: Distance = Distance.COSINE,
sparse_model_name: str = "bm25",
sparse_model_name: str = "Qdrant/bm25",
max_workers: int = 10,
) -> None:
self._client = client or LimitedConcurrencyClient.from_env()
Expand All @@ -79,8 +79,16 @@ def __init__(

self._search_client.create_collection(
collection_name=self._collection_name,
vectors_config=VectorParams(size=128, distance=self._distance_metric),
sparse_vectors_config=self._search_client.get_fastembed_sparse_vector_params(),
vectors_config={
self._dense_vector_field_name: VectorParams(
size=128, distance=self._distance_metric
)
},
sparse_vectors_config={
self._sparse_vector_field_name: self._search_client.get_fastembed_sparse_vector_params()[ # type: ignore[index]
str(self._search_client.get_sparse_vector_field_name())
]
},
)

self._add_texts_to_memory(documents)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
@fixture
def in_memory_retriever_documents() -> Sequence[Document]:
return [
Document(text="Summer is warm"),
Document(text="Summer is warm but I like it"),
Document(text="I do not like rain"),
Document(text="We are so back"),
Document(text="Summer rain is rejuvenating"),
Expand Down

0 comments on commit b91e180

Please sign in to comment.