Skip to content

Commit

Permalink
enhance: Update hello_hybrid_sparse_dense.py example to include BGE r…
Browse files Browse the repository at this point in the history
…eranker (#2028)

Signed-off-by: Buqian Zheng <[email protected]>
  • Loading branch information
zhengbuqian authored Apr 10, 2024
1 parent 7809192 commit 9957fbc
Showing 1 changed file with 27 additions and 4 deletions.
31 changes: 27 additions & 4 deletions examples/hello_hybrid_sparse_dense.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@
# If true, use BGE-M3 model to generate dense and sparse vectors.
# If false, use random numbers to compose dense and sparse vectors.
use_bge_m3 = True
# If both use_bge_m3 and use_reranker are true, the search result will be reranked
# using BGE CrossEncoder model.
use_reranker = True

# The overall steps are as follows:
# 1. embed the text as dense and sparse vectors
Expand Down Expand Up @@ -113,12 +116,32 @@ def random_embedding(texts):
# Currently Milvus only support 1 query in the same hybrid search request, so
# we inspect res[0] directly. In future release Milvus will accept batch
# hybrid search queries in the same call.
for hit in res[0]:
print(f'text: {hit.fields["text"]} distance {hit.distance}')

# If you are using BGE-M3 to generate the embedding, you should see the following:
res = res[0]

if use_bge_m3 and use_reranker:
result_texts = [hit.fields["text"] for hit in res]
from pymilvus.model.reranker import BGERerankFunction
bge_rf = BGERerankFunction(device='cpu')
# rerank the results using BGE CrossEncoder model
results = bge_rf(query, result_texts, top_k=2)
for hit in results:
print(f'text: {hit.text} distance {hit.score}')
else:
for hit in res:
print(f'text: {hit.fields["text"]} distance {hit.distance}')

# If you used both BGE-M3 and the reranker, you should see the following:
# text: Alan Turing was the first person to conduct substantial research in AI. distance 0.9306981017573297
# text: Artificial intelligence was founded as an academic discipline in 1956. distance 0.03217001154515051
#
# If you used only BGE-M3, you should see the following:
# text: Alan Turing was the first person to conduct substantial research in AI. distance 0.032786883413791656
# text: Artificial intelligence was founded as an academic discipline in 1956. distance 0.016129031777381897

# In this simple example the reranker yields the same result as the embedding based hybrid search, but in more complex
# scenarios the reranker can provide more accurate results.

# If you used random vectors, the result will be different each time you run the script.

# Drop the collection to clean up the data.
utility.drop_collection(col_name)

0 comments on commit 9957fbc

Please sign in to comment.