From a9325aed4e567542a9967f6b13b16accb6718c5f Mon Sep 17 00:00:00 2001
From: kdutia <20212179+kdutia@users.noreply.github.com>
Date: Tue, 26 Nov 2024 09:35:40 +0000
Subject: [PATCH] Update vespa test files

---
Patch notes (this commentary sits between the "---" separator and the
diffstat, the area the format-patch layout reserves for text that is not
part of the commit):

What the patch changes in the Vespa test fixture schemas:
  * document_passage.sd: adds the index field text_block_not_stemmed
    (fed from text_block, stemming: none), the document-summary
    search_summary_with_tokens (adds text_block_tokens sourced from
    text_block), the rank profiles exact_not_stemmed and
    hybrid_custom_weight, and extra match-features on the exact,
    hybrid_no_closeness and hybrid profiles.
  * family_document.sd: adds the index fields family_name_not_stemmed and
    family_description_not_stemmed (stemming: none), the rank profiles
    exact_not_stemmed, hybrid_no_description_embedding and
    hybrid_custom_weight, and the document-summary
    search_summary_with_tokens.

NOTE(review): the line structure below was reconstructed from a copy of
this patch that had been collapsed onto a few long lines. Every hunk's
line counts match its @@ header and the diffstat, but indentation is
assumed to be 4 spaces per nesting level and blank context lines are
written as empty lines. Confirm against the target files before applying.

NOTE(review): in document_passage.sd, exact_not_stemmed computes
text_score() from fieldMatch(text_block_not_stemmed) but its
match-features export fieldMatch(text_block). Confirm this is intended.

NOTE(review): in family_document.sd, hybrid_no_description_embedding
declares query(query_embedding), and hybrid_custom_weight declares
query(query_embedding) and query(bm25_weight), but none of these inputs
is referenced by the profile's expressions. Confirm this is intended.

NOTE(review): from-disk appears only in the family_document
search_summary_with_tokens summary, not in the document_passage one.
Confirm whether it is still needed.

NOTE(review): the context line "field search_weights_ref type reference {"
carries no target document type. Confirm it matches the target files.

 .../schemas/document_passage.sd               | 66 ++++++++++++-
 .../schemas/family_document.sd                | 92 +++++++++++++++++++
 2 files changed, 155 insertions(+), 3 deletions(-)

diff --git a/tests/search/vespa/fixtures/vespa_test_schema/schemas/document_passage.sd b/tests/search/vespa/fixtures/vespa_test_schema/schemas/document_passage.sd
index 29492a3a..863929db 100644
--- a/tests/search/vespa/fixtures/vespa_test_schema/schemas/document_passage.sd
+++ b/tests/search/vespa/fixtures/vespa_test_schema/schemas/document_passage.sd
@@ -1,5 +1,10 @@
 schema document_passage {

+    field text_block_not_stemmed type string {
+        indexing: input text_block | summary | index
+        stemming: none
+    }
+
     document document_passage {

         field search_weights_ref type reference {
@@ -134,6 +139,37 @@ schema document_passage {
         summary concepts {}
     }

+    document-summary search_summary_with_tokens {
+        summary family_name {}
+        summary family_description {}
+        summary family_import_id {}
+        summary family_slug {}
+        summary family_category {}
+        summary family_publication_ts {}
+        summary family_geography {}
+        summary family_geographies {}
+        summary family_source {}
+        summary document_import_id {}
+        summary document_slug {}
+        summary document_languages {}
+        summary document_content_type {}
+        summary document_cdn_object {}
+        summary document_source_url {}
+        summary corpus_import_id {}
+        summary corpus_type_name {}
+        summary metadata {}
+        summary text_block {}
+        summary text_block_id {}
+        summary text_block_type {}
+        summary text_block_page {}
+        summary text_block_coords {}
+        summary concepts {}
+        summary text_block_tokens {
+            source: text_block
+            tokens
+        }
+    }
+
     rank-profile exact inherits default {
         function text_score() {
             expression: attribute(passage_weight) * fieldMatch(text_block)
@@ -141,7 +177,17 @@ schema document_passage {
         first-phase {
             expression: text_score()
         }
-        match-features: text_score()
+        match-features: text_score() fieldMatch(text_block)
+    }
+
+    rank-profile exact_not_stemmed inherits default {
+        function text_score() {
+            expression: attribute(passage_weight) * fieldMatch(text_block_not_stemmed)
+        }
+        first-phase {
+            expression: text_score()
+        }
+        match-features: text_score() fieldMatch(text_block)
     }

     rank-profile hybrid_no_closeness inherits default {
@@ -151,7 +197,7 @@ schema document_passage {
         first-phase {
             expression: text_score()
         }
-        match-features: text_score()
+        match-features: text_score() bm25(text_block)
     }

     rank-profile hybrid inherits default {
@@ -164,6 +210,20 @@ schema document_passage {
         first-phase {
             expression: text_score()
         }
-        match-features: text_score()
+        match-features: text_score() bm25(text_block) closeness(text_embedding)
+    }
+
+    rank-profile hybrid_custom_weight inherits default {
+        inputs {
+            query(query_embedding) tensor(x[768])
+            query(bm25_weight) double
+        }
+        function text_score() {
+            expression: attribute(passage_weight) * (query(bm25_weight) * bm25(text_block) + closeness(text_embedding))
+        }
+        first-phase {
+            expression: text_score()
+        }
+        match-features: text_score() bm25(text_block) closeness(text_embedding)
     }
 }
diff --git a/tests/search/vespa/fixtures/vespa_test_schema/schemas/family_document.sd b/tests/search/vespa/fixtures/vespa_test_schema/schemas/family_document.sd
index e62d6df5..e56963b2 100644
--- a/tests/search/vespa/fixtures/vespa_test_schema/schemas/family_document.sd
+++ b/tests/search/vespa/fixtures/vespa_test_schema/schemas/family_document.sd
@@ -1,5 +1,15 @@
 schema family_document {

+    field family_name_not_stemmed type string {
+        indexing: input family_name_index | index
+        stemming: none
+    }
+
+    field family_description_not_stemmed type string {
+        indexing: input family_description_index | index
+        stemming: none
+    }
+
     document family_document {

         field search_weights_ref type reference {
@@ -170,6 +180,19 @@ schema family_document {
         }
         match-features: name_score() description_score()
     }
+
+    rank-profile exact_not_stemmed inherits default {
+        function name_score() {
+            expression: attribute(name_weight) * fieldMatch(family_name_not_stemmed)
+        }
+        function description_score() {
+            expression: attribute(description_weight) * fieldMatch(family_description_not_stemmed)
+        }
+        first-phase {
+            expression: name_score() + description_score()
+        }
+        match-features: name_score() description_score()
+    }

     rank-profile hybrid_no_closeness inherits default {
         function name_score() {
@@ -199,6 +222,40 @@ schema family_document {
         }
         match-features: name_score() description_score()
     }
+
+    rank-profile hybrid_no_description_embedding inherits default {
+        inputs {
+            query(query_embedding) tensor(x[768])
+        }
+        function name_score() {
+            expression: attribute(name_weight) * bm25(family_name_index)
+        }
+        function description_score() {
+            expression: attribute(description_weight) * bm25(family_description_index)
+        }
+        first-phase {
+            expression: name_score() + description_score()
+        }
+        match-features: name_score() description_score()
+    }
+
+    rank-profile hybrid_custom_weight inherits default {
+        inputs {
+            query(query_embedding) tensor(x[768])
+            query(bm25_weight) double
+        }
+        function name_score() {
+            expression: attribute(name_weight) * bm25(family_name_index)
+        }
+        function description_score() {
+            expression: attribute(description_weight) * bm25(family_description_index)
+        }
+        first-phase {
+            expression: name_score() + description_score()
+        }
+        match-features: name_score() description_score()
+    }
+

     document-summary search_summary {
         summary family_name {}
@@ -223,4 +280,39 @@ schema family_document {
         summary collection_title {}
         summary collection_summary {}
     }
+
+    document-summary search_summary_with_tokens {
+        summary family_name {}
+        summary family_description {}
+        summary family_import_id {}
+        summary family_slug {}
+        summary family_category {}
+        summary family_publication_ts {}
+        summary family_geography {}
+        summary family_geographies {}
+        summary family_source {}
+        summary document_import_id {}
+        summary document_title {}
+        summary document_slug {}
+        summary document_languages {}
+        summary document_content_type {}
+        summary document_cdn_object {}
+        summary document_source_url {}
+        summary metadata {}
+        summary corpus_import_id {}
+        summary corpus_type_name {}
+        summary collection_title {}
+        summary collection_summary {}
+        summary family_name_index {}
+        summary family_name_index_tokens {
+            source: family_name_index
+            tokens
+        }
+        summary family_description_index {}
+        summary family_description_index_tokens {
+            source: family_description_index
+            tokens
+        }
+        from-disk
+    }
 }