From 9048c5b5651f757ca9089b5947d6dd3e3f8dfe29 Mon Sep 17 00:00:00 2001 From: Kalyan Dutia Date: Wed, 18 Dec 2024 11:09:47 +0000 Subject: [PATCH 01/11] set document language to english --- tests/local_vespa/test_app/schemas/document_passage.sd | 4 ++++ tests/local_vespa/test_app/schemas/family_document.sd | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/tests/local_vespa/test_app/schemas/document_passage.sd b/tests/local_vespa/test_app/schemas/document_passage.sd index 5a221f1..533ef45 100644 --- a/tests/local_vespa/test_app/schemas/document_passage.sd +++ b/tests/local_vespa/test_app/schemas/document_passage.sd @@ -5,6 +5,10 @@ schema document_passage { stemming: none } + field language type string { + indexing: "en" | set_language + } + document document_passage { field search_weights_ref type reference { diff --git a/tests/local_vespa/test_app/schemas/family_document.sd b/tests/local_vespa/test_app/schemas/family_document.sd index de73ae2..0d11ff5 100644 --- a/tests/local_vespa/test_app/schemas/family_document.sd +++ b/tests/local_vespa/test_app/schemas/family_document.sd @@ -10,6 +10,10 @@ schema family_document { stemming: none } + field language type string { + indexing: "en" | set_language + } + document family_document { field search_weights_ref type reference { From 9b8ea4a57bcb0d34b6fe7a6b5ef65cd29281145d Mon Sep 17 00:00:00 2001 From: Kalyan Dutia Date: Wed, 18 Dec 2024 11:14:37 +0000 Subject: [PATCH 02/11] remove unused 'exact' rank-profile --- .../test_app/schemas/document_passage.sd | 10 ---------- .../local_vespa/test_app/schemas/family_document.sd | 13 ------------- 2 files changed, 23 deletions(-) diff --git a/tests/local_vespa/test_app/schemas/document_passage.sd b/tests/local_vespa/test_app/schemas/document_passage.sd index 533ef45..8c892f6 100644 --- a/tests/local_vespa/test_app/schemas/document_passage.sd +++ b/tests/local_vespa/test_app/schemas/document_passage.sd @@ -173,16 +173,6 @@ schema document_passage { tokens } } - - rank-profile 
exact inherits default { - function text_score() { - expression: attribute(passage_weight) * fieldMatch(text_block) - } - first-phase { - expression: text_score() - } - summary-features: text_score() fieldMatch(text_block) - } rank-profile exact_not_stemmed inherits default { function text_score() { diff --git a/tests/local_vespa/test_app/schemas/family_document.sd b/tests/local_vespa/test_app/schemas/family_document.sd index 0d11ff5..da16473 100644 --- a/tests/local_vespa/test_app/schemas/family_document.sd +++ b/tests/local_vespa/test_app/schemas/family_document.sd @@ -172,19 +172,6 @@ schema family_document { fields: family_name_index, family_description_index } - rank-profile exact inherits default { - function name_score() { - expression: attribute(name_weight) * fieldMatch(family_name_index) - } - function description_score() { - expression: attribute(description_weight) * fieldMatch(family_description_index) - } - first-phase { - expression: name_score() + description_score() - } - summary-features: name_score() description_score() - } - rank-profile exact_not_stemmed inherits default { function name_score() { expression: attribute(name_weight) * fieldMatch(family_name_not_stemmed) From 59084c5e5b7a8ee4f908bcbde50817685ef5d6d7 Mon Sep 17 00:00:00 2001 From: Kalyan Dutia Date: Wed, 18 Dec 2024 11:34:42 +0000 Subject: [PATCH 03/11] add weights with defaults to hybrid profile --- .../test_app/schemas/document_passage.sd | 20 +++------- .../test_app/schemas/family_document.sd | 40 ++++--------------- 2 files changed, 13 insertions(+), 47 deletions(-) diff --git a/tests/local_vespa/test_app/schemas/document_passage.sd b/tests/local_vespa/test_app/schemas/document_passage.sd index 8c892f6..82ef7fb 100644 --- a/tests/local_vespa/test_app/schemas/document_passage.sd +++ b/tests/local_vespa/test_app/schemas/document_passage.sd @@ -197,27 +197,19 @@ schema document_passage { rank-profile hybrid inherits default { inputs { query(query_embedding) tensor(x[768]) + 
query(passage_bm25_weight) double: 1.0 + query(passage_closeness_weight) double: 1.0 } function text_score() { - expression: attribute(passage_weight) * (bm25(text_block) + closeness(text_embedding)) + expression: attribute(passage_weight) * (query(passage_bm25_weight) * bm25(text_block) + query(passage_closeness_weight) * closeness(text_embedding)) } first-phase { expression: text_score() } summary-features: text_score() bm25(text_block) closeness(text_embedding) } - - rank-profile hybrid_custom_weight inherits default { - inputs { - query(query_embedding) tensor(x[768]) - query(bm25_weight) double - } - function text_score() { - expression: attribute(passage_weight) * (query(bm25_weight) * bm25(text_block) + closeness(text_embedding)) - } - first-phase { - expression: text_score() - } - summary-features: text_score() bm25(text_block) closeness(text_embedding) + + rank-profile hybrid_no_embedding inherits hybrid { } + } diff --git a/tests/local_vespa/test_app/schemas/family_document.sd b/tests/local_vespa/test_app/schemas/family_document.sd index da16473..d8a41db 100644 --- a/tests/local_vespa/test_app/schemas/family_document.sd +++ b/tests/local_vespa/test_app/schemas/family_document.sd @@ -201,53 +201,27 @@ schema family_document { rank-profile hybrid inherits default { inputs { query(query_embedding) tensor(x[768]) + query(description_bm25_weight) double: 1.0 + query(description_closeness_weight) double: 1.0 } function name_score() { - expression: attribute(name_weight) * bm25(family_name_index) + expression: bm25(family_name_index) } function description_score() { - expression: attribute(description_weight) * (bm25(family_description_index) + closeness(family_description_embedding)) + expression: query(description_bm25_weight) * bm25(family_description_index) + query(description_closeness_weight) * closeness(family_description_embedding) } first-phase { - expression: name_score() + description_score() + expression: (attribute(name_weight) * name_score()) + 
(attribute(description_weight) * description_score()) } summary-features: name_score() description_score() } - rank-profile hybrid_no_description_embedding inherits default { + rank-profile hybrid_no_description_embedding inherits hybrid { inputs { - query(query_embedding) tensor(x[768]) - } - function name_score() { - expression: attribute(name_weight) * bm25(family_name_index) + query(description_closeness_weight) double: 0.0 } - function description_score() { - expression: attribute(description_weight) * bm25(family_description_index) - } - first-phase { - expression: name_score() + description_score() - } - summary-features: name_score() description_score() } - rank-profile hybrid_custom_weight inherits default { - inputs { - query(query_embedding) tensor(x[768]) - query(bm25_weight) double - } - function name_score() { - expression: attribute(name_weight) * bm25(family_name_index) - } - function description_score() { - expression: attribute(description_weight) * bm25(family_description_index) - } - first-phase { - expression: name_score() + description_score() - } - summary-features: name_score() description_score() - } - - document-summary search_summary { summary family_name {} summary family_description {} From 401105e5591c22e5d65b3f48a7157fa727c754be Mon Sep 17 00:00:00 2001 From: Kalyan Dutia Date: Wed, 18 Dec 2024 11:36:51 +0000 Subject: [PATCH 04/11] hybrid_no_closeness schema inherits from hybrid --- .../test_app/schemas/document_passage.sd | 18 +++++++----------- .../test_app/schemas/family_document.sd | 19 ++++++------------- 2 files changed, 13 insertions(+), 24 deletions(-) diff --git a/tests/local_vespa/test_app/schemas/document_passage.sd b/tests/local_vespa/test_app/schemas/document_passage.sd index 82ef7fb..1e0643e 100644 --- a/tests/local_vespa/test_app/schemas/document_passage.sd +++ b/tests/local_vespa/test_app/schemas/document_passage.sd @@ -184,16 +184,6 @@ schema document_passage { summary-features: text_score() fieldMatch(text_block) } - 
rank-profile hybrid_no_closeness inherits default { - function text_score() { - expression: attribute(passage_weight) * bm25(text_block) - } - first-phase { - expression: text_score() - } - summary-features: text_score() bm25(text_block) - } - rank-profile hybrid inherits default { inputs { query(query_embedding) tensor(x[768]) @@ -209,7 +199,13 @@ schema document_passage { summary-features: text_score() bm25(text_block) closeness(text_embedding) } - rank-profile hybrid_no_embedding inherits hybrid { + rank-profile hybrid_no_closeness inherits hybrid { + inputs { + query(passage_closeness_weight) double: 0.0 + } + } + + rank-profile hybrid_no_description_embedding inherits hybrid { } } diff --git a/tests/local_vespa/test_app/schemas/family_document.sd b/tests/local_vespa/test_app/schemas/family_document.sd index d8a41db..0fdf0a5 100644 --- a/tests/local_vespa/test_app/schemas/family_document.sd +++ b/tests/local_vespa/test_app/schemas/family_document.sd @@ -185,19 +185,6 @@ schema family_document { summary-features: name_score() description_score() } - rank-profile hybrid_no_closeness inherits default { - function name_score() { - expression: attribute(name_weight) * bm25(family_name_index) - } - function description_score() { - expression: attribute(description_weight) * bm25(family_description_index) - } - first-phase { - expression: name_score() + description_score() - } - summary-features: name_score() description_score() - } - rank-profile hybrid inherits default { inputs { query(query_embedding) tensor(x[768]) @@ -215,6 +202,12 @@ schema family_document { } summary-features: name_score() description_score() } + + rank-profile hybrid_no_closeness inherits hybrid { + inputs { + query(description_closeness_weight) double: 0.0 + } + } rank-profile hybrid_no_description_embedding inherits hybrid { inputs { From 6a7c6b087f86751f3b15c199b0b93ade708931fc Mon Sep 17 00:00:00 2001 From: Kalyan Dutia Date: Wed, 18 Dec 2024 11:37:23 +0000 Subject: [PATCH 05/11] remove 
hybrid_no_description_embedding rank-profile --- tests/local_vespa/test_app/schemas/document_passage.sd | 3 --- tests/local_vespa/test_app/schemas/family_document.sd | 6 ------ 2 files changed, 9 deletions(-) diff --git a/tests/local_vespa/test_app/schemas/document_passage.sd b/tests/local_vespa/test_app/schemas/document_passage.sd index 1e0643e..3cea617 100644 --- a/tests/local_vespa/test_app/schemas/document_passage.sd +++ b/tests/local_vespa/test_app/schemas/document_passage.sd @@ -204,8 +204,5 @@ schema document_passage { query(passage_closeness_weight) double: 0.0 } } - - rank-profile hybrid_no_description_embedding inherits hybrid { - } } diff --git a/tests/local_vespa/test_app/schemas/family_document.sd b/tests/local_vespa/test_app/schemas/family_document.sd index 0fdf0a5..9795f4b 100644 --- a/tests/local_vespa/test_app/schemas/family_document.sd +++ b/tests/local_vespa/test_app/schemas/family_document.sd @@ -209,12 +209,6 @@ schema family_document { } } - rank-profile hybrid_no_description_embedding inherits hybrid { - inputs { - query(description_closeness_weight) double: 0.0 - } - } - document-summary search_summary { summary family_name {} summary family_description {} From 780bfa4fde9e2ad22d6dd885de8da2f532b50615 Mon Sep 17 00:00:00 2001 From: Kalyan Dutia Date: Wed, 18 Dec 2024 11:40:32 +0000 Subject: [PATCH 06/11] search_summary_with_tokens inherits search_summary --- .../test_app/schemas/document_passage.sd | 26 +------------------ .../test_app/schemas/family_document.sd | 23 +--------------- 2 files changed, 2 insertions(+), 47 deletions(-) diff --git a/tests/local_vespa/test_app/schemas/document_passage.sd b/tests/local_vespa/test_app/schemas/document_passage.sd index 3cea617..92a6a7f 100644 --- a/tests/local_vespa/test_app/schemas/document_passage.sd +++ b/tests/local_vespa/test_app/schemas/document_passage.sd @@ -143,31 +143,7 @@ schema document_passage { summary concepts {} } - document-summary search_summary_with_tokens { - summary family_name 
{} - summary family_description {} - summary family_import_id {} - summary family_slug {} - summary family_category {} - summary family_publication_ts {} - summary family_geography {} - summary family_geographies {} - summary family_source {} - summary document_import_id {} - summary document_slug {} - summary document_languages {} - summary document_content_type {} - summary document_cdn_object {} - summary document_source_url {} - summary corpus_import_id {} - summary corpus_type_name {} - summary metadata {} - summary text_block {} - summary text_block_id {} - summary text_block_type {} - summary text_block_page {} - summary text_block_coords {} - summary concepts {} + document-summary search_summary_with_tokens inherits search_summary { summary text_block_tokens { source: text_block tokens diff --git a/tests/local_vespa/test_app/schemas/family_document.sd b/tests/local_vespa/test_app/schemas/family_document.sd index 9795f4b..5155d26 100644 --- a/tests/local_vespa/test_app/schemas/family_document.sd +++ b/tests/local_vespa/test_app/schemas/family_document.sd @@ -233,28 +233,7 @@ schema family_document { summary collection_summary {} } - document-summary search_summary_with_tokens { - summary family_name {} - summary family_description {} - summary family_import_id {} - summary family_slug {} - summary family_category {} - summary family_publication_ts {} - summary family_geography {} - summary family_geographies {} - summary family_source {} - summary document_import_id {} - summary document_title {} - summary document_slug {} - summary document_languages {} - summary document_content_type {} - summary document_cdn_object {} - summary document_source_url {} - summary metadata {} - summary corpus_import_id {} - summary corpus_type_name {} - summary collection_title {} - summary collection_summary {} + document-summary search_summary_with_tokens inherits search_summary { summary family_name_index {} summary family_name_index_tokens { source: family_name_index From 
7e2411d520327b56b33889fde4e5d06495a035e9 Mon Sep 17 00:00:00 2001 From: Kalyan Dutia Date: Wed, 18 Dec 2024 11:49:09 +0000 Subject: [PATCH 07/11] add all features to rank profile summary-features --- .../local_vespa/test_app/schemas/document_passage.sd | 12 ++++++------ .../local_vespa/test_app/schemas/family_document.sd | 10 +++++----- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/tests/local_vespa/test_app/schemas/document_passage.sd b/tests/local_vespa/test_app/schemas/document_passage.sd index 92a6a7f..db58fce 100644 --- a/tests/local_vespa/test_app/schemas/document_passage.sd +++ b/tests/local_vespa/test_app/schemas/document_passage.sd @@ -152,12 +152,12 @@ schema document_passage { rank-profile exact_not_stemmed inherits default { function text_score() { - expression: attribute(passage_weight) * fieldMatch(text_block_not_stemmed) + expression: fieldMatch(text_block_not_stemmed) } first-phase { - expression: text_score() + expression: attribute(passage_weight) * text_score() } - summary-features: text_score() fieldMatch(text_block) + summary-features: attribute(passage_weight) text_score() } rank-profile hybrid inherits default { @@ -167,12 +167,12 @@ schema document_passage { query(passage_closeness_weight) double: 1.0 } function text_score() { - expression: attribute(passage_weight) * (query(passage_bm25_weight) * bm25(text_block) + query(passage_closeness_weight) * closeness(text_embedding)) + expression: query(passage_bm25_weight) * bm25(text_block) + query(passage_closeness_weight) * closeness(text_embedding) } first-phase { - expression: text_score() + expression: attribute(passage_weight) * text_score() } - summary-features: text_score() bm25(text_block) closeness(text_embedding) + summary-features: text_score() bm25(text_block) closeness(text_embedding) attribute(passage_weight) } rank-profile hybrid_no_closeness inherits hybrid { diff --git a/tests/local_vespa/test_app/schemas/family_document.sd 
b/tests/local_vespa/test_app/schemas/family_document.sd index 5155d26..8639781 100644 --- a/tests/local_vespa/test_app/schemas/family_document.sd +++ b/tests/local_vespa/test_app/schemas/family_document.sd @@ -174,15 +174,15 @@ schema family_document { rank-profile exact_not_stemmed inherits default { function name_score() { - expression: attribute(name_weight) * fieldMatch(family_name_not_stemmed) + expression: fieldMatch(family_name_not_stemmed) } function description_score() { - expression: attribute(description_weight) * fieldMatch(family_description_not_stemmed) + expression: fieldMatch(family_description_not_stemmed) } first-phase { - expression: name_score() + description_score() + expression: attribute(name_weight) * name_score() + attribute(description_weight) * description_score() } - summary-features: name_score() description_score() + summary-features: name_score() description_score() attribute(name_weight) attribute(description_weight) } rank-profile hybrid inherits default { @@ -200,7 +200,7 @@ schema family_document { first-phase { expression: (attribute(name_weight) * name_score()) + (attribute(description_weight) * description_score()) } - summary-features: name_score() description_score() + summary-features: name_score() description_score() bm25(family_name_index) bm25(family_description_index) closeness(family_description_embedding) } rank-profile hybrid_no_closeness inherits hybrid { From 4f238b1066453d0fc33092b75bb7aed6dfea2a45 Mon Sep 17 00:00:00 2001 From: Kalyan Dutia Date: Wed, 18 Dec 2024 11:51:35 +0000 Subject: [PATCH 08/11] add nativerank profiles --- .../test_app/schemas/document_passage.sd | 15 +++++++++++++++ .../test_app/schemas/family_document.sd | 18 ++++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/tests/local_vespa/test_app/schemas/document_passage.sd b/tests/local_vespa/test_app/schemas/document_passage.sd index db58fce..94792b8 100644 --- a/tests/local_vespa/test_app/schemas/document_passage.sd +++ 
b/tests/local_vespa/test_app/schemas/document_passage.sd @@ -174,6 +174,21 @@ schema document_passage { } summary-features: text_score() bm25(text_block) closeness(text_embedding) attribute(passage_weight) } + + rank-profile hybrid_nativerank inherits default { + inputs { + query(query_embedding) tensor(x[768]) + query(passage_nativerank_weight) double: 1.0 + query(passage_closeness_weight) double: 1.0 + } + function text_score() { + expression: query(passage_nativerank_weight) * nativeRank(text_block) + query(passage_closeness_weight) * closeness(text_embedding) + } + first-phase { + expression: attribute(passage_weight) * text_score() + } + summary-features: text_score() nativeRank(text_block) closeness(text_embedding) attribute(passage_weight) + } rank-profile hybrid_no_closeness inherits hybrid { inputs { diff --git a/tests/local_vespa/test_app/schemas/family_document.sd b/tests/local_vespa/test_app/schemas/family_document.sd index 8639781..669101e 100644 --- a/tests/local_vespa/test_app/schemas/family_document.sd +++ b/tests/local_vespa/test_app/schemas/family_document.sd @@ -202,6 +202,24 @@ schema family_document { } summary-features: name_score() description_score() bm25(family_name_index) bm25(family_description_index) closeness(family_description_embedding) } + + rank-profile hybrid_nativerank inherits default { + inputs { + query(query_embedding) tensor(x[768]) + query(description_nativerank_weight) double: 1.0 + query(description_closeness_weight) double: 1.0 + } + function name_score() { + expression: nativeRank(family_name_index) + } + function description_score() { + expression: query(description_nativerank_weight) * nativeRank(family_description_index) + query(description_closeness_weight) * closeness(family_description_embedding) + } + first-phase { + expression: (attribute(name_weight) * name_score()) + (attribute(description_weight) * description_score()) + } + summary-features: name_score() description_score() nativeRank(family_name_index) 
nativeRank(family_description_index) closeness(family_description_embedding) + } rank-profile hybrid_no_closeness inherits hybrid { inputs { From 7ab8260b990abee00830c5d30df086ae29cb5739 Mon Sep 17 00:00:00 2001 From: Kalyan Dutia Date: Wed, 18 Dec 2024 11:54:20 +0000 Subject: [PATCH 09/11] add field variants with bolding --- tests/local_vespa/test_app/schemas/document_passage.sd | 5 +++++ tests/local_vespa/test_app/schemas/family_document.sd | 10 ++++++++++ 2 files changed, 15 insertions(+) diff --git a/tests/local_vespa/test_app/schemas/document_passage.sd b/tests/local_vespa/test_app/schemas/document_passage.sd index 94792b8..d5ccf10 100644 --- a/tests/local_vespa/test_app/schemas/document_passage.sd +++ b/tests/local_vespa/test_app/schemas/document_passage.sd @@ -9,6 +9,11 @@ schema document_passage { indexing: "en" | set_language } + field text_block_bolding type string { + indexing: input text_block | summary | index + bolding: true + } + document document_passage { field search_weights_ref type reference { diff --git a/tests/local_vespa/test_app/schemas/family_document.sd b/tests/local_vespa/test_app/schemas/family_document.sd index 669101e..c012052 100644 --- a/tests/local_vespa/test_app/schemas/family_document.sd +++ b/tests/local_vespa/test_app/schemas/family_document.sd @@ -10,6 +10,16 @@ schema family_document { stemming: none } + field family_name_bolding type string { + indexing: input family_name_index | index + bolding: true + } + + field family_description_bolding type string { + indexing: input family_description_index | index + bolding: true + } + field language type string { indexing: "en" | set_language } From d6e21d895bf7c6f011e92f27346cc713cd5d3492 Mon Sep 17 00:00:00 2001 From: Kalyan Dutia Date: Wed, 18 Dec 2024 12:16:13 +0000 Subject: [PATCH 10/11] bump version to 1.12.0 --- src/cpr_sdk/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cpr_sdk/version.py b/src/cpr_sdk/version.py index f580e9c..5b93ec2 100644 
--- a/src/cpr_sdk/version.py +++ b/src/cpr_sdk/version.py @@ -1,5 +1,5 @@ _MAJOR = "1" -_MINOR = "11" +_MINOR = "12" _PATCH = "0" _SUFFIX = "" From 3aec5fc861f914e0f505ffbe9252c75ac929d2d7 Mon Sep 17 00:00:00 2001 From: Kalyan Dutia Date: Wed, 18 Dec 2024 12:24:54 +0000 Subject: [PATCH 11/11] add summary to family_name_bolding & family_description_bolding --- tests/local_vespa/test_app/schemas/family_document.sd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/local_vespa/test_app/schemas/family_document.sd b/tests/local_vespa/test_app/schemas/family_document.sd index c012052..fcdd9e0 100644 --- a/tests/local_vespa/test_app/schemas/family_document.sd +++ b/tests/local_vespa/test_app/schemas/family_document.sd @@ -11,12 +11,12 @@ schema family_document { } field family_name_bolding type string { - indexing: input family_name_index | index + indexing: input family_name_index | summary | index bolding: true } field family_description_bolding type string { - indexing: input family_description_index | index + indexing: input family_description_index | summary | index bolding: true }