Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add not-stemmed searchable fields and rank profile #421

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
schema document_passage {

# Schema-level field (declared outside the document block) that is filled from
# the text_block input and indexed with stemming switched off.
field text_block_not_stemmed type string {
# Take the text_block value, include it in summaries, and index it.
indexing: input text_block | summary | index
# Disable stemming for this field's index.
stemming: none
}

document document_passage {

field search_weights_ref type reference<search_weights> {
Expand Down Expand Up @@ -134,14 +139,55 @@ schema document_passage {
summary concepts {}
}

# Summary class listing the family, document, corpus and text-block fields below,
# plus text_block_tokens, which is sourced from text_block with the `tokens` option.
document-summary search_summary_with_tokens {
summary family_name {}
summary family_description {}
summary family_import_id {}
summary family_slug {}
summary family_category {}
summary family_publication_ts {}
summary family_geography {}
summary family_geographies {}
summary family_source {}
summary document_import_id {}
summary document_slug {}
summary document_languages {}
summary document_content_type {}
summary document_cdn_object {}
summary document_source_url {}
summary corpus_import_id {}
summary corpus_type_name {}
summary metadata {}
summary text_block {}
summary text_block_id {}
summary text_block_type {}
summary text_block_page {}
summary text_block_coords {}
summary concepts {}
# Derived summary field: reads text_block and applies the `tokens` summary option.
# NOTE(review): presumably returns the indexed tokens of text_block for debugging
# stemming/tokenization - confirm against the Vespa schema reference.
summary text_block_tokens {
source: text_block
tokens
}
}
kdutia marked this conversation as resolved.
Show resolved Hide resolved

# Rank profile scoring each passage as the passage_weight attribute multiplied
# by the fieldMatch rank feature computed on text_block.
rank-profile exact inherits default {
# Weighted field-match score used as the first-phase expression.
function text_score() {
expression: attribute(passage_weight) * fieldMatch(text_block)
}
first-phase {
expression: text_score()
}
# One match-features statement only: return the final score together with the
# unweighted fieldMatch(text_block) component for each hit.
match-features: text_score() fieldMatch(text_block)
}

# Rank profile scoring each passage as the passage_weight attribute multiplied
# by the fieldMatch rank feature computed on text_block_not_stemmed.
rank-profile exact_not_stemmed inherits default {
# Weighted field-match score over the not-stemmed field; used in first phase.
function text_score() {
expression: attribute(passage_weight) * fieldMatch(text_block_not_stemmed)
}
first-phase {
expression: text_score()
}
# Expose the component that actually drives text_score(), i.e.
# fieldMatch(text_block_not_stemmed). fieldMatch(text_block) is kept so existing
# consumers of that match feature continue to receive it.
match-features: text_score() fieldMatch(text_block) fieldMatch(text_block_not_stemmed)
}

rank-profile hybrid_no_closeness inherits default {
Expand All @@ -151,7 +197,7 @@ schema document_passage {
first-phase {
expression: text_score()
}
match-features: text_score()
match-features: text_score() bm25(text_block)
}

rank-profile hybrid inherits default {
Expand All @@ -164,6 +210,20 @@ schema document_passage {
first-phase {
expression: text_score()
}
match-features: text_score()
match-features: text_score() bm25(text_block) closeness(text_embedding)
}

# Rank profile combining bm25(text_block), scaled by a query-supplied weight,
# with closeness(text_embedding); the sum is multiplied by passage_weight.
rank-profile hybrid_custom_weight inherits default {
# Query-time inputs for this profile.
inputs {
# 768-dimensional float tensor.
# NOTE(review): presumably the vector used by the nearest-neighbour operator
# that produces closeness(text_embedding) - confirm against the query code.
query(query_embedding) tensor<float>(x[768])
# Multiplier applied to the bm25 term in text_score().
# NOTE(review): no default is declared here; verify what value is used when
# the query omits it, since a zero weight would remove the bm25 contribution.
query(bm25_weight) double
}
# passage_weight * (bm25_weight * bm25 + closeness).
function text_score() {
expression: attribute(passage_weight) * (query(bm25_weight) * bm25(text_block) + closeness(text_embedding))
}
first-phase {
expression: text_score()
}
# Return the final score and both of its unweighted components with each hit.
match-features: text_score() bm25(text_block) closeness(text_embedding)
}
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,15 @@
schema family_document {

# Schema-level field (declared outside the document block) that is filled from
# the family_name_index input and indexed with stemming switched off.
field family_name_not_stemmed type string {
# Take the family_name_index value and index it (no summary, no attribute).
indexing: input family_name_index | index
# Disable stemming for this field's index.
stemming: none
}

# Schema-level field (declared outside the document block) that is filled from
# the family_description_index input and indexed with stemming switched off.
field family_description_not_stemmed type string {
# Take the family_description_index value and index it (no summary, no attribute).
indexing: input family_description_index | index
# Disable stemming for this field's index.
stemming: none
}

document family_document {

field search_weights_ref type reference<search_weights> {
Expand Down Expand Up @@ -170,6 +180,19 @@ schema family_document {
}
match-features: name_score() description_score()
}

# Rank profile that sums two weighted fieldMatch scores computed on the
# not-stemmed name and description fields.
rank-profile exact_not_stemmed inherits default {
# name_weight attribute times fieldMatch on family_name_not_stemmed.
function name_score() {
expression: attribute(name_weight) * fieldMatch(family_name_not_stemmed)
}
# description_weight attribute times fieldMatch on family_description_not_stemmed.
function description_score() {
expression: attribute(description_weight) * fieldMatch(family_description_not_stemmed)
}
# First-phase score is the plain sum of the two weighted components.
first-phase {
expression: name_score() + description_score()
}
# Return both weighted components with each hit.
match-features: name_score() description_score()
}

rank-profile hybrid_no_closeness inherits default {
function name_score() {
Expand Down Expand Up @@ -199,6 +222,40 @@ schema family_document {
}
match-features: name_score() description_score()
}

# Rank profile that sums weighted bm25 scores on family_name_index and
# family_description_index; no embedding-based feature appears in any expression.
rank-profile hybrid_no_description_embedding inherits default {
inputs {
# Declared but not referenced by any expression in this profile.
# NOTE(review): presumably kept so callers can send the same query inputs as
# for the other hybrid profiles - confirm.
query(query_embedding) tensor<float>(x[768])
}
# name_weight attribute times bm25 on family_name_index.
function name_score() {
expression: attribute(name_weight) * bm25(family_name_index)
}
# description_weight attribute times bm25 on family_description_index.
function description_score() {
expression: attribute(description_weight) * bm25(family_description_index)
}
# First-phase score is the plain sum of the two weighted components.
first-phase {
expression: name_score() + description_score()
}
# Return both weighted components with each hit.
match-features: name_score() description_score()
}

# Rank profile that sums weighted bm25 scores on family_name_index and
# family_description_index.
# NOTE(review): both declared inputs are unused by the expressions below, so
# query(bm25_weight) currently has no effect on ranking. Presumably the
# weight was meant to scale the bm25 terms and an embedding closeness term was
# meant to be added - confirm intended formula before relying on this profile.
rank-profile hybrid_custom_weight inherits default {
inputs {
# Declared but not referenced by any expression in this profile.
query(query_embedding) tensor<float>(x[768])
# Declared but not referenced by any expression in this profile.
query(bm25_weight) double
}
# name_weight attribute times bm25 on family_name_index.
function name_score() {
expression: attribute(name_weight) * bm25(family_name_index)
}
# description_weight attribute times bm25 on family_description_index.
function description_score() {
expression: attribute(description_weight) * bm25(family_description_index)
}
# First-phase score is the plain sum of the two weighted components.
first-phase {
expression: name_score() + description_score()
}
# Return both weighted components with each hit.
match-features: name_score() description_score()
}


document-summary search_summary {
summary family_name {}
Expand All @@ -223,4 +280,39 @@ schema family_document {
summary collection_title {}
summary collection_summary {}
}

# Summary class listing the family, document, corpus and collection fields below,
# plus the two index fields and a `tokens`-option view of each of them.
document-summary search_summary_with_tokens {
summary family_name {}
summary family_description {}
summary family_import_id {}
summary family_slug {}
summary family_category {}
summary family_publication_ts {}
summary family_geography {}
summary family_geographies {}
summary family_source {}
summary document_import_id {}
summary document_title {}
summary document_slug {}
summary document_languages {}
summary document_content_type {}
summary document_cdn_object {}
summary document_source_url {}
summary metadata {}
summary corpus_import_id {}
summary corpus_type_name {}
summary collection_title {}
summary collection_summary {}
summary family_name_index {}
# Derived summary field: reads family_name_index and applies the `tokens` option.
summary family_name_index_tokens {
source: family_name_index
tokens
}
summary family_description_index {}
# Derived summary field: reads family_description_index and applies the `tokens` option.
summary family_description_index_tokens {
source: family_description_index
tokens
}
# NOTE(review): `from-disk` may be deprecated / a no-op in current Vespa
# versions - confirm against the schema reference before keeping or removing it.
from-disk
}
}
Loading