diff --git a/backend/src/search/event.rs b/backend/src/search/event.rs index 8fa8e6400..c505dbad9 100644 --- a/backend/src/search/event.rs +++ b/backend/src/search/event.rs @@ -139,7 +139,7 @@ impl Event { } pub(super) async fn prepare_index(index: &Index) -> Result<()> { - util::lazy_set_special_attributes(index, "event", FieldAbilities { + util::lazy_set_special_attributes(index, "event", true, FieldAbilities { searchable: &[ "title", "creators", diff --git a/backend/src/search/mod.rs b/backend/src/search/mod.rs index c42f33d43..7525eab32 100644 --- a/backend/src/search/mod.rs +++ b/backend/src/search/mod.rs @@ -336,7 +336,7 @@ pub(crate) async fn rebuild_if_necessary( for task in tasks { util::wait_on_task(task, meili).await?; } - info!("Completely rebuild search index"); + info!("Completely rebuilt search index"); meili.meta_index.add_or_replace(&[meta::Meta::current_clean()], None).await .context("failed to update index version document (clean)")?; diff --git a/backend/src/search/playlist.rs b/backend/src/search/playlist.rs index 62b276f79..0a9677a19 100644 --- a/backend/src/search/playlist.rs +++ b/backend/src/search/playlist.rs @@ -90,7 +90,7 @@ impl Playlist { } pub(super) async fn prepare_index(index: &Index) -> Result<()> { - util::lazy_set_special_attributes(index, "playlist", FieldAbilities { + util::lazy_set_special_attributes(index, "playlist", true, FieldAbilities { searchable: &["title", "description"], filterable: &["read_roles", "write_roles"], sortable: &["updated_timestamp"], diff --git a/backend/src/search/realm.rs b/backend/src/search/realm.rs index d238cfd22..85d6d9468 100644 --- a/backend/src/search/realm.rs +++ b/backend/src/search/realm.rs @@ -71,7 +71,7 @@ impl Realm { } pub(super) async fn prepare_index(index: &Index) -> Result<()> { - util::lazy_set_special_attributes(index, "realm", FieldAbilities { + util::lazy_set_special_attributes(index, "realm", false, FieldAbilities { searchable: &["name"], filterable: &["is_root", 
"is_user_realm"], sortable: &[], diff --git a/backend/src/search/series.rs b/backend/src/search/series.rs index f2acdce0c..c7c3e2bfd 100644 --- a/backend/src/search/series.rs +++ b/backend/src/search/series.rs @@ -94,7 +94,7 @@ impl Series { } pub(super) async fn prepare_index(index: &Index) -> Result<()> { - util::lazy_set_special_attributes(index, "series", FieldAbilities { + util::lazy_set_special_attributes(index, "series", true, FieldAbilities { searchable: &["title", "description"], filterable: &["listed", "read_roles", "write_roles"], sortable: &["updated_timestamp"], diff --git a/backend/src/search/stop-words.txt b/backend/src/search/stop-words.txt new file mode 100644 index 000000000..d45d410eb --- /dev/null +++ b/backend/src/search/stop-words.txt @@ -0,0 +1,396 @@ +# Single latin letters +a +b +c +d +e +f +g +h +i +j +k +l +m +n +o +p +q +r +s +t +u +v +w +x +y +z + + +# English +# 'a' and 'i' are already covered by single letters above. +# Based on NLTK's list of english stopwords +about +above +#after -> German word +again +against +all +am +an +and +any +are +as +at +be +because +been +before +being +below +between +both +but +by +can +could +did +do +does +doing +dont +down +during +each +few +for +from +further +had +has +have +having +he +her +here +hers +herself +him +himself +his +how +however +if +in +into +is +it +its +itself +just +like +many +me +more +#most -> German word +must +my +myself +no +nor +not # -> German word, not super common as stand-alone word and very much a English stop word, so we keep it +now +of +off +on +once +only +or +other +our +ours +ourselves +out +over +own +said +same +she +should +so +some +such # -> German word, but probably fine to keep it a stop word +than +that +the +their +theirs +them +themselves +then +there +#these -> German word +they +this +those +through +to +too +under +until +up +using +very +was +we +were +what +when +where +which +while +who +whom +why +will +with +would +you +your +yours +yourself 
+yourselves + + +# German +aber +alle +allem +allen +aller +alles +als +also #-> English word but also kind of stop-wordy, so keeping it +am +an +ander +andere +anderem +anderen +anderer +anderes +anderm +andern +anderr +anders +auch +auf +aus +bei +#bin -> english word +bis +bist +da +damit +dann +der +den +des +dem +#die -> english word +das +dass +daß +dazu +dein +deine +deinem +deinen +deiner +deines +denn +derer +dessen +dich +dir +du +#dies -> english word +diese +diesem +diesen +dieser +dieses +doch +dort +durch +ein +eine +einem +einen +einer +eines +einig +einige +einigem +einigen +einiger +einiges +einmal +er +ihn +ihm +es +etwas +euer +eure +eurem +euren +eurer +eures +für +gab +gegen +gewesen +hab +habe +haben +#hat -> English word +hatte +hatten +hier +hin +hinter +ich +mich +mir +ihr +ihre +ihrem +ihren +ihrer +ihres +euch +im +in +indem +ins +ist +jede +jedem +jeden +jeder +jedes +jene +jenem +jenen +jener +jenes +jetzt +kam +kann +kein +keine +keinem +keinen +keiner +keines +konnte +können +könnte +machen +#man -> English word +manche +manchem +manchen +mancher +manches +mein +meine +meinem +meinen +meiner +meines +mit +muss +musste +nach +nicht +nichts +noch +#nun -> English word +nur +ob +oder +ohne +sehr +sein +seine +seinem +seinen +seiner +seines +selbst +sich +sie +ihnen +sind +so +solche +solchem +solchen +solcher +solches +soll +sollte +sondern +sonst +sowie +über +um +und +uns +unse +unsem +unsen +unser +unses +unter +viel +vom +von +vor +während +#war -> English word +waren +warst +was +weg +weil +weiter +welche +welchem +welchen +welcher +welches +wenn +werde +werden +wie +wieder +will +wir +wird +wirst +wo +wollen +wollte +wurde +wurden +würde +würden +zu +zum +zur +zwar +zwischen diff --git a/backend/src/search/user.rs b/backend/src/search/user.rs index a34f87ff7..3786a6079 100644 --- a/backend/src/search/user.rs +++ b/backend/src/search/user.rs @@ -64,7 +64,7 @@ impl User { } pub(super) async fn prepare_index(index: &Index) -> 
Result<()> { - util::lazy_set_special_attributes(index, "user", FieldAbilities { + util::lazy_set_special_attributes(index, "user", false, FieldAbilities { searchable: &["display_name"], filterable: &[], sortable: &[], diff --git a/backend/src/search/util.rs b/backend/src/search/util.rs index 4292ce87a..a3df95cd4 100644 --- a/backend/src/search/util.rs +++ b/backend/src/search/util.rs @@ -1,4 +1,4 @@ -use std::time::Duration; +use std::{sync::LazyLock, time::Duration}; use meilisearch_sdk::{errors::{Error, ErrorCode}, indexes::Index, tasks::Task, task_info::TaskInfo}; @@ -22,6 +22,7 @@ pub(super) struct FieldAbilities<'a> { pub(super) async fn lazy_set_special_attributes( index: &Index, index_name: &str, + stop_words: bool, fields: FieldAbilities<'_>, ) -> Result<()> { if index.get_searchable_attributes().await? != fields.searchable { @@ -39,9 +40,22 @@ pub(super) async fn lazy_set_special_attributes( index.set_sortable_attributes(fields.sortable).await?; } + if stop_words && index.get_stop_words().await?.iter().ne(&*STOP_WORDS) { + debug!("Updating stop words of {index_name} index"); + index.set_stop_words(&*STOP_WORDS).await?; + } + Ok(()) } +static STOP_WORDS: LazyLock<Vec<&str>> = LazyLock::new(|| { + const RAW: &str = include_str!("stop-words.txt"); + RAW.lines() + .map(|l| l.split('#').next().unwrap().trim()) + .filter(|s| !s.is_empty()) + .collect() +}); + /// Encodes roles inside an ACL (e.g. for an event) to be stored in the index. /// The roles are hex encoded to be filterable properly with Meili's /// case-insensitive filtering. Also, `ROLE_ADMIN` is removed as an space diff --git a/util/scripts/check-system.sh b/util/scripts/check-system.sh index e887af598..3c9878df5 100755 --- a/util/scripts/check-system.sh +++ b/util/scripts/check-system.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash -MIN_RUST_VERSION="1.74.0" +MIN_RUST_VERSION="1.80.0" MIN_NPM_VERSION="7.0" has_command() {