Skip to content

Commit

Permalink
#40 add fuzzy search
Browse files Browse the repository at this point in the history
  • Loading branch information
joepio committed Nov 13, 2021
1 parent a582931 commit 665a1f9
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 20 deletions.
53 changes: 35 additions & 18 deletions server/src/handlers/search.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use crate::{appstate::AppState, errors::BetterResult};
use actix_web::{web, HttpResponse};
use atomic_lib::{urls, Resource, Storelike};
use serde::Deserialize;
use std::{ops::RangeBounds, sync::Mutex};
use std::sync::Mutex;
use tantivy::{collector::TopDocs, query::QueryParser};

#[derive(Deserialize, Debug)]
Expand All @@ -16,8 +16,7 @@ pub struct SearchQuery {
/// Parses a search query and responds with a list of resources
pub async fn search_query(
data: web::Data<Mutex<AppState>>,
query: web::Query<SearchQuery>,
body: String,
params: web::Query<SearchQuery>,
req: actix_web::HttpRequest,
) -> BetterResult<HttpResponse> {
let context = data
Expand All @@ -28,24 +27,41 @@ pub async fn search_query(
let searcher = context.search_reader.searcher();
let fields = crate::search::get_schema_fields(&context);

// construct the query
let query_parser = QueryParser::for_index(
&context.search_index,
vec![
fields.subject,
// I don't think we need to search in the property
// fields.property,
fields.value,
],
);
let tantivy_query = query_parser
.parse_query(&query.q)
.map_err(|e| format!("Error parsing query {}", e))?;
let mut should_fuzzy = true;
let query = params.q.clone();
// If any of these substrings appear, the user wants an exact / advanced search
let dont_fuzz_strings = vec!["*", "AND", "OR", "[", "\""];
for dont_fuzz in dont_fuzz_strings {
if query.contains(dont_fuzz) {
should_fuzzy = false
}
}

let query: Box<dyn tantivy::query::Query> = if should_fuzzy {
let term = tantivy::Term::from_field_text(fields.value, &params.q);
let query = tantivy::query::FuzzyTermQuery::new_prefix(term, 3, true);
Box::new(query)
} else {
// construct the query
let query_parser = QueryParser::for_index(
&context.search_index,
vec![
fields.subject,
// I don't think we need to search in the property
// fields.property,
fields.value,
],
);
let tantivy_query = query_parser
.parse_query(&params.q)
.map_err(|e| format!("Error parsing query {}", e))?;
tantivy_query
};

// execute the query
let top_docs = searcher
.search(&tantivy_query, &TopDocs::with_limit(10))
.map_err(|e| "Error with creating docs for search")?;
.search(&query, &TopDocs::with_limit(10))
.map_err(|_e| "Error with creating docs for search")?;
let mut subjects: Vec<String> = Vec::new();

// convert found documents to resources
Expand Down Expand Up @@ -80,6 +96,7 @@ pub async fn search_query(
// Create a valid atomic data resource
let mut results_resource = Resource::new(subject);
results_resource.set_propval(urls::IS_A.into(), vec![urls::ENDPOINT].into(), store)?;
results_resource.set_propval(urls::DESCRIPTION.into(), atomic_lib::Value::Markdown("Full text-search endpoint. You can use the keyword `AND` and `OR`, or use `\"` for advanced searches. ".into()), store)?;
results_resource.set_propval(urls::ENDPOINT_RESULTS.into(), resources.into(), store)?;
results_resource.set_propval(
urls::ENDPOINT_PARAMETERS.into(),
Expand Down
13 changes: 11 additions & 2 deletions server/src/search.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
//! Full-text search, powered by Tantivy.
//! The path of the folder containing the index is set in the config (`search_index_path`).
//! You can see the Endpoint on `http://localhost/search`

use atomic_lib::Resource;
use atomic_lib::Storelike;
/// Full-text search, powered by Tantivy.
use tantivy::schema::*;
use tantivy::Index;
use tantivy::IndexWriter;
Expand All @@ -10,6 +13,8 @@ use crate::appstate::AppState;
use crate::config::Config;
use crate::errors::BetterResult;

/// The actual Schema used for search.
/// It mimics a single Atom (or Triple).
pub struct Fields {
pub subject: Field,
pub property: Field,
Expand All @@ -27,6 +32,7 @@ pub fn build_schema() -> BetterResult<tantivy::schema::Schema> {
Ok(schema)
}

/// Creates or reads the index from the `search_index_path` and allocates some heap size.
pub fn get_index(config: &Config) -> BetterResult<(IndexWriter, Index)> {
let schema = build_schema()?;
std::fs::create_dir_all(&config.search_index_path).unwrap();
Expand All @@ -39,6 +45,7 @@ pub fn get_index(config: &Config) -> BetterResult<(IndexWriter, Index)> {
Ok((index_writer, index))
}

/// Returns the fields of the search index schema, looked up by name.
pub fn get_schema_fields(appstate: &AppState) -> Fields {
let subject = appstate.search_schema.get_field("subject").unwrap();
let property = appstate.search_schema.get_field("property").unwrap();
Expand All @@ -51,6 +58,8 @@ pub fn get_schema_fields(appstate: &AppState) -> Fields {
}
}

/// Indexes all resources from the store to search.
/// At the moment, this does not remove the existing index first.
pub fn add_all_resources(appstate: &AppState) -> BetterResult<()> {
log::info!("Building search index...");
for resource in appstate.store.all_resources(true) {
Expand Down Expand Up @@ -82,6 +91,6 @@ pub fn get_reader(index: &tantivy::Index) -> BetterResult<tantivy::IndexReader>
.reader_builder()
.reload_policy(ReloadPolicy::OnCommit)
.try_into()
.map_err(|e| "Failed getting reader")
.map_err(|_e| "Failed getting search reader")
.unwrap())
}

0 comments on commit 665a1f9

Please sign in to comment.