Skip to content

Commit

Permalink
#40 Search - construct resource, perform query
Browse files Browse the repository at this point in the history
  • Loading branch information
joepio committed Nov 10, 2021
1 parent 949f9e4 commit e7d157f
Show file tree
Hide file tree
Showing 9 changed files with 144 additions and 32 deletions.
20 changes: 20 additions & 0 deletions lib/defaults/default_store.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,26 @@
],
"https://atomicdata.dev/properties/shortname": "parameters"
},
{
"@id": "https://atomicdata.dev/properties/endpoint/results",
"https://atomicdata.dev/properties/classtype": "https://atomicdata.dev/classes/Property",
"https://atomicdata.dev/properties/datatype": "https://atomicdata.dev/datatypes/resourceArray",
"https://atomicdata.dev/properties/description": "The results of the endpoint",
"https://atomicdata.dev/properties/isA": [
"https://atomicdata.dev/classes/Property"
],
"https://atomicdata.dev/properties/shortname": "results"
},
{
"@id": "https://atomicdata.dev/properties/searchQuery",
"https://atomicdata.dev/properties/datatype": "https://atomicdata.dev/datatypes/string",
"https://atomicdata.dev/properties/description": "See https://docs.rs/tantivy/0.16.1/tantivy/query/struct.QueryParser.html",
"https://atomicdata.dev/properties/isA": [
"https://atomicdata.dev/classes/Property"
],
"https://atomicdata.dev/properties/parent": "https://atomicdata.dev/properties",
"https://atomicdata.dev/properties/shortname": "search-query"
},
{
"@id": "https://atomicdata.dev/properties/isDynamic",
"https://atomicdata.dev/properties/datatype": "https://atomicdata.dev/datatypes/boolean",
Expand Down
2 changes: 2 additions & 0 deletions lib/src/urls.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,9 @@ pub const COLLECTION_SORT_BY: &str = "https://atomicdata.dev/properties/collecti
pub const COLLECTION_SORT_DESC: &str = "https://atomicdata.dev/properties/collection/sortDesc";
// ... for Endpoints
pub const ENDPOINT_PARAMETERS: &str = "https://atomicdata.dev/properties/endpoint/parameters";
pub const ENDPOINT_RESULTS: &str = "https://atomicdata.dev/properties/endpoint/results";
pub const PATH: &str = "https://atomicdata.dev/properties/path";
pub const SEARCH_QUERY: &str = "https://atomicdata.dev/properties/searchQuery";
// ... for Hierarchy / Drive
pub const PARENT: &str = "https://atomicdata.dev/properties/parent";
pub const READ: &str = "https://atomicdata.dev/properties/read";
Expand Down
4 changes: 4 additions & 0 deletions server/example_requests.http
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ Accept: application/ld+json
GET https://atomicdata.dev/properties/isA HTTP/1.1
Accept: text/turtle

### Full text search
GET http://localhost/search?q=blup HTTP/1.1
Accept: application/ld+json

### Send a Commit
POST http://localhost/commit HTTP/1.1
Accept: application/json
Expand Down
4 changes: 4 additions & 0 deletions server/src/appstate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,11 @@ pub fn init(config: Config) -> BetterResult<AppState> {
atomic_lib::populate::populate_default_store(&store)
.map_err(|e| format!("Failed to populate default store. {}", e))?;
// Building the index here is needed to perform TPF queries on imported resources
log::info!(
"Building index... (this could take a few minutes for larger existing databases)"
);
store.build_index(true)?;
log::info!("Building index finished!");
}
set_default_agent(&config, &store)?;
if config.initialize {
Expand Down
4 changes: 2 additions & 2 deletions server/src/handlers/resource.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use std::sync::Mutex;
/// Respond to a single resource.
/// The URL should match the Subject of the resource.
pub async fn get_resource(
subject_end: Option<web::Path<String>>,
path: Option<web::Path<String>>,
data: web::Data<Mutex<AppState>>,
req: actix_web::HttpRequest,
) -> BetterResult<HttpResponse> {
Expand All @@ -20,7 +20,7 @@ pub async fn get_resource(
let mut content_type = get_accept(req.headers());
let base_url = &context.config.local_base_url;
// Get the subject from the path, or return the home URL
let subject = if let Some(subj_end) = subject_end {
let subject = if let Some(subj_end) = path {
let mut subj_end_string = subj_end.as_str();
if content_type == ContentType::Html {
if let Some((ext, path)) = try_extension(subj_end_string) {
Expand Down
75 changes: 63 additions & 12 deletions server/src/handlers/search.rs
Original file line number Diff line number Diff line change
@@ -1,42 +1,93 @@
use crate::{appstate::AppState, errors::BetterResult};
use actix_web::{web, HttpResponse};
use atomic_lib::{urls, Resource, Storelike};
use serde::Deserialize;
use std::sync::Mutex;
use std::{ops::RangeBounds, sync::Mutex};
use tantivy::{collector::TopDocs, query::QueryParser};

#[derive(Deserialize, Debug)]
pub struct SearchQuery {
/// The actual search query
pub query: String,
pub q: String,
/// Include the full resources in the response
pub include: Option<bool>,
}

/// Parses a search query and responds with a list of resources
pub async fn search_query(
data: web::Data<Mutex<AppState>>,
query: web::Query<SearchQuery>,
body: String,
req: actix_web::HttpRequest,
) -> BetterResult<HttpResponse> {
let mut context = data
let context = data
.lock()
.expect("Failed to lock mutexguard in search_query");

let store = &mut context.store;
let store = &context.store;
let searcher = context.search_reader.searcher();
let (property_field, value_field) = crate::search::get_schema_fields(&context);
let query_parser =
QueryParser::for_index(&context.search_index, vec![property_field, value_field]);
let fields = crate::search::get_schema_fields(&context);

// construct the query
let query_parser = QueryParser::for_index(
&context.search_index,
vec![
fields.subject,
// I don't think we need to search in the property
// fields.property,
fields.value,
],
);
let tantivy_query = query_parser
.parse_query(&query.query)
.parse_query(&query.q)
.map_err(|e| format!("Error parsing query {}", e))?;

// execute the query
let top_docs = searcher
.search(&tantivy_query, &TopDocs::with_limit(10))
.map_err(|e| "Error with creating docs for search")?;
let mut subjects: Vec<String> = Vec::new();

// convert found documents to resources
for (_score, doc_address) in top_docs {
let retrieved_doc = searcher.doc(doc_address).unwrap();
println!("{}", context.search_schema.to_json(&retrieved_doc));
let subject_val = retrieved_doc.get_first(fields.subject).unwrap();
let subject = match subject_val {
tantivy::schema::Value::Str(s) => s,
_else => return Err("Subject is not a string!".into()),
};
if subjects.contains(subject) {
continue;
} else {
subjects.push(subject.clone());
}
}
let mut resources: Vec<Resource> = Vec::new();
for s in subjects {
resources.push(store.get_resource_extended(&s, true)?);
}

// You'd think there would be a simpler way of getting the requested URL...
let subject = format!(
"{}{}",
store.get_self_url().ok_or("No base URL")?,
req.uri()
.path_and_query()
.ok_or("Add a query param")?
.to_string()
);

// Create a valid atomic data resource
let mut results_resource = Resource::new(subject);
results_resource.set_propval(urls::IS_A.into(), vec![urls::ENDPOINT].into(), store)?;
results_resource.set_propval(urls::ENDPOINT_RESULTS.into(), resources.into(), store)?;
results_resource.set_propval(
urls::ENDPOINT_PARAMETERS.into(),
vec![urls::SEARCH_QUERY].into(),
store,
)?;
// let json_ad = atomic_lib::serialize::resources_to_json_ad(&resources)?;
let mut builder = HttpResponse::Ok();
let message = format!("succesful search for {:?}", query.query);
log::info!("{}", &message);
Ok(builder.body(message))
// log::info!("Search q: {} hits: {}", &query.q, resources.len());
Ok(builder.body(results_resource.to_json_ad()?))
}
2 changes: 2 additions & 0 deletions server/src/helpers.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
//! Functions useful in the server

use crate::content_types::ContentType;

// Returns None if the string is empty.
// Useful for parsing form inputs.
pub fn empty_to_nothing(string: Option<String>) -> Option<String> {
Expand Down
63 changes: 45 additions & 18 deletions server/src/search.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
use atomic_lib::Resource;
use atomic_lib::Storelike;
/// Full-text search, powered by Tantivy.
use tantivy::collector::TopDocs;
use tantivy::query::QueryParser;
use tantivy::schema::*;
use tantivy::Index;
use tantivy::IndexWriter;
Expand All @@ -11,41 +10,69 @@ use crate::appstate::AppState;
use crate::config::Config;
use crate::errors::BetterResult;

/// Handles to the three text fields of the search index schema,
/// as looked up by name in `get_schema_fields`.
pub struct Fields {
/// The schema field registered under the name "subject".
pub subject: Field,
/// The schema field registered under the name "property".
pub property: Field,
/// The schema field registered under the name "value".
pub value: Field,
}

/// Returns the schema for the search index.
pub fn build_schema() -> BetterResult<tantivy::schema::Schema> {
let mut schema_builder = Schema::builder();
// The STORED flag makes the index store the full values. Can be useful.
schema_builder.add_text_field("subject", TEXT | STORED);
schema_builder.add_text_field("property", TEXT | STORED);
schema_builder.add_text_field("value", TEXT);
schema_builder.add_text_field("value", TEXT | STORED);
let schema = schema_builder.build();
Ok(schema)
}

pub fn get_index(config: &Config) -> BetterResult<(IndexWriter, Index)> {
let schema = build_schema()?;
let index = Index::create_in_dir(&config.search_index_path, schema).unwrap();
std::fs::create_dir_all(&config.search_index_path).unwrap();
let mmap_directory =
tantivy::directory::MmapDirectory::open(&config.search_index_path).unwrap();

let index = Index::open_or_create(mmap_directory, schema).unwrap();
let heap_size_bytes = 50_000_000;
let mut index_writer = index.writer(heap_size_bytes).unwrap();
let index_writer = index.writer(heap_size_bytes).unwrap();
Ok((index_writer, index))
}

pub fn get_schema_fields(appstate: &AppState) -> (Field, Field) {
let property_field = appstate.search_schema.get_field("property").unwrap();
let value_field = appstate.search_schema.get_field("value").unwrap();
(property_field, value_field)
pub fn get_schema_fields(appstate: &AppState) -> Fields {
let subject = appstate.search_schema.get_field("subject").unwrap();
let property = appstate.search_schema.get_field("property").unwrap();
let value = appstate.search_schema.get_field("value").unwrap();

Fields {
subject,
property,
value,
}
}

fn add_resource(appstate: AppState, resource: &Resource) -> BetterResult<()> {
let mut doc = Document::default();
let property_field = appstate.search_schema.get_field("property").unwrap();
let value_field = appstate.search_schema.get_field("value").unwrap();
for (prop, val) in resource.get_propvals() {
doc.add_text(property_field, prop);
doc.add_text(value_field, &val.to_string());
pub fn add_all_resources(appstate: &AppState) -> BetterResult<()> {
log::info!("Building search index...");
for resource in appstate.store.all_resources(true) {
add_resource(appstate, &resource)?;
}
appstate.search_index_writer.read()?.add_document(doc);
// TODO: don't do this every time!
appstate.search_index_writer.write()?.commit().unwrap();
log::info!("Finished building search index!");
Ok(())
}

/// Adds a single resource to the search index, but does _not_ commit!
///
/// One document is added per property-value pair of the resource. Each
/// document holds the resource's subject, the property, and the value
/// rendered through `to_string()`.
///
/// The caller is responsible for committing afterwards, e.g.:
/// `appstate.search_index_writer.write()?.commit().unwrap();`
///
/// # Errors
///
/// Returns an error if the read lock on `appstate.search_index_writer`
/// cannot be acquired.
pub fn add_resource(appstate: &AppState, resource: &Resource) -> BetterResult<()> {
    let fields = get_schema_fields(appstate);
    let subject = resource.get_subject();
    // Acquire the read lock once for the whole resource rather than once per
    // property-value pair; the guard is released when this function returns.
    let writer = appstate.search_index_writer.read()?;
    for (prop, val) in resource.get_propvals() {
        let mut doc = Document::default();
        doc.add_text(fields.property, prop);
        doc.add_text(fields.value, &val.to_string());
        doc.add_text(fields.subject, subject);
        writer.add_document(doc);
    }
    Ok(())
}

Expand Down
2 changes: 2 additions & 0 deletions server/src/serve.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ pub async fn serve(config: crate::config::Config) -> AtomicResult<()> {
.expect("Failed to build index");
log::info!("Building index finished!");
});

crate::search::add_all_resources(&appstate)?;
}

let server = HttpServer::new(move || {
Expand Down

0 comments on commit e7d157f

Please sign in to comment.