Skip to content

Commit

Permalink
#114 collection / query cache (#285) - Bump to v0.31.0
Browse files Browse the repository at this point in the history
* #114 WIP collection cache

* #114 WIP collection cache working but slow

* #114 try different approach

* WIP tests passing, but sorting not working

* WIP

* Sorting one way works...

* Fix sorting

* mostly working

* Move db tests to file

* Move some utility functions

* Cleanup

* authorization tests

* Add authorization tests, get them green

* Cache invalidation test passing

* Add test for deleting, fix temp path gitignore

* Refactor commit opts

* Fix query index

* Change TPF, fix test

* Tests passing

* Improve sorting

* Bump to v0.31.0
  • Loading branch information
joepio authored Jan 25, 2022
1 parent 49f8663 commit d9b9fb6
Show file tree
Hide file tree
Showing 37 changed files with 1,668 additions and 587 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
/target
.env
trace-*.json
.temp
12 changes: 11 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,17 @@
List of changes for this repo, including `atomic-cli`, `atomic-server` and `atomic-lib`.
By far most changes relate to `atomic-server`, so if not specified, assume the changes are relevant only for the server.

## [v0.30.4] - 2021-01-15
## [v0.31.0] - 2022-01-25

- Huge performance increase for queries! Added sortable index, big refactor #114
- Added `store.query()` function with better query options, such as `starts_at` and `limit`. Under the hood, this powers `Collection`s.
- `Resource.save` returns a `CommitResponse`.
- Refactor `Commit.apply_opts`, structure options.
- Remove the potentially confusing `commit.apply` method.
- `store.tpf` now takes a `Value` instead of `String`.
- Improved sorting logic. Still has some problems.

## [v0.30.4] - 2022-01-15

Run with `--rebuild-index` the first time, if you use an existing database.
Note that due to an issue in actix, I'm unable to publish the `atomic-server` crate at this moment.
Expand Down
6 changes: 3 additions & 3 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@ license = "MIT"
name = "atomic-cli"
readme = "README.md"
repository = "https://github.com/joepio/atomic-data-rust"
version = "0.30.0"
version = "0.31.0"

[dependencies]
atomic_lib = {version = "0.30.0", path = "../lib", features = ["config", "rdf"]}
atomic_lib = {version = "0.31.0", path = "../lib", features = ["config", "rdf"]}
clap = "2.33.3"
colored = "2.0.0"
dirs = "3.0.1"
Expand Down
2 changes: 1 addition & 1 deletion cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ impl Context<'_> {
self.store.set_default_agent(Agent {
subject: write_ctx.agent.clone(),
private_key: Some(write_ctx.private_key.clone()),
created_at: atomic_lib::datetime_helpers::now(),
created_at: atomic_lib::utils::now(),
name: None,
public_key: generate_public_key(&write_ctx.private_key).public,
});
Expand Down
Binary file modified lib/.DS_Store
Binary file not shown.
4 changes: 0 additions & 4 deletions lib/.tmp/db/conf

This file was deleted.

Binary file removed lib/.tmp/db/db
Binary file not shown.
2 changes: 1 addition & 1 deletion lib/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ license = "MIT"
name = "atomic_lib"
readme = "README.md"
repository = "https://github.com/joepio/atomic-data-rust"
version = "0.30.4"
version = "0.31.0"

[dependencies]
base64 = "0.13.0"
Expand Down
6 changes: 3 additions & 3 deletions lib/src/agents.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
//! Agents are actors (such as users) that can edit content.
//! https://docs.atomicdata.dev/commits/concepts.html

use crate::{datetime_helpers, errors::AtomicResult, urls, Resource, Storelike};
use crate::{errors::AtomicResult, urls, Resource, Storelike};

#[derive(Clone, Debug)]
pub struct Agent {
Expand Down Expand Up @@ -60,7 +60,7 @@ impl Agent {
public_key: keypair.public.clone(),
subject: format!("{}/agents/{}", store.get_server_url(), keypair.public),
name: name.map(|x| x.to_owned()),
created_at: datetime_helpers::now(),
created_at: crate::utils::now(),
}
}

Expand All @@ -72,7 +72,7 @@ impl Agent {
public_key: public_key.into(),
subject: format!("{}/agents/{}", store.get_server_url(), public_key),
name: None,
created_at: datetime_helpers::now(),
created_at: crate::utils::now(),
})
}
}
Expand Down
4 changes: 2 additions & 2 deletions lib/src/client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ pub fn fetch_resource(
/// Returns the various x-atomic authentication headers, including the agent signature
pub fn get_authentication_headers(url: &str, agent: &Agent) -> AtomicResult<Vec<(String, String)>> {
let mut headers = Vec::new();
let now = crate::datetime_helpers::now().to_string();
let now = crate::utils::now().to_string();
let message = format!("{} {}", url, now);
let signature = sign_message(
&message,
Expand Down Expand Up @@ -97,7 +97,7 @@ pub fn fetch_tpf(

/// Posts a Commit to the endpoint of the Subject from the Commit
pub fn post_commit(commit: &crate::Commit, store: &impl Storelike) -> AtomicResult<()> {
let server_url = crate::url_helpers::server_url(commit.get_subject())?;
let server_url = crate::utils::server_url(commit.get_subject())?;
// Default Commit endpoint is `https://example.com/commit`
let endpoint = format!("{}commit", server_url);
post_commit_custom_endpoint(&endpoint, commit, store)
Expand Down
167 changes: 68 additions & 99 deletions lib/src/collections.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
//! Collections are dynamic resources that refer to multiple resources.
//! They are constructed using a TPF query
use crate::{errors::AtomicResult, storelike::ResourceCollection, urls, Resource, Storelike};
use crate::{
errors::AtomicResult,
storelike::{Query, ResourceCollection},
urls, Resource, Storelike, Value,
};

#[derive(Debug)]
pub struct TpfQuery {
Expand Down Expand Up @@ -154,7 +158,7 @@ pub struct Collection {

/// Sorts a vector of resources by some property.
#[tracing::instrument]
fn sort_resources(
pub fn sort_resources(
mut resources: ResourceCollection,
sort_by: &str,
sort_desc: bool,
Expand All @@ -163,8 +167,8 @@ fn sort_resources(
let val_a = a.get(sort_by);
let val_b = b.get(sort_by);
if val_a.is_err() || val_b.is_err() {
return std::cmp::Ordering::Equal;
}
return std::cmp::Ordering::Greater;
};
if val_b.unwrap().to_string() > val_a.unwrap().to_string() {
if sort_desc {
std::cmp::Ordering::Greater
Expand Down Expand Up @@ -193,103 +197,43 @@ impl Collection {
if collection_builder.page_size < 1 {
return Err("Page size must be greater than 0".into());
}
// Execute the TPF query, get all the subjects.
// Note that these are not yet authorized.
let atoms = store.tpf(
None,
collection_builder.property.as_deref(),
collection_builder.value.as_deref(),
collection_builder.include_external,
)?;
// Remove duplicate subjects
let mut subjects_deduplicated: Vec<String> = atoms
.iter()
.map(|atom| atom.subject.clone())
.collect::<std::collections::HashSet<String>>()
.into_iter()
.collect();

// Sort by subject, better than no sorting
subjects_deduplicated.sort();

// WARNING: Entering expensive loop!
// This is needed for sorting, authorization and including nested resources.
// It could be skipped if there is no authorization and sorting requirement.
let mut resources = Vec::new();
for subject in subjects_deduplicated.iter() {
// These nested resources are not fully calculated - they will be presented as-is
match store.get_resource_extended(subject, true, for_agent) {
Ok(resource) => {
resources.push(resource);
}
Err(e) => match e.error_type {
crate::AtomicErrorType::NotFoundError => {}
crate::AtomicErrorType::UnauthorizedError => {}
crate::AtomicErrorType::OtherError => {
return Err(
format!("Error when getting resource in collection: {}", e).into()
)
}
},
}
}
if let Some(sort) = &collection_builder.sort_by {
resources = sort_resources(resources, sort, collection_builder.sort_desc);
}
let mut subjects = Vec::new();
for r in resources.iter() {
subjects.push(r.get_subject().clone())
}
let mut all_pages: Vec<Vec<String>> = Vec::new();
let mut all_pages_nested: Vec<Vec<Resource>> = Vec::new();
let mut page: Vec<String> = Vec::new();
let mut page_nested: Vec<Resource> = Vec::new();
let current_page = collection_builder.current_page;
for (i, subject) in subjects.iter().enumerate() {
page.push(subject.into());
if collection_builder.include_nested {
page_nested.push(resources[i].clone());
}
if page.len() >= collection_builder.page_size {
all_pages.push(page);
all_pages_nested.push(page_nested);
page = Vec::new();
page_nested = Vec::new();
// No need to calculate more than necessary
if all_pages.len() > current_page {
break;
}
}
// Add the last page when handling the last subject
if i == subjects.len() - 1 {
all_pages.push(page);
all_pages_nested.push(page_nested);
break;
}
}
if all_pages.is_empty() {
all_pages.push(Vec::new());
all_pages_nested.push(Vec::new());
}
// Maybe I should default to last page, if current_page is too high?
let members = all_pages
.get(current_page)
.ok_or(format!("Page number {} is too high", current_page))?
.clone();
let total_items = subjects.len();
// Construct the pages (TODO), use pageSize
let total_pages =
(total_items + collection_builder.page_size - 1) / collection_builder.page_size;
let members_nested = if collection_builder.include_nested {
Some(
all_pages_nested
.get(current_page)
.ok_or(format!("Page number {} is too high", current_page))?
.clone(),
)
} else {
None
// Warning: this _assumes_ that the Value is a string.
// This will work for most datatypes, but not for things like resource arrays!
// We could improve this by taking the datatype of the `property`, and parsing the string.
let value_filter = collection_builder
.value
.as_ref()
.map(|val| Value::String(val.clone()));

let q = Query {
property: collection_builder.property.clone(),
value: value_filter,
limit: Some(collection_builder.page_size),
start_val: None,
end_val: None,
offset: collection_builder.page_size * collection_builder.current_page,
sort_by: collection_builder.sort_by.clone(),
sort_desc: collection_builder.sort_desc,
include_external: collection_builder.include_external,
include_nested: collection_builder.include_nested,
for_agent: for_agent.map(|a| a.to_string()),
};

let query_result = store.query(&q)?;
let members = query_result.subjects;
let members_nested = Some(query_result.resources);
let total_items = query_result.count;
let pages_fraction = total_items as f64 / collection_builder.page_size as f64;
let total_pages = pages_fraction.ceil() as usize;
if collection_builder.current_page > total_pages {
return Err(format!(
"Page number out of bounds, got {}, max {}",
collection_builder.current_page, total_pages
)
.into());
}

let collection = Collection {
total_pages,
members,
Expand Down Expand Up @@ -651,4 +595,29 @@ mod test {
== "2"
);
}

#[test]
fn sorting_resources() {
    // Exercises `sort_resources` in both directions on three resources:
    // two that carry the sort property ("1" and "2") and one that lacks it.
    let prop = urls::DESCRIPTION.to_string();
    let mut a = Resource::new("first".into());
    a.set_propval_unsafe(prop.clone(), Value::Markdown("1".into()));
    let mut b = Resource::new("second".into());
    b.set_propval_unsafe(prop.clone(), Value::Markdown("2".into()));
    // `c` deliberately never gets the property set, so it needs no `mut`.
    let c = Resource::new("third_missing_property".into());

    let asc = vec![a.clone(), b.clone(), c.clone()];

    // Ascending: "1" before "2", with the property-less resource at the end.
    let sorted = sort_resources(asc.clone(), &prop, false);
    assert_eq!(a.get_subject(), sorted[0].get_subject());
    assert_eq!(b.get_subject(), sorted[1].get_subject());
    assert_eq!(c.get_subject(), sorted[2].get_subject());

    // Descending: "2" before "1"; the property-less resource is still expected last.
    // This is the final use of `asc`, so it is moved instead of cloned.
    let sorted_desc = sort_resources(asc, &prop, true);
    assert_eq!(b.get_subject(), sorted_desc[0].get_subject());
    assert_eq!(a.get_subject(), sorted_desc[1].get_subject());
    assert_eq!(
        c.get_subject(),
        sorted_desc[2].get_subject(),
        "c is missing the sorted property - it should _always_ be last"
    );
}
}
Loading

0 comments on commit d9b9fb6

Please sign in to comment.