From a5066eaf6a5680a5b365a5bbd0671fde9dcc7050 Mon Sep 17 00:00:00 2001 From: Arnaud Mimart <33665250+amimart@users.noreply.github.com> Date: Thu, 27 Apr 2023 16:09:26 +0200 Subject: [PATCH] feat(cognitarium): separate iri namespaces from triples --- contracts/okp4-cognitarium/src/contract.rs | 113 +++++++-- contracts/okp4-cognitarium/src/error.rs | 19 +- contracts/okp4-cognitarium/src/rdf.rs | 215 +++++++++++++++--- contracts/okp4-cognitarium/src/state/mod.rs | 2 +- .../src/state/{de.rs => serde.rs} | 33 ++- .../okp4-cognitarium/src/state/triples.rs | 87 +------ 6 files changed, 308 insertions(+), 161 deletions(-) rename contracts/okp4-cognitarium/src/state/{de.rs => serde.rs} (56%) diff --git a/contracts/okp4-cognitarium/src/contract.rs b/contracts/okp4-cognitarium/src/contract.rs index 0a3717bb..2894f39a 100644 --- a/contracts/okp4-cognitarium/src/contract.rs +++ b/contracts/okp4-cognitarium/src/contract.rs @@ -8,7 +8,7 @@ use cw2::set_contract_version; use crate::error::ContractError; use crate::msg::{ExecuteMsg, InstantiateMsg, QueryMsg}; -use crate::state::{Store, STORE}; +use crate::state::{Store, NAMESPACE_KEY_INCREMENT, STORE}; // version info for migration info const CONTRACT_NAME: &str = concat!("crates.io:", env!("CARGO_PKG_NAME")); @@ -24,6 +24,7 @@ pub fn instantiate( set_contract_version(deps.storage, CONTRACT_NAME, CONTRACT_VERSION)?; STORE.save(deps.storage, &Store::new(info.sender, msg.limits.into()))?; + NAMESPACE_KEY_INCREMENT.save(deps.storage, &0u128)?; Ok(Response::default()) } @@ -45,30 +46,41 @@ pub mod execute { use crate::error::StoreError; use crate::msg::DataInput; use crate::rdf; - use crate::state::{triples, Triple}; + use crate::rdf::NSResolveFn; + use crate::state::{namespaces, triples, Namespace, NAMESPACE_KEY_INCREMENT}; use blake3::Hash; + use cosmwasm_std::Storage; + use std::collections::BTreeMap; pub fn insert(deps: DepsMut, graph: DataInput) -> Result { let mut store = STORE.load(deps.storage)?; let old_count = store.stat.triples_count; - rdf::parse_triples( - graph, - |triple| -> Result { Ok(triple.try_into()?) }, - |res| -> Result<(), ContractError> { - res.and_then(|triple| { + let mut ns_key_inc = NAMESPACE_KEY_INCREMENT.load(deps.storage)?; + let mut ns_cache: BTreeMap = BTreeMap::new(); + + let mut triple_reader = rdf::read_triples(&graph); + + loop { + let next = triple_reader.next(&mut ns_resolver( + deps.storage, + &mut ns_key_inc, + &mut ns_cache, + )); + + match next { + None => { + break; + } + Some(res) => { + let triple = res.map_err(ContractError::from)?; store.stat.triples_count += Uint128::one(); - store - .limits - .max_triple_count - .filter(|&max| max < store.stat.triples_count) - .map(|max| { - Err(ContractError::from(StoreError::MaxTriplesLimitExceeded( - max, - ))) - }) - .unwrap_or(Ok(()))?; + if store.stat.triples_count > store.limits.max_triple_count { + Err(ContractError::from(StoreError::MaxTriplesLimitExceeded( + store.limits.max_triple_count, + )))? + } let object_hash: Hash = triple.object.as_hash(); triples() @@ -81,16 +93,53 @@ pub mod execute { ), &triple, ) - .map_err(ContractError::Std) - }) - }, - )?; + .map_err(ContractError::Std)?; + } + } + } STORE.save(deps.storage, &store)?; + NAMESPACE_KEY_INCREMENT.save(deps.storage, &ns_key_inc)?; + for entry in ns_cache { + namespaces().save(deps.storage, entry.0, &entry.1)?; + } Ok(Response::new() .add_attribute("action", "insert") - .add_attribute("inserted_count", store.stat.triples_count - old_count)) + .add_attribute("triple_count", store.stat.triples_count - old_count)) + } + + fn ns_resolver<'a>( + store: &'a dyn Storage, + ns_key_inc: &'a mut u128, + ns_cache: &'a mut BTreeMap, + ) -> NSResolveFn<'a> { + Box::new(|ns_str| -> Result { + match ns_cache.get_mut(ns_str.as_str()) { + Some(namespace) => { + namespace.counter += 1; + Ok(namespace.key) + } + None => { + let mut namespace = match namespaces().load(store, ns_str.clone()) { + Err(StdError::NotFound { .. }) => { + let n = Namespace { + key: *ns_key_inc, + counter: 0u128, + }; + *ns_key_inc += 1; + Ok(n) + } + Ok(n) => Ok(n), + Err(e) => Err(e), + }?; + + namespace.counter += 1; + ns_cache.insert(ns_str.clone(), namespace.clone()); + Ok(namespace.key) + } + } + }) } } @@ -105,7 +154,7 @@ mod tests { use crate::error::StoreError; use crate::msg::{DataInput, StoreLimitsInput, StoreLimitsInputBuilder}; use crate::state; - use crate::state::triples; + use crate::state::{namespaces, triples, Namespace}; use cosmwasm_std::testing::{mock_dependencies, mock_env, mock_info}; use cosmwasm_std::{Attribute, Order}; use std::env; @@ -153,6 +202,8 @@ mod tests { triples_count: Uint128::zero(), } ); + + assert_eq!(NAMESPACE_KEY_INCREMENT.load(&deps.storage).unwrap(), 0u128); } #[test] @@ -183,12 +234,13 @@ mod tests { info.clone(), ExecuteMsg::InsertData { input: case }, ); + assert!(res.is_ok()); assert_eq!( res.unwrap().attributes, vec![ Attribute::new("action", "insert"), - Attribute::new("inserted_count", "40") + Attribute::new("triple_count", "40") ] ); @@ -202,6 +254,19 @@ mod tests { STORE.load(&deps.storage).unwrap().stat.triples_count, Uint128::from(40u128), ); + assert_eq!(NAMESPACE_KEY_INCREMENT.load(&deps.storage).unwrap(), 17u128); + assert_eq!( + namespaces() + .load( + &deps.storage, + "https://ontology.okp4.space/dataverse/dataspace/".to_string() + ) + .unwrap(), + Namespace { + key: 0u128, + counter: 5u128, + } + ) } } diff --git a/contracts/okp4-cognitarium/src/error.rs b/contracts/okp4-cognitarium/src/error.rs index 14a8738b..283c9c4f 100644 --- a/contracts/okp4-cognitarium/src/error.rs +++ b/contracts/okp4-cognitarium/src/error.rs @@ -18,18 +18,6 @@ pub enum ContractError { Unauthorized, } -impl From for ContractError { - fn from(value: RdfXmlError) -> Self { - ContractError::ParseRDF(RDFParseError::from(value)) - } -} - -impl From for ContractError { - fn from(value: TurtleError) -> Self { - ContractError::ParseRDF(RDFParseError::from(value)) - } -} - #[derive(Error, Debug, PartialEq)] pub enum StoreError { #[error("Maximum triples number exceeded: {0}")] @@ -56,10 +44,17 @@ pub enum StoreError { #[derive(Error, Debug, PartialEq)] pub enum RDFParseError { + #[error("{0}")] + Std(#[from] StdError), + #[error("Error parsing XML RDF: {0}")] XML(String), + #[error("Error parsing Turtle RDF: {0}")] Turtle(String), + + #[error("Unexpected error parsing RDF: {0}")] + Unexpected(String), } impl From for RDFParseError { diff --git a/contracts/okp4-cognitarium/src/rdf.rs b/contracts/okp4-cognitarium/src/rdf.rs index c2dabe43..e3efb1fd 100644 --- a/contracts/okp4-cognitarium/src/rdf.rs +++ b/contracts/okp4-cognitarium/src/rdf.rs @@ -1,31 +1,174 @@ +use crate::error::RDFParseError; use crate::msg::DataInput; +use crate::state; use cosmwasm_std::StdError; -use rio_api::model::Triple; +use rio_api::model::{Literal, NamedNode, Subject, Term, Triple}; use rio_api::parser::TriplesParser; -use rio_turtle::{NTriplesParser, TurtleError, TurtleParser}; -use rio_xml::{RdfXmlError, RdfXmlParser}; -use std::io::BufReader; - -pub fn parse_triples(input: DataInput, map_fn: MF, use_fn: UF) -> Result<(), E> -where - MF: FnMut(Triple<'_>) -> Result, - UF: FnMut(Result) -> Result<(), E>, - E: From + From, -{ - match input { - DataInput::RDFXml(data) => RdfXmlParser::new(BufReader::new(data.as_slice()), None) - .into_iter(map_fn) - .try_for_each(use_fn), - DataInput::Turtle(data) => TurtleParser::new(BufReader::new(data.as_slice()), None) - .into_iter(map_fn) - .try_for_each(use_fn), - DataInput::NTriples(data) => NTriplesParser::new(BufReader::new(data.as_slice())) - .into_iter(map_fn) - .try_for_each(use_fn), +use rio_turtle::{NTriplesParser, TurtleParser}; +use rio_xml::RdfXmlParser; +use std::io::{BufRead, BufReader}; + +pub struct TripleReader { + parser: TriplesParserKind, + buffer: Vec, +} + +pub enum TriplesParserKind { + NTriples(NTriplesParser), + Turtle(TurtleParser), + RdfXml(RdfXmlParser), +} + +pub fn read_triples(graph: &DataInput) -> TripleReader> { + TripleReader::new(match graph { + DataInput::RDFXml(data) => { + TriplesParserKind::RdfXml(RdfXmlParser::new(BufReader::new(data.as_slice()), None)) + } + DataInput::Turtle(data) => { + TriplesParserKind::Turtle(TurtleParser::new(BufReader::new(data.as_slice()), None)) + } + DataInput::NTriples(data) => { + TriplesParserKind::NTriples(NTriplesParser::new(BufReader::new(data.as_slice()))) + } + }) +} + +pub type NSResolveFn<'a> = Box Result + 'a>; + +impl TripleReader { + pub fn new(parser: TriplesParserKind) -> Self { + TripleReader { + parser, + buffer: Vec::new(), + } + } + + pub fn next( + &mut self, + ns_resolve_fn: &mut NSResolveFn, + ) -> Option> { + loop { + if let Some(t) = self.buffer.pop() { + return Some(Ok(t)); + } + + if let Err(e) = match &mut self.parser { + TriplesParserKind::NTriples(parser) => { + Self::read(parser, &mut self.buffer, ns_resolve_fn) + } + TriplesParserKind::Turtle(parser) => { + Self::read(parser, &mut self.buffer, ns_resolve_fn) + } + TriplesParserKind::RdfXml(parser) => { + Self::read(parser, &mut self.buffer, ns_resolve_fn) + } + }? { + return Some(Err(e)); + } + } + } + + fn read( + parser: &mut P, + buffer: &mut Vec, + ns_resolve_fn: &mut NSResolveFn, + ) -> Option> + where + P: TriplesParser, + E: From + From, + { + if parser.is_end() { + None? + } + + if let Err(e) = parser.parse_step(&mut |t| { + buffer.push(Self::triple(&t, ns_resolve_fn)?); + Ok(()) + }) { + Some(Err(e)) + } else { + Some(Ok(())) + } + } + + fn triple( + triple: &Triple, + ns_resolve_fn: &mut NSResolveFn, + ) -> Result { + Ok(state::Triple { + subject: Self::subject(triple.subject, ns_resolve_fn)?, + predicate: Self::node(triple.predicate, ns_resolve_fn)?, + object: Self::object(triple.object, ns_resolve_fn)?, + }) + } + + fn subject( + subject: Subject, + ns_resolve_fn: &mut NSResolveFn, + ) -> Result { + match subject { + Subject::NamedNode(node) => { + Self::node(node, ns_resolve_fn).map(|n| state::Subject::Named(n)) + } + Subject::BlankNode(node) => Ok(state::Subject::Blank(node.id.to_string())), + _ => Err(RDFParseError::Unexpected( + "RDF star syntax unsupported".to_string(), + )), + } + } + + fn node( + node: NamedNode, + ns_resolve_fn: &mut NSResolveFn, + ) -> Result { + let (ns, v) = explode_iri(node.iri)?; + Ok(state::Node { + namespace: ns_resolve_fn(ns)?, + value: v, + }) + } + + fn object( + object: Term, + ns_resolve_fn: &mut NSResolveFn, + ) -> Result { + match object { + Term::BlankNode(node) => Ok(state::Object::Blank(node.id.to_string())), + Term::NamedNode(node) => { + Self::node(node, ns_resolve_fn).map(|n| state::Object::Named(n)) + } + Term::Literal(literal) => { + Self::literal(literal, ns_resolve_fn).map(|l| state::Object::Literal(l)) + } + _ => Err(RDFParseError::Unexpected( + "RDF star syntax unsupported".to_string(), + )), + } + } + + fn literal( + literal: Literal, + ns_resolve_fn: &mut NSResolveFn, + ) -> Result { + match literal { + Literal::Simple { value } => Ok(state::Literal::Simple { + value: value.to_string(), + }), + Literal::LanguageTaggedString { value, language } => Ok(state::Literal::I18NString { + value: value.to_string(), + language: language.to_string(), + }), + Literal::Typed { value, datatype } => { + Self::node(datatype, ns_resolve_fn).map(|node| state::Literal::Typed { + value: value.to_string(), + datatype: node, + }) + } + } } } -pub fn explode_iri(iri: &str) -> Result<(&str, &str), StdError> { +pub fn explode_iri(iri: &str) -> Result<(String, String), RDFParseError> { let mut marker_index: Option = None; for delim in ['#', '/', ':'] { if let Some(index) = iri.rfind(delim) { @@ -37,10 +180,12 @@ pub fn explode_iri(iri: &str) -> Result<(&str, &str), StdError> { } if let Some(index) = marker_index { - return Ok((&iri[..index + 1], &iri[index + 1..])); + return Ok((iri[..index + 1].to_string(), iri[index + 1..].to_string())); } - Err(StdError::generic_err("Couldn't extract IRI namespace")) + Err(RDFParseError::Unexpected( + "Couldn't extract IRI namespace".to_string(), + )) } #[cfg(test)] @@ -51,32 +196,40 @@ mod tests { fn proper_explode_iri() { assert_eq!( explode_iri("http://www.w3.org/2001/XMLSchema#dateTime"), - Ok(("http://www.w3.org/2001/XMLSchema#", "dateTime")) + Ok(( + "http://www.w3.org/2001/XMLSchema#".to_string(), + "dateTime".to_string() + )) ); assert_eq!( explode_iri("https://ontology.okp4.space/core/Governance"), - Ok(("https://ontology.okp4.space/core/", "Governance")) + Ok(( + "https://ontology.okp4.space/core/".to_string(), + "Governance".to_string() + )) ); assert_eq!( explode_iri( "did:key:0x04d1f1b8f8a7a28f9a5a254c326a963a22f5a5b5d5f5e5d5c5b5a5958575655" ), Ok(( - "did:key:", - "0x04d1f1b8f8a7a28f9a5a254c326a963a22f5a5b5d5f5e5d5c5b5a5958575655" + "did:key:".to_string(), + "0x04d1f1b8f8a7a28f9a5a254c326a963a22f5a5b5d5f5e5d5c5b5a5958575655".to_string() )) ); assert_eq!( explode_iri("wow:this/is#weird"), - Ok(("wow:this/is#", "weird")) + Ok(("wow:this/is#".to_string(), "weird".to_string())) ); assert_eq!( explode_iri("this#is:weird/too"), - Ok(("this#is:weird/", "too")) + Ok(("this#is:weird/".to_string(), "too".to_string())) ); assert_eq!( explode_iri("this_doesn't_work"), - Err(StdError::generic_err("Couldn't extract IRI namespace")) + Err(RDFParseError::Unexpected( + "Couldn't extract IRI namespace".to_string() + )) ); } } diff --git a/contracts/okp4-cognitarium/src/state/mod.rs b/contracts/okp4-cognitarium/src/state/mod.rs index d9906b34..1560ef02 100644 --- a/contracts/okp4-cognitarium/src/state/mod.rs +++ b/contracts/okp4-cognitarium/src/state/mod.rs @@ -1,5 +1,5 @@ -mod de; mod namespaces; +mod serde; mod store; mod triples; diff --git a/contracts/okp4-cognitarium/src/state/de.rs b/contracts/okp4-cognitarium/src/state/serde.rs similarity index 56% rename from contracts/okp4-cognitarium/src/state/de.rs rename to contracts/okp4-cognitarium/src/state/serde.rs index 16d5924f..5eff8963 100644 --- a/contracts/okp4-cognitarium/src/state/de.rs +++ b/contracts/okp4-cognitarium/src/state/serde.rs @@ -1,6 +1,7 @@ use crate::state::triples::{Node, Subject}; use cosmwasm_std::{StdError, StdResult}; -use cw_storage_plus::{Key, KeyDeserialize, Prefixer, PrimaryKey}; +use cw_storage_plus::{IntKey, Key, KeyDeserialize, Prefixer, PrimaryKey}; +use std::array::TryFromSliceError; fn parse_length(value: &[u8]) -> StdResult { Ok(u16::from_be_bytes( @@ -19,8 +20,15 @@ impl<'a> PrimaryKey<'a> for Subject { fn key(&self) -> Vec { match self { - Subject::Named(node) => node.key(), - Subject::Blank(node) => vec![Key::Ref(&[]), Key::Ref(node.as_bytes())], + Subject::Named(node) => { + let mut keys = Vec::new(); + keys.push(Key::Val8([b'n'])); + for x in node.key() { + keys.push(x); + } + keys + } + Subject::Blank(node) => vec![Key::Val8([b'n']), Key::Ref(node.as_bytes())], } } } @@ -34,12 +42,13 @@ impl<'a> Prefixer<'a> for Subject { impl KeyDeserialize for Subject { type Output = Subject; - fn from_vec(value: Vec) -> StdResult { - let named = Node::from_vec(value)?; - if named.namespace.is_empty() { - return Ok(Subject::Blank(named.value)); + fn from_vec(mut value: Vec) -> StdResult { + let val = value.split_off(3); + match val[2] { + b'n' => Node::from_vec(value).map(|n| Subject::Named(n)), + b'b' => String::from_vec(value).map(|n| Subject::Blank(n)), + _ => Err(StdError::generic_err("Could not deserialize subject key")), } - Ok(Subject::Named(named)) } } @@ -51,7 +60,7 @@ impl<'a> PrimaryKey<'a> for Node { fn key(&self) -> Vec { vec![ - Key::Ref(self.namespace.as_bytes()), + Key::Val128(self.namespace.to_cw_bytes()), Key::Ref(self.value.as_bytes()), ] } @@ -72,7 +81,11 @@ impl KeyDeserialize for Node { let ns = val.split_off(n_len); Ok(Node { - namespace: String::from_vec(ns)?, + namespace: u128::from_cw_bytes( + ns.as_slice() + .try_into() + .map_err(|e: TryFromSliceError| StdError::generic_err(e.to_string()))?, + ), value: String::from_vec(val)?, }) } diff --git a/contracts/okp4-cognitarium/src/state/triples.rs b/contracts/okp4-cognitarium/src/state/triples.rs index eb48cec1..1b9c6fd6 100644 --- a/contracts/okp4-cognitarium/src/state/triples.rs +++ b/contracts/okp4-cognitarium/src/state/triples.rs @@ -1,8 +1,5 @@ -use crate::rdf::explode_iri; use blake3::Hash; -use cosmwasm_std::StdError; use cw_storage_plus::{Index, IndexList, IndexedMap, MultiIndex}; -use rio_api::model::NamedNode; use serde::{Deserialize, Serialize}; pub struct TripleIndexes<'a> { @@ -36,38 +33,12 @@ pub struct Triple { pub object: Object, } -impl<'a> TryFrom> for Triple { - type Error = StdError; - - fn try_from(value: rio_api::model::Triple<'a>) -> Result { - Ok(Triple { - subject: value.subject.try_into()?, - predicate: value.predicate.try_into()?, - object: value.object.try_into()?, - }) - } -} - #[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq)] pub enum Subject { Named(Node), Blank(BlankNode), } -impl<'a> TryFrom> for Subject { - type Error = StdError; - - fn try_from(value: rio_api::model::Subject<'a>) -> Result { - match value { - rio_api::model::Subject::NamedNode(node) => { - Node::try_from(node).map(|n| Subject::Named(n)) - } - rio_api::model::Subject::BlankNode(node) => Ok(Subject::Blank(node.id.to_string())), - _ => Err(StdError::generic_err("Not implemented")), - } - } -} - pub type Predicate = Node; #[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq)] @@ -77,21 +48,6 @@ pub enum Object { Literal(Literal), } -impl<'a> TryFrom> for Object { - type Error = StdError; - - fn try_from(value: rio_api::model::Term<'a>) -> Result { - match value { - rio_api::model::Term::BlankNode(node) => Ok(Object::Blank(node.id.to_string())), - rio_api::model::Term::NamedNode(node) => Node::try_from(node).map(|n| Object::Named(n)), - rio_api::model::Term::Literal(literal) => { - Literal::try_from(literal).map(|l| Object::Literal(l)) - } - _ => Err(StdError::generic_err("RDF star syntax unsupported")), - } - } -} - impl Object { pub fn as_hash(&self) -> Hash { let mut hasher = blake3::Hasher::new(); @@ -99,8 +55,8 @@ impl Object { Object::Named(n) => { hasher .update(&[b'n']) - .update(n.namespace.as_bytes()) - .update(n.namespace.as_bytes()); + .update(n.namespace.to_be_bytes().as_slice()) + .update(n.namespace.to_be_bytes().as_slice()); } Object::Blank(n) => { hasher.update(&[b'b']).update(n.as_bytes()); @@ -116,7 +72,7 @@ impl Object { Literal::Typed { value, datatype } => hasher .update(&[b't']) .update(value.as_bytes()) - .update(datatype.namespace.as_bytes()) + .update(datatype.namespace.to_be_bytes().as_slice()) .update(datatype.value.as_bytes()), }; } @@ -130,48 +86,13 @@ pub type BlankNode = String; #[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq)] pub struct Node { - pub namespace: String, + pub namespace: u128, pub value: String, } -impl<'a> TryFrom> for Node { - type Error = StdError; - - fn try_from(value: NamedNode) -> Result { - explode_iri(value.iri).map(|(ns, v)| Self { - namespace: ns.to_string(), - value: v.to_string(), - }) - } -} - #[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq)] pub enum Literal { Simple { value: String }, I18NString { value: String, language: String }, Typed { value: String, datatype: Node }, } - -impl<'a> TryFrom> for Literal { - type Error = StdError; - - fn try_from(value: rio_api::model::Literal<'a>) -> Result { - match value { - rio_api::model::Literal::Simple { value } => Ok(Literal::Simple { - value: value.to_string(), - }), - rio_api::model::Literal::LanguageTaggedString { value, language } => { - Ok(Literal::I18NString { - value: value.to_string(), - language: language.to_string(), - }) - } - rio_api::model::Literal::Typed { value, datatype } => { - Node::try_from(datatype).map(|node| Literal::Typed { - value: value.to_string(), - datatype: node, - }) - } - } - } -}