diff --git a/Cargo.lock b/Cargo.lock index 45a4b102..dd531e00 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8,6 +8,18 @@ version = "1.0.65" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "98161a4e3e2184da77bb14f02184cdd111e83bbbcc9979dfee3c44b9a85f5602" +[[package]] +name = "arrayref" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b4930d2cb77ce62f89ee5d5289b4ac049559b1c45539271f5ed4fdc7db34545" + +[[package]] +name = "arrayvec" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6" + [[package]] name = "base16ct" version = "0.1.1" @@ -38,6 +50,20 @@ version = "1.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ea2b2456fd614d856680dcd9fcc660a51a820fa09daef2e49772b56a193c8474" +[[package]] +name = "blake3" +version = "1.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42ae2468a89544a466886840aa467a25b766499f4f04bf7d9fcd10ecee9fccef" +dependencies = [ + "arrayref", + "arrayvec", + "cc", + "cfg-if", + "constant_time_eq", + "digest 0.10.5", +] + [[package]] name = "block-buffer" version = "0.9.0" @@ -77,6 +103,12 @@ version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec8a7b6a70fde80372154c65702f00a0f56f3e1c36abbc6c440484be248856db" +[[package]] +name = "cc" +version = "1.0.79" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f" + [[package]] name = "cfg-if" version = "1.0.0" @@ -89,6 +121,12 @@ version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "722e23542a15cea1f65d4a1419c4cfd7a26706c70871a13a04238ca3f40f1661" +[[package]] +name = "constant_time_eq" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum 
= "13418e745008f7349ec7e449155f419a61b92b58a99cc3616942b926825ec76b" + [[package]] name = "cosmwasm-crypto" version = "1.2.5" @@ -645,11 +683,18 @@ dependencies = [ "digest 0.10.5", ] +[[package]] +name = "memchr" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" + [[package]] name = "okp4-cognitarium" version = "1.0.0" dependencies = [ "base64 0.21.2", + "blake3", "cosmwasm-schema", "cosmwasm-std", "cosmwasm-storage", @@ -657,6 +702,9 @@ dependencies = [ "cw-storage-plus 1.0.1", "cw2 1.0.1", "derive_builder", + "rio_api", + "rio_turtle", + "rio_xml", "schemars", "serde", "thiserror", @@ -737,6 +785,18 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "624a8340c38c1b80fd549087862da4ba43e08858af025b236e509b6649fc13d5" +[[package]] +name = "oxilangtag" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d91edf4fbb970279443471345a4e8c491bf05bb283b3e6c88e4e606fd8c181b" + +[[package]] +name = "oxiri" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb175ec8981211357b7b379869c2f8d555881c55ea62311428ec0de46d89bd5c" + [[package]] name = "percent-encoding" version = "2.2.0" @@ -785,6 +845,15 @@ dependencies = [ "syn 1.0.107", ] +[[package]] +name = "quick-xml" +version = "0.28.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5c1a97b1bc42b1d550bfb48d4262153fe400a12bab1511821736f7eac76d7e2" +dependencies = [ + "memchr", +] + [[package]] name = "quote" version = "1.0.26" @@ -823,6 +892,35 @@ dependencies = [ "zeroize", ] +[[package]] +name = "rio_api" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42e121c6d7cd351521b2f18d9443ffadc622ff4941cb9a332755fceaf22f8d2a" + +[[package]] +name = "rio_turtle" +version = "0.8.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ab908d64c750d82f0e4bc79acb686143d8c26d750e4eb02ba8dd91e1531c2dc" +dependencies = [ + "oxilangtag", + "oxiri", + "rio_api", +] + +[[package]] +name = "rio_xml" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3939c6a1cc5f99ea4ce45b92b9b8a4d50f2047284290c38127c2b6f9cbd4a2aa" +dependencies = [ + "oxilangtag", + "oxiri", + "quick-xml", + "rio_api", +] + [[package]] name = "ryu" version = "1.0.11" diff --git a/contracts/okp4-cognitarium/Cargo.toml b/contracts/okp4-cognitarium/Cargo.toml index b5fec578..c23e0d1d 100644 --- a/contracts/okp4-cognitarium/Cargo.toml +++ b/contracts/okp4-cognitarium/Cargo.toml @@ -27,12 +27,16 @@ panic = 'abort' rpath = false [dependencies] +blake3 = "1.3.3" cosmwasm-schema.workspace = true cosmwasm-std.workspace = true cosmwasm-storage.workspace = true cw-storage-plus.workspace = true cw2.workspace = true derive_builder = "0.12.0" +rio_api = "0.8.3" +rio_turtle = "0.8.3" +rio_xml = "0.8.3" schemars.workspace = true serde.workspace = true thiserror.workspace = true diff --git a/contracts/okp4-cognitarium/src/contract.rs b/contracts/okp4-cognitarium/src/contract.rs index 38dc6b35..d404aa0f 100644 --- a/contracts/okp4-cognitarium/src/contract.rs +++ b/contracts/okp4-cognitarium/src/contract.rs @@ -1,11 +1,12 @@ +use crate::contract::execute::insert; #[cfg(not(feature = "library"))] use cosmwasm_std::entry_point; use cosmwasm_std::{Binary, Deps, DepsMut, Env, MessageInfo, Response, StdError, StdResult}; use cw2::set_contract_version; use crate::error::ContractError; -use crate::msg::{ExecuteMsg, InstantiateMsg, QueryMsg}; -use crate::state::{Store, STORE}; +use crate::msg::{DataFormat, ExecuteMsg, InstantiateMsg, QueryMsg}; +use crate::state::{Store, NAMESPACE_KEY_INCREMENT, STORE}; // version info for migration info const CONTRACT_NAME: &str = concat!("crates.io:", env!("CARGO_PKG_NAME")); @@ -20,25 +21,53 @@ pub fn 
instantiate( ) -> Result { set_contract_version(deps.storage, CONTRACT_NAME, CONTRACT_VERSION)?; - STORE.save( - deps.storage, - &Store { - owner: info.sender, - limits: msg.limits.into(), - }, - )?; + STORE.save(deps.storage, &Store::new(info.sender, msg.limits.into()))?; + NAMESPACE_KEY_INCREMENT.save(deps.storage, &0u128)?; Ok(Response::default()) } #[cfg_attr(not(feature = "library"), entry_point)] pub fn execute( - _deps: DepsMut, + deps: DepsMut, _env: Env, - _info: MessageInfo, - _msg: ExecuteMsg, + info: MessageInfo, + msg: ExecuteMsg, ) -> Result { - Err(ContractError::NotImplemented) + match msg { + ExecuteMsg::InsertData { format, data } => { + insert(deps, info, format.unwrap_or(DataFormat::Turtle), data) + } + _ => Err(StdError::generic_err("Not implemented").into()), + } +} + +pub mod execute { + use super::*; + use crate::msg::DataFormat; + use crate::rdf::TripleReader; + use crate::state::TripleStorer; + use std::io::BufReader; + + pub fn insert( + deps: DepsMut, + info: MessageInfo, + format: DataFormat, + data: Binary, + ) -> Result { + if STORE.load(deps.storage)?.owner != info.sender { + Err(ContractError::Unauthorized)? 
+ } + + let buf = BufReader::new(data.as_slice()); + let mut reader = TripleReader::new(format, buf); + let mut storer = TripleStorer::new(deps.storage)?; + let count = storer.store_all(&mut reader)?; + + Ok(Response::new() + .add_attribute("action", "insert") + .add_attribute("triple_count", count)) + } } #[cfg_attr(not(feature = "library"), entry_point)] @@ -49,10 +78,18 @@ pub fn query(_deps: Deps, _env: Env, _msg: QueryMsg) -> StdResult { #[cfg(test)] mod tests { use super::*; - use crate::msg::StoreLimitsInput; + use crate::error::StoreError; + use crate::msg::ExecuteMsg::InsertData; + use crate::msg::{StoreLimitsInput, StoreLimitsInputBuilder}; use crate::state; + use crate::state::{namespaces, triples, Namespace, Node, Object, Subject, Triple}; + use blake3::Hash; use cosmwasm_std::testing::{mock_dependencies, mock_env, mock_info}; - use cosmwasm_std::Uint128; + use cosmwasm_std::{Attribute, Order, Uint128}; + use std::env; + use std::fs::File; + use std::io::Read; + use std::path::Path; #[test] fn proper_initialization() { @@ -88,5 +125,279 @@ mod tests { max_insert_data_triple_count: Uint128::from(7u128), } ); + assert_eq!( + store.stat, + state::StoreStat { + triple_count: Uint128::zero(), + byte_size: Uint128::zero(), + } + ); + + assert_eq!(NAMESPACE_KEY_INCREMENT.load(&deps.storage).unwrap(), 0u128); + } + + #[test] + fn proper_insert() { + let cases = vec![ + InsertData { + format: Some(DataFormat::RDFXml), + data: read_test_data("sample.rdf.xml"), + }, + InsertData { + format: Some(DataFormat::Turtle), + data: read_test_data("sample.ttl"), + }, + InsertData { + format: Some(DataFormat::NTriples), + data: read_test_data("sample.nt"), + }, + InsertData { + format: Some(DataFormat::NQuads), + data: read_test_data("sample.nq"), + }, + InsertData { + format: None, + data: read_test_data("sample.ttl"), + }, + ]; + + for case in cases { + let mut deps = mock_dependencies(); + + let info = mock_info("owner", &[]); + instantiate( + deps.as_mut(), + 
mock_env(), + info.clone(), + InstantiateMsg { + limits: StoreLimitsInput::default(), + }, + ) + .unwrap(); + + let res = execute(deps.as_mut(), mock_env(), info.clone(), case); + + assert!(res.is_ok()); + assert_eq!( + res.unwrap().attributes, + vec![ + Attribute::new("action", "insert"), + Attribute::new("triple_count", "40") + ] + ); + + assert_eq!( + triples() + .range_raw(&deps.storage, None, None, Order::Ascending) + .count(), + 40 + ); + assert_eq!( + STORE.load(&deps.storage).unwrap().stat.triple_count, + Uint128::from(40u128), + ); + assert_eq!(NAMESPACE_KEY_INCREMENT.load(&deps.storage).unwrap(), 17u128); + assert_eq!( + namespaces() + .load( + &deps.storage, + "https://ontology.okp4.space/dataverse/dataspace/".to_string() + ) + .unwrap(), + Namespace { + key: 0u128, + counter: 5u128, + } + ); + assert_eq!( + triples() + .load( + &deps.storage, + ( + Hash::from_hex( + "09653b5306fa80dc7bea8313d84ac6ed9ded591d42c7f4838c39d1d7a4f09d03" + ) + .unwrap() + .as_bytes(), + Node { + namespace: 3u128, + value: "hasRegistrar".to_string() + } + .key(), + Subject::Named(Node { + namespace: 0u128, + value: "97ff7e16-c08d-47be-8475-211016c82e33".to_string() + }) + .key() + ) + ) + .unwrap(), + Triple { + object: Object::Named(Node { + namespace: 4u128, + value: "0x04d1f1b8f8a7a28f9a5a254c326a963a22f5a5b5d5f5e5d5c5b5a5958575655" + .to_string() + }), + predicate: Node { + namespace: 3u128, + value: "hasRegistrar".to_string() + }, + subject: Subject::Named(Node { + namespace: 0u128, + value: "97ff7e16-c08d-47be-8475-211016c82e33".to_string() + }), + } + ) + } + } + + #[test] + fn insert_unauthorized() { + let mut deps = mock_dependencies(); + instantiate( + deps.as_mut(), + mock_env(), + mock_info("owner", &[]), + InstantiateMsg { + limits: StoreLimitsInput::default(), + }, + ) + .unwrap(); + + let res = execute( + deps.as_mut(), + mock_env(), + mock_info("not-owner", &[]), + InsertData { + format: Some(DataFormat::RDFXml), + data: read_test_data("sample.rdf.xml"), + }, + 
); + assert!(res.is_err()); + assert_eq!(res.err().unwrap(), ContractError::Unauthorized); + } + + #[test] + fn insert_limits() { + let cases = vec![ + ( + StoreLimitsInputBuilder::default() + .max_triple_count(30u128) + .build() + .unwrap(), + Some(ContractError::from(StoreError::TripleCount(30u128.into()))), + ), + ( + StoreLimitsInputBuilder::default() + .max_triple_count(40u128) + .build() + .unwrap(), + None, + ), + ( + StoreLimitsInputBuilder::default() + .max_byte_size(50u128) + .build() + .unwrap(), + Some(ContractError::from(StoreError::ByteSize(50u128.into()))), + ), + ( + StoreLimitsInputBuilder::default() + .max_byte_size(50000u128) + .build() + .unwrap(), + None, + ), + ( + StoreLimitsInputBuilder::default() + .max_insert_data_byte_size(500u128) + .build() + .unwrap(), + Some(ContractError::from(StoreError::InsertDataByteSize( + 500u128.into(), + ))), + ), + ( + StoreLimitsInputBuilder::default() + .max_insert_data_byte_size(50000u128) + .build() + .unwrap(), + None, + ), + ( + StoreLimitsInputBuilder::default() + .max_triple_byte_size(150u128) + .build() + .unwrap(), + Some(ContractError::from(StoreError::TripleByteSize( + 176u128.into(), + 150u128.into(), + ))), + ), + ( + StoreLimitsInputBuilder::default() + .max_triple_byte_size(400u128) + .build() + .unwrap(), + None, + ), + ( + StoreLimitsInputBuilder::default() + .max_insert_data_triple_count(30u128) + .build() + .unwrap(), + Some(ContractError::from(StoreError::InsertDataTripleCount( + 30u128.into(), + ))), + ), + ( + StoreLimitsInputBuilder::default() + .max_insert_data_triple_count(40u128) + .build() + .unwrap(), + None, + ), + ]; + + let exec_msg = InsertData { + format: Some(DataFormat::RDFXml), + data: read_test_data("sample.rdf.xml"), + }; + for case in cases { + let mut deps = mock_dependencies(); + + let info = mock_info("owner", &[]); + instantiate( + deps.as_mut(), + mock_env(), + info.clone(), + InstantiateMsg { limits: case.0 }, + ) + .unwrap(); + + let res = execute(deps.as_mut(), 
/// Errors reported when storing triples would exceed one of the configured store limits.
///
/// As constructed by `TripleStorer::store_triple`, every `Uint128` payload is the configured
/// limit that was exceeded, except for the first field of [`StoreError::TripleByteSize`],
/// which is the measured size of the offending triple.
#[derive(Error, Debug, PartialEq, Eq)]
pub enum StoreError {
    /// The total number of triples in the store would exceed `max_triple_count` (payload).
    #[error("Maximum triples number exceeded: {0}")]
    TripleCount(Uint128),

    /// The total byte size of the store would exceed `max_byte_size` (payload).
    #[error("Maximum byte size exceeded: {0}")]
    ByteSize(Uint128),

    /// A single triple is larger than `max_triple_byte_size`.
    /// Fields are `(measured triple size, configured limit)`.
    #[error("Maximum triple byte size exceeded: {0} / {1}")]
    TripleByteSize(Uint128, Uint128),

    /// The bytes added by the current insert would exceed `max_insert_data_byte_size` (payload).
    #[error("Maximum insert byte size exceeded: {0}")]
    InsertDataByteSize(Uint128),

    /// The triples added by the current insert would exceed `max_insert_data_triple_count`
    /// (payload).
    #[error("Maximum insert triple count exceeded: {0}")]
    InsertDataTripleCount(Uint128),
}
parsing XML RDF: {0}")] + Xml(String), + + #[error("Error parsing Turtle RDF: {0}")] + Turtle(String), +} + +impl From for RDFParseError { + fn from(value: RdfXmlError) -> Self { + RDFParseError::Xml(value.to_string()) + } +} + +impl From for RDFParseError { + fn from(value: TurtleError) -> Self { + RDFParseError::Xml(value.to_string()) + } } diff --git a/contracts/okp4-cognitarium/src/lib.rs b/contracts/okp4-cognitarium/src/lib.rs index dfedc9dc..d43e0389 100644 --- a/contracts/okp4-cognitarium/src/lib.rs +++ b/contracts/okp4-cognitarium/src/lib.rs @@ -1,6 +1,7 @@ pub mod contract; mod error; pub mod msg; +mod rdf; pub mod state; pub use crate::error::ContractError; diff --git a/contracts/okp4-cognitarium/src/rdf.rs b/contracts/okp4-cognitarium/src/rdf.rs new file mode 100644 index 00000000..c3b09d84 --- /dev/null +++ b/contracts/okp4-cognitarium/src/rdf.rs @@ -0,0 +1,115 @@ +use crate::msg::DataFormat; +use cosmwasm_std::{StdError, StdResult}; +use rio_api::model::{Quad, Triple}; +use rio_api::parser::{QuadsParser, TriplesParser}; +use rio_turtle::{NQuadsParser, NTriplesParser, TurtleError, TurtleParser}; +use rio_xml::{RdfXmlError, RdfXmlParser}; +use std::io::BufRead; + +pub struct TripleReader { + parser: TriplesParserKind, +} + +#[allow(clippy::large_enum_variant)] +pub enum TriplesParserKind { + NTriples(NTriplesParser), + Turtle(TurtleParser), + RdfXml(RdfXmlParser), + NQuads(NQuadsParser), +} + +impl TripleReader { + pub fn new(format: DataFormat, src: R) -> Self { + TripleReader { + parser: match format { + DataFormat::RDFXml => TriplesParserKind::RdfXml(RdfXmlParser::new(src, None)), + DataFormat::Turtle => TriplesParserKind::Turtle(TurtleParser::new(src, None)), + DataFormat::NTriples => TriplesParserKind::NTriples(NTriplesParser::new(src)), + DataFormat::NQuads => TriplesParserKind::NQuads(NQuadsParser::new(src)), + }, + } + } + + pub fn read_all(&mut self, mut use_fn: UF) -> Result<(), E> + where + UF: FnMut(Triple) -> Result<(), E>, + E: From + From, 
+ { + match &mut self.parser { + TriplesParserKind::NTriples(parser) => parser.parse_all(&mut use_fn), + TriplesParserKind::Turtle(parser) => parser.parse_all(&mut use_fn), + TriplesParserKind::RdfXml(parser) => parser.parse_all(&mut use_fn), + TriplesParserKind::NQuads(parser) => { + parser.parse_all(&mut |quad: Quad| -> Result<(), E> { + use_fn(Triple { + subject: quad.subject, + predicate: quad.predicate, + object: quad.object, + }) + }) + } + } + } +} + +pub fn explode_iri(iri: &str) -> StdResult<(String, String)> { + let mut marker_index: Option = None; + for delim in ['#', '/', ':'] { + if let Some(index) = iri.rfind(delim) { + marker_index = match marker_index { + Some(i) => Some(i.max(index)), + None => Some(index), + } + } + } + + if let Some(index) = marker_index { + return Ok((iri[..index + 1].to_string(), iri[index + 1..].to_string())); + } + + Err(StdError::generic_err("Couldn't extract IRI namespace")) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn proper_explode_iri() { + assert_eq!( + explode_iri("http://www.w3.org/2001/XMLSchema#dateTime"), + Ok(( + "http://www.w3.org/2001/XMLSchema#".to_string(), + "dateTime".to_string() + )) + ); + assert_eq!( + explode_iri("https://ontology.okp4.space/core/Governance"), + Ok(( + "https://ontology.okp4.space/core/".to_string(), + "Governance".to_string() + )) + ); + assert_eq!( + explode_iri( + "did:key:0x04d1f1b8f8a7a28f9a5a254c326a963a22f5a5b5d5f5e5d5c5b5a5958575655" + ), + Ok(( + "did:key:".to_string(), + "0x04d1f1b8f8a7a28f9a5a254c326a963a22f5a5b5d5f5e5d5c5b5a5958575655".to_string() + )) + ); + assert_eq!( + explode_iri("wow:this/is#weird"), + Ok(("wow:this/is#".to_string(), "weird".to_string())) + ); + assert_eq!( + explode_iri("this#is:weird/too"), + Ok(("this#is:weird/".to_string(), "too".to_string())) + ); + assert_eq!( + explode_iri("this_doesn't_work"), + Err(StdError::generic_err("Couldn't extract IRI namespace")) + ); + } +} diff --git a/contracts/okp4-cognitarium/src/state/mod.rs 
b/contracts/okp4-cognitarium/src/state/mod.rs new file mode 100644 index 00000000..d74f0c28 --- /dev/null +++ b/contracts/okp4-cognitarium/src/state/mod.rs @@ -0,0 +1,9 @@ +mod namespaces; +mod store; +mod storer; +mod triples; + +pub use namespaces::*; +pub use store::*; +pub use storer::*; +pub use triples::*; diff --git a/contracts/okp4-cognitarium/src/state/namespaces.rs b/contracts/okp4-cognitarium/src/state/namespaces.rs new file mode 100644 index 00000000..af05ab57 --- /dev/null +++ b/contracts/okp4-cognitarium/src/state/namespaces.rs @@ -0,0 +1,34 @@ +use cw_storage_plus::{Index, IndexList, IndexedMap, Item, UniqueIndex}; +use serde::{Deserialize, Serialize}; + +/// Stores a key increment used as a unique key for referencing a namespace. Given the size of a `u128` +/// there is no need to implement a garbage collector mechanism in case some namespaces are removed. +pub const NAMESPACE_KEY_INCREMENT: Item = Item::new("namespace_key"); + +#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq)] +pub struct Namespace { + /// The unique, incremented key issued to reference this namespace from a triple IRI. + pub key: u128, + + /// A reference counter to this namespace. 
+ pub counter: u128, +} + +pub struct NamespaceIndexes<'a> { + key: UniqueIndex<'a, u128, Namespace, String>, +} + +impl IndexList for NamespaceIndexes<'_> { + fn get_indexes(&self) -> Box> + '_> { + Box::new(vec![&self.key as &dyn Index].into_iter()) + } +} + +pub fn namespaces<'a>() -> IndexedMap<'a, String, Namespace, NamespaceIndexes<'a>> { + IndexedMap::new( + "NAMESPACE", + NamespaceIndexes { + key: UniqueIndex::new(|ns| ns.key, "NAMESPACE__KEY"), + }, + ) +} diff --git a/contracts/okp4-cognitarium/src/state.rs b/contracts/okp4-cognitarium/src/state/store.rs similarity index 83% rename from contracts/okp4-cognitarium/src/state.rs rename to contracts/okp4-cognitarium/src/state/store.rs index da1ac89e..ae3584d8 100644 --- a/contracts/okp4-cognitarium/src/state.rs +++ b/contracts/okp4-cognitarium/src/state/store.rs @@ -9,6 +9,17 @@ pub const STORE: Item = Item::new("store"); pub struct Store { pub owner: Addr, pub limits: StoreLimits, + pub stat: StoreStat, +} + +impl Store { + pub fn new(owner: Addr, limits: StoreLimits) -> Store { + Store { + owner, + limits, + stat: StoreStat::default(), + } + } } #[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq)] @@ -49,3 +60,9 @@ impl From for msg::StoreLimits { } } } + +#[derive(Serialize, Deserialize, Default, Clone, Debug, PartialEq, Eq)] +pub struct StoreStat { + pub triple_count: Uint128, + pub byte_size: Uint128, +} diff --git a/contracts/okp4-cognitarium/src/state/storer.rs b/contracts/okp4-cognitarium/src/state/storer.rs new file mode 100644 index 00000000..21a0960b --- /dev/null +++ b/contracts/okp4-cognitarium/src/state/storer.rs @@ -0,0 +1,216 @@ +use crate::error::StoreError; +use crate::rdf::TripleReader; +use crate::state::{ + namespaces, triples, Literal, Namespace, Node, Object, Store, Subject, Triple, + NAMESPACE_KEY_INCREMENT, STORE, +}; +use crate::{rdf, ContractError}; +use blake3::Hash; +use cosmwasm_std::{StdError, StdResult, Storage, Uint128}; +use rio_api::model; +use 
rio_api::model::Term; +use std::collections::BTreeMap; +use std::io::BufRead; + +pub struct TripleStorer<'a> { + storage: &'a mut dyn Storage, + store: Store, + ns_key_inc_offset: u128, + ns_cache: BTreeMap, + initial_triple_count: Uint128, + initial_byte_size: Uint128, +} + +impl<'a> TripleStorer<'a> { + pub fn new(storage: &'a mut dyn Storage) -> StdResult { + let store = STORE.load(storage)?; + let ns_key_inc_offset = NAMESPACE_KEY_INCREMENT.load(storage)?; + Ok(Self { + storage, + store: store.clone(), + ns_key_inc_offset, + ns_cache: BTreeMap::new(), + initial_triple_count: store.stat.triple_count, + initial_byte_size: store.stat.byte_size, + }) + } + + pub fn store_all( + &mut self, + reader: &mut TripleReader, + ) -> Result { + reader.read_all(|t| self.store_triple(t))?; + self.finish() + } + + pub fn store_triple(&mut self, t: model::Triple) -> Result<(), ContractError> { + self.store.stat.triple_count += Uint128::one(); + if self.store.stat.triple_count > self.store.limits.max_triple_count { + Err(StoreError::TripleCount(self.store.limits.max_triple_count))? + } + if self.store.stat.triple_count - self.initial_triple_count + > self.store.limits.max_insert_data_triple_count + { + Err(StoreError::InsertDataTripleCount( + self.store.limits.max_insert_data_triple_count, + ))? + } + + let t_size = Uint128::from(Self::triple_size(t) as u128); + if t_size > self.store.limits.max_triple_byte_size { + Err(StoreError::TripleByteSize( + t_size, + self.store.limits.max_triple_byte_size, + ))? + } + + self.store.stat.byte_size += t_size; + if self.store.stat.byte_size > self.store.limits.max_byte_size { + Err(StoreError::ByteSize(self.store.limits.max_byte_size))? + } + if self.store.stat.byte_size - self.initial_byte_size + > self.store.limits.max_insert_data_byte_size + { + Err(StoreError::InsertDataByteSize( + self.store.limits.max_insert_data_byte_size, + ))? 
+ } + + let triple = self.rio_to_triple(t)?; + let object_hash: Hash = triple.object.as_hash(); + triples() + .save( + self.storage, + ( + object_hash.as_bytes(), + triple.predicate.key(), + triple.subject.key(), + ), + &triple, + ) + .map_err(ContractError::Std) + } + + pub fn finish(&mut self) -> Result { + STORE.save(self.storage, &self.store)?; + NAMESPACE_KEY_INCREMENT.save(self.storage, &self.ns_key_inc_offset)?; + for entry in &self.ns_cache { + namespaces().save(self.storage, entry.0.to_string(), entry.1)?; + } + + Ok(self.store.stat.triple_count - self.initial_triple_count) + } + + fn resolve_namespace_key(&mut self, ns_str: String) -> StdResult { + match self.ns_cache.get_mut(ns_str.as_str()) { + Some(namespace) => { + namespace.counter += 1; + Ok(namespace.key) + } + None => { + let mut namespace = match namespaces().load(self.storage, ns_str.clone()) { + Err(StdError::NotFound { .. }) => { + let n = Namespace { + key: self.ns_key_inc_offset, + counter: 0u128, + }; + self.ns_key_inc_offset += 1; + Ok(n) + } + Ok(n) => Ok(n), + Err(e) => Err(e), + }?; + + namespace.counter += 1; + self.ns_cache.insert(ns_str, namespace.clone()); + Ok(namespace.key) + } + } + } + + fn rio_to_triple(&mut self, triple: model::Triple) -> StdResult { + Ok(Triple { + subject: self.rio_to_subject(triple.subject)?, + predicate: self.rio_to_node(triple.predicate)?, + object: self.rio_to_object(triple.object)?, + }) + } + + fn rio_to_subject(&mut self, subject: model::Subject) -> StdResult { + match subject { + model::Subject::NamedNode(node) => self.rio_to_node(node).map(Subject::Named), + model::Subject::BlankNode(node) => Ok(Subject::Blank(node.id.to_string())), + model::Subject::Triple(_) => Err(StdError::generic_err("RDF star syntax unsupported")), + } + } + + fn rio_to_node(&mut self, node: model::NamedNode) -> StdResult { + let (ns, v) = rdf::explode_iri(node.iri)?; + Ok(Node { + namespace: self.resolve_namespace_key(ns)?, + value: v, + }) + } + + fn rio_to_object(&mut self, 
object: Term) -> StdResult { + match object { + Term::BlankNode(node) => Ok(Object::Blank(node.id.to_string())), + Term::NamedNode(node) => self.rio_to_node(node).map(Object::Named), + Term::Literal(literal) => self.rio_to_literal(literal).map(Object::Literal), + Term::Triple(_) => Err(StdError::generic_err("RDF star syntax unsupported")), + } + } + + fn rio_to_literal(&mut self, literal: model::Literal) -> StdResult { + match literal { + model::Literal::Simple { value } => Ok(Literal::Simple { + value: value.to_string(), + }), + model::Literal::LanguageTaggedString { value, language } => Ok(Literal::I18NString { + value: value.to_string(), + language: language.to_string(), + }), + model::Literal::Typed { value, datatype } => { + self.rio_to_node(datatype).map(|node| Literal::Typed { + value: value.to_string(), + datatype: node, + }) + } + } + } + + fn triple_size(triple: model::Triple) -> usize { + Self::subject_size(triple.subject) + + Self::node_size(triple.predicate) + + Self::object_size(triple.object) + } + + fn subject_size(subject: model::Subject) -> usize { + match subject { + model::Subject::NamedNode(n) => Self::node_size(n), + model::Subject::BlankNode(n) => n.id.len(), + model::Subject::Triple(_) => 0, + } + } + + fn node_size(node: model::NamedNode) -> usize { + node.iri.len() + } + + fn object_size(term: Term) -> usize { + match term { + Term::NamedNode(n) => Self::node_size(n), + Term::BlankNode(n) => n.id.len(), + Term::Literal(l) => match l { + model::Literal::Simple { value } => value.len(), + model::Literal::LanguageTaggedString { value, language } => { + value.len() + language.len() + } + model::Literal::Typed { value, datatype } => { + value.len() + Self::node_size(datatype) + } + }, + Term::Triple(_) => 0, + } + } +} diff --git a/contracts/okp4-cognitarium/src/state/triples.rs b/contracts/okp4-cognitarium/src/state/triples.rs new file mode 100644 index 00000000..ffbb1152 --- /dev/null +++ b/contracts/okp4-cognitarium/src/state/triples.rs @@ 
-0,0 +1,137 @@ +use blake3::Hash; +use cw_storage_plus::{Index, IndexList, IndexedMap, MultiIndex}; +use serde::{Deserialize, Serialize}; + +/// Represents a triple primary key as a tuple of: +/// - Object hash +/// - Predicate in a binary format +/// - Subject in a binary format +pub type TriplePK<'a> = (&'a [u8], Vec, Vec); + +pub struct TripleIndexes<'a> { + subject_and_predicate: MultiIndex<'a, (Vec, Vec), Triple, TriplePK<'a>>, +} + +impl IndexList for TripleIndexes<'_> { + fn get_indexes(&self) -> Box> + '_> { + Box::new(vec![&self.subject_and_predicate as &dyn Index].into_iter()) + } +} + +pub fn triples<'a>() -> IndexedMap<'a, TriplePK<'a>, Triple, TripleIndexes<'a>> { + IndexedMap::new( + "TRIPLE", + TripleIndexes { + subject_and_predicate: MultiIndex::new( + |_pk, triple| (triple.subject.key(), triple.predicate.key()), + "TRIPLE", + "TRIPLE__SUBJECT_PREDICATE", + ), + }, + ) +} + +#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq)] +pub struct Triple { + pub subject: Subject, + pub predicate: Predicate, + pub object: Object, +} + +#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq)] +pub enum Subject { + Named(Node), + Blank(BlankNode), +} + +impl Subject { + pub fn key(&self) -> Vec { + match self { + Subject::Named(n) => { + let node = n.key(); + let mut key: Vec = Vec::with_capacity(node.len() + 1); + key.push(b'n'); + key.extend(node); + + key + } + Subject::Blank(n) => { + let val = n.as_bytes(); + let mut key: Vec = Vec::with_capacity(val.len() + 1); + key.push(b'b'); + key.extend(val); + + key + } + } + } +} + +pub type Predicate = Node; + +#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq)] +pub enum Object { + Named(Node), + Blank(BlankNode), + Literal(Literal), +} + +impl Object { + pub fn as_hash(&self) -> Hash { + let mut hasher = blake3::Hasher::new(); + match self { + Object::Named(n) => { + hasher + .update(&[b'n']) + .update(n.namespace.to_be_bytes().as_slice()) + 
.update(n.namespace.to_be_bytes().as_slice()); + } + Object::Blank(n) => { + hasher.update(&[b'b']).update(n.as_bytes()); + } + Object::Literal(l) => { + hasher.update(&[b'l']); + match l { + Literal::Simple { value } => hasher.update(&[b's']).update(value.as_bytes()), + Literal::I18NString { value, language } => hasher + .update(&[b'i']) + .update(value.as_bytes()) + .update(language.as_bytes()), + Literal::Typed { value, datatype } => hasher + .update(&[b't']) + .update(value.as_bytes()) + .update(datatype.namespace.to_be_bytes().as_slice()) + .update(datatype.value.as_bytes()), + }; + } + } + + hasher.finalize() + } +} + +pub type BlankNode = String; + +#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq)] +pub struct Node { + pub namespace: u128, + pub value: String, +} + +impl Node { + pub fn key(&self) -> Vec { + let val = self.value.as_bytes(); + let mut key: Vec = Vec::with_capacity(val.len() + 16); + key.extend(self.namespace.to_be_bytes()); + key.extend(val); + + key + } +} + +#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq)] +pub enum Literal { + Simple { value: String }, + I18NString { value: String, language: String }, + Typed { value: String, datatype: Node }, +} diff --git a/contracts/okp4-cognitarium/testdata/sample.nq b/contracts/okp4-cognitarium/testdata/sample.nq new file mode 100644 index 00000000..0efdf122 --- /dev/null +++ b/contracts/okp4-cognitarium/testdata/sample.nq @@ -0,0 +1,40 @@ + . + . + . + . + . + . + "A test Data Space."@en . + "Un Data Space de test."@fr . + "OKP4" . + "OKP4" . + "Test" . + . + "Data Space de test"@fr . + "Test Data Space"@en . + . + . + . + . + . + . + . + . + "OKP4" . + . + . + "test" . + "Dataset de test"@fr . + "test Dataset"@en . + "Me" . + . + "A test Dataset."@en . + "Un Dataset de test."@fr . + . + . + . + . + . + "2023-03-28T00:00:00+00:00"^^ . + "2023-03-28T00:00:00+00:00"^^ . + . 
diff --git a/contracts/okp4-cognitarium/testdata/sample.nt b/contracts/okp4-cognitarium/testdata/sample.nt new file mode 100644 index 00000000..a8b96153 --- /dev/null +++ b/contracts/okp4-cognitarium/testdata/sample.nt @@ -0,0 +1,40 @@ + . + . + . + . + . + . + "A test Data Space."@en . + "Un Data Space de test."@fr . + "OKP4" . + "OKP4" . + "Test" . + . + "Data Space de test"@fr . + "Test Data Space"@en . + . + . + . + . + . + . + . + . + "OKP4" . + . + . + "test" . + "Dataset de test"@fr . + "test Dataset"@en . + "Me" . + . + "A test Dataset."@en . + "Un Dataset de test."@fr . + . + . + . + . + . + "2023-03-28T00:00:00+00:00"^^ . + "2023-03-28T00:00:00+00:00"^^ . + . diff --git a/contracts/okp4-cognitarium/testdata/sample.rdf.xml b/contracts/okp4-cognitarium/testdata/sample.rdf.xml new file mode 100644 index 00000000..85388f8a --- /dev/null +++ b/contracts/okp4-cognitarium/testdata/sample.rdf.xml @@ -0,0 +1,56 @@ + + + + + + + + + + + A test Data Space. + Un Data Space de test. + OKP4 + + OKP4 + Test + + Data Space de test + Test Data Space + + + + + + + + + + + + OKP4 + + + + test + Dataset de test + test Dataset + Me + + A test Dataset. + Un Dataset de test. + + + + + + + 2023-03-28T00:00:00+00:00 + + 2023-03-28T00:00:00+00:00 + + + + diff --git a/contracts/okp4-cognitarium/testdata/sample.ttl b/contracts/okp4-cognitarium/testdata/sample.ttl new file mode 100644 index 00000000..c1be2b3f --- /dev/null +++ b/contracts/okp4-cognitarium/testdata/sample.ttl @@ -0,0 +1,43 @@ +@prefix owl: . +@prefix ns0: . +@prefix xsd: . + + + a owl:NamedIndividual, ; + ns0:hasRegistrar . + + + a owl:NamedIndividual, ; + ns0:describes ; + ns0:hasDescription "A test Data Space."@en, "Un Data Space de test."@fr ; + ns0:hasPublisher "OKP4" ; + ns0:hasTag "OKP4", "Test" ; + ns0:hasTopic ; + ns0:hasTitle "Data Space de test"@fr, "Test Data Space"@en . + + + a owl:NamedIndividual, ns0:Dataset ; + ns0:hasIdentifier ; + ns0:providedBy ; + ns0:belongsTo ; + ns0:hasRegistrar . 
+ + + a owl:NamedIndividual, ; + ns0:hasPublisher "OKP4" ; + ns0:hasLicense ; + ns0:hasFormat ; + ns0:hasTag "test" ; + ns0:hasTitle "Dataset de test"@fr, "test Dataset"@en ; + ns0:hasCreator "Me" ; + ns0:describes ; + ns0:hasDescription "A test Dataset."@en, "Un Dataset de test."@fr ; + ns0:hasTopic . + + + a owl:NamedIndividual, ; + ns0:createdBy ; + ns0:lastModifiedBy ; + ns0:updatedOn "2023-03-28T00:00:00+00:00"^^xsd:dateTime ; + ns0:createdOn "2023-03-28T00:00:00+00:00"^^xsd:dateTime ; + ns0:describes .