From f829121a2de851a6cafbb57ff29788581678b05c Mon Sep 17 00:00:00 2001 From: Nordine Bittich Date: Sun, 1 Dec 2024 12:15:47 +0100 Subject: [PATCH] iri: implementation in turtle doc but slow! --- examples/turtle_doc/input/0029.ttl | 18 ++ examples/turtle_doc/output/0029.ttl | 10 + src/iri.rs | 406 ++++++++++++++-------------- src/iri_spect.txt | 137 ---------- src/tests/turtle_doc_test.rs | 3 +- src/turtle/turtle_doc.rs | 46 ++-- 6 files changed, 253 insertions(+), 367 deletions(-) create mode 100644 examples/turtle_doc/input/0029.ttl create mode 100644 examples/turtle_doc/output/0029.ttl delete mode 100644 src/iri_spect.txt diff --git a/examples/turtle_doc/input/0029.ttl b/examples/turtle_doc/input/0029.ttl new file mode 100644 index 0000000..ad4f30d --- /dev/null +++ b/examples/turtle_doc/input/0029.ttl @@ -0,0 +1,18 @@ +@base . + +# Define prefixes for namespaces (optional but common practice) +@prefix ex: . +@prefix foaf: . + +# Relative IRIs resolve against the base + a foaf:Document ; + foaf:title "An Example Document" ; + foaf:maker . + +# Use a prefix to reference another IRI +ex:item123 a ex:Product ; + ex:price "19.99"^^ ; + ex:availableAt . + +# Fully qualified IRIs are unaffected by the base + ex:relatedTo ex:item123 . diff --git a/examples/turtle_doc/output/0029.ttl b/examples/turtle_doc/output/0029.ttl new file mode 100644 index 0000000..8d98562 --- /dev/null +++ b/examples/turtle_doc/output/0029.ttl @@ -0,0 +1,10 @@ + + a ; + "An Example Document" ; + . + + a ; + "19.99"^^ ; + . + + . diff --git a/src/iri.rs b/src/iri.rs index fc50f41..562f5df 100644 --- a/src/iri.rs +++ b/src/iri.rs @@ -1,84 +1,98 @@ #[derive(Debug, PartialEq)] -pub enum IRI { +pub enum IRI<'a> { IRI { - scheme: String, - hier_part: IHierPart, - query: String, - fragment: String, + scheme: &'a str, + hier_part: IHierPart<'a>, + query: &'a str, + fragment: &'a str, }, - Reference(RelativeRef), + Reference(RelativeRef<'a>), Absolute { - scheme: String, - hier_part: IHierPart, - query: String, + scheme: &'a str, + hier_part: IHierPart<'a>, + query: &'a str, }, } #[derive(Debug, PartialEq)] -pub enum IHierPart { - AbEmpty { authority: Authority, ipath: IPath }, - Absolute(IPath), - Rootless(IPath), +pub enum IHierPart<'a> { + AbEmpty { + authority: Authority<'a>, + ipath: IPath<'a>, + }, + Absolute(IPath<'a>), + Rootless(IPath<'a>), Empty, } #[derive(Debug, PartialEq)] -pub enum RelativePart { - AbEmpty { authority: Authority, ipath: IPath }, - Absolute(IPath), - NoScheme(IPath), - Empty(IPath), +pub enum RelativePart<'a> { + AbEmpty { + authority: Authority<'a>, + ipath: IPath<'a>, + }, + Absolute(IPath<'a>), + NoScheme(IPath<'a>), + Empty(IPath<'a>), } #[derive(Debug, PartialEq)] -pub struct RelativeRef { - pub relative_part: RelativePart, - pub query: String, - pub fragment: String, +pub struct RelativeRef<'a> { + pub relative_part: RelativePart<'a>, + pub query: &'a str, + pub fragment: &'a str, } #[derive(Debug, PartialEq)] -pub struct Authority { - pub user_info: Option, - pub host: Host, - pub port: Option, +pub struct Authority<'a> { + pub user_info: Option<&'a str>, + pub host: Host<'a>, + pub port: Option<&'a str>, } #[derive(Debug, PartialEq)] -pub enum Host { +pub enum Host<'a> { IPV4(Vec), IPV6(Vec), - RegName(Option), + RegName(Option<&'a str>), } #[derive(Debug, PartialEq)] -pub enum IPath { - AbEmpty(Vec), // starts with / or is empty +pub enum IPath<'a> { + AbEmpty(Vec<&'a str>), // starts with / or is empty Absolute { - snz: String, // segment non zero (isegment-nz) - segments: Vec, // isegment + snz: &'a str, // segment non zero (isegment-nz) + segments: Vec<&'a str>, // isegment }, Rootless { - snz: String, // isegment-nz - segments: Vec, + snz: &'a str, // isegment-nz + segments: Vec<&'a str>, }, NoScheme { - snz_nc: String, // isegment-nz-nc - segments: Vec, + snz_nc: &'a str, // isegment-nz-nc + segments: Vec<&'a str>, }, Empty, // ipath-empty } -use nom::error::{ParseError, VerboseError}; +use nom::{ + combinator::complete, + error::{ParseError, VerboseError}, +}; use parser::{parse_absolute_iri, parse_iri, parse_iri_reference}; use crate::prelude::alt; -impl<'a> TryFrom<&'a str> for IRI { +impl<'a> TryFrom<&'a str> for IRI<'a> { type Error = nom::Err>; fn try_from(value: &'a str) -> Result { - match alt((parse_iri, parse_absolute_iri, parse_iri_reference))(value) { + match alt(( + complete(parse_iri), + complete(parse_absolute_iri), + complete(parse_iri_reference), + ))(value) + { Ok((rest, iri)) => { if !rest.trim().is_empty() { Err(nom::Err::Error(VerboseError::from_error_kind( @@ -93,8 +107,11 @@ impl<'a> TryFrom<&'a str> for IRI { } } } - -#[allow(unused)] +impl IRI<'_> { + pub fn is_relative(&self) -> bool { + matches!(self, IRI::Reference(_)) + } +} mod ip { use nom::{ bytes::complete::take_while_m_n, @@ -175,31 +192,23 @@ mod ip { } } -#[allow(unused)] mod parser { use nom::{ bytes::streaming::take_while1, - character::complete::anychar, - error::{ParseError, VerboseError}, - multi::{fold_many0, fold_many1, many1}, + multi::{many0_count, many1_count}, }; use crate::prelude::*; use super::{ - ip::{self, parse_ip_v4, parse_ip_v6}, + ip::{parse_ip_v4, parse_ip_v6}, Authority, Host, IHierPart, IPath, RelativePart, RelativeRef, IRI, }; - fn parse_i_query(s: &str) -> ParserResult { - fold_many0( - alt((parse_ip_char, parse_i_private, tag("/"), tag("?"))), - String::new, - |mut acc, item| { - acc.push_str(item); - acc - }, - )(s) + fn parse_i_query(s: &str) -> ParserResult<&str> { + let (rest, _) = many0_count(alt((parse_ip_char, parse_i_private, tag("/"), tag("?"))))(s)?; + let value = &s[0..s.len() - rest.len()]; + Ok((rest, value)) } fn parse_authority(s: &str) -> ParserResult { map( @@ -211,7 +220,7 @@ mod parser { |(user_info, host, port)| Authority { user_info, host, - port: port.map(String::from), + port, }, )(s) } @@ -222,13 +231,13 @@ mod parser { pub(super) fn parse_iri(s: &str) -> ParserResult { map( tuple(( - parse_scheme, - preceded(tag(":"), parse_i_hier_part), + terminated(parse_scheme, tag(":")), + parse_i_hier_part, preceded(opt(tag("?")), parse_i_query), preceded(opt(tag("#")), parse_i_fragment), )), |(scheme, hier_part, query, fragment)| IRI::IRI { - scheme: scheme.into(), + scheme, hier_part, query, fragment, @@ -243,7 +252,7 @@ mod parser { preceded(opt(tag("?")), parse_i_query), )), |(scheme, hier_part, query)| IRI::Absolute { - scheme: scheme.into(), + scheme, hier_part, query, }, @@ -282,7 +291,7 @@ mod parser { ), map(parse_ipath_absolute, IHierPart::Absolute), map(parse_ipath_rootless, IHierPart::Rootless), - map(parse_ipath_empty, |(path)| IHierPart::Empty), + map(parse_ipath_empty, |_| IHierPart::Empty), ))(s) } fn parse_host(s: &str) -> ParserResult { @@ -296,15 +305,10 @@ mod parser { ))(s) } - fn parse_i_fragment(s: &str) -> ParserResult { - fold_many0( - alt((parse_ip_char, tag("/"), tag("?"))), - String::new, - |mut acc, item| { - acc.push_str(item); - acc - }, - )(s) + fn parse_i_fragment(s: &str) -> ParserResult<&str> { + let (rest, _) = many0_count(alt((parse_ip_char, tag("/"), tag("?"))))(s)?; + let value = &s[0..s.len() - rest.len()]; + Ok((rest, value)) } fn parse_ipath_empty(s: &str) -> ParserResult { map( @@ -317,12 +321,9 @@ mod parser { map( pair( parse_i_segmentnz, - many0(recognize(preceded(tag("/"), parse_i_segment0))), + many0(recognize(preceded(tag("/"), parse_i_segmentnz))), ), - |(snz, segments)| IPath::Rootless { - snz, - segments: segments.into_iter().map(String::from).collect(), - }, + |(snz, segments)| IPath::Rootless { snz, segments }, )(s) } @@ -332,16 +333,13 @@ mod parser { parse_i_segmentnz_nc, many0(recognize(preceded(tag("/"), parse_i_segment0))), ), - |(snz_nc, segments)| IPath::NoScheme { - snz_nc, - segments: segments.into_iter().map(String::from).collect(), - }, + |(snz_nc, segments)| IPath::NoScheme { snz_nc, segments }, )(s) } fn parse_ipath_abempty(s: &str) -> ParserResult { map( many0(recognize(preceded(tag("/"), parse_i_segment0))), - |v| IPath::AbEmpty(v.into_iter().map(String::from).collect()), + IPath::AbEmpty, )(s) } fn parse_ipath_absolute(s: &str) -> ParserResult { @@ -351,40 +349,30 @@ mod parser { many0(recognize(preceded(tag("/"), parse_i_segment0))), ); verify( - map(parser, |(snz, segments)| IPath::Absolute { - snz, - segments: segments.into_iter().map(String::from).collect(), - }), + map(parser, |(snz, segments)| IPath::Absolute { snz, segments }), move |_| first_two.starts_with("/") && first_two != "//", )(s) } - fn parse_i_segmentnz(s: &str) -> ParserResult { - fold_many0(parse_ip_char, String::new, |mut acc, item| { - acc.push_str(item); - acc - })(s) + fn parse_i_segmentnz(s: &str) -> ParserResult<&str> { + let (rest, _) = many0_count(parse_ip_char)(s)?; + let value = &s[0..s.len() - rest.len()]; + Ok((rest, value)) } - fn parse_i_segment0(s: &str) -> ParserResult { - fold_many0(parse_ip_char, String::new, |mut acc, item| { - acc.push_str(item); - acc - })(s) + fn parse_i_segment0(s: &str) -> ParserResult<&str> { + let (rest, _) = many0_count(parse_ip_char)(s)?; + let value = &s[0..s.len() - rest.len()]; + Ok((rest, value)) } - fn parse_i_segmentnz_nc(s: &str) -> ParserResult { - fold_many1( - alt(( - parse_i_unreserved, - parse_pct_encoded, - parse_sub_delims, - tag("@"), - )), - String::new, - |mut acc, item| { - acc.push_str(item); - acc - }, - )(s) + fn parse_i_segmentnz_nc(s: &str) -> ParserResult<&str> { + let (rest, _) = many1_count(alt(( + parse_i_unreserved, + parse_pct_encoded, + parse_sub_delims, + tag("@"), + )))(s)?; + let value = &s[0..s.len() - rest.len()]; + Ok((rest, value)) } fn parse_ip_char(s: &str) -> ParserResult<&str> { alt(( @@ -401,33 +389,27 @@ mod parser { |scheme: &str| scheme.starts_with(|c: char| c.is_alphabetic()), )(s) } - fn parse_userinfo(s: &str) -> ParserResult { - fold_many1( - alt(( - parse_pct_encoded, - parse_i_unreserved, - parse_sub_delims, - tag(":"), - )), - String::new, - |mut acc: String, item| { - acc.push_str(item); - acc - }, - )(s) + fn parse_userinfo(s: &str) -> ParserResult<&str> { + let (rest, _) = many1_count(alt(( + parse_pct_encoded, + parse_i_unreserved, + parse_sub_delims, + tag(":"), + )))(s)?; + let value = &s[0..s.len() - rest.len()]; + Ok((rest, value)) } fn parse_port(s: &str) -> ParserResult<&str> { take_while1(|p: char| p.is_numeric())(s) } - fn parse_i_reg_name(s: &str) -> ParserResult { - fold_many1( - alt((parse_pct_encoded, parse_i_unreserved, parse_sub_delims)), - String::new, - |mut acc: String, item| { - acc.push_str(item); - acc - }, - )(s) + fn parse_i_reg_name(s: &str) -> ParserResult<&str> { + let (rest, _) = many1_count(alt(( + parse_pct_encoded, + parse_i_unreserved, + parse_sub_delims, + )))(s)?; + let value = &s[0..s.len() - rest.len()]; + Ok((rest, value)) } fn parse_i_private(s: &str) -> ParserResult<&str> { verify(take(1usize), |hex: &str| { @@ -492,7 +474,7 @@ mod parser { }), ))(s) } - fn hex_to_char(hex: &str) -> Option { + fn _hex_to_char(hex: &str) -> Option { u32::from_str_radix(hex, 16).ok().and_then(char::from_u32) } } @@ -616,22 +598,23 @@ mod test { } #[test] + fn test_iris() { let iri = IRI::try_from("http://example.com/").unwrap(); assert_eq!( iri, IRI::IRI { - scheme: "http".into(), + scheme: "http", hier_part: IHierPart::AbEmpty { authority: Authority { user_info: None, - host: Host::RegName(Some("example.com".into(),),), + host: Host::RegName(Some("example.com",),), port: None, }, - ipath: IPath::AbEmpty(vec!["/".into(),],), + ipath: IPath::AbEmpty(vec!["/",],), }, - query: "".into(), - fragment: "".into(), + query: "", + fragment: "", } ); @@ -639,17 +622,17 @@ mod test { assert_eq!( iri, IRI::IRI { - scheme: "https".into(), + scheme: "https", hier_part: IHierPart::AbEmpty { authority: Authority { user_info: None, - host: Host::RegName(Some("example.com".into(),),), + host: Host::RegName(Some("example.com",),), port: None, }, - ipath: IPath::AbEmpty(vec!["/page".into(),],), + ipath: IPath::AbEmpty(vec!["/page",],), }, - query: "".into(), - fragment: "".into(), + query: "", + fragment: "", } ); @@ -657,17 +640,17 @@ mod test { assert_eq!( iri, IRI::IRI { - scheme: "ftp".into(), + scheme: "ftp", hier_part: IHierPart::AbEmpty { authority: Authority { user_info: None, - host: Host::RegName(Some("ftp.example.org".into(),),), + host: Host::RegName(Some("ftp.example.org",),), port: None, }, - ipath: IPath::AbEmpty(vec!["/file.txt".into(),],), + ipath: IPath::AbEmpty(vec!["/file.txt",],), }, - query: "".into(), - fragment: "".into(), + query: "", + fragment: "", } ); @@ -675,17 +658,17 @@ mod test { assert_eq!( iri, IRI::IRI { - scheme: "http".into(), + scheme: "http", hier_part: IHierPart::AbEmpty { authority: Authority { user_info: None, - host: Host::RegName(Some("example.com".into(),),), + host: Host::RegName(Some("example.com",),), port: None, }, - ipath: IPath::AbEmpty(vec!["/a".into(), "/b%20c".into()],), + ipath: IPath::AbEmpty(vec!["/a", "/b%20c"],), }, - query: "".into(), - fragment: "".into(), + query: "", + fragment: "", } ); @@ -693,17 +676,17 @@ mod test { assert_eq!( iri, IRI::IRI { - scheme: "http".into(), + scheme: "http", hier_part: IHierPart::AbEmpty { authority: Authority { user_info: None, - host: Host::RegName(Some("example.com".into(),),), + host: Host::RegName(Some("example.com",),), port: None, }, - ipath: IPath::AbEmpty(vec!["/a".into(), "/こんにちは".into()],), + ipath: IPath::AbEmpty(vec!["/a", "/こんにちは"],), }, - query: "".into(), - fragment: "".into(), + query: "", + fragment: "", } ); @@ -711,13 +694,13 @@ mod test { assert_eq!( iri, IRI::IRI { - scheme: "mailto".into(), + scheme: "mailto", hier_part: IHierPart::Rootless(IPath::Rootless { - snz: "user@example.com".into(), + snz: "user@example.com", segments: vec![] }), - query: "".into(), - fragment: "".into(), + query: "", + fragment: "", } ); @@ -725,17 +708,17 @@ mod test { assert_eq!( iri, IRI::IRI { - scheme: "http".into(), + scheme: "http", hier_part: IHierPart::AbEmpty { authority: Authority { user_info: None, - host: Host::RegName(Some("example.com".into(),),), + host: Host::RegName(Some("example.com",),), port: None, }, - ipath: IPath::AbEmpty(vec!["/".into()],), + ipath: IPath::AbEmpty(vec!["/"],), }, - query: "q=foo%3Dbar".into(), - fragment: "".into(), + query: "q=foo%3Dbar", + fragment: "", } ); let iri = IRI::try_from("/a/b/c").unwrap(); @@ -743,11 +726,11 @@ mod test { iri, IRI::Reference(RelativeRef { relative_part: RelativePart::Absolute(IPath::Absolute { - snz: "".into(), - segments: vec!["/a".into(), "/b".into(), "/c".into(),], + snz: "", + segments: vec!["/a", "/b", "/c",], },), - query: "".into(), - fragment: "".into(), + query: "", + fragment: "", },) ); @@ -756,11 +739,11 @@ mod test { iri, IRI::Reference(RelativeRef { relative_part: RelativePart::NoScheme(IPath::NoScheme { - snz_nc: ".".into(), - segments: vec!["/c".into(),], + snz_nc: ".", + segments: vec!["/c",], },), - query: "".into(), - fragment: "".into(), + query: "", + fragment: "", },) ); let iri = IRI::try_from("../b/c").unwrap(); @@ -768,79 +751,79 @@ mod test { iri, IRI::Reference(RelativeRef { relative_part: RelativePart::NoScheme(IPath::NoScheme { - snz_nc: "..".into(), - segments: vec!["/b".into(), "/c".into(),], + snz_nc: "..", + segments: vec!["/b", "/c",], },), - query: "".into(), - fragment: "".into(), + query: "", + fragment: "", },) ); let iri = IRI::try_from("http://xn--fsq.com").unwrap(); assert_eq!( iri, IRI::IRI { - scheme: "http".into(), + scheme: "http", hier_part: IHierPart::AbEmpty { authority: Authority { user_info: None, - host: Host::RegName(Some("xn--fsq.com".into(),),), + host: Host::RegName(Some("xn--fsq.com",),), port: None, }, ipath: IPath::AbEmpty(vec![],), }, - query: "".into(), - fragment: "".into(), + query: "", + fragment: "", } ); let iri = IRI::try_from("http://[2001:db8::1]/path").unwrap(); assert_eq!( iri, IRI::IRI { - scheme: "http".into(), + scheme: "http", hier_part: IHierPart::AbEmpty { authority: Authority { user_info: None, host: Host::IPV6(vec![0x2001, 0xdb8, 0, 0, 0, 0, 0, 1]), port: None, }, - ipath: IPath::AbEmpty(vec!["/path".into()],), + ipath: IPath::AbEmpty(vec!["/path"],), }, - query: "".into(), - fragment: "".into(), + query: "", + fragment: "", } ); let iri = IRI::try_from("ftp://example.com/path?query=1¶m=2#fragment").unwrap(); assert_eq!( iri, IRI::IRI { - scheme: "ftp".into(), + scheme: "ftp", hier_part: IHierPart::AbEmpty { authority: Authority { user_info: None, - host: Host::RegName(Some("example.com".into(),),), + host: Host::RegName(Some("example.com",),), port: None, }, - ipath: IPath::AbEmpty(vec!["/path".into()],), + ipath: IPath::AbEmpty(vec!["/path"],), }, - query: "query=1¶m=2".into(), - fragment: "fragment".into(), + query: "query=1¶m=2", + fragment: "fragment", } ); let iri = IRI::try_from("http://user:pass@example.com:8080/path?q#frag").unwrap(); assert_eq!( iri, IRI::IRI { - scheme: "http".into(), + scheme: "http", hier_part: IHierPart::AbEmpty { authority: Authority { - user_info: Some("user:pass".into()), - host: Host::RegName(Some("example.com".into())), - port: Some("8080".into()), + user_info: Some("user:pass"), + host: Host::RegName(Some("example.com")), + port: Some("8080"), }, - ipath: IPath::AbEmpty(vec!["/path".into()],), + ipath: IPath::AbEmpty(vec!["/path"],), }, - query: "q".into(), - fragment: "frag".into(), + query: "q", + fragment: "frag", } ); assert!(IRI::try_from("://example.com").is_err()); @@ -851,11 +834,30 @@ mod test { iri, IRI::Reference(RelativeRef { relative_part: RelativePart::NoScheme(IPath::NoScheme { - snz_nc: "path".into(), - segments: vec!["/".into(),], + snz_nc: "path", + segments: vec!["/",], + },), + query: "", + fragment: "", + },) + ); + assert_eq!( + IRI::try_from("about").unwrap(), + IRI::Reference(RelativeRef { + relative_part: RelativePart::NoScheme(IPath::NoScheme { + snz_nc: "about", + segments: vec![], },), - query: "".into(), - fragment: "".into(), + query: "", + fragment: "", + },) + ); + assert_eq!( + IRI::try_from("").unwrap(), + IRI::Reference(RelativeRef { + relative_part: RelativePart::Empty(IPath::Empty), + query: "", + fragment: "", },) ); } diff --git a/src/iri_spect.txt b/src/iri_spect.txt deleted file mode 100644 index 690aa09..0000000 --- a/src/iri_spect.txt +++ /dev/null @@ -1,137 +0,0 @@ - - IRI = scheme ":" ihier-part [ "?" iquery ] - [ "#" ifragment ] - - - - - - - - ipath-noscheme = isegment-nz-nc *( "/" isegment ) - - - - - - - - - - - Some productions are ambiguous. The "first-match-wins" (a.k.a. - "greedy") algorithm applies. For details, see [RFC3986]. - - - - -Duerst & Suignard Standards Track [Page 8] - -RFC 3987 Internationalized Resource Identifiers January 2005 - - - The following rules are the same as those in [RFC3986]: - - - - - IP-literal = "[" ( IPv6address / IPvFuture ) "]" - - IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" ) - - - - - - - - - unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" - reserved = gen-delims / sub-delims - gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" - - - - - -DONE: - ipath = ipath-abempty ; begins with "/" or is empty - / ipath-absolute ; begins with "/" but not "//" - / ipath-noscheme ; begins with a non-colon segment - / ipath-rootless ; begins with a segment - / ipath-empty ; zero characters - - - ihier-part = "//" iauthority ipath-abempty - / ipath-absolute - / ipath-rootless - / ipath-empty - - - ipath-abempty = *( "/" isegment ) - ipchar = iunreserved / pct-encoded / sub-delims / ":" - / "@" - iauthority = [ iuserinfo "@" ] ihost [ ":" port ] - - port = *DIGIT - - sub-delims = "!" / "$" / "&" / "'" / "(" / ")" - / "*" / "+" / "," / ";" / "=" - - pct-encoded = "%" HEXDIG HEXDIG - iunreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" / ucschar - - ucschar = %xA0-D7FF / %xF900-FDCF / %xFDF0-FFEF - / %x10000-1FFFD / %x20000-2FFFD / %x30000-3FFFD - / %x40000-4FFFD / %x50000-5FFFD / %x60000-6FFFD - / %x70000-7FFFD / %x80000-8FFFD / %x90000-9FFFD - / %xA0000-AFFFD / %xB0000-BFFFD / %xC0000-CFFFD - / %xD0000-DFFFD / %xE1000-EFFFD - - scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) - dec-octet = DIGIT ; 0-9 - / %x31-39 DIGIT ; 10-99 - / "1" 2DIGIT ; 100-199 - / "2" %x30-34 DIGIT ; 200-249 - / "25" %x30-35 ; 250-255 - - IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet - - IPv6address = 6( h16 ":" ) ls32 - / "::" 5( h16 ":" ) ls32 - / [ h16 ] "::" 4( h16 ":" ) ls32 - / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32 - / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32 - / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32 - / [ *4( h16 ":" ) h16 ] "::" ls32 - / [ *5( h16 ":" ) h16 ] "::" h16 - / [ *6( h16 ":" ) h16 ] "::" - h16 = 1*4HEXDIG -ls32 = ( h16 ":" h16 ) / IPv4address - - ireg-name = *( iunreserved / pct-encoded / sub-delims ) - iuserinfo = *( iunreserved / ncoded / sub-delims / ":" ) - ihost = IP-literal / IPv4address / ireg-name - isegment = *ipchar - isegment-nz = 1*ipchar - isegment-nz-nc = 1*( iunreserved / pct-encoded / sub-delims - / "@" ) - ; non-zero-length segment without any colon ":" - ipath-absolute = "/" [ isegment-nz *( "/" isegment ) ] - - ipath-rootless = isegment-nz *( "/" isegment ) - ipath-empty = 0 - - ifragment = *( ipchar / "/" / "?" ) - iprivate = %xE000-F8FF / %xF0000-FFFFD / %x100000-10FFFD - - iquery = *( ipchar / iprivate / "/" / "?" ) - - irelative-part = "//" iauthority ipath-abempty - / ipath-absolute - irelative-ref = irelative-part [ "?" iquery ] [ "#" ifragment ] - - absolute-IRI = scheme ":" ihier-part [ "?" iquery ] - - IRI-reference = IRI / irelative-ref - diff --git a/src/tests/turtle_doc_test.rs b/src/tests/turtle_doc_test.rs index ba334d8..be4df2a 100644 --- a/src/tests/turtle_doc_test.rs +++ b/src/tests/turtle_doc_test.rs @@ -32,7 +32,8 @@ const INPUT_DIR: &str = "examples/turtle_doc"; #[test_case("0025", None , false ; "EQ: test date 20/09/2012")] #[test_case("0026", None , false ; "EQ: test date 2023-08-30T10:31:00.080Z")] #[test_case("0027", None , true ; "JSON: test simple json result with bnode")] -// #[test_case("0028", None , false ; "The following Turtle document contains examples of all the different ways of writing IRIs in Turtle.")] +#[test_case("0028", None , false ; "EQ: The following Turtle document contains examples of all the different ways of writing IRIs in Turtle.")] +#[test_case("0029", None , false ; "EQ: Simple base example")] #[serial] fn test_turtle_doc(test_name: &str, diff_file: Option<&str>, output_json: bool) { reset_fake_uuid_gen(); diff --git a/src/turtle/turtle_doc.rs b/src/turtle/turtle_doc.rs index 25f1f8d..b63ca7c 100644 --- a/src/turtle/turtle_doc.rs +++ b/src/turtle/turtle_doc.rs @@ -1,4 +1,5 @@ use crate::grammar::{BLANK_NODE_LABEL, STRING_LITERAL_LONG_QUOTE}; +use crate::iri::IRI; use crate::shared::{ DATE_FORMATS, DEFAULT_DATE_FORMAT, DEFAULT_DATE_TIME_FORMAT, DEFAULT_TIME_FORMAT, RDF_FIRST, RDF_NIL, RDF_REST, TIME_FORMATS, XSD_BOOLEAN, XSD_DATE, XSD_DATE_TIME, XSD_DECIMAL, XSD_DOUBLE, @@ -51,7 +52,7 @@ pub struct TurtleDocError { struct Context<'a> { base: Option<&'a str>, well_known_prefix: Option, - prefixes: BTreeMap<&'a str, &'a str>, + prefixes: BTreeMap, Cow<'a, str>>, } #[derive(Serialize, PartialEq, Deserialize, Clone, Debug)] @@ -180,18 +181,6 @@ impl<'a> TurtleDoc<'a> { self.statements.push(stmt); } } - pub fn add_prefixes(&mut self, prefixes: BTreeMap) { - let prefixes: BTreeMap, Cow> = prefixes - .into_iter() - .map(|(k, v)| (Cow::Owned(k), Cow::Owned(v))) - .collect(); - // for (k, prefix) in prefixes { - // if self.prefixes.contains_key(&k) { - // panic("FIXME. https://www.ietf.org/rfc/rfc3987.html"); - // } - // } - self.prefixes.extend(prefixes); - } pub fn len(&self) -> usize { self.statements.len() } @@ -369,9 +358,17 @@ impl<'a> TurtleDoc<'a> { TurtleValue::Base(base) => { context.base = Some(Self::extract_iri(base)?); } - TurtleValue::Prefix((prefix, iri)) => { - let iri = TurtleDoc::extract_iri(iri)?; - context.prefixes.insert(prefix, iri); + TurtleValue::Prefix((k, prefix)) => { + let mut prefix = Cow::Borrowed(TurtleDoc::extract_iri(prefix)?); + let k = Cow::Borrowed(k); + let base = context.base.unwrap_or(""); + let iri = IRI::try_from(prefix.as_ref()).map_err(|e| TurtleDocError { + message: e.to_string(), + })?; + if iri.is_relative() { + prefix = Cow::Owned(format!("{base}{prefix}")); + } + context.prefixes.insert(k, prefix); } statement @ TurtleValue::Statement { subject: _, @@ -390,11 +387,7 @@ impl<'a> TurtleDoc<'a> { base: context.base, well_known_prefix: context.well_known_prefix, statements, - prefixes: context - .prefixes - .into_iter() - .map(|(k, v)| (Cow::Borrowed(k), (Cow::Borrowed(v)))) - .collect(), + prefixes: context.prefixes, }) } @@ -415,7 +408,10 @@ impl<'a> TurtleDoc<'a> { ) -> Result, TurtleDocError> { match s { TurtleValue::Iri(Iri::Enclosed(iri)) => { - if !iri.starts_with("http://") && !iri.starts_with("https://") { + let iri_rfc3987 = IRI::try_from(iri).map_err(|e| TurtleDocError { + message: e.to_string(), + })?; + if iri_rfc3987.is_relative() { if let Some(base) = base { let iri = (*base).to_owned() + iri; return Ok(Node::Iri(Cow::Owned(iri.to_string()))); @@ -556,11 +552,7 @@ impl<'a> TurtleDoc<'a> { ) -> Result, TurtleDocError> { match value { v @ TurtleValue::Iri(_) | v @ TurtleValue::Literal(_) => { - let prefixes: BTreeMap, Cow> = ctx - .prefixes - .iter() - .map(|(k, v)| (Cow::Borrowed(*k), Cow::Borrowed(*v))) - .collect(); + let prefixes: BTreeMap, Cow> = ctx.prefixes.clone(); let base = ctx.base.map(Cow::Borrowed); Self::simple_turtle_value_to_node(v, base, prefixes, true) }