Patch summary:
  * element.rs: set_comment() replaces every "--" in the stored comment with
    "__", because "--" is forbidden inside XML comments.
  * lexer.rs: a comment is only terminated by "-->", so its text may contain
    '<' and '>'; reaching the end of the input before "-->" yields
    ArxmlLexerError::InvalidComment. Adds a regression test (github issue #15).
  * lib.rs: reorders the ArxmlLexerError / ArxmlParserError re-exports.

NOTE(review): this patch was rebuilt from a copy whose line breaks had been
flattened and whose angle-bracketed text had been stripped. The indentation of
the context lines, the `Option<String>` parameter type and the comment literal
in the new test are reconstructions - confirm them against the target tree, or
apply with `git apply --ignore-whitespace`.

diff --git a/autosar-data/src/element.rs b/autosar-data/src/element.rs
index e05dfc5..cf213c6 100644
--- a/autosar-data/src/element.rs
+++ b/autosar-data/src/element.rs
@@ -2021,7 +2021,13 @@ impl Element {
     /// Set or delete the comment attached to the element
     ///
     /// Set None to remove the comment.
-    pub fn set_comment(&self, opt_comment: Option<String>) {
+    pub fn set_comment(&self, mut opt_comment: Option<String>) {
+        if let Some(comment) = &mut opt_comment {
+            // make sure the comment we store never contains "--" as this is forbidden by the w3 xml specification
+            if comment.contains("--") {
+                *comment = comment.replace("--", "__");
+            }
+        }
         self.0.lock().comment = opt_comment;
     }
 
diff --git a/autosar-data/src/lexer.rs b/autosar-data/src/lexer.rs
index 199ca9d..a0f36c6 100644
--- a/autosar-data/src/lexer.rs
+++ b/autosar-data/src/lexer.rs
@@ -228,7 +228,20 @@ impl<'a> ArxmlLexer<'a> {
                     }
                     b'!' => {
                         // second char is '!' -> parse a comment
-                        return self.read_comment(endpos).map(|res| (self.line, res));
+                        // we found a '>' character, but comments are allowed to contain unquoted '<' and '>'
+                        // this means we need to make sure the end is actually '-->', not just '>'
+                        let mut comment_endpos = endpos;
+                        while comment_endpos < self.buffer.len()
+                            && !self.buffer[comment_endpos - 2..].starts_with(b"-->")
+                        {
+                            comment_endpos += 1;
+                        }
+                        if comment_endpos < self.buffer.len() {
+                            return self.read_comment(comment_endpos).map(|res| (self.line, res));
+                        } else {
+                            // hit the end of the input -> unclosed comment
+                            return Err(self.error(ArxmlLexerError::InvalidComment));
+                        }
                     }
                     _ => {
                         // any other second char -> BeginElement
@@ -363,4 +376,12 @@ mod test {
         let event = ArxmlEvent::ArxmlHeader(None);
         let _ = format!("{event:#?}");
     }
+
+    /// github issue #15 - comments can contain '<' and '>'
+    #[test]
+    fn test_w3c_comment_example() {
+        let data = b"<!-- declarations for <head> & <body> -->";
+        let mut lexer = ArxmlLexer::new(data, PathBuf::from("(buffer)"));
+        assert!(matches!(lexer.next(), Ok((_, ArxmlEvent::Comment(_)))));
+    }
 }
diff --git a/autosar-data/src/lib.rs b/autosar-data/src/lib.rs
index 29344a8..c53f145 100644
--- a/autosar-data/src/lib.rs
+++ b/autosar-data/src/lib.rs
@@ -94,8 +94,8 @@ mod lexer;
 mod parser;
 
 // allow public access to the error sub-types
-pub use parser::ArxmlParserError;
 pub use lexer::ArxmlLexerError;
+pub use parser::ArxmlParserError;
 
 // reexport some of the info from the specification
 pub use autosar_data_specification::AttributeName;