From fd49984110f93e3eb29081a77f021b4a73afc14f Mon Sep 17 00:00:00 2001 From: Miles Ziemer Date: Mon, 12 Dec 2022 14:18:10 -0500 Subject: [PATCH] Fix quoted text grammar and parsing Updates Smithy ABNF for QuotedChar and TextBlock, and updates IdlTextParser to properly parse QuotedChar and TextBlock. Previously, IdlTextParser was allowing any character to be used as a QuotedChar, despite QuotedChar having certain restrictions in the grammar. IdlTextParser was updated to follow the grammar's restrictions properly. The grammar restrictions were also loosened to allow tab characters in QuotedChar and double quotes in TextBlock, both of which were already in use. Specifically for double quotes in a TextBlock, one or two are allowed in a row, and must be followed by a QuotedChar. --- docs/source-2.0/spec/idl.rst | 14 +++++++------- .../smithy/model/loader/IdlTextParser.java | 18 +++++++++++++++++- .../smithy/model/loader/IdlTextParserTest.java | 18 +++++++++++++++--- 3 files changed, 39 insertions(+), 11 deletions(-) diff --git a/docs/source-2.0/spec/idl.rst b/docs/source-2.0/spec/idl.rst index 3449ca56555..bda35adc108 100644 --- a/docs/source-2.0/spec/idl.rst +++ b/docs/source-2.0/spec/idl.rst @@ -148,20 +148,20 @@ string support defined in :rfc:`7405`. NodeKeywords :%s"true" / %s"false" / %s"null" NodeStringValue :`ShapeId` / `TextBlock` / `QuotedText` QuotedText :DQUOTE *`QuotedChar` DQUOTE - QuotedChar :%x20-21 ; space - "!" + QuotedChar :%x09 ; tab + :/ %x20-21 ; space - "!" :/ %x23-5B ; "#" - "[" :/ %x5D-10FFFF ; "]"+ :/ `EscapedChar` - :/ `PreservedDouble` :/ `NL` - EscapedChar :`Escape` (`Escape` / "'" / DQUOTE / %s"b" - : / %s"f" / %s"n" / %s"r" / %s"t" - : / "/" / `UnicodeEscape`) + EscapedChar :`Escape` (`Escape` / DQUOTE / %s"b" / %s"f" + : / %s"n" / %s"r" / %s"t" / "/" + : / `UnicodeEscape`) UnicodeEscape :%s"u" `Hex` `Hex` `Hex` `Hex` Hex :DIGIT / %x41-46 / %x61-66 - PreservedDouble :`Escape` (%x20-21 / %x23-5B / %x5D-10FFFF) Escape :%x5C ; backslash - TextBlock :`ThreeDquotes` *`SP` `NL` *`QuotedChar` `ThreeDquotes` + TextBlock :`ThreeDquotes` *`SP` `NL` *`TextBlockContent` `ThreeDquotes` + TextBlockContent :`QuotedChar` / (1*2DQUOTE 1*`QuotedChar`) ThreeDquotes :DQUOTE DQUOTE DQUOTE .. rubric:: Shapes diff --git a/smithy-model/src/main/java/software/amazon/smithy/model/loader/IdlTextParser.java b/smithy-model/src/main/java/software/amazon/smithy/model/loader/IdlTextParser.java index e60440ccaa2..f667bccb100 100644 --- a/smithy-model/src/main/java/software/amazon/smithy/model/loader/IdlTextParser.java +++ b/smithy-model/src/main/java/software/amazon/smithy/model/loader/IdlTextParser.java @@ -40,6 +40,7 @@ static String parseQuotedTextAndTextBlock(IdlModelParser parser, boolean triple) while (!parser.eof()) { char next = parser.peek(); if (next == '"' && (!triple || (parser.peek(1) == '"' && parser.peek(2) == '"'))) { + // Found closing quotes of quoted_text and/or text_block break; } parser.skip(); @@ -82,8 +83,10 @@ private static String parseStringContents(IdlModelParser parser, String lexeme, case NORMAL: if (c == '\\') { state = State.AFTER_ESCAPE; - } else { + } else if (isValidNormalCharacter(c, triple)) { result.append(c); + } else { + throw parser.syntax("Invalid character: `" + c + "`"); } break; case AFTER_ESCAPE: @@ -285,4 +288,17 @@ private static String createTextBlockLine(String line, int longestPadding) { return endPosition >= startPosition ? line.substring(startPosition, endPosition + 1) : null; } + + private static boolean isValidNormalCharacter(char c, boolean isTextBlock) { + // Valid normal characters are the unescaped characters defined in the + // QuotedChar grammar: + // https://smithy.io/2.0/spec/idl.html#grammar-token-smithy-QuotedChar + return c == '\t' + || c == '\n' + || c == '\r' + || (c >= 0x20 && c <= 0x21) // space - "!" + || (isTextBlock && c == 0x22) // DQUOTE is allowed in text_block + || (c >= 0x23 && c <= 0x5b) // "#" - "[" + || c >= 0x5d; // "]"+ + } } diff --git a/smithy-model/src/test/java/software/amazon/smithy/model/loader/IdlTextParserTest.java b/smithy-model/src/test/java/software/amazon/smithy/model/loader/IdlTextParserTest.java index 028e42939c1..aca97f3b434 100644 --- a/smithy-model/src/test/java/software/amazon/smithy/model/loader/IdlTextParserTest.java +++ b/smithy-model/src/test/java/software/amazon/smithy/model/loader/IdlTextParserTest.java @@ -2,6 +2,7 @@ import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.equalTo; +import static org.junit.jupiter.api.Assertions.assertThrows; import java.util.stream.Stream; import org.junit.jupiter.params.ParameterizedTest; @@ -18,6 +19,13 @@ public void parsesText(String input, String lexeme) { assertThat(result.getValue(), equalTo(lexeme)); } + @ParameterizedTest + @MethodSource("invalidTextProvider") + public void throwsForInvalidText(String invalidInput) { + IdlModelParser parser = new IdlModelParser("/foo", invalidInput); + assertThrows(ModelSyntaxException.class, () -> IdlNodeParser.parseNode(parser).expectStringNode()); + } + private static Stream validTextProvider() { return Stream.of( Arguments.of("\"foo\"", "foo"), @@ -25,8 +33,6 @@ private static Stream validTextProvider() { Arguments.of("\"\t\"", "\t"), Arguments.of("\"\r\"", "\n"), Arguments.of("\"\n\"", "\n"), - Arguments.of("\"\b\"", "\b"), - Arguments.of("\"\f\"", "\f"), Arguments.of("\"\\t\"", "\t"), Arguments.of("\"\\r\"", "\r"), Arguments.of("\"\\n\"", "\n"), @@ -59,8 +65,10 @@ private static Stream validTextProvider() { Arguments.of("\"\"\"\n\n\n\"\"\"", "\n\n"), Arguments.of("\"\"\"\n foo\n baz\n \"\"\"", "foo\nbaz\n"), Arguments.of("\"\"\"\n foo\n baz\n \"\"\"", "foo\n baz\n"), - Arguments.of("\"\"\"\n\"foo\\\"\"\"\"", "\"foo\""), Arguments.of("\"\"\"\n foo\\n bar\n baz\"\"\"", "foo\n bar\nbaz"), + Arguments.of("\"\"\"\n{ \"foo\": \"bar\" }\"\"\"", "{ \"foo\": \"bar\" }"), + Arguments.of("\"\"\"\n \"a\" \"\"\"", "\"a\""), + Arguments.of("\"\"\"\n \" \"\"\"", "\""), // Empty lines and lines with only ws do not contribute to incidental ws. Arguments.of("\"\"\"\n\n foo\n \n\n \n \"\"\"", "\nfoo\n\n\n\n"), // If the last line is offset to the right, it's discarded since it's all whitespace. @@ -69,4 +77,8 @@ private static Stream validTextProvider() { Arguments.of("\"\"\"\r Foo\\\r Baz\"\"\"", "FooBaz"), Arguments.of("\"\"\"\r\n Foo\\\r\n Baz\"\"\"", "FooBaz")); } + + private static Stream invalidTextProvider() { + return Stream.of( "\"\b\"", "\"\"\"", "\"\\\"", "\"\\'\""); + } }