diff --git a/ballerina/Dependencies.toml b/ballerina/Dependencies.toml
index 61b14bc..15a6530 100644
--- a/ballerina/Dependencies.toml
+++ b/ballerina/Dependencies.toml
@@ -73,6 +73,17 @@ org = "ballerina"
 name = "lang.object"
 version = "0.0.0"
+[[package]]
+org = "ballerina"
+name = "lang.regexp"
+version = "0.0.0"
+dependencies = [
+	{org = "ballerina", name = "jballerina.java"}
+]
+modules = [
+	{org = "ballerina", packageName = "lang.regexp", moduleName = "lang.regexp"}
+]
+
 [[package]]
 org = "ballerina"
 name = "lang.value"
 version = "0.0.0"
@@ -141,6 +152,7 @@ dependencies = [
	{org = "ballerina", name = "file"},
	{org = "ballerina", name = "io"},
	{org = "ballerina", name = "lang.array"},
+	{org = "ballerina", name = "lang.regexp"},
	{org = "ballerina", name = "log"},
	{org = "ballerina", name = "test"}
 ]
diff --git a/ballerina/modules/lexer/lexer.bal b/ballerina/modules/lexer/lexer.bal
index d0d454f..cc27183 100644
--- a/ballerina/modules/lexer/lexer.bal
+++ b/ballerina/modules/lexer/lexer.bal
@@ -62,7 +62,7 @@ public isolated function scan(LexerState state) returns LexerState|LexicalError
         return state.index == 0 ? state.tokenize(EMPTY_LINE) : state.tokenize(EOL);
     }
 
-    // Check for line breaks when reading form string
+    // Check for line breaks when reading from string
     if state.peek() == "\n" && state.context != LEXER_DOUBLE_QUOTE {
         state.isNewLine = true;
         return state.tokenize(EOL);
diff --git a/ballerina/modules/parser/parser.bal b/ballerina/modules/parser/parser.bal
index f2aba6b..2a70997 100644
--- a/ballerina/modules/parser/parser.bal
+++ b/ballerina/modules/parser/parser.bal
@@ -244,3 +244,18 @@ public isolated function parse(ParserState state, ParserOption option = DEFAULT,
 
     return generateGrammarError(state, string `Invalid token '${state.currentToken.token}' as the first for generating an event`);
 }
+
+# Check if the given string is a valid planar scalar.
+#
+# + value - The string to be checked
+# + return - True if the string is a valid planar scalar. Else, false.
+public isolated function isValidPlanarScalar(string value) returns boolean {
+    string? planarScalarResult = ();
+    do {
+        ParserState parserState = check new ([value]);
+        planarScalarResult = check planarScalar(parserState, false);
+    } on fail {
+        return false;
+    }
+    return planarScalarResult is string && planarScalarResult.trim() == value.trim();
+}
diff --git a/ballerina/modules/parser/scalar.bal b/ballerina/modules/parser/scalar.bal
index 511bfd2..2e99faf 100644
--- a/ballerina/modules/parser/scalar.bal
+++ b/ballerina/modules/parser/scalar.bal
@@ -169,14 +169,15 @@ isolated function singleQuoteScalar(ParserState state) returns ParsingError|stri
 
 # Parse the string of a planar scalar.
 #
-# + state - Current parser state
+# + state - Current parser state
+# + allowTokensAsPlanar - If set, then the restricted tokens are allowed as planar scalar
 # + return - Parsed planar scalar value
-isolated function planarScalar(ParserState state) returns ParsingError|string {
+isolated function planarScalar(ParserState state, boolean allowTokensAsPlanar = true) returns ParsingError|string {
     // Process the first planar char
     string lexemeBuffer = state.currentToken.value;
     boolean isFirstLine = true;
     string newLineBuffer = "";
-    state.lexerState.allowTokensAsPlanar = true;
+    state.lexerState.allowTokensAsPlanar = allowTokensAsPlanar;
 
     check checkToken(state, peek = true);
 
diff --git a/ballerina/modules/serializer/node.bal b/ballerina/modules/serializer/node.bal
index 361e489..e8a3529 100644
--- a/ballerina/modules/serializer/node.bal
+++ b/ballerina/modules/serializer/node.bal
@@ -14,18 +14,17 @@
 import yaml.common;
 import yaml.schema;
-
-const string INVALID_PLANAR_PATTERN = "([\\w|\\s]*[\\-|\\?|:|] [\\w|\\s]*)|"
-    + "([\\w|\\s]* #[\\w|\\s]*)|"
-    + "([,|\\[|\\]|\\{|\\}|&\\*|!\\||>|'|\"|%|@|`][\\w|\\s]*)";
+import yaml.parser;
+import ballerina/lang.regexp;
 
 isolated function serializeString(SerializerState state, json data, string tag) {
     string value = data.toString();
-
-    state.events.push({
-        value: re `${INVALID_PLANAR_PATTERN}`.isFullMatch(value) || state.forceQuotes
-            ? string `${state.delimiter}${value}${state.delimiter}` : value,
-        tag
-    });
+    if value.includes("\n") {
+        value = state.delimiter + regexp:replaceAll(re `\n`, data.toString(), "\\n") + state.delimiter;
+    } else {
+        value = (!parser:isValidPlanarScalar(value) || state.forceQuotes)
+            ? state.delimiter + value + state.delimiter : value;
+    }
+    state.events.push({value, tag});
 }
 
 isolated function serializeSequence(SerializerState state, json[] data, string tag, int depthLevel) returns schema:SchemaError? {
diff --git a/ballerina/modules/serializer/tests/lib_test.bal b/ballerina/modules/serializer/tests/lib_test.bal
index 81d9388..de1ff8e 100644
--- a/ballerina/modules/serializer/tests/lib_test.bal
+++ b/ballerina/modules/serializer/tests/lib_test.bal
@@ -86,14 +86,44 @@ function testQuotesForInvalidPlanarChar(string line) returns error? {
 
 function invalidPlanarDataGen() returns map<[string]> {
     return {
-        "comment": [" #"],
+        "comment": [" # comment"],
         "explicit key": ["? "],
         "sequence entry": ["- "],
         "mapping value": [": "],
-        "flow indicator": ["}a"]
+        "flow indicator": ["}a"],
+        "alias": ["*/*"],
+        "collect-entry": [", "],
+        "anchor": ["&anchor"],
+        "tag": ["!tag"],
+        "directive": ["%YAML 1.2"]
     };
 }
 
+@test:Config {
+    dataProvider: scalarWithNewLinesDataGen,
+    groups: ["serializer"]
+}
+function testScalarWithNewLines(json line, string[] expectedOutputs) returns error? {
+    common:Event[] events = check getSerializedEvents(line);
+    int index = 0;
+    foreach common:Event event in events {
+        if event is common:ScalarEvent {
+            test:assertEquals(event.value, string `"${expectedOutputs[index]}"`);
+            index += 1;
+        }
+    }
+    test:assertEquals(index, expectedOutputs.length());
+}
+
+function scalarWithNewLinesDataGen()returns map<[json, string[]]> =>
+    {
+        "simple scalar": ["first\nsecond", ["first\\nsecond"]],
+        "sequence": [[["first\nsecond"], ["first\nsecond\n\nthird"]], ["first\\nsecond", "first\\nsecond\\n\\nthird"]],
+        "nested sequence": [[[["first\nsecond"]]], ["first\\nsecond"]],
+        "mapping": [{"key\nline": "first\nsecond"}, ["key\\nline", "first\\nsecond"]],
+        "nested mapping": [{"key\nline": {"nested\nline": "first\nsecond"}}, ["key\\nline", "nested\\nline", "first\\nsecond"]]
+    };
+
 @test:Config {
     groups: ["serializer"]
 }