From 9082379a824456edad0eff5b8cbf9d69aa4804cc Mon Sep 17 00:00:00 2001 From: Joao Paulo Magalhaes Date: Sun, 27 Nov 2022 15:29:14 +0000 Subject: [PATCH] [fix] re #331 eager scan for carriage return caused slow parsing --- changelog/0.5.0.md | 2 +- src/c4/yml/parse.cpp | 10 ++++------ 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/changelog/0.5.0.md b/changelog/0.5.0.md index 8059f75aa..109093500 100644 --- a/changelog/0.5.0.md +++ b/changelog/0.5.0.md @@ -60,7 +60,7 @@ - ~10x faster than `scanf()` - ~30x-50x faster than a naive `stringstream::str()` followed by `stringstream::operator>>()` For more details, see [the changelog for c4core 0.1.10](https://github.com/biojppm/c4core/releases/tag/v0.1.10). -- Fix [#289](https://github.com/biojppm/rapidyaml/issues/289) - parsing of flow-style sequences had quadratic complexity, causing long parse times in ultra long lines [PR#293](https://github.com/biojppm/rapidyaml/pull/293). +- Fix [#289](https://github.com/biojppm/rapidyaml/issues/289) and [#331](https://github.com/biojppm/rapidyaml/issues/331) - parsing of single-line flow-style sequences had quadratic complexity, causing long parse times in ultra long lines [PR#293](https://github.com/biojppm/rapidyaml/pull/293)/[PR#332](https://github.com/biojppm/rapidyaml/pull/332). - This was due to scanning for the token `: ` before scanning for `,` or `]`, which caused line-length scans on every scalar scan. Changing the order of the checks was enough to address the quadratic complexity, and the parse times for flow-style are now in line with block-style. - As part of this changeset, a significant number of runtime branches was eliminated by separating `Parser::_scan_scalar()` into several different `{seq,map}x{block,flow}` functions specific for each context. Expect some improvement in parse times. - Also, on Debug builds (or assertion-enabled builds) there was a paranoid assertion calling `Tree::has_child()` in `Tree::insert_child()` that caused quadratic behavior because the assertion had linear complexity. It was replaced with a somewhat equivalent O(1) assertion. diff --git a/src/c4/yml/parse.cpp b/src/c4/yml/parse.cpp index e7970e8e4..e5fde1994 100644 --- a/src/c4/yml/parse.cpp +++ b/src/c4/yml/parse.cpp @@ -4139,10 +4139,9 @@ csubstr Parser::_scan_squot_scalar() // leading whitespace also needs filtering needs_filter = needs_filter - || numlines > 1 + || (numlines > 1) || line_is_blank - || (_at_line_begin() && line.begins_with(' ')) - || (m_state->line_contents.full.last_of('\r') != csubstr::npos); + || (_at_line_begin() && line.begins_with(' ')); if(pos == npos) { @@ -4241,10 +4240,9 @@ csubstr Parser::_scan_dquot_scalar() // leading whitespace also needs filtering needs_filter = needs_filter - || numlines > 1 + || (numlines > 1) || line_is_blank - || (_at_line_begin() && line.begins_with(' ')) - || (m_state->line_contents.full.last_of('\r') != csubstr::npos); + || (_at_line_begin() && line.begins_with(' ')); if(pos == npos) {