Skip to content

Commit

Permalink
[fix] re #152: do not slurp the terminating newlines after a folded s…
Browse files Browse the repository at this point in the history
…calar
  • Loading branch information
biojppm committed Sep 15, 2021
1 parent e17f2cb commit 0c99390
Show file tree
Hide file tree
Showing 3 changed files with 160 additions and 41 deletions.
78 changes: 48 additions & 30 deletions src/c4/yml/parse.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2497,35 +2497,40 @@ csubstr Parser::_peek_next_line(size_t pos) const
return {};
}


//-----------------------------------------------------------------------------
void Parser::_scan_line()
// Re-initialize this LineContents from the line of `buf` that begins at
// `offset`. Two views are computed and handed to reset():
//   - the "stripped" view ends right before the first '\n' or '\r';
//   - the "full" view additionally covers one optional '\r' followed by
//     one optional '\n'.
// `offset` is asserted to be no larger than `buf.len`.
void Parser::LineContents::reset_with_next_line(csubstr buf, size_t offset)
{
if(m_state->pos.offset >= m_buf.len) return;

char const* b = &m_buf[m_state->pos.offset];
char const* e = b;

// get the line stripped of newline chars
while(e < m_buf.end() && (*e != '\n' && *e != '\r'))
{
RYML_ASSERT(offset <= buf.len);
char const* C4_RESTRICT b = &buf[offset];
char const* C4_RESTRICT e = b;
// get the current line stripped of newline chars
while(e < buf.end() && (*e != '\n' && *e != '\r'))
++e;
}
RYML_ASSERT(e >= b);
csubstr stripped = m_buf.sub(m_state->pos.offset, static_cast<size_t>(e - b));

const csubstr stripped_ = buf.sub(offset, static_cast<size_t>(e - b));
// advance pos to include the first line ending
if(e != m_buf.end() && *e == '\r') ++e;
if(e != m_buf.end() && *e == '\n') ++e;
if(e != buf.end() && *e == '\r')
++e;
if(e != buf.end() && *e == '\n')
++e;
RYML_ASSERT(e >= b);
csubstr full = m_buf.sub(m_state->pos.offset, static_cast<size_t>(e - b));
const csubstr full_ = buf.sub(offset, static_cast<size_t>(e - b));
reset(full_, stripped_);
}

m_state->line_contents.reset(full, stripped);
// Load the line that starts at the current parse offset into
// m_state->line_contents. Nothing happens when the offset is already
// at or beyond the end of the buffer.
void Parser::_scan_line()
{
    if(m_state->pos.offset < m_buf.len)
    {
        m_state->line_contents.reset_with_next_line(m_buf, m_state->pos.offset);
    }
}


//-----------------------------------------------------------------------------
void Parser::_line_progressed(size_t ahead)
{
_c4dbgpf("line[%zu] (%zu cols) progressed by %zu: col %zu --> %zu offset %zu --> %zu", m_state->pos.line, m_state->line_contents.full.len, ahead, m_state->pos.col, m_state->pos.col+ahead, m_state->pos.offset, m_state->pos.offset+ahead);
_c4dbgpf("line[%zu] (%zu cols) progressed by %zu: col %zu-->%zu offset %zu-->%zu", m_state->pos.line, m_state->line_contents.full.len, ahead, m_state->pos.col, m_state->pos.col+ahead, m_state->pos.offset, m_state->pos.offset+ahead);
m_state->pos.offset += ahead;
m_state->pos.col += ahead;
RYML_ASSERT(m_state->pos.col <= m_state->line_contents.stripped.len+1);
Expand All @@ -2534,13 +2539,24 @@ void Parser::_line_progressed(size_t ahead)

// Finish the current line: move the offset past the line-ending characters
// (the difference between the full and the stripped line lengths), advance
// the line counter and put the column back to 1.
// The column is asserted to be exactly one past the stripped line.
void Parser::_line_ended()
{
_c4dbgpf("line[%zu] (%zu cols) ended! offset %zu --> %zu", m_state->pos.line, m_state->line_contents.full.len, m_state->pos.offset, m_state->pos.offset+m_state->line_contents.full.len - m_state->line_contents.stripped.len);
_c4dbgpf("line[%zu] (%zu cols) ended! offset %zu-->%zu", m_state->pos.line, m_state->line_contents.full.len, m_state->pos.offset, m_state->pos.offset+m_state->line_contents.full.len - m_state->line_contents.stripped.len);
RYML_ASSERT(m_state->pos.col == m_state->line_contents.stripped.len+1);
m_state->pos.offset += m_state->line_contents.full.len - m_state->line_contents.stripped.len;
++m_state->pos.line;
m_state->pos.col = 1;
}

// Reverse the bookkeeping performed by _line_ended(), using the line
// contents currently stored in the state: the offset moves back over the
// line-ending characters (full.len - stripped.len), the line counter is
// decremented and the column is placed one past the stripped line.
// Asserts that the position is at column 1 and that the line counter is
// greater than zero.
void Parser::_line_ended_undo()
{
    auto &pos = m_state->pos;
    const auto &contents = m_state->line_contents;
    // number of line-ending characters that the offset has to step back over
    const auto ending_len = contents.full.len - contents.stripped.len;

    RYML_ASSERT(pos.col == 1u);
    RYML_ASSERT(pos.line > 0u);
    RYML_ASSERT(pos.offset >= ending_len);
    _c4dbgpf("line[%zu] undo ended! line %zu-->%zu, offset %zu-->%zu", pos.line, pos.line, pos.line - 1, pos.offset, pos.offset - ending_len);

    pos.offset -= ending_len;
    pos.line -= 1;
    pos.col = contents.stripped.len + 1u;
}

//-----------------------------------------------------------------------------
void Parser::_set_indentation(size_t indentation)
{
Expand Down Expand Up @@ -3096,7 +3112,7 @@ NodeData* Parser::_append_val(csubstr val, bool quoted)
size_t nid = m_tree->append_child(m_state->node_id);
m_tree->to_val(nid, val, additional_flags);

_c4dbgpf("append val: id=%zd key='%.*s' val='%.*s'", nid, _c4prsp(m_tree->get(nid)->m_key.scalar), _c4prsp(m_tree->get(nid)->m_val.scalar));
_c4dbgpf("append val: id=%zd val='%.*s'", nid, _c4prsp(m_tree->get(nid)->m_val.scalar));
if( ! m_val_tag.empty())
{
_c4dbgpf("append val[%zu]: set val tag='%.*s' -> '%.*s'", nid, _c4prsp(m_val_tag), _c4prsp(normalize_tag(m_val_tag)));
Expand Down Expand Up @@ -3560,6 +3576,7 @@ csubstr Parser::_scan_block()
_line_ended();
_scan_line();

// if no explicit indentation was given, pick it from the current line
if(indentation == npos)
indentation = m_state->line_contents.indentation;

Expand All @@ -3574,20 +3591,18 @@ csubstr Parser::_scan_block()
// read every full line into a raw block,
// from which newlines are to be stripped as needed
size_t num_lines = 0, first = m_state->pos.line;
auto &lc = m_state->line_contents;
LineContents lc;
while(( ! _finished_file()))
{
_scan_line();
if(lc.indentation < indentation)
{
// stop when the line is deindented and not empty
if( ! lc.rem.trim(" \t\r\n").empty())
{
break;
}
}
raw_block.len += m_state->line_contents.full.len;
// peek next line, but do not advance immediately
lc.reset_with_next_line(m_buf, m_state->pos.offset);
// stop when the line is deindented and not empty
if(lc.indentation < indentation && ( ! lc.rem.trim(" \t\r\n").empty()))
break;
// advance now that we know the folded scalar continues
m_state->line_contents = lc;
_c4dbgpf("scanning block: append '%.*s'", _c4prsp(m_state->line_contents.rem));
raw_block.len += m_state->line_contents.full.len;
_line_progressed(m_state->line_contents.rem.len);
_line_ended();
++num_lines;
Expand All @@ -3596,6 +3611,9 @@ csubstr Parser::_scan_block()
C4_UNUSED(num_lines);
C4_UNUSED(first);

if(num_lines)
_line_ended_undo();

_c4dbgpf("scanning block: raw='%.*s'", _c4prsp(raw_block));

// ok! now we strip the newlines and spaces according to the specs
Expand Down
3 changes: 3 additions & 0 deletions src/c4/yml/parse.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,8 @@ class RYML_EXPORT Parser

LineContents() : full(), stripped(), rem(), indentation() {}

void reset_with_next_line(csubstr buf, size_t pos);

void reset(csubstr full_, csubstr stripped_)
{
full = full_;
Expand Down Expand Up @@ -288,6 +290,7 @@ class RYML_EXPORT Parser

void _line_progressed(size_t ahead);
void _line_ended();
void _line_ended_undo();

void _prepare_pop()
{
Expand Down
120 changes: 109 additions & 11 deletions test/test_block_folded.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,29 +3,127 @@
namespace c4 {
namespace yml {

TEST(block_scalars, issue152)
// Regression test for issue #152: parses a document holding `|`, `>` and
// `>-` block scalars as sequence entries and checks their values. The
// "ok", "err" and "err2" scalars must end in exactly one newline, while
// the `>-` scalar under "err3" must have no trailing newline.
TEST(block_folded, issue152_not_indented)
{
Tree t = parse(R"(
exec:
cmd_succeeds:
const Tree t = parse(R"(
ok:
- |
exec pg_isready -U "dog" -d "dbname=dog" -h 127.0.0.1 -p 5432
- parses - yes
ok_parses: yes
err:
- |
exec pg_isready -U "dog" -d "dbname=dog" -h 127.0.0.1 -p 5432
err_parses: no
err2:
- >
exec pg_isready -U "dog" -d "dbname=dog" -h 127.0.0.1 -p 5432
err2_parses: no
err3:
- >-
exec pg_isready -U "dog" -d "dbname=dog" -h 127.0.0.1 -p 5432
err3_parses: no
)");
EXPECT_EQ(t["ok" ][0].val(), csubstr("exec pg_isready -U \"dog\" -d \"dbname=dog\" -h 127.0.0.1 -p 5432\n"));
EXPECT_EQ(t["err" ][0].val(), csubstr("exec pg_isready -U \"dog\" -d \"dbname=dog\" -h 127.0.0.1 -p 5432\n"));
EXPECT_EQ(t["err2"][0].val(), csubstr("exec pg_isready -U \"dog\" -d \"dbname=dog\" -h 127.0.0.1 -p 5432\n"));
EXPECT_EQ(t["err3"][0].val(), csubstr("exec pg_isready -U \"dog\" -d \"dbname=dog\" -h 127.0.0.1 -p 5432"));
}

// Regression test for issue #152 with the block scalars looked up under the
// "indented_once" node. The "ok", "err" and "err2" scalars must end in
// exactly one newline, while the `>-` scalar under "err3" must have no
// trailing newline.
TEST(block_folded, issue152_indented_once)
{
const Tree t = parse(R"(
indented_once:
ok:
- |
exec pg_isready -U "dog" -d "dbname=dog" -h 127.0.0.1 -p 5432
- parses - yes
cmd_fails:
ok_parses: yes
err:
- |
exec pg_isready -U "dog" -d "dbname=dog" -h 127.0.0.1 -p 5432
parses: no
cmd_fails2:
err_parses: no
err2:
- >
exec pg_isready -U "dog" -d "dbname=dog" -h 127.0.0.1 -p 5432
parses2: no
cmd_fails3:
err2_parses: no
err3:
- >-
exec pg_isready -U "dog" -d "dbname=dog" -h 127.0.0.1 -p 5432
parses3: no
periodSeconds: 10)");
err3_parses: no
)");
const NodeRef n = t["indented_once"];
EXPECT_EQ(n["ok" ][0].val(), csubstr("exec pg_isready -U \"dog\" -d \"dbname=dog\" -h 127.0.0.1 -p 5432\n"));
EXPECT_EQ(n["err" ][0].val(), csubstr("exec pg_isready -U \"dog\" -d \"dbname=dog\" -h 127.0.0.1 -p 5432\n"));
EXPECT_EQ(n["err2"][0].val(), csubstr("exec pg_isready -U \"dog\" -d \"dbname=dog\" -h 127.0.0.1 -p 5432\n"));
EXPECT_EQ(n["err3"][0].val(), csubstr("exec pg_isready -U \"dog\" -d \"dbname=dog\" -h 127.0.0.1 -p 5432"));
}

// Regression test for issue #152 with the block scalars looked up under
// t["indented_once"]["indented_twice"]. The "ok", "err" and "err2" scalars
// must end in exactly one newline, while the `>-` scalar under "err3" must
// have no trailing newline.
TEST(block_folded, issue152_indented_twice)
{
const Tree t = parse(R"(
indented_once:
indented_twice:
ok:
- |
exec pg_isready -U "dog" -d "dbname=dog" -h 127.0.0.1 -p 5432
- parses - yes
ok_parses: yes
err:
- |
exec pg_isready -U "dog" -d "dbname=dog" -h 127.0.0.1 -p 5432
err_parses: no
err2:
- >
exec pg_isready -U "dog" -d "dbname=dog" -h 127.0.0.1 -p 5432
err2_parses: no
err3:
- >-
exec pg_isready -U "dog" -d "dbname=dog" -h 127.0.0.1 -p 5432
err3_parses: no
)");
const NodeRef n = t["indented_once"]["indented_twice"];
EXPECT_EQ(n["ok" ][0].val(), csubstr("exec pg_isready -U \"dog\" -d \"dbname=dog\" -h 127.0.0.1 -p 5432\n"));
EXPECT_EQ(n["err" ][0].val(), csubstr("exec pg_isready -U \"dog\" -d \"dbname=dog\" -h 127.0.0.1 -p 5432\n"));
EXPECT_EQ(n["err2"][0].val(), csubstr("exec pg_isready -U \"dog\" -d \"dbname=dog\" -h 127.0.0.1 -p 5432\n"));
EXPECT_EQ(n["err3"][0].val(), csubstr("exec pg_isready -U \"dog\" -d \"dbname=dog\" -h 127.0.0.1 -p 5432"));
}

// Regression test for issue #152 with the block scalars looked up under
// t["indented_once"]["indented_twice"]["indented_thrice"]. The "ok", "err"
// and "err2" scalars must end in exactly one newline, while the `>-` scalar
// under "err3" must have no trailing newline.
TEST(block_folded, issue152_indented_thrice)
{
const Tree t = parse(R"(
indented_once:
indented_twice:
indented_thrice:
ok:
- |
exec pg_isready -U "dog" -d "dbname=dog" -h 127.0.0.1 -p 5432
- parses - yes
ok_parses: yes
err:
- |
exec pg_isready -U "dog" -d "dbname=dog" -h 127.0.0.1 -p 5432
err_parses: no
err2:
- >
exec pg_isready -U "dog" -d "dbname=dog" -h 127.0.0.1 -p 5432
err2_parses: no
err3:
- >-
exec pg_isready -U "dog" -d "dbname=dog" -h 127.0.0.1 -p 5432
err3_parses: no
)");
const NodeRef n = t["indented_once"]["indented_twice"]["indented_thrice"];
EXPECT_EQ(n["ok" ][0].val(), csubstr("exec pg_isready -U \"dog\" -d \"dbname=dog\" -h 127.0.0.1 -p 5432\n"));
EXPECT_EQ(n["err" ][0].val(), csubstr("exec pg_isready -U \"dog\" -d \"dbname=dog\" -h 127.0.0.1 -p 5432\n"));
EXPECT_EQ(n["err2"][0].val(), csubstr("exec pg_isready -U \"dog\" -d \"dbname=dog\" -h 127.0.0.1 -p 5432\n"));
EXPECT_EQ(n["err3"][0].val(), csubstr("exec pg_isready -U \"dog\" -d \"dbname=dog\" -h 127.0.0.1 -p 5432"));
}


//-----------------------------------------------------------------------------
//-----------------------------------------------------------------------------
//-----------------------------------------------------------------------------

#define BLOCK_FOLDED_CASES \
"7T8X", \
Expand Down

0 comments on commit 0c99390

Please sign in to comment.