Skip to content

Commit

Permalink
[fix] re #205: add missing escaped characters in dquo scalars
Browse files Browse the repository at this point in the history
  • Loading branch information
biojppm committed Jan 28, 2022
1 parent 07c13e2 commit 835bbdd
Show file tree
Hide file tree
Showing 20 changed files with 585 additions and 365 deletions.
4 changes: 3 additions & 1 deletion .github/setenv.sh
Original file line number Diff line number Diff line change
Expand Up @@ -320,7 +320,9 @@ function c4_cfg_test()
;;
em++)
emcmake cmake -S $PROJ_DIR -B $build_dir -DCMAKE_INSTALL_PREFIX="$install_dir" \
-DCMAKE_BUILD_TYPE=$BT $CMFLAGS -DCMAKE_CXX_FLAGS="-s DISABLE_EXCEPTION_CATCHING=0"
-DCMAKE_BUILD_TYPE=$BT $CMFLAGS \
-DCMAKE_CXX_FLAGS="-s DISABLE_EXCEPTION_CATCHING=0" \
-DRYML_TEST_TOOLS=OFF
;;
*)
echo "unknown compiler"
Expand Down
4 changes: 2 additions & 2 deletions changelog/current.md
Original file line number Diff line number Diff line change
Expand Up @@ -94,13 +94,13 @@ As part of the [new feature to track source locations](https://github.com/biojpp
### Fixes
- Fix [#205](https://github.com/biojppm/rapidyaml/issues/205): add missing escape for `\b\f\0` ([PR#206](https://github.com/biojppm/rapidyaml/pulls/206)).
- Fix [#205](https://github.com/biojppm/rapidyaml/issues/205): fix parsing of escaped characters in double-quoted strings: `"\\\"\n\r\t\<TAB>\/\<SPC>\0\b\f\a\v\e\_\N\L\P"` ([PR#207](https://github.com/biojppm/rapidyaml/pulls/207)).
- Fix [#204](https://github.com/biojppm/rapidyaml/issues/204): add decoding of unicode codepoints `\x` `\u` `\U` in double-quoted scalars:
```c++
Tree tree = parse_in_arena(R"(["\u263A \xE2\x98\xBA \u2705 \U0001D11E"])");
assert(tree[0].val() == "☺ ☺ ✅ 𝄞");
```
This is mandated by the YAML standard and was missing from ryml ([PR#206](https://github.com/biojppm/rapidyaml/pulls/206)).
This is mandated by the YAML standard and was missing from ryml ([PR#207](https://github.com/biojppm/rapidyaml/pulls/207)).
- Fix [#193](https://github.com/biojppm/rapidyaml/issues/193): amalgamated header missing `#include <stdarg.h>` which prevented compilation in bare-metal `arm-none-eabi` ([PR #195](https://github.com/biojppm/rapidyaml/pull/195), requiring also [c4core #64](https://github.com/biojppm/c4core/pull/64)).
- Accept `infinity`, `inf` and `nan` as special float values (but not mixed case: e.g. `InFiNiTy`, `Inf` or `NaN` are not accepted) ([PR #186](https://github.com/biojppm/rapidyaml/pull/186)).
- Accept special float values with upper or mixed case: `.Inf`, `.INF`, `.NaN`, `.NAN`. Previously, only lowercase `.inf` and `.nan` were accepted ([PR #186](https://github.com/biojppm/rapidyaml/pull/186)).
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@


def get_readme_for_python():
    """Return the leading part of README.md used as the Python package description.

    The text is everything before the first ``<!-- endpythonreadme -->``
    marker; if the marker is absent, the whole file is returned.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding.
    with open(TOP_DIR / "README.md", "r", encoding="utf8") as fh:
        marker = "<!-- endpythonreadme -->"  # get everything up to this tag
        return fh.read().split(marker)[0]

Expand Down
2 changes: 1 addition & 1 deletion src/c4/yml/detail/checks.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ inline void check_free_list(Tree const& t)

/** Check that the arena bookkeeping of a tree is self-consistent.
 *
 * @param t the tree whose arena position, size and slack are verified
 *          with C4_CHECK.
 */
inline void check_arena(Tree const& t)
{
    // The position is allowed to equal the arena length: by the last check
    // below that is simply an arena with zero slack, so the bound must be
    // inclusive (a strict '<' would reject that state).
    C4_CHECK(t.m_arena.len == 0 || (t.m_arena_pos >= 0 && t.m_arena_pos <= t.m_arena.len));
    // The reported size must match the current position...
    C4_CHECK(t.arena_size() == t.m_arena_pos);
    // ...and position plus remaining slack must add up to the arena length.
    C4_CHECK(t.arena_slack() + t.m_arena_pos == t.m_arena.len);
}
Expand Down
55 changes: 50 additions & 5 deletions src/c4/yml/detail/parser_dbg.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,23 +43,68 @@
#endif

// Expands to two comma-separated arguments, the length (cast to int) and the
// data pointer of `sp` -- the argument pair a "%.*s" printf conversion takes.
#define _c4prsp(sp) ((int)(sp).len), (sp).str
// Prints the string `s` (anything with .str/.len) to stdout with special
// characters escaped, by forwarding to __c4presc().
#define _c4presc(s) __c4presc(s.str, s.len)
// Expands to a "%.*s" argument pair for a single character: precision 2 and
// the escape string when __c4prc() knows an escape for `c`, otherwise
// precision 1 and the address of `c` itself (so `c` must be an lvalue).
#define _c4prc(c) (__c4prc(c) ? 2 : 1), (__c4prc(c) ? __c4prc(c) : &c)
// NOTE(review): identical repetition of the _c4presc definition above; it
// looks like the removed/added pair of a diff -- confirm only one is needed.
#define _c4presc(s) __c4presc(s.str, s.len)
/** Get the printable two-character escape for a control character.
 *
 * @param c the character to look up
 * @return a string literal such as "\\n" (a backslash followed by one
 *         letter) when @p c has a known escape, or nullptr when the
 *         character should be printed as-is.
 *
 * Every returned string is exactly two characters long, which is what the
 * _c4prc() macro assumes when it passes 2 as the "%.*s" precision.
 */
inline const char *__c4prc(const char &c)
{
    switch(c)
    {
    case '\n': return "\\n";
    case '\t': return "\\t";
    case '\0': return "\\0";
    case '\r': return "\\r";
    case '\f': return "\\f";
    case '\b': return "\\b";
    case '\v': return "\\v";
    case '\a': return "\\a";
    // ESC: escaped here as well, matching what __c4presc() prints for it.
    case '\x1b': return "\\e";
    default: return nullptr;
    }
}
/** Print a character range to stdout with special characters escaped.
 *
 * Control characters are written as a backslash plus a letter (\n, \t, \0,
 * \r, \f, \b, \v, \a, \e). The UTF-8 sequences C2 A0, C2 85, E2 80 A8 and
 * E2 80 A9 are written as \_, \N, \L and \P respectively. A newline is
 * written as "\n" followed by a real line break. All other bytes are
 * written unchanged.
 *
 * @param s   pointer to the first character (need not be zero-terminated)
 * @param len number of characters to print
 */
inline void __c4presc(const char *s, size_t len)
{
    size_t prev = 0; // start of the run of bytes not yet written
    for(size_t i = 0; i < len; ++i)
    {
        // Compare as unsigned char: plain char may be signed or unsigned
        // depending on the platform, and the multi-byte lead bytes below
        // are all >= 0x80.
        const unsigned char c = static_cast<unsigned char>(s[i]);
        char esc = '\0';  // letter to print after the backslash; '\0' means no escape
        size_t width = 1; // number of input bytes consumed by the escape
        switch(c)
        {
        case '\n': esc = 'n'; break;
        case '\t': esc = 't'; break;
        case '\0': esc = '0'; break;
        case '\r': esc = 'r'; break;
        case '\f': esc = 'f'; break;
        case '\b': esc = 'b'; break;
        case '\v': esc = 'v'; break;
        case '\a': esc = 'a'; break;
        case 0x1b: esc = 'e'; break;
        case 0xc2: // lead byte of the two-byte sequences
            if(i+1 < len)
            {
                const unsigned char c1 = static_cast<unsigned char>(s[i+1]);
                if(c1 == 0xa0)
                {
                    esc = '_'; width = 2;
                }
                else if(c1 == 0x85)
                {
                    esc = 'N'; width = 2;
                }
            }
            break; // a truncated sequence is printed verbatim
        case 0xe2: // lead byte of the three-byte sequences
            if(i+2 < len && static_cast<unsigned char>(s[i+1]) == 0x80)
            {
                const unsigned char c2 = static_cast<unsigned char>(s[i+2]);
                if(c2 == 0xa8)
                {
                    esc = 'L'; width = 3;
                }
                else if(c2 == 0xa9)
                {
                    esc = 'P'; width = 3;
                }
            }
            break;
        default:
            break;
        }
        if(esc == '\0')
            continue; // ordinary byte: stays in the pending run
        // write the pending verbatim run, then the escape
        fwrite(s + prev, 1, i - prev, stdout);
        putchar('\\');
        putchar(esc);
        if(c == '\n')
            putchar('\n'); // keep the real line break after the escaped one
        i += width - 1;    // skip the continuation bytes of the sequence
        prev = i + 1;
    }
    // write whatever remains after the last escape
    if(prev < len)
        fwrite(s + prev, 1, len - prev, stdout);
}

#pragma clang diagnostic pop
Expand Down
40 changes: 7 additions & 33 deletions src/c4/yml/emit.def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -268,84 +268,58 @@ template<class Writer>
void Emitter<Writer>::_write_json(NodeScalar const& sc, NodeType flags)
{
if(C4_UNLIKELY( ! sc.tag.empty()))
{
c4::yml::error("JSON does not have tags");
}
if(C4_UNLIKELY(flags.has_anchor()))
{
c4::yml::error("JSON does not have anchors");
}
_write_scalar_json(sc.scalar, flags.has_key(), flags.is_quoted());
}

/** Emit a scalar in block-literal style.
 *
 * Writes the block header ("|-", "|" or "|+", chosen from the number of
 * trailing newlines in the scalar), then the scalar's lines, each preceded
 * by the indentation for level @p ilevel + 1.
 *
 * @param s            the scalar to write
 * @param ilevel       indentation level of the owning node
 * @param explicit_key when true, the scalar is introduced with "? " and is
 *                     always terminated by a newline
 */
template<class Writer>
void Emitter<Writer>::_write_scalar_block(csubstr s, size_t ilevel, bool explicit_key)
{
    // writes the indentation for one content line (ilevel+1 indent units)
    // NOTE(review): the indent literal is reproduced as seen in the listing;
    // confirm its width against the repository.
    #define _rymlindent_nextline() for(size_t lv = 0; lv < ilevel+1; ++lv) { this->Writer::_do_write(" "); }

    if(explicit_key)
        this->Writer::_do_write("? ");

    // split off the trailing line breaks; carriage returns are not counted
    // as newlines
    csubstr trimmed = s.trimr("\n\r");
    size_t numnewlines_at_end = s.len - trimmed.len - s.sub(trimmed.len).count('\r');
    // pick the chomping indicator from the number of trailing newlines
    if(numnewlines_at_end == 0)
        this->Writer::_do_write("|-\n");
    else if(numnewlines_at_end == 1)
        this->Writer::_do_write("|\n");
    else if(numnewlines_at_end > 1)
        this->Writer::_do_write("|+\n");

    if(trimmed.len)
    {
        size_t pos = 0; // tracks the last character that was already written
        for(size_t i = 0; i < trimmed.len; ++i)
        {
            if(trimmed[i] != '\n')
                continue;
            // write everything up to this point
            csubstr since_pos = trimmed.range(pos, i+1); // include the newline
            _rymlindent_nextline()
            this->Writer::_do_write(since_pos);
            pos = i+1; // already written
        }
        // last line of the trimmed content (it has no newline of its own)
        if(pos < trimmed.len)
        {
            _rymlindent_nextline()
            this->Writer::_do_write(trimmed.sub(pos));
        }
        // the first trailing newline terminates the last content line
        if(numnewlines_at_end)
        {
            this->Writer::_do_write('\n');
            --numnewlines_at_end;
        }
    }
    // remaining trailing newlines become indented empty lines
    for(size_t i = 0; i < numnewlines_at_end; ++i)
    {
        _rymlindent_nextline()
        if(i+1 < numnewlines_at_end || explicit_key)
            this->Writer::_do_write('\n');
    }
    // an explicit key must always end with a newline
    if(explicit_key && !numnewlines_at_end)
        this->Writer::_do_write('\n');
    #undef _rymlindent_nextline
}

Expand Down
Loading

0 comments on commit 835bbdd

Please sign in to comment.