Skip to content

Commit

Permalink
Merge pull request #432 from biojppm/post_newparser
Browse files Browse the repository at this point in the history
Post newparser
  • Loading branch information
biojppm authored May 22, 2024
2 parents 6c19ea9 + 43a551c commit e432c2f
Show file tree
Hide file tree
Showing 28 changed files with 632 additions and 611 deletions.
2 changes: 1 addition & 1 deletion bm/bm_parse.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ int main(int argc, char** argv)

ryml::id_type estimate_capacity(ryml::csubstr src)
{
return (3 * ryml::Parser::estimate_tree_capacity(src)) >> 1;
return (3 * ryml::estimate_tree_capacity(src)) >> 1;
}


Expand Down
4 changes: 4 additions & 0 deletions changelog/current.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@ Most of the changes are from the giant Parser refactor described below. Before g
NodeRef::depth_asc() const;
NodeRef::depth_desc() const;
```
- [#PR432](https://github.com/biojppm/rapidyaml/pull/432) - Added a function to estimate the required tree capacity, based on yaml markup:
```cpp
size_t estimate_tree_capacity(csubstr); // estimate number of nodes resulting from yaml
```


------
Expand Down
46 changes: 23 additions & 23 deletions samples/quickstart.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -357,8 +357,8 @@ john: doe)";
// The lower level index API is based on the indices of nodes,
// where the node's id is the node's position in the tree's data
// array. This API is very efficient, but somewhat difficult to use:
size_t root_id = tree.root_id();
size_t bar_id = tree.find_child(root_id, "bar"); // need to get the index right
ryml::id_type root_id = tree.root_id();
ryml::id_type bar_id = tree.find_child(root_id, "bar"); // need to get the index right
CHECK(tree.is_map(root_id)); // all of the index methods are in the tree
CHECK(tree.is_seq(bar_id)); // ... and receive the subject index

Expand Down Expand Up @@ -426,14 +426,14 @@ john: doe)";

// IMPORTANT. The ryml tree uses an index-based linked list for
// storing children, so the complexity of
// `Tree::operator[csubstr]` and `Tree::operator[size_t]` is O(n),
// `Tree::operator[csubstr]` and `Tree::operator[id_type]` is O(n),
// linear on the number of root children. If you use
// `Tree::operator[]` with a large tree where the root has many
// children, you will see a performance hit.
//
// To avoid this hit, you can create your own accelerator
// structure. For example, before doing a lookup, do a single
// traverse at the root level to fill an `map<csubstr,size_t>`
// traverse at the root level to fill a `map<csubstr,id_type>`
// mapping key names to node indices; with a node index, a lookup
// (via `Tree::get()`) is O(1), so this way you can get O(log n)
// lookup from a key. (But please do not use `std::map` if you
Expand Down Expand Up @@ -479,29 +479,29 @@ john: doe)";
ryml::csubstr expected_keys[] = {"foo", "bar", "john"};
// iterate children using the high-level node API:
{
size_t count = 0;
ryml::id_type count = 0;
for(ryml::ConstNodeRef const& child : root.children())
CHECK(child.key() == expected_keys[count++]);
}
// iterate siblings using the high-level node API:
{
size_t count = 0;
ryml::id_type count = 0;
for(ryml::ConstNodeRef const& child : root["foo"].siblings())
CHECK(child.key() == expected_keys[count++]);
}
// iterate children using the lower-level tree index API:
{
size_t count = 0;
for(size_t child_id = tree.first_child(root_id); child_id != ryml::NONE; child_id = tree.next_sibling(child_id))
ryml::id_type count = 0;
for(ryml::id_type child_id = tree.first_child(root_id); child_id != ryml::NONE; child_id = tree.next_sibling(child_id))
CHECK(tree.key(child_id) == expected_keys[count++]);
}
// iterate siblings using the lower-level tree index API:
// (notice the only difference from above is in the loop
// preamble, which calls tree.first_sibling(bar_id) instead of
// tree.first_child(root_id))
{
size_t count = 0;
for(size_t child_id = tree.first_sibling(bar_id); child_id != ryml::NONE; child_id = tree.next_sibling(child_id))
ryml::id_type count = 0;
for(ryml::id_type child_id = tree.first_sibling(bar_id); child_id != ryml::NONE; child_id = tree.next_sibling(child_id))
CHECK(tree.key(child_id) == expected_keys[count++]);
}
}
Expand Down Expand Up @@ -3629,7 +3629,7 @@ void write(ryml::NodeRef *n, my_type const& val)
template<class T>
bool read(ryml::ConstNodeRef const& n, my_seq_type<T> *seq)
{
seq->seq_member.resize(n.num_children()); // num_children() is O(N)
seq->seq_member.resize(static_cast<size_t>(n.num_children())); // num_children() is O(N)
size_t pos = 0;
for(auto const ch : n.children())
ch >> seq->seq_member[pos++];
Expand Down Expand Up @@ -3813,7 +3813,7 @@ void sample_float_precision()
CHECK(output.size() == reference.size());
for(size_t i = 0; i < reference.size(); ++i)
{
CHECK(get_num_digits(tree[i].val()) == num_digits_original);
CHECK(get_num_digits(tree[(ryml::id_type)i].val()) == num_digits_original);
CHECK(fabs(output[i] - reference[i]) < precision_safe);
}
}
Expand Down Expand Up @@ -4577,12 +4577,12 @@ d: 3
CHECK(tree.docref(1).id() == stream.child(1).id());
CHECK(tree.docref(2).id() == stream.child(2).id());
// equivalent: using the lower level index API
const size_t stream_id = tree.root_id();
const ryml::id_type stream_id = tree.root_id();
CHECK(tree.is_root(stream_id));
CHECK(tree.is_stream(stream_id));
CHECK(!tree.is_doc(stream_id));
CHECK(tree.num_children(stream_id) == 3);
for(size_t doc_id = tree.first_child(stream_id); doc_id != ryml::NONE; doc_id = tree.next_sibling(stream_id))
// Walk every document under the stream node. The step must advance from
// the current document (doc_id): stepping from stream_id yields the same
// value on every iteration, so the loop would never move on to the
// second and third documents.
for(ryml::id_type doc_id = tree.first_child(stream_id); doc_id != ryml::NONE; doc_id = tree.next_sibling(doc_id))
    CHECK(tree.is_doc(doc_id));
CHECK(tree.doc(0) == tree.child(stream_id, 0));
CHECK(tree.doc(1) == tree.child(stream_id, 1));
Expand All @@ -4594,7 +4594,7 @@ d: 3
CHECK(stream[0]["a"].val() == "0");
CHECK(stream[0]["b"].val() == "1");
// equivalent: using the index API
const size_t doc0_id = tree.first_child(stream_id);
const ryml::id_type doc0_id = tree.first_child(stream_id);
CHECK(tree.is_doc(doc0_id));
CHECK(tree.is_map(doc0_id));
CHECK(tree.val(tree.find_child(doc0_id, "a")) == "0");
Expand All @@ -4606,7 +4606,7 @@ d: 3
CHECK(stream[1]["c"].val() == "2");
CHECK(stream[1]["d"].val() == "3");
// equivalent: using the index API
const size_t doc1_id = tree.next_sibling(doc0_id);
const ryml::id_type doc1_id = tree.next_sibling(doc0_id);
CHECK(tree.is_doc(doc1_id));
CHECK(tree.is_map(doc1_id));
CHECK(tree.val(tree.find_child(doc1_id, "c")) == "2");
Expand All @@ -4620,7 +4620,7 @@ d: 3
CHECK(stream[2][2].val() == "6");
CHECK(stream[2][3].val() == "7");
// equivalent: using the index API
const size_t doc2_id = tree.next_sibling(doc1_id);
const ryml::id_type doc2_id = tree.next_sibling(doc1_id);
CHECK(tree.is_doc(doc2_id));
CHECK(tree.is_seq(doc2_id));
CHECK(tree.val(tree.child(doc2_id, 0)) == "4");
Expand All @@ -4644,18 +4644,18 @@ d: 3
};
// using the node API
{
size_t count = 0;
ryml::id_type count = 0;
const ryml::ConstNodeRef stream = tree.rootref();
CHECK(stream.num_children() == C4_COUNTOF(expected_json));
CHECK(stream.num_children() == (ryml::id_type)C4_COUNTOF(expected_json));
for(ryml::ConstNodeRef doc : stream.children())
CHECK(ryml::emitrs_json<std::string>(doc) == expected_json[count++]);
}
// equivalent: using the index API
{
size_t count = 0;
const size_t stream_id = tree.root_id();
CHECK(tree.num_children(stream_id) == C4_COUNTOF(expected_json));
for(size_t doc_id = tree.first_child(stream_id); doc_id != ryml::NONE; doc_id = tree.next_sibling(doc_id))
ryml::id_type count = 0;
const ryml::id_type stream_id = tree.root_id();
CHECK(tree.num_children(stream_id) == (ryml::id_type)C4_COUNTOF(expected_json));
for(ryml::id_type doc_id = tree.first_child(stream_id); doc_id != ryml::NONE; doc_id = tree.next_sibling(doc_id))
CHECK(ryml::emitrs_json<std::string>(tree, doc_id) == expected_json[count++]);
}
}
Expand Down
36 changes: 18 additions & 18 deletions src/c4/yml/emit.def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ void Emitter<Writer>::_emit_yaml(id_type id)
break;
++end;
}
const size_t parent = m_tree->parent(next_node);
const id_type parent = m_tree->parent(next_node);
for( ; tagds.b != end; ++tagds.b)
{
if(next_node != m_tree->first_child(parent))
Expand Down Expand Up @@ -199,7 +199,7 @@ void Emitter<Writer>::_write_doc(id_type id)
}
else // docval
{
RYML_ASSERT(m_tree->has_val(id));
_RYML_CB_ASSERT(m_tree->callbacks(), m_tree->has_val(id));
// some plain scalars such as '...' and '---' must not
// appear at 0-indentation
const csubstr val = m_tree->val(id);
Expand Down Expand Up @@ -245,9 +245,9 @@ void Emitter<Writer>::_do_visit_flow_sl(id_type node, id_type depth, id_type ile
{
const bool prev_flow = m_flow;
m_flow = true;
RYML_ASSERT(!m_tree->is_stream(node));
RYML_ASSERT(m_tree->is_container(node) || m_tree->is_doc(node));
RYML_ASSERT(m_tree->is_root(node) || (m_tree->parent_is_map(node) || m_tree->parent_is_seq(node)));
_RYML_CB_ASSERT(m_tree->callbacks(), !m_tree->is_stream(node));
_RYML_CB_ASSERT(m_tree->callbacks(), m_tree->is_container(node) || m_tree->is_doc(node));
_RYML_CB_ASSERT(m_tree->callbacks(), m_tree->is_root(node) || (m_tree->parent_is_map(node) || m_tree->parent_is_seq(node)));
if(C4_UNLIKELY(depth > m_opts.max_depth()))
_RYML_CB_ERR(m_tree->callbacks(), "max depth exceeded");

Expand All @@ -273,7 +273,7 @@ void Emitter<Writer>::_do_visit_flow_sl(id_type node, id_type depth, id_type ile
}
else if(m_tree->is_container(node))
{
RYML_ASSERT(m_tree->is_map(node) || m_tree->is_seq(node));
_RYML_CB_ASSERT(m_tree->callbacks(), m_tree->is_map(node) || m_tree->is_seq(node));

bool spc = false; // write a space

Expand Down Expand Up @@ -451,9 +451,9 @@ void Emitter<Writer>::_do_visit_block_container(id_type node, id_type depth, id_
template<class Writer>
void Emitter<Writer>::_do_visit_block(id_type node, id_type depth, id_type ilevel, id_type do_indent)
{
RYML_ASSERT(!m_tree->is_stream(node));
RYML_ASSERT(m_tree->is_container(node) || m_tree->is_doc(node));
RYML_ASSERT(m_tree->is_root(node) || (m_tree->parent_is_map(node) || m_tree->parent_is_seq(node)));
_RYML_CB_ASSERT(m_tree->callbacks(), !m_tree->is_stream(node));
_RYML_CB_ASSERT(m_tree->callbacks(), m_tree->is_container(node) || m_tree->is_doc(node));
_RYML_CB_ASSERT(m_tree->callbacks(), m_tree->is_root(node) || (m_tree->parent_is_map(node) || m_tree->parent_is_seq(node)));
if(C4_UNLIKELY(depth > m_opts.max_depth()))
_RYML_CB_ERR(m_tree->callbacks(), "max depth exceeded");
if(m_tree->is_doc(node))
Expand All @@ -464,7 +464,7 @@ void Emitter<Writer>::_do_visit_block(id_type node, id_type depth, id_type ileve
}
else if(m_tree->is_container(node))
{
RYML_ASSERT(m_tree->is_map(node) || m_tree->is_seq(node));
_RYML_CB_ASSERT(m_tree->callbacks(), m_tree->is_map(node) || m_tree->is_seq(node));
bool spc = false; // write a space
bool nl = false; // write a newline
if(m_tree->has_key(node))
Expand Down Expand Up @@ -672,9 +672,9 @@ size_t Emitter<Writer>::_write_escaped_newlines(csubstr s, size_t i)
this->Writer::_do_write('\n'); // write the newline again
++i; // increase the outer loop counter!
} while(i < s.len && s.str[i] == '\n');
RYML_ASSERT(i > 0);
_RYML_CB_ASSERT(m_tree->callbacks(), i > 0);
--i;
RYML_ASSERT(s.str[i] == '\n');
_RYML_CB_ASSERT(m_tree->callbacks(), s.str[i] == '\n');
return i;
}

Expand All @@ -690,10 +690,10 @@ template<class Writer>
size_t Emitter<Writer>::_write_indented_block(csubstr s, size_t i, id_type ilevel)
{
//_c4dbgpf("indblock@i={} rem=[{}]~~~\n{}~~~", i, s.sub(i).len, s.sub(i));
RYML_ASSERT(i > 0);
RYML_ASSERT(s.str[i-1] == '\n');
RYML_ASSERT(i < s.len);
RYML_ASSERT(s.str[i] == ' ' || s.str[i] == '\t' || s.str[i] == '\n');
_RYML_CB_ASSERT(m_tree->callbacks(), i > 0);
_RYML_CB_ASSERT(m_tree->callbacks(), s.str[i-1] == '\n');
_RYML_CB_ASSERT(m_tree->callbacks(), i < s.len);
_RYML_CB_ASSERT(m_tree->callbacks(), s.str[i] == ' ' || s.str[i] == '\t' || s.str[i] == '\n');
again:
size_t pos = s.find("\n ", i);
if(pos == npos)
Expand Down Expand Up @@ -725,7 +725,7 @@ size_t Emitter<Writer>::_write_indented_block(csubstr s, size_t i, id_type ileve
template<class Writer>
void Emitter<Writer>::_write_scalar_literal(csubstr s, id_type ilevel, bool explicit_key)
{
RYML_ASSERT(s.find("\r") == csubstr::npos);
_RYML_CB_ASSERT(m_tree->callbacks(), s.find("\r") == csubstr::npos);
if(explicit_key)
this->Writer::_do_write("? ");
csubstr trimmed = s.trimr('\n');
Expand Down Expand Up @@ -773,7 +773,7 @@ void Emitter<Writer>::_write_scalar_folded(csubstr s, id_type ilevel, bool expli
{
if(explicit_key)
this->Writer::_do_write("? ");
RYML_ASSERT(s.find("\r") == csubstr::npos);
_RYML_CB_ASSERT(m_tree->callbacks(), s.find("\r") == csubstr::npos);
csubstr trimmed = s.trimr('\n');
const size_t numnewlines_at_end = s.len - trimmed.len;
const bool is_newline_only = (trimmed.len == 0 && (s.len > 0));
Expand Down
Loading

0 comments on commit e432c2f

Please sign in to comment.