Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rework node struct #326

Merged
merged 8 commits into from
Jan 23, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 17 additions & 4 deletions api_test/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,21 @@ static void accessors(test_batch_runner *runner) {
cmark_node_free(doc);
}

// Checks that an inline node which was unlinked from the tree still exposes
// its literal after the document it was parsed from has been freed.
static void free_parent(test_batch_runner *runner) {
static const char markdown[] = "text\n";

// sizeof - 1 passes the length without the terminating NUL.
cmark_node *doc =
cmark_parse_document(markdown, sizeof(markdown) - 1, CMARK_OPT_DEFAULT);

// First child of the document, then that node's first child; the latter is
// the node whose literal is checked below.
cmark_node *para = cmark_node_first_child(doc);
cmark_node *text = cmark_node_first_child(para);
// Detach the inline before freeing the document; it is read and freed
// separately afterwards.
cmark_node_unlink(text);
cmark_node_free(doc);
STR_EQ(runner, cmark_node_get_literal(text), "text",
"inline content after freeing parent block");
cmark_node_free(text);
}

static void node_check(test_batch_runner *runner) {
// Construct an incomplete tree.
cmark_node *doc = cmark_node_new(CMARK_NODE_DOCUMENT);
Expand Down Expand Up @@ -381,9 +396,6 @@ static void create_tree(test_batch_runner *runner) {
free(html);

cmark_node_free(doc);

// TODO: Test that the contents of an unlinked inline are valid
// after the parent block was destroyed. This doesn't work so far.
cmark_node_free(emph);
}

Expand Down Expand Up @@ -915,7 +927,7 @@ static void source_pos(test_batch_runner *runner) {
" </heading>\n"
" <paragraph sourcepos=\"3:1-4:42\">\n"
" <text sourcepos=\"3:1-3:14\" xml:space=\"preserve\">Hello “ </text>\n"
" <link sourcepos=\"3:15-3:37\" destination=\"http://www.google.com\" title=\"\">\n"
" <link sourcepos=\"3:15-3:37\" destination=\"http://www.google.com\">\n"
" <text sourcepos=\"3:16-3:36\" xml:space=\"preserve\">http://www.google.com</text>\n"
" </link>\n"
" <softbreak />\n"
Expand Down Expand Up @@ -1031,6 +1043,7 @@ int main() {
version(runner);
constructor(runner);
accessors(runner);
free_parent(runner);
node_check(runner);
iterator(runner);
iterator_delete(runner);
Expand Down
70 changes: 41 additions & 29 deletions src/blocks.c
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ static cmark_node *make_block(cmark_mem *mem, cmark_node_type tag,
cmark_node *e;

e = (cmark_node *)mem->calloc(1, sizeof(*e));
cmark_strbuf_init(mem, &e->content, 32);
e->mem = mem;
e->type = (uint16_t)tag;
e->flags = CMARK_NODE__OPEN;
e->start_line = start_line;
Expand All @@ -96,6 +96,7 @@ cmark_parser *cmark_parser_new_with_mem(int options, cmark_mem *mem) {

cmark_strbuf_init(mem, &parser->curline, 256);
cmark_strbuf_init(mem, &parser->linebuf, 0);
cmark_strbuf_init(mem, &parser->content, 0);

parser->refmap = cmark_reference_map_new(mem);
parser->root = document;
Expand Down Expand Up @@ -171,19 +172,18 @@ static CMARK_INLINE bool contains_inlines(cmark_node_type block_type) {
block_type == CMARK_NODE_HEADING);
}

static void add_line(cmark_node *node, cmark_chunk *ch, cmark_parser *parser) {
static void add_line(cmark_chunk *ch, cmark_parser *parser) {
int chars_to_tab;
int i;
assert(node->flags & CMARK_NODE__OPEN);
if (parser->partially_consumed_tab) {
parser->offset += 1; // skip over tab
// add space characters:
chars_to_tab = TAB_STOP - (parser->column % TAB_STOP);
for (i = 0; i < chars_to_tab; i++) {
cmark_strbuf_putc(&node->content, ' ');
cmark_strbuf_putc(&parser->content, ' ');
}
}
cmark_strbuf_put(&node->content, ch->data + parser->offset,
cmark_strbuf_put(&parser->content, ch->data + parser->offset,
ch->len - parser->offset);
}

Expand Down Expand Up @@ -230,12 +230,10 @@ static bool S_ends_with_blank_line(cmark_node *node) {
}

// returns true if content remains after link defs are resolved.
static bool resolve_reference_link_definitions(
cmark_parser *parser,
cmark_node *b) {
static bool resolve_reference_link_definitions(cmark_parser *parser) {
bufsize_t pos;
cmark_strbuf *node_content = &b->content;
cmark_chunk chunk = {node_content->ptr, node_content->size, 0};
cmark_strbuf *node_content = &parser->content;
cmark_chunk chunk = {node_content->ptr, node_content->size};
while (chunk.len && chunk.data[0] == '[' &&
(pos = cmark_parse_reference_inline(parser->mem, &chunk,
parser->refmap))) {
Expand All @@ -244,7 +242,7 @@ static bool resolve_reference_link_definitions(
chunk.len -= pos;
}
cmark_strbuf_drop(node_content, (node_content->size - chunk.len));
return !is_blank(&b->content, 0);
return !is_blank(node_content, 0);
}

static cmark_node *finalize(cmark_parser *parser, cmark_node *b) {
Expand Down Expand Up @@ -277,15 +275,18 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) {
b->end_column = parser->last_line_length;
}

cmark_strbuf *node_content = &b->content;
cmark_strbuf *node_content = &parser->content;

switch (S_type(b)) {
case CMARK_NODE_PARAGRAPH:
{
has_content = resolve_reference_link_definitions(parser, b);
has_content = resolve_reference_link_definitions(parser);
if (!has_content) {
// remove blank node (former reference def)
cmark_node_free(b);
} else {
b->len = node_content->size;
b->data = cmark_strbuf_detach(node_content);
}
break;
}
Expand All @@ -302,23 +303,30 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) {
}
assert(pos < node_content->size);

cmark_strbuf tmp = CMARK_BUF_INIT(parser->mem);
houdini_unescape_html_f(&tmp, node_content->ptr, pos);
cmark_strbuf_trim(&tmp);
cmark_strbuf_unescape(&tmp);
b->as.code.info = cmark_chunk_buf_detach(&tmp);
if (pos == 0) {
b->as.code.info = NULL;
} else {
cmark_strbuf tmp = CMARK_BUF_INIT(parser->mem);
houdini_unescape_html_f(&tmp, node_content->ptr, pos);
cmark_strbuf_trim(&tmp);
cmark_strbuf_unescape(&tmp);
b->as.code.info = cmark_strbuf_detach(&tmp);
}

if (node_content->ptr[pos] == '\r')
pos += 1;
if (node_content->ptr[pos] == '\n')
pos += 1;
cmark_strbuf_drop(node_content, pos);
}
b->as.code.literal = cmark_chunk_buf_detach(node_content);
b->len = node_content->size;
b->data = cmark_strbuf_detach(node_content);
break;

case CMARK_NODE_HEADING:
case CMARK_NODE_HTML_BLOCK:
b->as.literal = cmark_chunk_buf_detach(node_content);
b->len = node_content->size;
b->data = cmark_strbuf_detach(node_content);
break;

case CMARK_NODE_LIST: // determine tight/loose status
Expand Down Expand Up @@ -396,6 +404,9 @@ static void process_inlines(cmark_mem *mem, cmark_node *root,
if (ev_type == CMARK_EVENT_ENTER) {
if (contains_inlines(S_type(cur))) {
cmark_parse_inlines(mem, cur, refmap, options);
mem->free(cur->data);
cur->data = NULL;
cur->len = 0;
}
}
}
Expand Down Expand Up @@ -508,6 +519,8 @@ static cmark_node *finalize_document(cmark_parser *parser) {
finalize(parser, parser->root);
process_inlines(parser->mem, parser->root, parser->refmap, parser->options);

cmark_strbuf_free(&parser->content);

return parser->root;
}

Expand Down Expand Up @@ -972,7 +985,7 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
(*container)->as.code.fence_length = (matched > 255) ? 255 : matched;
(*container)->as.code.fence_offset =
(int8_t)(parser->first_nonspace - parser->offset);
(*container)->as.code.info = cmark_chunk_literal("");
(*container)->as.code.info = NULL;
S_advance_offset(parser, input,
parser->first_nonspace + matched - parser->offset,
false);
Expand All @@ -991,7 +1004,7 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
(lev =
scan_setext_heading_line(input, parser->first_nonspace))) {
// finalize paragraph, resolving reference links
has_content = resolve_reference_link_definitions(parser, *container);
has_content = resolve_reference_link_definitions(parser);

if (has_content) {

Expand Down Expand Up @@ -1074,7 +1087,7 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
(*container)->as.code.fence_char = 0;
(*container)->as.code.fence_length = 0;
(*container)->as.code.fence_offset = 0;
(*container)->as.code.info = cmark_chunk_literal("");
(*container)->as.code.info = NULL;

} else {
break;
Expand Down Expand Up @@ -1131,7 +1144,7 @@ static void add_text_to_container(cmark_parser *parser, cmark_node *container,
if (parser->current != last_matched_container &&
container == last_matched_container && !parser->blank &&
S_type(parser->current) == CMARK_NODE_PARAGRAPH) {
add_line(parser->current, input, parser);
add_line(input, parser);
} else { // not a lazy continuation
// Finalize any blocks that were not matched and set cur to container:
while (parser->current != last_matched_container) {
Expand All @@ -1140,9 +1153,9 @@ static void add_text_to_container(cmark_parser *parser, cmark_node *container,
}

if (S_type(container) == CMARK_NODE_CODE_BLOCK) {
add_line(container, input, parser);
add_line(input, parser);
} else if (S_type(container) == CMARK_NODE_HTML_BLOCK) {
add_line(container, input, parser);
add_line(input, parser);

int matches_end_condition;
switch (container->as.html_block_type) {
Expand Down Expand Up @@ -1189,14 +1202,14 @@ static void add_text_to_container(cmark_parser *parser, cmark_node *container,
}
S_advance_offset(parser, input, parser->first_nonspace - parser->offset,
false);
add_line(container, input, parser);
add_line(input, parser);
} else {
// create paragraph container for line
container = add_child(parser, container, CMARK_NODE_PARAGRAPH,
parser->first_nonspace + 1);
S_advance_offset(parser, input, parser->first_nonspace - parser->offset,
false);
add_line(container, input, parser);
add_line(input, parser);
}

parser->current = container;
Expand Down Expand Up @@ -1233,7 +1246,6 @@ static void S_process_line(cmark_parser *parser, const unsigned char *buffer,

input.data = parser->curline.ptr;
input.len = parser->curline.size;
input.alloc = 0;

parser->line_number++;

Expand Down
65 changes: 5 additions & 60 deletions src/chunk.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,35 +9,26 @@
#include "cmark_ctype.h"

#define CMARK_CHUNK_EMPTY \
{ NULL, 0, 0 }
{ NULL, 0 }

typedef struct {
unsigned char *data;
const unsigned char *data;
bufsize_t len;
bufsize_t alloc; // also implies a NULL-terminated string
} cmark_chunk;

static CMARK_INLINE void cmark_chunk_free(cmark_mem *mem, cmark_chunk *c) {
if (c->alloc)
mem->free(c->data);

static CMARK_INLINE void cmark_chunk_free(cmark_chunk *c) {
c->data = NULL;
c->alloc = 0;
c->len = 0;
}

static CMARK_INLINE void cmark_chunk_ltrim(cmark_chunk *c) {
assert(!c->alloc);

while (c->len && cmark_isspace(c->data[0])) {
c->data++;
c->len--;
}
}

static CMARK_INLINE void cmark_chunk_rtrim(cmark_chunk *c) {
assert(!c->alloc);

while (c->len > 0) {
if (!cmark_isspace(c->data[c->len - 1]))
break;
Expand All @@ -58,61 +49,15 @@ static CMARK_INLINE bufsize_t cmark_chunk_strchr(cmark_chunk *ch, int c,
return p ? (bufsize_t)(p - ch->data) : ch->len;
}

static CMARK_INLINE const char *cmark_chunk_to_cstr(cmark_mem *mem,
cmark_chunk *c) {
unsigned char *str;

if (c->alloc) {
return (char *)c->data;
}
str = (unsigned char *)mem->calloc(c->len + 1, 1);
if (c->len > 0) {
memcpy(str, c->data, c->len);
}
str[c->len] = 0;
c->data = str;
c->alloc = 1;

return (char *)str;
}

static CMARK_INLINE void cmark_chunk_set_cstr(cmark_mem *mem, cmark_chunk *c,
const char *str) {
unsigned char *old = c->alloc ? c->data : NULL;
if (str == NULL) {
c->len = 0;
c->data = NULL;
c->alloc = 0;
} else {
c->len = (bufsize_t)strlen(str);
c->data = (unsigned char *)mem->calloc(c->len + 1, 1);
c->alloc = 1;
memcpy(c->data, str, c->len + 1);
}
if (old != NULL) {
mem->free(old);
}
}

static CMARK_INLINE cmark_chunk cmark_chunk_literal(const char *data) {
bufsize_t len = data ? (bufsize_t)strlen(data) : 0;
cmark_chunk c = {(unsigned char *)data, len, 0};
cmark_chunk c = {(unsigned char *)data, len};
return c;
}

static CMARK_INLINE cmark_chunk cmark_chunk_dup(const cmark_chunk *ch,
bufsize_t pos, bufsize_t len) {
cmark_chunk c = {ch->data + pos, len, 0};
return c;
}

static CMARK_INLINE cmark_chunk cmark_chunk_buf_detach(cmark_strbuf *buf) {
cmark_chunk c;

c.len = buf->size;
c.data = cmark_strbuf_detach(buf);
c.alloc = 1;

cmark_chunk c = {ch->data + pos, len};
return c;
}

Expand Down
Loading