Skip to content

Commit

Permalink
Rearrange struct cmark_node
Browse files Browse the repository at this point in the history
Introduce multi-purpose data/len members in struct cmark_node. This
is mainly used to store literal text for inlines, code and HTML blocks.

Move the content strbuf for blocks from cmark_node to cmark_parser.
When finalizing nodes that allow inlines (paragraphs and headings),
detach the strbuf and store the block content in the node's data/len
members. Free the block content after processing inlines.

Reduces size of struct cmark_node by 8 bytes.
  • Loading branch information
nwellnhof authored and jgm committed Jan 23, 2020
1 parent 3ef0718 commit f3f50b2
Show file tree
Hide file tree
Showing 13 changed files with 111 additions and 174 deletions.
53 changes: 30 additions & 23 deletions src/blocks.c
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ static cmark_node *make_block(cmark_mem *mem, cmark_node_type tag,
cmark_node *e;

e = (cmark_node *)mem->calloc(1, sizeof(*e));
cmark_strbuf_init(mem, &e->content, 32);
e->mem = mem;
e->type = (uint16_t)tag;
e->flags = CMARK_NODE__OPEN;
e->start_line = start_line;
Expand All @@ -96,6 +96,7 @@ cmark_parser *cmark_parser_new_with_mem(int options, cmark_mem *mem) {

cmark_strbuf_init(mem, &parser->curline, 256);
cmark_strbuf_init(mem, &parser->linebuf, 0);
cmark_strbuf_init(mem, &parser->content, 0);

parser->refmap = cmark_reference_map_new(mem);
parser->root = document;
Expand Down Expand Up @@ -171,19 +172,18 @@ static CMARK_INLINE bool contains_inlines(cmark_node_type block_type) {
block_type == CMARK_NODE_HEADING);
}

static void add_line(cmark_node *node, cmark_chunk *ch, cmark_parser *parser) {
static void add_line(cmark_chunk *ch, cmark_parser *parser) {
int chars_to_tab;
int i;
assert(node->flags & CMARK_NODE__OPEN);
if (parser->partially_consumed_tab) {
parser->offset += 1; // skip over tab
// add space characters:
chars_to_tab = TAB_STOP - (parser->column % TAB_STOP);
for (i = 0; i < chars_to_tab; i++) {
cmark_strbuf_putc(&node->content, ' ');
cmark_strbuf_putc(&parser->content, ' ');
}
}
cmark_strbuf_put(&node->content, ch->data + parser->offset,
cmark_strbuf_put(&parser->content, ch->data + parser->offset,
ch->len - parser->offset);
}

Expand Down Expand Up @@ -230,12 +230,10 @@ static bool S_ends_with_blank_line(cmark_node *node) {
}

// returns true if content remains after link defs are resolved.
static bool resolve_reference_link_definitions(
cmark_parser *parser,
cmark_node *b) {
static bool resolve_reference_link_definitions(cmark_parser *parser) {
bufsize_t pos;
cmark_strbuf *node_content = &b->content;
cmark_chunk chunk = {node_content->ptr, node_content->size, 0};
cmark_strbuf *node_content = &parser->content;
cmark_chunk chunk = {node_content->ptr, node_content->size};
while (chunk.len && chunk.data[0] == '[' &&
(pos = cmark_parse_reference_inline(parser->mem, &chunk,
parser->refmap))) {
Expand All @@ -244,7 +242,7 @@ static bool resolve_reference_link_definitions(
chunk.len -= pos;
}
cmark_strbuf_drop(node_content, (node_content->size - chunk.len));
return !is_blank(&b->content, 0);
return !is_blank(node_content, 0);
}

static cmark_node *finalize(cmark_parser *parser, cmark_node *b) {
Expand Down Expand Up @@ -277,15 +275,18 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) {
b->end_column = parser->last_line_length;
}

cmark_strbuf *node_content = &b->content;
cmark_strbuf *node_content = &parser->content;

switch (S_type(b)) {
case CMARK_NODE_PARAGRAPH:
{
has_content = resolve_reference_link_definitions(parser, b);
has_content = resolve_reference_link_definitions(parser);
if (!has_content) {
// remove blank node (former reference def)
cmark_node_free(b);
} else {
b->len = node_content->size;
b->data = cmark_strbuf_detach(node_content);
}
break;
}
Expand Down Expand Up @@ -318,12 +319,14 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) {
pos += 1;
cmark_strbuf_drop(node_content, pos);
}
b->as.code.literal = cmark_strbuf_detach(node_content);
b->len = node_content->size;
b->data = cmark_strbuf_detach(node_content);
break;

case CMARK_NODE_HEADING:
case CMARK_NODE_HTML_BLOCK:
b->as.literal.len = node_content->size;
b->as.literal.data = cmark_strbuf_detach(node_content);
b->len = node_content->size;
b->data = cmark_strbuf_detach(node_content);
break;

case CMARK_NODE_LIST: // determine tight/loose status
Expand Down Expand Up @@ -401,6 +404,9 @@ static void process_inlines(cmark_mem *mem, cmark_node *root,
if (ev_type == CMARK_EVENT_ENTER) {
if (contains_inlines(S_type(cur))) {
cmark_parse_inlines(mem, cur, refmap, options);
mem->free(cur->data);
cur->data = NULL;
cur->len = 0;
}
}
}
Expand Down Expand Up @@ -513,6 +519,8 @@ static cmark_node *finalize_document(cmark_parser *parser) {
finalize(parser, parser->root);
process_inlines(parser->mem, parser->root, parser->refmap, parser->options);

cmark_strbuf_free(&parser->content);

return parser->root;
}

Expand Down Expand Up @@ -996,7 +1004,7 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
(lev =
scan_setext_heading_line(input, parser->first_nonspace))) {
// finalize paragraph, resolving reference links
has_content = resolve_reference_link_definitions(parser, *container);
has_content = resolve_reference_link_definitions(parser);

if (has_content) {

Expand Down Expand Up @@ -1136,7 +1144,7 @@ static void add_text_to_container(cmark_parser *parser, cmark_node *container,
if (parser->current != last_matched_container &&
container == last_matched_container && !parser->blank &&
S_type(parser->current) == CMARK_NODE_PARAGRAPH) {
add_line(parser->current, input, parser);
add_line(input, parser);
} else { // not a lazy continuation
// Finalize any blocks that were not matched and set cur to container:
while (parser->current != last_matched_container) {
Expand All @@ -1145,9 +1153,9 @@ static void add_text_to_container(cmark_parser *parser, cmark_node *container,
}

if (S_type(container) == CMARK_NODE_CODE_BLOCK) {
add_line(container, input, parser);
add_line(input, parser);
} else if (S_type(container) == CMARK_NODE_HTML_BLOCK) {
add_line(container, input, parser);
add_line(input, parser);

int matches_end_condition;
switch (container->as.html_block_type) {
Expand Down Expand Up @@ -1194,14 +1202,14 @@ static void add_text_to_container(cmark_parser *parser, cmark_node *container,
}
S_advance_offset(parser, input, parser->first_nonspace - parser->offset,
false);
add_line(container, input, parser);
add_line(input, parser);
} else {
// create paragraph container for line
container = add_child(parser, container, CMARK_NODE_PARAGRAPH,
parser->first_nonspace + 1);
S_advance_offset(parser, input, parser->first_nonspace - parser->offset,
false);
add_line(container, input, parser);
add_line(input, parser);
}

parser->current = container;
Expand Down Expand Up @@ -1238,7 +1246,6 @@ static void S_process_line(cmark_parser *parser, const unsigned char *buffer,

input.data = parser->curline.ptr;
input.len = parser->curline.size;
input.alloc = 0;

parser->line_number++;

Expand Down
65 changes: 5 additions & 60 deletions src/chunk.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,35 +9,26 @@
#include "cmark_ctype.h"

#define CMARK_CHUNK_EMPTY \
{ NULL, 0, 0 }
{ NULL, 0 }

typedef struct {
unsigned char *data;
const unsigned char *data;
bufsize_t len;
bufsize_t alloc; // also implies a NULL-terminated string
} cmark_chunk;

static CMARK_INLINE void cmark_chunk_free(cmark_mem *mem, cmark_chunk *c) {
if (c->alloc)
mem->free(c->data);

static CMARK_INLINE void cmark_chunk_free(cmark_chunk *c) {
c->data = NULL;
c->alloc = 0;
c->len = 0;
}

// Trim leading whitespace from a chunk in place.
//
// While the chunk is non-empty and its first byte satisfies cmark_isspace(),
// the data pointer is advanced by one byte and the length is reduced by one.
// No bytes are copied or modified; only the chunk's view is narrowed.
static CMARK_INLINE void cmark_chunk_ltrim(cmark_chunk *c) {
// Only valid for chunks whose alloc flag is zero.
// NOTE(review): presumably because advancing c->data on a chunk that owns
// its buffer would lose the pointer later handed to mem->free -- confirm.
assert(!c->alloc);

// The c->len test comes first, so c->data[0] is never read on an empty chunk.
while (c->len && cmark_isspace(c->data[0])) {
c->data++;
c->len--;
}
}

static CMARK_INLINE void cmark_chunk_rtrim(cmark_chunk *c) {
assert(!c->alloc);

while (c->len > 0) {
if (!cmark_isspace(c->data[c->len - 1]))
break;
Expand All @@ -58,61 +49,15 @@ static CMARK_INLINE bufsize_t cmark_chunk_strchr(cmark_chunk *ch, int c,
return p ? (bufsize_t)(p - ch->data) : ch->len;
}

// Return the contents of a chunk as a NUL-terminated C string.
//
// If the chunk's alloc flag is already set, c->data is returned unchanged.
// Otherwise a buffer of c->len + 1 bytes is obtained from mem->calloc, the
// chunk's bytes are copied into it, a terminator is stored, and the chunk is
// rewritten to point at the new buffer with alloc set to 1.
//
// The returned pointer is the same address as c->data after the call, so it
// is not a separate allocation for the caller to free. The buffer that
// c->data pointed to before the call is not freed here.
//
// mem: allocator whose calloc member supplies the new buffer.
// c:   chunk to convert; may be modified as described above.
static CMARK_INLINE const char *cmark_chunk_to_cstr(cmark_mem *mem,
cmark_chunk *c) {
unsigned char *str;

// Already flagged as allocated: hand back the existing buffer as-is.
if (c->alloc) {
return (char *)c->data;
}
// The result of mem->calloc is used without a NULL check.
// NOTE(review): presumably the allocator aborts on failure -- confirm.
str = (unsigned char *)mem->calloc(c->len + 1, 1);
// Skip the copy for an empty chunk; CMARK_CHUNK_EMPTY has a NULL data
// pointer, so this also avoids calling memcpy with a NULL source.
if (c->len > 0) {
memcpy(str, c->data, c->len);
}
// Explicit terminator at index c->len, the last byte of the new buffer.
str[c->len] = 0;
// The chunk now refers to the new copy and is marked as allocated.
c->data = str;
c->alloc = 1;

return (char *)str;
}

// Replace the contents of a chunk with a copy of the C string `str`.
//
// If str is NULL, the chunk is reset to empty: data NULL, len 0, alloc 0.
// Otherwise len becomes strlen(str), data becomes a new buffer of len + 1
// bytes from mem->calloc holding the string and its terminator, and alloc
// is set to 1.
//
// The buffer the chunk held before the call is released with mem->free only
// if the chunk's alloc flag was set on entry.
//
// mem: allocator used for the new buffer and for freeing the old one.
// c:   chunk to overwrite.
// str: NUL-terminated string to copy, or NULL to clear the chunk.
static CMARK_INLINE void cmark_chunk_set_cstr(cmark_mem *mem, cmark_chunk *c,
const char *str) {
// Remember the old buffer only when the alloc flag is set; otherwise there
// is nothing to free at the end.
unsigned char *old = c->alloc ? c->data : NULL;
if (str == NULL) {
c->len = 0;
c->data = NULL;
c->alloc = 0;
} else {
c->len = (bufsize_t)strlen(str);
// The result of mem->calloc is used without a NULL check.
// NOTE(review): presumably the allocator aborts on failure -- confirm.
c->data = (unsigned char *)mem->calloc(c->len + 1, 1);
c->alloc = 1;
// Copies len + 1 bytes, so the terminating NUL of str is included.
memcpy(c->data, str, c->len + 1);
}
// The old buffer is freed only after the copy above has been made.
// NOTE(review): this ordering keeps the copy valid if str points into the
// old buffer -- presumably intentional; confirm against callers.
if (old != NULL) {
mem->free(old);
}
}

static CMARK_INLINE cmark_chunk cmark_chunk_literal(const char *data) {
bufsize_t len = data ? (bufsize_t)strlen(data) : 0;
cmark_chunk c = {(unsigned char *)data, len, 0};
cmark_chunk c = {(unsigned char *)data, len};
return c;
}

static CMARK_INLINE cmark_chunk cmark_chunk_dup(const cmark_chunk *ch,
bufsize_t pos, bufsize_t len) {
cmark_chunk c = {ch->data + pos, len, 0};
return c;
}

static CMARK_INLINE cmark_chunk cmark_chunk_buf_detach(cmark_strbuf *buf) {
cmark_chunk c;

c.len = buf->size;
c.data = cmark_strbuf_detach(buf);
c.alloc = 1;

cmark_chunk c = {ch->data + pos, len};
return c;
}

Expand Down
2 changes: 1 addition & 1 deletion src/commonmark.c
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ static bool is_autolink(cmark_node *node) {
if (strcmp((const char *)url, "mailto:") == 0) {
url += 7;
}
return strcmp((const char *)url, (char *)link_text->as.literal.data) == 0;
return strcmp((const char *)url, (char *)link_text->data) == 0;
}

// if node is a block node, returns node.
Expand Down
15 changes: 7 additions & 8 deletions src/html.c
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ static int S_render_node(cmark_node *node, cmark_event_type ev_type,
case CMARK_NODE_TEXT:
case CMARK_NODE_CODE:
case CMARK_NODE_HTML_INLINE:
escape_html(html, node->as.literal.data, node->as.literal.len);
escape_html(html, node->data, node->len);
break;

case CMARK_NODE_LINEBREAK:
Expand Down Expand Up @@ -164,8 +164,7 @@ static int S_render_node(cmark_node *node, cmark_event_type ev_type,
cmark_strbuf_puts(html, "\">");
}

escape_html(html, node->as.code.literal,
strlen((char *)node->as.code.literal));
escape_html(html, node->data, node->len);
cmark_strbuf_puts(html, "</code></pre>\n");
break;

Expand All @@ -174,7 +173,7 @@ static int S_render_node(cmark_node *node, cmark_event_type ev_type,
if (!(options & CMARK_OPT_UNSAFE)) {
cmark_strbuf_puts(html, "<!-- raw HTML omitted -->");
} else {
cmark_strbuf_put(html, node->as.literal.data, node->as.literal.len);
cmark_strbuf_put(html, node->data, node->len);
}
cr(html);
break;
Expand Down Expand Up @@ -218,7 +217,7 @@ static int S_render_node(cmark_node *node, cmark_event_type ev_type,
break;

case CMARK_NODE_TEXT:
escape_html(html, node->as.literal.data, node->as.literal.len);
escape_html(html, node->data, node->len);
break;

case CMARK_NODE_LINEBREAK:
Expand All @@ -237,15 +236,15 @@ static int S_render_node(cmark_node *node, cmark_event_type ev_type,

case CMARK_NODE_CODE:
cmark_strbuf_puts(html, "<code>");
escape_html(html, node->as.literal.data, node->as.literal.len);
escape_html(html, node->data, node->len);
cmark_strbuf_puts(html, "</code>");
break;

case CMARK_NODE_HTML_INLINE:
if (!(options & CMARK_OPT_UNSAFE)) {
cmark_strbuf_puts(html, "<!-- raw HTML omitted -->");
} else {
cmark_strbuf_put(html, node->as.literal.data, node->as.literal.len);
cmark_strbuf_put(html, node->data, node->len);
}
break;

Expand Down Expand Up @@ -325,7 +324,7 @@ static int S_render_node(cmark_node *node, cmark_event_type ev_type,

char *cmark_render_html(cmark_node *root, int options) {
char *result;
cmark_strbuf html = CMARK_BUF_INIT(cmark_node_mem(root));
cmark_strbuf html = CMARK_BUF_INIT(root->mem);
cmark_event_type ev_type;
cmark_node *cur;
struct render_state state = {&html, NULL};
Expand Down
Loading

0 comments on commit f3f50b2

Please sign in to comment.