diff options
| author | Mehmet Samet Duman <yongdohyun@projecttick.org> | 2026-04-02 18:41:54 +0300 |
|---|---|---|
| committer | Mehmet Samet Duman <yongdohyun@projecttick.org> | 2026-04-02 18:41:54 +0300 |
| commit | 3d2121f5d6555744ce5aa502088fc2b34dc26d38 (patch) | |
| tree | 53f42c08746171878b57f5b6ffe1eb841da9d45d /cmark/src/commonmark.c | |
| parent | 6bf7c5ce92ff6237c0b17c332873805018812b40 (diff) | |
| parent | 64efa3b3b3d35f2ffb604b57a8a9c89047cb420b (diff) | |
| download | Project-Tick-3d2121f5d6555744ce5aa502088fc2b34dc26d38.tar.gz Project-Tick-3d2121f5d6555744ce5aa502088fc2b34dc26d38.zip | |
Add 'cmark/' from commit '64efa3b3b3d35f2ffb604b57a8a9c89047cb420b'
git-subtree-dir: cmark
git-subtree-mainline: 6bf7c5ce92ff6237c0b17c332873805018812b40
git-subtree-split: 64efa3b3b3d35f2ffb604b57a8a9c89047cb420b
Diffstat (limited to 'cmark/src/commonmark.c')
| -rw-r--r-- | cmark/src/commonmark.c | 473 |
1 files changed, 473 insertions, 0 deletions
diff --git a/cmark/src/commonmark.c b/cmark/src/commonmark.c new file mode 100644 index 0000000000..ad805a630e --- /dev/null +++ b/cmark/src/commonmark.c @@ -0,0 +1,473 @@ +#include <assert.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "cmark.h" +#include "node.h" +#include "buffer.h" +#include "utf8.h" +#include "scanners.h" +#include "render.h" + +#define OUT(s, wrap, escaping) renderer->out(renderer, s, wrap, escaping) +#define LIT(s) renderer->out(renderer, s, false, LITERAL) +#define CR() renderer->cr(renderer) +#define BLANKLINE() renderer->blankline(renderer) +#define ENCODED_SIZE 20 +#define LISTMARKER_SIZE 20 + +// Functions to convert cmark_nodes to commonmark strings. + +static inline void outc(cmark_renderer *renderer, cmark_escaping escape, + int32_t c, unsigned char nextc) { + bool needs_escaping = false; + bool follows_digit = + renderer->buffer->size > 0 && + cmark_isdigit(renderer->buffer->ptr[renderer->buffer->size - 1]); + char encoded[ENCODED_SIZE]; + int options = renderer->options; + + needs_escaping = + c < 0x80 && escape != LITERAL && + ((escape == NORMAL && + (c < 0x20 || + c == '*' || c == '_' || c == '[' || c == ']' || c == '#' || c == '<' || + c == '>' || c == '\\' || c == '`' || + (c == '!' && (!nextc || nextc == '[')) || + (c == '&' && cmark_isalpha(nextc)) || (c == '!' && nextc == '[') || + ((CMARK_OPT_SMART & options) && + ((c == '-' && nextc == '-') || + (c == '.' && nextc == '.') || + c == '"' || c == '\'')) || + (renderer->begin_content && (c == '-' || c == '+' || c == '=') && + // begin_content doesn't get set to false til we've passed digits + // at the beginning of line, so... + !follows_digit) || + (renderer->begin_content && (c == '.' || c == ')') && follows_digit && + (nextc == 0 || cmark_isspace(nextc))))) || + (escape == URL && + (c == '`' || c == '<' || c == '>' || cmark_isspace(c) || c == '\\' || + c == ')' || c == '(')) || + (escape == TITLE && + (c == '`' || c == '<' || c == '>' || c == '"' || c == '\\'))); + + if (needs_escaping) { + if (escape == URL && cmark_isspace(c)) { + // use percent encoding for spaces + snprintf(encoded, ENCODED_SIZE, "%%%2X", c); + cmark_strbuf_puts(renderer->buffer, encoded); + renderer->column += 3; + } else if (cmark_ispunct(c)) { + cmark_render_ascii(renderer, "\\"); + cmark_render_code_point(renderer, c); + } else { // render as entity + snprintf(encoded, ENCODED_SIZE, "&#%d;", c); + cmark_strbuf_puts(renderer->buffer, encoded); + renderer->column += (int)strlen(encoded); + } + } else { + cmark_render_code_point(renderer, c); + } +} + +static int longest_backtick_sequence(const char *code) { + int longest = 0; + int current = 0; + size_t i = 0; + size_t code_len = strlen(code); + while (i <= code_len) { + if (code[i] == '`') { + current++; + } else { + if (current > longest) { + longest = current; + } + current = 0; + } + i++; + } + return longest; +} + +static int shortest_unused_backtick_sequence(const char *code) { + // note: if the shortest sequence is >= 32, this returns 32 + // so as not to overflow the bit array. + uint32_t used = 1; + int current = 0; + size_t i = 0; + size_t code_len = strlen(code); + while (i <= code_len) { + if (code[i] == '`') { + current++; + } else { + if (current > 0 && current < 32) { + used |= (1U << current); + } + current = 0; + } + i++; + } + // return number of first bit that is 0: + i = 0; + while (i < 32 && used & 1) { + used = used >> 1; + i++; + } + return (int)i; +} + +static bool is_autolink(cmark_node *node) { + const unsigned char *title; + const unsigned char *url; + cmark_node *link_text; + + if (node->type != CMARK_NODE_LINK) { + return false; + } + + url = node->as.link.url; + if (url == NULL || _scan_scheme(url) == 0) { + return false; + } + + title = node->as.link.title; + // if it has a title, we can't treat it as an autolink: + if (title && title[0]) { + return false; + } + + link_text = node->first_child; + if (link_text == NULL) { + return false; + } + cmark_consolidate_text_nodes(link_text); + if (strncmp((const char *)url, "mailto:", 7) == 0) { + url += 7; + } + return link_text->data != NULL && + strcmp((const char *)url, (char *)link_text->data) == 0; +} + +static int S_render_node(cmark_renderer *renderer, cmark_node *node, + cmark_event_type ev_type, int options) { + cmark_node *tmp; + int list_number; + cmark_delim_type list_delim; + size_t numticks; + bool extra_spaces; + size_t i; + bool entering = (ev_type == CMARK_EVENT_ENTER); + const char *info, *code, *title; + char fencechar[2] = {'\0', '\0'}; + size_t code_len; + char listmarker[LISTMARKER_SIZE]; + const char *emph_delim; + bool first_in_list_item; + bufsize_t marker_width; + bool has_nonspace; + bool allow_wrap = renderer->width > 0 && !(CMARK_OPT_NOBREAKS & options) && + !(CMARK_OPT_HARDBREAKS & options); + + // Don't adjust tight list status til we've started the list. + // Otherwise we lose the blank line between a paragraph and + // a following list. + if (entering) { + if (node->parent && node->parent->type == CMARK_NODE_ITEM) { + renderer->in_tight_list_item = node->parent->parent->as.list.tight; + } + } else { + if (node->type == CMARK_NODE_LIST) { + renderer->in_tight_list_item = + node->parent && + node->parent->type == CMARK_NODE_ITEM && + node->parent->parent->as.list.tight; + } + } + + switch (node->type) { + case CMARK_NODE_DOCUMENT: + break; + + case CMARK_NODE_BLOCK_QUOTE: + if (entering) { + LIT("> "); + renderer->begin_content = true; + cmark_strbuf_puts(renderer->prefix, "> "); + } else { + cmark_strbuf_truncate(renderer->prefix, renderer->prefix->size - 2); + BLANKLINE(); + } + break; + + case CMARK_NODE_LIST: + if (!entering && node->next && (node->next->type == CMARK_NODE_LIST)) { + // this ensures that a following indented code block or list will be + // inteprereted correctly. + CR(); + LIT("<!-- end list -->"); + BLANKLINE(); + } + break; + + case CMARK_NODE_ITEM: + if (cmark_node_get_list_type(node->parent) == CMARK_BULLET_LIST) { + marker_width = 4; + } else { + list_number = cmark_node_get_list_start(node->parent); + list_delim = cmark_node_get_list_delim(node->parent); + tmp = node; + while (tmp->prev) { + tmp = tmp->prev; + list_number += 1; + } + // we ensure a width of at least 4 so + // we get nice transition from single digits + // to double + snprintf(listmarker, LISTMARKER_SIZE, "%d%s%s", list_number, + list_delim == CMARK_PAREN_DELIM ? ")" : ".", + list_number < 10 ? " " : " "); + marker_width = (bufsize_t)strlen(listmarker); + } + if (entering) { + if (cmark_node_get_list_type(node->parent) == CMARK_BULLET_LIST) { + LIT(" - "); + renderer->begin_content = true; + } else { + LIT(listmarker); + renderer->begin_content = true; + } + if (node->first_child == NULL) { + BLANKLINE(); + } else { + for (i = marker_width; i--;) { + cmark_strbuf_putc(renderer->prefix, ' '); + } + } + } else { + cmark_strbuf_truncate(renderer->prefix, + renderer->prefix->size - marker_width); + CR(); + } + break; + + case CMARK_NODE_HEADING: + if (entering) { + for (i = cmark_node_get_heading_level(node); i > 0; i--) { + LIT("#"); + } + LIT(" "); + renderer->begin_content = true; + renderer->no_linebreaks = true; + } else { + renderer->no_linebreaks = false; + BLANKLINE(); + } + break; + + case CMARK_NODE_CODE_BLOCK: + + first_in_list_item = node->prev == NULL && node->parent && + node->parent->type == CMARK_NODE_ITEM; + + if (!first_in_list_item) { + BLANKLINE(); + } + info = cmark_node_get_fence_info(node); + fencechar[0] = strchr(info, '`') == NULL ? '`' : '~'; + code = cmark_node_get_literal(node); + + numticks = longest_backtick_sequence(code) + 1; + if (numticks < 3) { + numticks = 3; + } + for (i = 0; i < numticks; i++) { + LIT(fencechar); + } + LIT(" "); + OUT(info, false, LITERAL); + CR(); + OUT(cmark_node_get_literal(node), false, LITERAL); + CR(); + for (i = 0; i < numticks; i++) { + LIT(fencechar); + } + + BLANKLINE(); + break; + + case CMARK_NODE_HTML_BLOCK: + BLANKLINE(); + OUT(cmark_node_get_literal(node), false, LITERAL); + BLANKLINE(); + break; + + case CMARK_NODE_CUSTOM_BLOCK: + BLANKLINE(); + OUT(entering ? cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node), + false, LITERAL); + BLANKLINE(); + break; + + case CMARK_NODE_THEMATIC_BREAK: + BLANKLINE(); + LIT("-----"); + BLANKLINE(); + break; + + case CMARK_NODE_PARAGRAPH: + if (!entering) { + BLANKLINE(); + } + break; + + case CMARK_NODE_TEXT: + OUT(cmark_node_get_literal(node), allow_wrap, NORMAL); + break; + + case CMARK_NODE_LINEBREAK: + if (!(CMARK_OPT_HARDBREAKS & options)) { + LIT(" "); + } + CR(); + break; + + case CMARK_NODE_SOFTBREAK: + if (CMARK_OPT_HARDBREAKS & options) { + LIT(" "); + CR(); + } else if (!renderer->no_linebreaks && renderer->width == 0 && + !(CMARK_OPT_HARDBREAKS & options) && + !(CMARK_OPT_NOBREAKS & options)) { + CR(); + } else { + OUT(" ", allow_wrap, LITERAL); + } + break; + + case CMARK_NODE_CODE: + code = cmark_node_get_literal(node); + code_len = strlen(code); + numticks = shortest_unused_backtick_sequence(code); + has_nonspace = false; + for (i=0; i < code_len; i++) { + if (code[i] != ' ') { + has_nonspace = true; + break; + } + } + extra_spaces = code_len == 0 || + code[0] == '`' || code[code_len - 1] == '`' || + (has_nonspace && code[0] == ' ' && code[code_len - 1] == ' '); + for (i = 0; i < numticks; i++) { + LIT("`"); + } + if (extra_spaces) { + LIT(" "); + } + OUT(cmark_node_get_literal(node), allow_wrap, LITERAL); + if (extra_spaces) { + LIT(" "); + } + for (i = 0; i < numticks; i++) { + LIT("`"); + } + break; + + case CMARK_NODE_HTML_INLINE: + OUT(cmark_node_get_literal(node), false, LITERAL); + break; + + case CMARK_NODE_CUSTOM_INLINE: + OUT(entering ? cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node), + false, LITERAL); + break; + + case CMARK_NODE_STRONG: + if (entering) { + LIT("**"); + } else { + LIT("**"); + } + break; + + case CMARK_NODE_EMPH: + // If we have EMPH(EMPH(x)), we need to use *_x_* + // because **x** is STRONG(x): + if (node->parent && node->parent->type == CMARK_NODE_EMPH && + node->next == NULL && node->prev == NULL) { + emph_delim = "_"; + } else { + emph_delim = "*"; + } + if (entering) { + LIT(emph_delim); + } else { + LIT(emph_delim); + } + break; + + case CMARK_NODE_LINK: + if (is_autolink(node)) { + if (entering) { + LIT("<"); + if (strncmp(cmark_node_get_url(node), "mailto:", 7) == 0) { + LIT((const char *)cmark_node_get_url(node) + 7); + } else { + LIT((const char *)cmark_node_get_url(node)); + } + LIT(">"); + // return signal to skip contents of node... + return 0; + } + } else { + if (entering) { + LIT("["); + } else { + LIT("]("); + OUT(cmark_node_get_url(node), false, URL); + title = cmark_node_get_title(node); + if (strlen(title) > 0) { + LIT(" \""); + OUT(title, false, TITLE); + LIT("\""); + } + LIT(")"); + } + } + break; + + case CMARK_NODE_IMAGE: + if (entering) { + LIT("; + OUT(cmark_node_get_url(node), false, URL); + title = cmark_node_get_title(node); + if (strlen(title) > 0) { + OUT(" \"", allow_wrap, LITERAL); + OUT(title, false, TITLE); + LIT("\""); + } + LIT(")"); + } + break; + + default: + assert(false); + break; + } + + return 1; +} + +char *cmark_render_commonmark(cmark_node *root, int options, int width) { + if (options & CMARK_OPT_HARDBREAKS) { + // disable breaking on width, since it has + // a different meaning with OPT_HARDBREAKS + width = 0; + } + return cmark_render(root, options, width, outc, S_render_node); +} |
