summaryrefslogtreecommitdiff
path: root/cmark/src/xml.c
diff options
context:
space:
mode:
authorMehmet Samet Duman <yongdohyun@projecttick.org>2026-04-02 18:41:54 +0300
committerMehmet Samet Duman <yongdohyun@projecttick.org>2026-04-02 18:41:54 +0300
commit3d2121f5d6555744ce5aa502088fc2b34dc26d38 (patch)
tree53f42c08746171878b57f5b6ffe1eb841da9d45d /cmark/src/xml.c
parent6bf7c5ce92ff6237c0b17c332873805018812b40 (diff)
parent64efa3b3b3d35f2ffb604b57a8a9c89047cb420b (diff)
downloadProject-Tick-3d2121f5d6555744ce5aa502088fc2b34dc26d38.tar.gz
Project-Tick-3d2121f5d6555744ce5aa502088fc2b34dc26d38.zip
Add 'cmark/' from commit '64efa3b3b3d35f2ffb604b57a8a9c89047cb420b'
git-subtree-dir: cmark git-subtree-mainline: 6bf7c5ce92ff6237c0b17c332873805018812b40 git-subtree-split: 64efa3b3b3d35f2ffb604b57a8a9c89047cb420b
Diffstat (limited to 'cmark/src/xml.c')
-rw-r--r--cmark/src/xml.c229
1 files changed, 229 insertions, 0 deletions
diff --git a/cmark/src/xml.c b/cmark/src/xml.c
new file mode 100644
index 0000000000..2ca2de82ce
--- /dev/null
+++ b/cmark/src/xml.c
@@ -0,0 +1,229 @@
+#include <assert.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "cmark.h"
+#include "node.h"
+#include "buffer.h"
+
+#define BUFFER_SIZE 100
+#define MAX_INDENT 40
+
+// Functions to convert cmark_nodes to XML strings.
+
+// C0 control characters, U+FFFE and U+FFF aren't allowed in XML.
+static const char XML_ESCAPE_TABLE[256] = {
+ /* 0x00 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1,
+ /* 0x10 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ /* 0x20 */ 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /* 0x30 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 5, 0,
+ /* 0x40 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /* 0x50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /* 0x60 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /* 0x70 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /* 0xA0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /* 0xB0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 9,
+ /* 0xC0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /* 0xD0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /* 0xE0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /* 0xF0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+};
+
+// U+FFFD Replacement Character encoded in UTF-8
+#define UTF8_REPL "\xEF\xBF\xBD"
+
+static const char *XML_ESCAPES[] = {
+ "", UTF8_REPL, "&quot;", "&amp;", "&lt;", "&gt;"
+};
+
+static void escape_xml(cmark_strbuf *ob, const unsigned char *src,
+ bufsize_t size) {
+ bufsize_t i = 0, org, esc = 0;
+
+ while (i < size) {
+ org = i;
+ while (i < size && (esc = XML_ESCAPE_TABLE[src[i]]) == 0)
+ i++;
+
+ if (i > org)
+ cmark_strbuf_put(ob, src + org, i - org);
+
+ if (i >= size)
+ break;
+
+ if (esc == 9) {
+ // To replace U+FFFE and U+FFFF with U+FFFD, only the last byte has to
+ // be changed.
+ // We know that src[i] is 0xBE or 0xBF.
+ if (i >= 2 && src[i-2] == 0xEF && src[i-1] == 0xBF) {
+ cmark_strbuf_putc(ob, 0xBD);
+ } else {
+ cmark_strbuf_putc(ob, src[i]);
+ }
+ } else {
+ cmark_strbuf_puts(ob, XML_ESCAPES[esc]);
+ }
+
+ i++;
+ }
+}
+
+static void escape_xml_str(cmark_strbuf *dest, const unsigned char *source) {
+ if (source)
+ escape_xml(dest, source, (bufsize_t)strlen((char *)source));
+}
+
+struct render_state {
+ cmark_strbuf *xml;
+ int indent;
+};
+
+static inline void indent(struct render_state *state) {
+ int i;
+ for (i = 0; i < state->indent && i < MAX_INDENT; i++) {
+ cmark_strbuf_putc(state->xml, ' ');
+ }
+}
+
+static int S_render_node(cmark_node *node, cmark_event_type ev_type,
+ struct render_state *state, int options) {
+ cmark_strbuf *xml = state->xml;
+ bool literal = false;
+ cmark_delim_type delim;
+ bool entering = (ev_type == CMARK_EVENT_ENTER);
+ char buffer[BUFFER_SIZE];
+
+ if (entering) {
+ indent(state);
+ cmark_strbuf_putc(xml, '<');
+ cmark_strbuf_puts(xml, cmark_node_get_type_string(node));
+
+ if (options & CMARK_OPT_SOURCEPOS && node->start_line != 0) {
+ snprintf(buffer, BUFFER_SIZE, " sourcepos=\"%d:%d-%d:%d\"",
+ node->start_line, node->start_column, node->end_line,
+ node->end_column);
+ cmark_strbuf_puts(xml, buffer);
+ }
+
+ literal = false;
+
+ switch (node->type) {
+ case CMARK_NODE_DOCUMENT:
+ cmark_strbuf_puts(xml, " xmlns=\"http://commonmark.org/xml/1.0\"");
+ break;
+ case CMARK_NODE_TEXT:
+ case CMARK_NODE_CODE:
+ case CMARK_NODE_HTML_BLOCK:
+ case CMARK_NODE_HTML_INLINE:
+ cmark_strbuf_puts(xml, " xml:space=\"preserve\">");
+ escape_xml(xml, node->data, node->len);
+ cmark_strbuf_puts(xml, "</");
+ cmark_strbuf_puts(xml, cmark_node_get_type_string(node));
+ literal = true;
+ break;
+ case CMARK_NODE_LIST:
+ switch (cmark_node_get_list_type(node)) {
+ case CMARK_ORDERED_LIST:
+ cmark_strbuf_puts(xml, " type=\"ordered\"");
+ snprintf(buffer, BUFFER_SIZE, " start=\"%d\"",
+ cmark_node_get_list_start(node));
+ cmark_strbuf_puts(xml, buffer);
+ delim = cmark_node_get_list_delim(node);
+ if (delim == CMARK_PAREN_DELIM) {
+ cmark_strbuf_puts(xml, " delimiter=\"paren\"");
+ } else if (delim == CMARK_PERIOD_DELIM) {
+ cmark_strbuf_puts(xml, " delimiter=\"period\"");
+ }
+ break;
+ case CMARK_BULLET_LIST:
+ cmark_strbuf_puts(xml, " type=\"bullet\"");
+ break;
+ default:
+ break;
+ }
+ snprintf(buffer, BUFFER_SIZE, " tight=\"%s\"",
+ (cmark_node_get_list_tight(node) ? "true" : "false"));
+ cmark_strbuf_puts(xml, buffer);
+ break;
+ case CMARK_NODE_HEADING:
+ snprintf(buffer, BUFFER_SIZE, " level=\"%d\"", node->as.heading.level);
+ cmark_strbuf_puts(xml, buffer);
+ break;
+ case CMARK_NODE_CODE_BLOCK:
+ if (node->as.code.info) {
+ cmark_strbuf_puts(xml, " info=\"");
+ escape_xml_str(xml, node->as.code.info);
+ cmark_strbuf_putc(xml, '"');
+ }
+ cmark_strbuf_puts(xml, " xml:space=\"preserve\">");
+ escape_xml(xml, node->data, node->len);
+ cmark_strbuf_puts(xml, "</");
+ cmark_strbuf_puts(xml, cmark_node_get_type_string(node));
+ literal = true;
+ break;
+ case CMARK_NODE_CUSTOM_BLOCK:
+ case CMARK_NODE_CUSTOM_INLINE:
+ cmark_strbuf_puts(xml, " on_enter=\"");
+ escape_xml_str(xml, node->as.custom.on_enter);
+ cmark_strbuf_putc(xml, '"');
+ cmark_strbuf_puts(xml, " on_exit=\"");
+ escape_xml_str(xml, node->as.custom.on_exit);
+ cmark_strbuf_putc(xml, '"');
+ break;
+ case CMARK_NODE_LINK:
+ case CMARK_NODE_IMAGE:
+ cmark_strbuf_puts(xml, " destination=\"");
+ escape_xml_str(xml, node->as.link.url);
+ cmark_strbuf_putc(xml, '"');
+ if (node->as.link.title) {
+ cmark_strbuf_puts(xml, " title=\"");
+ escape_xml_str(xml, node->as.link.title);
+ cmark_strbuf_putc(xml, '"');
+ }
+ break;
+ default:
+ break;
+ }
+ if (node->first_child) {
+ state->indent += 2;
+ } else if (!literal) {
+ cmark_strbuf_puts(xml, " /");
+ }
+ cmark_strbuf_puts(xml, ">\n");
+
+ } else if (node->first_child) {
+ state->indent -= 2;
+ indent(state);
+ cmark_strbuf_puts(xml, "</");
+ cmark_strbuf_puts(xml, cmark_node_get_type_string(node));
+ cmark_strbuf_puts(xml, ">\n");
+ }
+
+ return 1;
+}
+
+char *cmark_render_xml(cmark_node *root, int options) {
+ char *result;
+ cmark_strbuf xml = CMARK_BUF_INIT(root->mem);
+ cmark_event_type ev_type;
+ cmark_node *cur;
+ struct render_state state = {&xml, 0};
+
+ cmark_iter *iter = cmark_iter_new(root);
+
+ cmark_strbuf_puts(state.xml, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
+ cmark_strbuf_puts(state.xml,
+ "<!DOCTYPE document SYSTEM \"CommonMark.dtd\">\n");
+ while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
+ cur = cmark_iter_get_node(iter);
+ S_render_node(cur, ev_type, &state, options);
+ }
+ result = (char *)cmark_strbuf_detach(&xml);
+
+ cmark_iter_free(iter);
+ return result;
+}