summaryrefslogtreecommitdiff
path: root/cmark/tools
diff options
context:
space:
mode:
authorMehmet Samet Duman <yongdohyun@projecttick.org>2026-04-02 18:41:54 +0300
committerMehmet Samet Duman <yongdohyun@projecttick.org>2026-04-02 18:41:54 +0300
commit3d2121f5d6555744ce5aa502088fc2b34dc26d38 (patch)
tree53f42c08746171878b57f5b6ffe1eb841da9d45d /cmark/tools
parent6bf7c5ce92ff6237c0b17c332873805018812b40 (diff)
parent64efa3b3b3d35f2ffb604b57a8a9c89047cb420b (diff)
downloadProject-Tick-3d2121f5d6555744ce5aa502088fc2b34dc26d38.tar.gz
Project-Tick-3d2121f5d6555744ce5aa502088fc2b34dc26d38.zip
Add 'cmark/' from commit '64efa3b3b3d35f2ffb604b57a8a9c89047cb420b'
git-subtree-dir: cmark git-subtree-mainline: 6bf7c5ce92ff6237c0b17c332873805018812b40 git-subtree-split: 64efa3b3b3d35f2ffb604b57a8a9c89047cb420b
Diffstat (limited to 'cmark/tools')
-rw-r--r--cmark/tools/appveyor-build.bat13
-rw-r--r--cmark/tools/make_case_fold_inc.py96
-rw-r--r--cmark/tools/make_entities_inc.py72
-rw-r--r--cmark/tools/xml2md.xsl319
4 files changed, 500 insertions, 0 deletions
diff --git a/cmark/tools/appveyor-build.bat b/cmark/tools/appveyor-build.bat
new file mode 100644
index 0000000000..73d555b52b
--- /dev/null
+++ b/cmark/tools/appveyor-build.bat
@@ -0,0 +1,13 @@
+@echo off
+
+if "%MSVC_VERSION%" == "10" goto msvc10
+
+call "C:\Program Files (x86)\Microsoft Visual Studio %MSVC_VERSION%.0\VC\vcvarsall.bat" amd64
+goto build
+
+:msvc10
+call "C:\Program Files\Microsoft SDKs\Windows\v7.1\Bin\SetEnv.cmd" /x64
+
+:build
+nmake
+
diff --git a/cmark/tools/make_case_fold_inc.py b/cmark/tools/make_case_fold_inc.py
new file mode 100644
index 0000000000..3347d291b9
--- /dev/null
+++ b/cmark/tools/make_case_fold_inc.py
@@ -0,0 +1,96 @@
+# Creates a C lookup table for Unicode case folding (https://unicode.org/Public/UCD/latest/ucd/CaseFolding.txt).
+# Usage: python3 tools/make_case_fold_inc.py < data/CaseFolding.txt > src/case_fold.inc
+
+import sys, re
+
+prog = re.compile('([0-9A-F]+); [CF];((?: [0-9A-F]+)+);')
+main_table = []
+repl_table = []
+repl_idx = 0
+test = ''
+test_result = ''
+
+for line in sys.stdin:
+ m = prog.match(line)
+ if m is None:
+ continue
+
+ cp = int(m[1], 16);
+ if cp < 0x80:
+ continue
+
+ repl = b''
+ for x in m[2].split():
+ repl += chr(int(x, 16)).encode('UTF-8')
+
+ # Sample every 20th table entry into the test input and its expected folding
+ if len(main_table) % 20 == 0:
+ test += chr(cp)
+ test_result += repl.decode('UTF-8')
+
+ # The code point occupies the low 17 bits (bits 0-16) of the packed entry
+ if cp >= (1 << 17):
+ raise Exception("code point too large")
+
+ # 12 bits (bits 17-28 of the packed entry) hold repl_idx // 2;
+ # repl_idx is kept even by the padding below, so its lowest bit is implied.
+ if repl_idx // 2 >= (1 << 12):
+ raise Exception("too many replacements")
+
+ # 3 bits (bits 29-31) hold the replacement size, counted in UTF-8 bytes
+ repl_size = len(repl)
+ if repl_size >= (1 << 3):
+ raise Exception("too many replacement chars")
+
+ main_table += [ cp | repl_idx // 2 << 17 | repl_size << 29 ]
+ repl_table += repl
+ repl_idx += repl_size
+
+ # Pad with one zero byte so repl_idx stays even (only repl_idx // 2 is stored)
+ if repl_idx % 2 != 0:
+ repl_table += [0]
+ repl_idx += 1
+
+# Print test case
+if False:
+ print("test:", test)
+ print("test_result:", test_result)
+ sys.exit(0)
+
+print("""// Generated by tools/make_case_fold_inc.py
+
+#define CF_MAX (1 << 17)
+#define CF_TABLE_SIZE %d
+#define CF_CODE_POINT(x) ((x) & 0x1FFFF)
+#define CF_REPL_IDX(x) ((((x) >> 17) & 0xFFF) * 2)
+#define CF_REPL_SIZE(x) ((x) >> 29)
+
+static const uint32_t cf_table[%d] = {""" % (len(main_table), len(main_table)))
+
+i = 0
+size = len(main_table)
+for value in main_table:
+ if i % 6 == 0:
+ print(" ", end="")
+ print("0x%X" % value, end="")
+ i += 1
+ if i == size: print()
+ elif i % 6 == 0: print(",")
+ else: print(", ", end="")
+
+print("""};
+
+static const unsigned char cf_repl[%d] = {""" % len(repl_table))
+
+i = 0
+size = len(repl_table)
+for value in repl_table:
+ if i % 12 == 0:
+ print(" ", end="")
+ print("0x%02X" % value, end="")
+ i += 1
+ if i == size: print()
+ elif i % 12 == 0: print(",")
+ else: print(", ", end="")
+
+print("};")
diff --git a/cmark/tools/make_entities_inc.py b/cmark/tools/make_entities_inc.py
new file mode 100644
index 0000000000..25c65d99d6
--- /dev/null
+++ b/cmark/tools/make_entities_inc.py
@@ -0,0 +1,72 @@
+# Creates C data structures for binary lookup table of entities,
+# using python's html5 entity data.
+# Usage: python3 tools/make_entities_inc.py > src/entities.inc
+
+import html
+
+entities5 = html.entities.html5
+
+# Remove keys without semicolons. HTML5 allows some named character
+# references without a trailing semicolon.
+entities = sorted([(k[:-1], entities5[k]) for k in entities5.keys() if k[-1] == ';'])
+
+main_table = []
+text_table = b''
+text_idx = 0
+
+for (ent, repl) in entities:
+ ent_bytes = ent.encode('UTF-8')
+ ent_size = len(ent_bytes)
+ repl_bytes = repl.encode('UTF-8')
+ repl_size = len(repl_bytes)
+
+ if text_idx >= (1 << 15):
+ raise Exception("text index too large")
+ if ent_size >= (1 << 5):
+ raise Exception("entity name too long")
+ if repl_size >= (1 << 3):
+ raise Exception("entity replacement too long")
+
+ main_table += [ text_idx | ent_size << 15 | repl_size << 20 ]
+
+ text_table += ent_bytes + repl_bytes
+ text_idx += ent_size + repl_size
+
+print("""/* Autogenerated by tools/make_entities_inc.py */
+
+#define ENT_MIN_LENGTH 2
+#define ENT_MAX_LENGTH 32
+#define ENT_TABLE_SIZE %d
+#define ENT_TEXT_IDX(x) ((x) & 0x7FFF)
+#define ENT_NAME_SIZE(x) (((x) >> 15) & 0x1F)
+#define ENT_REPL_SIZE(x) ((x) >> 20)
+
+static const uint32_t cmark_entities[%d] = {""" % (len(main_table), len(main_table)))  # banner names this script; both %d slots get the table length
+
+i = 0
+size = len(main_table)
+for value in main_table:
+ if i % 6 == 0:
+ print(" ", end="")
+ print("0x%X" % value, end="")
+ i += 1
+ if i == size: print()
+ elif i % 6 == 0: print(",")
+ else: print(", ", end="")
+
+print("""};
+
+static const unsigned char cmark_entity_text[%d] = {""" % len(text_table))
+
+i = 0
+size = len(text_table)
+for value in text_table:
+ if i % 12 == 0:
+ print(" ", end="")
+ print("0x%02X" % value, end="")
+ i += 1
+ if i == size: print()
+ elif i % 12 == 0: print(",")
+ else: print(", ", end="")
+
+print("};")
diff --git a/cmark/tools/xml2md.xsl b/cmark/tools/xml2md.xsl
new file mode 100644
index 0000000000..0122e5f712
--- /dev/null
+++ b/cmark/tools/xml2md.xsl
@@ -0,0 +1,319 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+
+xml2md.xsl
+==========
+
+This XSLT stylesheet transforms the cmark XML format back to Commonmark.
+Since the XML output is lossy, a lossless MD->XML->MD roundtrip isn't
+possible. The XML->MD->XML roundtrip should produce the original XML,
+though.
+
+Example usage with xsltproc:
+
+ cmark -t xml doc.md | xsltproc -novalid xml2md.xsl -
+
+-->
+
+<xsl:stylesheet
+ version="1.0"
+ xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+ xmlns:md="http://commonmark.org/xml/1.0">
+
+<xsl:output method="text" encoding="utf-8"/>
+
+<!-- Generic templates -->
+
+<xsl:template match="/ | md:document | md:list">
+ <xsl:apply-templates select="md:*"/>
+</xsl:template>
+
+<xsl:template match="md:*">
+ <xsl:message>Unsupported element '<xsl:value-of select="local-name()"/>'</xsl:message>
+</xsl:template>
+
+<xsl:template match="md:*" mode="indent"/>
+
+<!-- Indent blocks -->
+
+<xsl:template match="md:*" mode="indent-block">
+ <xsl:if test="preceding-sibling::md:*">
+ <xsl:if test="not(ancestor::md:list[1][@tight='true'])">
+ <xsl:apply-templates select="ancestor::md:*" mode="indent"/>
+ <xsl:text>&#10;</xsl:text>
+ </xsl:if>
+ <xsl:apply-templates select="ancestor::md:*" mode="indent"/>
+ </xsl:if>
+</xsl:template>
+
+<!-- Heading -->
+
+<xsl:template match="md:heading">
+ <xsl:apply-templates select="." mode="indent-block"/>
+ <xsl:value-of select="substring('###### ', 7 - @level)"/>
+ <xsl:apply-templates select="md:*"/>
+ <xsl:text>&#10;</xsl:text>
+</xsl:template>
+
+<!-- Paragraph -->
+
+<xsl:template match="md:paragraph">
+ <xsl:apply-templates select="." mode="indent-block"/>
+ <xsl:apply-templates select="md:*"/>
+ <xsl:text>&#10;</xsl:text>
+</xsl:template>
+
+<!-- Thematic break -->
+
+<xsl:template match="md:thematic_break">
+ <xsl:apply-templates select="." mode="indent-block"/>
+ <xsl:text>***&#10;</xsl:text>
+</xsl:template>
+
+<!-- List -->
+
+<xsl:template match="md:list">
+ <xsl:apply-templates select="." mode="indent-block"/>
+ <xsl:apply-templates select="md:*"/>
+</xsl:template>
+
+<xsl:template match="md:item">
+ <xsl:apply-templates select="." mode="indent-block"/>
+ <xsl:choose>
+ <xsl:when test="../@type = 'bullet'">-</xsl:when>
+ <xsl:when test="../@type = 'ordered'">
+ <xsl:value-of select="../@start + position() - 1"/>
+ <xsl:choose>
+ <xsl:when test="../@delim = 'period'">.</xsl:when>
+ <xsl:when test="../@delim = 'paren'">)</xsl:when>
+ </xsl:choose>
+ </xsl:when>
+ </xsl:choose>
+ <xsl:text> </xsl:text>
+ <xsl:apply-templates select="md:*"/>
+</xsl:template>
+
+<xsl:template match="md:item" mode="indent">
+ <xsl:choose>
+ <xsl:when test="../@type = 'bullet'">
+ <xsl:text> </xsl:text>
+ </xsl:when>
+ <xsl:when test="../@type = 'ordered'">
+ <xsl:text> </xsl:text>
+ </xsl:when>
+ </xsl:choose>
+</xsl:template>
+
+<!-- Block quote -->
+
+<xsl:template match="md:block_quote">
+ <xsl:apply-templates select="." mode="indent-block"/>
+ <xsl:text>&gt; </xsl:text>
+ <xsl:apply-templates select="md:*"/>
+</xsl:template>
+
+<xsl:template match="md:block_quote" mode="indent">
+ <xsl:text>&gt; </xsl:text>
+</xsl:template>
+
+<!-- Code block -->
+
+<xsl:template match="md:code_block">
+ <xsl:apply-templates select="." mode="indent-block"/>
+
+ <xsl:variable name="t" select="string(.)"/>
+ <xsl:variable name="delim">
+ <xsl:call-template name="code-delim">
+ <xsl:with-param name="text" select="$t"/>
+ <xsl:with-param name="delim" select="'```'"/>
+ </xsl:call-template>
+ </xsl:variable>
+
+ <xsl:value-of select="$delim"/>
+ <xsl:value-of select="@info"/>
+ <xsl:text>&#10;</xsl:text>
+ <xsl:call-template name="indent-lines">
+ <xsl:with-param name="code" select="$t"/>
+ </xsl:call-template>
+ <xsl:apply-templates select="ancestor::md:*" mode="indent"/>
+ <xsl:value-of select="$delim"/>
+ <xsl:text>&#10;</xsl:text>
+</xsl:template>
+
+<!-- HTML block -->
+
+<xsl:template match="md:html_block">
+ <xsl:apply-templates select="." mode="indent-block"/>
+ <xsl:value-of select="substring-before(., '&#10;')"/>
+ <xsl:text>&#10;</xsl:text>
+ <xsl:call-template name="indent-lines">
+ <xsl:with-param name="code" select="substring-after(., '&#10;')"/>
+ </xsl:call-template>
+</xsl:template>
+
+<!-- Indent multiple lines -->
+
+<xsl:template name="indent-lines">
+ <xsl:param name="code"/>
+ <xsl:if test="contains($code, '&#10;')">
+ <xsl:apply-templates select="ancestor::md:*" mode="indent"/>
+ <xsl:value-of select="substring-before($code, '&#10;')"/>
+ <xsl:text>&#10;</xsl:text>
+ <xsl:call-template name="indent-lines">
+ <xsl:with-param name="code" select="substring-after($code, '&#10;')"/>
+ </xsl:call-template>
+ </xsl:if>
+</xsl:template>
+
+<!-- Text -->
+
+<xsl:template match="md:text">
+ <xsl:variable name="t" select="string(.)"/>
+ <xsl:variable name="first" select="substring($t, 1, 1)"/>
+ <xsl:variable name="marker-check" select="translate(substring($t, 1, 10), '0123456789', '')"/>
+ <xsl:choose>
+ <!-- Escape ordered list markers -->
+ <xsl:when test="starts-with($marker-check, '.') and $first != '.'">
+ <xsl:value-of select="substring-before($t, '.')"/>
+ <xsl:text>\.</xsl:text>
+ <xsl:call-template name="escape-text">
+ <xsl:with-param name="text" select="substring-after($t, '.')"/>
+ </xsl:call-template>
+ </xsl:when>
+ <xsl:when test="starts-with($marker-check, ')') and $first != ')'">
+ <xsl:value-of select="substring-before($t, ')')"/>
+ <xsl:text>\)</xsl:text>
+ <xsl:call-template name="escape-text">
+ <xsl:with-param name="text" select="substring-after($t, ')')"/>
+ </xsl:call-template>
+ </xsl:when>
+ <!-- Escape leading block characters -->
+ <xsl:when test="contains('-+>#=~', $first)">
+ <xsl:text>\</xsl:text>
+ <xsl:value-of select="$first"/>
+ <xsl:call-template name="escape-text">
+ <xsl:with-param name="text" select="substring($t, 2)"/>
+ </xsl:call-template>
+ </xsl:when>
+ <!-- Otherwise -->
+ <xsl:otherwise>
+ <xsl:call-template name="escape-text">
+ <xsl:with-param name="text" select="$t"/>
+ </xsl:call-template>
+ </xsl:otherwise>
+ </xsl:choose>
+</xsl:template>
+
+<!-- Breaks -->
+
+<xsl:template match="md:softbreak">
+ <xsl:text>&#10;</xsl:text>
+ <xsl:apply-templates select="ancestor::md:*" mode="indent"/>
+</xsl:template>
+
+<xsl:template match="md:linebreak">
+ <xsl:text> &#10;</xsl:text>
+ <xsl:apply-templates select="ancestor::md:*" mode="indent"/>
+</xsl:template>
+
+<!-- Emphasis -->
+
+<xsl:template match="md:emph">
+ <xsl:text>*</xsl:text>
+ <xsl:apply-templates select="md:*"/>
+ <xsl:text>*</xsl:text>
+</xsl:template>
+
+<xsl:template match="md:strong">
+ <xsl:text>**</xsl:text>
+ <xsl:apply-templates select="md:*"/>
+ <xsl:text>**</xsl:text>
+</xsl:template>
+
+<!-- Inline code -->
+
+<xsl:template match="md:code">
+ <xsl:variable name="t" select="string(.)"/>
+ <xsl:variable name="delim">
+ <xsl:call-template name="code-delim">
+ <xsl:with-param name="text" select="$t"/>
+ <xsl:with-param name="delim" select="'`'"/>
+ </xsl:call-template>
+ </xsl:variable>
+ <xsl:value-of select="$delim"/>
+ <xsl:value-of select="$t"/>
+ <xsl:value-of select="$delim"/>
+</xsl:template>
+
+<!-- Links and images -->
+
+<xsl:template match="md:link | md:image">
+ <xsl:if test="self::md:image">!</xsl:if>
+ <xsl:text>[</xsl:text>
+ <xsl:apply-templates select="md:*"/>
+ <xsl:text>](</xsl:text>
+ <xsl:call-template name="escape-text">
+ <xsl:with-param name="text" select="string(@destination)"/>
+ <xsl:with-param name="escape" select="'()'"/>
+ </xsl:call-template>
+ <xsl:if test="string(@title)">
+ <xsl:text> "</xsl:text>
+ <xsl:call-template name="escape-text">
+ <xsl:with-param name="text" select="string(@title)"/>
+ <xsl:with-param name="escape" select="'&quot;'"/>
+ </xsl:call-template>
+ <xsl:text>"</xsl:text>
+ </xsl:if>
+ <xsl:text>)</xsl:text>
+</xsl:template>
+
+<!-- Inline HTML -->
+
+<xsl:template match="md:html_inline">
+ <xsl:value-of select="."/>
+</xsl:template>
+
+<!-- Escaping helpers -->
+
+<xsl:template name="escape-text">
+ <xsl:param name="text"/>
+ <xsl:param name="escape" select="'*_`&lt;[]&amp;'"/>
+
+ <xsl:variable name="trans" select="translate($text, $escape, '\\\\\\\')"/>
+ <xsl:choose>
+ <xsl:when test="contains($trans, '\')">
+ <xsl:variable name="safe" select="substring-before($trans, '\')"/>
+ <xsl:variable name="l" select="string-length($safe)"/>
+ <xsl:value-of select="$safe"/>
+ <xsl:text>\</xsl:text>
+ <xsl:value-of select="substring($text, $l + 1, 1)"/>
+ <xsl:call-template name="escape-text">
+ <xsl:with-param name="text" select="substring($text, $l + 2)"/>
+ <xsl:with-param name="escape" select="$escape"/>
+ </xsl:call-template>
+ </xsl:when>
+ <xsl:otherwise>
+ <xsl:value-of select="$text"/>
+ </xsl:otherwise>
+ </xsl:choose>
+</xsl:template>
+
+<xsl:template name="code-delim">
+ <xsl:param name="text"/>
+ <xsl:param name="delim"/>
+
+ <xsl:choose>
+ <xsl:when test="contains($text, $delim)">
+ <xsl:call-template name="code-delim">
+ <xsl:with-param name="text" select="$text"/>
+ <xsl:with-param name="delim" select="concat($delim, '`')"/>
+ </xsl:call-template>
+ </xsl:when>
+ <xsl:otherwise>
+ <xsl:value-of select="$delim"/>
+ </xsl:otherwise>
+ </xsl:choose>
+</xsl:template>
+
+</xsl:stylesheet>