diff options
| author | Mehmet Samet Duman <yongdohyun@projecttick.org> | 2026-04-02 18:41:54 +0300 |
|---|---|---|
| committer | Mehmet Samet Duman <yongdohyun@projecttick.org> | 2026-04-02 18:41:54 +0300 |
| commit | 3d2121f5d6555744ce5aa502088fc2b34dc26d38 (patch) | |
| tree | 53f42c08746171878b57f5b6ffe1eb841da9d45d /cmark/tools | |
| parent | 6bf7c5ce92ff6237c0b17c332873805018812b40 (diff) | |
| parent | 64efa3b3b3d35f2ffb604b57a8a9c89047cb420b (diff) | |
| download | Project-Tick-3d2121f5d6555744ce5aa502088fc2b34dc26d38.tar.gz Project-Tick-3d2121f5d6555744ce5aa502088fc2b34dc26d38.zip | |
Add 'cmark/' from commit '64efa3b3b3d35f2ffb604b57a8a9c89047cb420b'
git-subtree-dir: cmark
git-subtree-mainline: 6bf7c5ce92ff6237c0b17c332873805018812b40
git-subtree-split: 64efa3b3b3d35f2ffb604b57a8a9c89047cb420b
Diffstat (limited to 'cmark/tools')
| -rw-r--r-- | cmark/tools/appveyor-build.bat | 13 | ||||
| -rw-r--r-- | cmark/tools/make_case_fold_inc.py | 96 | ||||
| -rw-r--r-- | cmark/tools/make_entities_inc.py | 72 | ||||
| -rw-r--r-- | cmark/tools/xml2md.xsl | 319 |
4 files changed, 500 insertions, 0 deletions
diff --git a/cmark/tools/appveyor-build.bat b/cmark/tools/appveyor-build.bat new file mode 100644 index 0000000000..73d555b52b --- /dev/null +++ b/cmark/tools/appveyor-build.bat @@ -0,0 +1,13 @@ +@echo off + +if "%MSVC_VERSION%" == "10" goto msvc10 + +call "C:\Program Files (x86)\Microsoft Visual Studio %MSVC_VERSION%.0\VC\vcvarsall.bat" amd64 +goto build + +:msvc10 +call "C:\Program Files\Microsoft SDKs\Windows\v7.1\Bin\SetEnv.cmd" /x64 + +:build +nmake + diff --git a/cmark/tools/make_case_fold_inc.py b/cmark/tools/make_case_fold_inc.py new file mode 100644 index 0000000000..3347d291b9 --- /dev/null +++ b/cmark/tools/make_case_fold_inc.py @@ -0,0 +1,96 @@ +# Creates a C lookup table for Unicode case folding (https://unicode.org/Public/UCD/latest/ucd/CaseFolding.txt). +# Usage: python3 tools/make_case_fold_inc.py < data/CaseFolding.txt > src/case_fold.inc + +import sys, re + +prog = re.compile('([0-9A-F]+); [CF];((?: [0-9A-F]+)+);') +main_table = [] +repl_table = [] +repl_idx = 0 +test = '' +test_result = '' + +for line in sys.stdin: + m = prog.match(line) + if m is None: + continue + + cp = int(m[1], 16); + if cp < 0x80: + continue + + repl = b'' + for x in m[2].split(): + repl += chr(int(x, 16)).encode('UTF-8') + + # Generate test case + if len(main_table) % 20 == 0: + test += chr(cp) + test_result += repl.decode('UTF-8') + + # 17 bits for code point + if cp >= (1 << 17): + raise Exception("code point too large") + + # 12 bits for upper bits of replacement index + # The lowest bit is always zero. + if repl_idx // 2 >= (1 << 12): + raise Exception("too many replacements") + + # 3 bits for size of replacement + repl_size = len(repl) + if repl_size >= (1 << 3): + raise Exception("too many replacement chars") + + main_table += [ cp | repl_idx // 2 << 17 | repl_size << 29 ] + repl_table += repl + repl_idx += repl_size + + # Make sure that repl_idx is even + if repl_idx % 2 != 0: + repl_table += [0] + repl_idx += 1 + +# Print test case +if False: + print("test:", test) + print("test_result:", test_result) + sys.exit(0) + +print("""// Generated by tools/make_case_fold_inc.py + +#define CF_MAX (1 << 17) +#define CF_TABLE_SIZE %d +#define CF_CODE_POINT(x) ((x) & 0x1FFFF) +#define CF_REPL_IDX(x) ((((x) >> 17) & 0xFFF) * 2) +#define CF_REPL_SIZE(x) ((x) >> 29) + +static const uint32_t cf_table[%d] = {""" % (len(main_table), len(main_table))) + +i = 0 +size = len(main_table) +for value in main_table: + if i % 6 == 0: + print(" ", end="") + print("0x%X" % value, end="") + i += 1 + if i == size: print() + elif i % 6 == 0: print(",") + else: print(", ", end="") + +print("""}; + +static const unsigned char cf_repl[%d] = {""" % len(repl_table)) + +i = 0 +size = len(repl_table) +for value in repl_table: + if i % 12 == 0: + print(" ", end="") + print("0x%02X" % value, end="") + i += 1 + if i == size: print() + elif i % 12 == 0: print(",") + else: print(", ", end="") + +print("};") diff --git a/cmark/tools/make_entities_inc.py b/cmark/tools/make_entities_inc.py new file mode 100644 index 0000000000..25c65d99d6 --- /dev/null +++ b/cmark/tools/make_entities_inc.py @@ -0,0 +1,72 @@ +# Creates C data structures for binary lookup table of entities, +# using python's html5 entity data. +# Usage: python3 tools/make_entities_inc.py > src/entities.inc + +import html + +entities5 = html.entities.html5 + +# Remove keys without semicolons. HTML5 allows some named character +# references without a trailing semicolon. +entities = sorted([(k[:-1], entities5[k]) for k in entities5.keys() if k[-1] == ';']) + +main_table = [] +text_table = b'' +text_idx = 0 + +for (ent, repl) in entities: + ent_bytes = ent.encode('UTF-8') + ent_size = len(ent_bytes) + repl_bytes = repl.encode('UTF-8') + repl_size = len(repl_bytes) + + if text_idx >= (1 << 15): + raise Exception("text index too large") + if ent_size >= (1 << 5): + raise Exception("entity name too long") + if repl_size >= (1 << 3): + raise Exception("entity replacement too long") + + main_table += [ text_idx | ent_size << 15 | repl_size << 20 ] + + text_table += ent_bytes + repl_bytes + text_idx += ent_size + repl_size + +print("""/* Autogenerated by tools/make_headers_inc.py */ + +#define ENT_MIN_LENGTH 2 +#define ENT_MAX_LENGTH 32 +#define ENT_TABLE_SIZE %d +#define ENT_TEXT_IDX(x) ((x) & 0x7FFF) +#define ENT_NAME_SIZE(x) (((x) >> 15) & 0x1F) +#define ENT_REPL_SIZE(x) ((x) >> 20) + +static const uint32_t cmark_entities[%d] = {""" % (len(main_table), len(main_table))); + +i = 0 +size = len(main_table) +for value in main_table: + if i % 6 == 0: + print(" ", end="") + print("0x%X" % value, end="") + i += 1 + if i == size: print() + elif i % 6 == 0: print(",") + else: print(", ", end="") + +print("""}; + +static const unsigned char cmark_entity_text[%d] = {""" % len(text_table)) + +i = 0 +size = len(text_table) +for value in text_table: + if i % 12 == 0: + print(" ", end="") + print("0x%02X" % value, end="") + i += 1 + if i == size: print() + elif i % 12 == 0: print(",") + else: print(", ", end="") + +print("};") diff --git a/cmark/tools/xml2md.xsl b/cmark/tools/xml2md.xsl new file mode 100644 index 0000000000..0122e5f712 --- /dev/null +++ b/cmark/tools/xml2md.xsl @@ -0,0 +1,319 @@ +<?xml version="1.0" encoding="UTF-8"?> + +<!-- + +xml2md.xsl +========== + +This XSLT stylesheet transforms the cmark XML format back to Commonmark. +Since the XML output is lossy, a lossless MD->XML->MD roundtrip isn't +possible. The XML->MD->XML roundtrip should produce the original XML, +though. + +Example usage with xsltproc: + + cmark -t xml doc.md | xsltproc -novalid xml2md.xsl - + +--> + +<xsl:stylesheet + version="1.0" + xmlns:xsl="http://www.w3.org/1999/XSL/Transform" + xmlns:md="http://commonmark.org/xml/1.0"> + +<xsl:output method="text" encoding="utf-8"/> + +<!-- Generic templates --> + +<xsl:template match="/ | md:document | md:list"> + <xsl:apply-templates select="md:*"/> +</xsl:template> + +<xsl:template match="md:*"> + <xsl:message>Unsupported element '<xsl:value-of select="local-name()"/>'</xsl:message> +</xsl:template> + +<xsl:template match="md:*" mode="indent"/> + +<!-- Indent blocks --> + +<xsl:template match="md:*" mode="indent-block"> + <xsl:if test="preceding-sibling::md:*"> + <xsl:if test="not(ancestor::md:list[1][@tight='true'])"> + <xsl:apply-templates select="ancestor::md:*" mode="indent"/> + <xsl:text> </xsl:text> + </xsl:if> + <xsl:apply-templates select="ancestor::md:*" mode="indent"/> + </xsl:if> +</xsl:template> + +<!-- Heading --> + +<xsl:template match="md:heading"> + <xsl:apply-templates select="." mode="indent-block"/> + <xsl:value-of select="substring('###### ', 7 - @level)"/> + <xsl:apply-templates select="md:*"/> + <xsl:text> </xsl:text> +</xsl:template> + +<!-- Paragraph --> + +<xsl:template match="md:paragraph"> + <xsl:apply-templates select="." mode="indent-block"/> + <xsl:apply-templates select="md:*"/> + <xsl:text> </xsl:text> +</xsl:template> + +<!-- Thematic break --> + +<xsl:template match="md:thematic_break"> + <xsl:apply-templates select="." mode="indent-block"/> + <xsl:text>*** </xsl:text> +</xsl:template> + +<!-- List --> + +<xsl:template match="md:list"> + <xsl:apply-templates select="." mode="indent-block"/> + <xsl:apply-templates select="md:*"/> +</xsl:template> + +<xsl:template match="md:item"> + <xsl:apply-templates select="." mode="indent-block"/> + <xsl:choose> + <xsl:when test="../@type = 'bullet'">-</xsl:when> + <xsl:when test="../@type = 'ordered'"> + <xsl:value-of select="../@start + position() - 1"/> + <xsl:choose> + <xsl:when test="../@delim = 'period'">.</xsl:when> + <xsl:when test="../@delim = 'paren'">)</xsl:when> + </xsl:choose> + </xsl:when> + </xsl:choose> + <xsl:text> </xsl:text> + <xsl:apply-templates select="md:*"/> +</xsl:template> + +<xsl:template match="md:item" mode="indent"> + <xsl:choose> + <xsl:when test="../@type = 'bullet'"> + <xsl:text> </xsl:text> + </xsl:when> + <xsl:when test="../@type = 'ordered'"> + <xsl:text> </xsl:text> + </xsl:when> + </xsl:choose> +</xsl:template> + +<!-- Block quote --> + +<xsl:template match="md:block_quote"> + <xsl:apply-templates select="." mode="indent-block"/> + <xsl:text>> </xsl:text> + <xsl:apply-templates select="md:*"/> +</xsl:template> + +<xsl:template match="md:block_quote" mode="indent"> + <xsl:text>> </xsl:text> +</xsl:template> + +<!-- Code block --> + +<xsl:template match="md:code_block"> + <xsl:apply-templates select="." mode="indent-block"/> + + <xsl:variable name="t" select="string(.)"/> + <xsl:variable name="delim"> + <xsl:call-template name="code-delim"> + <xsl:with-param name="text" select="$t"/> + <xsl:with-param name="delim" select="'```'"/> + </xsl:call-template> + </xsl:variable> + + <xsl:value-of select="$delim"/> + <xsl:value-of select="@info"/> + <xsl:text> </xsl:text> + <xsl:call-template name="indent-lines"> + <xsl:with-param name="code" select="$t"/> + </xsl:call-template> + <xsl:apply-templates select="ancestor::md:*" mode="indent"/> + <xsl:value-of select="$delim"/> + <xsl:text> </xsl:text> +</xsl:template> + +<!-- Inline HTML --> + +<xsl:template match="md:html_block"> + <xsl:apply-templates select="." mode="indent-block"/> + <xsl:value-of select="substring-before(., ' ')"/> + <xsl:text> </xsl:text> + <xsl:call-template name="indent-lines"> + <xsl:with-param name="code" select="substring-after(., ' ')"/> + </xsl:call-template> +</xsl:template> + +<!-- Indent multiple lines --> + +<xsl:template name="indent-lines"> + <xsl:param name="code"/> + <xsl:if test="contains($code, ' ')"> + <xsl:apply-templates select="ancestor::md:*" mode="indent"/> + <xsl:value-of select="substring-before($code, ' ')"/> + <xsl:text> </xsl:text> + <xsl:call-template name="indent-lines"> + <xsl:with-param name="code" select="substring-after($code, ' ')"/> + </xsl:call-template> + </xsl:if> +</xsl:template> + +<!-- Text --> + +<xsl:template match="md:text"> + <xsl:variable name="t" select="string(.)"/> + <xsl:variable name="first" select="substring($t, 1, 1)"/> + <xsl:variable name="marker-check" select="translate(substring($t, 1, 10), '0123456789', '')"/> + <xsl:choose> + <!-- Escape ordered list markers --> + <xsl:when test="starts-with($marker-check, '.') and $first != '.'"> + <xsl:value-of select="substring-before($t, '.')"/> + <xsl:text>\.</xsl:text> + <xsl:call-template name="escape-text"> + <xsl:with-param name="text" select="substring-after($t, '.')"/> + </xsl:call-template> + </xsl:when> + <xsl:when test="starts-with($marker-check, ')') and $first != ')'"> + <xsl:value-of select="substring-before($t, ')')"/> + <xsl:text>\)</xsl:text> + <xsl:call-template name="escape-text"> + <xsl:with-param name="text" select="substring-after($t, ')')"/> + </xsl:call-template> + </xsl:when> + <!-- Escape leading block characters --> + <xsl:when test="contains('-+>#=~', $first)"> + <xsl:text>\</xsl:text> + <xsl:value-of select="$first"/> + <xsl:call-template name="escape-text"> + <xsl:with-param name="text" select="substring($t, 2)"/> + </xsl:call-template> + </xsl:when> + <!-- Otherwise --> + <xsl:otherwise> + <xsl:call-template name="escape-text"> + <xsl:with-param name="text" select="$t"/> + </xsl:call-template> + </xsl:otherwise> + </xsl:choose> +</xsl:template> + +<!-- Breaks --> + +<xsl:template match="md:softbreak"> + <xsl:text> </xsl:text> + <xsl:apply-templates select="ancestor::md:*" mode="indent"/> +</xsl:template> + +<xsl:template match="md:linebreak"> + <xsl:text> </xsl:text> + <xsl:apply-templates select="ancestor::md:*" mode="indent"/> +</xsl:template> + +<!-- Emphasis --> + +<xsl:template match="md:emph"> + <xsl:text>*</xsl:text> + <xsl:apply-templates select="md:*"/> + <xsl:text>*</xsl:text> +</xsl:template> + +<xsl:template match="md:strong"> + <xsl:text>**</xsl:text> + <xsl:apply-templates select="md:*"/> + <xsl:text>**</xsl:text> +</xsl:template> + +<!-- Inline code --> + +<xsl:template match="md:code"> + <xsl:variable name="t" select="string(.)"/> + <xsl:variable name="delim"> + <xsl:call-template name="code-delim"> + <xsl:with-param name="text" select="$t"/> + <xsl:with-param name="delim" select="'`'"/> + </xsl:call-template> + </xsl:variable> + <xsl:value-of select="$delim"/> + <xsl:value-of select="$t"/> + <xsl:value-of select="$delim"/> +</xsl:template> + +<!-- Links and images --> + +<xsl:template match="md:link | md:image"> + <xsl:if test="self::md:image">!</xsl:if> + <xsl:text>[</xsl:text> + <xsl:apply-templates select="md:*"/> + <xsl:text>](</xsl:text> + <xsl:call-template name="escape-text"> + <xsl:with-param name="text" select="string(@destination)"/> + <xsl:with-param name="escape" select="'()'"/> + </xsl:call-template> + <xsl:if test="string(@title)"> + <xsl:text> "</xsl:text> + <xsl:call-template name="escape-text"> + <xsl:with-param name="text" select="string(@title)"/> + <xsl:with-param name="escape" select="'"'"/> + </xsl:call-template> + <xsl:text>"</xsl:text> + </xsl:if> + <xsl:text>)</xsl:text> +</xsl:template> + +<!-- Inline HTML --> + +<xsl:template match="md:html_inline"> + <xsl:value-of select="."/> +</xsl:template> + +<!-- Escaping helpers --> + +<xsl:template name="escape-text"> + <xsl:param name="text"/> + <xsl:param name="escape" select="'*_`<[]&'"/> + + <xsl:variable name="trans" select="translate($text, $escape, '\\\\\\\')"/> + <xsl:choose> + <xsl:when test="contains($trans, '\')"> + <xsl:variable name="safe" select="substring-before($trans, '\')"/> + <xsl:variable name="l" select="string-length($safe)"/> + <xsl:value-of select="$safe"/> + <xsl:text>\</xsl:text> + <xsl:value-of select="substring($text, $l + 1, 1)"/> + <xsl:call-template name="escape-text"> + <xsl:with-param name="text" select="substring($text, $l + 2)"/> + <xsl:with-param name="escape" select="$escape"/> + </xsl:call-template> + </xsl:when> + <xsl:otherwise> + <xsl:value-of select="$text"/> + </xsl:otherwise> + </xsl:choose> +</xsl:template> + +<xsl:template name="code-delim"> + <xsl:param name="text"/> + <xsl:param name="delim"/> + + <xsl:choose> + <xsl:when test="contains($text, $delim)"> + <xsl:call-template name="code-delim"> + <xsl:with-param name="text" select="$text"/> + <xsl:with-param name="delim" select="concat($delim, '`')"/> + </xsl:call-template> + </xsl:when> + <xsl:otherwise> + <xsl:value-of select="$delim"/> + </xsl:otherwise> + </xsl:choose> +</xsl:template> + +</xsl:stylesheet> |
