diff options
Diffstat (limited to 'cmark/test/pathological_tests.py')
| -rw-r--r-- | cmark/test/pathological_tests.py | 202 |
1 files changed, 202 insertions, 0 deletions
diff --git a/cmark/test/pathological_tests.py b/cmark/test/pathological_tests.py new file mode 100644 index 0000000000..42c0e79e6d --- /dev/null +++ b/cmark/test/pathological_tests.py @@ -0,0 +1,202 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +import argparse +import itertools +import multiprocessing +import re +import sys +import queue +from cmark import CMark + +def hash_collisions(): + REFMAP_SIZE = 16 + COUNT = 25000 + + def badhash(ref): + h = 0 + for c in ref: + a = (h << 6) & 0xFFFFFFFF + b = (h << 16) & 0xFFFFFFFF + h = ord(c) + a + b - h + h = h & 0xFFFFFFFF + + return (h % REFMAP_SIZE) == 0 + + keys = ("x%d" % i for i in itertools.count()) + collisions = itertools.islice((k for k in keys if badhash(k)), COUNT) + bad_key = next(collisions) + + document = ''.join("[%s]: /url\n\n[%s]\n\n" % (key, bad_key) for key in collisions) + + return document, re.compile(r"(<p>\[%s]</p>\n){%d}" % (bad_key, COUNT-1)) + + +# list of pairs consisting of input and a regex that must match the output. +pathological = { + # note - some pythons have limit of 65535 for {num-matches} in re. + "nested strong emph": + (("*a **a " * 32500) + "b" + (" a** a*" * 32500), + re.compile("(<em>a <strong>a ){32500}b( a</strong> a</em>){32500}")), + "many emph closers with no openers": + (("a_ " * 32500), + re.compile("(a[_] ){32499}a_")), + "many emph openers with no closers": + (("_a " * 32500), + re.compile("(_a ){32499}_a")), + "many link closers with no openers": + (("a]" * 32500), + re.compile("(a\\]){32500}")), + "many link openers with no closers": + (("[a" * 32500), + re.compile("(\\[a){32500}")), + "mismatched openers and closers": + (("*a_ " * 25000), + re.compile("([*]a[_] ){24999}[*]a_")), + "issue #389": + (("*a " * 20000 + "_a*_ " * 20000), + re.compile("(<em>a ){20000}(_a<\\/em>_ ?){20000}")), + "openers and closers multiple of 3": + (("a**b" + ("c* " * 25000)), + re.compile("a[*][*]b(c[*] ){24999}c[*]")), + "link openers and emph closers": + (("[ a_" * 25000), + re.compile("(\\[ a_){25000}")), + "pattern [ (]( repeated": + (("[ (](" * 40000), + re.compile("(\\[ \\(\\]\\(){40000}")), + "pattern ![[]() repeated": + ("![[]()" * 160000, + re.compile("(!\\[<a href=\"\"></a>){160000}")), + "hard link/emph case": + ("**x [a*b**c*](d)", + re.compile("\\*\\*x <a href=\"d\">a<em>b\\*\\*c</em></a>")), + "nested brackets": + (("[" * 25000) + "a" + ("]" * 25000), + re.compile("\\[{25000}a\\]{25000}")), + "nested block quotes": + ((("> " * 25000) + "a"), + re.compile("(<blockquote>\n){25000}")), + "deeply nested lists": + ("".join(map(lambda x: (" " * x + "* a\n"), range(0,500))), + re.compile("<ul>\n(<li>a\n<ul>\n){499}<li>a</li>\n</ul>\n(</li>\n</ul>\n){499}")), + "U+0000 in input": + ("abc\u0000de\u0000", + re.compile("abc\ufffd?de\ufffd?")), + "backticks": + ("".join(map(lambda x: ("e" + "`" * x), range(1,2500))), + re.compile("^<p>[e`]*</p>\n$")), + "unclosed links A": + ("[a](<b" * 30000, + re.compile(r"(\[a]\(<b){30000}")), + "unclosed links B": + ("[a](b" * 30000, + re.compile(r"(\[a]\(b){30000}")), + "unclosed <!--": + ("</" + "<!--" * 300000, + re.compile("\\<\\/(\\<!--){300000}")), + "empty lines in deeply nested lists": + ("- " * 30000 + "x" + "\n" * 30000, + re.compile(r"^(<\w+>\n?)+x(</\w+>\n)+$")), + "empty lines in deeply nested lists in blockquote": + ("> " + "- " * 30000 + "x\n" + ">\n" * 30000, + re.compile(r"^(<\w+>\n?)+x(</\w+>\n)+$")), + "emph in deep blockquote": + (">" * 100000 + "a*" * 100000, + re.compile(r"^(<\w+>\n)+<p>.*</p>\n(</\w+>\n)+$")), + "reference collisions": hash_collisions() +# "many references": +# ("".join(map(lambda x: ("[" + str(x) + "]: u\n"), range(1,5000 * 16))) + "[0] " * 5000, +# re.compile("(\\[0\\] ){4999}")) + } + +pathological_cmark = { + "nested inlines": + ("*" * 20000 + "a" + "*" * 20000, + re.compile("^\\*+a\\*+$")), + } + +whitespace_re = re.compile('/s+/') + +def run_pathological(q, inp, prog, lib_dir): + cmark = CMark(prog=prog, library_dir=lib_dir) + q.put(cmark.to_html(inp)) + +def run_pathological_cmark(q, inp, prog, lib_dir): + cmark = CMark(prog=prog, library_dir=lib_dir) + q.put(cmark.to_commonmark(inp)) + +def run_tests(args): + allowed_failures = {"many references": True} + TIMEOUT = 5 + + q = multiprocessing.Queue() + passed = [] + errored = [] + failed = [] + ignored = [] + + print("Testing pathological cases:") + for description in (*pathological, *pathological_cmark): + if description in pathological: + (inp, regex) = pathological[description] + p = multiprocessing.Process( + target=run_pathological, + args=(q, inp, args.program, args.library_dir) + ) + else: + (inp, regex) = pathological_cmark[description] + p = multiprocessing.Process( + target=run_pathological_cmark, + args=(q, inp, args.program, args.library_dir) + ) + p.start() + try: + # wait TIMEOUT seconds or until it finishes + rc, actual, err = q.get(True, TIMEOUT) + p.join() + if rc != 0: + print(description, '[ERRORED (return code %d)]' %rc) + print(err) + if description in allowed_failures: + ignored.append(description) + else: + errored.append(description) + elif regex.search(actual): + print(description, '[PASSED]') + passed.append(description) + else: + print(description, '[FAILED]') + print(repr(actual[:60])) + if description in allowed_failures: + ignored.append(description) + else: + failed.append(description) + except queue.Empty: + p.terminate() + p.join() + print(description, '[TIMEOUT]') + if description in allowed_failures: + ignored.append(description) + else: + errored.append(description) + + print("%d passed, %d failed, %d errored" % + (len(passed), len(failed), len(errored))) + if ignored: + print("Ignoring these allowed failures:") + for x in ignored: + print(x) + if failed or errored: + exit(1) + else: + exit(0) + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='Run cmark tests.') + parser.add_argument('--program', dest='program', nargs='?', default=None, + help='program to test') + parser.add_argument('--library-dir', dest='library_dir', nargs='?', + default=None, help='directory containing dynamic library') + args = parser.parse_args(sys.argv[1:]) + run_tests(args) |
