1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
|
# Creates C data structures for binary lookup table of entities,
# using python's html5 entity data.
# Usage: python3 tools/make_entities_inc.py > src/entities.inc
# Import the submodule explicitly: a bare ``import html`` only exposes
# ``html.entities`` as a side effect of the package's own internal imports.
import html.entities

# Bit layout of one packed word in ``main_table``.  The ENT_TEXT_IDX,
# ENT_NAME_SIZE and ENT_REPL_SIZE macros written into the generated C file
# decode exactly this layout, so the widths here must stay in sync with them:
#   bits  0-14  byte offset of the entry inside ``text_table``
#   bits 15-19  length in bytes of the UTF-8 encoded entity name
#   bits 20-22  length in bytes of the UTF-8 encoded replacement text
TEXT_IDX_BITS = 15
NAME_SIZE_BITS = 5
REPL_SIZE_BITS = 3

entities5 = html.entities.html5

# Keep only the names that end in a semicolon and strip that semicolon.
# HTML5 allows some named character references without a trailing
# semicolon; those legacy spellings are dropped here.
# Sorting the (name, replacement) pairs gives the table a stable order.
entities = sorted(
    (name[:-1], repl) for name, repl in entities5.items() if name.endswith(";")
)

main_table = []    # one packed integer per entity (see bit layout above)
text_chunks = []   # per-entity name bytes followed by replacement bytes
text_idx = 0       # byte offset at which the next entry starts
for ent, repl in entities:
    ent_bytes = ent.encode("UTF-8")
    ent_size = len(ent_bytes)
    repl_bytes = repl.encode("UTF-8")
    repl_size = len(repl_bytes)
    # Each value has to fit into its bit field, otherwise the packed word
    # would silently corrupt the neighbouring fields.
    if text_idx >= (1 << TEXT_IDX_BITS):
        raise ValueError("text index too large")
    if ent_size >= (1 << NAME_SIZE_BITS):
        raise ValueError("entity name too long")
    if repl_size >= (1 << REPL_SIZE_BITS):
        raise ValueError("entity replacement too long")
    main_table.append(
        text_idx
        | ent_size << TEXT_IDX_BITS
        | repl_size << (TEXT_IDX_BITS + NAME_SIZE_BITS)
    )
    text_chunks.append(ent_bytes + repl_bytes)
    text_idx += ent_size + repl_size

# Join once at the end instead of growing an immutable bytes object
# inside the loop.
text_table = b"".join(text_chunks)
def _print_c_array_rows(values, fmt, per_line):
    """Print *values* as the body of a C array initializer.

    Each value is rendered with the %-format string *fmt*.  Values are
    grouped *per_line* to a row; every row starts with one space, items
    within a row are separated by ", ", and every row except the last ends
    with a trailing ",".  Nothing is printed when *values* is empty.

    *values* must support ``len()`` and slicing (a list or a bytes object).
    """
    rows = [
        " " + ", ".join(fmt % value for value in values[start:start + per_line])
        for start in range(0, len(values), per_line)
    ]
    if rows:
        print(",\n".join(rows))


# Preamble of the generated C file: the banner names this script, and the
# ENT_* accessor macros unpack the words stored in cmark_entities.
# ENT_MIN_LENGTH and ENT_MAX_LENGTH are emitted as fixed constants; they are
# not derived from the table contents.
print("""/* Autogenerated by tools/make_entities_inc.py */
#define ENT_MIN_LENGTH 2
#define ENT_MAX_LENGTH 32
#define ENT_TABLE_SIZE %d
#define ENT_TEXT_IDX(x) ((x) & 0x7FFF)
#define ENT_NAME_SIZE(x) (((x) >> 15) & 0x1F)
#define ENT_REPL_SIZE(x) ((x) >> 20)
static const uint32_t cmark_entities[%d] = {""" % (len(main_table), len(main_table)))

# Packed table words, six hexadecimal literals per row.
_print_c_array_rows(main_table, "0x%X", 6)

print("""};
static const unsigned char cmark_entity_text[%d] = {""" % len(text_table))

# Raw name/replacement bytes, twelve two-digit hexadecimal literals per row.
_print_c_array_rows(text_table, "0x%02X", 12)

print("};")
|