summaryrefslogtreecommitdiff
path: root/docs/handbook/cmark/cli-usage.md
blob: d77c3b8fa903494e7f84c766147d4c4194b45fdc (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
# cmark — CLI Usage

## Overview

The `cmark` command-line tool (`main.c`) reads CommonMark input from files or stdin and renders it to one of five output formats. It serves as both a reference implementation and a practical conversion tool.

## Entry Point

```c
int main(int argc, char *argv[]);
```

## Output Formats

```c
typedef enum {
  FORMAT_NONE,
  FORMAT_HTML,
  FORMAT_XML,
  FORMAT_MAN,
  FORMAT_COMMONMARK,
  FORMAT_LATEX,
} writer_format;
```

Default: `FORMAT_HTML`.

## Command-Line Options

| Option | Long Form | Description |
|--------|-----------|-------------|
| `-t FORMAT` | `--to FORMAT` | Output format: `html`, `xml`, `man`, `commonmark`, `latex` |
| | `--width N` | Wrapping width (0 = no wrapping; default 0). Only affects the `commonmark`, `man`, and `latex` writers |
| | `--sourcepos` | Include source position information |
| | `--hardbreaks` | Render soft breaks as hard breaks |
| | `--nobreaks` | Render soft breaks as spaces |
| | `--unsafe` | Allow raw HTML and dangerous URLs |
| | `--smart` | Enable smart punctuation (curly quotes, em/en dashes, ellipses) |
| | `--validate-utf8` | Validate and clean UTF-8 input |
| `-h` | `--help` | Print usage information |
| | `--version` | Print version string |

## Option Parsing

```c
for (i = 1; i < argc; i++) {
  if (strcmp(argv[i], "--version") == 0) {
    printf("cmark %s", cmark_version_string());
    printf(" - CommonMark converter\n(C) 2014-2016 John MacFarlane\n");
    exit(0);
  } else if (strcmp(argv[i], "--sourcepos") == 0) {
    options |= CMARK_OPT_SOURCEPOS;
  } else if (strcmp(argv[i], "--hardbreaks") == 0) {
    options |= CMARK_OPT_HARDBREAKS;
  } else if (strcmp(argv[i], "--nobreaks") == 0) {
    options |= CMARK_OPT_NOBREAKS;
  } else if (strcmp(argv[i], "--smart") == 0) {
    options |= CMARK_OPT_SMART;
  } else if (strcmp(argv[i], "--unsafe") == 0) {
    options |= CMARK_OPT_UNSAFE;
  } else if (strcmp(argv[i], "--validate-utf8") == 0) {
    options |= CMARK_OPT_VALIDATE_UTF8;
  } else if ((strcmp(argv[i], "--to") == 0 || strcmp(argv[i], "-t") == 0) &&
             i + 1 < argc) {
    i++;
    if (strcmp(argv[i], "man") == 0)           writer = FORMAT_MAN;
    else if (strcmp(argv[i], "html") == 0)     writer = FORMAT_HTML;
    else if (strcmp(argv[i], "xml") == 0)      writer = FORMAT_XML;
    else if (strcmp(argv[i], "commonmark") == 0) writer = FORMAT_COMMONMARK;
    else if (strcmp(argv[i], "latex") == 0)    writer = FORMAT_LATEX;
    else {
      fprintf(stderr, "Unknown format %s\n", argv[i]);
      exit(1);
    }
  } else if (strcmp(argv[i], "--width") == 0 && i + 1 < argc) {
    i++;
    width = atoi(argv[i]);
  } else if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "--help") == 0) {
    print_usage();
    exit(0);
  } else if (*argv[i] == '-') {
    print_usage();
    exit(1);
  } else {
    // Treat as filename
    files[numfps++] = i;
  }
}
```

## Input Handling

### File Input

```c
for (i = 0; i < numfps; i++) {
  fp = fopen(argv[files[i]], "rb");
  if (fp == NULL) {
    fprintf(stderr, "Error opening file %s: %s\n", argv[files[i]], strerror(errno));
    exit(1);
  }
  // Read in chunks and feed to parser
  while ((bytes = fread(buffer, 1, sizeof(buffer), fp)) > 0) {
    cmark_parser_feed(parser, buffer, bytes);
    if (bytes < sizeof(buffer)) break;
  }
  fclose(fp);
}
```

Files are opened in binary mode (`"rb"`) and read in chunks of `BUFFER_SIZE` (4096 bytes). Each chunk is fed to the streaming parser via `cmark_parser_feed()`.

### Stdin Input

```c
if (numfps == 0) {
  // Read from stdin
  while ((bytes = fread(buffer, 1, sizeof(buffer), stdin)) > 0) {
    cmark_parser_feed(parser, buffer, bytes);
    if (bytes < sizeof(buffer)) break;
  }
}
```

When no files are specified, input is read from stdin.

### Windows Binary Mode

```c
#if defined(_WIN32) && !defined(__CYGWIN__)
_setmode(_fileno(stdin), _O_BINARY);
_setmode(_fileno(stdout), _O_BINARY);
#endif
```

On Windows, stdin and stdout are set to binary mode to prevent CR/LF translation.

## Rendering

```c
document = cmark_parser_finish(parser);
cmark_parser_free(parser);

// Render based on format
print_document(document, writer, width, options);
```

### `print_document()`

```c
static void print_document(cmark_node *document, writer_format writer,
                           int width, int options) {
  char *result;
  switch (writer) {
  case FORMAT_HTML:
    result = cmark_render_html(document, options);
    break;
  case FORMAT_XML:
    result = cmark_render_xml(document, options);
    break;
  case FORMAT_MAN:
    result = cmark_render_man(document, options, width);
    break;
  case FORMAT_COMMONMARK:
    result = cmark_render_commonmark(document, options, width);
    break;
  case FORMAT_LATEX:
    result = cmark_render_latex(document, options, width);
    break;
  default:
    fprintf(stderr, "Unknown format %d\n", writer);
    exit(1);
  }
  printf("%s", result);
  document->mem->free(result);
}
```

The rendered result is written to stdout and then freed.

### Cleanup

```c
cmark_node_free(document);
```

The AST is freed after rendering.

## OpenBSD Security

```c
#ifdef __OpenBSD__
  if (pledge("stdio rpath", NULL) != 0) {
    perror("pledge");
    return 1;
  }
#endif
```

On OpenBSD, the program restricts itself to `stdio` and `rpath` (read-only file access) via `pledge()`. This prevents the cmark binary from performing any operations beyond reading files and writing to stdout/stderr.

## Usage Examples

```bash
# Convert Markdown to HTML
cmark input.md

# Convert with smart punctuation
cmark --smart input.md

# Convert to man page with 72-column wrapping
cmark -t man --width 72 input.md

# Convert to LaTeX
cmark -t latex input.md

# Round-trip through CommonMark
cmark -t commonmark input.md

# Include source positions in output
cmark --sourcepos input.md

# Allow raw HTML passthrough
cmark --unsafe input.md

# Read from stdin
echo "# Hello" | cmark

# Validate UTF-8 input
cmark --validate-utf8 input.md

# Print version
cmark --version
```

## Exit Codes

- `0` — Success
- `1` — Error (unknown option, option missing its required argument, file open failure, unknown format)

## Cross-References

- [main.c](../../cmark/src/main.c) — Full implementation
- [public-api.md](public-api.md) — The C API functions called by main
- [html-renderer.md](html-renderer.md) — `cmark_render_html()`
- [xml-renderer.md](xml-renderer.md) — `cmark_render_xml()`
- [latex-renderer.md](latex-renderer.md) — `cmark_render_latex()`
- [man-renderer.md](man-renderer.md) — `cmark_render_man()`
- [commonmark-renderer.md](commonmark-renderer.md) — `cmark_render_commonmark()`