summary | refs | log | tree | commit | diff
path: root/docs/handbook/cgit/url-routing.md
diff options
context:
space:
mode:
Diffstat (limited to 'docs/handbook/cgit/url-routing.md')
-rw-r--r-- docs/handbook/cgit/url-routing.md 331
1 files changed, 331 insertions, 0 deletions
diff --git a/docs/handbook/cgit/url-routing.md b/docs/handbook/cgit/url-routing.md
new file mode 100644
index 0000000000..0adb3b7fc5
--- /dev/null
+++ b/docs/handbook/cgit/url-routing.md
@@ -0,0 +1,331 @@
+# cgit — URL Routing and Request Dispatch
+
+## Overview
+
+cgit supports two URL schemes: virtual-root (path-based) and query-string.
+Incoming requests are parsed into a `cgit_query` structure and dispatched to
+one of 21 command handlers via a function pointer table.
+
+Source files: `cgit.c` (querystring parsing, routing), `parsing.c`
+(`cgit_parse_url`), `cmd.c` (command table).
+
+## URL Schemes
+
+### Virtual Root (Path-Based)
+
+When `virtual-root` is configured, URLs use clean paths:
+
+```
+/cgit/ → repository list
+/cgit/repo.git/ → summary
+/cgit/repo.git/log/ → log (default branch)
+/cgit/repo.git/log/main/path → log for path on branch main
+/cgit/repo.git/tree/v1.0/src/ → tree view at tag v1.0
+/cgit/repo.git/commit/?id=abc → commit view
+```
+
+The path after the virtual root is passed in `PATH_INFO` and parsed by
+`cgit_parse_url()`.
+
+### Query-String (CGI)
+
+Without a virtual root, all parameters are passed in the query string:
+
+```
+/cgit.cgi?url=repo.git/log/main/path&ofs=50
+```
+
+## Query Structure
+
+All parsed parameters are stored in `ctx.qry`:
+
+```c
+struct cgit_query {
+ char *raw; /* raw URL / PATH_INFO */
+ char *repo; /* repository URL */
+ char *page; /* page/command name */
+ char *search; /* search string */
+ char *grep; /* grep pattern */
+ char *head; /* branch reference */
+ char *sha1; /* object SHA-1 */
+ char *sha2; /* second SHA-1 (for diffs) */
+ char *path; /* file/dir path within repo */
+ char *name; /* snapshot name / ref name */
+ char *url; /* combined URL path */
+ char *mimetype; /* requested MIME type */
+ char *etag; /* ETag from client */
+ int nohead; /* suppress header */
+ int ofs; /* pagination offset */
+ int has_symref; /* path contains a symbolic ref */
+ int has_sha1; /* explicit SHA was given */
+ int has_dot; /* path contains '..' */
+ int ignored; /* request should be ignored */
+ char *sort; /* sort field */
+ int showmsg; /* show full commit message */
+ int ssdiff; /* side-by-side diff */
+ int show_all; /* show all items */
+ int context; /* diff context lines */
+ int follow; /* follow renames */
+ int log_hierarchical_threading;
+};
+```
+
+## URL Parsing: `cgit_parse_url()`
+
+In `parsing.c`, the URL is decomposed into repo, page, and path:
+
+```c
+void cgit_parse_url(const char *url)
+{
+ /* Step 1: try progressively longer prefixes as repo URLs */
+ /* For each '/' in the URL, check if the prefix matches a repo */
+
+ for (p = strchr(url, '/'); p; p = strchr(p + 1, '/')) {
+ *p = '\0';
+ repo = cgit_get_repoinfo(url);
+ *p = '/';
+ if (repo) {
+ ctx.qry.repo = xstrdup(url_prefix);
+ ctx.repo = repo;
+ url = p + 1; /* remaining part */
+ break;
+ }
+ }
+ /* if no '/' found, try the whole URL as a repo name */
+
+ /* Step 2: parse the remaining path as page/ref/path */
+ /* e.g., "log/main/src/file.c" → page="log", path="main/src/file.c" */
+ p = strchr(url, '/');
+ if (p) {
+ ctx.qry.page = xstrndup(url, p - url);
+ ctx.qry.path = trim_end(p + 1, '/');
+ } else if (*url) {
+ ctx.qry.page = xstrdup(url);
+ }
+}
+```
+
+## Query String Parsing: `querystring_cb()`
+
+HTTP query parameters and POST form data are decoded by `querystring_cb()`
+in `cgit.c`. The function maps URL parameter names to `ctx.qry` fields:
+
+```c
+static void querystring_cb(const char *name, const char *value)
+{
+ if (!strcmp(name, "url")) ctx.qry.url = xstrdup(value);
+ else if (!strcmp(name, "p")) ctx.qry.page = xstrdup(value);
+ else if (!strcmp(name, "q")) ctx.qry.search = xstrdup(value);
+ else if (!strcmp(name, "h")) ctx.qry.head = xstrdup(value);
+ else if (!strcmp(name, "id")) ctx.qry.sha1 = xstrdup(value);
+ else if (!strcmp(name, "id2")) ctx.qry.sha2 = xstrdup(value);
+ else if (!strcmp(name, "ofs")) ctx.qry.ofs = atoi(value);
+ else if (!strcmp(name, "path")) ctx.qry.path = xstrdup(value);
+ else if (!strcmp(name, "name")) ctx.qry.name = xstrdup(value);
+ else if (!strcmp(name, "mimetype")) ctx.qry.mimetype = xstrdup(value);
+ else if (!strcmp(name, "s")) ctx.qry.sort = xstrdup(value);
+ else if (!strcmp(name, "showmsg")) ctx.qry.showmsg = atoi(value);
+ else if (!strcmp(name, "ss")) ctx.qry.ssdiff = atoi(value);
+ else if (!strcmp(name, "all")) ctx.qry.show_all = atoi(value);
+ else if (!strcmp(name, "context")) ctx.qry.context = atoi(value);
+ else if (!strcmp(name, "follow")) ctx.qry.follow = atoi(value);
+ else if (!strcmp(name, "dt")) ctx.qry.dt = atoi(value);
+ else if (!strcmp(name, "grep")) ctx.qry.grep = xstrdup(value);
+ else if (!strcmp(name, "etag")) ctx.qry.etag = xstrdup(value);
+}
+```
+
+### URL Parameter Reference
+
+| Parameter | Query Field | Type | Description |
+|-----------|------------|------|-------------|
+| `url` | `qry.url` | string | Full URL path (repo/page/path) |
+| `p` | `qry.page` | string | Page/command name |
+| `q` | `qry.search` | string | Search string |
+| `h` | `qry.head` | string | Branch/ref name |
+| `id` | `qry.sha1` | string | Object SHA-1 |
+| `id2` | `qry.sha2` | string | Second SHA-1 (diffs) |
+| `ofs` | `qry.ofs` | int | Pagination offset |
+| `path` | `qry.path` | string | File path in repo |
+| `name` | `qry.name` | string | Reference/snapshot name |
+| `mimetype` | `qry.mimetype` | string | MIME type override |
+| `s` | `qry.sort` | string | Sort field |
+| `showmsg` | `qry.showmsg` | int | Show full commit message |
+| `ss` | `qry.ssdiff` | int | Side-by-side diff toggle |
+| `all` | `qry.show_all` | int | Show all entries |
+| `context` | `qry.context` | int | Diff context lines |
+| `follow` | `qry.follow` | int | Follow renames in log |
+| `dt` | `qry.dt` | int | Diff type |
+| `grep` | `qry.grep` | string | Grep pattern for log search |
+| `etag` | `qry.etag` | string | ETag for conditional requests |
+
+## Command Dispatch Table
+
+The command table in `cmd.c` maps page names to handler functions:
+
+```c
+#define def_cmd(name, want_hierarchical, want_repo, want_layout, want_vpath, is_clone) \
+ {#name, cmd_##name, want_hierarchical, want_repo, want_layout, want_vpath, is_clone}
+
+static struct cgit_cmd cmds[] = {
+ def_cmd(atom, 1, 1, 0, 0, 0),
+ def_cmd(about, 0, 1, 1, 0, 0),
+ def_cmd(blame, 1, 1, 1, 1, 0),
+ def_cmd(blob, 1, 1, 0, 0, 0),
+ def_cmd(commit, 1, 1, 1, 1, 0),
+ def_cmd(diff, 1, 1, 1, 1, 0),
+ def_cmd(head, 1, 1, 0, 0, 1),
+ def_cmd(info, 1, 1, 0, 0, 1),
+ def_cmd(log, 1, 1, 1, 1, 0),
+ def_cmd(ls_cache,0, 0, 0, 0, 0),
+ def_cmd(objects, 1, 1, 0, 0, 1),
+ def_cmd(patch, 1, 1, 1, 1, 0),
+ def_cmd(plain, 1, 1, 0, 1, 0),
+ def_cmd(rawdiff, 1, 1, 0, 1, 0),
+ def_cmd(refs, 1, 1, 1, 0, 0),
+ def_cmd(repolist,0, 0, 1, 0, 0),
+ def_cmd(snapshot, 1, 1, 0, 0, 0),
+ def_cmd(stats, 1, 1, 1, 1, 0),
+ def_cmd(summary, 1, 1, 1, 0, 0),
+ def_cmd(tag, 1, 1, 1, 0, 0),
+ def_cmd(tree, 1, 1, 1, 1, 0),
+};
+```
+
+### Command Flags
+
+| Flag | Meaning |
+|------|---------|
+| `want_hierarchical` | Parse hierarchical path from URL |
+| `want_repo` | Requires a repository context |
+| `want_layout` | Render within HTML page layout |
+| `want_vpath` | Accept a virtual path (file path in repo) |
+| `is_clone` | HTTP clone protocol endpoint |
+
+### Lookup: `cgit_get_cmd()`
+
+```c
+struct cgit_cmd *cgit_get_cmd(const char *name)
+{
+ for (int i = 0; i < ARRAY_SIZE(cmds); i++)
+ if (!strcmp(cmds[i].name, name))
+ return &cmds[i];
+ return NULL;
+}
+```
+
+The function performs a linear search. With 21 entries, this is fast enough.
+
+## Request Processing Flow
+
+In `process_request()` (`cgit.c`):
+
+```
+1. Parse PATH_INFO via cgit_parse_url()
+2. Parse QUERY_STRING via http_parse_querystring(querystring_cb)
+3. Parse POST body (for authentication forms)
+4. Resolve repository: cgit_get_repoinfo(ctx.qry.repo)
+5. Determine command: cgit_get_cmd(ctx.qry.page)
+6. If no page specified:
+ - With repo → default to "summary"
+ - Without repo → default to "repolist"
+7. Check command flags:
+ - want_repo but no repo → "Repository not found" error
+ - is_clone and HTTP clone disabled → 404
+8. Handle authentication if auth-filter is configured
+9. Execute: cmd->fn(&ctx)
+```
+
+### Hierarchical Path Resolution
+
+When `want_hierarchical=1`, cgit splits `ctx.qry.path` into a reference
+(branch/tag/SHA) and a file path. It tries progressively longer prefixes
+of the path as git references until one resolves:
+
+```
+path = "main/src/lib/file.c"
+try: "main" → found branch "main"
+ qry.head = "main"
+ qry.path = "src/lib/file.c"
+```
+
+If no prefix resolves, the entire path is treated as a file path within the
+default branch.
+
+## Clone Protocol Endpoints
+
+Three commands serve the Git HTTP clone protocol:
+
+| Endpoint | Path | Function |
+|----------|------|----------|
+| `info` | `repo/info/refs` | `cgit_clone_info()` — advertise refs |
+| `objects` | `repo/objects/*` | `cgit_clone_objects()` — serve packfiles |
+| `head` | `repo/HEAD` | `cgit_clone_head()` — serve HEAD ref |
+
+These endpoints are active only when `enable-http-clone=1` (the default).
+
+## URL Generation
+
+`ui-shared.c` provides URL construction helpers:
+
+```c
+const char *cgit_repourl(const char *reponame);
+const char *cgit_fileurl(const char *reponame, const char *pagename,
+ const char *filename, const char *query);
+const char *cgit_pageurl(const char *reponame, const char *pagename,
+ const char *query);
+const char *cgit_currurl(void);
+```
+
+When `virtual-root` is set, these produce clean paths. Otherwise, they
+produce query-string URLs.
+
+### Example URL Generation
+
+```c
+/* With virtual-root=/cgit/ */
+cgit_repourl("myrepo")
+ → "/cgit/myrepo/"
+
+cgit_fileurl("myrepo", "tree", "src/main.c", "h=dev")
+ → "/cgit/myrepo/tree/src/main.c?h=dev"
+
+cgit_pageurl("myrepo", "log", "ofs=50")
+ → "/cgit/myrepo/log/?ofs=50"
+```
+
+## Content-Type and HTTP Headers
+
+The response content type is set by the command handler before generating
+output. Common types:
+
+| Page | Content-Type |
+|------|-------------|
+| HTML pages | `text/html` |
+| atom | `text/xml` |
+| blob | auto-detected from content |
+| plain | MIME type from extension or `application/octet-stream` |
+| snapshot | `application/x-gzip`, etc. |
+| patch | `text/plain` |
+| clone endpoints | `text/plain`, `application/x-git-packed-objects` |
+
+Headers are emitted by `cgit_print_http_headers()` in `ui-shared.c` before
+any page content.
+
+## Error Handling
+
+If a requested repository or page is not found, cgit renders an error page
+within the standard layout. The HTTP status codes in use are:
+
+| Condition | Status |
+|-----------|--------|
+| Normal page | 200 OK |
+| Auth redirect | 302 Found |
+| Not modified | 304 Not Modified |
+| Bad request | 400 Bad Request |
+| Auth required | 401 Unauthorized |
+| Repo not found | 404 Not Found |
+| Page not found | 404 Not Found |
+
+The status code is set in `ctx.page.status` and emitted by
+`cgit_print_http_headers()` together with the other response headers.