1 files changed, 331 insertions, 0 deletions
diff --git a/docs/handbook/cgit/url-routing.md b/docs/handbook/cgit/url-routing.md
new file mode 100644
index 0000000000..0adb3b7fc5
--- /dev/null
+++ b/docs/handbook/cgit/url-routing.md
@@ -0,0 +1,331 @@
+# cgit — URL Routing and Request Dispatch
+
+## Overview
+
+cgit supports two URL schemes: virtual-root (path-based) and query-string.
+Incoming requests are parsed into a `cgit_query` structure and dispatched to
+one of 23 command handlers via a function pointer table.
+
+Source files: `cgit.c` (querystring parsing, routing), `parsing.c`
+(`cgit_parse_url`), `cmd.c` (command table).
+
+## URL Schemes
+
+### Virtual Root (Path-Based)
+
+When `virtual-root` is configured, URLs use clean paths:
+
+```
+/cgit/                           → repository list
+/cgit/repo.git/                  → summary
+/cgit/repo.git/log/              → log (default branch)
+/cgit/repo.git/log/main/path    → log for path on branch main
+/cgit/repo.git/tree/v1.0/src/   → tree view at tag v1.0
+/cgit/repo.git/commit/?id=abc   → commit view
+```
+
+The path after the virtual root is passed in `PATH_INFO` and parsed by
+`cgit_parse_url()`.
+
+### Query-String (CGI)
+
+Without virtual root, all parameters are passed in the query string:
+
+```
+/cgit.cgi?url=repo.git/log/main/path&ofs=50
+```
+
+## Query Structure
+
+All parsed parameters are stored in `ctx.qry`:
+
+```c
+struct cgit_query {
+    char *raw;         /* raw URL / PATH_INFO */
+    char *repo;        /* repository URL */
+    char *page;        /* page/command name */
+    char *search;      /* search string */
+    char *grep;        /* grep pattern */
+    char *head;        /* branch reference */
+    char *sha1;        /* object SHA-1 */
+    char *sha2;        /* second SHA-1 (for diffs) */
+    char *path;        /* file/dir path within repo */
+    char *name;        /* snapshot name / ref name */
+    char *url;         /* combined URL path */
+    char *mimetype;    /* requested MIME type */
+    char *etag;        /* ETag from client */
+    int nohead;        /* suppress header */
+    int ofs;           /* pagination offset */
+    int has_symref;    /* path contains a symbolic ref */
+    int has_sha1;      /* explicit SHA was given */
+    int has_dot;       /* path contains '..' */
+    int ignored;       /* request should be ignored */
+    char *sort;        /* sort field */
+    int showmsg;       /* show full commit message */
+    int ssdiff;        /* side-by-side diff */
+    int show_all;      /* show all items */
+    int context;       /* diff context lines */
+    int follow;        /* follow renames */
+    int log_hierarchical_threading;
+};
+```
+
+## URL Parsing: `cgit_parse_url()`
+
+In `parsing.c`, the URL is decomposed into repo, page, and path:
+
+```c
+void cgit_parse_url(const char *url)
+{
+    /* Step 1: try progressively longer prefixes as repo URLs */
+    /* For each '/' in the URL, check if the prefix matches a repo */
+    
+    for (p = strchr(url, '/'); p; p = strchr(p + 1, '/')) {
+        *p = '\0';
+        repo = cgit_get_repoinfo(url);
+        *p = '/';
+        if (repo) {
+            ctx.qry.repo = xstrdup(url_prefix);
+            ctx.repo = repo;
+            url = p + 1;  /* remaining part */
+            break;
+        }
+    }
+    /* if no '/' found, try the whole URL as a repo name */
+    
+    /* Step 2: parse the remaining path as page/ref/path */
+    /* e.g., "log/main/src/file.c" → page="log", path="main/src/file.c" */
+    p = strchr(url, '/');
+    if (p) {
+        ctx.qry.page = xstrndup(url, p - url);
+        ctx.qry.path = trim_end(p + 1, '/');
+    } else if (*url) {
+        ctx.qry.page = xstrdup(url);
+    }
+}
+```
+
+## Query String Parsing: `querystring_cb()`
+
+HTTP query parameters and POST form data are decoded by `querystring_cb()`
+in `cgit.c`.  The function maps URL parameter names to `ctx.qry` fields:
+
+```c
+static void querystring_cb(const char *name, const char *value)
+{
+    if (!strcmp(name, "url"))      ctx.qry.url = xstrdup(value);
+    else if (!strcmp(name, "p"))   ctx.qry.page = xstrdup(value);
+    else if (!strcmp(name, "q"))   ctx.qry.search = xstrdup(value);
+    else if (!strcmp(name, "h"))   ctx.qry.head = xstrdup(value);
+    else if (!strcmp(name, "id"))  ctx.qry.sha1 = xstrdup(value);
+    else if (!strcmp(name, "id2")) ctx.qry.sha2 = xstrdup(value);
+    else if (!strcmp(name, "ofs")) ctx.qry.ofs = atoi(value);
+    else if (!strcmp(name, "path")) ctx.qry.path = xstrdup(value);
+    else if (!strcmp(name, "name")) ctx.qry.name = xstrdup(value);
+    else if (!strcmp(name, "mimetype")) ctx.qry.mimetype = xstrdup(value);
+    else if (!strcmp(name, "s"))   ctx.qry.sort = xstrdup(value);
+    else if (!strcmp(name, "showmsg")) ctx.qry.showmsg = atoi(value);
+    else if (!strcmp(name, "ss"))  ctx.qry.ssdiff = atoi(value);
+    else if (!strcmp(name, "all")) ctx.qry.show_all = atoi(value);
+    else if (!strcmp(name, "context")) ctx.qry.context = atoi(value);
+    else if (!strcmp(name, "follow")) ctx.qry.follow = atoi(value);
+    else if (!strcmp(name, "dt")) ctx.qry.dt = atoi(value);
+    else if (!strcmp(name, "grep")) ctx.qry.grep = xstrdup(value);
+    else if (!strcmp(name, "etag")) ctx.qry.etag = xstrdup(value);
+}
+```
+
+### URL Parameter Reference
+
+| Parameter | Query Field | Type | Description |
+|-----------|------------|------|-------------|
+| `url` | `qry.url` | string | Full URL path (repo/page/path) |
+| `p` | `qry.page` | string | Page/command name |
+| `q` | `qry.search` | string | Search string |
+| `h` | `qry.head` | string | Branch/ref name |
+| `id` | `qry.sha1` | string | Object SHA-1 |
+| `id2` | `qry.sha2` | string | Second SHA-1 (diffs) |
+| `ofs` | `qry.ofs` | int | Pagination offset |
+| `path` | `qry.path` | string | File path in repo |
+| `name` | `qry.name` | string | Reference/snapshot name |
+| `mimetype` | `qry.mimetype` | string | MIME type override |
+| `s` | `qry.sort` | string | Sort field |
+| `showmsg` | `qry.showmsg` | int | Show full commit message |
+| `ss` | `qry.ssdiff` | int | Side-by-side diff toggle |
+| `all` | `qry.show_all` | int | Show all entries |
+| `context` | `qry.context` | int | Diff context lines |
+| `follow` | `qry.follow` | int | Follow renames in log |
+| `dt` | `qry.dt` | int | Diff type |
+| `grep` | `qry.grep` | string | Grep pattern for log search |
+| `etag` | `qry.etag` | string | ETag for conditional requests |
+
+## Command Dispatch Table
+
+The command table in `cmd.c` maps page names to handler functions:
+
+```c
+#define def_cmd(name, want_hierarchical, want_repo, want_layout, want_vpath, is_clone) \
+    {#name, cmd_##name, want_hierarchical, want_repo, want_layout, want_vpath, is_clone}
+
+static struct cgit_cmd cmds[] = {
+    def_cmd(atom,    1, 1, 0, 0, 0),
+    def_cmd(about,   0, 1, 1, 0, 0),
+    def_cmd(blame,   1, 1, 1, 1, 0),
+    def_cmd(blob,    1, 1, 0, 0, 0),
+    def_cmd(commit,  1, 1, 1, 1, 0),
+    def_cmd(diff,    1, 1, 1, 1, 0),
+    def_cmd(head,    1, 1, 0, 0, 1),
+    def_cmd(info,    1, 1, 0, 0, 1),
+    def_cmd(log,     1, 1, 1, 1, 0),
+    def_cmd(ls_cache,0, 0, 0, 0, 0),
+    def_cmd(objects, 1, 1, 0, 0, 1),
+    def_cmd(patch,   1, 1, 1, 1, 0),
+    def_cmd(plain,   1, 1, 0, 1, 0),
+    def_cmd(rawdiff, 1, 1, 0, 1, 0),
+    def_cmd(refs,    1, 1, 1, 0, 0),
+    def_cmd(repolist,0, 0, 1, 0, 0),
+    def_cmd(snapshot, 1, 1, 0, 0, 0),
+    def_cmd(stats,   1, 1, 1, 1, 0),
+    def_cmd(summary, 1, 1, 1, 0, 0),
+    def_cmd(tag,     1, 1, 1, 0, 0),
+    def_cmd(tree,    1, 1, 1, 1, 0),
+};
+```
+
+### Command Flags
+
+| Flag | Meaning |
+|------|---------|
+| `want_hierarchical` | Parse hierarchical path from URL |
+| `want_repo` | Requires a repository context |
+| `want_layout` | Render within HTML page layout |
+| `want_vpath` | Accept a virtual path (file path in repo) |
+| `is_clone` | HTTP clone protocol endpoint |
+
+### Lookup: `cgit_get_cmd()`
+
+```c
+struct cgit_cmd *cgit_get_cmd(const char *name)
+{
+    for (int i = 0; i < ARRAY_SIZE(cmds); i++)
+        if (!strcmp(cmds[i].name, name))
+            return &cmds[i];
+    return NULL;
+}
+```
+
+The function performs a linear search.  With 21 entries, this is fast enough.
+
+## Request Processing Flow
+
+In `process_request()` (`cgit.c`):
+
+```
+1. Parse PATH_INFO via cgit_parse_url()
+2. Parse QUERY_STRING via http_parse_querystring(querystring_cb)
+3. Parse POST body (for authentication forms)
+4. Resolve repository: cgit_get_repoinfo(ctx.qry.repo)
+5. Determine command: cgit_get_cmd(ctx.qry.page)
+6. If no page specified:
+   - With repo → default to "summary"
+   - Without repo → default to "repolist"
+7. Check command flags:
+   - want_repo but no repo → "Repository not found" error
+   - is_clone and HTTP clone disabled → 404
+8. Handle authentication if auth-filter is configured
+9. Execute: cmd->fn(&ctx)
+```
+
+### Hierarchical Path Resolution
+
+When `want_hierarchical=1`, cgit splits `ctx.qry.path` into a reference
+(branch/tag/SHA) and a file path.  It tries progressively longer prefixes
+of the path as git references until one resolves:
+
+```
+path = "main/src/lib/file.c"
+try: "main"                  → found branch "main"
+     qry.head = "main"
+     qry.path = "src/lib/file.c"
+```
+
+If no prefix resolves, the entire path is treated as a file path within the
+default branch.
+
+## Clone Protocol Endpoints
+
+Three commands serve the Git HTTP clone protocol:
+
+| Endpoint | Path | Function |
+|----------|------|----------|
+| `info` | `repo/info/refs` | `cgit_clone_info()` — advertise refs |
+| `objects` | `repo/objects/*` | `cgit_clone_objects()` — serve packfiles |
+| `head` | `repo/HEAD` | `cgit_clone_head()` — serve HEAD ref |
+
+These are only active when `enable-http-clone=1` (default).
+
+## URL Generation
+
+`ui-shared.c` provides URL construction helpers:
+
+```c
+const char *cgit_repourl(const char *reponame);
+const char *cgit_fileurl(const char *reponame, const char *pagename,
+                         const char *filename, const char *query);
+const char *cgit_pageurl(const char *reponame, const char *pagename,
+                         const char *query);
+const char *cgit_currurl(void);
+```
+
+When `virtual-root` is set, these produce clean paths.  Otherwise, they
+produce query-string URLs.
+
+### Example URL generation:
+
+```c
+/* With virtual-root=/cgit/ */
+cgit_repourl("myrepo")
+    → "/cgit/myrepo/"
+
+cgit_fileurl("myrepo", "tree", "src/main.c", "h=dev")
+    → "/cgit/myrepo/tree/src/main.c?h=dev"
+
+cgit_pageurl("myrepo", "log", "ofs=50")
+    → "/cgit/myrepo/log/?ofs=50"
+```
+
+## Content-Type and HTTP Headers
+
+The response content type is set by the command handler before generating
+output.  Common types:
+
+| Page | Content-Type |
+|------|-------------|
+| HTML pages | `text/html` |
+| atom | `text/xml` |
+| blob | auto-detected from content |
+| plain | MIME type from extension or `application/octet-stream` |
+| snapshot | `application/x-gzip`, etc. |
+| patch | `text/plain` |
+| clone endpoints | `text/plain`, `application/x-git-packed-objects` |
+
+Headers are emitted by `cgit_print_http_headers()` in `ui-shared.c` before
+any page content.
+
+## Error Handling
+
+If a requested repository or page is not found, cgit renders an error page
+within the standard layout.  HTTP status codes:
+
+| Condition | Status |
+|-----------|--------|
+| Normal page | 200 OK |
+| Auth redirect | 302 Found |
+| Not modified | 304 Not Modified |
+| Bad request | 400 Bad Request |
+| Auth required | 401 Unauthorized |
+| Repo not found | 404 Not Found |
+| Page not found | 404 Not Found |
+
+The status code is set in `ctx.page.status` and emitted by the HTTP header
+function.