summaryrefslogtreecommitdiff
path: root/meta/meta/run/update_optifine.py
diff options
context:
space:
mode:
Diffstat (limited to 'meta/meta/run/update_optifine.py')
-rw-r--r-- meta/meta/run/update_optifine.py | 477
1 files changed, 128 insertions, 349 deletions
diff --git a/meta/meta/run/update_optifine.py b/meta/meta/run/update_optifine.py
index 833e08e263..4eef132e0d 100644
--- a/meta/meta/run/update_optifine.py
+++ b/meta/meta/run/update_optifine.py
@@ -1,206 +1,115 @@
import json
import os
import re
-from urllib.parse import urljoin, urlparse, parse_qs
+import hashlib
import concurrent.futures
-import threading
-
-try:
- from meta.common import upstream_path, ensure_upstream_dir, default_session
- from meta.common.optifine import VERSIONS_FILE, BASE_DIR
- HAVE_META = True
-except Exception:
- # meta.common or its dependencies (requests) may not be available in this environment.
- HAVE_META = False
- def upstream_path():
- return "upstream"
-
- def ensure_upstream_dir(path):
- path = os.path.join(upstream_path(), path)
- if not os.path.exists(path):
- os.makedirs(path, exist_ok=True)
-
- def default_session():
- raise RuntimeError("HTTP session unavailable: install 'requests' and 'cachecontrol'")
+from urllib.parse import urljoin, urlparse, parse_qs
- VERSIONS_FILE = "versions.json"
- BASE_DIR = "optifine"
+from meta.common import upstream_path, ensure_upstream_dir, default_session
+from meta.common.optifine import VERSIONS_FILE, BASE_DIR
UPSTREAM_DIR = upstream_path()
-
ensure_upstream_dir(BASE_DIR)
-sess = None
-if HAVE_META:
- sess = default_session()
+sess = default_session()
+# Configurable via environment
+TIMEOUT = float(os.environ.get("OPTIFINE_TIMEOUT", "10"))
+HASH_TIMEOUT = float(os.environ.get("OPTIFINE_HASH_TIMEOUT", "120"))
+CONCURRENCY = max(1, int(os.environ.get("OPTIFINE_CONCURRENCY", "8")))
+COMPUTE_HASH = os.environ.get("OPTIFINE_COMPUTE_HASH", "1").lower() in ("1", "true", "yes")
-def _resolve_href(href: str):
- """Return (filename, resolved_href).
- Handles cases where href is a redirect wrapper (e.g., adfoc.us with an inner
- 'url=' parameter) or where the 'f' query parameter is present.
- """
+def _resolve_href(href: str):
+ """Return (filename, resolved_href) from an OptiFine download link."""
parsed = urlparse(href)
q = parse_qs(parsed.query)
- # Direct f parameter
f = q.get("f")
if f:
return f[0], href
- # Some wrappers embed an inner url parameter that contains the real target
inner = q.get("url")
if inner:
- # inner may be a list; pick first
- inner_url = inner[0]
- inner_parsed = urlparse(inner_url)
- inner_q = parse_qs(inner_parsed.query)
- inner_f = inner_q.get("f")
+ inner_parsed = urlparse(inner[0])
+ inner_f = parse_qs(inner_parsed.query).get("f")
if inner_f:
- return inner_f[0], inner_url
+ return inner_f[0], inner[0]
- # fallback: last path component
return os.path.basename(parsed.path), href
-def _clean_key(filename: str) -> str:
- # Remove OptiFine prefix, any trailing ad-wrapper segments, and the .jar suffix
- key = re.sub(r"^OptiFine[_-]", "", filename, flags=re.IGNORECASE)
- key = re.sub(r"\.jar$", "", key, flags=re.IGNORECASE)
- # Strip trailing ad/adload/adloadx wrapper fragments that appear in some links
- key = re.sub(r"[_-]ad[a-z0-9_-]*$", "", key, flags=re.IGNORECASE)
- return key
-
-
def _strip_ad_wrapper(filename: str) -> str:
- """Remove trailing ad/adload/adloadx wrapper fragments from a filename.
-
- Example: OptiFine_1.20.1_HD_U_H7_adloadx.jar -> OptiFine_1.20.1_HD_U_H7.jar
- """
+ """Remove trailing ad/adload/adloadx fragments from a filename."""
if not filename:
return filename
root, ext = os.path.splitext(filename)
- # remove trailing segments that start with _ad or -ad
root = re.sub(r"[_-]ad[a-z0-9_-]*$", "", root, flags=re.IGNORECASE)
return root + ext
-def _guess_platforms(filename: str, label: str = None, changelog: str = None):
- """Heuristically guess platform compatibility tags for an OptiFine build.
-
- Returns a list like ['mojang', 'neoforge', 'fabric'] based on keywords.
- """
- text = " ".join(filter(None, [filename or "", label or "", changelog or ""]))
- tl = text.lower()
- platforms = []
- # OptiFine always targets vanilla (Mojang) builds
- platforms.append("mojang")
- # Forge / NeoForge variants
- if "neoforge" in tl or "neo-forge" in tl or "forge" in tl:
- platforms.append("neoforge")
- # Fabric
- if "fabric" in tl:
- platforms.append("fabric")
- # Quilt
- if "quilt" in tl:
- platforms.append("quilt")
- # LiteLoader / older loaders
- if "liteloader" in tl:
- platforms.append("liteloader")
-
- # Deduplicate while preserving order
- seen = set()
- out = []
- for p in platforms:
- if p not in seen:
- seen.add(p)
- out.append(p)
- return out
+def _clean_key(filename: str) -> str:
+ """Normalize filename to version key: strip OptiFine_ prefix and .jar suffix."""
+ key = re.sub(r"^OptiFine[_-]", "", filename, flags=re.IGNORECASE)
+ key = re.sub(r"\.jar$", "", key, flags=re.IGNORECASE)
+ key = re.sub(r"[_-]ad[a-z0-9_-]*$", "", key, flags=re.IGNORECASE)
+ return key
def _score_entry(entry: dict) -> int:
+ """Score an entry to pick the best when duplicates exist."""
url = (entry.get("download_page") or "").lower()
s = 0
- if "optifine.net/adloadx" in url or "optifine.net/adload" in url or "optifine.net/download" in url:
+ if any(k in url for k in ("optifine.net/adloadx", "optifine.net/adload", "optifine.net/download")):
s += 10
- if url.endswith(".jar") or entry.get("filename", "").lower().endswith(".jar"):
+ if url.endswith(".jar") or (entry.get("filename") or "").lower().endswith(".jar"):
s += 5
if "preview" in (entry.get("filename") or "").lower():
s -= 2
return s
-def main():
- url = "https://optifine.net/downloads"
- print(f"Fetching OptiFine downloads page: {url}")
- # configurable timeouts (seconds)
- default_timeout = float(os.environ.get("OPTIFINE_TIMEOUT", "10"))
-
- try:
- r = sess.get(url, timeout=default_timeout)
- r.raise_for_status()
- html = r.text
- except Exception as e:
- print(f"Error fetching downloads page: {e}")
- html = ""
-
+def _scrape_downloads(html: str, base_url: str) -> dict:
+ """Parse OptiFine downloads page and return {key: entry_dict}."""
versions = {}
- # Try parsing with BeautifulSoup if available; be permissive about href forms
try:
from bs4 import BeautifulSoup
-
soup = BeautifulSoup(html, "html.parser")
- anchors = soup.find_all("a", href=True)
- inspected = 0
- matched = 0
- for a in anchors:
- inspected += 1
+
+ for a in soup.find_all("a", href=True):
href = a["href"]
href_l = href.lower()
-
- # Accept several formats: any URL containing '?f=' (adload/adloadx/download), or direct .jar links
if "?f=" not in href_l and not href_l.endswith(".jar"):
continue
- matched += 1
filename, resolved = _resolve_href(href)
- # strip ad/adload/adloadx wrapper parts from filename
filename = _strip_ad_wrapper(filename)
- # Try to get version text from the same table row or nearby text
ver_text = None
changelog = None
date = None
+
tr = a.find_parent("tr")
if tr:
tds = tr.find_all("td")
if tds:
ver_text = tds[0].get_text(strip=True)
- # find changelog link in the row
ch = tr.find("a", href=lambda h: h and "changelog" in h)
if ch:
changelog = ch.get("href")
- # find date cell
date_td = tr.find("td", class_=lambda c: c and "colDate" in c)
if date_td:
date = date_td.get_text(strip=True)
if not ver_text:
- # fallback: anchor text or nearby text nodes
- if a.string and a.string.strip():
- ver_text = a.string.strip()
- else:
- prev = a.find_previous(string=True)
- if prev:
- ver_text = prev.strip()
+ ver_text = (a.string or "").strip() or filename
key = _clean_key(filename)
data = {
"filename": filename,
- "download_page": urljoin(url, resolved),
- "label": ver_text or filename,
+ "download_page": urljoin(base_url, resolved),
+ "label": ver_text,
"changelog": changelog,
"date": date,
}
@@ -208,13 +117,9 @@ def main():
existing = versions.get(key)
if existing is None or _score_entry(data) > _score_entry(existing):
versions[key] = data
- platforms = _guess_platforms(data.get("filename"), data.get("label"), data.get("changelog"))
- print(f"Added {key}: platforms: {', '.join(platforms)}")
-
- print(f"Inspected {inspected} anchors, matched {matched} potential downloads")
- except Exception:
- # Fallback: regex parse (case-insensitive)
- print("BeautifulSoup not available or parsing failed, falling back to regex parse")
+ except ImportError:
+ # Fallback: regex parse
+ print("BeautifulSoup not available, falling back to regex parse")
for match in re.finditer(r'href="([^"]*\?f=[^"\s]+)"', html, flags=re.IGNORECASE):
href = match.group(1)
filename, resolved = _resolve_href(href)
@@ -222,234 +127,108 @@ def main():
key = _clean_key(filename)
data = {
"filename": filename,
- "download_page": urljoin(url, resolved),
+ "download_page": urljoin(base_url, resolved),
"label": filename,
}
existing = versions.get(key)
if existing is None or _score_entry(data) > _score_entry(existing):
versions[key] = data
- platforms = _guess_platforms(data.get("filename"), data.get("label"), data.get("changelog"))
- print(f"Added {key}: platforms: {', '.join(platforms)}")
- # Determine base output directory. Some upstream implementations return a
- # path that already includes BASE_DIR, avoid duplicating it.
- if UPSTREAM_DIR.endswith(BASE_DIR):
- base_out_dir = UPSTREAM_DIR
- else:
- base_out_dir = os.path.join(UPSTREAM_DIR, BASE_DIR)
-
- # Ensure output directory exists (defensive: collapse duplicate trailing BASE_DIR segments)
- parts = base_out_dir.split(os.sep)
- while len(parts) >= 2 and parts[-1] == BASE_DIR and parts[-2] == BASE_DIR:
- parts.pop(-1)
- base_out_dir = os.sep.join(parts)
- os.makedirs(base_out_dir, exist_ok=True)
-
- out_path = os.path.join(base_out_dir, VERSIONS_FILE)
- # Attempt to resolve final download URLs and optionally compute hashes
- # Default to computing SHA256 for each resolved file unless explicitly disabled
- compute_hash = os.environ.get("OPTIFINE_COMPUTE_HASH", "1").lower() in ("1", "true", "yes")
- resolved_count = 0
- hashed_count = 0
-
- if HAVE_META and sess is not None:
+ return versions
+
+
+def _make_download_url(filename: str) -> str:
+ """Build a stable OptiFine download URL from a filename.
+
+ Uses the permanent https://optifine.net/download?f=FILENAME format
+ instead of the adloadx/downloadx token URLs which expire.
+ """
+    return f"https://optifine.net/download?f={filename}"
+
+
+def _resolve_and_hash(key: str, data: dict) -> dict:
+ """Build stable download URL and optionally compute SHA256 for a single entry."""
+ filename = data.get("filename")
+ if not filename:
+ return data
+
+ # Use stable download URL instead of expiring token-based downloadx URLs
+ download_url = _make_download_url(filename)
+ data["resolved_url"] = download_url
+
+ # Compute hash if enabled
+ if COMPUTE_HASH:
try:
- # Use a ThreadPoolExecutor to parallelize network I/O for resolving URLs
- concurrency = int(os.environ.get("OPTIFINE_CONCURRENCY", "8"))
- if concurrency < 1:
- concurrency = 1
-
- total = len(versions)
- counter = {"idx": 0}
- counter_lock = threading.Lock()
-
- def _process_item(item):
- key, data = item
- with counter_lock:
- counter["idx"] += 1
- idx = counter["idx"]
-
- dp = data.get("download_page")
- if not dp:
- return key, data, False, False
-
- print(f"[{idx}/{total}] Resolving {key} ({data.get('filename')}) -> {dp}")
-
- # Each worker creates its own session to avoid any session thread-safety issues
- sess_local = None
- if HAVE_META:
- try:
- sess_local = default_session()
- except Exception:
- sess_local = None
-
- # Fallback to global sess if default_session unavailable
- if sess_local is None:
- sess_local = sess
-
- final_url = None
- try:
- # Try HEAD first
- try:
- resp = sess_local.head(dp, allow_redirects=True, timeout=default_timeout)
- except Exception as e_head:
- # Try GET as fallback for hosts that block HEAD
- try:
- resp = sess_local.get(dp, allow_redirects=True, timeout=default_timeout)
- except Exception:
- resp = None
-
- if resp is not None:
- final_url = getattr(resp, "url", None)
-
- # Try to extract downloadx link from page HTML (short GET if needed)
- page_text = None
- if resp is not None and hasattr(resp, "text") and resp.text:
- page_text = resp.text
- else:
- try:
- rtmp = sess_local.get(dp, allow_redirects=True, timeout=5)
- page_text = getattr(rtmp, "text", None)
- final_url = getattr(rtmp, "url", final_url)
- except Exception:
- page_text = None
-
- if page_text:
- m = re.search(r"(downloadx\?f=[^\"'\s>]+)", page_text, flags=re.IGNORECASE)
- if m:
- candidate = m.group(1)
- base_for_join = final_url or dp
- final_url = urljoin(base_for_join, candidate)
- print(f" Extracted downloadx link for {key}: {final_url}")
-
- # If still not a .jar/f param, do a full GET and inspect final URL
- if not final_url or (".jar" not in final_url and "?f=" not in final_url):
- try:
- resp2 = sess_local.get(dp, allow_redirects=True, timeout=30)
- final_url = getattr(resp2, "url", final_url)
- except Exception:
- pass
-
- hashed = False
- if final_url:
- data["resolved_url"] = final_url
- print(f" Resolved {key} -> {final_url}")
-
- if compute_hash:
- try:
- import hashlib
-
- print(f" Hashing {key} from {final_url} ...")
- h = hashlib.sha256()
- size = 0
- hash_timeout = float(os.environ.get("OPTIFINE_HASH_TIMEOUT", "120"))
- r2 = sess_local.get(final_url, stream=True, timeout=hash_timeout)
- r2.raise_for_status()
- for chunk in r2.iter_content(8192):
- if not chunk:
- continue
- h.update(chunk)
- size += len(chunk)
- data["sha256"] = h.hexdigest()
- data["size"] = size
- hashed = True
- print(f" Hashed {key}: sha256={data['sha256']} size={data['size']}")
- except Exception as e_hash:
- print(f" Warning: failed to hash {final_url}: {e_hash}")
-
- return key, data, bool(final_url), hashed
- except Exception as e:
- print(f" Error processing {key}: {e}")
- return key, data, False, False
-
- items = list(versions.items())
- if concurrency == 1:
- # run serially
- results = map(_process_item, items)
- else:
- with concurrent.futures.ThreadPoolExecutor(max_workers=concurrency) as ex:
- results = ex.map(_process_item, items)
-
- # Collect results and write per-version files as each item completes
- for key, data, resolved_flag, hashed_flag in results:
- versions[key] = data
- # Ensure per-version dir exists
- try:
- os.makedirs(base_out_dir, exist_ok=True)
- per_path = os.path.join(base_out_dir, f"{key}.json")
- with open(per_path, "w") as pf:
- json.dump(data, pf, indent=4)
- print(f"Wrote per-version file: {per_path}")
- except Exception as e:
- print(f"Warning: failed to write per-version file for {key}: {e}")
-
- if resolved_flag:
- resolved_count += 1
- if hashed_flag:
- hashed_count += 1
- except KeyboardInterrupt:
- print("Interrupted by user (KeyboardInterrupt). Writing partial results...")
-
- # Write combined index (ensure parent exists)
- os.makedirs(os.path.dirname(out_path) or ".", exist_ok=True)
- with open(out_path, "w") as f:
- json.dump(versions, f, indent=4)
+ h = hashlib.sha256()
+ size = 0
+ r = sess.get(download_url, stream=True, timeout=HASH_TIMEOUT)
+ r.raise_for_status()
+ for chunk in r.iter_content(8192):
+ if chunk:
+ h.update(chunk)
+ size += len(chunk)
+ data["sha256"] = h.hexdigest()
+ data["size"] = size
+ except Exception as e:
+ print(f" Warning: hash failed for {key}: {e}")
+
+ return data
+
+
+def main():
+ url = "https://optifine.net/downloads"
+ print(f"Fetching OptiFine downloads page: {url}")
- # Also write per-version JSON files under the upstream component directory
try:
- for key, data in versions.items():
- per_path = os.path.join(base_out_dir, f"{key}.json")
- with open(per_path, "w") as pf:
- json.dump(data, pf, indent=4)
- print(f"Wrote per-version file: {per_path}")
+ r = sess.get(url, timeout=TIMEOUT)
+ r.raise_for_status()
+ html = r.text
except Exception as e:
- print(f"Warning: failed to write per-version files: {e}")
-
- print(f"Wrote {len(versions)} OptiFine entries to {out_path}")
- if HAVE_META and sess is not None:
- print(f"Resolved {resolved_count} final URLs")
- if compute_hash:
- print(f"Computed {hashed_count} SHA256 hashes (OPTIFINE_COMPUTE_HASH=1)")
- # If some entries are missing sha256 (e.g., were written before hashing completed),
- # compute them now in parallel and update files.
- missing = [ (k,v) for k,v in versions.items() if v.get("resolved_url") and not v.get("sha256") ]
- if missing:
- print(f"Computing missing SHA256 for {len(missing)} entries...")
- def _compute_and_write(item):
- k, v = item
- url_final = v.get("resolved_url")
- try:
- import hashlib
- hash_timeout = float(os.environ.get("OPTIFINE_HASH_TIMEOUT", "120"))
- h = hashlib.sha256()
- size = 0
- r = sess.get(url_final, stream=True, timeout=hash_timeout)
- r.raise_for_status()
- for chunk in r.iter_content(8192):
- if not chunk:
- continue
- h.update(chunk)
- size += len(chunk)
- v["sha256"] = h.hexdigest()
- v["size"] = size
- per_path = os.path.join(base_out_dir, f"{k}.json")
- with open(per_path, "w") as pf:
- json.dump(v, pf, indent=4)
- print(f" Hashed {k}: {v['sha256']} size={v['size']}")
- return True
- except Exception as e:
- print(f" Warning: failed to compute hash for {k}: {e}")
- return False
-
- concurrency = int(os.environ.get("OPTIFINE_CONCURRENCY", "8"))
- if concurrency < 1:
- concurrency = 1
- completed = 0
- with concurrent.futures.ThreadPoolExecutor(max_workers=concurrency) as ex:
- for ok in ex.map(_compute_and_write, missing):
- if ok:
- completed += 1
- print(f"Completed extra hashing: {completed}/{len(missing)}")
+ print(f"Error fetching downloads page: {e}")
+ return
+
+ versions = _scrape_downloads(html, url)
+ print(f"Scraped {len(versions)} OptiFine entries")
+
+ if not versions:
+ print("No versions found, aborting")
+ return
+
+ out_dir = os.path.join(UPSTREAM_DIR, BASE_DIR)
+ os.makedirs(out_dir, exist_ok=True)
+
+ # Resolve URLs and compute hashes in parallel
+ def _process(item):
+ key, data = item
+ print(f" Resolving {key}...")
+ data = _resolve_and_hash(key, data)
+ return key, data
+
+ items = list(versions.items())
+ if CONCURRENCY == 1:
+ results = [_process(item) for item in items]
+ else:
+ with concurrent.futures.ThreadPoolExecutor(max_workers=CONCURRENCY) as ex:
+ results = list(ex.map(_process, items))
+
+ # Write per-version files and build combined index
+ versions = {}
+ for key, data in results:
+ versions[key] = data
+ per_path = os.path.join(out_dir, f"{key}.json")
+ with open(per_path, "w") as f:
+ json.dump(data, f, indent=4)
+
+ # Write combined index
+ combined_path = os.path.join(UPSTREAM_DIR, VERSIONS_FILE)
+ os.makedirs(os.path.dirname(combined_path) or ".", exist_ok=True)
+ with open(combined_path, "w") as f:
+ json.dump(versions, f, indent=4)
+
+ resolved = sum(1 for v in versions.values() if v.get("resolved_url"))
+ hashed = sum(1 for v in versions.values() if v.get("sha256"))
+ print(f"Wrote {len(versions)} entries to {combined_path}")
+ print(f"Resolved: {resolved}, Hashed: {hashed}")
if __name__ == "__main__":