#!/usr/bin/env python3
"""Vendor pinned frontend assets into the repository and report on them.

The asset manifest lists two kinds of assets:

* ``npm_file``   -- a single file copied out of an installed npm package.
* ``url_bundle`` -- a stylesheet downloaded from a URL together with the
  relative files it references through ``url(...)``.

After vendoring, a JSON report and a Markdown report describing every copied
file (path, SHA-256 and SRI sha512) are written to the paths named in the
manifest.
"""
from __future__ import annotations

import argparse
import base64
import hashlib
import json
import posixpath
import re
import shutil
import sys
from pathlib import Path, PurePosixPath
from typing import Any
from urllib.parse import urljoin, urlsplit
from urllib.request import Request, urlopen

# Matches CSS ``url(...)`` references; group 1 is the raw (possibly quoted) ref.
URL_PATTERN = re.compile(r"url\(([^)]+)\)")

# Socket timeout for downloads so an unresponsive host cannot hang the run.
FETCH_TIMEOUT_SECONDS = 30.0


def load_json(path: Path) -> dict[str, Any]:
    """Read *path* as UTF-8 and return the parsed JSON document."""
    return json.loads(path.read_text(encoding="utf-8"))


def ensure_parent(path: Path) -> None:
    """Create the parent directory of *path* (and its ancestors) if missing."""
    path.parent.mkdir(parents=True, exist_ok=True)


def sri(path: Path, algorithm: str) -> str:
    """Return the ``<algorithm>-<base64 digest>`` integrity string for *path*."""
    digest = hashlib.new(algorithm, path.read_bytes()).digest()
    return f"{algorithm}-" + base64.b64encode(digest).decode("ascii")


def sha256_hex(path: Path) -> str:
    """Return the hexadecimal SHA-256 digest of the contents of *path*."""
    return hashlib.sha256(path.read_bytes()).hexdigest()


def read_package_lock(path: Path) -> dict[str, Any]:
    """Load ``package-lock.json`` from *path*.

    Raises:
        FileNotFoundError: if the lock file does not exist.
    """
    if not path.exists():
        raise FileNotFoundError(
            f"Missing {path}. Run `npm install --ignore-scripts` in {path.parent} first."
        )
    return load_json(path)


def lock_entry(lock_data: dict[str, Any], package_name: str) -> dict[str, Any]:
    """Return the lock-file entry stored under ``node_modules/<package_name>``.

    Raises:
        KeyError: if the package has no (non-empty) entry in the lock data.
    """
    packages = lock_data.get("packages", {})
    entry = packages.get("node_modules/" + package_name)
    if not entry:
        raise KeyError(f"Package {package_name!r} not found in package-lock.json")
    return entry


def read_package_metadata(package_root: Path) -> dict[str, Any]:
    """Load ``package.json`` from the package directory *package_root*.

    Raises:
        FileNotFoundError: if ``package.json`` is missing.
    """
    package_json = package_root / "package.json"
    if not package_json.exists():
        raise FileNotFoundError(f"Missing package metadata: {package_json}")
    return load_json(package_json)


def package_root(npm_root: Path, package_name: str) -> Path:
    """Return the ``node_modules`` directory of *package_name* under *npm_root*.

    Scoped names such as ``@scope/name`` are split on ``/`` into nested
    directories.
    """
    return npm_root / "node_modules" / Path(*package_name.split("/"))


def normalize_manifest_path(path: str) -> str:
    """Return *path* as a collapsed POSIX path that stays inside its root.

    ``.`` and ``..`` segments are resolved lexically, so ``css/../fonts/a``
    becomes ``fonts/a``.

    Raises:
        ValueError: if the path is absolute or climbs above the root; joining
            such a path onto a target root would write outside of it.
    """
    normalized = posixpath.normpath(PurePosixPath(path).as_posix())
    escapes_root = normalized == ".." or normalized.startswith("../")
    # An absolute right-hand side replaces the left operand of ``Path / x``,
    # so it must be refused as well.
    if normalized.startswith("/") or escapes_root:
        raise ValueError(f"Refusing to write outside target root: {path}")
    return normalized


def copy_file(source: Path, targets: list[Path]) -> None:
    """Copy *source* to every path in *targets*, creating parent directories.

    Raises:
        FileNotFoundError: if *source* does not exist.
    """
    if not source.exists():
        raise FileNotFoundError(f"Missing vendored source file: {source}")
    for target in targets:
        ensure_parent(target)
        shutil.copy2(source, target)


def fetch_bytes(url: str, timeout: float = FETCH_TIMEOUT_SECONDS) -> bytes:
    """Download *url* and return the response body.

    Args:
        url: Location to fetch.
        timeout: Socket timeout in seconds passed to ``urlopen``.
    """
    request = Request(url, headers={"User-Agent": "GIA frontend asset vendoring"})
    with urlopen(request, timeout=timeout) as response:
        return response.read()


def bundle_references(css_text: str, entry_url: str, entry_target: str) -> dict[str, str]:
    """Map local target paths to download URLs for the files *css_text* uses.

    Every ``url(...)`` reference in *css_text* is resolved against
    *entry_url* (for downloading) and against the directory of *entry_target*
    (for the local path). Skipped references:

    * empty references and ``data:`` URIs,
    * references carrying a scheme or host -- the stylesheet is stored
      unmodified, so it would keep pointing at the remote location anyway,
    * fragment-only references such as ``url(#filter)``.

    Query strings are kept in the download URL but dropped from the local
    path; fragments are dropped from both. The first occurrence of a local
    path wins, so each file is listed once.

    Raises:
        ValueError: if a reference resolves outside the target root.
    """
    entry_dir = PurePosixPath(entry_target).parent
    references: dict[str, str] = {}
    for match in URL_PATTERN.finditer(css_text):
        raw_ref = match.group(1).strip().strip("'\"")
        if not raw_ref or raw_ref.startswith("data:"):
            continue
        parts = urlsplit(raw_ref)
        if parts.scheme or parts.netloc or not parts.path:
            continue
        target_rel = normalize_manifest_path(entry_dir.joinpath(parts.path).as_posix())
        if target_rel in references:
            continue
        remote_ref = parts.path + (f"?{parts.query}" if parts.query else "")
        references[target_rel] = urljoin(entry_url, remote_ref)
    return references


def download_url_bundle(
    asset: dict[str, Any],
    repo_root: Path,
) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
    """Download a stylesheet bundle and write it below every target root.

    Args:
        asset: Manifest entry providing ``target_roots``, ``entry_target`` and
            ``entry_url``.
        repo_root: Repository root that the target roots are relative to.

    Returns:
        ``(report_files, copied_targets)``: one record per downloaded file
        (relative path and the URL it was fetched from) and one record per
        written file (repo-relative path, SHA-256 and SRI sha512).
    """
    target_roots = [repo_root / entry for entry in asset["target_roots"]]
    entry_target = normalize_manifest_path(asset["entry_target"])
    entry_url = asset["entry_url"]

    css_bytes = fetch_bytes(entry_url)
    downloaded: dict[str, bytes] = {entry_target: css_bytes}
    # Remember where each file really came from so the report stays truthful.
    source_urls: dict[str, str] = {entry_target: entry_url}

    references = bundle_references(css_bytes.decode("utf-8"), entry_url, entry_target)
    for target_rel, resolved_url in references.items():
        if target_rel in downloaded:
            # The stylesheet referencing itself must not trigger a re-download.
            continue
        downloaded[target_rel] = fetch_bytes(resolved_url)
        source_urls[target_rel] = resolved_url

    report_files: list[dict[str, Any]] = []
    copied_targets: list[dict[str, Any]] = []
    for relative_path, content in downloaded.items():
        for target_root in target_roots:
            target = target_root / relative_path
            ensure_parent(target)
            target.write_bytes(content)
            copied_targets.append(
                {
                    "path": str(target.relative_to(repo_root)),
                    "sha256": sha256_hex(target),
                    "sri_sha512": sri(target, "sha512"),
                }
            )
        report_files.append(
            {
                "relative_path": relative_path,
                "download_url": source_urls[relative_path],
            }
        )
    return report_files, copied_targets


def vendor_npm_file(
    asset: dict[str, Any],
    repo_root: Path,
    npm_root: Path,
    lock_data: dict[str, Any],
) -> dict[str, Any]:
    """Copy one file out of an installed npm package and describe the result.

    Args:
        asset: Manifest entry providing ``id``, ``kind``, ``package``,
            ``source_path`` and ``targets`` plus optional descriptive fields.
        repo_root: Repository root that the targets are relative to.
        npm_root: Directory containing ``node_modules``.
        lock_data: Parsed ``package-lock.json``.

    Returns:
        The report entry for this asset.
    """
    pkg_name = asset["package"]
    pkg_root = package_root(npm_root, pkg_name)
    metadata = read_package_metadata(pkg_root)
    source = pkg_root / asset["source_path"]
    targets = [repo_root / target for target in asset["targets"]]
    copy_file(source, targets)

    copied_targets = [
        {
            "path": str(target.relative_to(repo_root)),
            "sha256": sha256_hex(target),
            "sri_sha512": sri(target, "sha512"),
        }
        for target in targets
    ]
    lock = lock_entry(lock_data, pkg_name)
    return {
        "id": asset["id"],
        "kind": asset["kind"],
        "package": pkg_name,
        "version": metadata["version"],
        "license": metadata.get("license", ""),
        "homepage": metadata.get("homepage", asset.get("official_url", "")),
        "official_url": asset.get("official_url", ""),
        "purpose": asset.get("purpose", ""),
        "notes": asset.get("notes", ""),
        "resolved": lock.get("resolved", ""),
        "dist_integrity": lock.get("integrity", ""),
        "source_path": asset["source_path"],
        "targets": copied_targets,
    }


def vendor_url_bundle(asset: dict[str, Any], repo_root: Path) -> dict[str, Any]:
    """Download a ``url_bundle`` asset and return its report entry."""
    downloaded_files, copied_targets = download_url_bundle(asset, repo_root)
    return {
        "id": asset["id"],
        "kind": asset["kind"],
        "version": asset["version"],
        "license": asset.get("license", ""),
        "homepage": asset.get("official_url", ""),
        "official_url": asset.get("official_url", ""),
        "purpose": asset.get("purpose", ""),
        "notes": asset.get("notes", ""),
        "entry_url": asset["entry_url"],
        "downloaded_files": downloaded_files,
        "targets": copied_targets,
    }


def markdown_report(entries: list[dict[str, Any]]) -> str:
    """Render the report entries as a Markdown document ending in a newline."""
    lines = [
        "# Frontend Library Inventory",
        "",
        "This report is generated by `scripts/vendor_frontend_assets.py` from `tools/frontend_assets/asset-manifest.json`.",
        "",
    ]
    for entry in entries:
        title = entry["id"]
        package_or_url = entry.get("package") or entry.get("entry_url", "")
        lines.extend(
            [
                f"## {title}",
                "",
                f"- Source: `{package_or_url}`",
                f"- Version: `{entry.get('version', 'n/a')}`",
                f"- Official URL: {entry.get('official_url') or entry.get('homepage') or 'n/a'}",
                f"- Homepage: {entry.get('homepage') or 'n/a'}",
                f"- License: `{entry.get('license') or 'n/a'}`",
                f"- Purpose: {entry.get('purpose') or 'n/a'}",
            ]
        )
        if entry.get("resolved"):
            lines.append(f"- Resolved tarball: `{entry['resolved']}`")
        if entry.get("dist_integrity"):
            lines.append(f"- Upstream package integrity: `{entry['dist_integrity']}`")
        if entry.get("notes"):
            lines.append(f"- Notes: {entry['notes']}")
        # Nested items are indented by the width of the parent marker so that
        # Markdown renders them as sub-lists rather than as sibling bullets.
        if entry.get("downloaded_files"):
            lines.append("- Downloaded bundle files:")
            for file_entry in entry["downloaded_files"]:
                lines.append(
                    f"  - `{file_entry['relative_path']}` from `{file_entry['download_url']}`"
                )
        lines.append("- Local targets:")
        for target in entry["targets"]:
            lines.append(f"  - `{target['path']}`")
            lines.append(f"    - SHA-256: `{target['sha256']}`")
            lines.append(f"    - SRI sha512: `{target['sri_sha512']}`")
        lines.extend(["", ""])
    return "\n".join(lines).rstrip() + "\n"


def main() -> int:
    """Vendor every asset in the manifest and write both reports.

    Returns:
        ``0`` on success, for use as the process exit status.

    Raises:
        ValueError: if the manifest contains an unsupported asset kind.
    """
    parser = argparse.ArgumentParser(description="Vendor pinned frontend assets into Django static files.")
    parser.add_argument(
        "--manifest",
        default="tools/frontend_assets/asset-manifest.json",
        help="Path to the asset manifest, relative to the repo root.",
    )
    args = parser.parse_args()

    # The repository root is taken to be the parent of this script's directory.
    repo_root = Path(__file__).resolve().parent.parent
    manifest_path = repo_root / args.manifest
    manifest = load_json(manifest_path)
    npm_root = repo_root / manifest["npm_root"]
    lock_data = read_package_lock(npm_root / "package-lock.json")

    entries: list[dict[str, Any]] = []
    for asset in manifest["assets"]:
        if asset["kind"] == "npm_file":
            entries.append(vendor_npm_file(asset, repo_root, npm_root, lock_data))
        elif asset["kind"] == "url_bundle":
            entries.append(vendor_url_bundle(asset, repo_root))
        else:
            raise ValueError(f"Unsupported asset kind: {asset['kind']}")

    report_json_path = repo_root / manifest["report_json"]
    report_markdown_path = repo_root / manifest["report_markdown"]
    ensure_parent(report_json_path)
    ensure_parent(report_markdown_path)
    report_json_path.write_text(json.dumps({"entries": entries}, indent=2) + "\n", encoding="utf-8")
    report_markdown_path.write_text(markdown_report(entries), encoding="utf-8")
    return 0


if __name__ == "__main__":
    sys.exit(main())