Files
GIA/scripts/vendor_frontend_assets.py
2026-03-11 15:12:17 +00:00

268 lines
9.2 KiB
Python

#!/usr/bin/env python3
from __future__ import annotations
import argparse
import base64
import hashlib
import json
import re
import shutil
import sys
from pathlib import Path, PurePosixPath
from typing import Any
from urllib.parse import urljoin
from urllib.request import Request, urlopen
# Matches CSS `url(...)` references; group 1 is the raw (possibly quoted) ref.
URL_PATTERN = re.compile(r"url\(([^)]+)\)")
def load_json(path: Path) -> dict[str, Any]:
    """Read *path* as UTF-8 text and return the parsed JSON object."""
    raw = path.read_text(encoding="utf-8")
    return json.loads(raw)
def ensure_parent(path: Path) -> None:
    """Create *path*'s parent directory (including ancestors) if it is absent."""
    parent_dir = path.parent
    parent_dir.mkdir(parents=True, exist_ok=True)
def sri(path: Path, algorithm: str) -> str:
    """Return a Subresource Integrity value (`<alg>-<base64 digest>`) for *path*."""
    hasher = hashlib.new(algorithm)
    hasher.update(path.read_bytes())
    encoded = base64.b64encode(hasher.digest()).decode("ascii")
    return f"{algorithm}-{encoded}"
def sha256_hex(path: Path) -> str:
    """Return the hexadecimal SHA-256 digest of *path*'s contents."""
    content = path.read_bytes()
    return hashlib.sha256(content).hexdigest()
def read_package_lock(path: Path) -> dict[str, Any]:
    """Load a package-lock.json, raising a hint about `npm install` if absent."""
    if path.exists():
        return load_json(path)
    raise FileNotFoundError(
        f"Missing {path}. Run `npm install --ignore-scripts` in {path.parent} first."
    )
def lock_entry(lock_data: dict[str, Any], package_name: str) -> dict[str, Any]:
    """Return *package_name*'s entry from the lock file's `packages` table."""
    entry = lock_data.get("packages", {}).get(f"node_modules/{package_name}")
    if entry:
        return entry
    raise KeyError(f"Package {package_name!r} not found in package-lock.json")
def read_package_metadata(package_root: Path) -> dict[str, Any]:
    """Load the package.json sitting inside *package_root*."""
    manifest = package_root / "package.json"
    if not manifest.exists():
        raise FileNotFoundError(f"Missing package metadata: {manifest}")
    return load_json(manifest)
def package_root(npm_root: Path, package_name: str) -> Path:
    """Resolve the on-disk root of *package_name* under *npm_root*/node_modules."""
    root = npm_root / "node_modules"
    # Scoped packages ("@scope/name") contribute one directory per segment.
    for segment in package_name.split("/"):
        root = root / segment
    return root
def normalize_manifest_path(path: str) -> str:
    """Normalize a manifest-relative POSIX path and refuse root escapes.

    The original check only rejected a literal leading "../", so a path such
    as "a/../../b" slipped past it, and an absolute path like "/etc/x" was
    accepted outright — which is dangerous because `target_root / "/etc/x"`
    in pathlib *replaces* the root entirely. This version rejects absolute
    paths and lexically resolves "." and ".." segments, raising whenever the
    path would climb above the target root.

    Raises:
        ValueError: if *path* is absolute, empty, or escapes the target root.
    """
    pure = PurePosixPath(path)
    if pure.is_absolute():
        raise ValueError(f"Refusing to write outside target root: {path}")
    segments: list[str] = []
    for part in pure.parts:
        if part == "..":
            if not segments:
                # Popping past the first segment means the path escapes.
                raise ValueError(f"Refusing to write outside target root: {path}")
            segments.pop()
        elif part != ".":
            segments.append(part)
    if not segments:
        raise ValueError(f"Refusing to write outside target root: {path}")
    return "/".join(segments)
def copy_file(source: Path, targets: list[Path]) -> None:
    """Copy *source* (metadata preserved via copy2) to every path in *targets*."""
    if not source.exists():
        raise FileNotFoundError(f"Missing vendored source file: {source}")
    for destination in targets:
        ensure_parent(destination)
        shutil.copy2(source, destination)
def fetch_bytes(url: str, *, timeout: float = 60.0) -> bytes:
    """Download *url* and return the raw response body.

    Args:
        url: Fully-qualified URL to fetch.
        timeout: Socket timeout in seconds (keyword-only, default 60). The
            original call had no timeout, so a stalled upstream server could
            hang the vendoring run indefinitely.
    """
    request = Request(url, headers={"User-Agent": "GIA frontend asset vendoring"})
    with urlopen(request, timeout=timeout) as response:
        return response.read()
def download_url_bundle(
    asset: dict[str, Any],
    repo_root: Path,
) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
    """Download a CSS entry point plus every asset it references via url(...).

    Each referenced asset is resolved against the entry URL, written next to
    the entry file under every configured target root, and checksummed.

    Fixes over the original: CSS refs frequently carry a query string or
    fragment (e.g. "font.woff2?v=4#iefix"); previously that suffix ended up
    in the local filename, so the vendored file no longer matched the path
    component the CSS names. The suffix is now stripped for the local path
    while the fully resolved URL (suffix included) is still fetched. The
    report also records the exact URL each file was fetched from, and a ref
    appearing multiple times is downloaded only once.

    Returns:
        (report_files, copied_targets): per-file download metadata and
        per-copy hashes for every file written under the target roots.
    """
    target_roots = [repo_root / entry for entry in asset["target_roots"]]
    entry_target = normalize_manifest_path(asset["entry_target"])
    entry_url = asset["entry_url"]
    css_bytes = fetch_bytes(entry_url)
    text = css_bytes.decode("utf-8")
    # relative target path -> (content, exact URL it was fetched from)
    downloaded: dict[str, tuple[bytes, str]] = {entry_target: (css_bytes, entry_url)}
    entry_dir = PurePosixPath(entry_target).parent
    for match in URL_PATTERN.finditer(text):
        raw_ref = match.group(1).strip().strip("'\"")
        if not raw_ref or raw_ref.startswith("data:"):
            continue
        resolved_url = urljoin(entry_url, raw_ref)
        # Drop query string / fragment so the local filename matches the
        # path component of the reference, not its cache-busting suffix.
        bare_ref = raw_ref.split("#", 1)[0].split("?", 1)[0]
        if not bare_ref:
            continue
        target_rel = normalize_manifest_path(entry_dir.joinpath(bare_ref).as_posix())
        if target_rel not in downloaded:
            downloaded[target_rel] = (fetch_bytes(resolved_url), resolved_url)
    report_files: list[dict[str, Any]] = []
    copied_targets: list[dict[str, Any]] = []
    for relative_path, (content, download_url) in downloaded.items():
        for target_root in target_roots:
            target = target_root / relative_path
            ensure_parent(target)
            target.write_bytes(content)
            copied_targets.append(
                {
                    "path": str(target.relative_to(repo_root)),
                    "sha256": sha256_hex(target),
                    "sri_sha512": sri(target, "sha512"),
                }
            )
        report_files.append(
            {
                "relative_path": relative_path,
                "download_url": download_url,
            }
        )
    return report_files, copied_targets
def vendor_npm_file(
    asset: dict[str, Any],
    repo_root: Path,
    npm_root: Path,
    lock_data: dict[str, Any],
) -> dict[str, Any]:
    """Copy one pinned file out of an installed npm package and report on it.

    Reads the package's version/license/homepage from its package.json, copies
    the manifest-specified source file to each target path, and pairs the
    copies with the lock file's resolved tarball URL and integrity value.
    """
    pkg_name = asset["package"]
    pkg_root = package_root(npm_root, pkg_name)
    metadata = read_package_metadata(pkg_root)
    source = pkg_root / asset["source_path"]
    targets = [repo_root / rel for rel in asset["targets"]]
    copy_file(source, targets)
    copied_targets: list[dict[str, Any]] = []
    for target in targets:
        copied_targets.append(
            {
                "path": str(target.relative_to(repo_root)),
                "sha256": sha256_hex(target),
                "sri_sha512": sri(target, "sha512"),
            }
        )
    lock = lock_entry(lock_data, pkg_name)
    report_entry: dict[str, Any] = {
        "id": asset["id"],
        "kind": asset["kind"],
        "package": pkg_name,
        "version": metadata["version"],
        "license": metadata.get("license", ""),
        "homepage": metadata.get("homepage", asset.get("official_url", "")),
        "official_url": asset.get("official_url", ""),
        "purpose": asset.get("purpose", ""),
        "notes": asset.get("notes", ""),
        "resolved": lock.get("resolved", ""),
        "dist_integrity": lock.get("integrity", ""),
        "source_path": asset["source_path"],
        "targets": copied_targets,
    }
    return report_entry
def vendor_url_bundle(asset: dict[str, Any], repo_root: Path) -> dict[str, Any]:
    """Vendor a URL-sourced CSS bundle and build its report entry."""
    downloaded_files, copied_targets = download_url_bundle(asset, repo_root)
    # URL bundles have no package.json, so the homepage falls back to the
    # manifest's official URL.
    official = asset.get("official_url", "")
    report_entry: dict[str, Any] = {
        "id": asset["id"],
        "kind": asset["kind"],
        "version": asset["version"],
        "license": asset.get("license", ""),
        "homepage": official,
        "official_url": official,
        "purpose": asset.get("purpose", ""),
        "notes": asset.get("notes", ""),
        "entry_url": asset["entry_url"],
        "downloaded_files": downloaded_files,
        "targets": copied_targets,
    }
    return report_entry
def markdown_report(entries: list[dict[str, Any]]) -> str:
    """Render the vendored-asset inventory as a Markdown document.

    Each entry becomes a section listing its source, version, licensing
    metadata, optional lock-file provenance, and per-target checksums.
    """
    out: list[str] = [
        "# Frontend Library Inventory",
        "",
        "This report is generated by `scripts/vendor_frontend_assets.py` from `tools/frontend_assets/asset-manifest.json`.",
        "",
    ]
    for entry in entries:
        source = entry.get("package") or entry.get("entry_url", "")
        out.append(f"## {entry['id']}")
        out.append("")
        out.append(f"- Source: `{source}`")
        out.append(f"- Version: `{entry.get('version', 'n/a')}`")
        out.append(f"- Official URL: {entry.get('official_url') or entry.get('homepage') or 'n/a'}")
        out.append(f"- Homepage: {entry.get('homepage') or 'n/a'}")
        out.append(f"- License: `{entry.get('license') or 'n/a'}`")
        out.append(f"- Purpose: {entry.get('purpose') or 'n/a'}")
        resolved = entry.get("resolved")
        if resolved:
            out.append(f"- Resolved tarball: `{resolved}`")
        integrity = entry.get("dist_integrity")
        if integrity:
            out.append(f"- Upstream package integrity: `{integrity}`")
        notes = entry.get("notes")
        if notes:
            out.append(f"- Notes: {notes}")
        bundle_files = entry.get("downloaded_files")
        if bundle_files:
            out.append("- Downloaded bundle files:")
            for file_entry in bundle_files:
                out.append(
                    f" - `{file_entry['relative_path']}` from `{file_entry['download_url']}`"
                )
        out.append("- Local targets:")
        for target in entry["targets"]:
            out.append(f" - `{target['path']}`")
            out.append(f" - SHA-256: `{target['sha256']}`")
            out.append(f" - SRI sha512: `{target['sri_sha512']}`")
        out.extend(["", ""])
    # Collapse the trailing blank lines to exactly one newline.
    return "\n".join(out).rstrip() + "\n"
def main() -> int:
    """Vendor every asset in the manifest and write the JSON/Markdown reports.

    Returns 0 on success; errors surface as exceptions from the helpers.
    """
    parser = argparse.ArgumentParser(
        description="Vendor pinned frontend assets into Django static files."
    )
    parser.add_argument(
        "--manifest",
        default="tools/frontend_assets/asset-manifest.json",
        help="Path to the asset manifest, relative to the repo root.",
    )
    args = parser.parse_args()

    # The script lives in <repo>/scripts/, so the repo root is two levels up.
    repo_root = Path(__file__).resolve().parent.parent
    manifest = load_json(repo_root / args.manifest)
    npm_root = repo_root / manifest["npm_root"]
    lock_data = read_package_lock(npm_root / "package-lock.json")

    entries: list[dict[str, Any]] = []
    for asset in manifest["assets"]:
        kind = asset["kind"]
        if kind == "npm_file":
            entry = vendor_npm_file(asset, repo_root, npm_root, lock_data)
        elif kind == "url_bundle":
            entry = vendor_url_bundle(asset, repo_root)
        else:
            raise ValueError(f"Unsupported asset kind: {kind}")
        entries.append(entry)

    json_path = repo_root / manifest["report_json"]
    markdown_path = repo_root / manifest["report_markdown"]
    for report_path in (json_path, markdown_path):
        ensure_parent(report_path)
    json_path.write_text(
        json.dumps({"entries": entries}, indent=2) + "\n", encoding="utf-8"
    )
    markdown_path.write_text(markdown_report(entries), encoding="utf-8")
    return 0
# Script entry point: propagate main()'s return value as the exit status.
if __name__ == "__main__":
    sys.exit(main())