Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 54 additions & 1 deletion docs/scripts/merge_published_site.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,13 @@
from urllib.error import HTTPError, URLError
from urllib.request import urlopen

__all__ = ["load_versions_manifest", "merge_published_site"]
# Characters that `_safe_artifact_name` replaces with "_" in a path component.
_INVALID_ARTIFACT_CHARS = frozenset('"<>:|*?\r\n')

# Public API of this module.
__all__ = [
"load_versions_manifest",
"merge_published_site",
"normalize_artifact_paths",
]


def load_versions_manifest(
Expand Down Expand Up @@ -67,6 +73,48 @@ def _copy_local_version(src: Path, dest: Path) -> None:
if dest.exists():
shutil.rmtree(dest)
shutil.copytree(src, dest)
normalize_artifact_paths(dest)


def _safe_artifact_name(name: str) -> str:
    """Return a filesystem-agnostic artifact name for one path component.

    Everything from the first ``?`` or ``#`` onward is dropped, then every
    remaining character found in ``_INVALID_ARTIFACT_CHARS`` is replaced
    with ``_``.

    Args:
        name: A single path component (no directory separators).

    Returns:
        The sanitized component. ``"download"`` is returned when stripping
        leaves nothing usable: an empty string, ``"."`` or ``".."``.
    """
    stem = name.split("?", 1)[0].split("#", 1)[0]
    # "." and ".." are directory references rather than file names:
    # ``Path.with_name`` rejects "." and ".." would resolve to the parent
    # directory, so neither may be handed back as a rename target.
    if stem in ("", ".", ".."):
        stem = "download"
    return "".join("_" if char in _INVALID_ARTIFACT_CHARS else char for char in stem)


def normalize_artifact_paths(root: Path) -> list[tuple[Path, Path | None]]:
    """Normalize paths that GitHub artifact upload rejects.

    Recursive ``wget`` mirrors URLs with query strings as literal filenames
    such as ``clipboard.min.js?v=a7894cd8``. Browsers resolve that URL against
    the real file ``clipboard.min.js``, so the mirrored query-string copy is
    redundant and invalid for Actions artifacts.

    Entries are processed deepest-first, with ties broken by path text, so
    the result does not depend on the order in which the filesystem lists
    directory entries. When several invalid names map to the same safe name,
    the first one in that order is renamed and the rest are removed.

    Args:
        root: Directory tree to normalize.

    Returns:
        ``(old_path, new_path)`` pairs. ``new_path`` is ``None`` when the
        invalid duplicate was removed because the safe target already existed.
    """
    changes: list[tuple[Path, Path | None]] = []
    # Deepest entries first so that renaming a directory never invalidates the
    # already-collected paths of its children.
    ordered = sorted(
        root.rglob("*"),
        key=lambda item: (-len(item.parts), item.as_posix()),
    )
    for path in ordered:
        safe_name = _safe_artifact_name(path.name)
        if safe_name == path.name:
            continue

        target = path.with_name(safe_name)
        if not target.exists():
            path.rename(target)
            changes.append((path, target))
            continue

        # The safe target already exists, so this entry is a redundant copy.
        # ``shutil.rmtree`` refuses symbolic links, so only real directories
        # go through it; links (including links to directories) are unlinked.
        if path.is_dir() and not path.is_symlink():
            shutil.rmtree(path)
        else:
            path.unlink()
        changes.append((path, None))

    return changes


def _download_version_wget(site_base_url: str, version: str, dest: Path) -> None:
Expand Down Expand Up @@ -106,6 +154,11 @@ def _download_version_wget(site_base_url: str, version: str, dest: Path) -> None
if nested.is_dir() and nested != dest:
nested.rename(dest)

if dest.is_dir():
changes = normalize_artifact_paths(dest)
if changes:
print(f"Normalized {len(changes)} artifact path(s) in {version}.")


def merge_published_site(
build_dir: Path,
Expand Down
1 change: 1 addition & 0 deletions tests/docs/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,3 +37,4 @@ def _load_merge_module():
# Load the merge script once and re-export its helpers so the test modules
# can import them from this conftest.
_merge = _load_merge_module()
load_versions_manifest = _merge.load_versions_manifest
merge_published_site = _merge.merge_published_site
normalize_artifact_paths = _merge.normalize_artifact_paths
39 changes: 38 additions & 1 deletion tests/docs/test_merge_published_site.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,11 @@

import pytest

from .conftest import load_versions_manifest, merge_published_site
from .conftest import (
load_versions_manifest,
merge_published_site,
normalize_artifact_paths,
)


def _write_published_site(root: Path, versions: list[str], latest: str) -> None:
Expand Down Expand Up @@ -152,3 +156,36 @@ def test_main_push_after_tag_preserves_releases(
for name in ("v0.1.0", "v0.2.0", "v0.3.0"):
assert (build_dir / name).is_dir(), f"missing {name} after main push simulation"
assert "rebuilt" in (build_dir / "main" / "index.html").read_text(encoding="utf-8")


def test_normalize_artifact_paths_strips_wget_query_filenames(tmp_path: Path) -> None:
    """A wget query-string filename is renamed to its plain asset name."""
    # Arrange: one mirrored asset whose name still carries the URL query.
    site_root = tmp_path / "build" / "html" / "v0.2.2"
    assets = site_root / "_static"
    assets.mkdir(parents=True)
    mirrored = assets / "clipboard.min.js?v=a7894cd8"
    mirrored.write_text("console.log('copy');", encoding="utf-8")
    renamed = assets / "clipboard.min.js"

    # Act
    result = normalize_artifact_paths(site_root)

    # Assert: the rename is reported and the content moved with the file.
    assert result == [(mirrored, renamed)]
    assert not mirrored.exists()
    assert renamed.read_text(encoding="utf-8") == "console.log('copy');"


def test_normalize_artifact_paths_removes_duplicate_query_file(tmp_path: Path) -> None:
    """The existing plain asset wins; the query-string copy is deleted."""
    # Arrange: the real asset and a wget query-string copy side by side.
    assets = tmp_path / "v0.2.2" / "_static"
    assets.mkdir(parents=True)
    kept = assets / "clipboard.min.js"
    redundant = assets / "clipboard.min.js?v=a7894cd8"
    for file_path, content in ((kept, "existing"), (redundant, "duplicate")):
        file_path.write_text(content, encoding="utf-8")

    # Act
    result = normalize_artifact_paths(tmp_path)

    # Assert: removal is reported as ``None`` and the kept file is untouched.
    assert result == [(redundant, None)]
    assert kept.read_text(encoding="utf-8") == "existing"
    assert not redundant.exists()
Loading