Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 62 additions & 6 deletions docs/scripts/merge_published_site.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
_INVALID_ARTIFACT_CHARS = frozenset('"<>:|*?\r\n')

__all__ = [
"flatten_nested_version_dirs",
"load_versions_manifest",
"merge_published_site",
"normalize_artifact_paths",
Expand Down Expand Up @@ -117,6 +118,53 @@ def normalize_artifact_paths(root: Path) -> list[tuple[Path, Path | None]]:
return changes


def _cleanup_empty_dirs(root: Path) -> None:
    """Remove every empty directory beneath ``root``, deepest first.

    Entries are visited in order of decreasing depth, so a parent that becomes
    empty once its children are removed is itself removed later in the same
    pass. ``root`` is not among the visited entries and is never removed.

    Args:
        root: Directory whose descendants are pruned.
    """
    deepest_first = sorted(
        root.rglob("*"), key=lambda item: len(item.parts), reverse=True
    )
    for directory in deepest_first:
        # A symlink to an empty directory satisfies ``is_dir()`` and has no
        # entries, but ``rmdir()`` on the link itself raises
        # ``NotADirectoryError``; leave links untouched.
        if directory.is_symlink() or not directory.is_dir():
            continue
        if not any(directory.iterdir()):
            directory.rmdir()


def flatten_nested_version_dirs(build_dir: Path) -> list[tuple[Path, Path | None]]:
    """Move nested downloaded version dirs to the build root.

    ``wget -nH`` removes the host component but keeps URL path components. For
    project Pages URLs this can produce ``build/html/EmbodiChain/v0.2.2``. The
    version manifest generator only scans top-level ``v*`` directories, so keep
    release versions directly under ``build/html``.

    A nested directory is treated as a version dir when it matches ``v*`` and
    contains an ``index.html``. Matches located inside another version dir
    (for example a ``viewer/`` page folder within ``v0.2.2``) are ordinary
    site content: they stay where they are and travel with their enclosing
    version dir.

    Args:
        build_dir: Sphinx output root (``docs/build/html``).

    Returns:
        ``(old_path, new_path)`` pairs. ``new_path`` is ``None`` when a nested
        duplicate was removed because the top-level version already exists.
    """
    build_dir = build_dir.resolve()

    def holds_index(path: Path) -> bool:
        """Return True when ``path`` is a directory containing ``index.html``."""
        return path.is_dir() and (path / "index.html").is_file()

    def inside_version_dir(path: Path) -> bool:
        """Return True when an ancestor below ``build_dir`` is a version dir."""
        for ancestor in path.parents:
            if ancestor == build_dir:
                return False
            if ancestor.name.startswith("v") and holds_index(ancestor):
                return True
        return False

    # Shallowest first, so the copy closest to the root wins when the same
    # version name appears at several depths.
    candidates = sorted(
        (
            path
            for path in build_dir.rglob("v*")
            if path.parent != build_dir
            and holds_index(path)
            # Without this filter a content folder inside a version would be
            # hoisted to the build root, and its path would be stale (raising
            # FileNotFoundError) once the enclosing version had been moved.
            and not inside_version_dir(path)
        ),
        key=lambda path: len(path.parts),
    )

    changes: list[tuple[Path, Path | None]] = []
    for candidate in candidates:
        target = build_dir / candidate.name
        if target.exists():
            # The top-level version already exists; drop the nested duplicate.
            shutil.rmtree(candidate)
            changes.append((candidate, None))
        else:
            candidate.rename(target)
            changes.append((candidate, target))

    # Prune the now-empty prefix directories (e.g. ``EmbodiChain/``).
    _cleanup_empty_dirs(build_dir)
    return changes


def _download_version_wget(site_base_url: str, version: str, dest: Path) -> None:
"""Download one version subtree with wget (available in CI containers)."""
url = f"{site_base_url.rstrip('/')}/{version}/"
Expand All @@ -142,10 +190,10 @@ def _download_version_wget(site_base_url: str, version: str, dest: Path) -> None
)
if result.returncode != 0:
print(f"wget failed for {url} (exit {result.returncode})", file=sys.stderr)
return

Comment on lines 191 to 193
# wget may create dest.parent/<version>/ or preserve extra URL path
# segments such as dest.parent/EmbodiChain/<version>/; normalize that.
flatten_nested_version_dirs(dest.parent)
if not dest.is_dir():
candidates = [
candidate
Expand All @@ -162,12 +210,10 @@ def _download_version_wget(site_base_url: str, version: str, dest: Path) -> None
changes = normalize_artifact_paths(dest)
if changes:
print(f"Normalized {len(changes)} artifact path(s) in {version}.")
elif result.returncode != 0:
return

for directory in sorted(
dest.parent.rglob("*"), key=lambda item: len(item.parts), reverse=True
):
if directory.is_dir() and not any(directory.iterdir()):
directory.rmdir()
_cleanup_empty_dirs(dest.parent)


def merge_published_site(
Expand Down Expand Up @@ -238,6 +284,16 @@ def merge_published_site(
if final_changes:
print(f"Normalized {len(final_changes)} artifact path(s) in build tree.")

final_flattened = flatten_nested_version_dirs(build_dir)
if final_flattened:
print(f"Flattened {len(final_flattened)} nested version dir(s) in build tree.")

post_flatten_changes = normalize_artifact_paths(build_dir)
if post_flatten_changes:
print(
f"Normalized {len(post_flatten_changes)} flattened artifact path(s) in build tree."
)

return merged


Expand Down
1 change: 1 addition & 0 deletions tests/docs/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,3 +39,4 @@ def _load_merge_module():
merge_published_site = _merge.merge_published_site
normalize_artifact_paths = _merge.normalize_artifact_paths
download_version_wget = _merge._download_version_wget
flatten_nested_version_dirs = _merge.flatten_nested_version_dirs
67 changes: 67 additions & 0 deletions tests/docs/test_merge_published_site.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@

from .conftest import (
download_version_wget,
flatten_nested_version_dirs,
load_versions_manifest,
merge_published_site,
normalize_artifact_paths,
Expand Down Expand Up @@ -137,6 +138,27 @@ def test_merge_normalizes_existing_cached_version(
assert (static_dir / "clipboard.min.js").read_text(encoding="utf-8") == "cached"


def test_merge_flattens_existing_repo_prefixed_cached_version(
    build_dir: Path, published_site: Path
) -> None:
    """A restored cache may hold ``build/html/EmbodiChain/vX.Y.Z``.

    After merging, the repo-prefix directory must be gone and the version
    must be served from the top level of the build tree.
    """
    # Seed the build tree with a repo-prefixed copy of a version.
    repo_prefix = build_dir / "EmbodiChain"
    cached_version = repo_prefix / "v0.2.0"
    cached_version.mkdir(parents=True)
    cached_index = cached_version / "index.html"
    cached_index.write_text("nested cached", encoding="utf-8")

    merged_versions = merge_published_site(
        build_dir,
        published_root=published_site,
        skip_versions=frozenset({"main"}),
    )

    assert merged_versions == ["v0.1.0", "v0.2.0"]
    assert not repo_prefix.exists()
    top_level_index = build_dir / "v0.2.0" / "index.html"
    expected_html = "<html>v0.2.0 published</html>"
    assert top_level_index.read_text(encoding="utf-8") == expected_html


def test_merge_skip_version_for_fresh_tag_build(
build_dir: Path, published_site: Path
) -> None:
Expand Down Expand Up @@ -213,6 +235,24 @@ def test_normalize_artifact_paths_removes_duplicate_query_file(tmp_path: Path) -
assert not query_file.exists()


def test_flatten_nested_version_dirs_promotes_cached_repo_prefix(
    tmp_path: Path,
) -> None:
    """Nested release directories must be top-level for versions.json discovery."""
    build_dir = tmp_path / "build" / "html"
    repo_prefix = build_dir / "EmbodiChain"
    nested_version = repo_prefix / "v0.2.2"
    nested_version.mkdir(parents=True)
    (nested_version / "index.html").write_text("nested", encoding="utf-8")

    changes = flatten_nested_version_dirs(build_dir)

    # The function reports resolved paths: one move from the nested location
    # to the top-level location.
    promoted = build_dir.resolve() / "v0.2.2"
    expected_changes = [(nested_version.resolve(), promoted)]
    assert changes == expected_changes
    assert not repo_prefix.exists()
    promoted_index = build_dir / "v0.2.2" / "index.html"
    assert promoted_index.read_text(encoding="utf-8") == "nested"


def test_download_version_wget_promotes_repo_prefixed_output(
tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
Expand All @@ -238,3 +278,30 @@ def fake_run(*args: object, **kwargs: object) -> subprocess.CompletedProcess[str
"copy"
)
assert not (build_dir / "EmbodiChain").exists()


def test_download_version_wget_uses_repo_prefixed_output_after_wget_error(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """wget can return 8 for linked 404s after downloading usable pages."""
    build_dir = tmp_path / "build" / "html"
    dest = build_dir / "v0.2.2"
    repo_prefix = build_dir / "EmbodiChain"

    def fake_run(*args: object, **kwargs: object) -> subprocess.CompletedProcess[str]:
        # Stand-in for wget: write repo-prefixed output (including an asset
        # whose file name carries a query string), then report exit status 8.
        downloaded = repo_prefix / "v0.2.2"
        assets = downloaded / "_static"
        assets.mkdir(parents=True)
        (downloaded / "index.html").write_text("partial", encoding="utf-8")
        query_asset = assets / "clipboard.min.js?v=a7894cd8"
        query_asset.write_text("copy", encoding="utf-8")
        return subprocess.CompletedProcess(args=[], returncode=8)

    monkeypatch.setattr(subprocess, "run", fake_run)

    download_version_wget("https://dexforce.github.io/EmbodiChain", "v0.2.2", dest)

    index_text = (dest / "index.html").read_text(encoding="utf-8")
    assert index_text == "partial"
    asset_text = (dest / "_static" / "clipboard.min.js").read_text(encoding="utf-8")
    assert asset_text == "copy"
    assert not repo_prefix.exists()
Loading