Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/python-ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ jobs:
run: uv run ruff check --output-format=github .

- name: Run mypy
run: uv run mypy --strict src/ packages/ tests/
run: uv run mypy --strict tools/ noxfile.py

- name: Run ty
run: uv run ty check --output-format=github .
2 changes: 1 addition & 1 deletion noxfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def lints(session: nox.Session) -> None:
session.run("prek", "run", "--all-files")
session.run("ruff", "format", ".")
session.run("ruff", "check", "--fix", ".")
session.run("mypy", "--strict", "src/", "packages/", "tests/")
session.run("mypy", "--strict", "tools/", "noxfile.py")
session.run("ty", "check", ".")


Expand Down
30 changes: 29 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ authors = [
{ name = "Bradley Reynolds", email = "bradley.reynolds@tailstory.dev" },
]
requires-python = ">=3.14"
dependencies = [
"sphobjinv>=2.4",
]

[dependency-groups]
dev = [
Expand All @@ -30,6 +33,28 @@ cache-dir = ".cache/ruff"
select = [
"ALL",
]
ignore = [
"COM812", # conflicts with the formatter
"ISC001", # conflicts with the formatter
# No `from __future__ import annotations`, so annotation imports are evaluated at
# runtime and cannot move into a TYPE_CHECKING block.
"TC001",
"TC002",
"TC003",
]

[tool.ruff.lint.per-file-ignores]
"tools/*.py" = [
"T201", # CLI tools: the printed report is the deliverable
"FBT", # internal recursion helpers; keyword-only bools add churn, not safety
"PLR0913", # record/report builders take one parameter per output column by design
]
# stdlib_introspect runs across the whole introspect matrix (older interpreters
# included), so it keeps back-compatible idioms: TypeAlias over the newer `type`
# statement (UP040). It also imports arbitrary modules, where a broad except is
# intentional (BLE001).
"tools/stdlib_introspect.py" = ["BLE001", "UP040"]
"tools/coverage_diff.py" = ["S310"] # fetches a fixed https://docs.python.org inventory URL

[tool.ruff.lint.pydocstyle]
convention = "numpy"
Expand All @@ -38,9 +63,12 @@ convention = "numpy"
pretty = true
num_workers = 4
native_parser = true # required by num_workers
plugins = ["pydantic.mypy"]
cache_dir = ".cache/mypy"

[[tool.mypy.overrides]]
module = ["sphobjinv.*"]
ignore_missing_imports = true # sphobjinv ships no py.typed marker

# Still need to tell Mypy to ignore these
[tool.ty.rules]
unused-ignore-comment = "ignore"
134 changes: 90 additions & 44 deletions tools/coverage_diff.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@
added/removed -- those deltas come from the matrix (merge_summary.py), not here.
"""

from __future__ import annotations

import argparse
import json
import os
Expand All @@ -25,11 +23,16 @@
import urllib.error
import urllib.request
from collections import Counter
from pathlib import Path
from typing import Any

import sphobjinv as soi

type Record = dict[str, Any]

INVENTORY_URL = "https://docs.python.org/{version}/objects.inv"
DEV_INVENTORY_URL = "https://docs.python.org/dev/objects.inv"
HTTP_NOT_FOUND = 404

# Introspected prefix -> docs-canonical prefix. The docs index posix|nt as os.*,
# posixpath|ntpath|genericpath as os.path.*, and builtins members unprefixed.
Expand All @@ -55,21 +58,25 @@
TOP_MODULES = 15


def version_key(version: str) -> tuple[int, ...]:
    """Sort key for a version string: ``'3.14'`` -> ``(3, 14)``.

    Only the first two runs of digits (major, minor) are kept, so a patch
    component such as the ``.2`` in ``'3.14.2'`` does not affect the key.

    Parameters
    ----------
    version : str
        Any string containing a version, e.g. ``'3.14'`` or ``'py3.9'``.

    Returns
    -------
    tuple[int, ...]
        Up to two integers taken from the leading digit runs, or ``(0,)`` when
        the string contains no digits at all.
    """
    numbers = re.findall(r"\d+", version)
    if not numbers:
        # Digit-free input still needs a comparable key; it sorts first.
        return (0,)
    return tuple(int(number) for number in numbers[:2])


def version_label(version: str) -> str:
    """Canonical ``X.Y`` label for a version string.

    Parameters
    ----------
    version : str
        Any string accepted by ``version_key``.

    Returns
    -------
    str
        The components of ``version_key(version)`` joined with dots, so every
        spelling of the same minor collapses onto one label.
    """
    return ".".join(str(part) for part in version_key(version))


def cell_version(cell: str) -> str | None:
    """Extract the ``X.Y`` minor from a ``...-py3.14`` cell id, or ``None``.

    Parameters
    ----------
    cell : str
        A matrix cell identifier, searched with the module-level
        ``CELL_VERSION`` pattern.

    Returns
    -------
    str | None
        The first capture group of the match passed through ``version_label``,
        or ``None`` when the pattern does not match.
    """
    match = CELL_VERSION.search(cell)
    if match is None:
        return None
    return version_label(match.group(1))


def normalize(qualname):
def normalize(qualname: str) -> str:
"""Map an introspected qualname onto its docs-canonical spelling."""
if qualname in MODULE_ALIASES:
return MODULE_ALIASES[qualname]
for prefix, replacement in PREFIX_ALIASES.items():
Expand All @@ -78,62 +85,69 @@ def normalize(qualname):
return qualname


def normalize_module(module: str) -> str:
    """Map an introspected module name onto its docs-canonical spelling.

    Parameters
    ----------
    module : str
        Module name as recorded by introspection.

    Returns
    -------
    str
        The ``MODULE_ALIASES`` entry for ``module`` when one exists, otherwise
        ``module`` unchanged.
    """
    return MODULE_ALIASES.get(module, module)


def percent(part: int, whole: int) -> float:
    """Return ``part / whole`` as a percentage rounded to one decimal.

    Parameters
    ----------
    part : int
        Numerator (e.g. the number of covered names).
    whole : int
        Denominator (e.g. the size of the surface).

    Returns
    -------
    float
        ``100 * part / whole`` rounded to one decimal place, or ``0.0`` when
        ``whole`` is zero so an empty surface never divides by zero.
    """
    if not whole:
        return 0.0
    return round(100 * part / whole, 1)


def load_union(path):
def load_union(path: str) -> list[Record]:
"""Read the union JSONL into a list of records."""
records = []
with open(path, encoding="utf-8") as source_file:
with Path(path).open(encoding="utf-8") as source_file:
for line in source_file:
line = line.strip()
if line:
records.append(json.loads(line))
stripped = line.strip()
if stripped:
records.append(json.loads(stripped))
return records


def http_get(url: str, attempts: int = 4) -> bytes:
    """GET ``url``, retrying transient URL errors with exponential backoff.

    Parameters
    ----------
    url : str
        Address to fetch.
    attempts : int, default 4
        Maximum number of tries; must be at least 1.

    Returns
    -------
    bytes
        The response body.

    Raises
    ------
    ValueError
        If ``attempts`` is less than 1.
    urllib.error.HTTPError
        Immediately, without retrying, when the server answers with an error
        status; the caller decides what to do (e.g. 404 -> dev fallback).
    urllib.error.URLError
        When every attempt failed at the connection level.
    """
    if attempts < 1:
        message = "attempts must be >= 1"
        raise ValueError(message)
    # The request does not change between tries, so build it once.
    request = urllib.request.Request(url, headers={"User-Agent": "coverage-diff"})
    for attempt in range(attempts):
        try:
            with urllib.request.urlopen(request, timeout=30) as response:
                body: bytes = response.read()
                return body
        except urllib.error.HTTPError:
            # HTTPError subclasses URLError, so it must be re-raised first or
            # the retry branch below would swallow 4xx/5xx responses.
            raise
        except urllib.error.URLError:
            if attempt == attempts - 1:
                raise
            time.sleep(2**attempt)  # back off 1s, 2s, 4s, ... between tries
    # Unreachable: the guard forces attempts >= 1, so the last attempt either
    # returns or raises.
    raise AssertionError


def _py_domain_names(raw: bytes) -> set[str]:
    """Decode a zlib-compressed ``objects.inv`` payload into its py-domain names."""
    inventory = soi.Inventory(zlib=raw)  # ty: ignore[unknown-argument]
    return {obj.name for obj in inventory.objects if obj.domain == "py"}


def documented_names(version: str, inventory_dir: str | None = None) -> tuple[set[str], bool]:
    """Return (set of py-domain names, used_dev_fallback) for one minor.

    Parameters
    ----------
    version : str
        Minor version label, substituted into ``INVENTORY_URL`` and used as the
        stem of the cached ``<version>.inv`` file.
    inventory_dir : str or None, default None
        Directory checked for ``<version>.inv`` before any network fetch.

    Returns
    -------
    tuple[set[str], bool]
        The names of every inventory object in the ``py`` domain, and ``True``
        only when the numbered inventory returned 404 and the ``dev`` inventory
        was fetched instead.

    Raises
    ------
    urllib.error.HTTPError
        For any HTTP error other than a 404 on the numbered inventory.
    urllib.error.URLError
        When the download keeps failing at the connection level.
    """
    if inventory_dir:
        local = Path(inventory_dir) / f"{version}.inv"
        if local.exists():
            # A cached inventory is always the numbered one, never the dev fallback.
            return _py_domain_names(local.read_bytes()), False
    used_dev = False
    try:
        data = http_get(INVENTORY_URL.format(version=version))
    except urllib.error.HTTPError as error:
        if error.code != HTTP_NOT_FOUND:
            raise
        data = http_get(DEV_INVENTORY_URL)  # in-dev minor with no numbered inventory yet
        used_dev = True
    return _py_domain_names(data), used_dev


def build_surface(union, version):
def build_surface(union: list[Record], version: str) -> dict[str, Record]:
"""norm-qualname -> representative record, for one minor, OS-unioned."""
surface = {}
surface: dict[str, Record] = {}
for record in union:
if record.get("is_dunder"):
continue
Expand All @@ -146,15 +160,20 @@ def build_surface(union, version):
return surface


def diff_version(union, version, inventory_dir):
def diff_version(
union: list[Record],
version: str,
inventory_dir: str | None,
) -> tuple[Record, dict[str, Record], set[str], set[str]]:
"""Split one minor's surface into (summary, surface, missing, docs-only)."""
surface = build_surface(union, version)
documented_upstream, used_dev = documented_names(version, inventory_dir)
surface_names = set(surface)
covered = surface_names & documented_upstream
missing_from_official_docs = surface_names - documented_upstream
docs_only = documented_upstream - surface_names

by_module = {}
by_module: dict[str, Record] = {}
for name, record in surface.items():
module = normalize_module(record["module"])
bucket = by_module.setdefault(module, {"surface": 0, "covered": 0})
Expand All @@ -177,7 +196,8 @@ def diff_version(union, version, inventory_dir):
return summary, surface, missing_from_official_docs, docs_only


def gap_records(surface, missing_from_official_docs):
def gap_records(surface: dict[str, Record], missing_from_official_docs: set[str]) -> list[Record]:
"""Build the sorted gap rows for the entities missing from the official docs."""
rows = []
for name in missing_from_official_docs:
record = surface[name]
Expand All @@ -195,11 +215,14 @@ def gap_records(surface, missing_from_official_docs):
return rows


def default_target(versions: list[str]) -> str:
    """Latest stable minor: the penultimate entry (the highest is the in-dev branch).

    Parameters
    ----------
    versions : list[str]
        Minor labels sorted in ascending order; must not be empty.

    Returns
    -------
    str
        The second-highest entry, or the only entry when there is just one.

    Raises
    ------
    IndexError
        If ``versions`` is empty.
    """
    stable = versions[:-1] or versions  # drop the in-dev highest, unless it is all we have
    return stable[-1]


def main():
def main() -> None:
"""Parse arguments, diff every minor in the union, and write the outputs."""
parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
parser.add_argument("union", help="stdlib_api_union.jsonl from the aggregate job")
parser.add_argument(
Expand All @@ -209,20 +232,27 @@ def main():
parser.add_argument("-o", "--output", default="official_docs_coverage_by_version.json")
parser.add_argument("--gap-out", metavar="PATH", help="default: official_docs_gap_<target>.jsonl")
parser.add_argument(
"--inventory-dir", metavar="DIR", help="read <minor>.inv from here before fetching (offline/cached runs)",
"--inventory-dir",
metavar="DIR",
help="read <minor>.inv from here before fetching (offline/cached runs)",
)
parser.add_argument(
"--md-summary", metavar="PATH", help="write the Markdown report here (defaults to $GITHUB_STEP_SUMMARY)",
"--md-summary",
metavar="PATH",
help="write the Markdown report here (defaults to $GITHUB_STEP_SUMMARY)",
)
args = parser.parse_args()

union = load_union(args.union)
versions = sorted(
{cell_version(cell) for record in union for cell in record.get("cells", []) if cell_version(cell)},
{minor for record in union for cell in record.get("cells", []) if (minor := cell_version(cell))},
key=version_key,
)

results, surfaces, gaps, docs_onlys = {}, {}, {}, {}
results: dict[str, Record] = {}
surfaces: dict[str, dict[str, Record]] = {}
gaps: dict[str, set[str]] = {}
docs_onlys: dict[str, set[str]] = {}
for version in versions:
summary, surface, missing, docs_only = diff_version(union, version, args.inventory_dir)
results[version] = summary
Expand All @@ -235,21 +265,34 @@ def main():
)

target = args.target_version or (default_target(versions) if versions else None)
target_surface = surfaces.get(target, {}) if target else {}
target_missing = gaps.get(target, set()) if target else set()
target_docs_only = docs_onlys.get(target, set()) if target else set()

coverage = {"target_version": target, "versions": results}
with open(args.output, "w", encoding="utf-8", newline="\n") as out_file:
with Path(args.output).open("w", encoding="utf-8", newline="\n") as out_file:
json.dump(coverage, out_file, indent=2)
out_file.write("\n")

gap_path = args.gap_out or f"official_docs_gap_{target}.jsonl"
rows = gap_records(surfaces.get(target, {}), gaps.get(target, set()))
with open(gap_path, "w", encoding="utf-8", newline="\n") as out_file:
rows = gap_records(target_surface, target_missing)
with Path(gap_path).open("w", encoding="utf-8", newline="\n") as out_file:
out_file.writelines(json.dumps(row) + "\n" for row in rows)

report(versions, results, target, rows, docs_onlys.get(target, set()), args.output, gap_path, args)
report(versions, results, target, rows, target_docs_only, args.output, gap_path, args)


def report(versions, results, target, gap_rows, docs_only, output_path, gap_path, args):
def report(
versions: list[str],
results: dict[str, Record],
target: str | None,
gap_rows: list[Record],
docs_only: set[str],
output_path: str,
gap_path: str,
args: argparse.Namespace,
) -> None:
"""Render the run as a Markdown report and a console summary."""
data_entries = sum(1 for row in gap_rows if row["is_data"])
lines = ["# docs.python.org coverage — stdlib API missing from the official reference", ""]
if not versions:
Expand Down Expand Up @@ -284,11 +327,14 @@ def report(versions, results, target, gap_rows, docs_only, output_path, gap_path
)
lines.append("")

core_breakdown = (
f"Reference-entry core (callables/classes/etc.): **{len(gap_rows) - data_entries}**; "
f"`data` entries: **{data_entries}**."
)
lines += [
f"## Missing from the official {target} docs — {len(gap_rows)} undocumented",
"",
f"Reference-entry core (callables/classes/etc.): **{len(gap_rows) - data_entries}**; "
f"`data` entries: **{data_entries}**.",
core_breakdown,
"",
"| kind | count |",
"| --- | ---: |",
Expand All @@ -297,7 +343,7 @@ def report(versions, results, target, gap_rows, docs_only, output_path, gap_path
lines.append(f"| {kind} | {count} |")
lines.append("")

module_stats = results.get(target, {}).get("by_module", {})
module_stats = results.get(target, {}).get("by_module", {}) if target else {}
per_module = Counter(row["module"] for row in gap_rows)
lines += [
f"## Top {TOP_MODULES} modules by undocumented count ({target})",
Expand All @@ -320,7 +366,7 @@ def report(versions, results, target, gap_rows, docs_only, output_path, gap_path

markdown_path = args.md_summary or os.environ.get("GITHUB_STEP_SUMMARY")
if markdown_path:
with open(markdown_path, "a", encoding="utf-8", newline="\n") as summary_file:
with Path(markdown_path).open("a", encoding="utf-8", newline="\n") as summary_file:
summary_file.write("\n".join(lines) + "\n")

print("\n=== docs.python.org coverage summary =====================")
Expand Down
Loading