diff --git a/api/export_api.py b/api/export_api.py index 700e348..475ba96 100644 --- a/api/export_api.py +++ b/api/export_api.py @@ -14,6 +14,7 @@ from utils.md_exporter import session_to_markdown from utils.json_exporter import session_to_json from utils.exclusion_rules import is_session_excluded +from utils.slugify import slugify export_bp = Blueprint("export", __name__) @@ -49,13 +50,6 @@ def get_export_state(): }) -def _slugify(text: str) -> str: - import re - text = text.lower() - text = re.sub(r"[^a-z0-9]+", "-", text) - return text.strip("-") - - @export_bp.route("/api/export", methods=["POST"]) def bulk_export(): body = request.get_json(silent=True) or {} @@ -97,9 +91,9 @@ def bulk_export(): stats = compute_stats(session) md = session_to_markdown(session, stats) - title_slug = _slugify(session["title"]) or "session" + title_slug = slugify(session["title"], default="session") short_id = sid[:8] - proj_slug = _slugify(project["name"]) + proj_slug = slugify(project["name"], default="project") ts = session["metadata"].get("first_timestamp", "") ts_file = ts[:19].replace(":", "-") if ts else "0000-00-00T00-00-00" rel_path = f"{proj_slug}/{ts_file}__{title_slug}__{short_id}.md" @@ -155,7 +149,7 @@ def export_session(project_name, session_id): if is_session_excluded(rules, session, project_name): return jsonify({"error": "Session not found"}), 404 stats = compute_stats(session) - title_slug = _slugify(session["title"]) or "session" + title_slug = slugify(session["title"], default="session") if fmt == "json": content = session_to_json(session, stats) diff --git a/scripts/export.py b/scripts/export.py index e8f93cb..60969f8 100644 --- a/scripts/export.py +++ b/scripts/export.py @@ -33,6 +33,7 @@ load_rules, is_session_excluded, ) +from utils.slugify import slugify STATE_DIR = os.path.join(os.path.expanduser("~"), ".claude-code-chat-browser") @@ -366,9 +367,9 @@ def cmd_export(args): meta["first_timestamp"] = ts date_str = ts[:10] ts_file = ts[:19].replace(":", "-") # 2026-02-10T01-46-15 - title_slug = _slugify(session["title"]) + title_slug = slugify(session["title"], default="session") short_id = sid[:8] - project_slug = _slugify(project["name"]) + project_slug = slugify(project["name"], default="project") if fmt in ("md", "both"): md = session_to_markdown(session, stats) @@ -444,7 +445,7 @@ def cmd_export(args): def _export_single(session: dict, stats: dict, fmt: str, out_dir: str): """Write one session to disk as md, json, or both.""" - title_slug = _slugify(session["title"]) + title_slug = slugify(session["title"], default="session") short_id = session["session_id"][:8] ts = session["metadata"].get("first_timestamp", "") ts_file = ts[:19].replace(":", "-") if ts else "0000-00-00T00-00-00" @@ -609,18 +610,6 @@ def _save_state(sessions: dict, count: int, out_dir: str): json.dump(state, f, indent=2) -def _slugify(text: str) -> str: - slug = "" - for c in text.lower(): - if c.isalnum(): - slug += c - elif c in " -_/.": - slug += "-" - while "--" in slug: - slug = slug.replace("--", "-") - return slug.strip("-") - - def _die(msg: str): print(f"Error: {msg}", file=sys.stderr) sys.exit(1) diff --git a/tests/test_slugify.py b/tests/test_slugify.py new file mode 100644 index 0000000..78c71c8 --- /dev/null +++ b/tests/test_slugify.py @@ -0,0 +1,60 @@ +"""Regression tests for utils.slugify (Issue #30 / CCC8). + +Historically ``scripts/export.py`` used ``isalnum()`` (Unicode letters preserved) +while ``api/export_api.py`` used ASCII-only ``[^a-z0-9]+``. The canonical +implementation matches the API for portable zip / download filenames. +""" + +import os + +from utils.slugify import slugify + + +def test_ascii_words_hyphenated(): + assert slugify("Hello World") == "hello-world" + + +def test_punctuation_collapses_to_single_hyphen(): + assert slugify("foo__bar") == "foo-bar" + assert slugify("a.b.c") == "a-b-c" + + +def test_unicode_letters_become_ascii_safe(): + """Old CLI kept Latin-1 letters (e.g. é); canonical slug strips to ASCII.""" + assert slugify("Café noir") == "caf-noir" + + +def test_empty_after_strip(): + assert slugify("!!!") == "" + + +def test_digits_preserved(): + assert slugify("Issue 42 Fix") == "issue-42-fix" + + +def test_punctuation_examples_match_regex_behavior(): + assert slugify("AT&T") == "at-t" + assert slugify("issue#42") == "issue-42" + + +def test_default_used_when_slug_empty(): + assert slugify("!!!", default="session") == "session" + assert slugify("!!!") == "" + + +def test_export_leaf_path_parity_api_zip_vs_cli(): + """Same session inputs → same ``proj_slug``, ``title_slug``, and file leaf as API vs CLI.""" + title = "Issue #42: AT&T" + project = "Foo/Bar!" + sid = "a1b2c3d4-e5f6-7890-abcd-ef1234567890" + ts_file = "2026-05-07T12-00-00" + short_id = sid[:8] + title_slug = slugify(title, default="session") + proj_slug = slugify(project, default="project") + leaf_md = f"{ts_file}__{title_slug}__{short_id}.md" + api_zip_inner = f"{proj_slug}/{leaf_md}" + date_str = ts_file[:10] + cli_rel = os.path.join(date_str, proj_slug, leaf_md) + assert api_zip_inner.endswith(leaf_md) + assert os.path.basename(cli_rel) == leaf_md + assert cli_rel.replace("\\", "/").endswith(f"{proj_slug}/{leaf_md}") diff --git a/utils/slugify.py b/utils/slugify.py new file mode 100644 index 0000000..fbc5301 --- /dev/null +++ b/utils/slugify.py @@ -0,0 +1,27 @@ +"""Filesystem- and URL-safe slugs for export paths and download names. + +Uses ASCII letters and digits only; other characters (including Unicode +letters and punctuation) become hyphen runs, then trimmed. Matches the +historical behavior of ``api/export_api.py`` and avoids platform-specific +issues with non-ASCII paths inside zip archives. +""" + +import re + + +def slugify(text: str, *, default: str = "") -> str: + """Lowercase *text* and replace each run of non-[a-z0-9] with one hyphen. + + After stripping leading/trailing hyphens, returns that string; if it is + empty, returns *default*. Export code passes ``default="session"`` or + ``default="project"``. + Examples (handled by the ``[^a-z0-9]+`` substitution below): + + - ``AT&T`` → ``at-t`` + - ``issue#42`` → ``issue-42`` + """ + text = text.lower() + # Non-ASCII-alphanumeric runs → '-'; e.g. AT&T → at-t, issue#42 → issue-42. + text = re.sub(r"[^a-z0-9]+", "-", text) + stripped = text.strip("-") + return stripped if stripped else default