From 1866329e5072ea73ff74de2b86707c96a6017f8b Mon Sep 17 00:00:00 2001 From: Zied Jlassi <6190550+zied-jlassi@users.noreply.github.com> Date: Wed, 24 Jun 2026 15:57:23 +0200 Subject: [PATCH] fix(sessions): sanitize Unicode in titles like tags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit tag_session/tag_session_via_store run tags through _sanitize_unicode (NFKC + strip format/bidi/zero-width chars) before persisting, but the twin custom-title fields did not: rename_session, rename_session_via_store and fork_session only .strip()ed. A title and a tag are both user-controlled metadata surfaced identically by list_sessions, so a bidi-override or zero-width title was persisted and rendered unsanitized while the same string as a tag was cleaned. The fork case also derives a title from the source transcript (customTitle / aiTitle / first prompt) — content that may not be trusted — and previously copied it through verbatim. Apply _sanitize_unicode(...).strip() at all three sites so the custom-title path matches the tag path (an all-invisible title passed to rename_session/rename_session_via_store now raises 'title must be non-empty', consistent with tag_session; an all-invisible explicit fork title falls back to the derived title). NFKC normalization now applies to titles as it already does to tags. Tests: rename sanitization + pure-invisible rejection, explicit fork title sanitization, and derived fork title sanitization (untrusted transcript content). 
Signed-off-by: Zied Jlassi <6190550+zied-jlassi@users.noreply.github.com> --- .../_internal/session_mutations.py | 17 ++-- tests/test_session_mutations.py | 80 +++++++++++++++++++ 2 files changed, 92 insertions(+), 5 deletions(-) diff --git a/src/claude_agent_sdk/_internal/session_mutations.py b/src/claude_agent_sdk/_internal/session_mutations.py index 55a7f2132..79523e37a 100644 --- a/src/claude_agent_sdk/_internal/session_mutations.py +++ b/src/claude_agent_sdk/_internal/session_mutations.py @@ -89,8 +89,10 @@ def rename_session( if not _validate_uuid(session_id): raise ValueError(f"Invalid session_id: {session_id}") # Matches CLI guard — empty/whitespace titles are rejected rather than - # overloaded as "clear title". - stripped = title.strip() + # overloaded as "clear title". Sanitize Unicode (bidi/zero-width/format + # chars) like tag_session so a title and a tag — twin user-controlled + # fields surfaced identically by list_sessions — get the same treatment. + stripped = _sanitize_unicode(title).strip() if not stripped: raise ValueError("title must be non-empty") @@ -464,9 +466,13 @@ def _build_fork_lines( # Derive title: explicit > original customTitle > original aiTitle > first # prompt. Suffix with " (fork)" for derived titles. listSessions reads the # LAST custom-title from the tail, so this entry is what surfaces. - fork_title = title.strip() if title else None + # Sanitize Unicode like tag_session/rename_session: an explicit title is a + # twin user-controlled field, and a derived title carries forward content + # from the source transcript (aiTitle / first prompt) that may not be + # trusted — both must be cleaned before they surface in list_sessions. 
+ fork_title = _sanitize_unicode(title).strip() if title else None if not fork_title: - fork_title = f"{derive_title() or 'Forked session'} (fork)" + fork_title = f"{_sanitize_unicode(derive_title() or 'Forked session')} (fork)" lines.append( json.dumps( @@ -790,7 +796,8 @@ async def rename_session_via_store( """ if not _validate_uuid(session_id): raise ValueError(f"Invalid session_id: {session_id}") - stripped = title.strip() + # Sanitize Unicode like tag_session_via_store (twin metadata field). + stripped = _sanitize_unicode(title).strip() if not stripped: raise ValueError("title must be non-empty") project_key = project_key_for_directory(directory) diff --git a/tests/test_session_mutations.py b/tests/test_session_mutations.py index 18ad8aeb5..4277b4044 100644 --- a/tests/test_session_mutations.py +++ b/tests/test_session_mutations.py @@ -197,6 +197,35 @@ def test_title_trimmed_before_storing( entry = json.loads(lines[-1]) assert entry["customTitle"] == "Trimmed Title" + def test_unicode_sanitization(self, claude_config_dir: Path, tmp_path: Path): + """Title is Unicode-sanitized like a tag: zero-width/BOM chars stripped.""" + project_path = str(tmp_path / "proj") + Path(project_path).mkdir(parents=True) + project_dir = _make_project_dir( + claude_config_dir, os.path.realpath(project_path) + ) + sid, file_path = _make_session_file(project_dir) + + rename_session(sid, "clean\u200btitle\ufeff", directory=project_path) + + lines = file_path.read_text().strip().split("\n") + entry = json.loads(lines[-1]) + assert entry["customTitle"] == "cleantitle" + + def test_sanitization_rejects_pure_invisible( + self, claude_config_dir: Path, tmp_path: Path + ): + """A title that is only invisible chars is rejected, like a tag.""" + project_path = str(tmp_path / "proj") + Path(project_path).mkdir(parents=True) + project_dir = _make_project_dir( + claude_config_dir, os.path.realpath(project_path) + ) + sid, _ = _make_session_file(project_dir) + + with pytest.raises(ValueError, 
match="title must be non-empty"): + rename_session(sid, "\u200b\u200c\ufeff", directory=project_path) + def test_last_wins_via_list_sessions(self, claude_config_dir: Path, tmp_path: Path): """Multiple renames — list_sessions sees the last one.""" project_path = str(tmp_path / "proj") @@ -724,6 +753,57 @@ def test_fork_custom_title(self, claude_config_dir: Path, tmp_path: Path): fork_info = next(s for s in sessions if s.session_id == result.session_id) assert fork_info.custom_title == "My Fork" + def test_fork_title_unicode_sanitized( + self, claude_config_dir: Path, tmp_path: Path + ): + """An explicit fork title is Unicode-sanitized like rename/tag.""" + project_path = str(tmp_path / "proj") + Path(project_path).mkdir(parents=True) + project_dir = _make_project_dir( + claude_config_dir, os.path.realpath(project_path) + ) + sid, _, _ = _make_transcript_session(project_dir) + + result = fork_session( + sid, directory=project_path, title="clean\u200bfork\ufeff" + ) + + sessions = list_sessions(directory=project_path) + fork_info = next(s for s in sessions if s.session_id == result.session_id) + assert fork_info.custom_title == "cleanfork" + + def test_fork_derived_title_unicode_sanitized( + self, claude_config_dir: Path, tmp_path: Path + ): + """A derived fork title (from the source transcript, which may carry an + unsanitized customTitle/aiTitle) is sanitized before it surfaces.""" + project_path = str(tmp_path / "proj") + Path(project_path).mkdir(parents=True) + project_dir = _make_project_dir( + claude_config_dir, os.path.realpath(project_path) + ) + sid, file_path, _ = _make_transcript_session(project_dir) + # Simulate a title written outside the SDK (e.g. by the CLI) that still + # contains invisible characters. 
+ with file_path.open("a", encoding="utf-8") as f: + f.write( + json.dumps( + { + "type": "custom-title", + "customTitle": "dirty\u200bsrc", + "sessionId": sid, + } + ) + + "\n" + ) + + result = fork_session(sid, directory=project_path) + + sessions = list_sessions(directory=project_path) + fork_info = next(s for s in sessions if s.session_id == result.session_id) + assert fork_info.custom_title == "dirtysrc (fork)" + assert "\u200b" not in (fork_info.custom_title or "") + def test_fork_default_title_has_suffix( self, claude_config_dir: Path, tmp_path: Path ):