diff --git a/src/firetower/incidents/hooks.py b/src/firetower/incidents/hooks.py index 7de9c47a..d9f0ed95 100644 --- a/src/firetower/incidents/hooks.py +++ b/src/firetower/incidents/hooks.py @@ -761,6 +761,119 @@ def _create_troubleshooting_doc(incident: Incident, channel_id: str) -> None: ).delete() +def _create_postmortem_doc(incident: Incident, channel_id: str) -> None: + notion = None + page = None + try: + if not NotionService.is_configured(): + logger.info( + "Notion not configured, skipping postmortem doc for incident %s", + incident.id, + ) + return + + notion = NotionService.from_settings() + if not notion: + return + + if incident.is_private: + logger.info("Skipping postmortem doc for private incident %s", incident.id) + return + + with transaction.atomic(): + link, created = ExternalLink.objects.select_for_update().get_or_create( + incident=incident, + type=ExternalLinkType.NOTION, + defaults={"url": ""}, + ) + if not created: + if link.url: + logger.info( + "Incident %s already has a postmortem doc, skipping", + incident.id, + ) + else: + logger.info( + "Incident %s postmortem doc creation already in progress, skipping", + incident.id, + ) + return + + incident_url = _build_incident_url(incident) + captain_email = incident.captain.email if incident.captain else None + page = notion.create_postmortem_page( + incident_number=incident.incident_number, + incident_title=incident.title, + incident_url=incident_url, + incident_date=incident.created_at, + severity=incident.severity, + captain_email=captain_email, + ) + if not page or not page.get("url"): + logger.error( + "Postmortem doc creation returned no URL for %s", + incident.incident_number, + ) + ExternalLink.objects.filter( + incident=incident, + type=ExternalLinkType.NOTION, + url="", + ).delete() + return + + orphan_page_id = None + try: + with transaction.atomic(): + link = ExternalLink.objects.select_for_update().get( + incident=incident, + type=ExternalLinkType.NOTION, + ) + if link.url: + logger.warning( + "Race condition: concurrent call already created postmortem doc for %s", + incident.incident_number, + ) + orphan_page_id = page["id"] + else: + link.url = page["url"] + link.save(update_fields=["url"]) + except ExternalLink.DoesNotExist: + logger.warning( + "Placeholder deleted before postmortem URL could be saved for %s", + incident.incident_number, + ) + notion.archive_page(page["id"]) + return + + if orphan_page_id: + notion.archive_page(orphan_page_id) + return + + try: + _slack_service.add_bookmark(channel_id, "Postmortem Doc", page["url"]) + except Exception: + logger.exception( + "Failed to add postmortem doc bookmark for %s", + incident.incident_number, + ) + except Exception: + logger.exception("Failed to create postmortem doc for incident %s", incident.id) + if notion and page and page.get("id"): + try: + notion.archive_page(page["id"]) + except Exception: + logger.exception( + "Failed to archive orphaned Notion page %s for incident %s", + page["id"], + incident.id, + ) + ExternalLink.objects.filter( + incident=incident, + type=ExternalLinkType.NOTION, + url="", + ).delete() + + def decorate_incident_channel( ctx: ChannelSetupContext, slack_service: SlackService, @@ -1262,6 +1375,7 @@ def on_incident_created(incident: Incident) -> None: # appears first in the channel. _create_datadog_notebook(incident, channel_id) _create_troubleshooting_doc(incident, channel_id) + _create_postmortem_doc(incident, channel_id) try: create_linear_parent_issue(incident, channel_id=channel_id) diff --git a/src/firetower/incidents/tests/test_hooks.py b/src/firetower/incidents/tests/test_hooks.py index bca3b59a..d53de6b9 100644 --- a/src/firetower/incidents/tests/test_hooks.py +++ b/src/firetower/incidents/tests/test_hooks.py @@ -10,6 +10,7 @@ from firetower.auth.models import ExternalProfile, ExternalProfileType from firetower.incidents.hooks import ( DEFAULT_STATUSPAGE_WARNING_BUFFER_MINUTES, + _create_postmortem_doc, _create_status_channel, _create_troubleshooting_doc, _invite_oncall_users, @@ -2679,6 +2680,344 @@ def test_creates_troubleshooting_doc( mock_create_ts.assert_called_once_with(incident, "C99999") +@pytest.mark.django_db +class TestCreatePostmortemDoc: + @patch("firetower.incidents.hooks.NotionService") + @patch("firetower.incidents.hooks._slack_service") + def test_skips_for_private_incident(self, mock_slack, mock_notion_cls): + mock_notion_cls.is_configured.return_value = True + mock_service = MagicMock() + mock_notion_cls.from_settings.return_value = mock_service + incident = Incident.objects.create( + title="Sensitive", + severity=IncidentSeverity.P1, + is_private=True, + ) + + _create_postmortem_doc(incident, "C99999") + + mock_service.create_postmortem_page.assert_not_called() + assert not ExternalLink.objects.filter( + incident=incident, type=ExternalLinkType.NOTION + ).exists() + mock_slack.post_message.assert_not_called() + + @patch("firetower.incidents.hooks.NotionService") + @patch("firetower.incidents.hooks._slack_service") + def test_creates_doc_and_adds_bookmark_without_message( + self, mock_slack, mock_notion_cls, settings + ): + settings.FIRETOWER_BASE_URL = "https://firetower.example.com" + mock_notion_cls.is_configured.return_value = True + mock_service = MagicMock() + mock_service.create_postmortem_page.return_value = { + "id": "page-456", + "url": "https://notion.so/page-456", + } + mock_notion_cls.from_settings.return_value = mock_service + + incident = Incident.objects.create( + title="Production is down", + severity=IncidentSeverity.P1, + ) + + _create_postmortem_doc(incident, "C99999") + + mock_service.create_postmortem_page.assert_called_once_with( + incident_number=incident.incident_number, + incident_title=incident.title, + incident_url=f"https://firetower.example.com/{incident.incident_number}", + incident_date=incident.created_at, + severity=incident.severity, + captain_email=None, + ) + link = ExternalLink.objects.get(incident=incident, type=ExternalLinkType.NOTION) + assert link.url == "https://notion.so/page-456" + + bookmark_calls = [ + c + for c in mock_slack.add_bookmark.call_args_list + if c[0][1] == "Postmortem Doc" + ] + assert len(bookmark_calls) == 1 + assert bookmark_calls[0][0][2] == "https://notion.so/page-456" + + mock_slack.post_message.assert_not_called() + + @patch("firetower.incidents.hooks.NotionService") + @patch("firetower.incidents.hooks._slack_service") + def test_skips_when_not_configured(self, mock_slack, mock_notion_cls): + mock_notion_cls.is_configured.return_value = False + + incident = Incident.objects.create( + title="Test", + severity=IncidentSeverity.P1, + ) + + _create_postmortem_doc(incident, "C99999") + + mock_notion_cls.from_settings.assert_not_called() + assert not ExternalLink.objects.filter( + incident=incident, type=ExternalLinkType.NOTION + ).exists() + + @patch("firetower.incidents.hooks.NotionService") + @patch("firetower.incidents.hooks._slack_service") + def test_idempotent_when_link_exists(self, mock_slack, mock_notion_cls, settings): + settings.FIRETOWER_BASE_URL = "https://firetower.example.com" + mock_notion_cls.is_configured.return_value = True + mock_service = MagicMock() + mock_notion_cls.from_settings.return_value = mock_service + + incident = Incident.objects.create( + title="Test", + severity=IncidentSeverity.P1, + ) + ExternalLink.objects.create( + incident=incident, + type=ExternalLinkType.NOTION, + url="https://notion.so/existing", + ) + + _create_postmortem_doc(incident, "C99999") + + mock_service.create_postmortem_page.assert_not_called() + link = ExternalLink.objects.get(incident=incident, type=ExternalLinkType.NOTION) + assert link.url == "https://notion.so/existing" + + @patch("firetower.incidents.hooks.NotionService") + @patch("firetower.incidents.hooks._slack_service") + def test_skips_when_placeholder_exists(self, mock_slack, mock_notion_cls, settings): + settings.FIRETOWER_BASE_URL = "https://firetower.example.com" + mock_notion_cls.is_configured.return_value = True + mock_service = MagicMock() + mock_notion_cls.from_settings.return_value = mock_service + + incident = Incident.objects.create( + title="Test", + severity=IncidentSeverity.P1, + ) + ExternalLink.objects.create( + incident=incident, + type=ExternalLinkType.NOTION, + url="", + ) + + _create_postmortem_doc(incident, "C99999") + + mock_service.create_postmortem_page.assert_not_called() + + @patch("firetower.incidents.hooks.NotionService") + @patch("firetower.incidents.hooks._slack_service") + def test_deletes_placeholder_when_api_returns_no_url( + self, mock_slack, mock_notion_cls, settings + ): + settings.FIRETOWER_BASE_URL = "https://firetower.example.com" + mock_notion_cls.is_configured.return_value = True + mock_service = MagicMock() + mock_service.create_postmortem_page.return_value = {"id": "page-456"} + mock_notion_cls.from_settings.return_value = mock_service + + incident = Incident.objects.create( + title="Test", + severity=IncidentSeverity.P1, + ) + + _create_postmortem_doc(incident, "C99999") + + assert not ExternalLink.objects.filter( + incident=incident, type=ExternalLinkType.NOTION + ).exists() + + @patch("firetower.incidents.hooks.NotionService") + @patch("firetower.incidents.hooks._slack_service") + def test_api_error_cleans_up_placeholder( + self, mock_slack, mock_notion_cls, settings + ): + settings.FIRETOWER_BASE_URL = "https://firetower.example.com" + mock_notion_cls.is_configured.return_value = True + mock_service = MagicMock() + mock_service.create_postmortem_page.side_effect = RuntimeError("boom") + mock_notion_cls.from_settings.return_value = mock_service + + incident = Incident.objects.create( + title="Test", + severity=IncidentSeverity.P1, + ) + + _create_postmortem_doc(incident, "C99999") + + assert not ExternalLink.objects.filter( + incident=incident, type=ExternalLinkType.NOTION + ).exists() + + @patch("firetower.incidents.hooks.NotionService") + @patch("firetower.incidents.hooks._slack_service") + def test_db_error_archives_orphan_notion_page( + self, mock_slack, mock_notion_cls, settings + ): + settings.FIRETOWER_BASE_URL = "https://firetower.example.com" + mock_notion_cls.is_configured.return_value = True + mock_service = MagicMock() + mock_service.create_postmortem_page.return_value = { + "id": "orphan-page-id", + "url": "https://notion.so/orphan-page", + } + mock_notion_cls.from_settings.return_value = mock_service + + incident = Incident.objects.create( + title="Test", + severity=IncidentSeverity.P1, + ) + + original_save = ExternalLink.save + + def save_raises_on_url_update(self, *args, **kwargs): + if kwargs.get("update_fields") == ["url"]: + raise RuntimeError("simulated db error") + return original_save(self, *args, **kwargs) + + with patch.object(ExternalLink, "save", save_raises_on_url_update): + _create_postmortem_doc(incident, "C99999") + + assert not ExternalLink.objects.filter( + incident=incident, type=ExternalLinkType.NOTION + ).exists() + mock_service.archive_page.assert_called_once_with("orphan-page-id") + + @patch("firetower.incidents.hooks.NotionService") + @patch("firetower.incidents.hooks._slack_service") + def test_bookmark_failure_does_not_propagate( + self, mock_slack, mock_notion_cls, settings + ): + settings.FIRETOWER_BASE_URL = "https://firetower.example.com" + mock_notion_cls.is_configured.return_value = True + mock_service = MagicMock() + mock_service.create_postmortem_page.return_value = { + "id": "page-456", + "url": "https://notion.so/page-456", + } + mock_notion_cls.from_settings.return_value = mock_service + mock_slack.add_bookmark.side_effect = RuntimeError("bookmark boom") + + incident = Incident.objects.create( + title="Test", + severity=IncidentSeverity.P1, + ) + + _create_postmortem_doc(incident, "C99999") + + link = ExternalLink.objects.get(incident=incident, type=ExternalLinkType.NOTION) + assert link.url == "https://notion.so/page-456" + + @patch("firetower.incidents.hooks.NotionService") + @patch("firetower.incidents.hooks._slack_service") + def test_skips_when_notion_service_returns_none(self, mock_slack, mock_notion_cls): + mock_notion_cls.is_configured.return_value = True + mock_notion_cls.from_settings.return_value = None + + incident = Incident.objects.create( + title="Test", + severity=IncidentSeverity.P1, + ) + + _create_postmortem_doc(incident, "C99999") + + assert not ExternalLink.objects.filter( + incident=incident, type=ExternalLinkType.NOTION + ).exists() + + @patch("firetower.incidents.hooks.NotionService") + @patch("firetower.incidents.hooks._slack_service") + def test_race_loser_archives_orphan_page( + self, mock_slack, mock_notion_cls, settings + ): + settings.FIRETOWER_BASE_URL = "https://firetower.example.com" + mock_notion_cls.is_configured.return_value = True + mock_service = MagicMock() + mock_notion_cls.from_settings.return_value = mock_service + + incident = Incident.objects.create( + title="Test", + severity=IncidentSeverity.P1, + ) + + def create_page_and_simulate_winner(**kwargs): + ExternalLink.objects.filter( + incident=incident, type=ExternalLinkType.NOTION, url="" + ).update(url="https://notion.so/winner-page") + return {"id": "orphan-page", "url": "https://notion.so/orphan-page"} + + mock_service.create_postmortem_page.side_effect = ( + create_page_and_simulate_winner + ) + + _create_postmortem_doc(incident, "C99999") + + mock_service.archive_page.assert_called_once_with("orphan-page") + link = ExternalLink.objects.get(incident=incident, type=ExternalLinkType.NOTION) + assert link.url == "https://notion.so/winner-page" + + @patch("firetower.incidents.hooks.NotionService") + @patch("firetower.incidents.hooks._slack_service") + def test_archives_page_when_placeholder_deleted( + self, mock_slack, mock_notion_cls, settings + ): + settings.FIRETOWER_BASE_URL = "https://firetower.example.com" + mock_notion_cls.is_configured.return_value = True + mock_service = MagicMock() + mock_notion_cls.from_settings.return_value = mock_service + + incident = Incident.objects.create( + title="Test", + severity=IncidentSeverity.P1, + ) + + def create_page_and_delete_placeholder(**kwargs): + ExternalLink.objects.filter( + incident=incident, type=ExternalLinkType.NOTION, url="" + ).delete() + return {"id": "orphan-page", "url": "https://notion.so/orphan-page"} + + mock_service.create_postmortem_page.side_effect = ( + create_page_and_delete_placeholder + ) + + _create_postmortem_doc(incident, "C99999") + + mock_service.archive_page.assert_called_once_with("orphan-page") + + +@pytest.mark.django_db +class TestOnIncidentCreatedPostmortemDoc: + @patch("firetower.incidents.hooks._create_postmortem_doc") + @patch("firetower.incidents.hooks._create_troubleshooting_doc") + @patch("firetower.incidents.hooks._create_status_channel_for_context") + @patch("firetower.incidents.hooks._invite_oncall_to_channel") + @patch("firetower.incidents.hooks._page_if_needed") + @patch("firetower.incidents.hooks._slack_service") + def test_creates_postmortem_doc( + self, + mock_slack, + mock_page, + mock_invite_oncall, + mock_status_channel, + mock_create_ts, + mock_create_pm, + ): + mock_slack.create_channel.return_value = "C99999" + mock_slack.build_channel_url.return_value = "https://slack.com/archives/C99999" + + incident = Incident.objects.create( + title="Production is down", + severity=IncidentSeverity.P1, + ) + + on_incident_created(incident) + + mock_create_pm.assert_called_once_with(incident, "C99999") + + @pytest.mark.django_db class TestLinearIssueTitle: def test_public_incident_without_sync_identifiers(self): diff --git a/src/firetower/integrations/services/notion.py b/src/firetower/integrations/services/notion.py index b2a35dfa..5b57d793 100644 --- a/src/firetower/integrations/services/notion.py +++ b/src/firetower/integrations/services/notion.py @@ -153,6 +153,12 @@ def get_users(self) -> dict[str, dict[str, str]]: self._users = users return users + def archive_page(self, page_id: str) -> None: + try: + self.client.pages.update(page_id=page_id, archived=True) + except Exception: + logger.exception("Failed to archive orphaned Notion page %s", page_id) + def create_postmortem_page( self, incident_number: str, @@ -244,15 +250,27 @@ def apply_template( self, page_id: str, messages: list[dict[str, Any]], - update_slack: bool = False, incident: Any | None = None, ) -> None: - if not update_slack and self.template_markdown: - content = self._render_template(self.template_markdown, incident) - if not self._send_markdown(page_id, content): - raise RuntimeError( - f"Failed to apply markdown template to Notion page {page_id}" + if self.template_markdown: + try: + existing = cast( + dict[str, Any], + self.client.blocks.children.list(block_id=page_id), ) + has_content = bool(existing.get("results")) + except Exception: + logger.warning( + "Failed to check existing content for page %s", + page_id, + ) + raise + if not has_content: + content = self._render_template(self.template_markdown, incident) + if not self._send_markdown(page_id, content): + raise RuntimeError( + f"Failed to apply markdown template to Notion page {page_id}" + ) timestamp = datetime.now(tz=UTC).strftime("%Y-%m-%d %H:%M:%S UTC") toggle: dict[str, Any] = { diff --git a/src/firetower/integrations/tests/test_notion.py b/src/firetower/integrations/tests/test_notion.py index 90508ddc..ed1ae1e0 100644 --- a/src/firetower/integrations/tests/test_notion.py +++ b/src/firetower/integrations/tests/test_notion.py @@ -152,6 +152,22 @@ def test_tolerates_user_with_missing_name(self, notion): assert users == {"a@sentry.io": {"name": "", "id": "U1"}} +class TestArchivePage: + def test_archives_page_successfully(self, notion): + notion.archive_page("page-123") + + notion.client.pages.update.assert_called_once_with( + page_id="page-123", archived=True + ) + + def test_logs_and_swallows_exception(self, notion): + notion.client.pages.update.side_effect = Exception("API error") + + notion.archive_page("page-123") + + notion.client.pages.update.assert_called_once() + + class TestSendMarkdown: def test_sends_insert_content_patch(self, notion): with patch( @@ -393,12 +409,13 @@ def _make_append_response(self, block_id: str) -> dict: return {"results": [{"id": block_id}]} def test_new_page_sends_markdown_then_slack_blocks(self, notion): + notion.client.blocks.children.list.return_value = {"results": []} notion.client.blocks.children.append.return_value = self._make_append_response( "toggle-id" ) with patch.object(notion, "_send_markdown", return_value=True) as mock_md: - notion.apply_template("page-id", messages=[], update_slack=False) + notion.apply_template("page-id", messages=[]) mock_md.assert_called_once_with("page-id", "# Template\n\nSome content.") notion.client.blocks.children.append.assert_called_once() @@ -408,6 +425,7 @@ def test_new_page_sends_markdown_then_slack_blocks(self, notion): assert toggle_block["type"] == "toggle" def test_raises_when_send_markdown_fails(self, notion): + notion.client.blocks.children.list.return_value = {"results": []} notion.client.blocks.children.append.return_value = self._make_append_response( "toggle-id" ) @@ -416,15 +434,16 @@ def test_raises_when_send_markdown_fails(self, notion): patch.object(notion, "_send_markdown", return_value=False), pytest.raises(RuntimeError, match="template"), ): - notion.apply_template("page-id", messages=[], update_slack=False) + notion.apply_template("page-id", messages=[]) def test_raises_when_toggle_creation_fails(self, notion): + notion.client.blocks.children.list.return_value = {"results": []} with ( patch.object(notion, "_send_markdown", return_value=True), patch.object(notion, "_append_children", return_value=None), pytest.raises(RuntimeError, match="toggle"), ): - notion.apply_template("page-id", messages=[], update_slack=False) + notion.apply_template("page-id", messages=[]) def test_interpolates_incident_into_template(self): svc = NotionService( @@ -433,6 +452,7 @@ def test_interpolates_incident_into_template(self): template_markdown="# PM\n[Action Items]({linear_url})", ) svc.client = MagicMock() + svc.client.blocks.children.list.return_value = {"results": []} svc.client.blocks.children.append.return_value = self._make_append_response( "toggle-id" ) @@ -442,24 +462,31 @@ def test_interpolates_incident_into_template(self): } with patch.object(svc, "_send_markdown", return_value=True) as mock_md: - svc.apply_template( - "page-id", messages=[], update_slack=False, incident=incident - ) + svc.apply_template("page-id", messages=[], incident=incident) mock_md.assert_called_once_with( "page-id", "# PM\n[Action Items](https://linear.app/team/issue/INC-42)" ) - def test_update_slack_skips_markdown_template(self, notion): + def test_skips_template_when_page_has_content(self, notion): + notion.client.blocks.children.list.return_value = { + "results": [{"id": "existing-block"}] + } notion.client.blocks.children.append.return_value = self._make_append_response( "toggle-id" ) with patch.object(notion, "_send_markdown", return_value=True) as mock_md: - notion.apply_template("page-id", messages=[], update_slack=True) + notion.apply_template("page-id", messages=[]) mock_md.assert_not_called() + def test_raises_on_transient_api_error_checking_content(self, notion): + notion.client.blocks.children.list.side_effect = Exception("connection reset") + + with pytest.raises(Exception, match="connection reset"): + notion.apply_template("page-id", messages=[]) + def test_appends_replies_as_children_of_parent_bullet(self, notion): notion.client.blocks.children.append.side_effect = [ {"results": [{"id": "toggle-id"}]}, @@ -483,7 +510,10 @@ def test_appends_replies_as_children_of_parent_bullet(self, notion): ] with patch.object(notion, "_send_markdown", return_value=True): - notion.apply_template("page-id", messages=messages, update_slack=True) + notion.apply_template( + "page-id", + messages=messages, + ) reply_call = notion.client.blocks.children.append.call_args_list[2] assert reply_call.kwargs["block_id"] == "bullet-id" @@ -518,7 +548,10 @@ def test_appends_images_before_replies_as_children_of_bullet(self, notion): patch.object(notion, "_send_markdown", return_value=True), patch.object(notion, "_create_image_block", return_value=image_block), ): - notion.apply_template("page-id", messages=messages, update_slack=True) + notion.apply_template( + "page-id", + messages=messages, + ) children_call = notion.client.blocks.children.append.call_args_list[2] assert children_call.kwargs["block_id"] == "bullet-id" @@ -545,7 +578,10 @@ def test_skips_failed_image_uploads(self, notion): patch.object(notion, "_send_markdown", return_value=True), patch.object(notion, "_create_image_block", return_value=None), ): - notion.apply_template("page-id", messages=messages, update_slack=True) + notion.apply_template( + "page-id", + messages=messages, + ) # Only 2 appends: toggle creation + bullet batch. No children call since image failed. assert notion.client.blocks.children.append.call_count == 2 @@ -573,7 +609,10 @@ def test_batches_children_at_block_limit(self, notion): ] with patch.object(notion, "_send_markdown", return_value=True): - notion.apply_template("page-id", messages=messages, update_slack=True) + notion.apply_template( + "page-id", + messages=messages, + ) # 4 total: toggle, bullet batch, first 85 replies, last 5 replies assert notion.client.blocks.children.append.call_count == 4 @@ -634,7 +673,10 @@ def test_logs_warning_when_notion_returns_fewer_ids_than_batch(self, notion): patch.object(notion, "_send_markdown", return_value=True), patch("firetower.integrations.services.notion.logger") as mock_logger, ): - notion.apply_template("page-id", messages=messages, update_slack=True) + notion.apply_template( + "page-id", + messages=messages, + ) # Confirm the mismatch warning was emitted. warning_args = [call[0][0] for call in mock_logger.warning.call_args_list] @@ -665,7 +707,10 @@ def test_aborts_and_logs_error_when_create_slack_content_makes_no_progress( ), patch("firetower.integrations.services.notion.logger") as mock_logger, ): - notion.apply_template("page-id", messages=messages, update_slack=True) + notion.apply_template( + "page-id", + messages=messages, + ) error_args = [call[0][0] for call in mock_logger.error.call_args_list] assert any("no progress" in msg for msg in error_args) @@ -692,7 +737,10 @@ def test_logs_warning_when_batch_append_returns_none(self, notion): patch.object(notion, "_send_markdown", return_value=True), patch("firetower.integrations.services.notion.logger") as mock_logger, ): - notion.apply_template("page-id", messages=messages, update_slack=True) + notion.apply_template( + "page-id", + messages=messages, + ) warning_args = [call[0][0] for call in mock_logger.warning.call_args_list] assert any("absent from the Notion dump" in msg for msg in warning_args) diff --git a/src/firetower/slack_app/handlers/dumpslack.py b/src/firetower/slack_app/handlers/dumpslack.py index f2918b4f..cf9c5bac 100644 --- a/src/firetower/slack_app/handlers/dumpslack.py +++ b/src/firetower/slack_app/handlers/dumpslack.py @@ -1,6 +1,7 @@ import logging import re import threading +import time from datetime import UTC, datetime from typing import Any @@ -48,8 +49,8 @@ def _trigger_slack_dump(client: Any, channel_id: str, incident: Any) -> None: page_id: str | None = None page_url: str = "" - update_slack: bool = False notion_page_created: bool = False + created_page_id: str | None = None try: with transaction.atomic(): @@ -62,6 +63,36 @@ def _trigger_slack_dump(client: Any, channel_id: str, incident: Any) -> None: ) existing_url = notion_link.url + # Another process (e.g. _create_postmortem_doc) may have claimed the + # placeholder row but not yet saved the Notion URL. Wait for it. + if not db_record_created and not existing_url: + for _ in range(15): + time.sleep(1) + try: + notion_link.refresh_from_db() + except ExternalLink.DoesNotExist: + break + if notion_link.url: + existing_url = notion_link.url + break + + if not existing_url: + # Placeholder was deleted (creation failed) or timed out + # (process crashed without cleanup). Re-acquire under lock + # so we can take over an orphaned placeholder or create a + # fresh row. + with transaction.atomic(): + notion_link, db_record_created = ( + ExternalLink.objects.select_for_update().get_or_create( + incident=incident, + type=ExternalLinkType.NOTION, + defaults={"url": ""}, + ) + ) + existing_url = notion_link.url + if not db_record_created and not existing_url: + db_record_created = True + # Notion API calls happen outside the transaction to avoid holding the # SELECT FOR UPDATE lock while making slow external requests. if not db_record_created and existing_url: @@ -79,7 +110,6 @@ def _trigger_slack_dump(client: Any, channel_id: str, incident: Any) -> None: ) return page_url = existing_url - update_slack = True else: base_url = settings.FIRETOWER_BASE_URL incident_url = f"{base_url}/{incident.incident_number}" @@ -94,9 +124,12 @@ def _trigger_slack_dump(client: Any, channel_id: str, incident: Any) -> None: ) page_id = page["id"] page_url = page["url"] + created_page_id = page["id"] # Re-acquire lock before saving to detect concurrent callers that - # also saw url="" and raced to create a page. The loser exits here; - # the winner's URL and apply_template call stand. + # also saw url="" and raced to create a page. The loser adopts the + # winner's page so apply_template still populates it. + orphan_page_id = page["id"] + race_lost = False with transaction.atomic(): notion_link = ExternalLink.objects.select_for_update().get( incident=incident, @@ -107,15 +140,34 @@ def _trigger_slack_dump(client: Any, channel_id: str, incident: Any) -> None: "Race condition: concurrent call already created Notion page for %s", incident.incident_number, ) + race_lost = True + page_id = _extract_notion_page_id(notion_link.url) + page_url = notion_link.url + else: + notion_link.url = page_url + notion_link.save(update_fields=["url"]) + notion_page_created = True + if race_lost: + notion.archive_page(orphan_page_id) + if not page_id: return - notion_link.url = page_url - notion_link.save(update_fields=["url"]) - notion_page_created = True except Exception: logger.exception( "Failed to create Notion postmortem page for %s", incident.incident_number, ) + if created_page_id and not notion_page_created: + try: + notion.archive_page(created_page_id) + except Exception: + logger.exception( + "Failed to archive orphaned Notion page %s", created_page_id + ) + ExternalLink.objects.filter( + incident=incident, + type=ExternalLinkType.NOTION, + url="", + ).delete() try: client.chat_postMessage( channel=channel_id, @@ -129,13 +181,13 @@ def _trigger_slack_dump(client: Any, channel_id: str, incident: Any) -> None: action = "Created" if notion_page_created else "Updated" + assert page_id is not None + slack_service = SlackService() messages = _get_channel_messages(slack_service, channel_id) try: - notion.apply_template( - page_id, messages, update_slack=update_slack, incident=incident - ) + notion.apply_template(page_id, messages, incident=incident) except Exception: logger.exception("Failed to populate Notion page %s", page_id) try: @@ -161,11 +213,15 @@ def _trigger_slack_dump(client: Any, channel_id: str, incident: Any) -> None: except Exception: logger.exception("Failed to add AI timeline to Notion page %s", page_id) - if notion_page_created: - try: - slack_service.add_bookmark(channel_id, "Postmortem Doc", page_url) - except Exception: - logger.exception("Failed to add Notion bookmark to channel %s", channel_id) + try: + if slack_service.client: + existing = slack_service.client.bookmarks_list(channel_id=channel_id) + bookmarks: list[dict[str, Any]] = existing.get("bookmarks", []) + has_bookmark = any(b.get("title") == "Postmortem Doc" for b in bookmarks) + if not has_bookmark: + slack_service.add_bookmark(channel_id, "Postmortem Doc", page_url) + except Exception: + logger.exception("Failed to add Notion bookmark to channel %s", channel_id) try: client.chat_postMessage( diff --git a/src/firetower/slack_app/tests/handlers/test_dumpslack.py b/src/firetower/slack_app/tests/handlers/test_dumpslack.py index 4918f772..6c734b8b 100644 --- a/src/firetower/slack_app/tests/handlers/test_dumpslack.py +++ b/src/firetower/slack_app/tests/handlers/test_dumpslack.py @@ -5,7 +5,7 @@ from django.contrib.auth.models import User from slack_sdk.errors import SlackApiError -from firetower.incidents.models import Incident, IncidentSeverity +from firetower.incidents.models import ExternalLinkType, Incident, IncidentSeverity from firetower.integrations.services.slack import SlackService, is_slack_url from firetower.slack_app.handlers.dumpslack import ( _backfill_milestones, @@ -548,6 +548,279 @@ def test_chat_post_message_failure_does_not_raise(self): client.chat_postMessage.assert_called() + def test_waits_for_in_progress_postmortem_doc(self): + notion_url = "https://notion.so/12345678-1234-1234-1234-123456789abc" + client = MagicMock() + mock_incident = MagicMock(is_private=False) + mock_incident.captain = None + mock_notion_link = MagicMock(url="") + mock_notion = MagicMock() + call_count = 0 + + def simulate_url_appearing(): + nonlocal call_count + call_count += 1 + if call_count >= 2: + mock_notion_link.url = notion_url + + mock_notion_link.refresh_from_db = simulate_url_appearing + + with ( + patch( + "firetower.slack_app.handlers.dumpslack.NotionService.from_settings", + return_value=mock_notion, + ), + patch( + "firetower.slack_app.handlers.dumpslack._get_channel_messages", + return_value=[], + ), + patch("firetower.slack_app.handlers.dumpslack.ExternalLink") as mock_el, + patch("firetower.slack_app.handlers.dumpslack.transaction"), + patch("firetower.slack_app.handlers.dumpslack.time") as mock_time, + ): + mock_el.objects.select_for_update.return_value.get_or_create.return_value = ( + mock_notion_link, + False, + ) + _trigger_slack_dump(client, "C123", mock_incident) + + mock_notion.create_postmortem_page.assert_not_called() + mock_time.sleep.assert_called() + assert client.chat_postMessage.call_count == 1 + posted = client.chat_postMessage.call_args[1]["text"] + assert notion_url in posted + + def test_times_out_waiting_and_takes_over_orphaned_placeholder(self): + client = MagicMock() + mock_incident = MagicMock(is_private=False) + mock_incident.captain = None + mock_notion_link = MagicMock(url="") + mock_notion = MagicMock() + mock_page = {"id": "page-id", "url": "https://notion.so/page-id"} + mock_notion.create_postmortem_page.return_value = mock_page + + with ( + patch( + "firetower.slack_app.handlers.dumpslack.NotionService.from_settings", + return_value=mock_notion, + ), + patch( + "firetower.slack_app.handlers.dumpslack._get_channel_messages", + return_value=[], + ), + patch("firetower.slack_app.handlers.dumpslack.ExternalLink") as mock_el, + patch("firetower.slack_app.handlers.dumpslack.transaction"), + patch("firetower.slack_app.handlers.dumpslack.time") as mock_time, + patch("firetower.slack_app.handlers.dumpslack.settings") as mock_settings, + ): + mock_settings.FIRETOWER_BASE_URL = "https://firetower.example.com" + mock_el.objects.select_for_update.return_value.get_or_create.return_value = ( + mock_notion_link, + False, + ) + mock_el.objects.select_for_update.return_value.get.return_value = ( + mock_notion_link + ) + _trigger_slack_dump(client, "C123", mock_incident) + + assert mock_time.sleep.call_count == 15 + mock_notion.create_postmortem_page.assert_called_once() + + def test_recovers_when_placeholder_deleted_during_poll(self): + client = MagicMock() + mock_incident = MagicMock(is_private=False) + mock_incident.captain = None + mock_notion_link = MagicMock(url="") + fresh_notion_link = MagicMock(url="") + mock_notion = MagicMock() + mock_page = {"id": "page-id", "url": "https://notion.so/page-id"} + mock_notion.create_postmortem_page.return_value = mock_page + + with ( + patch( + "firetower.slack_app.handlers.dumpslack.NotionService.from_settings", + return_value=mock_notion, + ), + patch( + "firetower.slack_app.handlers.dumpslack._get_channel_messages", + return_value=[], + ), + patch("firetower.slack_app.handlers.dumpslack.ExternalLink") as mock_el, + patch("firetower.slack_app.handlers.dumpslack.transaction"), + patch("firetower.slack_app.handlers.dumpslack.time") as mock_time, + patch("firetower.slack_app.handlers.dumpslack.settings") as mock_settings, + ): + mock_settings.FIRETOWER_BASE_URL = "https://firetower.example.com" + mock_el.DoesNotExist = type("DoesNotExist", (Exception,), {}) + mock_notion_link.refresh_from_db.side_effect = mock_el.DoesNotExist + mock_el.objects.select_for_update.return_value.get_or_create.side_effect = [ + (mock_notion_link, False), + (fresh_notion_link, True), + ] + mock_el.objects.select_for_update.return_value.get.return_value = ( + fresh_notion_link + ) + _trigger_slack_dump(client, "C123", mock_incident) + + mock_time.sleep.assert_called_once() + mock_notion.create_postmortem_page.assert_called_once() + + def test_uses_existing_url_and_updates_slack(self): + existing_url = "https://notion.so/12345678-1234-1234-1234-123456789abc" + client = MagicMock() + mock_incident = MagicMock(is_private=False) + mock_incident.captain = None + mock_notion_link = MagicMock(url=existing_url) + mock_notion = MagicMock() + mock_slack_service = MagicMock() + mock_slack_service.client.bookmarks_list.return_value = {"bookmarks": []} + + with ( + patch( + "firetower.slack_app.handlers.dumpslack.NotionService.from_settings", + return_value=mock_notion, + ), + patch( + "firetower.slack_app.handlers.dumpslack._get_channel_messages", + return_value=[], + ), + patch("firetower.slack_app.handlers.dumpslack.ExternalLink") as mock_el, + patch("firetower.slack_app.handlers.dumpslack.transaction"), + patch( + "firetower.slack_app.handlers.dumpslack.SlackService", + return_value=mock_slack_service, + ), + ): + mock_el.objects.select_for_update.return_value.get_or_create.return_value = ( + mock_notion_link, + False, + ) + _trigger_slack_dump(client, "C123", mock_incident) + + mock_notion.create_postmortem_page.assert_not_called() + mock_notion.apply_template.assert_called_once() + call_args = mock_notion.apply_template.call_args + assert call_args[0][0] == "12345678-1234-1234-1234-123456789abc" + assert call_args[1]["incident"] == mock_incident + mock_slack_service.add_bookmark.assert_called_once_with( + "C123", "Postmortem Doc", existing_url + ) + posted = client.chat_postMessage.call_args[1]["text"] + assert "Updated" in posted + assert existing_url in posted + + def test_skips_bookmark_when_already_exists(self): + existing_url = "https://notion.so/12345678-1234-1234-1234-123456789abc" + client = MagicMock() + mock_incident = MagicMock(is_private=False) + mock_incident.captain = None + mock_notion_link = MagicMock(url=existing_url) + mock_notion = MagicMock() + mock_slack_service = MagicMock() + mock_slack_service.client.bookmarks_list.return_value = { + "bookmarks": [{"title": "Postmortem Doc", "link": existing_url}] + } + + with ( + patch( + "firetower.slack_app.handlers.dumpslack.NotionService.from_settings", + return_value=mock_notion, + ), + patch( + "firetower.slack_app.handlers.dumpslack._get_channel_messages", + return_value=[], + ), + patch("firetower.slack_app.handlers.dumpslack.ExternalLink") as mock_el, + patch("firetower.slack_app.handlers.dumpslack.transaction"), + patch( + "firetower.slack_app.handlers.dumpslack.SlackService", + return_value=mock_slack_service, + ), + ): + mock_el.objects.select_for_update.return_value.get_or_create.return_value = ( + mock_notion_link, + False, + ) + _trigger_slack_dump(client, "C123", mock_incident) + + mock_slack_service.add_bookmark.assert_not_called() + + def test_race_loser_adopts_winner_page_and_archives_orphan(self): + winner_url = "https://notion.so/12345678-1234-1234-1234-123456789abc" + client = MagicMock() + mock_incident = MagicMock(is_private=False) + mock_incident.captain = None + mock_page = {"id": "our-page", "url": "https://notion.so/our-page-id"} + mock_notion_link = MagicMock(url="") + winner_link = MagicMock(url=winner_url) + mock_notion = MagicMock() + mock_notion.create_postmortem_page.return_value = mock_page + + with ( + patch( + "firetower.slack_app.handlers.dumpslack.NotionService.from_settings", + return_value=mock_notion, + ), + patch( + "firetower.slack_app.handlers.dumpslack._get_channel_messages", + return_value=[], + ), + patch("firetower.slack_app.handlers.dumpslack.ExternalLink") as mock_el, + patch("firetower.slack_app.handlers.dumpslack.transaction"), + patch("firetower.slack_app.handlers.dumpslack.settings") as mock_settings, + ): + mock_settings.FIRETOWER_BASE_URL = "https://firetower.example.com" + mock_el.objects.select_for_update.return_value.get_or_create.return_value = ( + mock_notion_link, + True, + ) + mock_el.objects.select_for_update.return_value.get.return_value = ( + winner_link + ) + _trigger_slack_dump(client, "C123", mock_incident) + + mock_notion.archive_page.assert_called_once_with("our-page") + mock_notion.apply_template.assert_called_once() + call_args = mock_notion.apply_template.call_args + assert call_args[0][0] == "12345678-1234-1234-1234-123456789abc" + assert call_args[1]["incident"] == mock_incident + posted = client.chat_postMessage.call_args[1]["text"] + assert winner_url in posted + + def test_cleans_up_placeholder_on_failure(self): + client = MagicMock() + mock_incident = MagicMock(is_private=False) + mock_incident.captain = None + mock_notion_link = MagicMock(url="") + mock_notion = MagicMock() + mock_notion.create_postmortem_page.side_effect = RuntimeError("API error") + + with ( + patch( + "firetower.slack_app.handlers.dumpslack.NotionService.from_settings", + return_value=mock_notion, + ), + patch("firetower.slack_app.handlers.dumpslack.ExternalLink") as mock_el, + patch("firetower.slack_app.handlers.dumpslack.transaction"), + patch("firetower.slack_app.handlers.dumpslack.settings") as mock_settings, + ): + mock_settings.FIRETOWER_BASE_URL = "https://firetower.example.com" + mock_el.objects.select_for_update.return_value.get_or_create.return_value = ( + mock_notion_link, + True, + ) + mock_el.objects.filter.return_value.delete.return_value = (1, {}) + _trigger_slack_dump(client, "C123", mock_incident) + + mock_el.objects.filter.assert_called_once_with( + incident=mock_incident, + type=ExternalLinkType.NOTION, + url="", + ) + mock_el.objects.filter.return_value.delete.assert_called_once() + posted = client.chat_postMessage.call_args[1]["text"] + assert "Failed" in posted + class TestHandleDumpslackCommand: def _make_args(self, notion_config=None, channel_id="C123"):