Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 18 additions & 10 deletions src/firetower/incidents/hooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ class ChannelSetupContext:
title: str
severity: str
is_private: bool
skip_paging: bool = False
Comment thread
github-actions[bot] marked this conversation as resolved.
captain_slack_id: str | None = None
captain_name: str | None = None
reporter_slack_id: str | None = None
Expand All @@ -108,14 +109,15 @@ def page_for_channel(
links: list[dict[str, str]] | None = None,
channel_id: str | None = None,
is_private: bool = False,
skip_paging: bool = False,
) -> set[str]:
"""Trigger PD pages for pageable severities. No DB access.

Returns the set of policy names that were successfully paged.
"""
paged: set[str] = set()

if is_private or severity not in HIGH_SEVERITIES:
if is_private or skip_paging or severity not in HIGH_SEVERITIES:
return paged

pd_config = settings.PAGERDUTY
Expand Down Expand Up @@ -168,7 +170,12 @@ def page_for_channel(
return paged


def _page_if_needed(incident: Incident, channel_id: str | None = None) -> set[str]:
def _page_if_needed(
incident: Incident,
channel_id: str | None = None,
*,
skip_paging: bool = False,
) -> set[str]:
links: list[dict[str, str]] = [
{"href": _build_incident_url(incident), "text": "View in Firetower"}
]
Expand All @@ -187,6 +194,7 @@ def _page_if_needed(incident: Incident, channel_id: str | None = None) -> set[st
links=links,
channel_id=channel_id,
is_private=incident.is_private,
skip_paging=skip_paging,
)


Expand Down Expand Up @@ -307,10 +315,11 @@ def _invite_oncall_to_channel(
slack_service: SlackService,
*,
is_private: bool = False,
skip_paging: bool = False,
paged_policies: set[str] | None = None,
) -> None:
"""Invite on-call users to a channel. No DB access."""
if is_private or severity not in HIGH_SEVERITIES:
if is_private or skip_paging or severity not in HIGH_SEVERITIES:
return

pd_config = settings.PAGERDUTY
Expand Down Expand Up @@ -900,6 +909,7 @@ def decorate_incident_channel(
ctx.channel_id,
slack_service,
is_private=ctx.is_private,
skip_paging=ctx.skip_paging,
paged_policies=paged_policies,
)
except Exception:
Expand Down Expand Up @@ -1178,7 +1188,7 @@ def schedule_statuspage_followup_reminder(
)


def on_incident_created(incident: Incident) -> None:
def on_incident_created(incident: Incident, *, skip_paging: bool = False) -> None:
# Use get_or_create to atomically claim the ExternalLink row before calling
# the Slack API. If two concurrent requests both reach this point, only one
# will get created=True; the other bails out without creating a second channel.
Expand Down Expand Up @@ -1220,14 +1230,11 @@ def on_incident_created(incident: Incident) -> None:
f"Failed to create Slack channel for incident {incident.id}"
)

# Page P0/P1 early so on-call responders are alerted before we decorate the
# Slack channel. _page_if_needed reads the Slack link URL from the DB
# (already saved above), so the PD payload is complete even if channel_id
# is None here; channel_id is only used to post a fallback warning back to
# Slack if PD fails.
paged_policies: set[str] = set()
try:
paged_policies = _page_if_needed(incident, channel_id=channel_id)
paged_policies = _page_if_needed(
incident, channel_id=channel_id, skip_paging=skip_paging
)
except Exception:
logger.exception(f"Failed to page for incident {incident.id}")

Expand Down Expand Up @@ -1264,6 +1271,7 @@ def on_incident_created(incident: Incident) -> None:
title=incident.title,
severity=incident.severity,
is_private=incident.is_private,
skip_paging=skip_paging,
captain_slack_id=captain_slack_id,
captain_name=captain_name,
reporter_slack_id=reporter_slack_id,
Expand Down
4 changes: 3 additions & 1 deletion src/firetower/incidents/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -537,7 +537,9 @@ def create(self, validated_data: dict) -> Incident:
self._auto_compute_downtime(incident, validated_data)

if settings.HOOKS_ENABLED and not self.context.get("skip_hooks"):
on_incident_created(incident)
on_incident_created(
incident, skip_paging=self.context.get("skip_paging", False)
)

Comment thread
sentry-warden[bot] marked this conversation as resolved.
return incident

Expand Down
75 changes: 73 additions & 2 deletions src/firetower/incidents/tests/test_hooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
DEFAULT_STATUSPAGE_WARNING_BUFFER_MINUTES,
_create_status_channel,
_create_troubleshooting_doc,
_invite_oncall_to_channel,
_invite_oncall_users,
_linear_issue_title,
_page_if_needed,
Expand Down Expand Up @@ -955,6 +956,21 @@ def test_private_incident_does_not_page(self, mock_pd_cls, settings):
mock_pd_cls.assert_not_called()
assert result == set()

@patch("firetower.incidents.hooks.PagerDutyService")
def test_skip_paging_does_not_page(self, mock_pd_cls, settings):
settings.PAGERDUTY = MOCK_PD_CONFIG
settings.FIRETOWER_BASE_URL = "https://firetower.example.com"

incident = Incident.objects.create(
title="Self-handled outage",
severity=IncidentSeverity.P0,
)

result = _page_if_needed(incident, skip_paging=True)

mock_pd_cls.assert_not_called()
assert result == set()

@patch("firetower.incidents.hooks.PagerDutyService")
def test_pages_only_configured_policies(self, mock_pd_cls, settings):
settings.PAGERDUTY = {
Expand Down Expand Up @@ -1028,6 +1044,23 @@ def test_returns_partial_set_on_mixed_results(self, mock_pd_cls, settings):

assert result == {"IMOC"}

@patch("firetower.incidents.hooks.PagerDutyService")
def test_returns_empty_set_when_skip_paging(self, mock_pd_cls, settings):
settings.PAGERDUTY = MOCK_PD_CONFIG
mock_slack = MagicMock()

result = page_for_channel(
IncidentSeverity.P0,
"INC-100",
"Self-handled outage",
mock_slack,
channel_id="C12345",
skip_paging=True,
)

assert result == set()
mock_pd_cls.assert_not_called()

@patch("firetower.incidents.hooks.PagerDutyService")
def test_returns_empty_set_for_non_pageable_severity(self, mock_pd_cls, settings):
settings.PAGERDUTY = MOCK_PD_CONFIG
Expand Down Expand Up @@ -1059,7 +1092,26 @@ def test_pages_high_sev_on_p0_creation(self, mock_slack, mock_page):

on_incident_created(incident)

mock_page.assert_called_once_with(incident, channel_id="C99999")
mock_page.assert_called_once_with(
incident, channel_id="C99999", skip_paging=False
)

@patch("firetower.incidents.hooks._page_if_needed")
@patch("firetower.incidents.hooks._slack_service")
def test_passes_skip_paging_to_page_if_needed(self, mock_slack, mock_page):
mock_slack.create_channel.return_value = "C99999"
mock_slack.build_channel_url.return_value = "https://slack.com/archives/C99999"

incident = Incident.objects.create(
title="Self-handled outage",
severity=IncidentSeverity.P0,
)

on_incident_created(incident, skip_paging=True)

mock_page.assert_called_once_with(
incident, channel_id="C99999", skip_paging=True
)

@patch("firetower.incidents.hooks._page_if_needed")
@patch("firetower.incidents.hooks._slack_service")
Expand All @@ -1074,7 +1126,9 @@ def test_calls_page_regardless_of_severity(self, mock_slack, mock_page):

on_incident_created(incident)

mock_page.assert_called_once_with(incident, channel_id="C99999")
mock_page.assert_called_once_with(
incident, channel_id="C99999", skip_paging=False
)

@patch("firetower.incidents.hooks._create_status_channel_for_context")
@patch("firetower.incidents.hooks._invite_oncall_to_channel")
Expand Down Expand Up @@ -1312,6 +1366,22 @@ def test_private_incident_skips_oncall_invite(
mock_slack.invite_to_channel.assert_not_called()
mock_slack.post_message.assert_not_called()

@patch("firetower.incidents.hooks._slack_service")
@patch("firetower.incidents.hooks.PagerDutyService")
def test_skip_paging_skips_oncall_invite(self, mock_pd_cls, mock_slack, settings):
settings.PAGERDUTY = MOCK_PD_CONFIG

_invite_oncall_to_channel(
IncidentSeverity.P0,
"C99999",
mock_slack,
skip_paging=True,
)

mock_pd_cls.assert_not_called()
mock_slack.invite_to_channel.assert_not_called()
mock_slack.post_message.assert_not_called()

@patch("firetower.incidents.hooks._slack_service")
@patch("firetower.incidents.hooks.PagerDutyService")
def test_imoc_excludes_oncalls_above_max_level(
Expand Down Expand Up @@ -1579,6 +1649,7 @@ def test_on_incident_created_calls_invite_oncall(
"C99999",
mock_slack,
is_private=False,
skip_paging=False,
paged_policies=set(),
)

Expand Down
2 changes: 1 addition & 1 deletion src/firetower/incidents/tests/test_serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ def test_create_calls_on_incident_created(self, mock_hook):
)
assert serializer.is_valid(), serializer.errors
incident = serializer.save()
mock_hook.assert_called_once_with(incident)
mock_hook.assert_called_once_with(incident, skip_paging=False)

@patch("firetower.incidents.serializers.on_incident_updated")
def test_update_calls_on_incident_updated_with_status(self, mock_hook):
Expand Down
2 changes: 2 additions & 0 deletions src/firetower/slack_app/bolt.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
from firetower.slack_app.handlers.new_incident import (
handle_new_command,
handle_new_incident_submission,
handle_severity_action,

Check warning on line 33 in src/firetower/slack_app/bolt.py

View check run for this annotation

@sentry/warden / warden: find-bugs

[VQZ-G9N] handle_severity_action re-adds skip_paging on every high-severity change, silently overriding user opt-out (additional location)

In `handle_severity_action` (line 277-278), `skip_paging` is unconditionally re-added to `selected_values` whenever severity is P0/P1 and `skip_paging` is not already in the current state. The handler cannot distinguish a transition into high-severity from a high→high transition, so if a user selects P0, explicitly unchecks 'Skip paging on-call responders', then changes severity from P0→P1 (or any high-severity to high-severity transition), Slack's `state.values` reflects the unchecked state, the condition triggers, and skip_paging is re-enabled — resulting in no PagerDuty page for a P0/P1 incident despite the user's intent. The same defeats any attempt to uncheck skip_paging followed by a severity change within the high band.
handle_tag_options,
)
from firetower.slack_app.handlers.reopen import handle_reopen_command
Expand Down Expand Up @@ -239,6 +240,7 @@
app.view("statuspage_modal")(
_with_metrics("statuspage_modal")(handle_statuspage_submission)
)
app.action("severity")(_with_metrics("severity_action")(handle_severity_action))

Check warning on line 243 in src/firetower/slack_app/bolt.py

View check run for this annotation

@sentry/warden / warden: find-bugs

[VQZ-G9N] handle_severity_action re-adds skip_paging on every high-severity change, silently overriding user opt-out (additional location)

In `handle_severity_action` (line 277-278), `skip_paging` is unconditionally re-added to `selected_values` whenever severity is P0/P1 and `skip_paging` is not already in the current state. The handler cannot distinguish a transition into high-severity from a high→high transition, so if a user selects P0, explicitly unchecks 'Skip paging on-call responders', then changes severity from P0→P1 (or any high-severity to high-severity transition), Slack's `state.values` reflects the unchecked state, the condition triggers, and skip_paging is re-enabled — resulting in no PagerDuty page for a P0/P1 incident despite the user's intent. The same defeats any attempt to uncheck skip_paging followed by a severity change within the high band.
Comment thread
github-actions[bot] marked this conversation as resolved.
app.action("component_impact_select")(
_with_metrics("component_impact_select")(handle_component_impact_select)
)
Expand Down
Loading
Loading