Skip to content

Commit

Permalink
Merge pull request #7831 from ministryofjustice/DSO/DSOS-2903/create-…
Browse files Browse the repository at this point in the history
…default-alarm-priority

set slack alarm priority to default so they appear in the PagerDuty UI
  • Loading branch information
robertsweetman committed Sep 4, 2024
2 parents 4e076c4 + ac5c011 commit 7a95be1
Show file tree
Hide file tree
Showing 4 changed files with 27 additions and 162 deletions.
3 changes: 0 additions & 3 deletions terraform/pagerduty/aws.tf
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,6 @@ resource "aws_secretsmanager_secret_version" "pagerduty_integration_keys" {
iaps_prod_alarms = pagerduty_service_integration.iaps_prod_cloudwatch.integration_key,
laa_mojfin_prod_alarms = pagerduty_service_integration.laa_mojfin_prod_cloudwatch.integration_key,
laa_mojfin_non_prod_alarms = pagerduty_service_integration.laa_mojfin_non_prod_cloudwatch.integration_key,
hmpps_shef_dba_high_priority = pagerduty_service_integration.hmpps_shef_dba_high_priority.integration_key,
hmpps_shef_dba_low_priority = pagerduty_service_integration.hmpps_shef_dba_low_priority.integration_key,
hmpps_shef_dba_non_prod = pagerduty_service_integration.hmpps_shef_dba_non_prod.integration_key,
test_alarms = pagerduty_service_integration.test_alarms.integration_key,
laa_portal_nonprod_alarms = pagerduty_service_integration.laa_portal_nonprod_cloudwatch.integration_key,
laa_portal_prod_alarms = pagerduty_service_integration.laa_portal_prod_cloudwatch.integration_key
Expand Down
9 changes: 8 additions & 1 deletion terraform/pagerduty/data.tf
Original file line number Diff line number Diff line change
Expand Up @@ -64,4 +64,11 @@ data "aws_iam_policy_document" "pagerduty_kms" {
values = ["${data.aws_organizations_organization.root_account.id}/*/${local.environment_management.modernisation_platform_organisation_unit_id}/*"]
}
}
}
}

# Default incident priority (P5) for the DSO PagerDuty Slack integration.
# Where priorities is left as ["*"], as it is elsewhere, services with alarms
# do not change the dashboards in the PagerDuty UI.
# The priority can be overridden manually later, once someone has looked at
# the actual alert.
data "pagerduty_priority" "p5" {
  name = "P5"
}
6 changes: 0 additions & 6 deletions terraform/pagerduty/locals.tf
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@ locals {

existing_users = {
karen_botsh = data.pagerduty_user.karen_botsh,
stephen_linden = data.pagerduty_user.stephen_linden,
simon_pledger = data.pagerduty_user.simon_pledger,
mark_roberts = data.pagerduty_user.mark_roberts,
aaron_robinson = data.pagerduty_user.aaron_robinson,
Expand All @@ -53,7 +52,6 @@ locals {
# oncall users local shortcut to make schedules a bit neater
david_elliott = pagerduty_user.pager_duty_users["david_elliott"].id
david_sibley = pagerduty_user.pager_duty_users["david_sibley"].id
stephen_linden = data.pagerduty_user.stephen_linden.id
edward_proctor = pagerduty_user.pager_duty_users["edward_proctor"].id
ewa_stempel = pagerduty_user.pager_duty_users["ewa_stempel"].id
mark_roberts = data.pagerduty_user.mark_roberts.id
Expand All @@ -72,10 +70,6 @@ locals {
}

# existing users
# Looks up the already-existing PagerDuty user for Stephen Linden by email
# (local part plus the shared local.digital_email_suffix).
data "pagerduty_user" "stephen_linden" {
  email = "stephen.linden${local.digital_email_suffix}"
}

# Looks up the already-existing PagerDuty user for Karen Botsh by email
# (local part plus the shared local.digital_email_suffix).
data "pagerduty_user" "karen_botsh" {
  email = "karen.botsh${local.digital_email_suffix}"
}
Expand Down
171 changes: 19 additions & 152 deletions terraform/pagerduty/member-services-integrations.tf
Original file line number Diff line number Diff line change
Expand Up @@ -591,158 +591,6 @@ resource "pagerduty_slack_connection" "laa_mojfin_non_prod_connection" {

# # Slack channel: #mp-laa-alerts-mojfin-non-prod


# NOTE: Update escalation_policy once alarms have been tested
# PagerDuty service for the high-priority production alarms handled by the
# Sheffield DBAs.
resource "pagerduty_service" "hmpps_shef_dba_high_priority" {
  name        = "HMPPS Sheffield DBA High Priority Alarms"
  description = "Production alarms requiring immediate attention by Sheffield DBAs, i.e. worthy of overnight callout"

  escalation_policy = pagerduty_escalation_policy.member_policy.id
  alert_creation    = "create_alerts_and_incidents"

  # 345600 seconds = 4 days before an open incident is resolved automatically.
  auto_resolve_timeout = 345600
  # The literal string "null" (not the null value) is how the provider
  # disables the acknowledgement timeout.
  acknowledgement_timeout = "null"
}
# Attaches the CloudWatch vendor integration to the high-priority DBA service.
resource "pagerduty_service_integration" "hmpps_shef_dba_high_priority" {
  service = pagerduty_service.hmpps_shef_dba_high_priority.id
  vendor  = data.pagerduty_vendor.cloudwatch.id
  # The integration is named after the vendor itself.
  name = data.pagerduty_vendor.cloudwatch.name
}

# Forwards incident activity for the high-priority DBA service to a Slack
# channel (named in the "Slack channel" note that follows this resource).
resource "pagerduty_slack_connection" "hmpps_shef_dba_high_priority_connection" {
  workspace_id      = local.slack_workspace_id
  channel_id        = "CDLAJTGRG"
  notification_type = "responder"
  source_type       = "service_reference"
  source_id         = pagerduty_service.hmpps_shef_dba_high_priority.id

  config {
    # Incident events that are relayed to the channel.
    events = [
      "incident.triggered",
      "incident.acknowledged",
      "incident.escalated",
      "incident.resolved",
      "incident.reassigned",
      "incident.annotated",
      "incident.unacknowledged",
      "incident.delegated",
      "incident.priority_updated",
      "incident.action_invocation.created",
      "incident.action_invocation.terminated",
      "incident.action_invocation.updated",
      "incident.responder.added",
      "incident.responder.replied",
      "incident.status_update_published",
      "incident.reopened"
    ]
    # ["*"] corresponds to "Any Priority" in the connection settings.
    priorities = ["*"]
  }

  # Terraform does not reconcile later differences in the config block.
  lifecycle {
    ignore_changes = [
      config,
    ]
  }
}

# Slack channel: dba_alerts_prod

# PagerDuty service for the low-priority production alarms handled by the
# Sheffield DBAs.
resource "pagerduty_service" "hmpps_shef_dba_low_priority" {
  name        = "HMPPS Sheffield DBA Low Priority Alarms"
  description = "Low priority production alarms for attention of Sheffield DBAs"

  escalation_policy = pagerduty_escalation_policy.member_policy.id
  alert_creation    = "create_alerts_and_incidents"

  # 345600 seconds = 4 days before an open incident is resolved automatically.
  auto_resolve_timeout = 345600
  # The literal string "null" (not the null value) is how the provider
  # disables the acknowledgement timeout.
  acknowledgement_timeout = "null"
}
# Attaches the CloudWatch vendor integration to the low-priority DBA service.
resource "pagerduty_service_integration" "hmpps_shef_dba_low_priority" {
  service = pagerduty_service.hmpps_shef_dba_low_priority.id
  vendor  = data.pagerduty_vendor.cloudwatch.id
  # The integration is named after the vendor itself.
  name = data.pagerduty_vendor.cloudwatch.name
}

# Forwards incident activity for the low-priority DBA service to a Slack
# channel (named in the "Slack channel" note that follows this resource).
resource "pagerduty_slack_connection" "hmpps_shef_dba_low_priority_connection" {
  workspace_id      = local.slack_workspace_id
  channel_id        = "CDLAJTGRG"
  notification_type = "responder"
  source_type       = "service_reference"
  source_id         = pagerduty_service.hmpps_shef_dba_low_priority.id

  config {
    # Incident events that are relayed to the channel.
    events = [
      "incident.triggered",
      "incident.acknowledged",
      "incident.escalated",
      "incident.resolved",
      "incident.reassigned",
      "incident.annotated",
      "incident.unacknowledged",
      "incident.delegated",
      "incident.priority_updated",
      "incident.action_invocation.created",
      "incident.action_invocation.terminated",
      "incident.action_invocation.updated",
      "incident.responder.added",
      "incident.responder.replied",
      "incident.status_update_published",
      "incident.reopened"
    ]
    # ["*"] corresponds to "Any Priority" in the connection settings.
    priorities = ["*"]
  }

  # Terraform does not reconcile later differences in the config block.
  lifecycle {
    ignore_changes = [
      config,
    ]
  }
}

# Slack channel: dba_alerts_prod

# PagerDuty service for the non-production alarms handled by the Sheffield DBAs.
resource "pagerduty_service" "hmpps_shef_dba_non_prod" {
  name        = "HMPPS Sheffield DBA Non-Production Alarms"
  description = "Non-production alarms for attention of Sheffield DBAs"

  escalation_policy = pagerduty_escalation_policy.member_policy.id
  alert_creation    = "create_alerts_and_incidents"

  # 345600 seconds = 4 days before an open incident is resolved automatically.
  auto_resolve_timeout = 345600
  # The literal string "null" (not the null value) is how the provider
  # disables the acknowledgement timeout.
  acknowledgement_timeout = "null"
}
# Attaches the CloudWatch vendor integration to the non-production DBA service.
resource "pagerduty_service_integration" "hmpps_shef_dba_non_prod" {
  service = pagerduty_service.hmpps_shef_dba_non_prod.id
  vendor  = data.pagerduty_vendor.cloudwatch.id
  # The integration is named after the vendor itself.
  name = data.pagerduty_vendor.cloudwatch.name
}

# Forwards incident activity for the non-production DBA service to a Slack
# channel (named in the "Slack channel" note that follows this resource).
resource "pagerduty_slack_connection" "hmpps_shef_dba_non_prod_connection" {
  workspace_id      = local.slack_workspace_id
  channel_id        = "CE7F6CQGH"
  notification_type = "responder"
  source_type       = "service_reference"
  source_id         = pagerduty_service.hmpps_shef_dba_non_prod.id

  config {
    # Incident events that are relayed to the channel.
    events = [
      "incident.triggered",
      "incident.acknowledged",
      "incident.escalated",
      "incident.resolved",
      "incident.reassigned",
      "incident.annotated",
      "incident.unacknowledged",
      "incident.delegated",
      "incident.priority_updated",
      "incident.action_invocation.created",
      "incident.action_invocation.terminated",
      "incident.action_invocation.updated",
      "incident.responder.added",
      "incident.responder.replied",
      "incident.status_update_published",
      "incident.reopened"
    ]
    # ["*"] corresponds to "Any Priority" in the connection settings.
    priorities = ["*"]
  }

  # Terraform does not reconcile later differences in the config block.
  lifecycle {
    ignore_changes = [
      config,
    ]
  }
}

# Slack channel: dba_alerts_devtest

resource "pagerduty_service" "test_alarms" {
name = "Modernisation Platform Test Alarms"
description = "Pagerduty integration for test alarms"
Expand Down Expand Up @@ -2091,6 +1939,7 @@ resource "pagerduty_service" "services" {
acknowledgement_timeout = "null"
escalation_policy = pagerduty_escalation_policy.member_policy.id
alert_creation = "create_alerts_and_incidents"

}
resource "pagerduty_service_integration" "integrations" {
for_each = pagerduty_service.services
Expand All @@ -2111,3 +1960,21 @@ resource "pagerduty_slack_connection" "connections" {
priorities = ["*"]
}
}

# Service-level event orchestration that stamps a default P5 priority on
# events for each service declared by pagerduty_service.services.
resource "pagerduty_event_orchestration_service" "default" {
  # One orchestration per instance of pagerduty_service.services.
  for_each = pagerduty_service.services

  service                                = each.value.id
  enable_event_orchestration_for_service = true

  set {
    id = "start"

    # No condition is given, so the rule applies to every incoming event.
    rule {
      label = "Set the default priority to P5 so breaches appear in the PagerDuty UI"

      actions {
        priority = data.pagerduty_priority.p5.id
      }
    }
  }

  # catch_all is declared with empty actions: nothing extra is done for events
  # that match no rule.
  catch_all {
    actions {}
  }
}

0 comments on commit 7a95be1

Please sign in to comment.