Skip to content

Commit

Permalink
feat: add monitoring terraform module (#9)
Browse files Browse the repository at this point in the history
  • Loading branch information
nvtaveras authored Jul 29, 2024
1 parent c37b08d commit 243a7c7
Show file tree
Hide file tree
Showing 2 changed files with 70 additions and 0 deletions.
62 changes: 62 additions & 0 deletions infra/monitoring.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# Log-based metric for the watchdog cloud function: counts DEFAULT-severity
# log entries that match the `[HealthCheck]` search term.
resource "google_logging_metric" "health_check_metric" {
  name        = "health_check_logs_count"
  project     = module.bootstrap.seed_project_id
  description = "Number of log entries containing 'health check' in the watchdog cloud function"

  # The last filter line scopes the metric to the watchdog function by its service name.
  filter = <<-EOT
    severity=DEFAULT
    SEARCH("`[HealthCheck]`")
    resource.labels.service_name="${google_cloudfunctions2_function.watchdog_notifications.name}"
  EOT
}

# Webhook notification channel for Splunk (VictorOps). The health check alert
# policy in this file sends its notifications here.
resource "google_monitoring_notification_channel" "victorops_channel" {
  type         = "webhook_tokenauth"
  display_name = "Splunk (VictorOps)"
  project      = module.bootstrap.seed_project_id

  # The target URL is supplied through a sensitive input variable rather than
  # being hard-coded in this file.
  labels = {
    url = var.victorops_webhook_url
  }
}

# Alert policy for missing health checks: raises a CRITICAL alert and notifies
# the VictorOps channel when the health check log count, summed over a 6-hour
# window, drops below 1.
resource "google_monitoring_alert_policy" "health_check_policy" {
  display_name = "no-health-check-logs"
  project      = module.bootstrap.seed_project_id
  enabled      = true
  severity     = "CRITICAL"
  combiner     = "OR"

  notification_channels = [google_monitoring_notification_channel.victorops_channel.id]

  documentation {
    content = "No health check events have been logged in the last 6 hours"
  }

  conditions {
    display_name = "No health check logs in 6 hours"

    condition_threshold {
      # Violated when the aligned value is less than 1, i.e. no matching logs were counted.
      comparison      = "COMPARISON_LT"
      threshold_value = 1

      # How long (5 minutes) the threshold must stay violated before the
      # condition is considered failing; this is not a re-evaluation interval.
      duration = "300s"

      # NOTE(review): a threshold condition only evaluates data points that exist.
      # Confirm the log-based metric still reports zero-valued points when no
      # health check logs arrive; if it reports no data at all, this condition
      # may never be violated.
      filter = <<-EOT
        resource.type = "cloud_run_revision" AND
        metric.type = "logging.googleapis.com/user/${google_logging_metric.health_check_metric.name}"
      EOT

      aggregations {
        per_series_aligner = "ALIGN_SUM"
        alignment_period   = "21600s" # 6 hours
      }

      # A single violating time series is enough to trigger the condition.
      trigger {
        count = 1
      }
    }
  }
}
8 changes: 8 additions & 0 deletions infra/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -55,3 +55,11 @@ variable "quicknode_api_key" {
type = string
sensitive = true
}

# Webhook URL to send monitoring alerts from within GCP Monitoring.
# You can find this URL in VictorOps by going to "Integrations" -> "Stackdriver".
# The routing key can be found under "Settings" -> "Routing Keys".
variable "victorops_webhook_url" {
  # The description makes the purpose visible in Terraform prompts and generated docs.
  description = "VictorOps webhook URL that GCP Monitoring sends alerts to."
  type        = string
  sensitive   = true # keeps the URL out of plan/apply output
}

0 comments on commit 243a7c7

Please sign in to comment.