Skip to content
This repository has been archived by the owner on Sep 12, 2023. It is now read-only.

Commit

Permalink
feat: support config via tags (#6)
Browse files Browse the repository at this point in the history
  • Loading branch information
michaelwittig committed May 3, 2023
1 parent 1069007 commit 033c44a
Show file tree
Hide file tree
Showing 3 changed files with 142 additions and 41 deletions.
20 changes: 20 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,26 @@ terraform init
terraform apply
```

## Config via tags

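You can also configure this module by tagging the RDS cluster (requires v1.0.0 or higher). Tags take precedence over variables. A numeric tag value that cannot be parsed as a number falls back to the corresponding variable; periods are rounded down to a multiple of 60 and clamped to 60-86400, and evaluation periods are clamped so that $period*$evaluation-periods <= 86400.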

| tag key | default value | allowed values |
| ---------------------------------------------- | ------------------------------------------------ | ----------------------------------------------|
| `marbot` | on | on,off |
| `marbot:cpu-utilization` | variable `cpu_utilization` | static,off |
| `marbot:cpu-utilization:threshold` | variable `cpu_utilization_threshold` | 0-100 |
| `marbot:cpu-utilization:period` | variable `cpu_utilization_period` | <= 86400 and multiple of 60 |
| `marbot:cpu-utilization:evaluation-periods` | variable `cpu_utilization_evaluation_periods` | >= 1 and $period*$evaluation-periods <= 86400 |
| `marbot:cpu-credit-balance` | variable `cpu_credit_balance` | static,off |
| `marbot:cpu-credit-balance:threshold` | variable `cpu_credit_balance_threshold` | >= 0 |
| `marbot:cpu-credit-balance:period` | variable `cpu_credit_balance_period` | <= 86400 and multiple of 60 |
| `marbot:cpu-credit-balance:evaluation-periods` | variable `cpu_credit_balance_evaluation_periods` | >= 1 and $period*$evaluation-periods <= 86400 |
| `marbot:freeable-memory` | variable `freeable_memory` | static,off |
| `marbot:freeable-memory:threshold` | variable `freeable_memory_threshold` | >= 0 |
| `marbot:freeable-memory:period` | variable `freeable_memory_period` | <= 86400 and multiple of 60 |
| `marbot:freeable-memory:evaluation-periods` | variable `freeable_memory_evaluation_periods` | >= 1 and $period*$evaluation-periods <= 86400 |

## Update procedure

1. Update the `version`
Expand Down
66 changes: 46 additions & 20 deletions main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,34 @@ data "aws_caller_identity" "current" {}

data "aws_region" "current" {}

# Reads the monitored RDS cluster (selected by var.db_cluster_identifier) so that
# its tags can be consulted in the locals block below to override module variables.
data "aws_rds_cluster" "cluster" {
cluster_identifier = var.db_cluster_identifier
}

locals {
  # Use the caller-supplied topic ARN unless this module creates its own topic.
  topic_arn = var.create_topic == false ? var.topic_arn : join("", aws_sns_topic.marbot.*.arn)

  # Monitoring is active only when var.enabled is true AND the cluster is not
  # tagged marbot=off (a missing tag counts as "on").
  enabled = var.enabled && lookup(data.aws_rds_cluster.cluster.tags, "marbot", "on") != "off"

  # Effective settings per alarm. For every setting the cluster tag, if present,
  # wins over the module variable:
  #   * numeric tags that tonumber() cannot parse fall back to the variable,
  #   * periods are rounded down to a multiple of 60 and clamped to 60..86400,
  #   * evaluation periods are rounded down to a whole number (the alarm
  #     argument only accepts integers) and clamped to 1..floor(86400 / period)
  #     so that period * evaluation_periods never exceeds 86400 seconds.

  # --- CPU utilization ---
  cpu_utilization                        = lookup(data.aws_rds_cluster.cluster.tags, "marbot:cpu-utilization", var.cpu_utilization)
  cpu_utilization_threshold              = try(tonumber(lookup(data.aws_rds_cluster.cluster.tags, "marbot:cpu-utilization:threshold", var.cpu_utilization_threshold)), var.cpu_utilization_threshold)
  cpu_utilization_period_raw             = try(tonumber(lookup(data.aws_rds_cluster.cluster.tags, "marbot:cpu-utilization:period", var.cpu_utilization_period)), var.cpu_utilization_period)
  cpu_utilization_period                 = min(max(floor(local.cpu_utilization_period_raw / 60) * 60, 60), 86400)
  cpu_utilization_evaluation_periods_raw = try(tonumber(lookup(data.aws_rds_cluster.cluster.tags, "marbot:cpu-utilization:evaluation-periods", var.cpu_utilization_evaluation_periods)), var.cpu_utilization_evaluation_periods)
  cpu_utilization_evaluation_periods     = min(max(floor(local.cpu_utilization_evaluation_periods_raw), 1), floor(86400 / local.cpu_utilization_period))

  # --- CPU credit balance ---
  cpu_credit_balance                        = lookup(data.aws_rds_cluster.cluster.tags, "marbot:cpu-credit-balance", var.cpu_credit_balance)
  cpu_credit_balance_threshold              = try(tonumber(lookup(data.aws_rds_cluster.cluster.tags, "marbot:cpu-credit-balance:threshold", var.cpu_credit_balance_threshold)), var.cpu_credit_balance_threshold)
  cpu_credit_balance_period_raw             = try(tonumber(lookup(data.aws_rds_cluster.cluster.tags, "marbot:cpu-credit-balance:period", var.cpu_credit_balance_period)), var.cpu_credit_balance_period)
  cpu_credit_balance_period                 = min(max(floor(local.cpu_credit_balance_period_raw / 60) * 60, 60), 86400)
  cpu_credit_balance_evaluation_periods_raw = try(tonumber(lookup(data.aws_rds_cluster.cluster.tags, "marbot:cpu-credit-balance:evaluation-periods", var.cpu_credit_balance_evaluation_periods)), var.cpu_credit_balance_evaluation_periods)
  cpu_credit_balance_evaluation_periods     = min(max(floor(local.cpu_credit_balance_evaluation_periods_raw), 1), floor(86400 / local.cpu_credit_balance_period))

  # --- Freeable memory ---
  freeable_memory                        = lookup(data.aws_rds_cluster.cluster.tags, "marbot:freeable-memory", var.freeable_memory)
  freeable_memory_threshold              = try(tonumber(lookup(data.aws_rds_cluster.cluster.tags, "marbot:freeable-memory:threshold", var.freeable_memory_threshold)), var.freeable_memory_threshold)
  freeable_memory_period_raw             = try(tonumber(lookup(data.aws_rds_cluster.cluster.tags, "marbot:freeable-memory:period", var.freeable_memory_period)), var.freeable_memory_period)
  freeable_memory_period                 = min(max(floor(local.freeable_memory_period_raw / 60) * 60, 60), 86400)
  freeable_memory_evaluation_periods_raw = try(tonumber(lookup(data.aws_rds_cluster.cluster.tags, "marbot:freeable-memory:evaluation-periods", var.freeable_memory_evaluation_periods)), var.freeable_memory_evaluation_periods)
  freeable_memory_evaluation_periods     = min(max(floor(local.freeable_memory_evaluation_periods_raw), 1), floor(86400 / local.freeable_memory_period))
}

##########################################################################
Expand Down Expand Up @@ -73,7 +99,7 @@ data "aws_iam_policy_document" "topic_policy" {

resource "aws_sns_topic_subscription" "marbot" {
depends_on = [aws_sns_topic_policy.marbot]
count = (var.create_topic && var.enabled) ? 1 : 0
count = (var.create_topic && local.enabled) ? 1 : 0

topic_arn = join("", aws_sns_topic.marbot.*.arn)
protocol = "https"
Expand All @@ -99,7 +125,7 @@ JSON

resource "aws_cloudwatch_event_rule" "monitoring_jump_start_connection" {
depends_on = [aws_sns_topic_subscription.marbot]
count = (var.module_version_monitoring_enabled && var.enabled) ? 1 : 0
count = (var.module_version_monitoring_enabled && local.enabled) ? 1 : 0

name = "marbot-rds-cluster-connection-${random_id.id8.hex}"
description = "Monitoring Jump Start connection. (created by marbot)"
Expand All @@ -108,7 +134,7 @@ resource "aws_cloudwatch_event_rule" "monitoring_jump_start_connection" {
}

resource "aws_cloudwatch_event_target" "monitoring_jump_start_connection" {
count = (var.module_version_monitoring_enabled && var.enabled) ? 1 : 0
count = (var.module_version_monitoring_enabled && local.enabled) ? 1 : 0

rule = join("", aws_cloudwatch_event_rule.monitoring_jump_start_connection.*.name)
target_id = "marbot"
Expand All @@ -117,7 +143,7 @@ resource "aws_cloudwatch_event_target" "monitoring_jump_start_connection" {
{
"Type": "monitoring-jump-start-tf-connection",
"Module": "rds-cluster",
"Version": "0.10.0",
"Version": "1.0.0",
"Partition": "${data.aws_partition.current.partition}",
"AccountId": "${data.aws_caller_identity.current.account_id}",
"Region": "${data.aws_region.current.name}"
Expand All @@ -139,17 +165,17 @@ resource "random_id" "id8" {

resource "aws_cloudwatch_metric_alarm" "cpu_utilization" {
depends_on = [aws_sns_topic_subscription.marbot]
count = (var.cpu_utilization_threshold >= 0 && var.enabled) ? 1 : 0
count = (local.cpu_utilization == "static" && local.enabled) ? 1 : 0

alarm_name = "marbot-rds-cluster-cpu-utilization-${random_id.id8.hex}"
alarm_description = "Average database CPU utilization over last 10 minutes too high. (created by marbot)"
alarm_description = "Average database CPU utilization too high. (created by marbot)"
namespace = "AWS/RDS"
metric_name = "CPUUtilization"
statistic = "Average"
period = 600
evaluation_periods = 1
period = local.cpu_utilization_period
evaluation_periods = local.cpu_utilization_evaluation_periods
comparison_operator = "GreaterThanThreshold"
threshold = var.cpu_utilization_threshold
threshold = local.cpu_utilization_threshold
alarm_actions = [local.topic_arn]
ok_actions = [local.topic_arn]
dimensions = {
Expand All @@ -163,17 +189,17 @@ resource "aws_cloudwatch_metric_alarm" "cpu_utilization" {

resource "aws_cloudwatch_metric_alarm" "cpu_credit_balance" {
depends_on = [aws_sns_topic_subscription.marbot]
count = (var.cpu_credit_balance_threshold >= 0 && var.burst_monitoring_enabled && var.enabled) ? 1 : 0
count = (local.cpu_credit_balance == "static" && local.enabled) ? 1 : 0

alarm_name = "marbot-rds-cluster-cpu-credit-balance-${random_id.id8.hex}"
alarm_description = "Average database CPU credit balance over last 10 minutes too low, expect a significant performance drop soon. (created by marbot)"
alarm_description = "Average database CPU credit balance too low, expect a significant performance drop soon. (created by marbot)"
namespace = "AWS/RDS"
metric_name = "CPUCreditBalance"
statistic = "Average"
period = 600
evaluation_periods = 1
period = local.cpu_credit_balance_period
evaluation_periods = local.cpu_credit_balance_evaluation_periods
comparison_operator = "LessThanThreshold"
threshold = var.cpu_credit_balance_threshold
threshold = local.cpu_credit_balance_threshold
alarm_actions = [local.topic_arn]
ok_actions = [local.topic_arn]
dimensions = {
Expand All @@ -187,17 +213,17 @@ resource "aws_cloudwatch_metric_alarm" "cpu_credit_balance" {

resource "aws_cloudwatch_metric_alarm" "freeable_memory" {
depends_on = [aws_sns_topic_subscription.marbot]
count = (var.freeable_memory_threshold >= 0 && var.enabled) ? 1 : 0
count = (local.freeable_memory == "static" && local.enabled) ? 1 : 0

alarm_name = "marbot-rds-cluster-freeable-memory-${random_id.id8.hex}"
alarm_description = "Average database freeable memory over last 10 minutes too low, performance may suffer. (created by marbot)"
alarm_description = "Average database freeable memory too low, performance may suffer. (created by marbot)"
namespace = "AWS/RDS"
metric_name = "FreeableMemory"
statistic = "Average"
period = 600
evaluation_periods = 1
period = local.freeable_memory_period
evaluation_periods = local.freeable_memory_evaluation_periods
comparison_operator = "LessThanThreshold"
threshold = var.freeable_memory_threshold
threshold = local.freeable_memory_threshold
alarm_actions = [local.topic_arn]
ok_actions = [local.topic_arn]
dimensions = {
Expand All @@ -215,7 +241,7 @@ resource "aws_cloudwatch_metric_alarm" "freeable_memory" {

resource "aws_db_event_subscription" "rds_cluster_issue" {
depends_on = [aws_sns_topic_subscription.marbot]
count = var.enabled ? 1 : 0
count = local.enabled ? 1 : 0

name_prefix = "marbot"
sns_topic = local.topic_arn
Expand Down
97 changes: 76 additions & 21 deletions variables.tf
Original file line number Diff line number Diff line change
@@ -1,3 +1,22 @@
# We cannot rely solely on checking var.topic_arn != "" because of the Terraform error: The "count" value depends on resource attributes that cannot be determined until apply, so Terraform cannot predict how many instances will be created.
# When this is false, local.topic_arn is taken from var.topic_arn instead of the topic created by this module.
variable "create_topic" {
type = bool
description = "Create SNS topic? If set to false you must set topic_arn as well!"
default = true
}

# Externally managed SNS topic. Only used (as local.topic_arn) when create_topic is false.
variable "topic_arn" {
type = string
description = "Optional SNS topic ARN if create_topic := false (usually the output of the modules marbot-monitoring-basic or marbot-standalone-topic)."
default = ""
}

variable "stage" {
type = string
description = "marbot stage (never change this!)."
default = "v1"
}

variable "endpoint_id" {
type = string
description = "Your marbot endpoint ID (to get this value: select a channel where marbot belongs to and send a message like this: \"@marbot show me my endpoint id\")."
Expand Down Expand Up @@ -26,45 +45,81 @@ variable "db_cluster_identifier" {
description = "The cluster identifier of the RDS Aurora cluster that you want to monitor."
}



# Switch for the CPU utilization alarm. The alarm is only created when the
# effective value is "static"; the cluster tag `marbot:cpu-utilization`, if set,
# overrides this variable.
variable "cpu_utilization" {
type = string
description = "CPU utilization (static|off)."
default = "static"
}

variable "cpu_utilization_threshold" {
type = number
description = "The maximum percentage of CPU utilization (set to -1 to disable)."
description = "The maximum percentage of CPU utilization (0-100)."
default = 80
}

variable "burst_monitoring_enabled" {
type = bool
description = "Deprecated, set variable cpu_credit_balance_threshold to -1 instead"
default = true
variable "cpu_utilization_period" {
type = number
description = "The period in seconds over which the specified statistic is applied (<= 86400 and multiple of 60)."
default = 600
}

# Can be overridden by the cluster tag `marbot:cpu-utilization:evaluation-periods`.
# The effective value is clamped in main.tf to at least 1 and at most
# floor(86400 / effective period).
variable "cpu_utilization_evaluation_periods" {
type = number
description = "The number of periods over which data is compared to the specified threshold (>= 1 and $period*$evaluation_periods <= 86400)."
default = 1
}




# Switch for the CPU credit balance alarm. The alarm is only created when the
# effective value is "static"; the cluster tag `marbot:cpu-credit-balance`, if
# set, overrides this variable.
variable "cpu_credit_balance" {
type = string
description = "CPU burst credits for t* instances (static|off)."
default = "static"
}

variable "cpu_credit_balance_threshold" {
type = number
description = "The minimum number of CPU credits available (t* instances only; set to -1 to disable)."
description = "The minimum number of CPU credits remaining in the burst bucket (>= 0)."
default = 20
}

variable "freeable_memory_threshold" {
variable "cpu_credit_balance_period" {
type = number
description = "The minimum amount of available random access memory in Byte (set to -1 to disable)."
default = 64000000 # 64 Megabyte in Byte
description = "The period in seconds over which the specified statistic is applied (<= 86400 and multiple of 60)."
default = 600
}

# We can not only check the var.topic_arn !="" because of the Terraform error: The "count" value depends on resource attributes that cannot be determined until apply, so Terraform cannot predict how many instances will be created.
variable "create_topic" {
type = bool
description = "Create SNS topic? If set to false you must set topic_arn as well!"
default = true
variable "cpu_credit_balance_evaluation_periods" {
type = number
description = "The number of periods over which data is compared to the specified threshold (>= 1 and $period*$evaluation_periods <= 86400)."
default = 1
}

variable "topic_arn" {


variable "freeable_memory" {
type = string
description = "Optional SNS topic ARN if create_topic := false (usually the output of the modules marbot-monitoring-basic or marbot-standalone-topic)."
default = ""
description = "Available memory (static|off)."
default = "static"
}

variable "stage" {
type = string
description = "marbot stage (never change this!)."
default = "v1"
variable "freeable_memory_threshold" {
type = number
description = "The minimum amount of available random access memory in Byte (>= 0)."
default = 64000000 # 64 Megabyte in Byte
}

# Can be overridden by the cluster tag `marbot:freeable-memory:period`.
# The effective value is rounded down to a multiple of 60 and clamped to
# 60..86400 in main.tf.
variable "freeable_memory_period" {
type = number
description = "The period in seconds over which the specified statistic is applied (<= 86400 and multiple of 60)."
default = 600
}

# Can be overridden by the cluster tag `marbot:freeable-memory:evaluation-periods`.
# The effective value is clamped in main.tf to at least 1 and at most
# floor(86400 / effective period).
variable "freeable_memory_evaluation_periods" {
type = number
description = "The number of periods over which data is compared to the specified threshold (>= 1 and $period*$evaluation_periods <= 86400)."
default = 1
}

0 comments on commit 033c44a

Please sign in to comment.