diff --git a/README.md b/README.md
index 3986ae0b4b..35142ea9a6 100644
--- a/README.md
+++ b/README.md
@@ -97,7 +97,7 @@ To be able to support a number of use-cases the module has quite a lot of config
### AWS SSM Parameters
-The module uses the AWS System Manager Parameter Store to store configuration for the runners, as well as registration tokens and secrets for the Lambdas. Paths for the parameters can be configured via the variable `ssm_paths`. The location of the configuration parameters is retrieved by the runners via the instance tag `ghr:ssm_config_path`. The following default paths will be used.
+The module uses the AWS System Manager Parameter Store to store configuration for the runners, as well as registration tokens and secrets for the Lambdas. Paths for the parameters can be configured via the variable `ssm_paths`. The location of the configuration parameters is retrieved by the runners via the instance tag `ghr:ssm_config_path`. The following default paths will be used. Tokens or JIT configs stored in the token path will be deleted after they are retrieved by the instance; data that has not been deleted after a day will be removed by an SSM housekeeper lambda.
| Path | Description |
| ----------- | ----------- |
@@ -585,6 +585,7 @@ We welcome any improvement to the standard module to make the default as secure
| [runners\_maximum\_count](#input\_runners\_maximum\_count) | The maximum number of runners that will be created. | `number` | `3` | no |
| [runners\_scale\_down\_lambda\_timeout](#input\_runners\_scale\_down\_lambda\_timeout) | Time out for the scale down lambda in seconds. | `number` | `60` | no |
| [runners\_scale\_up\_lambda\_timeout](#input\_runners\_scale\_up\_lambda\_timeout) | Time out for the scale up lambda in seconds. | `number` | `30` | no |
+| [runners\_ssm\_housekeeper](#input\_runners\_ssm\_housekeeper) | Configuration for the SSM housekeeper lambda. This lambda deletes token / JIT config from SSM.
`schedule_expression`: is used to configure the schedule for the lambda.
`enabled`: enable or disable the lambda trigger via the EventBridge.
`lambda_timeout`: timeout for the lambda in seconds.
`config`: configuration for the lambda function. Token path will be read by default from the module. |
object({|
schedule_expression = optional(string, "rate(1 day)")
enabled = optional(bool, true)
lambda_timeout = optional(number, 60)
config = object({
tokenPath = optional(string)
minimumDaysOld = optional(number, 1)
dryRun = optional(bool, false)
})
})
{| no | | [scale\_down\_schedule\_expression](#input\_scale\_down\_schedule\_expression) | Scheduler expression to check every x for scale down. | `string` | `"cron(*/5 * * * ? *)"` | no | | [scale\_up\_reserved\_concurrent\_executions](#input\_scale\_up\_reserved\_concurrent\_executions) | Amount of reserved concurrent executions for the scale-up lambda function. A value of 0 disables lambda from being triggered and -1 removes any concurrency limitations. | `number` | `1` | no | | [ssm\_paths](#input\_ssm\_paths) | The root path used in SSM to store configuration and secrets. |
"config": {}
}
object({| `{}` | no | diff --git a/lambdas/functions/control-plane/jest.config.ts b/lambdas/functions/control-plane/jest.config.ts index 09acd3dcd6..ab3ae23f36 100644 --- a/lambdas/functions/control-plane/jest.config.ts +++ b/lambdas/functions/control-plane/jest.config.ts @@ -6,10 +6,10 @@ const config: Config = { ...defaultConfig, coverageThreshold: { global: { - statements: 97.6, - branches: 94.6, - functions: 97, - lines: 98, + statements: 97.89, + branches: 94.64, + functions: 97.33, + lines: 98.21, }, }, }; diff --git a/lambdas/functions/control-plane/package.json b/lambdas/functions/control-plane/package.json index ba4f5f9a5b..950276d061 100644 --- a/lambdas/functions/control-plane/package.json +++ b/lambdas/functions/control-plane/package.json @@ -8,7 +8,7 @@ "test": "NODE_ENV=test jest", "test:watch": "NODE_ENV=test jest --watch", "lint": "yarn eslint src", - "watch": "ts-node-dev --respawn --exit-child src/local.ts", + "watch": "ts-node-dev --respawn --exit-child src/local-ssm-housekeeper.ts", "build": "ncc build src/lambda.ts -o dist", "dist": "yarn build && cd dist && zip ../runners.zip index.js", "format": "prettier --write \"**/*.ts\"", diff --git a/lambdas/functions/control-plane/src/lambda.test.ts b/lambdas/functions/control-plane/src/lambda.test.ts index 9cb93d0139..c0fa0a8854 100644 --- a/lambdas/functions/control-plane/src/lambda.test.ts +++ b/lambdas/functions/control-plane/src/lambda.test.ts @@ -2,11 +2,12 @@ import { logger } from '@terraform-aws-github-runner/aws-powertools-util'; import { Context, SQSEvent, SQSRecord } from 'aws-lambda'; import { mocked } from 'jest-mock'; -import { adjustPool, scaleDownHandler, scaleUpHandler } from './lambda'; +import { adjustPool, scaleDownHandler, scaleUpHandler, ssmHousekeeper } from './lambda'; import { adjust } from './pool/pool'; import ScaleError from './scale-runners/ScaleError'; import { scaleDown } from './scale-runners/scale-down'; import { ActionRequestMessage, scaleUp } from 
'./scale-runners/scale-up'; +import { cleanSSMTokens } from './scale-runners/ssm-housekeeper'; const body: ActionRequestMessage = { eventType: 'workflow_job', @@ -61,6 +62,7 @@ const context: Context = { jest.mock('./scale-runners/scale-up'); jest.mock('./scale-runners/scale-down'); jest.mock('./pool/pool'); +jest.mock('./scale-runners/ssm-housekeeper'); jest.mock('@terraform-aws-github-runner/aws-powertools-util'); // Docs for testing async with jest: https://jestjs.io/docs/tutorial-async @@ -87,7 +89,7 @@ describe('Test scale up lambda wrapper.', () => { const error = new Error('Non scale should resolve.'); const mock = mocked(scaleUp); mock.mockRejectedValue(error); - await expect(scaleUpHandler(sqsEvent, context)).resolves; + await expect(scaleUpHandler(sqsEvent, context)).resolves.not.toThrow; }); it('Scale should be rejected', async () => { @@ -110,7 +112,7 @@ async function testInvalidRecords(sqsRecords: SQSRecord[]) { Records: sqsRecords, }; - await expect(scaleUpHandler(sqsEventMultipleRecords, context)).resolves; + await expect(scaleUpHandler(sqsEventMultipleRecords, context)).resolves.not.toThrow(); expect(logWarnSpy).toHaveBeenCalledWith( expect.stringContaining( @@ -127,14 +129,14 @@ describe('Test scale down lambda wrapper.', () => { resolve(); }); }); - await expect(scaleDownHandler({}, context)).resolves; + await expect(scaleDownHandler({}, context)).resolves.not.toThrow(); }); it('Scaling down with error.', async () => { const error = new Error('Scaling down with error.'); const mock = mocked(scaleDown); mock.mockRejectedValue(error); - await expect(await scaleDownHandler({}, context)).resolves; + await expect(scaleDownHandler({}, context)).resolves.not.toThrow(); }); }); @@ -146,7 +148,7 @@ describe('Adjust pool.', () => { resolve(); }); }); - await expect(adjustPool({ poolSize: 2 }, context)).resolves; + await expect(adjustPool({ poolSize: 2 }, context)).resolves.not.toThrow(); }); it('Handle error for adjusting pool.', async () => { @@ -158,3 
+160,28 @@ describe('Adjust pool.', () => { expect(logSpy).lastCalledWith(expect.stringContaining(error.message), expect.anything()); }); }); + +describe('Test ssm housekeeper lambda wrapper.', () => { + it('Invoke without errors.', async () => { + const mock = mocked(cleanSSMTokens); + mock.mockImplementation(() => { + return new Promise((resolve) => { + resolve(); + }); + }); + + process.env.SSM_CLEANUP_CONFIG = JSON.stringify({ + dryRun: false, + minimumDaysOld: 1, + tokenPath: '/path/to/tokens/', + }); + + await expect(ssmHousekeeper({}, context)).resolves.not.toThrow(); + }); + + it('Errors not throwed.', async () => { + const mock = mocked(cleanSSMTokens); + mock.mockRejectedValue(new Error()); + await expect(ssmHousekeeper({}, context)).resolves.not.toThrow(); + }); +}); diff --git a/lambdas/functions/control-plane/src/lambda.ts b/lambdas/functions/control-plane/src/lambda.ts index 05204f38d3..e4e3f66096 100644 --- a/lambdas/functions/control-plane/src/lambda.ts +++ b/lambdas/functions/control-plane/src/lambda.ts @@ -6,6 +6,7 @@ import { PoolEvent, adjust } from './pool/pool'; import ScaleError from './scale-runners/ScaleError'; import { scaleDown } from './scale-runners/scale-down'; import { scaleUp } from './scale-runners/scale-up'; +import { SSMCleanupOptions, cleanSSMTokens } from './scale-runners/ssm-housekeeper'; export async function scaleUpHandler(event: SQSEvent, context: Context): Promise
root = optional(string, "github-action-runners")
app = optional(string, "app")
runners = optional(string, "runners")
use_prefix = optional(bool, true)
})
object({|
schedule_expression = optional(string, "rate(1 day)")
enabled = optional(bool, true)
lambda_timeout = optional(number, 60)
config = object({
tokenPath = optional(string)
minimumDaysOld = optional(number, 1)
dryRun = optional(bool, false)
})
})
{| no | | [ssm\_paths](#input\_ssm\_paths) | The root path used in SSM to store configuration and secrets. |
"config": {}
}
object({| `{}` | no | | [subnet\_ids](#input\_subnet\_ids) | List of subnets in which the action runners will be launched, the subnets needs to be subnets in the `vpc_id`. | `list(string)` | n/a | yes | | [syncer\_lambda\_s3\_key](#input\_syncer\_lambda\_s3\_key) | S3 key for syncer lambda function. Required if using S3 bucket to specify lambdas. | `string` | `null` | no | diff --git a/modules/multi-runner/runners.tf b/modules/multi-runner/runners.tf index 2ae61c0820..2541200c49 100644 --- a/modules/multi-runner/runners.tf +++ b/modules/multi-runner/runners.tf @@ -104,4 +104,6 @@ module "runners" { pool_runner_owner = each.value.runner_config.pool_runner_owner pool_lambda_reserved_concurrent_executions = var.pool_lambda_reserved_concurrent_executions associate_public_ipv4_address = var.associate_public_ipv4_address + + ssm_housekeeper = var.runners_ssm_housekeeper } diff --git a/modules/multi-runner/variables.tf b/modules/multi-runner/variables.tf index cd81fb5ee1..daa7bf17ca 100644 --- a/modules/multi-runner/variables.tf +++ b/modules/multi-runner/variables.tf @@ -552,3 +552,25 @@ variable "associate_public_ipv4_address" { type = bool default = false } + +variable "runners_ssm_housekeeper" { + description = <
root = optional(string, "github-action-runners")
app = optional(string, "app")
runners = optional(string, "runners")
})
object({| n/a | yes | +| [ssm\_housekeeper](#input\_ssm\_housekeeper) | Configuration for the SSM housekeeper lambda. This lambda deletes token / JIT config from SSM.
arn = string
})
object({|
schedule_expression = optional(string, "rate(1 day)")
enabled = optional(bool, true)
lambda_timeout = optional(number, 60)
config = object({
tokenPath = optional(string)
minimumDaysOld = optional(number, 1)
dryRun = optional(bool, false)
})
})
{| no | | [ssm\_paths](#input\_ssm\_paths) | The root path used in SSM to store configuration and secrets. |
"config": {}
}
object({| n/a | yes | | [subnet\_ids](#input\_subnet\_ids) | List of subnets in which the action runners will be launched, the subnets needs to be subnets in the `vpc_id`. | `list(string)` | n/a | yes | | [tags](#input\_tags) | Map of tags that will be added to created resources. By default resources will be tagged with name. | `map(string)` | `{}` | no | diff --git a/modules/runners/main.tf b/modules/runners/main.tf index 48583d4d7c..f43db5daff 100644 --- a/modules/runners/main.tf +++ b/modules/runners/main.tf @@ -43,6 +43,8 @@ locals { enable_job_queued_check = var.enable_job_queued_check == null ? !var.enable_ephemeral_runners : var.enable_job_queued_check arn_ssm_parameters_path_config = "arn:${var.aws_partition}:ssm:${var.aws_region}:${data.aws_caller_identity.current.account_id}:parameter${var.ssm_paths.root}/${var.ssm_paths.config}" + + token_path = "${var.ssm_paths.root}/${var.ssm_paths.tokens}" } data "aws_ami" "runner" { diff --git a/modules/runners/policies/lambda-ssm-housekeeper.json b/modules/runners/policies/lambda-ssm-housekeeper.json new file mode 100644 index 0000000000..5e49baafaa --- /dev/null +++ b/modules/runners/policies/lambda-ssm-housekeeper.json @@ -0,0 +1,13 @@ +{ + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "ssm:DeleteParameter", + "ssm:GetParametersByPath" + ], + "Resource": "${ssm_token_path}*" + } + ] +} diff --git a/modules/runners/scale-up.tf b/modules/runners/scale-up.tf index ae5ba6b938..7a05c48c3f 100644 --- a/modules/runners/scale-up.tf +++ b/modules/runners/scale-up.tf @@ -40,7 +40,7 @@ resource "aws_lambda_function" "scale_up" { RUNNER_NAME_PREFIX = var.runner_name_prefix RUNNERS_MAXIMUM_COUNT = var.runners_maximum_count SERVICE_NAME = "runners-scale-up" - SSM_TOKEN_PATH = "${var.ssm_paths.root}/${var.ssm_paths.tokens}" + SSM_TOKEN_PATH = local.token_path SSM_CONFIG_PATH = "${var.ssm_paths.root}/${var.ssm_paths.config}" SUBNET_IDS = join(",", var.subnet_ids) } diff --git 
a/modules/runners/ssm-housekeeper.tf b/modules/runners/ssm-housekeeper.tf new file mode 100644 index 0000000000..b849d1c687 --- /dev/null +++ b/modules/runners/ssm-housekeeper.tf @@ -0,0 +1,115 @@ +locals { + ssm_housekeeper = { + schedule_expression = var.ssm_housekeeper.schedule_expression + enabled = var.ssm_housekeeper.enabled + lambda_timeout = var.ssm_housekeeper.lambda_timeout + config = { + tokenPath = var.ssm_housekeeper.config.tokenPath == null ? local.token_path : var.ssm_housekeeper.config.tokenPath + minimumDaysOld = var.ssm_housekeeper.config.minimumDaysOld + dryRun = var.ssm_housekeeper.config.dryRun + } + } +} + +resource "aws_lambda_function" "ssm_housekeeper" { + s3_bucket = var.lambda_s3_bucket != null ? var.lambda_s3_bucket : null + s3_key = var.runners_lambda_s3_key != null ? var.runners_lambda_s3_key : null + s3_object_version = var.runners_lambda_s3_object_version != null ? var.runners_lambda_s3_object_version : null + filename = var.lambda_s3_bucket == null ? local.lambda_zip : null + source_code_hash = var.lambda_s3_bucket == null ? filebase64sha256(local.lambda_zip) : null + function_name = "${var.prefix}-ssm-housekeeper" + role = aws_iam_role.ssm_housekeeper.arn + handler = "index.ssmHousekeeper" + runtime = var.lambda_runtime + timeout = local.ssm_housekeeper.lambda_timeout + tags = local.tags + memory_size = 512 + architectures = [var.lambda_architecture] + + environment { + variables = { + ENVIRONMENT = var.prefix + LOG_LEVEL = var.log_level + SSM_CLEANUP_CONFIG = jsonencode(local.ssm_housekeeper.config) + SERVICE_NAME = "ssm-housekeeper" + } + } + + dynamic "vpc_config" { + for_each = var.lambda_subnet_ids != null && var.lambda_security_group_ids != null ? [true] : [] + content { + security_group_ids = var.lambda_security_group_ids + subnet_ids = var.lambda_subnet_ids + } + } + + dynamic "tracing_config" { + for_each = var.lambda_tracing_mode != null ? 
[true] : [] + content { + mode = var.lambda_tracing_mode + } + } +} + +resource "aws_cloudwatch_log_group" "ssm_housekeeper" { + name = "/aws/lambda/${aws_lambda_function.ssm_housekeeper.function_name}" + retention_in_days = var.logging_retention_in_days + kms_key_id = var.logging_kms_key_id + tags = var.tags +} + +resource "aws_cloudwatch_event_rule" "ssm_housekeeper" { + name = "${var.prefix}-ssm-housekeeper" + schedule_expression = local.ssm_housekeeper.schedule_expression + tags = var.tags + is_enabled = local.ssm_housekeeper.enabled +} + +resource "aws_cloudwatch_event_target" "ssm_housekeeper" { + rule = aws_cloudwatch_event_rule.ssm_housekeeper.name + arn = aws_lambda_function.ssm_housekeeper.arn +} + +resource "aws_lambda_permission" "ssm_housekeeper" { + statement_id = "AllowExecutionFromCloudWatch" + action = "lambda:InvokeFunction" + function_name = aws_lambda_function.ssm_housekeeper.function_name + principal = "events.amazonaws.com" + source_arn = aws_cloudwatch_event_rule.ssm_housekeeper.arn +} + +resource "aws_iam_role" "ssm_housekeeper" { + name = "${var.prefix}-action-ssm-housekeeper-lambda-role" + assume_role_policy = data.aws_iam_policy_document.lambda_assume_role_policy.json + path = local.role_path + permissions_boundary = var.role_permissions_boundary + tags = local.tags +} + +resource "aws_iam_role_policy" "ssm_housekeeper" { + name = "${var.prefix}-ssm-housekeeper-policy" + role = aws_iam_role.ssm_housekeeper.name + policy = templatefile("${path.module}/policies/lambda-ssm-housekeeper.json", { + ssm_token_path = "arn:${var.aws_partition}:ssm:${var.aws_region}:${data.aws_caller_identity.current.account_id}:parameter${local.token_path}" + }) +} + +resource "aws_iam_role_policy" "ssm_housekeeper_logging" { + name = "${var.prefix}-lambda-logging" + role = aws_iam_role.ssm_housekeeper.name + policy = templatefile("${path.module}/policies/lambda-cloudwatch.json", { + log_group_arn = aws_cloudwatch_log_group.ssm_housekeeper.arn + }) +} + +resource 
"aws_iam_role_policy_attachment" "ssm_housekeeper_vpc_execution_role" { + count = length(var.lambda_subnet_ids) > 0 ? 1 : 0 + role = aws_iam_role.ssm_housekeeper.name + policy_arn = "arn:${var.aws_partition}:iam::aws:policy/service-role/AWSLambdaVPCAccessExecutionRole" +} + +resource "aws_iam_role_policy" "ssm_housekeeper_xray" { + count = var.lambda_tracing_mode != null ? 1 : 0 + policy = data.aws_iam_policy_document.lambda_xray[0].json + role = aws_iam_role.ssm_housekeeper.name +} diff --git a/modules/runners/variables.tf b/modules/runners/variables.tf index 9b62ca4056..3cda8dc355 100644 --- a/modules/runners/variables.tf +++ b/modules/runners/variables.tf @@ -613,3 +613,25 @@ variable "associate_public_ipv4_address" { type = bool default = false } + +variable "ssm_housekeeper" { + description = <
root = string
tokens = string
config = string
})