diff --git a/README.md b/README.md index 3986ae0b4b..35142ea9a6 100644 --- a/README.md +++ b/README.md @@ -97,7 +97,7 @@ To be able to support a number of use-cases the module has quite a lot of config ### AWS SSM Parameters -The module uses the AWS System Manager Parameter Store to store configuration for the runners, as well as registration tokens and secrets for the Lambdas. Paths for the parameters can be configured via the variable `ssm_paths`. The location of the configuration parameters is retrieved by the runners via the instance tag `ghr:ssm_config_path`. The following default paths will be used. +The module uses the AWS Systems Manager Parameter Store to store configuration for the runners, as well as registration tokens and secrets for the Lambdas. Paths for the parameters can be configured via the variable `ssm_paths`. The location of the configuration parameters is retrieved by the runners via the instance tag `ghr:ssm_config_path`. The following default paths will be used. Tokens or JIT config stored in the token path are deleted by the instance after retrieval; any data not deleted within a day is removed by an SSM housekeeper lambda. | Path | Description | | ----------- | ----------- | @@ -585,6 +585,7 @@ We welcome any improvement to the standard module to make the default as secure | [runners\_maximum\_count](#input\_runners\_maximum\_count) | The maximum number of runners that will be created. | `number` | `3` | no | | [runners\_scale\_down\_lambda\_timeout](#input\_runners\_scale\_down\_lambda\_timeout) | Time out for the scale down lambda in seconds. | `number` | `60` | no | | [runners\_scale\_up\_lambda\_timeout](#input\_runners\_scale\_up\_lambda\_timeout) | Time out for the scale up lambda in seconds. | `number` | `30` | no | +| [runners\_ssm\_housekeeper](#input\_runners\_ssm\_housekeeper) | Configuration for the SSM housekeeper lambda. This lambda deletes token / JIT config from SSM.

`schedule_expression`: configures the schedule for the lambda.
`enabled`: enable or disable the lambda trigger via EventBridge.
`lambda_timeout`: timeout for the lambda in seconds.
`config`: configuration for the lambda function. The token path is read from the module by default. |
object({
schedule_expression = optional(string, "rate(1 day)")
enabled = optional(bool, true)
lambda_timeout = optional(number, 60)
config = object({
tokenPath = optional(string)
minimumDaysOld = optional(number, 1)
dryRun = optional(bool, false)
})
})
|
{
"config": {}
}
| no | | [scale\_down\_schedule\_expression](#input\_scale\_down\_schedule\_expression) | Scheduler expression to check every x for scale down. | `string` | `"cron(*/5 * * * ? *)"` | no | | [scale\_up\_reserved\_concurrent\_executions](#input\_scale\_up\_reserved\_concurrent\_executions) | Amount of reserved concurrent executions for the scale-up lambda function. A value of 0 disables lambda from being triggered and -1 removes any concurrency limitations. | `number` | `1` | no | | [ssm\_paths](#input\_ssm\_paths) | The root path used in SSM to store configuration and secrets. |
object({
root = optional(string, "github-action-runners")
app = optional(string, "app")
runners = optional(string, "runners")
use_prefix = optional(bool, true)
})
| `{}` | no | diff --git a/lambdas/functions/control-plane/jest.config.ts b/lambdas/functions/control-plane/jest.config.ts index 09acd3dcd6..ab3ae23f36 100644 --- a/lambdas/functions/control-plane/jest.config.ts +++ b/lambdas/functions/control-plane/jest.config.ts @@ -6,10 +6,10 @@ const config: Config = { ...defaultConfig, coverageThreshold: { global: { - statements: 97.6, - branches: 94.6, - functions: 97, - lines: 98, + statements: 97.89, + branches: 94.64, + functions: 97.33, + lines: 98.21, }, }, }; diff --git a/lambdas/functions/control-plane/package.json b/lambdas/functions/control-plane/package.json index ba4f5f9a5b..950276d061 100644 --- a/lambdas/functions/control-plane/package.json +++ b/lambdas/functions/control-plane/package.json @@ -8,7 +8,7 @@ "test": "NODE_ENV=test jest", "test:watch": "NODE_ENV=test jest --watch", "lint": "yarn eslint src", - "watch": "ts-node-dev --respawn --exit-child src/local.ts", + "watch": "ts-node-dev --respawn --exit-child src/local-ssm-housekeeper.ts", "build": "ncc build src/lambda.ts -o dist", "dist": "yarn build && cd dist && zip ../runners.zip index.js", "format": "prettier --write \"**/*.ts\"", diff --git a/lambdas/functions/control-plane/src/lambda.test.ts b/lambdas/functions/control-plane/src/lambda.test.ts index 9cb93d0139..c0fa0a8854 100644 --- a/lambdas/functions/control-plane/src/lambda.test.ts +++ b/lambdas/functions/control-plane/src/lambda.test.ts @@ -2,11 +2,12 @@ import { logger } from '@terraform-aws-github-runner/aws-powertools-util'; import { Context, SQSEvent, SQSRecord } from 'aws-lambda'; import { mocked } from 'jest-mock'; -import { adjustPool, scaleDownHandler, scaleUpHandler } from './lambda'; +import { adjustPool, scaleDownHandler, scaleUpHandler, ssmHousekeeper } from './lambda'; import { adjust } from './pool/pool'; import ScaleError from './scale-runners/ScaleError'; import { scaleDown } from './scale-runners/scale-down'; import { ActionRequestMessage, scaleUp } from './scale-runners/scale-up'; +import { cleanSSMTokens } from './scale-runners/ssm-housekeeper'; const body: ActionRequestMessage = { eventType: 'workflow_job', @@ -61,6 +62,7 @@ const context: Context = { jest.mock('./scale-runners/scale-up'); jest.mock('./scale-runners/scale-down'); jest.mock('./pool/pool'); +jest.mock('./scale-runners/ssm-housekeeper'); jest.mock('@terraform-aws-github-runner/aws-powertools-util'); // Docs for testing async with jest: https://jestjs.io/docs/tutorial-async @@ -87,7 +89,7 @@ describe('Test scale up lambda wrapper.', () => { const error = new Error('Non scale should resolve.'); const mock = mocked(scaleUp); mock.mockRejectedValue(error); - await expect(scaleUpHandler(sqsEvent, context)).resolves; + await expect(scaleUpHandler(sqsEvent, context)).resolves.not.toThrow(); }); it('Scale should be rejected', async () => { @@ -110,7 +112,7 @@ async function testInvalidRecords(sqsRecords: SQSRecord[]) { Records: sqsRecords, }; - await expect(scaleUpHandler(sqsEventMultipleRecords, context)).resolves; + await expect(scaleUpHandler(sqsEventMultipleRecords, context)).resolves.not.toThrow(); expect(logWarnSpy).toHaveBeenCalledWith( expect.stringContaining( @@ -127,14 +129,14 @@ describe('Test scale down lambda wrapper.', () => { resolve(); }); }); - await expect(scaleDownHandler({}, context)).resolves; + await expect(scaleDownHandler({}, context)).resolves.not.toThrow(); }); it('Scaling down with error.', async () => { const error = new Error('Scaling down with error.'); const mock = mocked(scaleDown); mock.mockRejectedValue(error); -
await expect(await scaleDownHandler({}, context)).resolves; + await expect(scaleDownHandler({}, context)).resolves.not.toThrow(); }); }); @@ -146,7 +148,7 @@ describe('Adjust pool.', () => { resolve(); }); }); - await expect(adjustPool({ poolSize: 2 }, context)).resolves; + await expect(adjustPool({ poolSize: 2 }, context)).resolves.not.toThrow(); }); it('Handle error for adjusting pool.', async () => { @@ -158,3 +160,28 @@ expect(logSpy).lastCalledWith(expect.stringContaining(error.message), expect.anything()); }); }); + +describe('Test ssm housekeeper lambda wrapper.', () => { + it('Invoke without errors.', async () => { + const mock = mocked(cleanSSMTokens); + mock.mockImplementation(() => { + return new Promise((resolve) => { + resolve(); + }); + }); + + process.env.SSM_CLEANUP_CONFIG = JSON.stringify({ + dryRun: false, + minimumDaysOld: 1, + tokenPath: '/path/to/tokens/', + }); + + await expect(ssmHousekeeper({}, context)).resolves.not.toThrow(); + }); + + it('Errors are not thrown.', async () => { + const mock = mocked(cleanSSMTokens); + mock.mockRejectedValue(new Error()); + await expect(ssmHousekeeper({}, context)).resolves.not.toThrow(); + }); +}); diff --git a/lambdas/functions/control-plane/src/lambda.ts b/lambdas/functions/control-plane/src/lambda.ts index 05204f38d3..e4e3f66096 100644 --- a/lambdas/functions/control-plane/src/lambda.ts +++ b/lambdas/functions/control-plane/src/lambda.ts @@ -6,6 +6,7 @@ import { PoolEvent, adjust } from './pool/pool'; import ScaleError from './scale-runners/ScaleError'; import { scaleDown } from './scale-runners/scale-down'; import { scaleUp } from './scale-runners/scale-up'; +import { SSMCleanupOptions, cleanSSMTokens } from './scale-runners/ssm-housekeeper'; export async function scaleUpHandler(event: SQSEvent, context: Context): Promise<void> { setContext(context, 'lambda.ts'); @@ -48,3 +49,15 @@ export async function adjustPool(event: PoolEvent, context: Context): Promise<void> { + +export async function ssmHousekeeper(event: unknown, context: Context): Promise<void> { + setContext(context, 'lambda.ts'); + logger.logEventIfEnabled(event); + const config = JSON.parse(process.env.SSM_CLEANUP_CONFIG) as SSMCleanupOptions; + + try { + await cleanSSMTokens(config); + } catch (e) { + logger.error(`${(e as Error).message}`, { error: e as Error }); + } +} diff --git a/lambdas/functions/control-plane/src/local-ssm-housekeeper.ts b/lambdas/functions/control-plane/src/local-ssm-housekeeper.ts new file mode 100644 index 0000000000..ec635b13ad --- /dev/null +++ b/lambdas/functions/control-plane/src/local-ssm-housekeeper.ts @@ -0,0 +1,15 @@ +import { cleanSSMTokens } from './scale-runners/ssm-housekeeper'; + +export function run(): void { + cleanSSMTokens({ + dryRun: true, + minimumDaysOld: 3, + tokenPath: '/ghr/my-env/runners/tokens', + }) + .then() + .catch((e) => { + console.log(e); + }); +} + +run(); diff --git a/lambdas/functions/control-plane/src/modules.d.ts b/lambdas/functions/control-plane/src/modules.d.ts index c6431d20aa..1722800088 100644 --- a/lambdas/functions/control-plane/src/modules.d.ts +++ b/lambdas/functions/control-plane/src/modules.d.ts @@ -14,6 +14,7 @@ declare namespace NodeJS { RUNNER_OWNER: string; SCALE_DOWN_CONFIG: string; SSM_TOKEN_PATH: string; + SSM_CLEANUP_CONFIG: string; SUBNET_IDS: string; INSTANCE_TYPES: string; INSTANCE_TARGET_CAPACITY_TYPE: 'on-demand' | 'spot'; diff --git a/lambdas/functions/control-plane/src/scale-runners/ssm-housekeeper.test.ts b/lambdas/functions/control-plane/src/scale-runners/ssm-housekeeper.test.ts new file mode 100644 index 0000000000..3af60b27e6 --- /dev/null +++
b/lambdas/functions/control-plane/src/scale-runners/ssm-housekeeper.test.ts @@ -0,0 +1,117 @@ +import { DeleteParameterCommand, GetParametersByPathCommand, SSMClient } from '@aws-sdk/client-ssm'; +import { mockClient } from 'aws-sdk-client-mock'; +import 'aws-sdk-client-mock-jest'; +import { cleanSSMTokens } from './ssm-housekeeper'; + +process.env.AWS_REGION = 'eu-east-1'; + +const mockSSMClient = mockClient(SSMClient); + +const deleteTokensOlderThanDays = 1; +const now = new Date(); +const dateOld = new Date(); +dateOld.setDate(dateOld.getDate() - deleteTokensOlderThanDays - 1); + +const tokenPath = '/path/to/tokens/'; + +describe('clean SSM tokens / JIT config', () => { + beforeEach(() => { + mockSSMClient.reset(); + mockSSMClient.on(GetParametersByPathCommand).resolves({ + Parameters: undefined, + }); + mockSSMClient.on(GetParametersByPathCommand, { Path: tokenPath }).resolves({ + Parameters: [ + { + Name: tokenPath + 'i-old-01', + LastModifiedDate: dateOld, + }, + ], + NextToken: 'next', + }); + mockSSMClient.on(GetParametersByPathCommand, { Path: tokenPath, NextToken: 'next' }).resolves({ + Parameters: [ + { + Name: tokenPath + 'i-new-01', + LastModifiedDate: now, + }, + ], + NextToken: undefined, + }); + }); + + it('should delete parameters older than minimumDaysOld', async () => { + await cleanSSMTokens({ + dryRun: false, + minimumDaysOld: deleteTokensOlderThanDays, + tokenPath: tokenPath, + }); + + expect(mockSSMClient).toHaveReceivedCommandWith(GetParametersByPathCommand, { Path: tokenPath }); + expect(mockSSMClient).toHaveReceivedCommandWith(DeleteParameterCommand, { Name: tokenPath + 'i-old-01' }); + expect(mockSSMClient).not.toHaveReceivedCommandWith(DeleteParameterCommand, { Name: tokenPath + 'i-new-01' }); + }); + + it('should not delete when dry run is activated', async () => { + await cleanSSMTokens({ + dryRun: true, + minimumDaysOld: deleteTokensOlderThanDays, + tokenPath: tokenPath, + }); + + expect(mockSSMClient).toHaveReceivedCommandWith(GetParametersByPathCommand, { Path: tokenPath }); + expect(mockSSMClient).not.toHaveReceivedCommandWith(DeleteParameterCommand, { Name: tokenPath + 'i-old-01' }); + expect(mockSSMClient).not.toHaveReceivedCommandWith(DeleteParameterCommand, { Name: tokenPath + 'i-new-01' }); + }); + + it('should not call delete when no parameters are found.', async () => { + await expect( + cleanSSMTokens({ + dryRun: false, + minimumDaysOld: deleteTokensOlderThanDays, + tokenPath: 'no-exist', + }), + ).resolves.not.toThrow(); + + expect(mockSSMClient).not.toHaveReceivedCommandWith(DeleteParameterCommand, { Name: tokenPath + 'i-old-01' }); + expect(mockSSMClient).not.toHaveReceivedCommandWith(DeleteParameterCommand, { Name: tokenPath + 'i-new-01' }); + }); + + it('should not error on delete failure.', async () => { + mockSSMClient.on(DeleteParameterCommand).rejects(new Error('ParameterNotFound')); + + await expect( + cleanSSMTokens({ + dryRun: false, + minimumDaysOld: deleteTokensOlderThanDays, + tokenPath: tokenPath, + }), + ).resolves.not.toThrow(); + }); + + it('should only accept valid options.', async () => { + await expect( + cleanSSMTokens({ + dryRun: false, + minimumDaysOld: undefined as unknown as number, + tokenPath: tokenPath, + }), + ).rejects.toBeInstanceOf(Error); + + await expect( + cleanSSMTokens({ + dryRun: false, + minimumDaysOld: 0, + tokenPath: tokenPath, + }), + ).rejects.toBeInstanceOf(Error); + + await expect( + cleanSSMTokens({ + dryRun: false, + minimumDaysOld: 1, + tokenPath: undefined as unknown as string, + }), +
).rejects.toBeInstanceOf(Error); + }); +}); diff --git a/lambdas/functions/control-plane/src/scale-runners/ssm-housekeeper.ts b/lambdas/functions/control-plane/src/scale-runners/ssm-housekeeper.ts new file mode 100644 index 0000000000..ea23920416 --- /dev/null +++ b/lambdas/functions/control-plane/src/scale-runners/ssm-housekeeper.ts @@ -0,0 +1,61 @@ +import { DeleteParameterCommand, GetParametersByPathCommand, SSMClient } from '@aws-sdk/client-ssm'; +import { logger } from '@terraform-aws-github-runner/aws-powertools-util'; + +export interface SSMCleanupOptions { + dryRun: boolean; + minimumDaysOld: number; + tokenPath: string; +} + +function validateOptions(options: SSMCleanupOptions): void { + const errorMessages: string[] = []; + if (!options.minimumDaysOld || options.minimumDaysOld < 1) { + errorMessages.push(`minimumDaysOld must be greater than 0, value is set to "${options.minimumDaysOld}"`); + } + if (!options.tokenPath) { + errorMessages.push('tokenPath must be defined'); + } + if (errorMessages.length > 0) { + throw new Error(errorMessages.join(', ')); + } +} + +export async function cleanSSMTokens(options: SSMCleanupOptions): Promise<void> { + logger.info(`Cleaning tokens / JIT config older than ${options.minimumDaysOld} days, dryRun: ${options.dryRun}`); + logger.debug('Cleaning with options', { options }); + validateOptions(options); + + const client = new SSMClient({ region: process.env.AWS_REGION }); + const parameters = await client.send(new GetParametersByPathCommand({ Path: options.tokenPath })); + while (parameters.NextToken) { + const nextParameters = await client.send( + new GetParametersByPathCommand({ Path: options.tokenPath, NextToken: parameters.NextToken }), + ); + parameters.Parameters?.push(...(nextParameters.Parameters ?? [])); + parameters.NextToken = nextParameters.NextToken; + } + logger.info(`Found ${parameters.Parameters?.length} parameters in path ${options.tokenPath}`); + logger.debug('Found parameters', { parameters }); + + // minimumDate = today - minimumDaysOld + const minimumDate = new Date(); + minimumDate.setDate(minimumDate.getDate() - options.minimumDaysOld); + + for (const parameter of parameters.Parameters ??
[]) { + if (parameter.LastModifiedDate && new Date(parameter.LastModifiedDate) < minimumDate) { + logger.info(`Deleting parameter ${parameter.Name} with last modified date ${parameter.LastModifiedDate}`); + try { + if (!options.dryRun) { + // sleep 50 ms between deletes to avoid hitting the SSM rate limit + await new Promise((resolve) => setTimeout(resolve, 50)); + await client.send(new DeleteParameterCommand({ Name: parameter.Name })); + } + } catch (e) { + logger.warn(`Failed to delete parameter ${parameter.Name} with error ${(e as Error).message}`); + logger.debug('Failed to delete parameter', { e }); + } + } else { + logger.debug(`Skipping parameter ${parameter.Name} with last modified date ${parameter.LastModifiedDate}`); + } + } +} diff --git a/main.tf b/main.tf index afeb344dda..6979d57553 100644 --- a/main.tf +++ b/main.tf @@ -277,6 +277,8 @@ module "runners" { pool_lambda_timeout = var.pool_lambda_timeout pool_runner_owner = var.pool_runner_owner pool_lambda_reserved_concurrent_executions = var.pool_lambda_reserved_concurrent_executions + + ssm_housekeeper = var.runners_ssm_housekeeper } module "runner_binaries" { @@ -318,7 +320,6 @@ module "runner_binaries" { lambda_security_group_ids = var.lambda_security_group_ids aws_partition = var.aws_partition - lambda_principals = var.lambda_principals } diff --git a/modules/multi-runner/README.md b/modules/multi-runner/README.md index 948b8c25dc..263a2ff16a 100644 --- a/modules/multi-runner/README.md +++ b/modules/multi-runner/README.md @@ -166,6 +166,7 @@ module "multi-runner" { | [runners\_lambda\_zip](#input\_runners\_lambda\_zip) | File location of the lambda zip file for scaling runners. | `string` | `null` | no | | [runners\_scale\_down\_lambda\_timeout](#input\_runners\_scale\_down\_lambda\_timeout) | Time out for the scale down lambda in seconds. | `number` | `60` | no | | [runners\_scale\_up\_lambda\_timeout](#input\_runners\_scale\_up\_lambda\_timeout) | Time out for the scale up lambda in seconds. | `number` | `30` | no | +| [runners\_ssm\_housekeeper](#input\_runners\_ssm\_housekeeper) | Configuration for the SSM housekeeper lambda. This lambda deletes token / JIT config from SSM.

`schedule_expression`: configures the schedule for the lambda.
`enabled`: enable or disable the lambda trigger via EventBridge.
`lambda_timeout`: timeout for the lambda in seconds.
`config`: configuration for the lambda function. The token path is read from the module by default. |
object({
schedule_expression = optional(string, "rate(1 day)")
enabled = optional(bool, true)
lambda_timeout = optional(number, 60)
config = object({
tokenPath = optional(string)
minimumDaysOld = optional(number, 1)
dryRun = optional(bool, false)
})
})
|
{
"config": {}
}
| no | | [ssm\_paths](#input\_ssm\_paths) | The root path used in SSM to store configuration and secrets. |
object({
root = optional(string, "github-action-runners")
app = optional(string, "app")
runners = optional(string, "runners")
})
| `{}` | no | | [subnet\_ids](#input\_subnet\_ids) | List of subnets in which the action runners will be launched, the subnets need to be in the `vpc_id`. | `list(string)` | n/a | yes | | [syncer\_lambda\_s3\_key](#input\_syncer\_lambda\_s3\_key) | S3 key for syncer lambda function. Required if using S3 bucket to specify lambdas. | `string` | `null` | no | diff --git a/modules/multi-runner/runners.tf b/modules/multi-runner/runners.tf index 2ae61c0820..2541200c49 100644 --- a/modules/multi-runner/runners.tf +++ b/modules/multi-runner/runners.tf @@ -104,4 +104,6 @@ module "runners" { pool_runner_owner = each.value.runner_config.pool_runner_owner pool_lambda_reserved_concurrent_executions = var.pool_lambda_reserved_concurrent_executions associate_public_ipv4_address = var.associate_public_ipv4_address + + ssm_housekeeper = var.runners_ssm_housekeeper } diff --git a/modules/multi-runner/variables.tf b/modules/multi-runner/variables.tf index cd81fb5ee1..daa7bf17ca 100644 --- a/modules/multi-runner/variables.tf +++ b/modules/multi-runner/variables.tf @@ -552,3 +552,25 @@ variable "associate_public_ipv4_address" { type = bool default = false } +
+variable "runners_ssm_housekeeper" {
+  description = <<-EOF
+    Configuration for the SSM housekeeper lambda. This lambda deletes token / JIT config from SSM.
+
+    `schedule_expression`: configures the schedule for the lambda.
+    `enabled`: enable or disable the lambda trigger via EventBridge.
+    `lambda_timeout`: timeout for the lambda in seconds.
+    `config`: configuration for the lambda function. The token path is read from the module by default.
+  EOF
+  type = object({
+    schedule_expression = optional(string, "rate(1 day)")
+    enabled             = optional(bool, true)
+    lambda_timeout      = optional(number, 60)
+    config = object({
+      tokenPath      = optional(string)
+      minimumDaysOld = optional(number, 1)
+      dryRun         = optional(bool, false)
+    })
+  })
+  default = { config = {} }
+}
diff --git a/modules/runners/README.md b/modules/runners/README.md --- a/modules/runners/README.md +++ b/modules/runners/README.md | [scale\_down\_schedule\_expression](#input\_scale\_down\_schedule\_expression) | Scheduler expression to check every x for scale down. | `string` | `"cron(*/5 * * * ? *)"` | no | | [scale\_up\_reserved\_concurrent\_executions](#input\_scale\_up\_reserved\_concurrent\_executions) | Amount of reserved concurrent executions for the scale-up lambda function. A value of 0 disables lambda from being triggered and -1 removes any concurrency limitations. | `number` | `1` | no | | [sqs\_build\_queue](#input\_sqs\_build\_queue) | SQS queue to consume accepted build events. |
object({
arn = string
})
| n/a | yes | +| [ssm\_housekeeper](#input\_ssm\_housekeeper) | Configuration for the SSM housekeeper lambda. This lambda deletes token / JIT config from SSM.

`schedule_expression`: configures the schedule for the lambda.
`enabled`: enable or disable the lambda trigger via EventBridge.
`lambda_timeout`: timeout for the lambda in seconds.
`config`: configuration for the lambda function. The token path is read from the module by default. |
object({
schedule_expression = optional(string, "rate(1 day)")
enabled = optional(bool, true)
lambda_timeout = optional(number, 60)
config = object({
tokenPath = optional(string)
minimumDaysOld = optional(number, 1)
dryRun = optional(bool, false)
})
})
|
{
"config": {}
}
| no | | [ssm\_paths](#input\_ssm\_paths) | The root path used in SSM to store configuration and secrets. |
object({
root = string
tokens = string
config = string
})
| n/a | yes | | [subnet\_ids](#input\_subnet\_ids) | List of subnets in which the action runners will be launched, the subnets need to be in the `vpc_id`. | `list(string)` | n/a | yes | | [tags](#input\_tags) | Map of tags that will be added to created resources. By default resources will be tagged with name. | `map(string)` | `{}` | no | diff --git a/modules/runners/main.tf b/modules/runners/main.tf index 48583d4d7c..f43db5daff 100644 --- a/modules/runners/main.tf +++ b/modules/runners/main.tf @@ -43,6 +43,8 @@ locals { enable_job_queued_check = var.enable_job_queued_check == null ? !var.enable_ephemeral_runners : var.enable_job_queued_check arn_ssm_parameters_path_config = "arn:${var.aws_partition}:ssm:${var.aws_region}:${data.aws_caller_identity.current.account_id}:parameter${var.ssm_paths.root}/${var.ssm_paths.config}" + + token_path = "${var.ssm_paths.root}/${var.ssm_paths.tokens}" } data "aws_ami" "runner" { diff --git a/modules/runners/policies/lambda-ssm-housekeeper.json b/modules/runners/policies/lambda-ssm-housekeeper.json new file mode 100644 index 0000000000..5e49baafaa --- /dev/null +++ b/modules/runners/policies/lambda-ssm-housekeeper.json @@ -0,0 +1,13 @@ +{ + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "ssm:DeleteParameter", + "ssm:GetParametersByPath" + ], + "Resource": "${ssm_token_path}*" + } + ] +} diff --git a/modules/runners/scale-up.tf b/modules/runners/scale-up.tf index ae5ba6b938..7a05c48c3f 100644 --- a/modules/runners/scale-up.tf +++ b/modules/runners/scale-up.tf @@ -40,7 +40,7 @@ resource "aws_lambda_function" "scale_up" { RUNNER_NAME_PREFIX = var.runner_name_prefix RUNNERS_MAXIMUM_COUNT = var.runners_maximum_count SERVICE_NAME = "runners-scale-up" - SSM_TOKEN_PATH = "${var.ssm_paths.root}/${var.ssm_paths.tokens}" + SSM_TOKEN_PATH = local.token_path SSM_CONFIG_PATH = "${var.ssm_paths.root}/${var.ssm_paths.config}" SUBNET_IDS = join(",", var.subnet_ids) } diff --git a/modules/runners/ssm-housekeeper.tf b/modules/runners/ssm-housekeeper.tf new file mode 100644 index 0000000000..b849d1c687 --- /dev/null +++ b/modules/runners/ssm-housekeeper.tf @@ -0,0 +1,115 @@ +locals { + ssm_housekeeper = { + schedule_expression = var.ssm_housekeeper.schedule_expression + enabled = var.ssm_housekeeper.enabled + lambda_timeout = var.ssm_housekeeper.lambda_timeout + config = { + tokenPath = var.ssm_housekeeper.config.tokenPath == null ? local.token_path : var.ssm_housekeeper.config.tokenPath + minimumDaysOld = var.ssm_housekeeper.config.minimumDaysOld + dryRun = var.ssm_housekeeper.config.dryRun + } + } +} + +resource "aws_lambda_function" "ssm_housekeeper" { + s3_bucket = var.lambda_s3_bucket != null ? var.lambda_s3_bucket : null + s3_key = var.runners_lambda_s3_key != null ? var.runners_lambda_s3_key : null + s3_object_version = var.runners_lambda_s3_object_version != null ? var.runners_lambda_s3_object_version : null + filename = var.lambda_s3_bucket == null ? local.lambda_zip : null + source_code_hash = var.lambda_s3_bucket == null ?
filebase64sha256(local.lambda_zip) : null + function_name = "${var.prefix}-ssm-housekeeper" + role = aws_iam_role.ssm_housekeeper.arn + handler = "index.ssmHousekeeper" + runtime = var.lambda_runtime + timeout = local.ssm_housekeeper.lambda_timeout + tags = local.tags + memory_size = 512 + architectures = [var.lambda_architecture] + + environment { + variables = { + ENVIRONMENT = var.prefix + LOG_LEVEL = var.log_level + SSM_CLEANUP_CONFIG = jsonencode(local.ssm_housekeeper.config) + SERVICE_NAME = "ssm-housekeeper" + } + } + + dynamic "vpc_config" { + for_each = var.lambda_subnet_ids != null && var.lambda_security_group_ids != null ? [true] : [] + content { + security_group_ids = var.lambda_security_group_ids + subnet_ids = var.lambda_subnet_ids + } + } + + dynamic "tracing_config" { + for_each = var.lambda_tracing_mode != null ? [true] : [] + content { + mode = var.lambda_tracing_mode + } + } +} + +resource "aws_cloudwatch_log_group" "ssm_housekeeper" { + name = "/aws/lambda/${aws_lambda_function.ssm_housekeeper.function_name}" + retention_in_days = var.logging_retention_in_days + kms_key_id = var.logging_kms_key_id + tags = var.tags +} + +resource "aws_cloudwatch_event_rule" "ssm_housekeeper" { + name = "${var.prefix}-ssm-housekeeper" + schedule_expression = local.ssm_housekeeper.schedule_expression + tags = var.tags + is_enabled = local.ssm_housekeeper.enabled +} + +resource "aws_cloudwatch_event_target" "ssm_housekeeper" { + rule = aws_cloudwatch_event_rule.ssm_housekeeper.name + arn = aws_lambda_function.ssm_housekeeper.arn +} + +resource "aws_lambda_permission" "ssm_housekeeper" { + statement_id = "AllowExecutionFromCloudWatch" + action = "lambda:InvokeFunction" + function_name = aws_lambda_function.ssm_housekeeper.function_name + principal = "events.amazonaws.com" + source_arn = aws_cloudwatch_event_rule.ssm_housekeeper.arn +} + +resource "aws_iam_role" "ssm_housekeeper" { + name = "${var.prefix}-action-ssm-housekeeper-lambda-role" + assume_role_policy = data.aws_iam_policy_document.lambda_assume_role_policy.json + path = local.role_path + permissions_boundary = var.role_permissions_boundary + tags = local.tags +} + +resource "aws_iam_role_policy" "ssm_housekeeper" { + name = "${var.prefix}-ssm-housekeeper-policy" + role = aws_iam_role.ssm_housekeeper.name + policy = templatefile("${path.module}/policies/lambda-ssm-housekeeper.json", { + ssm_token_path = "arn:${var.aws_partition}:ssm:${var.aws_region}:${data.aws_caller_identity.current.account_id}:parameter${local.token_path}" + }) +} + +resource "aws_iam_role_policy" "ssm_housekeeper_logging" { + name = "${var.prefix}-lambda-logging" + role = aws_iam_role.ssm_housekeeper.name + policy = templatefile("${path.module}/policies/lambda-cloudwatch.json", { + log_group_arn = aws_cloudwatch_log_group.ssm_housekeeper.arn + }) +} + +resource "aws_iam_role_policy_attachment" "ssm_housekeeper_vpc_execution_role" { + count = length(var.lambda_subnet_ids) > 0 ? 1 : 0 + role = aws_iam_role.ssm_housekeeper.name + policy_arn = "arn:${var.aws_partition}:iam::aws:policy/service-role/AWSLambdaVPCAccessExecutionRole" +} + +resource "aws_iam_role_policy" "ssm_housekeeper_xray" { + count = var.lambda_tracing_mode != null ? 
1 : 0 + policy = data.aws_iam_policy_document.lambda_xray[0].json + role = aws_iam_role.ssm_housekeeper.name +} diff --git a/modules/runners/variables.tf b/modules/runners/variables.tf index 9b62ca4056..3cda8dc355 100644 --- a/modules/runners/variables.tf +++ b/modules/runners/variables.tf @@ -613,3 +613,25 @@ variable "associate_public_ipv4_address" { type = bool default = false } +
+variable "ssm_housekeeper" {
+  description = <<-EOF
+    Configuration for the SSM housekeeper lambda. This lambda deletes token / JIT config from SSM.
+
+    `schedule_expression`: configures the schedule for the lambda.
+    `enabled`: enable or disable the lambda trigger via EventBridge.
+    `lambda_timeout`: timeout for the lambda in seconds.
+    `config`: configuration for the lambda function. The token path is read from the module by default.
+  EOF
+  type = object({
+    schedule_expression = optional(string, "rate(1 day)")
+    enabled             = optional(bool, true)
+    lambda_timeout      = optional(number, 60)
+    config = object({
+      tokenPath      = optional(string)
+      minimumDaysOld = optional(number, 1)
+      dryRun         = optional(bool, false)
+    })
+  })
+  default = { config = {} }
+}
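For reference, a minimal sketch of how the new `runners_ssm_housekeeper` input might be set from a consuming configuration. The module `source` and `version` below are placeholders, and the values shown simply restate the defaults documented above:

```hcl
module "runners" {
  # Placeholder source/version; use the release you actually consume.
  source  = "philips-labs/github-runner/aws"
  version = "x.y.z"

  # ... required inputs such as github_app, vpc_id and subnet_ids omitted ...

  # Clean up tokens / JIT config that terminated instances left behind in SSM.
  runners_ssm_housekeeper = {
    schedule_expression = "rate(1 day)" # default: run once a day
    enabled             = true          # default: EventBridge rule enabled
    lambda_timeout      = 60            # default: lambda timeout in seconds
    config = {
      # tokenPath is read from the module when not set
      minimumDaysOld = 1     # delete parameters older than one day
      dryRun         = false # set to true to only log what would be deleted
    }
  }
}
```

Running once with `dryRun = true` and checking the housekeeper lambda's CloudWatch logs is a low-risk way to confirm which parameters would be removed before enabling actual deletion.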