diff --git a/README.md b/README.md index 35142ea9a6..4c2018b9ed 100644 --- a/README.md +++ b/README.md @@ -32,6 +32,7 @@ This [Terraform](https://www.terraform.io/) module creates the required infrastr - [Examples](#examples) - [Sub modules](#sub-modules) - [Logging](#logging) +- [Tracing](#tracing) - [Debugging](#debugging) - [Security Considerations](#security-considerations) - [Requirements](#requirements) @@ -427,6 +428,17 @@ An example log message of the scale-up function: } } ``` +## Tracing +Because this application has a distributed architecture, it can be difficult to troubleshoot. +We support the option to enable tracing for all the lambda functions created by this application. To enable tracing, provide the `tracing_config` option in the root module or in the inner modules. + +This tracing config generates timelines for the following events: +- Basic lifecycle of the lambda function +- Traces for GitHub API calls (can be configured by `capture_http_requests`). +- Traces for all AWS SDK calls + +This feature is disabled by default. + ## Debugging @@ -543,7 +555,7 @@ We welcome any improvement to the standard module to make the default as secure | [lambda\_s3\_bucket](#input\_lambda\_s3\_bucket) | S3 bucket from which to specify lambda functions. This is an alternative to providing local files directly. | `string` | `null` | no | | [lambda\_security\_group\_ids](#input\_lambda\_security\_group\_ids) | List of security group IDs associated with the Lambda function. | `list(string)` | `[]` | no | | [lambda\_subnet\_ids](#input\_lambda\_subnet\_ids) | List of subnets in which the action runners will be launched, the subnets needs to be subnets in the `vpc_id`. | `list(string)` | `[]` | no | -| [lambda\_tracing\_mode](#input\_lambda\_tracing\_mode) | Enable X-Ray tracing for the lambda functions. | `string` | `null` | no | +| [lambda\_tracing\_mode](#input\_lambda\_tracing\_mode) | DEPRECATED: Replaced by `tracing_config`. 
| `string` | `null` | no | | [log\_level](#input\_log\_level) | Logging level for lambda logging. Valid values are 'silly', 'trace', 'debug', 'info', 'warn', 'error', 'fatal'. | `string` | `"info"` | no | | [logging\_kms\_key\_id](#input\_logging\_kms\_key\_id) | Specifies the kms key id to encrypt the logs with. | `string` | `null` | no | | [logging\_retention\_in\_days](#input\_logging\_retention\_in\_days) | Specifies the number of days you want to retain log events for the lambda log group. Possible values are: 0, 1, 3, 5, 7, 14, 30, 60, 90, 120, 150, 180, 365, 400, 545, 731, 1827, and 3653. | `number` | `180` | no | @@ -593,6 +605,7 @@ We welcome any improvement to the standard module to make the default as secure | [syncer\_lambda\_s3\_key](#input\_syncer\_lambda\_s3\_key) | S3 key for syncer lambda function. Required if using an S3 bucket to specify lambdas. | `string` | `null` | no | | [syncer\_lambda\_s3\_object\_version](#input\_syncer\_lambda\_s3\_object\_version) | S3 object version for syncer lambda function. Useful if S3 versioning is enabled on source bucket. | `string` | `null` | no | | [tags](#input\_tags) | Map of tags that will be added to created resources. By default resources will be tagged with name and environment. | `map(string)` | `{}` | no | +| [tracing\_config](#input\_tracing\_config) | Configuration for lambda tracing. |
object({| `{}` | no | | [userdata\_post\_install](#input\_userdata\_post\_install) | Script to be ran after the GitHub Actions runner is installed on the EC2 instances | `string` | `""` | no | | [userdata\_pre\_install](#input\_userdata\_pre\_install) | Script to be ran before the GitHub Actions runner is installed on the EC2 instances | `string` | `""` | no | | [userdata\_template](#input\_userdata\_template) | Alternative user-data template, replacing the default template. By providing your own user\_data you have to take care of installing all required software, including the action runner. Variables userdata\_pre/post\_install are ignored. | `string` | `null` | no | diff --git a/examples/ephemeral/main.tf b/examples/ephemeral/main.tf index f549b566fd..6aa9e2cda4 100644 --- a/examples/ephemeral/main.tf +++ b/examples/ephemeral/main.tf @@ -69,11 +69,20 @@ module "runners" { # enable_job_queued_check = true + # tracing_config = { + # mode = "Active" + # capture_error = true + # capture_http_requests = true + # } + + # configure your pre-built AMI # enable_userdata = false - # ami_filter = { name = ["github-runner-al2023-x86_64-*"], state = ["available"] } - # data "aws_caller_identity" "current" {} - # ami_owners = [data.aws_caller_identity.current.account_id] + # ami_filter = { name = ["github-runner-al2023-x86_64-*"], state = ["available"] } + # ami_owners = [data.aws_caller_identity.current.account_id] + + # or use the default AMI + # enable_userdata = true # Enable debug logging for the lambda functions # log_level = "debug" diff --git a/examples/multi-runner/main.tf b/examples/multi-runner/main.tf index 45e268609f..b1ab05b427 100644 --- a/examples/multi-runner/main.tf +++ b/examples/multi-runner/main.tf @@ -57,6 +57,12 @@ module "runners" { id = var.github_app.id webhook_secret = random_id.random.hex } + # enable this section for tracing + # tracing_config = { + # mode = "Active" + # capture_error = true + # capture_http_requests = true + # } # Assuming local 
build lambda's to use pre build ones, uncomment the lines below and download the # lambda zip files lambda_download # webhook_lambda_zip = "../lambdas-download/webhook.zip" diff --git a/lambdas/functions/ami-housekeeper/src/ami.ts b/lambdas/functions/ami-housekeeper/src/ami.ts index c74aa839c2..1298ff0f6f 100644 --- a/lambdas/functions/ami-housekeeper/src/ami.ts +++ b/lambdas/functions/ami-housekeeper/src/ami.ts @@ -10,6 +10,7 @@ import { } from '@aws-sdk/client-ec2'; import { DescribeParametersCommand, GetParameterCommand, SSMClient } from '@aws-sdk/client-ssm'; import { createChildLogger } from '@terraform-aws-github-runner/aws-powertools-util'; +import { getTracedAWSV3Client } from '@terraform-aws-github-runner/aws-powertools-util'; const logger = createChildLogger('ami'); @@ -82,7 +83,7 @@ async function getAmisNotInUse(options: AmiCleanupOptions) { const amiIdsInSSM = await getAmisReferedInSSM(options); const amiIdsInTemplates = await getAmiInLatestTemplates(options); - const ec2Client = new EC2Client({}); + const ec2Client = getTracedAWSV3Client(new EC2Client({})); logger.debug('Getting all AMIs from ec2 with filters', { filters: options.amiFilters }); const amiEc2 = await ec2Client.send( new DescribeImagesCommand({ @@ -133,7 +134,7 @@ async function deleteAmi(amiDetails: Image, options: AmiCleanupOptionsInternal): try { logger.info(`deleting ami ${amiDetails.Name || amiDetails.ImageId} created at ${amiDetails.CreationDate}`); - const ec2Client = new EC2Client({}); + const ec2Client = getTracedAWSV3Client(new EC2Client({})); await ec2Client.send(new DeregisterImageCommand({ ImageId: amiDetails.ImageId, DryRun: options.dryRun })); await deleteSnapshot(options, amiDetails, ec2Client); } catch (error) { @@ -158,7 +159,7 @@ async function deleteSnapshot(options: AmiCleanupOptions, amiDetails: Image, ec2 } async function getAmiInLatestTemplates(options: AmiCleanupOptions): Promise<(string | undefined)[]> { - const ec2Client = new EC2Client({}); + const ec2Client = 
getTracedAWSV3Client(new EC2Client({})); const launnchTemplates = await ec2Client.send( new DescribeLaunchTemplatesCommand({ LaunchTemplateNames: options.launchTemplateNames, @@ -188,7 +189,7 @@ async function getAmisReferedInSSM(options: AmiCleanupOptions): Promise<(string return []; } - const ssmClient = new SSMClient({}); + const ssmClient = getTracedAWSV3Client(new SSMClient({})); const ssmParams = await ssmClient.send( new DescribeParametersCommand({ ParameterFilters: [ diff --git a/lambdas/functions/control-plane/package.json b/lambdas/functions/control-plane/package.json index 950276d061..dd8cb85078 100644 --- a/lambdas/functions/control-plane/package.json +++ b/lambdas/functions/control-plane/package.json @@ -40,11 +40,13 @@ "dependencies": { "@aws-sdk/client-ec2": "^3.436.0", "@aws-sdk/types": "^3.433.0", + "@middy/core": "^3.6.2", "@octokit/auth-app": "6.0.1", "@octokit/rest": "20.0.2", "@octokit/types": "^12.1.1", "@terraform-aws-github-runner/aws-powertools-util": "*", "@terraform-aws-github-runner/aws-ssm-util": "*", + "axios": "^1.5.1", "cron-parser": "^4.8.1", "typescript": "^5.0.4" } diff --git a/lambdas/functions/control-plane/src/aws/runners.d.ts b/lambdas/functions/control-plane/src/aws/runners.d.ts index da15866b75..1a53f47555 100644 --- a/lambdas/functions/control-plane/src/aws/runners.d.ts +++ b/lambdas/functions/control-plane/src/aws/runners.d.ts @@ -39,4 +39,5 @@ export interface RunnerInputParameters { }; numberOfRunners?: number; amiIdSsmParameterName?: string; + tracingEnabled?: boolean; } diff --git a/lambdas/functions/control-plane/src/aws/runners.test.ts b/lambdas/functions/control-plane/src/aws/runners.test.ts index ed044ff4fe..fb08ec4b3b 100644 --- a/lambdas/functions/control-plane/src/aws/runners.test.ts +++ b/lambdas/functions/control-plane/src/aws/runners.test.ts @@ -10,6 +10,7 @@ import { TerminateInstancesCommand, } from '@aws-sdk/client-ec2'; import { GetParameterCommand, GetParameterResult, PutParameterCommand, SSMClient } 
from '@aws-sdk/client-ssm'; +import { tracer } from '@terraform-aws-github-runner/aws-powertools-util'; import { mockClient } from 'aws-sdk-client-mock'; import 'aws-sdk-client-mock-jest'; @@ -236,6 +237,15 @@ describe('create runner', () => { Name: 'my-ami-id-param', }); }); + it('calls create fleet of 1 instance with runner tracing enabled', async () => { + tracer.getRootXrayTraceId = jest.fn().mockReturnValue('123'); + + await createRunner(createRunnerConfig({ ...defaultRunnerConfig, tracingEnabled: true })); + + expect(mockEC2Client).toHaveReceivedCommandWith(CreateFleetCommand, { + ...expectedCreateFleetRequest({ ...defaultExpectedFleetRequestValues, tracingEnabled: true }), + }); + }); }); describe('create runner with errors', () => { @@ -350,6 +360,7 @@ interface RunnerConfig { allocationStrategy: SpotAllocationStrategy; maxSpotPrice?: string; amiIdSsmParameterName?: string; + tracingEnabled?: boolean; } function createRunnerConfig(runnerConfig: RunnerConfig): RunnerInputParameters { @@ -366,6 +377,7 @@ function createRunnerConfig(runnerConfig: RunnerConfig): RunnerInputParameters { }, subnets: ['subnet-123', 'subnet-456'], amiIdSsmParameterName: runnerConfig.amiIdSsmParameterName, + tracingEnabled: runnerConfig.tracingEnabled, }; } @@ -376,6 +388,7 @@ interface ExpectedFleetRequestValues { maxSpotPrice?: string; totalTargetCapacity: number; imageId?: string; + tracingEnabled?: boolean; } function expectedCreateFleetRequest(expectedValues: ExpectedFleetRequestValues): CreateFleetCommandInput { @@ -385,6 +398,10 @@ function expectedCreateFleetRequest(expectedValues: ExpectedFleetRequestValues): { Key: 'ghr:Type', Value: expectedValues.type }, { Key: 'ghr:Owner', Value: REPO_NAME }, ]; + if (expectedValues.tracingEnabled) { + const traceId = tracer.getRootXrayTraceId(); + tags.push({ Key: 'ghr:trace_id', Value: traceId! 
}); + } const request: CreateFleetCommandInput = { LaunchTemplateConfigs: [ { diff --git a/lambdas/functions/control-plane/src/aws/runners.ts b/lambdas/functions/control-plane/src/aws/runners.ts index 38d69895db..33cad51ac3 100644 --- a/lambdas/functions/control-plane/src/aws/runners.ts +++ b/lambdas/functions/control-plane/src/aws/runners.ts @@ -9,6 +9,7 @@ import { _InstanceType, } from '@aws-sdk/client-ec2'; import { createChildLogger } from '@terraform-aws-github-runner/aws-powertools-util'; +import { getTracedAWSV3Client, tracer } from '@terraform-aws-github-runner/aws-powertools-util'; import { getParameter } from '@terraform-aws-github-runner/aws-ssm-util'; import moment from 'moment'; @@ -56,7 +57,7 @@ function constructFilters(filters?: Runners.ListRunnerFilters): Ec2Filter[][] { } async function getRunners(ec2Filters: Ec2Filter[]): Promise
mode = optional(string, null)
capture_http_requests = optional(bool, false)
capture_error = optional(bool, false)
})
object({| `{}` | no | ## Outputs diff --git a/modules/ami-housekeeper/iam.tf b/modules/ami-housekeeper/iam.tf index a95949aadf..454d943b4b 100644 --- a/modules/ami-housekeeper/iam.tf +++ b/modules/ami-housekeeper/iam.tf @@ -1,5 +1,5 @@ data "aws_iam_policy_document" "lambda_xray" { - count = var.lambda_tracing_mode != null ? 1 : 0 + count = var.tracing_config.mode != null ? 1 : 0 statement { actions = [ "xray:BatchGetTraces", diff --git a/modules/ami-housekeeper/main.tf b/modules/ami-housekeeper/main.tf index bb2faa49fa..0cb9935c22 100644 --- a/modules/ami-housekeeper/main.tf +++ b/modules/ami-housekeeper/main.tf @@ -19,10 +19,13 @@ resource "aws_lambda_function" "ami_housekeeper" { environment { variables = { - LOG_LEVEL = var.log_level - POWERTOOLS_LOGGER_LOG_EVENT = var.log_level == "debug" ? "true" : "false" - AMI_CLEANUP_OPTIONS = jsonencode(var.cleanup_config) - SERVICE_NAME = "ami-housekeeper" + LOG_LEVEL = var.log_level + POWERTOOLS_LOGGER_LOG_EVENT = var.log_level == "debug" ? "true" : "false" + AMI_CLEANUP_OPTIONS = jsonencode(var.cleanup_config) + SERVICE_NAME = "ami-housekeeper" + POWERTOOLS_TRACE_ENABLED = var.tracing_config.mode != null ? true : false + POWERTOOLS_TRACER_CAPTURE_HTTPS_REQUESTS = var.tracing_config.capture_http_requests + POWERTOOLS_TRACER_CAPTURE_ERROR = var.tracing_config.capture_error } } @@ -37,9 +40,9 @@ resource "aws_lambda_function" "ami_housekeeper" { tags = var.tags dynamic "tracing_config" { - for_each = var.lambda_tracing_mode != null ? [true] : [] + for_each = var.tracing_config.mode != null ? [true] : [] content { - mode = var.lambda_tracing_mode + mode = var.tracing_config.mode } } } @@ -123,7 +126,7 @@ resource "aws_lambda_permission" "ami_housekeeper" { } resource "aws_iam_role_policy" "ami_housekeeper_xray" { - count = var.lambda_tracing_mode != null ? 1 : 0 + count = var.tracing_config.mode != null ? 
1 : 0 policy = data.aws_iam_policy_document.lambda_xray[0].json role = aws_iam_role.ami_housekeeper.name } diff --git a/modules/ami-housekeeper/variables.tf b/modules/ami-housekeeper/variables.tf index faf2f65b30..36562a8a82 100644 --- a/modules/ami-housekeeper/variables.tf +++ b/modules/ami-housekeeper/variables.tf @@ -124,10 +124,14 @@ variable "lambda_architecture" { } } -variable "lambda_tracing_mode" { - description = "Enable X-Ray tracing for the lambda functions." - type = string - default = null +variable "tracing_config" { + description = "Configuration for lambda tracing." + type = object({ + mode = optional(string, null) + capture_http_requests = optional(bool, false) + capture_error = optional(bool, false) + }) + default = {} } # specif for this module diff --git a/modules/multi-runner/README.md b/modules/multi-runner/README.md index c5d6690684..52edd29406 100644 --- a/modules/multi-runner/README.md +++ b/modules/multi-runner/README.md @@ -144,7 +144,6 @@ module "multi-runner" { | [lambda\_s3\_bucket](#input\_lambda\_s3\_bucket) | S3 bucket from which to specify lambda functions. This is an alternative to providing local files directly. | `string` | `null` | no | | [lambda\_security\_group\_ids](#input\_lambda\_security\_group\_ids) | List of security group IDs associated with the Lambda function. | `list(string)` | `[]` | no | | [lambda\_subnet\_ids](#input\_lambda\_subnet\_ids) | List of subnets in which the action runners will be launched, the subnets needs to be subnets in the `vpc_id`. | `list(string)` | `[]` | no | -| [lambda\_tracing\_mode](#input\_lambda\_tracing\_mode) | Enable X-Ray tracing for the lambda functions. | `string` | `null` | no | | [log\_level](#input\_log\_level) | Logging level for lambda logging. Valid values are 'silly', 'trace', 'debug', 'info', 'warn', 'error', 'fatal'. 
| `string` | `"info"` | no | | [logging\_kms\_key\_id](#input\_logging\_kms\_key\_id) | Specifies the kms key id to encrypt the logs with | `string` | `null` | no | | [logging\_retention\_in\_days](#input\_logging\_retention\_in\_days) | Specifies the number of days you want to retain log events for the lambda log group. Possible values are: 0, 1, 3, 5, 7, 14, 30, 60, 90, 120, 150, 180, 365, 400, 545, 731, 1827, and 3653. | `number` | `180` | no | @@ -173,6 +172,7 @@ module "multi-runner" { | [syncer\_lambda\_s3\_key](#input\_syncer\_lambda\_s3\_key) | S3 key for syncer lambda function. Required if using S3 bucket to specify lambdas. | `string` | `null` | no | | [syncer\_lambda\_s3\_object\_version](#input\_syncer\_lambda\_s3\_object\_version) | S3 object version for syncer lambda function. Useful if S3 versioning is enabled on source bucket. | `string` | `null` | no | | [tags](#input\_tags) | Map of tags that will be added to created resources. By default resources will be tagged with name and environment. | `map(string)` | `{}` | no | +| [tracing\_config](#input\_tracing\_config) | Configuration for lambda tracing. |
mode = optional(string, null)
capture_http_requests = optional(bool, false)
capture_error = optional(bool, false)
})
object({| `{}` | no | | [vpc\_id](#input\_vpc\_id) | The VPC for security groups of the action runners. | `string` | n/a | yes | | [webhook\_lambda\_apigateway\_access\_log\_settings](#input\_webhook\_lambda\_apigateway\_access\_log\_settings) | Access log settings for webhook API gateway. |
mode = optional(string, null)
capture_http_requests = optional(bool, false)
capture_error = optional(bool, false)
})
object({| `null` | no | | [webhook\_lambda\_s3\_key](#input\_webhook\_lambda\_s3\_key) | S3 key for webhook lambda function. Required if using S3 bucket to specify lambdas. | `string` | `null` | no | diff --git a/modules/multi-runner/ami-housekeeper.tf b/modules/multi-runner/ami-housekeeper.tf index c34a51a0fb..cd40dddac7 100644 --- a/modules/multi-runner/ami-housekeeper.tf +++ b/modules/multi-runner/ami-housekeeper.tf @@ -18,7 +18,7 @@ module "ami_housekeeper" { lambda_security_group_ids = var.lambda_security_group_ids lambda_subnet_ids = var.lambda_subnet_ids lambda_timeout = var.ami_housekeeper_lambda_timeout - lambda_tracing_mode = var.lambda_tracing_mode + tracing_config = var.tracing_config logging_retention_in_days = var.logging_retention_in_days logging_kms_key_id = var.logging_kms_key_id diff --git a/modules/multi-runner/runner-binaries.tf b/modules/multi-runner/runner-binaries.tf index 87387a5092..a50d1dd97b 100644 --- a/modules/multi-runner/runner-binaries.tf +++ b/modules/multi-runner/runner-binaries.tf @@ -17,7 +17,7 @@ module "runner_binaries" { lambda_architecture = var.lambda_architecture lambda_zip = var.runner_binaries_syncer_lambda_zip lambda_timeout = var.runner_binaries_syncer_lambda_timeout - lambda_tracing_mode = var.lambda_tracing_mode + tracing_config = var.tracing_config logging_retention_in_days = var.logging_retention_in_days logging_kms_key_id = var.logging_kms_key_id enable_event_rule_binaries_syncer = var.enable_event_rule_binaries_syncer diff --git a/modules/multi-runner/runners.tf b/modules/multi-runner/runners.tf index 2541200c49..d0653122d2 100644 --- a/modules/multi-runner/runners.tf +++ b/modules/multi-runner/runners.tf @@ -66,7 +66,7 @@ module "runners" { lambda_timeout_scale_down = var.runners_scale_down_lambda_timeout lambda_subnet_ids = var.lambda_subnet_ids lambda_security_group_ids = var.lambda_security_group_ids - lambda_tracing_mode = var.lambda_tracing_mode + tracing_config = var.tracing_config logging_retention_in_days 
= var.logging_retention_in_days logging_kms_key_id = var.logging_kms_key_id enable_cloudwatch_agent = each.value.runner_config.enable_cloudwatch_agent diff --git a/modules/multi-runner/variables.tf b/modules/multi-runner/variables.tf index a7f32502c0..e75a9653f7 100644 --- a/modules/multi-runner/variables.tf +++ b/modules/multi-runner/variables.tf @@ -543,10 +543,14 @@ variable "ssm_paths" { default = {} } -variable "lambda_tracing_mode" { - description = "Enable X-Ray tracing for the lambda functions." - type = string - default = null +variable "tracing_config" { + description = "Configuration for lambda tracing." + type = object({ + mode = optional(string, null) + capture_http_requests = optional(bool, false) + capture_error = optional(bool, false) + }) + default = {} } variable "associate_public_ipv4_address" { diff --git a/modules/multi-runner/webhook.tf b/modules/multi-runner/webhook.tf index 2159779daa..9a1df4dce6 100644 --- a/modules/multi-runner/webhook.tf +++ b/modules/multi-runner/webhook.tf @@ -19,7 +19,7 @@ module "webhook" { lambda_architecture = var.lambda_architecture lambda_zip = var.webhook_lambda_zip lambda_timeout = var.webhook_lambda_timeout - lambda_tracing_mode = var.lambda_tracing_mode + tracing_config = var.tracing_config logging_retention_in_days = var.logging_retention_in_days logging_kms_key_id = var.logging_kms_key_id diff --git a/modules/runner-binaries-syncer/README.md b/modules/runner-binaries-syncer/README.md index a2453361ee..8e8354929d 100644 --- a/modules/runner-binaries-syncer/README.md +++ b/modules/runner-binaries-syncer/README.md @@ -100,7 +100,6 @@ No modules. | [lambda\_security\_group\_ids](#input\_lambda\_security\_group\_ids) | List of security group IDs associated with the Lambda function. | `list(string)` | `[]` | no | | [lambda\_subnet\_ids](#input\_lambda\_subnet\_ids) | List of subnets in which the action runners will be launched, the subnets needs to be subnets in the `vpc_id`. 
| `list(string)` | `[]` | no | | [lambda\_timeout](#input\_lambda\_timeout) | Time out of the lambda in seconds. | `number` | `300` | no | -| [lambda\_tracing\_mode](#input\_lambda\_tracing\_mode) | Enable X-Ray tracing for the lambda functions. | `string` | `null` | no | | [lambda\_zip](#input\_lambda\_zip) | File location of the lambda zip file. | `string` | `null` | no | | [log\_level](#input\_log\_level) | Logging level for lambda logging. Valid values are 'silly', 'trace', 'debug', 'info', 'warn', 'error', 'fatal'. | `string` | `"info"` | no | | [logging\_kms\_key\_id](#input\_logging\_kms\_key\_id) | Specifies the kms key id to encrypt the logs with | `string` | `null` | no | @@ -117,6 +116,7 @@ No modules. | [syncer\_lambda\_s3\_key](#input\_syncer\_lambda\_s3\_key) | S3 key for syncer lambda function. Required if using S3 bucket to specify lambdas. | `string` | `null` | no | | [syncer\_lambda\_s3\_object\_version](#input\_syncer\_lambda\_s3\_object\_version) | S3 object version for syncer lambda function. Useful if S3 versioning is enabled on source bucket. | `string` | `null` | no | | [tags](#input\_tags) | Map of tags that will be added to created resources. By default resources will be tagged with name and environment. | `map(string)` | `{}` | no | +| [tracing\_config](#input\_tracing\_config) | Configuration for lambda tracing. |
destination_arn = string
format = string
})
object({| `{}` | no | ## Outputs diff --git a/modules/runner-binaries-syncer/iam.tf b/modules/runner-binaries-syncer/iam.tf index a95949aadf..454d943b4b 100644 --- a/modules/runner-binaries-syncer/iam.tf +++ b/modules/runner-binaries-syncer/iam.tf @@ -1,5 +1,5 @@ data "aws_iam_policy_document" "lambda_xray" { - count = var.lambda_tracing_mode != null ? 1 : 0 + count = var.tracing_config.mode != null ? 1 : 0 statement { actions = [ "xray:BatchGetTraces", diff --git a/modules/runner-binaries-syncer/runner-binaries-syncer.tf b/modules/runner-binaries-syncer/runner-binaries-syncer.tf index 7226b76ebe..8da74bbfa1 100644 --- a/modules/runner-binaries-syncer/runner-binaries-syncer.tf +++ b/modules/runner-binaries-syncer/runner-binaries-syncer.tf @@ -23,15 +23,18 @@ resource "aws_lambda_function" "syncer" { environment { variables = { - ENVIRONMENT = var.prefix - GITHUB_RUNNER_ARCHITECTURE = var.runner_architecture - GITHUB_RUNNER_OS = local.gh_binary_os_label[var.runner_os] - LOG_LEVEL = var.log_level - POWERTOOLS_LOGGER_LOG_EVENT = var.log_level == "debug" ? "true" : "false" - S3_BUCKET_NAME = aws_s3_bucket.action_dist.id - S3_OBJECT_KEY = local.action_runner_distribution_object_key - S3_SSE_ALGORITHM = try(var.server_side_encryption_configuration.rule.apply_server_side_encryption_by_default.sse_algorithm, null) - S3_SSE_KMS_KEY_ID = try(var.server_side_encryption_configuration.rule.apply_server_side_encryption_by_default.kms_master_key_id, null) + ENVIRONMENT = var.prefix + GITHUB_RUNNER_ARCHITECTURE = var.runner_architecture + GITHUB_RUNNER_OS = local.gh_binary_os_label[var.runner_os] + LOG_LEVEL = var.log_level + POWERTOOLS_LOGGER_LOG_EVENT = var.log_level == "debug" ? "true" : "false" + POWERTOOLS_TRACE_ENABLED = var.tracing_config.mode != null ? 
true : false + POWERTOOLS_TRACER_CAPTURE_HTTPS_REQUESTS = var.tracing_config.capture_http_requests + POWERTOOLS_TRACER_CAPTURE_ERROR = var.tracing_config.capture_error + S3_BUCKET_NAME = aws_s3_bucket.action_dist.id + S3_OBJECT_KEY = local.action_runner_distribution_object_key + S3_SSE_ALGORITHM = try(var.server_side_encryption_configuration.rule.apply_server_side_encryption_by_default.sse_algorithm, null) + S3_SSE_KMS_KEY_ID = try(var.server_side_encryption_configuration.rule.apply_server_side_encryption_by_default.kms_master_key_id, null) } } @@ -46,9 +49,9 @@ resource "aws_lambda_function" "syncer" { tags = var.tags dynamic "tracing_config" { - for_each = var.lambda_tracing_mode != null ? [true] : [] + for_each = var.tracing_config.mode != null ? [true] : [] content { - mode = var.lambda_tracing_mode + mode = var.tracing_config.mode } } } @@ -182,7 +185,7 @@ resource "aws_lambda_permission" "on_deploy" { } resource "aws_iam_role_policy" "syncer_lambda_xray" { - count = var.lambda_tracing_mode != null ? 1 : 0 + count = var.tracing_config.mode != null ? 1 : 0 policy = data.aws_iam_policy_document.lambda_xray[0].json role = aws_iam_role.syncer_lambda.name } diff --git a/modules/runner-binaries-syncer/variables.tf b/modules/runner-binaries-syncer/variables.tf index d008614a0f..2e8f735977 100644 --- a/modules/runner-binaries-syncer/variables.tf +++ b/modules/runner-binaries-syncer/variables.tf @@ -227,8 +227,12 @@ variable "lambda_architecture" { } } -variable "lambda_tracing_mode" { - description = "Enable X-Ray tracing for the lambda functions." - type = string - default = null +variable "tracing_config" { + description = "Configuration for lambda tracing." 
+ type = object({ + mode = optional(string, null) + capture_http_requests = optional(bool, false) + capture_error = optional(bool, false) + }) + default = {} } diff --git a/modules/runners/README.md b/modules/runners/README.md index 1f07717a45..fda075d125 100644 --- a/modules/runners/README.md +++ b/modules/runners/README.md @@ -108,6 +108,7 @@ yarn run dist | [aws_iam_role_policy_attachment.scale_down_vpc_execution_role](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy_attachment) | resource | | [aws_iam_role_policy_attachment.scale_up_vpc_execution_role](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy_attachment) | resource | | [aws_iam_role_policy_attachment.ssm_housekeeper_vpc_execution_role](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy_attachment) | resource | +| [aws_iam_role_policy_attachment.xray_tracing](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy_attachment) | resource | | [aws_lambda_event_source_mapping.scale_up](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/lambda_event_source_mapping) | resource | | [aws_lambda_function.scale_down](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/lambda_function) | resource | | [aws_lambda_function.scale_up](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/lambda_function) | resource | @@ -176,7 +177,6 @@ yarn run dist | [lambda\_subnet\_ids](#input\_lambda\_subnet\_ids) | List of subnets in which the lambda will be launched, the subnets needs to be subnets in the `vpc_id`. | `list(string)` | `[]` | no | | [lambda\_timeout\_scale\_down](#input\_lambda\_timeout\_scale\_down) | Time out for the scale down lambda in seconds. 
| `number` | `60` | no | | [lambda\_timeout\_scale\_up](#input\_lambda\_timeout\_scale\_up) | Time out for the scale up lambda in seconds. | `number` | `60` | no | -| [lambda\_tracing\_mode](#input\_lambda\_tracing\_mode) | Enable X-Ray tracing for the lambda functions. | `string` | `null` | no | | [lambda\_zip](#input\_lambda\_zip) | File location of the lambda zip file. | `string` | `null` | no | | [log\_level](#input\_log\_level) | Logging level for lambda logging. Valid values are 'silly', 'trace', 'debug', 'info', 'warn', 'error', 'fatal'. | `string` | `"info"` | no | | [logging\_kms\_key\_id](#input\_logging\_kms\_key\_id) | Specifies the kms key id to encrypt the logs with | `string` | `null` | no | @@ -214,6 +214,7 @@ yarn run dist | [ssm\_paths](#input\_ssm\_paths) | The root path used in SSM to store configuration and secreets. |
mode = optional(string, null)
capture_http_requests = optional(bool, false)
capture_error = optional(bool, false)
})
object({| n/a | yes | | [subnet\_ids](#input\_subnet\_ids) | List of subnets in which the action runners will be launched, the subnets needs to be subnets in the `vpc_id`. | `list(string)` | n/a | yes | | [tags](#input\_tags) | Map of tags that will be added to created resources. By default resources will be tagged with name. | `map(string)` | `{}` | no | +| [tracing\_config](#input\_tracing\_config) | Configuration for lambda tracing. |
root = string
tokens = string
config = string
})
object({| `{}` | no | | [userdata\_post\_install](#input\_userdata\_post\_install) | User-data script snippet to insert after GitHub action runner install | `string` | `""` | no | | [userdata\_pre\_install](#input\_userdata\_pre\_install) | User-data script snippet to insert before GitHub action runner install | `string` | `""` | no | | [userdata\_template](#input\_userdata\_template) | Alternative user-data template, replacing the default template. By providing your own user\_data you have to take care of installing all required software, including the action runner. Variables userdata\_pre/post\_install are ignored. | `string` | `null` | no | diff --git a/modules/runners/policies-lambda-common.tf b/modules/runners/policies-lambda-common.tf index feeb10c1ae..feb0d39fd9 100644 --- a/modules/runners/policies-lambda-common.tf +++ b/modules/runners/policies-lambda-common.tf @@ -34,7 +34,7 @@ resource "aws_iam_policy" "ami_id_ssm_parameter_read" { } data "aws_iam_policy_document" "lambda_xray" { - count = var.lambda_tracing_mode != null ? 1 : 0 + count = var.tracing_config.mode != null ? 1 : 0 statement { actions = [ "xray:BatchGetTraces", diff --git a/modules/runners/policies-runner.tf b/modules/runners/policies-runner.tf index 4491fa3fae..d1b9190930 100644 --- a/modules/runners/policies-runner.tf +++ b/modules/runners/policies-runner.tf @@ -45,6 +45,12 @@ resource "aws_iam_role_policy" "dist_bucket" { ) } +resource "aws_iam_role_policy_attachment" "xray_tracing" { + count = var.tracing_config.mode != null ? 
1 : 0 + role = aws_iam_role.runner.name + policy_arn = "arn:${var.aws_partition}:iam::aws:policy/AWSXRayDaemonWriteAccess" +} + resource "aws_iam_role_policy" "describe_tags" { name = "runner-describe-tags" role = aws_iam_role.runner.name diff --git a/modules/runners/pool.tf b/modules/runners/pool.tf index c855739d55..4533e95f24 100644 --- a/modules/runners/pool.tf +++ b/modules/runners/pool.tf @@ -55,7 +55,6 @@ module "pool" { arn_ssm_parameters_path_config = local.arn_ssm_parameters_path_config } - aws_partition = var.aws_partition - lambda_tracing_mode = var.lambda_tracing_mode - + aws_partition = var.aws_partition + tracing_config = var.tracing_config } diff --git a/modules/runners/pool/README.md b/modules/runners/pool/README.md index 8c818dc81a..2b7c808df8 100644 --- a/modules/runners/pool/README.md +++ b/modules/runners/pool/README.md @@ -47,7 +47,7 @@ No modules. |------|-------------|------|---------|:--------:| | [aws\_partition](#input\_aws\_partition) | (optional) partition for the arn if not 'aws' | `string` | `"aws"` | no | | [config](#input\_config) | Lookup details in parent module. |
mode = optional(string, null)
capture_http_requests = optional(bool, false)
capture_error = optional(bool, false)
})
object({| n/a | yes | -| [lambda\_tracing\_mode](#input\_lambda\_tracing\_mode) | Enable X-Ray tracing for the lambda functions. | `string` | `null` | no | +| [tracing\_config](#input\_tracing\_config) | Configuration for lambda tracing. |
lambda = object({
log_level = string
logging_retention_in_days = number
logging_kms_key_id = string
reserved_concurrent_executions = number
s3_bucket = string
s3_key = string
s3_object_version = string
security_group_ids = list(string)
runtime = string
architecture = string
timeout = number
zip = string
subnet_ids = list(string)
})
tags = map(string)
ghes = object({
url = string
ssl_verify = string
})
github_app_parameters = object({
key_base64 = map(string)
id = map(string)
})
subnet_ids = list(string)
runner = object({
disable_runner_autoupdate = bool
ephemeral = bool
enable_jit_config = bool
boot_time_in_minutes = number
labels = list(string)
launch_template = object({
name = string
})
group_name = string
name_prefix = string
pool_owner = string
role = object({
arn = string
})
})
instance_types = list(string)
instance_target_capacity_type = string
instance_allocation_strategy = string
instance_max_spot_price = string
prefix = string
pool = list(object({
schedule_expression = string
size = number
}))
role_permissions_boundary = string
kms_key_arn = string
ami_kms_key_arn = string
role_path = string
ssm_token_path = string
ssm_config_path = string
ami_id_ssm_parameter_name = string
ami_id_ssm_parameter_read_policy_arn = string
arn_ssm_parameters_path_config = string
})
object({| `{}` | no | ## Outputs diff --git a/modules/runners/pool/main.tf b/modules/runners/pool/main.tf index ffb90dff2d..2b7fb0449b 100644 --- a/modules/runners/pool/main.tf +++ b/modules/runners/pool/main.tf @@ -17,31 +17,34 @@ resource "aws_lambda_function" "pool" { environment { variables = { - AMI_ID_SSM_PARAMETER_NAME = var.config.ami_id_ssm_parameter_name - DISABLE_RUNNER_AUTOUPDATE = var.config.runner.disable_runner_autoupdate - ENABLE_EPHEMERAL_RUNNERS = var.config.runner.ephemeral - ENABLE_JIT_CONFIG = var.config.runner.enable_jit_config - ENVIRONMENT = var.config.prefix - GHES_URL = var.config.ghes.url - INSTANCE_ALLOCATION_STRATEGY = var.config.instance_allocation_strategy - INSTANCE_MAX_SPOT_PRICE = var.config.instance_max_spot_price - INSTANCE_TARGET_CAPACITY_TYPE = var.config.instance_target_capacity_type - INSTANCE_TYPES = join(",", var.config.instance_types) - LAUNCH_TEMPLATE_NAME = var.config.runner.launch_template.name - LOG_LEVEL = var.config.lambda.log_level - NODE_TLS_REJECT_UNAUTHORIZED = var.config.ghes.url != null && !var.config.ghes.ssl_verify ? 0 : 1 - PARAMETER_GITHUB_APP_ID_NAME = var.config.github_app_parameters.id.name - PARAMETER_GITHUB_APP_KEY_BASE64_NAME = var.config.github_app_parameters.key_base64.name - POWERTOOLS_LOGGER_LOG_EVENT = var.config.lambda.log_level == "debug" ? 
"true" : "false" - RUNNER_BOOT_TIME_IN_MINUTES = var.config.runner.boot_time_in_minutes - RUNNER_LABELS = lower(join(",", var.config.runner.labels)) - RUNNER_GROUP_NAME = var.config.runner.group_name - RUNNER_NAME_PREFIX = var.config.runner.name_prefix - RUNNER_OWNER = var.config.runner.pool_owner - SERVICE_NAME = "runners-pool" - SSM_TOKEN_PATH = var.config.ssm_token_path - SSM_CONFIG_PATH = var.config.ssm_config_path - SUBNET_IDS = join(",", var.config.subnet_ids) + AMI_ID_SSM_PARAMETER_NAME = var.config.ami_id_ssm_parameter_name + DISABLE_RUNNER_AUTOUPDATE = var.config.runner.disable_runner_autoupdate + ENABLE_EPHEMERAL_RUNNERS = var.config.runner.ephemeral + ENABLE_JIT_CONFIG = var.config.runner.enable_jit_config + ENVIRONMENT = var.config.prefix + GHES_URL = var.config.ghes.url + INSTANCE_ALLOCATION_STRATEGY = var.config.instance_allocation_strategy + INSTANCE_MAX_SPOT_PRICE = var.config.instance_max_spot_price + INSTANCE_TARGET_CAPACITY_TYPE = var.config.instance_target_capacity_type + INSTANCE_TYPES = join(",", var.config.instance_types) + LAUNCH_TEMPLATE_NAME = var.config.runner.launch_template.name + LOG_LEVEL = var.config.lambda.log_level + NODE_TLS_REJECT_UNAUTHORIZED = var.config.ghes.url != null && !var.config.ghes.ssl_verify ? 0 : 1 + PARAMETER_GITHUB_APP_ID_NAME = var.config.github_app_parameters.id.name + PARAMETER_GITHUB_APP_KEY_BASE64_NAME = var.config.github_app_parameters.key_base64.name + POWERTOOLS_LOGGER_LOG_EVENT = var.config.lambda.log_level == "debug" ? 
"true" : "false" + RUNNER_BOOT_TIME_IN_MINUTES = var.config.runner.boot_time_in_minutes + RUNNER_LABELS = lower(join(",", var.config.runner.labels)) + RUNNER_GROUP_NAME = var.config.runner.group_name + RUNNER_NAME_PREFIX = var.config.runner.name_prefix + RUNNER_OWNER = var.config.runner.pool_owner + SERVICE_NAME = "runners-pool" + SSM_TOKEN_PATH = var.config.ssm_token_path + SSM_CONFIG_PATH = var.config.ssm_config_path + SUBNET_IDS = join(",", var.config.subnet_ids) + POWERTOOLS_TRACE_ENABLED = var.tracing_config.mode != null ? true : false + POWERTOOLS_TRACER_CAPTURE_HTTPS_REQUESTS = var.tracing_config.capture_http_requests + POWERTOOLS_TRACER_CAPTURE_ERROR = var.tracing_config.capture_error } } @@ -54,9 +57,9 @@ resource "aws_lambda_function" "pool" { } dynamic "tracing_config" { - for_each = var.lambda_tracing_mode != null ? [true] : [] + for_each = var.tracing_config.mode != null ? [true] : [] content { - mode = var.lambda_tracing_mode + mode = var.tracing_config.mode } } } @@ -152,7 +155,7 @@ resource "aws_iam_role_policy_attachment" "ami_id_ssm_parameter_read" { # lambda xray policy data "aws_iam_policy_document" "lambda_xray" { - count = var.lambda_tracing_mode != null ? 1 : 0 + count = var.tracing_config.mode != null ? 1 : 0 statement { actions = [ "xray:BatchGetTraces", @@ -169,7 +172,7 @@ data "aws_iam_policy_document" "lambda_xray" { } resource "aws_iam_role_policy" "pool_xray" { - count = var.lambda_tracing_mode != null ? 1 : 0 + count = var.tracing_config.mode != null ? 1 : 0 policy = data.aws_iam_policy_document.lambda_xray[0].json role = aws_iam_role.pool.name } diff --git a/modules/runners/pool/variables.tf b/modules/runners/pool/variables.tf index 365b625b94..b1c2f19dd6 100644 --- a/modules/runners/pool/variables.tf +++ b/modules/runners/pool/variables.tf @@ -69,8 +69,14 @@ variable "aws_partition" { default = "aws" } -variable "lambda_tracing_mode" { - description = "Enable X-Ray tracing for the lambda functions." 
- type = string - default = null +variable "tracing_config" { + description = "Configuration for lambda tracing." + type = object({ + mode = optional(string, null) + capture_http_requests = optional(bool, false) + capture_error = optional(bool, false) + }) + default = {} } + + diff --git a/modules/runners/scale-down.tf b/modules/runners/scale-down.tf index 521b512e92..d211a9ded7 100644 --- a/modules/runners/scale-down.tf +++ b/modules/runners/scale-down.tf @@ -22,17 +22,20 @@ resource "aws_lambda_function" "scale_down" { environment { variables = { - ENVIRONMENT = var.prefix - GHES_URL = var.ghes_url - LOG_LEVEL = var.log_level - MINIMUM_RUNNING_TIME_IN_MINUTES = coalesce(var.minimum_running_time_in_minutes, local.min_runtime_defaults[var.runner_os]) - NODE_TLS_REJECT_UNAUTHORIZED = var.ghes_url != null && !var.ghes_ssl_verify ? 0 : 1 - PARAMETER_GITHUB_APP_ID_NAME = var.github_app_parameters.id.name - PARAMETER_GITHUB_APP_KEY_BASE64_NAME = var.github_app_parameters.key_base64.name - POWERTOOLS_LOGGER_LOG_EVENT = var.log_level == "debug" ? "true" : "false" - RUNNER_BOOT_TIME_IN_MINUTES = var.runner_boot_time_in_minutes - SCALE_DOWN_CONFIG = jsonencode(var.idle_config) - SERVICE_NAME = "runners-scale-down" + ENVIRONMENT = var.prefix + GHES_URL = var.ghes_url + LOG_LEVEL = var.log_level + MINIMUM_RUNNING_TIME_IN_MINUTES = coalesce(var.minimum_running_time_in_minutes, local.min_runtime_defaults[var.runner_os]) + NODE_TLS_REJECT_UNAUTHORIZED = var.ghes_url != null && !var.ghes_ssl_verify ? 0 : 1 + PARAMETER_GITHUB_APP_ID_NAME = var.github_app_parameters.id.name + PARAMETER_GITHUB_APP_KEY_BASE64_NAME = var.github_app_parameters.key_base64.name + POWERTOOLS_LOGGER_LOG_EVENT = var.log_level == "debug" ? "true" : "false" + RUNNER_BOOT_TIME_IN_MINUTES = var.runner_boot_time_in_minutes + SCALE_DOWN_CONFIG = jsonencode(var.idle_config) + SERVICE_NAME = "runners-scale-down" + POWERTOOLS_TRACE_ENABLED = var.tracing_config.mode != null ? 
true : false + POWERTOOLS_TRACER_CAPTURE_HTTPS_REQUESTS = var.tracing_config.capture_http_requests + POWERTOOLS_TRACER_CAPTURE_ERROR = var.tracing_config.capture_error } } @@ -45,9 +48,9 @@ resource "aws_lambda_function" "scale_down" { } dynamic "tracing_config" { - for_each = var.lambda_tracing_mode != null ? [true] : [] + for_each = var.tracing_config.mode != null ? [true] : [] content { - mode = var.lambda_tracing_mode + mode = var.tracing_config.mode } } } @@ -111,7 +114,7 @@ resource "aws_iam_role_policy_attachment" "scale_down_vpc_execution_role" { } resource "aws_iam_role_policy" "scale_down_xray" { - count = var.lambda_tracing_mode != null ? 1 : 0 + count = var.tracing_config.mode != null ? 1 : 0 policy = data.aws_iam_policy_document.lambda_xray[0].json role = aws_iam_role.scale_down.name } diff --git a/modules/runners/scale-up.tf b/modules/runners/scale-up.tf index 7a05c48c3f..5dfba01e17 100644 --- a/modules/runners/scale-up.tf +++ b/modules/runners/scale-up.tf @@ -13,36 +13,39 @@ resource "aws_lambda_function" "scale_up" { memory_size = 512 tags = local.tags architectures = [var.lambda_architecture] - environment { variables = { - AMI_ID_SSM_PARAMETER_NAME = var.ami_id_ssm_parameter_name - DISABLE_RUNNER_AUTOUPDATE = var.disable_runner_autoupdate - ENABLE_EPHEMERAL_RUNNERS = var.enable_ephemeral_runners - ENABLE_JIT_CONFIG = var.enable_jit_config - ENABLE_JOB_QUEUED_CHECK = local.enable_job_queued_check - ENABLE_ORGANIZATION_RUNNERS = var.enable_organization_runners - ENVIRONMENT = var.prefix - GHES_URL = var.ghes_url - INSTANCE_ALLOCATION_STRATEGY = var.instance_allocation_strategy - INSTANCE_MAX_SPOT_PRICE = var.instance_max_spot_price - INSTANCE_TARGET_CAPACITY_TYPE = var.instance_target_capacity_type - INSTANCE_TYPES = join(",", var.instance_types) - LAUNCH_TEMPLATE_NAME = aws_launch_template.runner.name - LOG_LEVEL = var.log_level - MINIMUM_RUNNING_TIME_IN_MINUTES = coalesce(var.minimum_running_time_in_minutes, 
local.min_runtime_defaults[var.runner_os]) - NODE_TLS_REJECT_UNAUTHORIZED = var.ghes_url != null && !var.ghes_ssl_verify ? 0 : 1 - PARAMETER_GITHUB_APP_ID_NAME = var.github_app_parameters.id.name - PARAMETER_GITHUB_APP_KEY_BASE64_NAME = var.github_app_parameters.key_base64.name - POWERTOOLS_LOGGER_LOG_EVENT = var.log_level == "debug" ? "true" : "false" - RUNNER_LABELS = lower(join(",", var.runner_labels)) - RUNNER_GROUP_NAME = var.runner_group_name - RUNNER_NAME_PREFIX = var.runner_name_prefix - RUNNERS_MAXIMUM_COUNT = var.runners_maximum_count - SERVICE_NAME = "runners-scale-up" - SSM_TOKEN_PATH = local.token_path - SSM_CONFIG_PATH = "${var.ssm_paths.root}/${var.ssm_paths.config}" - SUBNET_IDS = join(",", var.subnet_ids) + AMI_ID_SSM_PARAMETER_NAME = var.ami_id_ssm_parameter_name + DISABLE_RUNNER_AUTOUPDATE = var.disable_runner_autoupdate + ENABLE_EPHEMERAL_RUNNERS = var.enable_ephemeral_runners + ENABLE_JIT_CONFIG = var.enable_jit_config + ENABLE_JOB_QUEUED_CHECK = local.enable_job_queued_check + ENABLE_ORGANIZATION_RUNNERS = var.enable_organization_runners + ENVIRONMENT = var.prefix + GHES_URL = var.ghes_url + INSTANCE_ALLOCATION_STRATEGY = var.instance_allocation_strategy + INSTANCE_MAX_SPOT_PRICE = var.instance_max_spot_price + INSTANCE_TARGET_CAPACITY_TYPE = var.instance_target_capacity_type + INSTANCE_TYPES = join(",", var.instance_types) + LAUNCH_TEMPLATE_NAME = aws_launch_template.runner.name + LOG_LEVEL = var.log_level + MINIMUM_RUNNING_TIME_IN_MINUTES = coalesce(var.minimum_running_time_in_minutes, local.min_runtime_defaults[var.runner_os]) + NODE_TLS_REJECT_UNAUTHORIZED = var.ghes_url != null && !var.ghes_ssl_verify ? 0 : 1 + PARAMETER_GITHUB_APP_ID_NAME = var.github_app_parameters.id.name + PARAMETER_GITHUB_APP_KEY_BASE64_NAME = var.github_app_parameters.key_base64.name + POWERTOOLS_LOGGER_LOG_EVENT = var.log_level == "debug" ? "true" : "false" + POWERTOOLS_TRACE_ENABLED = var.tracing_config.mode != null ? 
true : false + POWERTOOLS_TRACER_CAPTURE_HTTPS_REQUESTS = var.tracing_config.capture_http_requests + POWERTOOLS_TRACER_CAPTURE_ERROR = var.tracing_config.capture_error + RUNNER_LABELS = lower(join(",", var.runner_labels)) + RUNNER_GROUP_NAME = var.runner_group_name + RUNNER_NAME_PREFIX = var.runner_name_prefix + RUNNERS_MAXIMUM_COUNT = var.runners_maximum_count + SERVICE_NAME = "runners-scale-up" + SSM_TOKEN_PATH = local.token_path + SSM_CONFIG_PATH = "${var.ssm_paths.root}/${var.ssm_paths.config}" + SUBNET_IDS = join(",", var.subnet_ids) + } } @@ -55,9 +58,9 @@ resource "aws_lambda_function" "scale_up" { } dynamic "tracing_config" { - for_each = var.lambda_tracing_mode != null ? [true] : [] + for_each = var.tracing_config.mode != null ? [true] : [] content { - mode = var.lambda_tracing_mode + mode = var.tracing_config.mode } } } @@ -134,7 +137,7 @@ resource "aws_iam_role_policy_attachment" "ami_id_ssm_parameter_read" { } resource "aws_iam_role_policy" "scale_up_xray" { - count = var.lambda_tracing_mode != null ? 1 : 0 + count = var.tracing_config.mode != null ? 1 : 0 policy = data.aws_iam_policy_document.lambda_xray[0].json role = aws_iam_role.scale_up.name } diff --git a/modules/runners/ssm-housekeeper.tf b/modules/runners/ssm-housekeeper.tf index 18f266f01f..2a8962b955 100644 --- a/modules/runners/ssm-housekeeper.tf +++ b/modules/runners/ssm-housekeeper.tf @@ -28,10 +28,13 @@ resource "aws_lambda_function" "ssm_housekeeper" { environment { variables = { - ENVIRONMENT = var.prefix - LOG_LEVEL = var.log_level - SSM_CLEANUP_CONFIG = jsonencode(local.ssm_housekeeper.config) - SERVICE_NAME = "ssm-housekeeper" + ENVIRONMENT = var.prefix + LOG_LEVEL = var.log_level + SSM_CLEANUP_CONFIG = jsonencode(local.ssm_housekeeper.config) + SERVICE_NAME = "ssm-housekeeper" + POWERTOOLS_TRACE_ENABLED = var.tracing_config.mode != null ? 
true : false + POWERTOOLS_TRACER_CAPTURE_HTTPS_REQUESTS = var.tracing_config.capture_http_requests + POWERTOOLS_TRACER_CAPTURE_ERROR = var.tracing_config.capture_error } } @@ -44,9 +47,9 @@ resource "aws_lambda_function" "ssm_housekeeper" { } dynamic "tracing_config" { - for_each = var.lambda_tracing_mode != null ? [true] : [] + for_each = var.tracing_config.mode != null ? [true] : [] content { - mode = var.lambda_tracing_mode + mode = var.tracing_config.mode } } } @@ -110,7 +113,7 @@ resource "aws_iam_role_policy_attachment" "ssm_housekeeper_vpc_execution_role" { } resource "aws_iam_role_policy" "ssm_housekeeper_xray" { - count = var.lambda_tracing_mode != null ? 1 : 0 + count = var.tracing_config.mode != null ? 1 : 0 policy = data.aws_iam_policy_document.lambda_xray[0].json role = aws_iam_role.ssm_housekeeper.name } diff --git a/modules/runners/templates/start-runner.sh b/modules/runners/templates/start-runner.sh index 29788c9216..98c0dd449b 100644 --- a/modules/runners/templates/start-runner.sh +++ b/modules/runners/templates/start-runner.sh @@ -1,6 +1,86 @@ -# shellcheck shell=bash +#!/bin/bash + +# https://docs.aws.amazon.com/xray/latest/devguide/xray-api-sendingdata.html +# https://docs.aws.amazon.com/xray/latest/devguide/scorekeep-scripts.html +create_xray_start_segment() { + START_TIME=$(date -d "$(uptime -s)" +%s) + TRACE_ID=$1 + INSTANCE_ID=$2 + SEGMENT_ID=$(dd if=/dev/random bs=8 count=1 2>/dev/null | od -An -tx1 | tr -d ' \t\n') + SEGMENT_DOC="{\"trace_id\": \"$TRACE_ID\", \"id\": \"$SEGMENT_ID\", \"start_time\": $START_TIME, \"in_progress\": true, \"name\": \"Runner\",\"origin\": \"AWS::EC2::Instance\", \"aws\": {\"ec2\":{\"instance_id\":\"$INSTANCE_ID\"}}}" + HEADER='{"format": "json", "version": 1}' + TRACE_DATA="$HEADER\n$SEGMENT_DOC" + echo "$HEADER" > document.txt + echo "$SEGMENT_DOC" >> document.txt + UDP_IP="127.0.0.1" + UDP_PORT=2000 + cat document.txt > /dev/udp/$UDP_IP/$UDP_PORT + echo "$SEGMENT_DOC" +} + +create_xray_success_segment() { + 
local SEGMENT_DOC=$1 + if [ -z "$SEGMENT_DOC" ]; then + echo "No segment doc provided" + return + fi + SEGMENT_DOC=$(echo "$SEGMENT_DOC" | jq '. | del(.in_progress)') + END_TIME=$(date +%s) + SEGMENT_DOC=$(echo "$SEGMENT_DOC" | jq -c ". + {\"end_time\": $END_TIME}") + HEADER="{\"format\": \"json\", \"version\": 1}" + TRACE_DATA="$HEADER\n$SEGMENT_DOC" + echo "$HEADER" > document.txt + echo "$SEGMENT_DOC" >> document.txt + UDP_IP="127.0.0.1" + UDP_PORT=2000 + cat document.txt > /dev/udp/$UDP_IP/$UDP_PORT + echo "$SEGMENT_DOC" +} + +create_xray_error_segment() { + local SEGMENT_DOC="$1" + if [ -z "$SEGMENT_DOC" ]; then + echo "No segment doc provided" + return + fi + MESSAGE="$2" + ERROR="{\"exceptions\": [{\"message\": \"$MESSAGE\"}]}" + SEGMENT_DOC=$(echo "$SEGMENT_DOC" | jq '. | del(.in_progress)') + END_TIME=$(date +%s) + SEGMENT_DOC=$(echo "$SEGMENT_DOC" | jq -c ". + {\"end_time\": $END_TIME, \"error\": true, \"cause\": $ERROR }") + HEADER="{\"format\": \"json\", \"version\": 1}" + TRACE_DATA="$HEADER\n$SEGMENT_DOC" + echo "$HEADER" > document.txt + echo "$SEGMENT_DOC" >> document.txt + UDP_IP="127.0.0.1" + UDP_PORT=2000 + cat document.txt > /dev/udp/$UDP_IP/$UDP_PORT + echo "$SEGMENT_DOC" +} + +cleanup() { + local exit_code="$1" + local error_location="$2" + local error_lineno="$3" + + if [ "$exit_code" -ne 0 ]; then + echo "ERROR: runner-start-failed with exit code $exit_code occurred on $error_location" + create_xray_error_segment "$SEGMENT" "runner-start-failed with exit code $exit_code occurred on $error_location - $error_lineno" + fi + # allows to flush the cloud watch logs and traces + sleep 10 + if [ "$agent_mode" = "ephemeral" ] || [ "$exit_code" -ne 0 ]; then + echo "Stopping CloudWatch service" + systemctl stop amazon-cloudwatch-agent.service || true + echo "Terminating instance" + aws ec2 terminate-instances \ + --instance-ids "$instance_id" \ + --region "$region" \ + || true + fi +} -## Retrieve instance metadata +trap 'cleanup $? 
$LINENO $BASH_LINENO' EXIT echo "Retrieving TOKEN from AWS API" token=$(curl -f -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 180" || true) @@ -32,6 +112,7 @@ availability_zone=$(curl -f -H "X-aws-ec2-metadata-token: $token" -v http://169. environment=$(curl -f -H "X-aws-ec2-metadata-token: $token" -v http://169.254.169.254/latest/meta-data/tags/instance/ghr:environment) ssm_config_path=$(curl -f -H "X-aws-ec2-metadata-token: $token" -v http://169.254.169.254/latest/meta-data/tags/instance/ghr:ssm_config_path) runner_name_prefix=$(curl -f -H "X-aws-ec2-metadata-token: $token" -v http://169.254.169.254/latest/meta-data/tags/instance/ghr:runner_name_prefix || echo "") +xray_trace_id=$(curl -f -H "X-aws-ec2-metadata-token: $token" -v http://169.254.169.254/latest/meta-data/tags/instance/ghr:trace_id || echo "") %{ else } tags=$(aws ec2 describe-tags --region "$region" --filters "Name=resource-id,Values=$instance_id") @@ -40,6 +121,7 @@ echo "Retrieved tags from AWS API ($tags)" environment=$(echo "$tags" | jq -r '.Tags[] | select(.Key == "ghr:environment") | .Value') ssm_config_path=$(echo "$tags" | jq -r '.Tags[] | select(.Key == "ghr:ssm_config_path") | .Value') runner_name_prefix=$(echo "$tags" | jq -r '.Tags[] | select(.Key == "ghr:runner_name_prefix") | .Value' || echo "") +xray_trace_id=$(echo "$tags" | jq -r '.Tags[] | select(.Key == "ghr:trace_id") | .Value' || echo "") %{ endif } @@ -65,6 +147,18 @@ echo "Retrieved /$ssm_config_path/enable_jit_config parameter - ($enable_jit_con token_path=$(echo "$parameters" | jq --arg ssm_config_path "$ssm_config_path" -r '.[] | select(.Name == "'$ssm_config_path'/token_path") | .Value') echo "Retrieved /$ssm_config_path/token_path parameter - ($token_path)" +if [[ "$xray_trace_id" != "" ]]; then + # run xray service + curl https://s3.us-east-2.amazonaws.com/aws-xray-assets.us-east-2/xray-daemon/aws-xray-daemon-linux-3.x.zip -o aws-xray-daemon-linux-3.x.zip + unzip 
aws-xray-daemon-linux-3.x.zip -d aws-xray-daemon-linux-3.x + chmod +x ./aws-xray-daemon-linux-3.x/xray + ./aws-xray-daemon-linux-3.x/xray -o -n "$region" & + + + SEGMENT=$(create_xray_start_segment "$xray_trace_id" "$instance_id") + echo "$SEGMENT" +fi + if [[ "$enable_cloudwatch_agent" == "true" ]]; then echo "Cloudwatch is enabled" amazon-cloudwatch-agent-ctl -a fetch-config -m ec2 -s -c "ssm:$ssm_config_path/cloudwatch_agent_config_runner" @@ -96,7 +190,7 @@ fi chown -R $run_as . info_arch=$(uname -p) -info_os=$(( lsb_release -ds || cat /etc/*release || uname -om ) 2>/dev/null | head -n1 | cut -d "=" -f2- | tr -d '"') +info_os=$( ( lsb_release -ds || cat /etc/*release || uname -om ) 2>/dev/null | head -n1 | cut -d "=" -f2- | tr -d '"') tee /opt/actions-runner/.setup_info <
mode = optional(string, null)
capture_http_requests = optional(bool, false)
capture_error = optional(bool, false)
})
map(object({| n/a | yes | | [sqs\_workflow\_job\_queue](#input\_sqs\_workflow\_job\_queue) | SQS queue to monitor github events. |
arn = string
id = string
fifo = bool
matcherConfig = object({
labelMatchers = list(list(string))
exactMatch = bool
priority = optional(number, 999)
})
}))
object({| `null` | no | | [tags](#input\_tags) | Map of tags that will be added to created resources. By default resources will be tagged with name and environment. | `map(string)` | `{}` | no | +| [tracing\_config](#input\_tracing\_config) | Configuration for lambda tracing. |
id = string
arn = string
})
object({| `{}` | no | | [webhook\_lambda\_apigateway\_access\_log\_settings](#input\_webhook\_lambda\_apigateway\_access\_log\_settings) | Access log settings for webhook API gateway. |
mode = optional(string, null)
capture_http_requests = optional(bool, false)
capture_error = optional(bool, false)
})
object({| `null` | no | | [webhook\_lambda\_s3\_key](#input\_webhook\_lambda\_s3\_key) | S3 key for webhook lambda function. Required if using S3 bucket to specify lambdas. | `string` | `null` | no | | [webhook\_lambda\_s3\_object\_version](#input\_webhook\_lambda\_s3\_object\_version) | S3 object version for webhook lambda function. Useful if S3 versioning is enabled on source bucket. | `string` | `null` | no | diff --git a/modules/webhook/policies.tf b/modules/webhook/policies.tf index a95949aadf..454d943b4b 100644 --- a/modules/webhook/policies.tf +++ b/modules/webhook/policies.tf @@ -1,5 +1,5 @@ data "aws_iam_policy_document" "lambda_xray" { - count = var.lambda_tracing_mode != null ? 1 : 0 + count = var.tracing_config.mode != null ? 1 : 0 statement { actions = [ "xray:BatchGetTraces", diff --git a/modules/webhook/variables.tf b/modules/webhook/variables.tf index e66f8df226..c298f7bd09 100644 --- a/modules/webhook/variables.tf +++ b/modules/webhook/variables.tf @@ -182,8 +182,12 @@ variable "github_app_parameters" { }) } -variable "lambda_tracing_mode" { - description = "Enable X-Ray tracing for the lambda functions." - type = string - default = null +variable "tracing_config" { + description = "Configuration for lambda tracing." + type = object({ + mode = optional(string, null) + capture_http_requests = optional(bool, false) + capture_error = optional(bool, false) + }) + default = {} } diff --git a/modules/webhook/webhook.tf b/modules/webhook/webhook.tf index 4ceda1b7c2..6dafc08874 100644 --- a/modules/webhook/webhook.tf +++ b/modules/webhook/webhook.tf @@ -21,13 +21,16 @@ resource "aws_lambda_function" "webhook" { environment { variables = { - ENVIRONMENT = var.prefix - LOG_LEVEL = var.log_level - POWERTOOLS_LOGGER_LOG_EVENT = var.log_level == "debug" ? 
"true" : "false" - PARAMETER_GITHUB_APP_WEBHOOK_SECRET = var.github_app_parameters.webhook_secret.name - REPOSITORY_WHITE_LIST = jsonencode(var.repository_white_list) - RUNNER_CONFIG = jsonencode(local.runner_config_sorted) - SQS_WORKFLOW_JOB_QUEUE = try(var.sqs_workflow_job_queue, null) != null ? var.sqs_workflow_job_queue.id : "" + ENVIRONMENT = var.prefix + LOG_LEVEL = var.log_level + POWERTOOLS_LOGGER_LOG_EVENT = var.log_level == "debug" ? "true" : "false" + POWERTOOLS_TRACE_ENABLED = var.tracing_config.mode != null ? true : false + POWERTOOLS_TRACER_CAPTURE_HTTPS_REQUESTS = var.tracing_config.capture_http_requests + POWERTOOLS_TRACER_CAPTURE_ERROR = var.tracing_config.capture_error + PARAMETER_GITHUB_APP_WEBHOOK_SECRET = var.github_app_parameters.webhook_secret.name + REPOSITORY_WHITE_LIST = jsonencode(var.repository_white_list) + RUNNER_CONFIG = jsonencode(local.runner_config_sorted) + SQS_WORKFLOW_JOB_QUEUE = try(var.sqs_workflow_job_queue, null) != null ? var.sqs_workflow_job_queue.id : "" } } @@ -42,9 +45,9 @@ resource "aws_lambda_function" "webhook" { tags = var.tags dynamic "tracing_config" { - for_each = var.lambda_tracing_mode != null ? [true] : [] + for_each = var.tracing_config.mode != null ? [true] : [] content { - mode = var.lambda_tracing_mode + mode = var.tracing_config.mode } } } @@ -128,7 +131,7 @@ resource "aws_iam_role_policy" "webhook_ssm" { } resource "aws_iam_role_policy" "xray" { - count = var.lambda_tracing_mode != null ? 1 : 0 + count = var.tracing_config.mode != null ? 1 : 0 policy = data.aws_iam_policy_document.lambda_xray[0].json role = aws_iam_role.webhook_lambda.name } diff --git a/variables.deprecated.tf b/variables.deprecated.tf index e69de29bb2..c4e8abfe1d 100644 --- a/variables.deprecated.tf +++ b/variables.deprecated.tf @@ -0,0 +1,10 @@ +variable "lambda_tracing_mode" { + description = "DEPRECATED: Replaced by `tracing_config`." 
+ type = string + default = null + + validation { + condition = anytrue([var.lambda_tracing_mode == null]) + error_message = "DEPRECATED, Replaced by `tracing_config`." + } +} diff --git a/variables.tf b/variables.tf index 1bdbfbe393..574734c420 100644 --- a/variables.tf +++ b/variables.tf @@ -747,10 +747,14 @@ variable "runner_name_prefix" { } } -variable "lambda_tracing_mode" { - description = "Enable X-Ray tracing for the lambda functions." - type = string - default = null +variable "tracing_config" { + description = "Configuration for lambda tracing." + type = object({ + mode = optional(string, null) + capture_http_requests = optional(bool, false) + capture_error = optional(bool, false) + }) + default = {} } variable "runner_credit_specification" {
destination_arn = string
format = string
})