diff --git a/README.md b/README.md index 35142ea9a6..4c2018b9ed 100644 --- a/README.md +++ b/README.md @@ -32,6 +32,7 @@ This [Terraform](https://www.terraform.io/) module creates the required infrastr - [Examples](#examples) - [Sub modules](#sub-modules) - [Logging](#logging) +- [Tracing](#tracing) - [Debugging](#debugging) - [Security Considerations](#security-considerations) - [Requirements](#requirements) @@ -427,6 +428,17 @@ An example log message of the scale-up function: } } ``` +## Tracing +Because of the distributed architecture of this application, it can be difficult to troubleshoot. +We support the option to enable tracing for all the lambda functions created by this application. To enable tracing, simply provide the `tracing_config` option in the root module or in the inner modules. + +The tracing config generates timelines for the following events: +- Basic lifecycle of the lambda function +- Traces for GitHub API calls (can be configured by `capture_http_requests`). +- Traces for all AWS SDK calls + +This feature is disabled by default. + ## Debugging @@ -543,7 +555,7 @@ We welcome any improvement to the standard module to make the default as secure | [lambda\_s3\_bucket](#input\_lambda\_s3\_bucket) | S3 bucket from which to specify lambda functions. This is an alternative to providing local files directly. | `string` | `null` | no | | [lambda\_security\_group\_ids](#input\_lambda\_security\_group\_ids) | List of security group IDs associated with the Lambda function. | `list(string)` | `[]` | no | | [lambda\_subnet\_ids](#input\_lambda\_subnet\_ids) | List of subnets in which the action runners will be launched, the subnets needs to be subnets in the `vpc_id`. | `list(string)` | `[]` | no | -| [lambda\_tracing\_mode](#input\_lambda\_tracing\_mode) | Enable X-Ray tracing for the lambda functions. | `string` | `null` | no | +| [lambda\_tracing\_mode](#input\_lambda\_tracing\_mode) | DEPRECATED: Replaced by `tracing_config`. 
| `string` | `null` | no | | [log\_level](#input\_log\_level) | Logging level for lambda logging. Valid values are 'silly', 'trace', 'debug', 'info', 'warn', 'error', 'fatal'. | `string` | `"info"` | no | | [logging\_kms\_key\_id](#input\_logging\_kms\_key\_id) | Specifies the kms key id to encrypt the logs with. | `string` | `null` | no | | [logging\_retention\_in\_days](#input\_logging\_retention\_in\_days) | Specifies the number of days you want to retain log events for the lambda log group. Possible values are: 0, 1, 3, 5, 7, 14, 30, 60, 90, 120, 150, 180, 365, 400, 545, 731, 1827, and 3653. | `number` | `180` | no | @@ -593,6 +605,7 @@ We welcome any improvement to the standard module to make the default as secure | [syncer\_lambda\_s3\_key](#input\_syncer\_lambda\_s3\_key) | S3 key for syncer lambda function. Required if using an S3 bucket to specify lambdas. | `string` | `null` | no | | [syncer\_lambda\_s3\_object\_version](#input\_syncer\_lambda\_s3\_object\_version) | S3 object version for syncer lambda function. Useful if S3 versioning is enabled on source bucket. | `string` | `null` | no | | [tags](#input\_tags) | Map of tags that will be added to created resources. By default resources will be tagged with name and environment. | `map(string)` | `{}` | no | +| [tracing\_config](#input\_tracing\_config) | Configuration for lambda tracing. |
object({
mode = optional(string, null)
capture_http_requests = optional(bool, false)
capture_error = optional(bool, false)
})
| `{}` | no | | [userdata\_post\_install](#input\_userdata\_post\_install) | Script to be ran after the GitHub Actions runner is installed on the EC2 instances | `string` | `""` | no | | [userdata\_pre\_install](#input\_userdata\_pre\_install) | Script to be ran before the GitHub Actions runner is installed on the EC2 instances | `string` | `""` | no | | [userdata\_template](#input\_userdata\_template) | Alternative user-data template, replacing the default template. By providing your own user\_data you have to take care of installing all required software, including the action runner. Variables userdata\_pre/post\_install are ignored. | `string` | `null` | no | diff --git a/examples/ephemeral/main.tf b/examples/ephemeral/main.tf index f549b566fd..6aa9e2cda4 100644 --- a/examples/ephemeral/main.tf +++ b/examples/ephemeral/main.tf @@ -69,11 +69,20 @@ module "runners" { # enable_job_queued_check = true + # tracing_config = { + # mode = "Active" + # capture_error = true + # capture_http_requests = true + # } + + # configure your pre-built AMI # enable_userdata = false - # ami_filter = { name = ["github-runner-al2023-x86_64-*"], state = ["available"] } - # data "aws_caller_identity" "current" {} - # ami_owners = [data.aws_caller_identity.current.account_id] + # ami_filter = { name = ["github-runner-al2023-x86_64-*"], state = ["available"] } + # ami_owners = [data.aws_caller_identity.current.account_id] + + # or use the default AMI + # enable_userdata = true # Enable debug logging for the lambda functions # log_level = "debug" diff --git a/examples/multi-runner/main.tf b/examples/multi-runner/main.tf index 45e268609f..b1ab05b427 100644 --- a/examples/multi-runner/main.tf +++ b/examples/multi-runner/main.tf @@ -57,6 +57,12 @@ module "runners" { id = var.github_app.id webhook_secret = random_id.random.hex } + # enable this section for tracing + # tracing_config = { + # mode = "Active" + # capture_error = true + # capture_http_requests = true + # } # Assuming local build 
lambda's to use pre build ones, uncomment the lines below and download the # lambda zip files lambda_download # webhook_lambda_zip = "../lambdas-download/webhook.zip" diff --git a/lambdas/functions/ami-housekeeper/src/ami.ts b/lambdas/functions/ami-housekeeper/src/ami.ts index c74aa839c2..1298ff0f6f 100644 --- a/lambdas/functions/ami-housekeeper/src/ami.ts +++ b/lambdas/functions/ami-housekeeper/src/ami.ts @@ -10,6 +10,7 @@ import { } from '@aws-sdk/client-ec2'; import { DescribeParametersCommand, GetParameterCommand, SSMClient } from '@aws-sdk/client-ssm'; import { createChildLogger } from '@terraform-aws-github-runner/aws-powertools-util'; +import { getTracedAWSV3Client } from '@terraform-aws-github-runner/aws-powertools-util'; const logger = createChildLogger('ami'); @@ -82,7 +83,7 @@ async function getAmisNotInUse(options: AmiCleanupOptions) { const amiIdsInSSM = await getAmisReferedInSSM(options); const amiIdsInTemplates = await getAmiInLatestTemplates(options); - const ec2Client = new EC2Client({}); + const ec2Client = getTracedAWSV3Client(new EC2Client({})); logger.debug('Getting all AMIs from ec2 with filters', { filters: options.amiFilters }); const amiEc2 = await ec2Client.send( new DescribeImagesCommand({ @@ -133,7 +134,7 @@ async function deleteAmi(amiDetails: Image, options: AmiCleanupOptionsInternal): try { logger.info(`deleting ami ${amiDetails.Name || amiDetails.ImageId} created at ${amiDetails.CreationDate}`); - const ec2Client = new EC2Client({}); + const ec2Client = getTracedAWSV3Client(new EC2Client({})); await ec2Client.send(new DeregisterImageCommand({ ImageId: amiDetails.ImageId, DryRun: options.dryRun })); await deleteSnapshot(options, amiDetails, ec2Client); } catch (error) { @@ -158,7 +159,7 @@ async function deleteSnapshot(options: AmiCleanupOptions, amiDetails: Image, ec2 } async function getAmiInLatestTemplates(options: AmiCleanupOptions): Promise<(string | undefined)[]> { - const ec2Client = new EC2Client({}); + const ec2Client = 
getTracedAWSV3Client(new EC2Client({})); const launnchTemplates = await ec2Client.send( new DescribeLaunchTemplatesCommand({ LaunchTemplateNames: options.launchTemplateNames, @@ -188,7 +189,7 @@ async function getAmisReferedInSSM(options: AmiCleanupOptions): Promise<(string return []; } - const ssmClient = new SSMClient({}); + const ssmClient = getTracedAWSV3Client(new SSMClient({})); const ssmParams = await ssmClient.send( new DescribeParametersCommand({ ParameterFilters: [ diff --git a/lambdas/functions/control-plane/package.json b/lambdas/functions/control-plane/package.json index 950276d061..dd8cb85078 100644 --- a/lambdas/functions/control-plane/package.json +++ b/lambdas/functions/control-plane/package.json @@ -40,11 +40,13 @@ "dependencies": { "@aws-sdk/client-ec2": "^3.436.0", "@aws-sdk/types": "^3.433.0", + "@middy/core": "^3.6.2", "@octokit/auth-app": "6.0.1", "@octokit/rest": "20.0.2", "@octokit/types": "^12.1.1", "@terraform-aws-github-runner/aws-powertools-util": "*", "@terraform-aws-github-runner/aws-ssm-util": "*", + "axios": "^1.5.1", "cron-parser": "^4.8.1", "typescript": "^5.0.4" } diff --git a/lambdas/functions/control-plane/src/aws/runners.d.ts b/lambdas/functions/control-plane/src/aws/runners.d.ts index da15866b75..1a53f47555 100644 --- a/lambdas/functions/control-plane/src/aws/runners.d.ts +++ b/lambdas/functions/control-plane/src/aws/runners.d.ts @@ -39,4 +39,5 @@ export interface RunnerInputParameters { }; numberOfRunners?: number; amiIdSsmParameterName?: string; + tracingEnabled?: boolean; } diff --git a/lambdas/functions/control-plane/src/aws/runners.test.ts b/lambdas/functions/control-plane/src/aws/runners.test.ts index ed044ff4fe..fb08ec4b3b 100644 --- a/lambdas/functions/control-plane/src/aws/runners.test.ts +++ b/lambdas/functions/control-plane/src/aws/runners.test.ts @@ -10,6 +10,7 @@ import { TerminateInstancesCommand, } from '@aws-sdk/client-ec2'; import { GetParameterCommand, GetParameterResult, PutParameterCommand, SSMClient } 
from '@aws-sdk/client-ssm'; +import { tracer } from '@terraform-aws-github-runner/aws-powertools-util'; import { mockClient } from 'aws-sdk-client-mock'; import 'aws-sdk-client-mock-jest'; @@ -236,6 +237,15 @@ describe('create runner', () => { Name: 'my-ami-id-param', }); }); + it('calls create fleet of 1 instance with runner tracing enabled', async () => { + tracer.getRootXrayTraceId = jest.fn().mockReturnValue('123'); + + await createRunner(createRunnerConfig({ ...defaultRunnerConfig, tracingEnabled: true })); + + expect(mockEC2Client).toHaveReceivedCommandWith(CreateFleetCommand, { + ...expectedCreateFleetRequest({ ...defaultExpectedFleetRequestValues, tracingEnabled: true }), + }); + }); }); describe('create runner with errors', () => { @@ -350,6 +360,7 @@ interface RunnerConfig { allocationStrategy: SpotAllocationStrategy; maxSpotPrice?: string; amiIdSsmParameterName?: string; + tracingEnabled?: boolean; } function createRunnerConfig(runnerConfig: RunnerConfig): RunnerInputParameters { @@ -366,6 +377,7 @@ function createRunnerConfig(runnerConfig: RunnerConfig): RunnerInputParameters { }, subnets: ['subnet-123', 'subnet-456'], amiIdSsmParameterName: runnerConfig.amiIdSsmParameterName, + tracingEnabled: runnerConfig.tracingEnabled, }; } @@ -376,6 +388,7 @@ interface ExpectedFleetRequestValues { maxSpotPrice?: string; totalTargetCapacity: number; imageId?: string; + tracingEnabled?: boolean; } function expectedCreateFleetRequest(expectedValues: ExpectedFleetRequestValues): CreateFleetCommandInput { @@ -385,6 +398,10 @@ function expectedCreateFleetRequest(expectedValues: ExpectedFleetRequestValues): { Key: 'ghr:Type', Value: expectedValues.type }, { Key: 'ghr:Owner', Value: REPO_NAME }, ]; + if (expectedValues.tracingEnabled) { + const traceId = tracer.getRootXrayTraceId(); + tags.push({ Key: 'ghr:trace_id', Value: traceId! 
}); + } const request: CreateFleetCommandInput = { LaunchTemplateConfigs: [ { diff --git a/lambdas/functions/control-plane/src/aws/runners.ts b/lambdas/functions/control-plane/src/aws/runners.ts index 38d69895db..33cad51ac3 100644 --- a/lambdas/functions/control-plane/src/aws/runners.ts +++ b/lambdas/functions/control-plane/src/aws/runners.ts @@ -9,6 +9,7 @@ import { _InstanceType, } from '@aws-sdk/client-ec2'; import { createChildLogger } from '@terraform-aws-github-runner/aws-powertools-util'; +import { getTracedAWSV3Client, tracer } from '@terraform-aws-github-runner/aws-powertools-util'; import { getParameter } from '@terraform-aws-github-runner/aws-ssm-util'; import moment from 'moment'; @@ -56,7 +57,7 @@ function constructFilters(filters?: Runners.ListRunnerFilters): Ec2Filter[][] { } async function getRunners(ec2Filters: Ec2Filter[]): Promise { - const ec2 = new EC2Client({ region: process.env.AWS_REGION }); + const ec2 = getTracedAWSV3Client(new EC2Client({ region: process.env.AWS_REGION })); const runners: Runners.RunnerList[] = []; let nextToken; let hasNext = true; @@ -93,7 +94,7 @@ function getRunnerInfo(runningInstances: DescribeInstancesResult) { } export async function terminateRunner(instanceId: string): Promise { - const ec2 = new EC2Client({ region: process.env.AWS_REGION }); + const ec2 = getTracedAWSV3Client(new EC2Client({ region: process.env.AWS_REGION })); await ec2.send(new TerminateInstancesCommand({ InstanceIds: [instanceId] })); logger.info(`Runner ${instanceId} has been terminated.`); } @@ -126,7 +127,7 @@ export async function createRunner(runnerParameters: Runners.RunnerInputParamete }, }); - const ec2Client = new EC2Client({ region: process.env.AWS_REGION }); + const ec2Client = getTracedAWSV3Client(new EC2Client({ region: process.env.AWS_REGION })); let amiIdOverride = undefined; @@ -145,6 +146,7 @@ export async function createRunner(runnerParameters: Runners.RunnerInputParamete } const numberOfRunners = 
runnerParameters.numberOfRunners ? runnerParameters.numberOfRunners : 1; + const tags = [ { Key: 'ghr:Application', Value: 'github-action-runner' }, { Key: 'ghr:created_by', Value: numberOfRunners === 1 ? 'scale-up-lambda' : 'pool-lambda' }, @@ -152,6 +154,11 @@ export async function createRunner(runnerParameters: Runners.RunnerInputParamete { Key: 'ghr:Owner', Value: runnerParameters.runnerOwner }, ]; + if (runnerParameters.tracingEnabled) { + const traceId = tracer.getRootXrayTraceId(); + tags.push({ Key: 'ghr:trace_id', Value: traceId! }); + } + let fleet: CreateFleetResult; try { // see for spec https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_CreateFleet.html diff --git a/lambdas/functions/control-plane/src/axios/fetch-override.test.ts b/lambdas/functions/control-plane/src/axios/fetch-override.test.ts new file mode 100644 index 0000000000..e19c0c1f89 --- /dev/null +++ b/lambdas/functions/control-plane/src/axios/fetch-override.test.ts @@ -0,0 +1,31 @@ +import axios, { AxiosResponse } from 'axios'; + +import { axiosFetch } from './fetch-override'; + +jest.mock('axios'); +type FetchResponse = AxiosResponse & { json: () => string }; + +describe('axiosFetch', () => { + beforeEach(() => { + jest.clearAllMocks(); + }); + it('should return a promise that resolves with the response data', async () => { + // Arrange + const url = 'https://example.com'; + const options = { body: { foo: 'bar' } }; + const responseData = { data: { baz: 'qux' } }; + const mockedAxios = axios as unknown as jest.Mock; + mockedAxios.mockResolvedValue(responseData); + + // Act + const result = (await axiosFetch(url, options)) as FetchResponse; + + // Assert + expect(axios).toHaveBeenCalledWith(url, { ...options, data: options.body }); + expect(result).toEqual({ + ...responseData, + json: expect.any(Function), + }); + expect(result.json()).toEqual(responseData.data); + }); +}); diff --git a/lambdas/functions/control-plane/src/axios/fetch-override.ts 
b/lambdas/functions/control-plane/src/axios/fetch-override.ts new file mode 100644 index 0000000000..fe15054d20 --- /dev/null +++ b/lambdas/functions/control-plane/src/axios/fetch-override.ts @@ -0,0 +1,19 @@ +import axios, { AxiosRequestConfig, AxiosResponse } from 'axios'; + +type FetchResponse = AxiosResponse & { json: () => string }; + +type FetchOptions = AxiosRequestConfig & { body?: object }; + +// Fetch is not covered to be traced by xray so we need to override it with axios +// https://github.com/aws/aws-xray-sdk-node/issues/531 +export const axiosFetch = async (url: string, options: FetchOptions): Promise => { + const response = await axios(url, { ...options, data: options.body }); + return new Promise((resolve) => { + resolve({ + ...response, + json: () => { + return response.data; + }, + }); + }); +}; diff --git a/lambdas/functions/control-plane/src/gh-auth/gh-auth.test.ts b/lambdas/functions/control-plane/src/gh-auth/gh-auth.test.ts index afb6412bde..584f0d6b8d 100644 --- a/lambdas/functions/control-plane/src/gh-auth/gh-auth.test.ts +++ b/lambdas/functions/control-plane/src/gh-auth/gh-auth.test.ts @@ -95,7 +95,7 @@ ${decryptedValue}`, // Assert expect(mockedCreatAppAuth).toBeCalledTimes(1); - expect(mockedCreatAppAuth).toBeCalledWith(authOptions); + expect(mockedCreatAppAuth).toBeCalledWith({ ...authOptions, request: expect.anything() }); }); test('Creates auth object for public GitHub', async () => { @@ -121,7 +121,7 @@ ${decryptedValue}`, expect(getParameter).toBeCalledWith(PARAMETER_GITHUB_APP_KEY_BASE64_NAME); expect(mockedCreatAppAuth).toBeCalledTimes(1); - expect(mockedCreatAppAuth).toBeCalledWith(authOptions); + expect(mockedCreatAppAuth).toBeCalledWith({ ...authOptions, request: expect.anything() }); expect(mockedAuth).toBeCalledWith({ type: authType }); expect(result.token).toBe(token); }); diff --git a/lambdas/functions/control-plane/src/gh-auth/gh-auth.ts b/lambdas/functions/control-plane/src/gh-auth/gh-auth.ts index a52a0a832e..ae00775991 
100644 --- a/lambdas/functions/control-plane/src/gh-auth/gh-auth.ts +++ b/lambdas/functions/control-plane/src/gh-auth/gh-auth.ts @@ -13,11 +13,13 @@ import { Octokit } from '@octokit/rest'; import { createChildLogger } from '@terraform-aws-github-runner/aws-powertools-util'; import { getParameter } from '@terraform-aws-github-runner/aws-ssm-util'; -const logger = createChildLogger('gh-auth'); +import { axiosFetch } from '../axios/fetch-override'; +const logger = createChildLogger('gh-auth'); export async function createOctoClient(token: string, ghesApiUrl = ''): Promise { const ocktokitOptions: OctokitOptions = { auth: token, + request: { fetch: axiosFetch }, }; if (ghesApiUrl) { ocktokitOptions.baseUrl = ghesApiUrl; @@ -64,7 +66,12 @@ async function createAuth(installationId: number | undefined, ghesApiUrl: string if (ghesApiUrl) { authOptions.request = request.defaults({ baseUrl: ghesApiUrl, + request: { + fetch: axiosFetch, + }, }); + } else { + authOptions.request = request.defaults({ request: { fetch: axiosFetch } }); } return createAppAuth(authOptions); } diff --git a/lambdas/functions/control-plane/src/lambda.test.ts b/lambdas/functions/control-plane/src/lambda.test.ts index c0fa0a8854..9e2f5e0ce9 100644 --- a/lambdas/functions/control-plane/src/lambda.test.ts +++ b/lambdas/functions/control-plane/src/lambda.test.ts @@ -1,8 +1,8 @@ -import { logger } from '@terraform-aws-github-runner/aws-powertools-util'; +import { captureLambdaHandler, logger } from '@terraform-aws-github-runner/aws-powertools-util'; import { Context, SQSEvent, SQSRecord } from 'aws-lambda'; import { mocked } from 'jest-mock'; -import { adjustPool, scaleDownHandler, scaleUpHandler, ssmHousekeeper } from './lambda'; +import { addMiddleware, adjustPool, scaleDownHandler, scaleUpHandler, ssmHousekeeper } from './lambda'; import { adjust } from './pool/pool'; import ScaleError from './scale-runners/ScaleError'; import { scaleDown } from './scale-runners/scale-down'; @@ -161,6 +161,14 @@ 
describe('Adjust pool.', () => { }); }); +describe('Test middleware', () => { + it('Should have a working middleware', async () => { + const mockedLambdaHandler = captureLambdaHandler as unknown as jest.Mock; + mockedLambdaHandler.mockReturnValue({ before: jest.fn(), after: jest.fn(), onError: jest.fn() }); + expect(addMiddleware).not.toThrowError(); + }); +}); + describe('Test ssm housekeeper lambda wrapper.', () => { it('Invoke without errors.', async () => { const mock = mocked(cleanSSMTokens); diff --git a/lambdas/functions/control-plane/src/lambda.ts b/lambdas/functions/control-plane/src/lambda.ts index e4e3f66096..8168f99e68 100644 --- a/lambdas/functions/control-plane/src/lambda.ts +++ b/lambdas/functions/control-plane/src/lambda.ts @@ -1,6 +1,7 @@ +import middy from '@middy/core'; import { logger, setContext } from '@terraform-aws-github-runner/aws-powertools-util'; +import { captureLambdaHandler, tracer } from '@terraform-aws-github-runner/aws-powertools-util'; import { Context, SQSEvent } from 'aws-lambda'; -import 'source-map-support/register'; import { PoolEvent, adjust } from './pool/pool'; import ScaleError from './scale-runners/ScaleError'; @@ -50,6 +51,18 @@ export async function adjustPool(event: PoolEvent, context: Context): Promise { + const handler = captureLambdaHandler(tracer); + if (!handler) { + return; + } + middy(scaleUpHandler).use(handler); + middy(scaleDownHandler).use(handler); + middy(adjustPool).use(handler); + middy(ssmHousekeeper).use(handler); +}; +addMiddleware(); + export async function ssmHousekeeper(event: unknown, context: Context): Promise { setContext(context, 'lambda.ts'); logger.logEventIfEnabled(event); diff --git a/lambdas/functions/control-plane/src/pool/pool.ts b/lambdas/functions/control-plane/src/pool/pool.ts index e3b37c551d..52fd4ba000 100644 --- a/lambdas/functions/control-plane/src/pool/pool.ts +++ b/lambdas/functions/control-plane/src/pool/pool.ts @@ -36,6 +36,7 @@ export async function adjust(event: 
PoolEvent): Promise { const instanceAllocationStrategy = process.env.INSTANCE_ALLOCATION_STRATEGY || 'lowest-price'; // same as AWS default const runnerOwner = process.env.RUNNER_OWNER; const amiIdSsmParameterName = process.env.AMI_ID_SSM_PARAMETER_NAME; + const tracingEnabled = yn(process.env.POWERTOOLS_TRACE_ENABLED, { default: false }); let ghesApiUrl = ''; if (ghesBaseUrl) { @@ -118,6 +119,7 @@ export async function adjust(event: PoolEvent): Promise { subnets, numberOfRunners: topUp, amiIdSsmParameterName, + tracingEnabled, }, githubInstallationClient, ); diff --git a/lambdas/functions/control-plane/src/scale-runners/scale-up.test.ts b/lambdas/functions/control-plane/src/scale-runners/scale-up.test.ts index e115e0e8e0..ee7ef21372 100644 --- a/lambdas/functions/control-plane/src/scale-runners/scale-up.test.ts +++ b/lambdas/functions/control-plane/src/scale-runners/scale-up.test.ts @@ -77,6 +77,7 @@ const EXPECTED_RUNNER_PARAMS: RunnerInputParameters = { instanceAllocationStrategy: 'lowest-price', }, subnets: ['subnet-123'], + tracingEnabled: false, }; let expectedRunnerParams: RunnerInputParameters; diff --git a/lambdas/functions/control-plane/src/scale-runners/scale-up.ts b/lambdas/functions/control-plane/src/scale-runners/scale-up.ts index 1124cc0c74..435835af3d 100644 --- a/lambdas/functions/control-plane/src/scale-runners/scale-up.ts +++ b/lambdas/functions/control-plane/src/scale-runners/scale-up.ts @@ -50,6 +50,7 @@ interface CreateEC2RunnerConfig { ec2instanceCriteria: RunnerInputParameters['ec2instanceCriteria']; numberOfRunners?: number; amiIdSsmParameterName?: string; + tracingEnabled?: boolean; } function generateRunnerServiceConfig(githubRunnerConfig: CreateGitHubRunnerConfig, token: string) { @@ -235,6 +236,7 @@ export async function scaleUp(eventSource: string, payload: ActionRequestMessage const amiIdSsmParameterName = process.env.AMI_ID_SSM_PARAMETER_NAME; const runnerNamePrefix = process.env.RUNNER_NAME_PREFIX || ''; const ssmConfigPath = 
process.env.SSM_CONFIG_PATH || ''; + const tracingEnabled = yn(process.env.POWERTOOLS_TRACE_ENABLED, { default: false }); if (ephemeralEnabled && payload.eventType !== 'workflow_job') { logger.warn(`${payload.eventType} event is not supported in combination with ephemeral runners.`); @@ -304,6 +306,7 @@ export async function scaleUp(eventSource: string, payload: ActionRequestMessage launchTemplateName, subnets, amiIdSsmParameterName, + tracingEnabled, }, githubInstallationClient, ); diff --git a/lambdas/functions/control-plane/src/scale-runners/ssm-housekeeper.ts b/lambdas/functions/control-plane/src/scale-runners/ssm-housekeeper.ts index ea23920416..78d015c921 100644 --- a/lambdas/functions/control-plane/src/scale-runners/ssm-housekeeper.ts +++ b/lambdas/functions/control-plane/src/scale-runners/ssm-housekeeper.ts @@ -1,5 +1,6 @@ import { DeleteParameterCommand, GetParametersByPathCommand, SSMClient } from '@aws-sdk/client-ssm'; import { logger } from '@terraform-aws-github-runner/aws-powertools-util'; +import { getTracedAWSV3Client } from '@terraform-aws-github-runner/aws-powertools-util'; export interface SSMCleanupOptions { dryRun: boolean; @@ -25,7 +26,7 @@ export async function cleanSSMTokens(options: SSMCleanupOptions): Promise logger.debug('Cleaning with options', { options }); validateOptions(options); - const client = new SSMClient({ region: process.env.AWS_REGION }); + const client = getTracedAWSV3Client(new SSMClient({ region: process.env.AWS_REGION })); const parameters = await client.send(new GetParametersByPathCommand({ Path: options.tokenPath })); while (parameters.NextToken) { const nextParameters = await client.send( diff --git a/lambdas/functions/gh-agent-syncer/package.json b/lambdas/functions/gh-agent-syncer/package.json index 3897ea9377..9442ddaa3d 100644 --- a/lambdas/functions/gh-agent-syncer/package.json +++ b/lambdas/functions/gh-agent-syncer/package.json @@ -40,6 +40,7 @@ "@aws-sdk/client-s3": "^3.436.0", "@aws-sdk/lib-storage": 
"^3.436.0", "@aws-sdk/types": "^3.433.0", + "@middy/core": "^3.6.2", "@terraform-aws-github-runner/aws-powertools-util": "*", "axios": "^1.5.1" } diff --git a/lambdas/functions/gh-agent-syncer/src/lambda.ts b/lambdas/functions/gh-agent-syncer/src/lambda.ts index 729b1b8cc9..7925d279c0 100644 --- a/lambdas/functions/gh-agent-syncer/src/lambda.ts +++ b/lambdas/functions/gh-agent-syncer/src/lambda.ts @@ -1,8 +1,12 @@ +import middy from '@middy/core'; import { logger, setContext } from '@terraform-aws-github-runner/aws-powertools-util'; +import { captureLambdaHandler, tracer } from '@terraform-aws-github-runner/aws-powertools-util'; import { Context } from 'aws-lambda'; import { sync } from './syncer/syncer'; +middy(handler).use(captureLambdaHandler(tracer)); + // eslint-disable-next-line export async function handler(event: any, context: Context): Promise { setContext(context, 'lambda.ts'); diff --git a/lambdas/functions/gh-agent-syncer/src/syncer/syncer.ts b/lambdas/functions/gh-agent-syncer/src/syncer/syncer.ts index a36da68a27..cb5f84b0d0 100644 --- a/lambdas/functions/gh-agent-syncer/src/syncer/syncer.ts +++ b/lambdas/functions/gh-agent-syncer/src/syncer/syncer.ts @@ -2,6 +2,7 @@ import { GetObjectTaggingCommand, S3Client, ServerSideEncryption, Tag } from '@a import { Upload } from '@aws-sdk/lib-storage'; import { Octokit } from '@octokit/rest'; import { createChildLogger } from '@terraform-aws-github-runner/aws-powertools-util'; +import { getTracedAWSV3Client } from '@terraform-aws-github-runner/aws-powertools-util'; import axios from 'axios'; import { Stream } from 'stream'; @@ -84,7 +85,7 @@ async function uploadToS3( } export async function sync(): Promise { - const s3 = new S3Client({}); + const s3 = getTracedAWSV3Client(new S3Client({})); const runnerOs = process.env.GITHUB_RUNNER_OS || 'linux'; const runnerArch = process.env.GITHUB_RUNNER_ARCHITECTURE || 'x64'; diff --git a/lambdas/functions/webhook/package.json b/lambdas/functions/webhook/package.json 
index 9bad5b92df..49ab442100 100644 --- a/lambdas/functions/webhook/package.json +++ b/lambdas/functions/webhook/package.json @@ -39,6 +39,7 @@ }, "dependencies": { "@aws-sdk/client-sqs": "^3.436.0", + "@middy/core": "^3.6.2", "@octokit/rest": "^20.0.1", "@octokit/types": "^12.1.1", "@octokit/webhooks": "^12.0.3", diff --git a/lambdas/functions/webhook/src/lambda.ts b/lambdas/functions/webhook/src/lambda.ts index fa072fe171..87037da2b0 100644 --- a/lambdas/functions/webhook/src/lambda.ts +++ b/lambdas/functions/webhook/src/lambda.ts @@ -1,4 +1,6 @@ +import middy from '@middy/core'; import { logger, setContext } from '@terraform-aws-github-runner/aws-powertools-util'; +import { captureLambdaHandler, tracer } from '@terraform-aws-github-runner/aws-powertools-util'; import { APIGatewayEvent, Context } from 'aws-lambda'; import { handle } from './webhook/handler'; @@ -7,6 +9,7 @@ export interface Response { statusCode: number; body?: string; } +middy(githubWebhook).use(captureLambdaHandler(tracer)); export async function githubWebhook(event: APIGatewayEvent, context: Context): Promise { setContext(context, 'lambda.ts'); logger.logEventIfEnabled(event); diff --git a/lambdas/functions/webhook/src/sqs/index.ts b/lambdas/functions/webhook/src/sqs/index.ts index 4398c5827e..fa851dee4d 100644 --- a/lambdas/functions/webhook/src/sqs/index.ts +++ b/lambdas/functions/webhook/src/sqs/index.ts @@ -1,6 +1,7 @@ import { SQS, SendMessageCommandInput } from '@aws-sdk/client-sqs'; import { WorkflowJobEvent } from '@octokit/webhooks-types'; import { createChildLogger } from '@terraform-aws-github-runner/aws-powertools-util'; +import { getTracedAWSV3Client } from '@terraform-aws-github-runner/aws-powertools-util'; const logger = createChildLogger('sqs'); @@ -30,7 +31,7 @@ export interface GithubWorkflowEvent { } export const sendActionRequest = async (message: ActionRequestMessage): Promise => { - const sqs = new SQS({ region: process.env.AWS_REGION }); + const sqs = 
getTracedAWSV3Client(new SQS({ region: process.env.AWS_REGION })); const sqsMessage: SendMessageCommandInput = { QueueUrl: message.queueId, diff --git a/lambdas/libs/aws-powertools-util/package.json b/lambdas/libs/aws-powertools-util/package.json index f880511e5f..80ca3fcb72 100644 --- a/lambdas/libs/aws-powertools-util/package.json +++ b/lambdas/libs/aws-powertools-util/package.json @@ -37,6 +37,7 @@ }, "dependencies": { "@aws-lambda-powertools/logger": "^1.8.0", + "@aws-lambda-powertools/tracer": "^1.14.0", "aws-lambda": "^1.0.7" } } diff --git a/lambdas/libs/aws-powertools-util/src/index.ts b/lambdas/libs/aws-powertools-util/src/index.ts index 1ff09efd40..3a8ac31bac 100644 --- a/lambdas/libs/aws-powertools-util/src/index.ts +++ b/lambdas/libs/aws-powertools-util/src/index.ts @@ -1 +1,2 @@ export * from './logger'; +export * from './tracer'; diff --git a/lambdas/libs/aws-powertools-util/src/tracer/index.ts b/lambdas/libs/aws-powertools-util/src/tracer/index.ts new file mode 100644 index 0000000000..19eba985d9 --- /dev/null +++ b/lambdas/libs/aws-powertools-util/src/tracer/index.ts @@ -0,0 +1,10 @@ +import { Tracer, captureLambdaHandler } from '@aws-lambda-powertools/tracer'; + +const tracer = new Tracer({ + serviceName: process.env.SERVICE_NAME || 'runners', +}); + +function getTracedAWSV3Client(client: T): T { + return tracer.captureAWSv3Client(client); +} +export { tracer, captureLambdaHandler, getTracedAWSV3Client }; diff --git a/lambdas/libs/aws-powertools-util/src/tracer/tracer.test.ts b/lambdas/libs/aws-powertools-util/src/tracer/tracer.test.ts new file mode 100644 index 0000000000..888df18ae0 --- /dev/null +++ b/lambdas/libs/aws-powertools-util/src/tracer/tracer.test.ts @@ -0,0 +1,18 @@ +import { captureLambdaHandler, getTracedAWSV3Client, tracer } from '../'; + +describe('A root tracer.', () => { + beforeEach(() => { + jest.clearAllMocks(); + jest.resetAllMocks(); + }); + + test('Should call underlying tracer.', async () => { + jest.spyOn(tracer, 
'captureAWSv3Client'); + getTracedAWSV3Client({}); + expect(tracer.captureAWSv3Client).toBeCalledTimes(1); + }); + test('Should have a working middleware', async () => { + const { before } = captureLambdaHandler(tracer); + expect(before).toBeDefined(); + }); +}); diff --git a/lambdas/libs/aws-ssm-util/package.json b/lambdas/libs/aws-ssm-util/package.json index 2bb4e6f9a8..1ad5646640 100644 --- a/lambdas/libs/aws-ssm-util/package.json +++ b/lambdas/libs/aws-ssm-util/package.json @@ -37,6 +37,7 @@ }, "dependencies": { "@aws-sdk/client-ssm": "^3.436.0", + "@terraform-aws-github-runner/aws-powertools-util": "*", "@aws-sdk/types": "^3.433.0" } } diff --git a/lambdas/libs/aws-ssm-util/src/index.ts b/lambdas/libs/aws-ssm-util/src/index.ts index fdbaa11537..36d5a6d8e8 100644 --- a/lambdas/libs/aws-ssm-util/src/index.ts +++ b/lambdas/libs/aws-ssm-util/src/index.ts @@ -1,13 +1,14 @@ import { GetParameterCommand, PutParameterCommand, SSMClient } from '@aws-sdk/client-ssm'; +import { getTracedAWSV3Client } from '@terraform-aws-github-runner/aws-powertools-util'; export async function getParameter(parameter_name: string): Promise { - const client = new SSMClient({ region: process.env.AWS_REGION }); + const client = getTracedAWSV3Client(new SSMClient({ region: process.env.AWS_REGION })); return (await client.send(new GetParameterCommand({ Name: parameter_name, WithDecryption: true }))).Parameter ?.Value as string; } export async function putParameter(parameter_name: string, parameter_value: string, secure: boolean): Promise { - const client = new SSMClient({ region: process.env.AWS_REGION }); + const client = getTracedAWSV3Client(new SSMClient({ region: process.env.AWS_REGION })); await client.send( new PutParameterCommand({ Name: parameter_name, diff --git a/main.tf b/main.tf index 6979d57553..f309eceb60 100644 --- a/main.tf +++ b/main.tf @@ -153,7 +153,7 @@ module "webhook" { lambda_architecture = var.lambda_architecture lambda_zip = var.webhook_lambda_zip lambda_timeout = 
var.webhook_lambda_timeout - lambda_tracing_mode = var.lambda_tracing_mode + tracing_config = var.tracing_config logging_retention_in_days = var.logging_retention_in_days logging_kms_key_id = var.logging_kms_key_id @@ -237,7 +237,7 @@ module "runners" { lambda_timeout_scale_down = var.runners_scale_down_lambda_timeout lambda_subnet_ids = var.lambda_subnet_ids lambda_security_group_ids = var.lambda_security_group_ids - lambda_tracing_mode = var.lambda_tracing_mode + tracing_config = var.tracing_config logging_retention_in_days = var.logging_retention_in_days logging_kms_key_id = var.logging_kms_key_id enable_cloudwatch_agent = var.enable_cloudwatch_agent @@ -303,7 +303,7 @@ module "runner_binaries" { lambda_architecture = var.lambda_architecture lambda_zip = var.runner_binaries_syncer_lambda_zip lambda_timeout = var.runner_binaries_syncer_lambda_timeout - lambda_tracing_mode = var.lambda_tracing_mode + tracing_config = var.tracing_config logging_retention_in_days = var.logging_retention_in_days logging_kms_key_id = var.logging_kms_key_id @@ -342,7 +342,7 @@ module "ami_housekeeper" { lambda_security_group_ids = var.lambda_security_group_ids lambda_subnet_ids = var.lambda_subnet_ids lambda_timeout = var.ami_housekeeper_lambda_timeout - lambda_tracing_mode = var.lambda_tracing_mode + tracing_config = var.tracing_config logging_retention_in_days = var.logging_retention_in_days logging_kms_key_id = var.logging_kms_key_id diff --git a/modules/ami-housekeeper/README.md b/modules/ami-housekeeper/README.md index 5b41b5d50f..55e9bae2c9 100644 --- a/modules/ami-housekeeper/README.md +++ b/modules/ami-housekeeper/README.md @@ -112,7 +112,6 @@ No modules. | [lambda\_security\_group\_ids](#input\_lambda\_security\_group\_ids) | List of security group IDs associated with the Lambda function. 
| `list(string)` | `[]` | no | | [lambda\_subnet\_ids](#input\_lambda\_subnet\_ids) | List of subnets in which the action runners will be launched, the subnets needs to be subnets in the `vpc_id`. | `list(string)` | `[]` | no | | [lambda\_timeout](#input\_lambda\_timeout) | Time out of the lambda in seconds. | `number` | `60` | no | -| [lambda\_tracing\_mode](#input\_lambda\_tracing\_mode) | Enable X-Ray tracing for the lambda functions. | `string` | `null` | no | | [lambda\_zip](#input\_lambda\_zip) | File location of the lambda zip file. | `string` | `null` | no | | [log\_level](#input\_log\_level) | Logging level for lambda logging. Valid values are 'silly', 'trace', 'debug', 'info', 'warn', 'error', 'fatal'. | `string` | `"info"` | no | | [logging\_kms\_key\_id](#input\_logging\_kms\_key\_id) | Specifies the kms key id to encrypt the logs with | `string` | `null` | no | @@ -121,6 +120,7 @@ No modules. | [role\_path](#input\_role\_path) | The path that will be added to the role, if not set the environment name will be used. | `string` | `null` | no | | [role\_permissions\_boundary](#input\_role\_permissions\_boundary) | Permissions boundary that will be added to the created role for the lambda. | `string` | `null` | no | | [tags](#input\_tags) | Map of tags that will be added to created resources. By default resources will be tagged with name and environment. | `map(string)` | `{}` | no | +| [tracing\_config](#input\_tracing\_config) | Configuration for lambda tracing. |
object({
mode = optional(string, null)
capture_http_requests = optional(bool, false)
capture_error = optional(bool, false)
})
| `{}` | no | ## Outputs diff --git a/modules/ami-housekeeper/iam.tf b/modules/ami-housekeeper/iam.tf index a95949aadf..454d943b4b 100644 --- a/modules/ami-housekeeper/iam.tf +++ b/modules/ami-housekeeper/iam.tf @@ -1,5 +1,5 @@ data "aws_iam_policy_document" "lambda_xray" { - count = var.lambda_tracing_mode != null ? 1 : 0 + count = var.tracing_config.mode != null ? 1 : 0 statement { actions = [ "xray:BatchGetTraces", diff --git a/modules/ami-housekeeper/main.tf b/modules/ami-housekeeper/main.tf index bb2faa49fa..0cb9935c22 100644 --- a/modules/ami-housekeeper/main.tf +++ b/modules/ami-housekeeper/main.tf @@ -19,10 +19,13 @@ resource "aws_lambda_function" "ami_housekeeper" { environment { variables = { - LOG_LEVEL = var.log_level - POWERTOOLS_LOGGER_LOG_EVENT = var.log_level == "debug" ? "true" : "false" - AMI_CLEANUP_OPTIONS = jsonencode(var.cleanup_config) - SERVICE_NAME = "ami-housekeeper" + LOG_LEVEL = var.log_level + POWERTOOLS_LOGGER_LOG_EVENT = var.log_level == "debug" ? "true" : "false" + AMI_CLEANUP_OPTIONS = jsonencode(var.cleanup_config) + SERVICE_NAME = "ami-housekeeper" + POWERTOOLS_TRACE_ENABLED = var.tracing_config.mode != null ? true : false + POWERTOOLS_TRACER_CAPTURE_HTTPS_REQUESTS = var.tracing_config.capture_http_requests + POWERTOOLS_TRACER_CAPTURE_ERROR = var.tracing_config.capture_error } } @@ -37,9 +40,9 @@ resource "aws_lambda_function" "ami_housekeeper" { tags = var.tags dynamic "tracing_config" { - for_each = var.lambda_tracing_mode != null ? [true] : [] + for_each = var.tracing_config.mode != null ? [true] : [] content { - mode = var.lambda_tracing_mode + mode = var.tracing_config.mode } } } @@ -123,7 +126,7 @@ resource "aws_lambda_permission" "ami_housekeeper" { } resource "aws_iam_role_policy" "ami_housekeeper_xray" { - count = var.lambda_tracing_mode != null ? 1 : 0 + count = var.tracing_config.mode != null ? 
1 : 0 policy = data.aws_iam_policy_document.lambda_xray[0].json role = aws_iam_role.ami_housekeeper.name } diff --git a/modules/ami-housekeeper/variables.tf b/modules/ami-housekeeper/variables.tf index faf2f65b30..36562a8a82 100644 --- a/modules/ami-housekeeper/variables.tf +++ b/modules/ami-housekeeper/variables.tf @@ -124,10 +124,14 @@ variable "lambda_architecture" { } } -variable "lambda_tracing_mode" { - description = "Enable X-Ray tracing for the lambda functions." - type = string - default = null +variable "tracing_config" { + description = "Configuration for lambda tracing." + type = object({ + mode = optional(string, null) + capture_http_requests = optional(bool, false) + capture_error = optional(bool, false) + }) + default = {} } # specif for this module diff --git a/modules/multi-runner/README.md b/modules/multi-runner/README.md index c5d6690684..52edd29406 100644 --- a/modules/multi-runner/README.md +++ b/modules/multi-runner/README.md @@ -144,7 +144,6 @@ module "multi-runner" { | [lambda\_s3\_bucket](#input\_lambda\_s3\_bucket) | S3 bucket from which to specify lambda functions. This is an alternative to providing local files directly. | `string` | `null` | no | | [lambda\_security\_group\_ids](#input\_lambda\_security\_group\_ids) | List of security group IDs associated with the Lambda function. | `list(string)` | `[]` | no | | [lambda\_subnet\_ids](#input\_lambda\_subnet\_ids) | List of subnets in which the action runners will be launched, the subnets needs to be subnets in the `vpc_id`. | `list(string)` | `[]` | no | -| [lambda\_tracing\_mode](#input\_lambda\_tracing\_mode) | Enable X-Ray tracing for the lambda functions. | `string` | `null` | no | | [log\_level](#input\_log\_level) | Logging level for lambda logging. Valid values are 'silly', 'trace', 'debug', 'info', 'warn', 'error', 'fatal'. 
| `string` | `"info"` | no | | [logging\_kms\_key\_id](#input\_logging\_kms\_key\_id) | Specifies the kms key id to encrypt the logs with | `string` | `null` | no | | [logging\_retention\_in\_days](#input\_logging\_retention\_in\_days) | Specifies the number of days you want to retain log events for the lambda log group. Possible values are: 0, 1, 3, 5, 7, 14, 30, 60, 90, 120, 150, 180, 365, 400, 545, 731, 1827, and 3653. | `number` | `180` | no | @@ -173,6 +172,7 @@ module "multi-runner" { | [syncer\_lambda\_s3\_key](#input\_syncer\_lambda\_s3\_key) | S3 key for syncer lambda function. Required if using S3 bucket to specify lambdas. | `string` | `null` | no | | [syncer\_lambda\_s3\_object\_version](#input\_syncer\_lambda\_s3\_object\_version) | S3 object version for syncer lambda function. Useful if S3 versioning is enabled on source bucket. | `string` | `null` | no | | [tags](#input\_tags) | Map of tags that will be added to created resources. By default resources will be tagged with name and environment. | `map(string)` | `{}` | no | +| [tracing\_config](#input\_tracing\_config) | Configuration for lambda tracing. |
object({
mode = optional(string, null)
capture_http_requests = optional(bool, false)
capture_error = optional(bool, false)
})
| `{}` | no | | [vpc\_id](#input\_vpc\_id) | The VPC for security groups of the action runners. | `string` | n/a | yes | | [webhook\_lambda\_apigateway\_access\_log\_settings](#input\_webhook\_lambda\_apigateway\_access\_log\_settings) | Access log settings for webhook API gateway. |
object({
destination_arn = string
format = string
})
| `null` | no | | [webhook\_lambda\_s3\_key](#input\_webhook\_lambda\_s3\_key) | S3 key for webhook lambda function. Required if using S3 bucket to specify lambdas. | `string` | `null` | no | diff --git a/modules/multi-runner/ami-housekeeper.tf b/modules/multi-runner/ami-housekeeper.tf index c34a51a0fb..cd40dddac7 100644 --- a/modules/multi-runner/ami-housekeeper.tf +++ b/modules/multi-runner/ami-housekeeper.tf @@ -18,7 +18,7 @@ module "ami_housekeeper" { lambda_security_group_ids = var.lambda_security_group_ids lambda_subnet_ids = var.lambda_subnet_ids lambda_timeout = var.ami_housekeeper_lambda_timeout - lambda_tracing_mode = var.lambda_tracing_mode + tracing_config = var.tracing_config logging_retention_in_days = var.logging_retention_in_days logging_kms_key_id = var.logging_kms_key_id diff --git a/modules/multi-runner/runner-binaries.tf b/modules/multi-runner/runner-binaries.tf index 87387a5092..a50d1dd97b 100644 --- a/modules/multi-runner/runner-binaries.tf +++ b/modules/multi-runner/runner-binaries.tf @@ -17,7 +17,7 @@ module "runner_binaries" { lambda_architecture = var.lambda_architecture lambda_zip = var.runner_binaries_syncer_lambda_zip lambda_timeout = var.runner_binaries_syncer_lambda_timeout - lambda_tracing_mode = var.lambda_tracing_mode + tracing_config = var.tracing_config logging_retention_in_days = var.logging_retention_in_days logging_kms_key_id = var.logging_kms_key_id enable_event_rule_binaries_syncer = var.enable_event_rule_binaries_syncer diff --git a/modules/multi-runner/runners.tf b/modules/multi-runner/runners.tf index 2541200c49..d0653122d2 100644 --- a/modules/multi-runner/runners.tf +++ b/modules/multi-runner/runners.tf @@ -66,7 +66,7 @@ module "runners" { lambda_timeout_scale_down = var.runners_scale_down_lambda_timeout lambda_subnet_ids = var.lambda_subnet_ids lambda_security_group_ids = var.lambda_security_group_ids - lambda_tracing_mode = var.lambda_tracing_mode + tracing_config = var.tracing_config logging_retention_in_days = 
var.logging_retention_in_days logging_kms_key_id = var.logging_kms_key_id enable_cloudwatch_agent = each.value.runner_config.enable_cloudwatch_agent diff --git a/modules/multi-runner/variables.tf b/modules/multi-runner/variables.tf index a7f32502c0..e75a9653f7 100644 --- a/modules/multi-runner/variables.tf +++ b/modules/multi-runner/variables.tf @@ -543,10 +543,14 @@ variable "ssm_paths" { default = {} } -variable "lambda_tracing_mode" { - description = "Enable X-Ray tracing for the lambda functions." - type = string - default = null +variable "tracing_config" { + description = "Configuration for lambda tracing." + type = object({ + mode = optional(string, null) + capture_http_requests = optional(bool, false) + capture_error = optional(bool, false) + }) + default = {} } variable "associate_public_ipv4_address" { diff --git a/modules/multi-runner/webhook.tf b/modules/multi-runner/webhook.tf index 2159779daa..9a1df4dce6 100644 --- a/modules/multi-runner/webhook.tf +++ b/modules/multi-runner/webhook.tf @@ -19,7 +19,7 @@ module "webhook" { lambda_architecture = var.lambda_architecture lambda_zip = var.webhook_lambda_zip lambda_timeout = var.webhook_lambda_timeout - lambda_tracing_mode = var.lambda_tracing_mode + tracing_config = var.tracing_config logging_retention_in_days = var.logging_retention_in_days logging_kms_key_id = var.logging_kms_key_id diff --git a/modules/runner-binaries-syncer/README.md b/modules/runner-binaries-syncer/README.md index a2453361ee..8e8354929d 100644 --- a/modules/runner-binaries-syncer/README.md +++ b/modules/runner-binaries-syncer/README.md @@ -100,7 +100,6 @@ No modules. | [lambda\_security\_group\_ids](#input\_lambda\_security\_group\_ids) | List of security group IDs associated with the Lambda function. | `list(string)` | `[]` | no | | [lambda\_subnet\_ids](#input\_lambda\_subnet\_ids) | List of subnets in which the action runners will be launched, the subnets needs to be subnets in the `vpc_id`. 
| `list(string)` | `[]` | no | | [lambda\_timeout](#input\_lambda\_timeout) | Time out of the lambda in seconds. | `number` | `300` | no | -| [lambda\_tracing\_mode](#input\_lambda\_tracing\_mode) | Enable X-Ray tracing for the lambda functions. | `string` | `null` | no | | [lambda\_zip](#input\_lambda\_zip) | File location of the lambda zip file. | `string` | `null` | no | | [log\_level](#input\_log\_level) | Logging level for lambda logging. Valid values are 'silly', 'trace', 'debug', 'info', 'warn', 'error', 'fatal'. | `string` | `"info"` | no | | [logging\_kms\_key\_id](#input\_logging\_kms\_key\_id) | Specifies the kms key id to encrypt the logs with | `string` | `null` | no | @@ -117,6 +116,7 @@ No modules. | [syncer\_lambda\_s3\_key](#input\_syncer\_lambda\_s3\_key) | S3 key for syncer lambda function. Required if using S3 bucket to specify lambdas. | `string` | `null` | no | | [syncer\_lambda\_s3\_object\_version](#input\_syncer\_lambda\_s3\_object\_version) | S3 object version for syncer lambda function. Useful if S3 versioning is enabled on source bucket. | `string` | `null` | no | | [tags](#input\_tags) | Map of tags that will be added to created resources. By default resources will be tagged with name and environment. | `map(string)` | `{}` | no | +| [tracing\_config](#input\_tracing\_config) | Configuration for lambda tracing. |
object({
mode = optional(string, null)
capture_http_requests = optional(bool, false)
capture_error = optional(bool, false)
})
| `{}` | no | ## Outputs diff --git a/modules/runner-binaries-syncer/iam.tf b/modules/runner-binaries-syncer/iam.tf index a95949aadf..454d943b4b 100644 --- a/modules/runner-binaries-syncer/iam.tf +++ b/modules/runner-binaries-syncer/iam.tf @@ -1,5 +1,5 @@ data "aws_iam_policy_document" "lambda_xray" { - count = var.lambda_tracing_mode != null ? 1 : 0 + count = var.tracing_config.mode != null ? 1 : 0 statement { actions = [ "xray:BatchGetTraces", diff --git a/modules/runner-binaries-syncer/runner-binaries-syncer.tf b/modules/runner-binaries-syncer/runner-binaries-syncer.tf index 7226b76ebe..8da74bbfa1 100644 --- a/modules/runner-binaries-syncer/runner-binaries-syncer.tf +++ b/modules/runner-binaries-syncer/runner-binaries-syncer.tf @@ -23,15 +23,18 @@ resource "aws_lambda_function" "syncer" { environment { variables = { - ENVIRONMENT = var.prefix - GITHUB_RUNNER_ARCHITECTURE = var.runner_architecture - GITHUB_RUNNER_OS = local.gh_binary_os_label[var.runner_os] - LOG_LEVEL = var.log_level - POWERTOOLS_LOGGER_LOG_EVENT = var.log_level == "debug" ? "true" : "false" - S3_BUCKET_NAME = aws_s3_bucket.action_dist.id - S3_OBJECT_KEY = local.action_runner_distribution_object_key - S3_SSE_ALGORITHM = try(var.server_side_encryption_configuration.rule.apply_server_side_encryption_by_default.sse_algorithm, null) - S3_SSE_KMS_KEY_ID = try(var.server_side_encryption_configuration.rule.apply_server_side_encryption_by_default.kms_master_key_id, null) + ENVIRONMENT = var.prefix + GITHUB_RUNNER_ARCHITECTURE = var.runner_architecture + GITHUB_RUNNER_OS = local.gh_binary_os_label[var.runner_os] + LOG_LEVEL = var.log_level + POWERTOOLS_LOGGER_LOG_EVENT = var.log_level == "debug" ? "true" : "false" + POWERTOOLS_TRACE_ENABLED = var.tracing_config.mode != null ? 
true : false + POWERTOOLS_TRACER_CAPTURE_HTTPS_REQUESTS = var.tracing_config.capture_http_requests + POWERTOOLS_TRACER_CAPTURE_ERROR = var.tracing_config.capture_error + S3_BUCKET_NAME = aws_s3_bucket.action_dist.id + S3_OBJECT_KEY = local.action_runner_distribution_object_key + S3_SSE_ALGORITHM = try(var.server_side_encryption_configuration.rule.apply_server_side_encryption_by_default.sse_algorithm, null) + S3_SSE_KMS_KEY_ID = try(var.server_side_encryption_configuration.rule.apply_server_side_encryption_by_default.kms_master_key_id, null) } } @@ -46,9 +49,9 @@ resource "aws_lambda_function" "syncer" { tags = var.tags dynamic "tracing_config" { - for_each = var.lambda_tracing_mode != null ? [true] : [] + for_each = var.tracing_config.mode != null ? [true] : [] content { - mode = var.lambda_tracing_mode + mode = var.tracing_config.mode } } } @@ -182,7 +185,7 @@ resource "aws_lambda_permission" "on_deploy" { } resource "aws_iam_role_policy" "syncer_lambda_xray" { - count = var.lambda_tracing_mode != null ? 1 : 0 + count = var.tracing_config.mode != null ? 1 : 0 policy = data.aws_iam_policy_document.lambda_xray[0].json role = aws_iam_role.syncer_lambda.name } diff --git a/modules/runner-binaries-syncer/variables.tf b/modules/runner-binaries-syncer/variables.tf index d008614a0f..2e8f735977 100644 --- a/modules/runner-binaries-syncer/variables.tf +++ b/modules/runner-binaries-syncer/variables.tf @@ -227,8 +227,12 @@ variable "lambda_architecture" { } } -variable "lambda_tracing_mode" { - description = "Enable X-Ray tracing for the lambda functions." - type = string - default = null +variable "tracing_config" { + description = "Configuration for lambda tracing." 
+ type = object({ + mode = optional(string, null) + capture_http_requests = optional(bool, false) + capture_error = optional(bool, false) + }) + default = {} } diff --git a/modules/runners/README.md b/modules/runners/README.md index 1f07717a45..fda075d125 100644 --- a/modules/runners/README.md +++ b/modules/runners/README.md @@ -108,6 +108,7 @@ yarn run dist | [aws_iam_role_policy_attachment.scale_down_vpc_execution_role](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy_attachment) | resource | | [aws_iam_role_policy_attachment.scale_up_vpc_execution_role](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy_attachment) | resource | | [aws_iam_role_policy_attachment.ssm_housekeeper_vpc_execution_role](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy_attachment) | resource | +| [aws_iam_role_policy_attachment.xray_tracing](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_role_policy_attachment) | resource | | [aws_lambda_event_source_mapping.scale_up](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/lambda_event_source_mapping) | resource | | [aws_lambda_function.scale_down](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/lambda_function) | resource | | [aws_lambda_function.scale_up](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/lambda_function) | resource | @@ -176,7 +177,6 @@ yarn run dist | [lambda\_subnet\_ids](#input\_lambda\_subnet\_ids) | List of subnets in which the lambda will be launched, the subnets needs to be subnets in the `vpc_id`. | `list(string)` | `[]` | no | | [lambda\_timeout\_scale\_down](#input\_lambda\_timeout\_scale\_down) | Time out for the scale down lambda in seconds. 
| `number` | `60` | no | | [lambda\_timeout\_scale\_up](#input\_lambda\_timeout\_scale\_up) | Time out for the scale up lambda in seconds. | `number` | `60` | no | -| [lambda\_tracing\_mode](#input\_lambda\_tracing\_mode) | Enable X-Ray tracing for the lambda functions. | `string` | `null` | no | | [lambda\_zip](#input\_lambda\_zip) | File location of the lambda zip file. | `string` | `null` | no | | [log\_level](#input\_log\_level) | Logging level for lambda logging. Valid values are 'silly', 'trace', 'debug', 'info', 'warn', 'error', 'fatal'. | `string` | `"info"` | no | | [logging\_kms\_key\_id](#input\_logging\_kms\_key\_id) | Specifies the kms key id to encrypt the logs with | `string` | `null` | no | @@ -214,6 +214,7 @@ yarn run dist | [ssm\_paths](#input\_ssm\_paths) | The root path used in SSM to store configuration and secreets. |
object({
root = string
tokens = string
config = string
})
| n/a | yes | | [subnet\_ids](#input\_subnet\_ids) | List of subnets in which the action runners will be launched, the subnets needs to be subnets in the `vpc_id`. | `list(string)` | n/a | yes | | [tags](#input\_tags) | Map of tags that will be added to created resources. By default resources will be tagged with name. | `map(string)` | `{}` | no | +| [tracing\_config](#input\_tracing\_config) | Configuration for lambda tracing. |
object({
mode = optional(string, null)
capture_http_requests = optional(bool, false)
capture_error = optional(bool, false)
})
| `{}` | no | | [userdata\_post\_install](#input\_userdata\_post\_install) | User-data script snippet to insert after GitHub action runner install | `string` | `""` | no | | [userdata\_pre\_install](#input\_userdata\_pre\_install) | User-data script snippet to insert before GitHub action runner install | `string` | `""` | no | | [userdata\_template](#input\_userdata\_template) | Alternative user-data template, replacing the default template. By providing your own user\_data you have to take care of installing all required software, including the action runner. Variables userdata\_pre/post\_install are ignored. | `string` | `null` | no | diff --git a/modules/runners/policies-lambda-common.tf b/modules/runners/policies-lambda-common.tf index feeb10c1ae..feb0d39fd9 100644 --- a/modules/runners/policies-lambda-common.tf +++ b/modules/runners/policies-lambda-common.tf @@ -34,7 +34,7 @@ resource "aws_iam_policy" "ami_id_ssm_parameter_read" { } data "aws_iam_policy_document" "lambda_xray" { - count = var.lambda_tracing_mode != null ? 1 : 0 + count = var.tracing_config.mode != null ? 1 : 0 statement { actions = [ "xray:BatchGetTraces", diff --git a/modules/runners/policies-runner.tf b/modules/runners/policies-runner.tf index 4491fa3fae..d1b9190930 100644 --- a/modules/runners/policies-runner.tf +++ b/modules/runners/policies-runner.tf @@ -45,6 +45,12 @@ resource "aws_iam_role_policy" "dist_bucket" { ) } +resource "aws_iam_role_policy_attachment" "xray_tracing" { + count = var.tracing_config.mode != null ? 
1 : 0 + role = aws_iam_role.runner.name + policy_arn = "arn:${var.aws_partition}:iam::aws:policy/AWSXRayDaemonWriteAccess" +} + resource "aws_iam_role_policy" "describe_tags" { name = "runner-describe-tags" role = aws_iam_role.runner.name diff --git a/modules/runners/pool.tf b/modules/runners/pool.tf index c855739d55..4533e95f24 100644 --- a/modules/runners/pool.tf +++ b/modules/runners/pool.tf @@ -55,7 +55,6 @@ module "pool" { arn_ssm_parameters_path_config = local.arn_ssm_parameters_path_config } - aws_partition = var.aws_partition - lambda_tracing_mode = var.lambda_tracing_mode - + aws_partition = var.aws_partition + tracing_config = var.tracing_config } diff --git a/modules/runners/pool/README.md b/modules/runners/pool/README.md index 8c818dc81a..2b7c808df8 100644 --- a/modules/runners/pool/README.md +++ b/modules/runners/pool/README.md @@ -47,7 +47,7 @@ No modules. |------|-------------|------|---------|:--------:| | [aws\_partition](#input\_aws\_partition) | (optional) partition for the arn if not 'aws' | `string` | `"aws"` | no | | [config](#input\_config) | Lookup details in parent module. |
object({
lambda = object({
log_level = string
logging_retention_in_days = number
logging_kms_key_id = string
reserved_concurrent_executions = number
s3_bucket = string
s3_key = string
s3_object_version = string
security_group_ids = list(string)
runtime = string
architecture = string
timeout = number
zip = string
subnet_ids = list(string)
})
tags = map(string)
ghes = object({
url = string
ssl_verify = string
})
github_app_parameters = object({
key_base64 = map(string)
id = map(string)
})
subnet_ids = list(string)
runner = object({
disable_runner_autoupdate = bool
ephemeral = bool
enable_jit_config = bool
boot_time_in_minutes = number
labels = list(string)
launch_template = object({
name = string
})
group_name = string
name_prefix = string
pool_owner = string
role = object({
arn = string
})
})
instance_types = list(string)
instance_target_capacity_type = string
instance_allocation_strategy = string
instance_max_spot_price = string
prefix = string
pool = list(object({
schedule_expression = string
size = number
}))
role_permissions_boundary = string
kms_key_arn = string
ami_kms_key_arn = string
role_path = string
ssm_token_path = string
ssm_config_path = string
ami_id_ssm_parameter_name = string
ami_id_ssm_parameter_read_policy_arn = string
arn_ssm_parameters_path_config = string
})
| n/a | yes | -| [lambda\_tracing\_mode](#input\_lambda\_tracing\_mode) | Enable X-Ray tracing for the lambda functions. | `string` | `null` | no | +| [tracing\_config](#input\_tracing\_config) | Configuration for lambda tracing. |
object({
mode = optional(string, null)
capture_http_requests = optional(bool, false)
capture_error = optional(bool, false)
})
| `{}` | no | ## Outputs diff --git a/modules/runners/pool/main.tf b/modules/runners/pool/main.tf index ffb90dff2d..2b7fb0449b 100644 --- a/modules/runners/pool/main.tf +++ b/modules/runners/pool/main.tf @@ -17,31 +17,34 @@ resource "aws_lambda_function" "pool" { environment { variables = { - AMI_ID_SSM_PARAMETER_NAME = var.config.ami_id_ssm_parameter_name - DISABLE_RUNNER_AUTOUPDATE = var.config.runner.disable_runner_autoupdate - ENABLE_EPHEMERAL_RUNNERS = var.config.runner.ephemeral - ENABLE_JIT_CONFIG = var.config.runner.enable_jit_config - ENVIRONMENT = var.config.prefix - GHES_URL = var.config.ghes.url - INSTANCE_ALLOCATION_STRATEGY = var.config.instance_allocation_strategy - INSTANCE_MAX_SPOT_PRICE = var.config.instance_max_spot_price - INSTANCE_TARGET_CAPACITY_TYPE = var.config.instance_target_capacity_type - INSTANCE_TYPES = join(",", var.config.instance_types) - LAUNCH_TEMPLATE_NAME = var.config.runner.launch_template.name - LOG_LEVEL = var.config.lambda.log_level - NODE_TLS_REJECT_UNAUTHORIZED = var.config.ghes.url != null && !var.config.ghes.ssl_verify ? 0 : 1 - PARAMETER_GITHUB_APP_ID_NAME = var.config.github_app_parameters.id.name - PARAMETER_GITHUB_APP_KEY_BASE64_NAME = var.config.github_app_parameters.key_base64.name - POWERTOOLS_LOGGER_LOG_EVENT = var.config.lambda.log_level == "debug" ? 
"true" : "false" - RUNNER_BOOT_TIME_IN_MINUTES = var.config.runner.boot_time_in_minutes - RUNNER_LABELS = lower(join(",", var.config.runner.labels)) - RUNNER_GROUP_NAME = var.config.runner.group_name - RUNNER_NAME_PREFIX = var.config.runner.name_prefix - RUNNER_OWNER = var.config.runner.pool_owner - SERVICE_NAME = "runners-pool" - SSM_TOKEN_PATH = var.config.ssm_token_path - SSM_CONFIG_PATH = var.config.ssm_config_path - SUBNET_IDS = join(",", var.config.subnet_ids) + AMI_ID_SSM_PARAMETER_NAME = var.config.ami_id_ssm_parameter_name + DISABLE_RUNNER_AUTOUPDATE = var.config.runner.disable_runner_autoupdate + ENABLE_EPHEMERAL_RUNNERS = var.config.runner.ephemeral + ENABLE_JIT_CONFIG = var.config.runner.enable_jit_config + ENVIRONMENT = var.config.prefix + GHES_URL = var.config.ghes.url + INSTANCE_ALLOCATION_STRATEGY = var.config.instance_allocation_strategy + INSTANCE_MAX_SPOT_PRICE = var.config.instance_max_spot_price + INSTANCE_TARGET_CAPACITY_TYPE = var.config.instance_target_capacity_type + INSTANCE_TYPES = join(",", var.config.instance_types) + LAUNCH_TEMPLATE_NAME = var.config.runner.launch_template.name + LOG_LEVEL = var.config.lambda.log_level + NODE_TLS_REJECT_UNAUTHORIZED = var.config.ghes.url != null && !var.config.ghes.ssl_verify ? 0 : 1 + PARAMETER_GITHUB_APP_ID_NAME = var.config.github_app_parameters.id.name + PARAMETER_GITHUB_APP_KEY_BASE64_NAME = var.config.github_app_parameters.key_base64.name + POWERTOOLS_LOGGER_LOG_EVENT = var.config.lambda.log_level == "debug" ? 
"true" : "false" + RUNNER_BOOT_TIME_IN_MINUTES = var.config.runner.boot_time_in_minutes + RUNNER_LABELS = lower(join(",", var.config.runner.labels)) + RUNNER_GROUP_NAME = var.config.runner.group_name + RUNNER_NAME_PREFIX = var.config.runner.name_prefix + RUNNER_OWNER = var.config.runner.pool_owner + SERVICE_NAME = "runners-pool" + SSM_TOKEN_PATH = var.config.ssm_token_path + SSM_CONFIG_PATH = var.config.ssm_config_path + SUBNET_IDS = join(",", var.config.subnet_ids) + POWERTOOLS_TRACE_ENABLED = var.tracing_config.mode != null ? true : false + POWERTOOLS_TRACER_CAPTURE_HTTPS_REQUESTS = var.tracing_config.capture_http_requests + POWERTOOLS_TRACER_CAPTURE_ERROR = var.tracing_config.capture_error } } @@ -54,9 +57,9 @@ resource "aws_lambda_function" "pool" { } dynamic "tracing_config" { - for_each = var.lambda_tracing_mode != null ? [true] : [] + for_each = var.tracing_config.mode != null ? [true] : [] content { - mode = var.lambda_tracing_mode + mode = var.tracing_config.mode } } } @@ -152,7 +155,7 @@ resource "aws_iam_role_policy_attachment" "ami_id_ssm_parameter_read" { # lambda xray policy data "aws_iam_policy_document" "lambda_xray" { - count = var.lambda_tracing_mode != null ? 1 : 0 + count = var.tracing_config.mode != null ? 1 : 0 statement { actions = [ "xray:BatchGetTraces", @@ -169,7 +172,7 @@ data "aws_iam_policy_document" "lambda_xray" { } resource "aws_iam_role_policy" "pool_xray" { - count = var.lambda_tracing_mode != null ? 1 : 0 + count = var.tracing_config.mode != null ? 1 : 0 policy = data.aws_iam_policy_document.lambda_xray[0].json role = aws_iam_role.pool.name } diff --git a/modules/runners/pool/variables.tf b/modules/runners/pool/variables.tf index 365b625b94..b1c2f19dd6 100644 --- a/modules/runners/pool/variables.tf +++ b/modules/runners/pool/variables.tf @@ -69,8 +69,14 @@ variable "aws_partition" { default = "aws" } -variable "lambda_tracing_mode" { - description = "Enable X-Ray tracing for the lambda functions." 
- type = string - default = null +variable "tracing_config" { + description = "Configuration for lambda tracing." + type = object({ + mode = optional(string, null) + capture_http_requests = optional(bool, false) + capture_error = optional(bool, false) + }) + default = {} } + + diff --git a/modules/runners/scale-down.tf b/modules/runners/scale-down.tf index 521b512e92..d211a9ded7 100644 --- a/modules/runners/scale-down.tf +++ b/modules/runners/scale-down.tf @@ -22,17 +22,20 @@ resource "aws_lambda_function" "scale_down" { environment { variables = { - ENVIRONMENT = var.prefix - GHES_URL = var.ghes_url - LOG_LEVEL = var.log_level - MINIMUM_RUNNING_TIME_IN_MINUTES = coalesce(var.minimum_running_time_in_minutes, local.min_runtime_defaults[var.runner_os]) - NODE_TLS_REJECT_UNAUTHORIZED = var.ghes_url != null && !var.ghes_ssl_verify ? 0 : 1 - PARAMETER_GITHUB_APP_ID_NAME = var.github_app_parameters.id.name - PARAMETER_GITHUB_APP_KEY_BASE64_NAME = var.github_app_parameters.key_base64.name - POWERTOOLS_LOGGER_LOG_EVENT = var.log_level == "debug" ? "true" : "false" - RUNNER_BOOT_TIME_IN_MINUTES = var.runner_boot_time_in_minutes - SCALE_DOWN_CONFIG = jsonencode(var.idle_config) - SERVICE_NAME = "runners-scale-down" + ENVIRONMENT = var.prefix + GHES_URL = var.ghes_url + LOG_LEVEL = var.log_level + MINIMUM_RUNNING_TIME_IN_MINUTES = coalesce(var.minimum_running_time_in_minutes, local.min_runtime_defaults[var.runner_os]) + NODE_TLS_REJECT_UNAUTHORIZED = var.ghes_url != null && !var.ghes_ssl_verify ? 0 : 1 + PARAMETER_GITHUB_APP_ID_NAME = var.github_app_parameters.id.name + PARAMETER_GITHUB_APP_KEY_BASE64_NAME = var.github_app_parameters.key_base64.name + POWERTOOLS_LOGGER_LOG_EVENT = var.log_level == "debug" ? "true" : "false" + RUNNER_BOOT_TIME_IN_MINUTES = var.runner_boot_time_in_minutes + SCALE_DOWN_CONFIG = jsonencode(var.idle_config) + SERVICE_NAME = "runners-scale-down" + POWERTOOLS_TRACE_ENABLED = var.tracing_config.mode != null ? 
true : false + POWERTOOLS_TRACER_CAPTURE_HTTPS_REQUESTS = var.tracing_config.capture_http_requests + POWERTOOLS_TRACER_CAPTURE_ERROR = var.tracing_config.capture_error } } @@ -45,9 +48,9 @@ resource "aws_lambda_function" "scale_down" { } dynamic "tracing_config" { - for_each = var.lambda_tracing_mode != null ? [true] : [] + for_each = var.tracing_config.mode != null ? [true] : [] content { - mode = var.lambda_tracing_mode + mode = var.tracing_config.mode } } } @@ -111,7 +114,7 @@ resource "aws_iam_role_policy_attachment" "scale_down_vpc_execution_role" { } resource "aws_iam_role_policy" "scale_down_xray" { - count = var.lambda_tracing_mode != null ? 1 : 0 + count = var.tracing_config.mode != null ? 1 : 0 policy = data.aws_iam_policy_document.lambda_xray[0].json role = aws_iam_role.scale_down.name } diff --git a/modules/runners/scale-up.tf b/modules/runners/scale-up.tf index 7a05c48c3f..5dfba01e17 100644 --- a/modules/runners/scale-up.tf +++ b/modules/runners/scale-up.tf @@ -13,36 +13,39 @@ resource "aws_lambda_function" "scale_up" { memory_size = 512 tags = local.tags architectures = [var.lambda_architecture] - environment { variables = { - AMI_ID_SSM_PARAMETER_NAME = var.ami_id_ssm_parameter_name - DISABLE_RUNNER_AUTOUPDATE = var.disable_runner_autoupdate - ENABLE_EPHEMERAL_RUNNERS = var.enable_ephemeral_runners - ENABLE_JIT_CONFIG = var.enable_jit_config - ENABLE_JOB_QUEUED_CHECK = local.enable_job_queued_check - ENABLE_ORGANIZATION_RUNNERS = var.enable_organization_runners - ENVIRONMENT = var.prefix - GHES_URL = var.ghes_url - INSTANCE_ALLOCATION_STRATEGY = var.instance_allocation_strategy - INSTANCE_MAX_SPOT_PRICE = var.instance_max_spot_price - INSTANCE_TARGET_CAPACITY_TYPE = var.instance_target_capacity_type - INSTANCE_TYPES = join(",", var.instance_types) - LAUNCH_TEMPLATE_NAME = aws_launch_template.runner.name - LOG_LEVEL = var.log_level - MINIMUM_RUNNING_TIME_IN_MINUTES = coalesce(var.minimum_running_time_in_minutes, 
local.min_runtime_defaults[var.runner_os]) - NODE_TLS_REJECT_UNAUTHORIZED = var.ghes_url != null && !var.ghes_ssl_verify ? 0 : 1 - PARAMETER_GITHUB_APP_ID_NAME = var.github_app_parameters.id.name - PARAMETER_GITHUB_APP_KEY_BASE64_NAME = var.github_app_parameters.key_base64.name - POWERTOOLS_LOGGER_LOG_EVENT = var.log_level == "debug" ? "true" : "false" - RUNNER_LABELS = lower(join(",", var.runner_labels)) - RUNNER_GROUP_NAME = var.runner_group_name - RUNNER_NAME_PREFIX = var.runner_name_prefix - RUNNERS_MAXIMUM_COUNT = var.runners_maximum_count - SERVICE_NAME = "runners-scale-up" - SSM_TOKEN_PATH = local.token_path - SSM_CONFIG_PATH = "${var.ssm_paths.root}/${var.ssm_paths.config}" - SUBNET_IDS = join(",", var.subnet_ids) + AMI_ID_SSM_PARAMETER_NAME = var.ami_id_ssm_parameter_name + DISABLE_RUNNER_AUTOUPDATE = var.disable_runner_autoupdate + ENABLE_EPHEMERAL_RUNNERS = var.enable_ephemeral_runners + ENABLE_JIT_CONFIG = var.enable_jit_config + ENABLE_JOB_QUEUED_CHECK = local.enable_job_queued_check + ENABLE_ORGANIZATION_RUNNERS = var.enable_organization_runners + ENVIRONMENT = var.prefix + GHES_URL = var.ghes_url + INSTANCE_ALLOCATION_STRATEGY = var.instance_allocation_strategy + INSTANCE_MAX_SPOT_PRICE = var.instance_max_spot_price + INSTANCE_TARGET_CAPACITY_TYPE = var.instance_target_capacity_type + INSTANCE_TYPES = join(",", var.instance_types) + LAUNCH_TEMPLATE_NAME = aws_launch_template.runner.name + LOG_LEVEL = var.log_level + MINIMUM_RUNNING_TIME_IN_MINUTES = coalesce(var.minimum_running_time_in_minutes, local.min_runtime_defaults[var.runner_os]) + NODE_TLS_REJECT_UNAUTHORIZED = var.ghes_url != null && !var.ghes_ssl_verify ? 0 : 1 + PARAMETER_GITHUB_APP_ID_NAME = var.github_app_parameters.id.name + PARAMETER_GITHUB_APP_KEY_BASE64_NAME = var.github_app_parameters.key_base64.name + POWERTOOLS_LOGGER_LOG_EVENT = var.log_level == "debug" ? "true" : "false" + POWERTOOLS_TRACE_ENABLED = var.tracing_config.mode != null ? 
true : false + POWERTOOLS_TRACER_CAPTURE_HTTPS_REQUESTS = var.tracing_config.capture_http_requests + POWERTOOLS_TRACER_CAPTURE_ERROR = var.tracing_config.capture_error + RUNNER_LABELS = lower(join(",", var.runner_labels)) + RUNNER_GROUP_NAME = var.runner_group_name + RUNNER_NAME_PREFIX = var.runner_name_prefix + RUNNERS_MAXIMUM_COUNT = var.runners_maximum_count + SERVICE_NAME = "runners-scale-up" + SSM_TOKEN_PATH = local.token_path + SSM_CONFIG_PATH = "${var.ssm_paths.root}/${var.ssm_paths.config}" + SUBNET_IDS = join(",", var.subnet_ids) + } } @@ -55,9 +58,9 @@ resource "aws_lambda_function" "scale_up" { } dynamic "tracing_config" { - for_each = var.lambda_tracing_mode != null ? [true] : [] + for_each = var.tracing_config.mode != null ? [true] : [] content { - mode = var.lambda_tracing_mode + mode = var.tracing_config.mode } } } @@ -134,7 +137,7 @@ resource "aws_iam_role_policy_attachment" "ami_id_ssm_parameter_read" { } resource "aws_iam_role_policy" "scale_up_xray" { - count = var.lambda_tracing_mode != null ? 1 : 0 + count = var.tracing_config.mode != null ? 1 : 0 policy = data.aws_iam_policy_document.lambda_xray[0].json role = aws_iam_role.scale_up.name } diff --git a/modules/runners/ssm-housekeeper.tf b/modules/runners/ssm-housekeeper.tf index 18f266f01f..2a8962b955 100644 --- a/modules/runners/ssm-housekeeper.tf +++ b/modules/runners/ssm-housekeeper.tf @@ -28,10 +28,13 @@ resource "aws_lambda_function" "ssm_housekeeper" { environment { variables = { - ENVIRONMENT = var.prefix - LOG_LEVEL = var.log_level - SSM_CLEANUP_CONFIG = jsonencode(local.ssm_housekeeper.config) - SERVICE_NAME = "ssm-housekeeper" + ENVIRONMENT = var.prefix + LOG_LEVEL = var.log_level + SSM_CLEANUP_CONFIG = jsonencode(local.ssm_housekeeper.config) + SERVICE_NAME = "ssm-housekeeper" + POWERTOOLS_TRACE_ENABLED = var.tracing_config.mode != null ? 
true : false + POWERTOOLS_TRACER_CAPTURE_HTTPS_REQUESTS = var.tracing_config.capture_http_requests + POWERTOOLS_TRACER_CAPTURE_ERROR = var.tracing_config.capture_error } } @@ -44,9 +47,9 @@ resource "aws_lambda_function" "ssm_housekeeper" { } dynamic "tracing_config" { - for_each = var.lambda_tracing_mode != null ? [true] : [] + for_each = var.tracing_config.mode != null ? [true] : [] content { - mode = var.lambda_tracing_mode + mode = var.tracing_config.mode } } } @@ -110,7 +113,7 @@ resource "aws_iam_role_policy_attachment" "ssm_housekeeper_vpc_execution_role" { } resource "aws_iam_role_policy" "ssm_housekeeper_xray" { - count = var.lambda_tracing_mode != null ? 1 : 0 + count = var.tracing_config.mode != null ? 1 : 0 policy = data.aws_iam_policy_document.lambda_xray[0].json role = aws_iam_role.ssm_housekeeper.name } diff --git a/modules/runners/templates/start-runner.sh b/modules/runners/templates/start-runner.sh index 29788c9216..98c0dd449b 100644 --- a/modules/runners/templates/start-runner.sh +++ b/modules/runners/templates/start-runner.sh @@ -1,6 +1,86 @@ -# shellcheck shell=bash +#!/bin/bash + +# https://docs.aws.amazon.com/xray/latest/devguide/xray-api-sendingdata.html +# https://docs.aws.amazon.com/xray/latest/devguide/scorekeep-scripts.html +create_xray_start_segment() { + START_TIME=$(date -d "$(uptime -s)" +%s) + TRACE_ID=$1 + INSTANCE_ID=$2 + SEGMENT_ID=$(dd if=/dev/urandom bs=8 count=1 2>/dev/null | od -An -tx1 | tr -d ' \t\n') + SEGMENT_DOC="{\"trace_id\": \"$TRACE_ID\", \"id\": \"$SEGMENT_ID\", \"start_time\": $START_TIME, \"in_progress\": true, \"name\": \"Runner\",\"origin\": \"AWS::EC2::Instance\", \"aws\": {\"ec2\":{\"instance_id\":\"$INSTANCE_ID\"}}}" + HEADER='{"format": "json", "version": 1}' + # Write the header and the in-progress segment as two lines, then send the file to the local X-Ray daemon over UDP + echo "$HEADER" > document.txt + echo "$SEGMENT_DOC" >> document.txt + UDP_IP="127.0.0.1" + UDP_PORT=2000 + cat document.txt > /dev/udp/$UDP_IP/$UDP_PORT + echo "$SEGMENT_DOC" +} + +create_xray_success_segment() { + 
local SEGMENT_DOC="$1" + if [ -z "$SEGMENT_DOC" ]; then + echo "No segment doc provided" + return + fi + SEGMENT_DOC=$(echo "$SEGMENT_DOC" | jq '. | del(.in_progress)') + END_TIME=$(date +%s) + SEGMENT_DOC=$(echo "$SEGMENT_DOC" | jq -c ". + {\"end_time\": $END_TIME}") + HEADER="{\"format\": \"json\", \"version\": 1}" + # Send the closed segment (in_progress removed, end_time set) to the local X-Ray daemon over UDP + echo "$HEADER" > document.txt + echo "$SEGMENT_DOC" >> document.txt + UDP_IP="127.0.0.1" + UDP_PORT=2000 + cat document.txt > /dev/udp/$UDP_IP/$UDP_PORT + echo "$SEGMENT_DOC" +} + +create_xray_error_segment() { + local SEGMENT_DOC="$1" + if [ -z "$SEGMENT_DOC" ]; then + echo "No segment doc provided" + return + fi + MESSAGE="$2" + ERROR=$(jq -cn --arg message "$MESSAGE" '{exceptions: [{message: $message}]}') + SEGMENT_DOC=$(echo "$SEGMENT_DOC" | jq '. | del(.in_progress)') + END_TIME=$(date +%s) + SEGMENT_DOC=$(echo "$SEGMENT_DOC" | jq -c ". + {\"end_time\": $END_TIME, \"error\": true, \"cause\": $ERROR }") + HEADER="{\"format\": \"json\", \"version\": 1}" + # Send the closed segment, flagged as an error with its cause, to the local X-Ray daemon over UDP + echo "$HEADER" > document.txt + echo "$SEGMENT_DOC" >> document.txt + UDP_IP="127.0.0.1" + UDP_PORT=2000 + cat document.txt > /dev/udp/$UDP_IP/$UDP_PORT + echo "$SEGMENT_DOC" +} + +cleanup() { + local exit_code="$1" + local error_location="$2" + local error_lineno="$3" + + if [ "$exit_code" -ne 0 ]; then + echo "ERROR: runner-start-failed with exit code $exit_code occurred on $error_location" + create_xray_error_segment "$SEGMENT" "runner-start-failed with exit code $exit_code occurred on $error_location - $error_lineno" + fi + # wait so that pending CloudWatch logs and X-Ray traces can be flushed + sleep 10 + if [ "$agent_mode" = "ephemeral" ] || [ "$exit_code" -ne 0 ]; then + echo "Stopping CloudWatch service" + systemctl stop amazon-cloudwatch-agent.service || true + echo "Terminating instance" + aws ec2 terminate-instances \ + --instance-ids "$instance_id" \ + --region "$region" \ + || true + fi +} -## Retrieve instance metadata +trap 'cleanup $? 
$LINENO $BASH_LINENO' EXIT echo "Retrieving TOKEN from AWS API" token=$(curl -f -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 180" || true) @@ -32,6 +112,7 @@ availability_zone=$(curl -f -H "X-aws-ec2-metadata-token: $token" -v http://169. environment=$(curl -f -H "X-aws-ec2-metadata-token: $token" -v http://169.254.169.254/latest/meta-data/tags/instance/ghr:environment) ssm_config_path=$(curl -f -H "X-aws-ec2-metadata-token: $token" -v http://169.254.169.254/latest/meta-data/tags/instance/ghr:ssm_config_path) runner_name_prefix=$(curl -f -H "X-aws-ec2-metadata-token: $token" -v http://169.254.169.254/latest/meta-data/tags/instance/ghr:runner_name_prefix || echo "") +xray_trace_id=$(curl -f -H "X-aws-ec2-metadata-token: $token" -v http://169.254.169.254/latest/meta-data/tags/instance/ghr:trace_id || echo "") %{ else } tags=$(aws ec2 describe-tags --region "$region" --filters "Name=resource-id,Values=$instance_id") @@ -40,6 +121,7 @@ echo "Retrieved tags from AWS API ($tags)" environment=$(echo "$tags" | jq -r '.Tags[] | select(.Key == "ghr:environment") | .Value') ssm_config_path=$(echo "$tags" | jq -r '.Tags[] | select(.Key == "ghr:ssm_config_path") | .Value') runner_name_prefix=$(echo "$tags" | jq -r '.Tags[] | select(.Key == "ghr:runner_name_prefix") | .Value' || echo "") +xray_trace_id=$(echo "$tags" | jq -r '.Tags[] | select(.Key == "ghr:trace_id") | .Value' || echo "") %{ endif } @@ -65,6 +147,18 @@ echo "Retrieved /$ssm_config_path/enable_jit_config parameter - ($enable_jit_con token_path=$(echo "$parameters" | jq --arg ssm_config_path "$ssm_config_path" -r '.[] | select(.Name == "'$ssm_config_path'/token_path") | .Value') echo "Retrieved /$ssm_config_path/token_path parameter - ($token_path)" +if [[ "$xray_trace_id" != "" ]]; then + # run xray service + curl https://s3.us-east-2.amazonaws.com/aws-xray-assets.us-east-2/xray-daemon/aws-xray-daemon-linux-3.x.zip -o aws-xray-daemon-linux-3.x.zip + unzip 
aws-xray-daemon-linux-3.x.zip -d aws-xray-daemon-linux-3.x + chmod +x ./aws-xray-daemon-linux-3.x/xray + ./aws-xray-daemon-linux-3.x/xray -o -n "$region" & + + + SEGMENT=$(create_xray_start_segment "$xray_trace_id" "$instance_id") + echo "$SEGMENT" +fi + if [[ "$enable_cloudwatch_agent" == "true" ]]; then echo "Cloudwatch is enabled" amazon-cloudwatch-agent-ctl -a fetch-config -m ec2 -s -c "ssm:$ssm_config_path/cloudwatch_agent_config_runner" @@ -96,7 +190,7 @@ fi chown -R $run_as . info_arch=$(uname -p) -info_os=$(( lsb_release -ds || cat /etc/*release || uname -om ) 2>/dev/null | head -n1 | cut -d "=" -f2- | tr -d '"') +info_os=$( ( lsb_release -ds || cat /etc/*release || uname -om ) 2>/dev/null | head -n1 | cut -d "=" -f2- | tr -d '"') tee /opt/actions-runner/.setup_info </opt/start-runner-service.sh <<-EOF - echo "Starting the runner in ephemeral mode" if [[ "$enable_jit_config" == "true" ]]; then @@ -139,17 +231,7 @@ cat >/opt/start-runner-service.sh <<-EOF echo "Starting without JIT config" sudo --preserve-env=RUNNER_ALLOW_RUNASROOT -u "$run_as" -- ./run.sh fi - echo "Runner has finished" - - echo "Stopping cloudwatch service" - systemctl stop amazon-cloudwatch-agent.service - echo "Terminating instance" - aws ec2 terminate-instances --instance-ids "$instance_id" --region "$region" -EOF - # Starting the runner via a own process to ensure this process terminates - nohup bash /opt/start-runner-service.sh & - else echo "Installing the runner as a service" ./svc.sh install "$run_as" diff --git a/modules/runners/variables.tf b/modules/runners/variables.tf index 3cda8dc355..28a7b49697 100644 --- a/modules/runners/variables.tf +++ b/modules/runners/variables.tf @@ -585,12 +585,17 @@ variable "runner_name_prefix" { } } -variable "lambda_tracing_mode" { - description = "Enable X-Ray tracing for the lambda functions." - type = string - default = null +variable "tracing_config" { + description = "Configuration for lambda tracing." 
+ type = object({ + mode = optional(string, null) + capture_http_requests = optional(bool, false) + capture_error = optional(bool, false) + }) + default = {} } + variable "credit_specification" { description = "The credit option for CPU usage of a T instance. Can be unset, \"standard\" or \"unlimited\"." type = string diff --git a/modules/webhook/README.md b/modules/webhook/README.md index 2ff514fabf..89e7a75f87 100644 --- a/modules/webhook/README.md +++ b/modules/webhook/README.md @@ -88,7 +88,6 @@ No modules. | [lambda\_security\_group\_ids](#input\_lambda\_security\_group\_ids) | List of security group IDs associated with the Lambda function. | `list(string)` | `[]` | no | | [lambda\_subnet\_ids](#input\_lambda\_subnet\_ids) | List of subnets in which the action runners will be launched; the subnets need to be subnets in the `vpc_id`. | `list(string)` | `[]` | no | | [lambda\_timeout](#input\_lambda\_timeout) | Time out of the lambda in seconds. | `number` | `10` | no | -| [lambda\_tracing\_mode](#input\_lambda\_tracing\_mode) | Enable X-Ray tracing for the lambda functions. | `string` | `null` | no | | [lambda\_zip](#input\_lambda\_zip) | File location of the lambda zip file. | `string` | `null` | no | | [log\_level](#input\_log\_level) | Logging level for lambda logging. Valid values are 'silly', 'trace', 'debug', 'info', 'warn', 'error', 'fatal'. | `string` | `"info"` | no | | [logging\_kms\_key\_id](#input\_logging\_kms\_key\_id) | Specifies the kms key id to encrypt the logs with. | `string` | `null` | no | @@ -100,6 +99,7 @@ No modules. | [runner\_config](#input\_runner\_config) | SQS queue to publish accepted build events based on the runner type. When exact match is disabled the webhook accepts the event if one of the workflow job labels is part of the matcher. The priority defines the order in which the matchers are applied. |
map(object({
arn = string
id = string
fifo = bool
matcherConfig = object({
labelMatchers = list(list(string))
exactMatch = bool
priority = optional(number, 999)
})
}))
| n/a | yes | | [sqs\_workflow\_job\_queue](#input\_sqs\_workflow\_job\_queue) | SQS queue to monitor github events. |
object({
id = string
arn = string
})
| `null` | no | | [tags](#input\_tags) | Map of tags that will be added to created resources. By default resources will be tagged with name and environment. | `map(string)` | `{}` | no | +| [tracing\_config](#input\_tracing\_config) | Configuration for lambda tracing. |
object({
mode = optional(string, null)
capture_http_requests = optional(bool, false)
capture_error = optional(bool, false)
})
| `{}` | no | | [webhook\_lambda\_apigateway\_access\_log\_settings](#input\_webhook\_lambda\_apigateway\_access\_log\_settings) | Access log settings for webhook API gateway. |
object({
destination_arn = string
format = string
})
| `null` | no | | [webhook\_lambda\_s3\_key](#input\_webhook\_lambda\_s3\_key) | S3 key for webhook lambda function. Required if using S3 bucket to specify lambdas. | `string` | `null` | no | | [webhook\_lambda\_s3\_object\_version](#input\_webhook\_lambda\_s3\_object\_version) | S3 object version for webhook lambda function. Useful if S3 versioning is enabled on source bucket. | `string` | `null` | no | diff --git a/modules/webhook/policies.tf b/modules/webhook/policies.tf index a95949aadf..454d943b4b 100644 --- a/modules/webhook/policies.tf +++ b/modules/webhook/policies.tf @@ -1,5 +1,5 @@ data "aws_iam_policy_document" "lambda_xray" { - count = var.lambda_tracing_mode != null ? 1 : 0 + count = var.tracing_config.mode != null ? 1 : 0 statement { actions = [ "xray:BatchGetTraces", diff --git a/modules/webhook/variables.tf b/modules/webhook/variables.tf index e66f8df226..c298f7bd09 100644 --- a/modules/webhook/variables.tf +++ b/modules/webhook/variables.tf @@ -182,8 +182,12 @@ variable "github_app_parameters" { }) } -variable "lambda_tracing_mode" { - description = "Enable X-Ray tracing for the lambda functions." - type = string - default = null +variable "tracing_config" { + description = "Configuration for lambda tracing." + type = object({ + mode = optional(string, null) + capture_http_requests = optional(bool, false) + capture_error = optional(bool, false) + }) + default = {} } diff --git a/modules/webhook/webhook.tf b/modules/webhook/webhook.tf index 4ceda1b7c2..6dafc08874 100644 --- a/modules/webhook/webhook.tf +++ b/modules/webhook/webhook.tf @@ -21,13 +21,16 @@ resource "aws_lambda_function" "webhook" { environment { variables = { - ENVIRONMENT = var.prefix - LOG_LEVEL = var.log_level - POWERTOOLS_LOGGER_LOG_EVENT = var.log_level == "debug" ? 
"true" : "false" - PARAMETER_GITHUB_APP_WEBHOOK_SECRET = var.github_app_parameters.webhook_secret.name - REPOSITORY_WHITE_LIST = jsonencode(var.repository_white_list) - RUNNER_CONFIG = jsonencode(local.runner_config_sorted) - SQS_WORKFLOW_JOB_QUEUE = try(var.sqs_workflow_job_queue, null) != null ? var.sqs_workflow_job_queue.id : "" + ENVIRONMENT = var.prefix + LOG_LEVEL = var.log_level + POWERTOOLS_LOGGER_LOG_EVENT = var.log_level == "debug" ? "true" : "false" + POWERTOOLS_TRACE_ENABLED = var.tracing_config.mode != null ? true : false + POWERTOOLS_TRACER_CAPTURE_HTTPS_REQUESTS = var.tracing_config.capture_http_requests + POWERTOOLS_TRACER_CAPTURE_ERROR = var.tracing_config.capture_error + PARAMETER_GITHUB_APP_WEBHOOK_SECRET = var.github_app_parameters.webhook_secret.name + REPOSITORY_WHITE_LIST = jsonencode(var.repository_white_list) + RUNNER_CONFIG = jsonencode(local.runner_config_sorted) + SQS_WORKFLOW_JOB_QUEUE = try(var.sqs_workflow_job_queue, null) != null ? var.sqs_workflow_job_queue.id : "" } } @@ -42,9 +45,9 @@ resource "aws_lambda_function" "webhook" { tags = var.tags dynamic "tracing_config" { - for_each = var.lambda_tracing_mode != null ? [true] : [] + for_each = var.tracing_config.mode != null ? [true] : [] content { - mode = var.lambda_tracing_mode + mode = var.tracing_config.mode } } } @@ -128,7 +131,7 @@ resource "aws_iam_role_policy" "webhook_ssm" { } resource "aws_iam_role_policy" "xray" { - count = var.lambda_tracing_mode != null ? 1 : 0 + count = var.tracing_config.mode != null ? 1 : 0 policy = data.aws_iam_policy_document.lambda_xray[0].json role = aws_iam_role.webhook_lambda.name } diff --git a/variables.deprecated.tf b/variables.deprecated.tf index e69de29bb2..c4e8abfe1d 100644 --- a/variables.deprecated.tf +++ b/variables.deprecated.tf @@ -0,0 +1,10 @@ +variable "lambda_tracing_mode" { + description = "DEPRECATED: Replaced by `tracing_config`." 
+ type = string + default = null + + validation { + condition = anytrue([var.lambda_tracing_mode == null]) + error_message = "DEPRECATED, Replaced by `tracing_config`." + } +} diff --git a/variables.tf b/variables.tf index 1bdbfbe393..574734c420 100644 --- a/variables.tf +++ b/variables.tf @@ -747,10 +747,14 @@ variable "runner_name_prefix" { } } -variable "lambda_tracing_mode" { - description = "Enable X-Ray tracing for the lambda functions." - type = string - default = null +variable "tracing_config" { + description = "Configuration for lambda tracing." + type = object({ + mode = optional(string, null) + capture_http_requests = optional(bool, false) + capture_error = optional(bool, false) + }) + default = {} } variable "runner_credit_specification" {