From 2e41312b588eebd197b6ce34552b8735c8272a95 Mon Sep 17 00:00:00 2001
From: ford prior <fprior@amazon.com>
Date: Mon, 16 Dec 2024 12:01:21 -0500
Subject: [PATCH] Tools: Weathertop - Update docs & deploy.py (#7153)

---
 .tools/test/DEPLOYMENT.md    | 139 +++++++++-------
 .tools/test/stacks/deploy.py | 310 ++++++++++++++++++++++++++---------
 2 files changed, 315 insertions(+), 134 deletions(-)

diff --git a/.tools/test/DEPLOYMENT.md b/.tools/test/DEPLOYMENT.md
index e4557a49928..48cddf963f3 100644
--- a/.tools/test/DEPLOYMENT.md
+++ b/.tools/test/DEPLOYMENT.md
@@ -1,92 +1,109 @@
 # Deployment Instructions
 
-There are two ways to deploy the code in this directory.
+This repository contains infrastructure deployment scripts for testing SDK example code. The infrastructure is managed through AWS CDK and can be deployed in two ways:
+1. [deploy.py](#option-1-using-deploypy)
+2. [invoking the CDK directly](#option-2-invoking-cdk-directly)
 
-## 1. Using the deploy script
+## Option 1. Using `deploy.py`
 
-To deploy any stack in this directory, run the [deploy.py](stacks/deploy.py) script.
+The [deploy.py](stacks/deploy.py) script is the primary method for deploying the infrastructure stacks.
+It exists to facilitate stack deployments to any number of AWS accounts, without requiring the user to
+fetch new tokens and set new variables for each deployment.
+
+### Deployment types
+The script handles three types of deployments:
+
+1. **Images Stack** (`images`):
+   - Creates empty ECR private repositories for all tools listed in [targets.yaml](stacks/config/targets.yaml)
+   - Users must implement their own image versioning and pushing mechanism
+   - Example: GitHub Actions with OIDC provider works well for this purpose
+
+2. **Admin Stack** (`admin`):
+   - Deploys event emission infrastructure
+   - Creates IAM policies for cross-account event subscription
+   - **Required**: Must be deployed before any plugin stacks
+   - Works with single or multiple accounts listed in [targets.yaml](stacks/config/targets.yaml)
+
+3. **Plugin Stack** (`plugin`):
+   - Deploys two stacks to each account in [targets.yaml](stacks/config/targets.yaml):
+     1. Plugin stack that subscribes to admin stack events
+     2. Account nuker stack that cleans up residual test resources
+   - Requires `admin` stack to be deployed first
+
+### Environment
+The script is designed to run from the command line interface (CLI) on macOS or Linux systems. You can use a shell such as zsh or bash in the default macOS terminal, or in any other terminal emulator of your choice.
+
+### Why subprocess?
+The script uses Python's `subprocess` module to execute AWS Cloud Development Kit (CDK) command-line interface (CLI) commands. While the CDK provides a Python library, 1) we use the TypeScript version per team standard, and 2) the Python CDK library does not expose a way to invoke the CDK CLI itself from within a Python script. As a consequence, we rely on the `subprocess` module to invoke the CDK CLI commands for our TypeScript stacks.
 
 ### Script Prerequisites
 
+- A command-line shell, such as zsh or bash, on macOS or Linux
 - Python 3.11 installed
-- `ada` or equivalent CLI library for fetching AWS credentials.
+- AWS CLI and CDK installed and configured (NodeJS 18+)
+- Admin-like IAM permissions on the role assumed (`AdministratorAccess` will work for non-production test environments).
+- Configuration files [resources.yaml](stacks/config/resources.yaml) and [targets.yaml](stacks/config/targets.yaml)
+- Environment variables set for:
+  - `TOKEN_TOOL`: Path to credential management tool
+  - `TOKEN_PROVIDER`: Identity provider for AWS credentials
 - Dependencies installed in Python virtual environment:
-
 ```
-python -m venv .venv && source .venv/bin/activate && pip install -r requirements.txt
+python -m venv .venv && source .venv/bin/activate && pip install -r requirements.txt
 ```
 
-- AWS CLI and CDK installed and configured (NodeJS 18+)
-- Permissions to execute AWS CDK and shell commands (`AdministratorAccess` will work for non-production test environments)
-- Configuration files `resources.yaml` and `targets.yaml`, which exist in the [stacks/config](stacks/config) directory within the same directory as the script
+### Note on EnvVars `TOKEN_TOOL` and `TOKEN_PROVIDER`
+These environment variables are designed to partly obscure the tooling used by AWS.
+The `get_tokens` function on [deploy.py#L167](stacks/deploy.py#L167) may require additional refactoring to comply with whatever token tool you are using.
 
 ### Usage
 
 #### Command Syntax
 
 ```bash
-cd stacks ; python deploy.py <stack>
+cd stacks ; python deploy.py <stack-type>
 ```
 
-Replace `<stack>` with one of the supported stacks:
+Replace `<stack-type>` with one of the supported stacks:
 
 - `admin`: Deploys admin-specific resources.
 - `images`: Deploys image-related resources.
 - `plugin`: Deploys plugin-specific resources.
-  - To deploy only a specific language's plugin, pass `--language <language>` where language is an account in [targets.yaml](stacks/config/targets.yaml).
-
-#### Additional Notes
-
-The script automatically navigates to the required directory based on the type and language of deployment (typescript is the default).
-
-Environment variables are set and used during the deployment process.
+  - To deploy only a specific language's plugin, pass `--language <language>` where `<language>` is an account name in [targets.yaml](stacks/config/targets.yaml), e.g. `python`.
 
-Errors during command execution are caught and displayed.
-
-The script includes a sleep period after deployment to avoid conflicts with simultaneous CDK operations.
-
-Make sure to check the script's output for any errors or confirmation messages that indicate the deployment's success or failure. Adjust the config files as necessary to match your deployment requirements.
+## Technical Notes
+Using `subprocess` to run the CDK CLI creates some brittleness but provides necessary flexibility for cross-account deployments.
 
+#### Additional Notes
+Some non-obvious quirks of the script include:
+ - programmatic navigation to the required CDK directory based on the type and language of the CDK deployment (`typescript` is the default).
+ - a random-seeming sleep period after deployment to avoid conflicts with the previous CDK operation, which may not have killed its thread yet.
+ - more generally, extensive use of the `subprocess` module, which creates some acceptable brittleness that may result in future regressions.
 ---
 
-## 2. Invoking CDK directly
-
-The second option involves navigating to each stack directory and running the CDK commands.
-
-The following instructions assume a "plugin account" (the AWS account where testing activities will occur) of "python" (corresponding to a Docker image) per [this repository's configuration](config/targets.yaml).
-You can replace Python with any of the other languages listed in this repository's configuration.
-
-To request an alternate configuration for your own repository or use case, please [submit an issue](https://github.com/awsdocs/aws-doc-sdk-examples/issues/new?labels=type%2Fenhancement&labels=Tools&title=%5BEnhancement%5D%3A+Weathertop+Customization+Request&&) with the `Tools` label.
+## Option 2. Invoking CDK directly
 
-### 1. Deploy Plugin Stack for your language (e.g. Python)
+This option involves navigating to each stack directory ([images](stacks/images), [admin](stacks/admin), or [plugin](stacks/plugin)) and running the `cdk` commands explained below.
 
-User will:
+Required steps for all stack types:
+1. Set Python virtualenv within [plugin directory](stacks/plugin/admin).
+1. Get AWS account tokens for target account.
+1. Run `cdk bootstrap` and `cdk deploy`.
 
-1. Set Python virtualenv within [plugin directory](plugin/admin).
-1. `export LANGUAGE_NAME=python`.
-1. Get AWS account tokens for plugin account.
-1. `cdk bootstrap` and `cdk deploy`.
+### Special details for `plugin` type
+For the `plugin` type, there are a few important details:
+1. You must also run `export LANGUAGE_NAME=python` if your tool is `python`.
+1. For the stack to begin accepting test events, you must set `status` to `enabled` for your tool (e.g. `python`) in [targets.yaml](stacks/config/targets.yaml) and redeploy the `admin` stack.
+1. To manually trigger test runs, [submit a test job](#submit-test-job) in AWS Batch.
 
-### 2. Enable Consumer Stack to receive event notifications
+## Testing & Validation
+Users can trigger test runs from within the AWS Console after deploying the `plugin` stack for their chosen tool.
 
-User will:
+### Submit test job
 
-1. Set `status` to `enabled` in [targets.yaml](config/targets.yaml) for your language
-1. Raise PR.
+Once the `plugin` stack is deployed for your chosen tool, submit a test job from the AWS Console as follows.
 
-Admin will:
-
-1. Approve and merge PR.
-1. Set Python virtualenv within [admin directory](stacks/admin).
-1. Get Admin account tokens.
-1. `cdk bootstrap` and `cdk deploy`.
-1. Request that user [submit a test job](#3-submit-test-job).
-
-### 3. Submit test job
-
-User will:
-
-1. Log into console for Python account
+Steps:
+1. Log into console for tool AWS account (e.g. `python`)
 1. Navigate to "Job Definitions".
    ![](docs/validation-flow-1.jpg)
 1. Click "Submit Job".
@@ -97,10 +114,12 @@ User will:
    ![](docs/validation-flow-4.jpg)
 1. Click "Create job".
    ![](docs/validation-flow-5.jpg)
-1. [Validate results of test job](#3-optional-view-test-job-results)
-
-### 3. Optional: View CloudWatch job results in Batch
-
-1. Navigate to a job
-1. When status is `SUCCEEDED` or `FAILED`, click "Logging" tab.
+1. [Validate results of test job](#view-test-run-results)
+
+### View test run results
+1. Log into console for tool AWS account (e.g. `python`)
+1. Click `Jobs` and select the only job queue.
+1. Toggle `Load all jobs`.
+1. View job details by clicking the hyperlinked value in the `Name` field.
+1. When status is `SUCCEEDED` or `FAILED`, click "Logging" tab.
    ![](docs/validation-flow-6.jpg)
diff --git a/.tools/test/stacks/deploy.py b/.tools/test/stacks/deploy.py
index bd0d6474bfd..0bc9f24901e 100644
--- a/.tools/test/stacks/deploy.py
+++ b/.tools/test/stacks/deploy.py
@@ -1,144 +1,306 @@
 # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
 # SPDX-License-Identifier: Apache-2.0
+"""
+AWS CDK Deployment Script for SDK Testing Infrastructure
+
+This script manages the deployment of AWS infrastructure components needed for SDK testing.
+It handles three main deployment scenarios:
+
+1. ECR Repository Setup (type: images):
+   Creates empty ECR private repositories for tools listed in targets.yaml
+
+2. Admin Stack Deployment (type: admin):
+   Deploys a stack that emits events and contains IAM policies allowing
+   cross-account subscription from AWS accounts listed in targets.yaml
+
+3. Plugin Stack Deployment (type: plugin):
+   Deploys two stacks in each target account:
+   - A plugin stack that subscribes to the admin stack's events
+   - An account nuker stack that cleans up resources left by test executions
+
+Prerequisites:
+    - TOKEN_TOOL environment variable set to the token generation tool path
+    - TOKEN_PROVIDER environment variable set to the token provider
+    - Appropriate AWS credentials and permissions
+    - Valid configuration in config/resources.yaml and config/targets.yaml
+
+Note:
+    This script uses subprocess to run CDK commands as CDK doesn't support
+    direct module import. While this creates some brittleness, it provides
+    necessary flexibility for cross-account deployments.
+"""
 
 import argparse
-import subprocess
+import logging
 import os
-import yaml
-import time
 import re
+import shutil
+import subprocess
+import time
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Union
+
+import boto3
+import yaml
+from botocore.exceptions import ClientError, NoCredentialsError
+
+from nuke.typescript.create_account_alias import create_account_alias
+from nuke.typescript.upload_job_scripts import process_stack_and_upload_files
+
+# Constants
+AWS_DEFAULT_REGION = "us-east-1"
+CDK_DEPLOYMENT_ROLE = "weathertop-cdk-deployments"
+ACCOUNT_ALIAS = "weathertop-test"
+CDK_ACKNOWLEDGE_ID = "31885"
+
+# Configure logging
+logger = logging.getLogger(__name__)
+logging.basicConfig(
+    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
+)
+
+
+class DeploymentError(Exception):
+    """Custom exception for deployment-related errors."""
+
+    pass
+
+
+class ConfigurationManager:
+    """Manages loading and validation of configuration files."""
+
+    def __init__(self, config_dir: Path = Path("config")):
+        self.config_dir = config_dir
+
+    def load_admin_config(self) -> Dict[str, Dict[str, str]]:
+        """Load admin configuration from resources.yaml."""
+        try:
+            with open(self.config_dir / "resources.yaml") as file:
+                data = yaml.safe_load(file)
+                return {
+                    "admin": {
+                        "account_id": str(data["admin_acct"]),
+                        "status": "enabled",
+                    }
+                }
+        except Exception as e:
+            logger.error(f"Failed to read admin config: {e}")
+            raise
+
+    def load_target_accounts(self) -> Dict[str, Any]:
+        """Load target accounts from targets.yaml."""
+        try:
+            with open(self.config_dir / "targets.yaml") as file:
+                return yaml.safe_load(file)
+        except Exception as e:
+            logger.error(f"Failed to read targets config: {e}")
+            raise
 
 
-def run_shell_command(command, env_vars=None):
+def get_caller_identity() -> None:
     """
-    Execute a given shell command securely and return its output.
+    Get the caller identity from AWS STS.
+
+    Logs the account ID, ARN, and user ID of the caller.
+    Logs an error if no credentials are found or if there is a client error.
+    """
+    try:
+        session = boto3.Session()
+        sts_client = session.client("sts")
+        caller_identity = sts_client.get_caller_identity()
+
+        logger.info(f"Credentials Account ID: {caller_identity['Account']}")
+        logger.debug(f"Arn: {caller_identity['Arn']}")
+        logger.debug(f"UserId: {caller_identity['UserId']}")
+
+    except NoCredentialsError:
+        logger.info("No credentials found in shared folder. Credentials wiped!")
+    except ClientError as e:
+        logger.error(f"An error occurred: {e}")
+
+def run_shell_command(
+    command: List[str], env_vars: Optional[Dict[str, str]] = None
+) -> None:
+    """
+    Execute a given shell command securely and log its output.
 
     Args:
-    command (list): The command and its arguments listed as separate items.
-    env_vars (dict, optional): Additional environment variables to set for the command.
+        command: The command and its arguments listed as separate items.
+        env_vars: Additional environment variables to set for the command.
 
-    Outputs the result of the command execution to the console. In case of an error,
-    it outputs the error message and the stack trace.
+    Raises:
+        subprocess.CalledProcessError: If the command execution fails.
     """
-    # Prepare the environment
     env = os.environ.copy()
     if env_vars:
         env.update(env_vars)
 
     command_str = " ".join(command)
-    print("COMMAND: " + command_str)
+    logger.info(f"COMMAND: {command_str}")
+
     try:
         output = subprocess.check_output(command, stderr=subprocess.STDOUT, env=env)
-        print(f"Command output: {output.decode()}")
+        logger.info(f"STDOUT:\n{output.decode()}")
     except subprocess.CalledProcessError as e:
-        print(f"Error executing command: {e.output.decode()}")
+        logger.error(f"Error executing command: {e.output.decode()}")
         raise
     except Exception as e:
-        print(f"Exception executing command: {e!r}")
+        logger.error(f"Exception executing command: {e!r}")
         raise
 
 
-def validate_alphanumeric(value, name):
+def validate_alphanumeric(value: str, name: str) -> None:
     """
     Validate that the given value is alphanumeric.
 
     Args:
-    value (str): The value to validate.
-    name (str): The name of the variable for error messages.
+        value: The value to validate.
+        name: The name of the variable for error messages.
 
     Raises:
-    ValueError: If the value is not alphanumeric.
+        ValueError: If the value is not alphanumeric.
     """
     if not re.match(r"^\w+$", value):
         raise ValueError(f"{name} must be alphanumeric. Received: {value}")
 
 
-def deploy_resources(account_id, account_name, dir, lang="typescript"):
+def get_tokens(account_id: str) -> None:
     """
-    Deploy resources to a specified account using configuration specified by directory and language.
+    Get AWS tokens for the specified account.
 
     Args:
-    account_id (str): The AWS account ID where resources will be deployed.
-    account_name (str): A human-readable name for the account, used for environment variables.
-    dir (str): The base directory containing deployment scripts or configurations.
-    lang (str, optional): The programming language of the deployment scripts. Defaults to 'typescript'.
-
-    Changes to the desired directory, sets up necessary environment variables, and executes
-    deployment commands.
+        account_id: The AWS account ID for which tokens will be obtained.
     """
-    validate_alphanumeric(account_id, "account_id")
-    validate_alphanumeric(account_name, "account_name")
+    get_token_tool = os.getenv("TOKEN_TOOL")
+    get_token_provider = os.getenv("TOKEN_PROVIDER")
 
-    if dir not in os.getcwd():
-        os.chdir(f"{dir}/{lang}")
+    if not all([get_token_tool, get_token_provider]):
+        raise DeploymentError(
+            "TOKEN_TOOL and TOKEN_PROVIDER environment variables must be set"
+        )
 
-    # Securely update tokens
     get_tokens_command = [
-        "ada",
+        get_token_tool,
         "credentials",
         "update",
         "--account",
         account_id,
         "--provider",
-        "isengard",
+        get_token_provider,
         "--role",
-        "weathertop-cdk-deployments",
+        CDK_DEPLOYMENT_ROLE,
         "--once",
     ]
     run_shell_command(get_tokens_command)
+    os.environ["AWS_DEFAULT_REGION"] = AWS_DEFAULT_REGION
+    get_caller_identity()
+
+
+def deploy_resources(
+    account_id: str,
+    account_name: str,
+    dir_path: Union[str, Path],
+    lang: str = "typescript",
+) -> None:
+    """
+    Deploy resources to a specified account using configuration specified by directory and language.
+
+    Args:
+        account_id: The AWS account ID where resources will be deployed.
+        account_name: A human-readable name for the account, used for environment variables.
+        dir_path: The base directory containing deployment scripts or configurations.
+        lang: The programming language of the deployment scripts.
+    """
+    validate_alphanumeric(account_id, "account_id")
+    validate_alphanumeric(account_name, "account_name")
+
+    if dir_path not in os.getcwd():
+        os.chdir(os.path.join(dir_path, lang))
 
-    # Deploy using CDK
+    run_shell_command(["cdk", "acknowledge", CDK_ACKNOWLEDGE_ID])
     deploy_command = ["cdk", "deploy", "--require-approval", "never"]
-    print(" ".join(deploy_command))
     run_shell_command(deploy_command, env_vars={"TOOL_NAME": account_name})
 
     # Delay to avoid CLI conflicts
-    # TODO: Add waiter
+    # TODO: Replace with proper waiter implementation
     time.sleep(15)
 
+    get_caller_identity()
 
-def main():
-    parser = argparse.ArgumentParser(description="admin, images, or plugin stack.")
-    parser.add_argument("type", choices=["admin", "images", "plugin"])
-    parser.add_argument("--language")
-    args = parser.parse_args()
 
-    accounts = None
+def deploy_stacks(
+    stack_type: str, accounts: Dict[str, Any], language: Optional[str]
+) -> None:
+    """
+    Deploy the specified stack type to all target accounts.
+
+    Args:
+        stack_type: Type of stack to deploy (admin, images, or plugin)
+        accounts: Dictionary of account configurations
+        language: Optional specific language to deploy for
+    """
+    items = [(language, accounts[language])] if language else accounts.items()
 
-    if args.type in {"admin", "images"}:
-        try:
-            with open("config/resources.yaml", "r") as file:
-                data = yaml.safe_load(file)
-                accounts = {
-                    "admin": {
-                        "account_id": f"{data['admin_acct']}",
-                        "status": "enabled",
-                    }
-                }
-        except Exception as e:
-            print(f"Failed to read config data: \n{e}")
-    elif args.type in {"plugin"}:
-        try:
-            with open("config/targets.yaml", "r") as file:
-                accounts = yaml.safe_load(file)
-        except Exception as e:
-            print(f"Failed to read config data: \n{e}")
-    
-    if accounts is None:
-        raise ValueError(f"Could not load accounts for stack {args.type}")
-
-    if args.language:
-        items = [(args.language, accounts[args.language])]
-    else:
-        items = accounts.items()
-        
     for account_name, account_info in items:
+        logger.info(
+            f"\n\n\n\n #### NEW DEPLOYMENT #### \n\n\n\n"
+            f"Deploying 🚀 {stack_type} stack to account {account_name}"
+            f" with ID {account_info['account_id']}"
+        )
+
+        get_tokens(account_info["account_id"])
+        deploy_resources(account_info["account_id"], account_name, stack_type)
+
+        if stack_type == "plugin":
+            logger.info(
+                f"Deploying ☢️  AWS-Nuke to account {account_name}"
+                f" with ID {account_info['account_id']}"
+            )
+            os.chdir("../..")
+
+            get_tokens(account_info["account_id"])
+            create_account_alias(ACCOUNT_ALIAS)
+
+            get_tokens(account_info["account_id"])
+            deploy_resources(account_info["account_id"], account_name, "nuke")
+
+            get_tokens(account_info["account_id"])
+            process_stack_and_upload_files()
 
-        print(
-            f"Reading from account {account_name} with ID {account_info['account_id']}"
+            os.chdir("../..")
+
+
+def main() -> None:
+    """Execute the main deployment workflow."""
+    parser = argparse.ArgumentParser(
+        description="Deploy admin, images, or plugin stack.",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+    )
+    parser.add_argument(
+        "type", choices=["admin", "images", "plugin"], help="Type of stack to deploy"
+    )
+    parser.add_argument("--language", help="Specific language to deploy for")
+    args = parser.parse_args()
+
+    config_manager = ConfigurationManager()
+
+    try:
+        accounts = (
+            config_manager.load_admin_config()
+            if args.type in {"admin", "images"}
+            else config_manager.load_target_accounts()
         )
-        deploy_resources(account_info["account_id"], account_name, args.type)
+
+        if not accounts:
+            raise DeploymentError(f"No accounts found for stack type: {args.type}")
+
+        deploy_stacks(args.type, accounts, args.language)
+
+    except Exception as e:
+        logger.error(f"Deployment failed: {e}")
+        raise
 
 
 if __name__ == "__main__":
+    os.environ["JSII_SILENCE_WARNING_UNTESTED_NODE_VERSION"] = "true"
     main()