From 52ed7d72e9669a7bdfaf11caeea6daa29911bc8f Mon Sep 17 00:00:00 2001 From: Jarek Potiuk Date: Mon, 30 Dec 2024 21:25:45 +0100 Subject: [PATCH] Extend --from-job and --from-pr to PROD images as well (#45296) Building on top of #45287 we also add the same capability for PROD images. Additionally: * the token is required so it is passed as obligatory --github-token option (with GITHUB_TOKEN env var as source of it). Help contains link to generate the token so that it can be easily generated. * error messages contain message text on top of error code * the --github-repository is used to determine the right github repo to use when calling the API. * PROD image platform escaping used `-` instead of `_` which made it impossible to find the artifact when looking for it. This has been fixed. --- .../doc/images/output_ci-image_load.svg | 64 +++++++++++------ .../doc/images/output_ci-image_load.txt | 2 +- .../doc/images/output_prod-image_load.svg | 68 +++++++++++++++---- .../doc/images/output_prod-image_load.txt | 2 +- .../commands/ci_image_commands.py | 48 ++++++------- .../commands/ci_image_commands_config.py | 3 +- .../commands/common_image_options.py | 22 ++++++ .../commands/production_image_commands.py | 36 +++++++--- .../production_image_commands_config.py | 4 ++ dev/breeze/src/airflow_breeze/utils/github.py | 37 +++++----- 10 files changed, 188 insertions(+), 98 deletions(-) diff --git a/dev/breeze/doc/images/output_ci-image_load.svg b/dev/breeze/doc/images/output_ci-image_load.svg index f58763cd2a129..9dc873546c077 100644 --- a/dev/breeze/doc/images/output_ci-image_load.svg +++ b/dev/breeze/doc/images/output_ci-image_load.svg @@ -1,4 +1,4 @@ - + - + @@ -117,9 +119,24 @@ + + + + + + + + + + + + + + + - Command: ci-image load + Command: ci-image load @@ -135,25 +152,30 @@ Load CI image from a file. 
╭─ Load image flags ───────────────────────────────────────────────────────────────────────────────────────────────────╮ ---python-pPython major/minor version used in Airflow image for images. -(>3.9< | 3.10 | 3.11 | 3.12)                                 -[default: 3.9]                                               ---platformPlatform for Airflow image.(linux/amd64 | linux/arm64) ---image-fileOptional file name to load the image from - name must follow the                     -convention:`ci-image-save-{escaped_platform}-*-{python_version}.tar`.                -(FILE)                                                                               ---github-repository-gGitHub repository used to pull, push run images.(TEXT)[default: apache/airflow] ---skip-image-file-deletionSkip image deletion after loading. ---from-jobOptional run id of the github action job to load the image from.(TEXT) ---from-prOptional pr number of the github action job to load the image from. loads the image  -from the latest job.                                                                 -(TEXT)                                                                               -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Common options ─────────────────────────────────────────────────────────────────────────────────────────────────────╮ ---verbose-vPrint verbose information about performed steps. ---dry-run-DIf dry-run is set, commands are only printed, not executed. ---help-hShow this message and exit. +--python-pPython major/minor version used in Airflow image for images. 
+(>3.9< | 3.10 | 3.11 | 3.12)                                 +[default: 3.9]                                               +--platformPlatform for Airflow image.(linux/amd64 | linux/arm64) +--image-fileOptional file name to load the image from - name must follow the                  +convention:`ci-image-save-{escaped_platform}-*-{python_version}.tar`. where       +escaped_platform is one of linux_amd64 or linux_arm64.                            +(FILE)                                                                            +--github-repository-gGitHub repository used to pull, push run images.(TEXT)[default: apache/airflow] +*--github-tokenThe token used to authenticate to GitHub. You can generate it with                +https://github.com/settings/tokens/new?description=Read%20repo&scopes=public_repo +(TEXT)                                                                            +[required]                                                                        +--from-jobOptional run id of the github action job to load the image from.(TEXT) +--from-prOptional pr number of the github action job to load the image from. loads the     +image from the latest job.                                                        +(TEXT)                                                                            +--skip-image-file-deletionSkip image deletion after loading. ╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Common options ─────────────────────────────────────────────────────────────────────────────────────────────────────╮ +--dry-run-DIf dry-run is set, commands are only printed, not executed. +--verbose-vPrint verbose information about performed steps. +--help-hShow this message and exit. 
+╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ diff --git a/dev/breeze/doc/images/output_ci-image_load.txt b/dev/breeze/doc/images/output_ci-image_load.txt index d407ecbaf0c92..5d1ab347c4564 100644 --- a/dev/breeze/doc/images/output_ci-image_load.txt +++ b/dev/breeze/doc/images/output_ci-image_load.txt @@ -1 +1 @@ -f0f9a36fe07d5cdaf1b704ac473bb155 +31025e189ed595557884247d83aa9783 diff --git a/dev/breeze/doc/images/output_prod-image_load.svg b/dev/breeze/doc/images/output_prod-image_load.svg index 680ada8df9cfd..5531c40a07d85 100644 --- a/dev/breeze/doc/images/output_prod-image_load.svg +++ b/dev/breeze/doc/images/output_prod-image_load.svg @@ -1,4 +1,4 @@ - + - + @@ -96,9 +98,36 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + - Command: prod-image load + Command: prod-image load @@ -114,18 +143,27 @@ Load PROD image from a file. ╭─ Load image flags ───────────────────────────────────────────────────────────────────────────────────────────────────╮ ---python-pPython major/minor version used in Airflow image for images. -(>3.9< | 3.10 | 3.11 | 3.12)                                 -[default: 3.9]                                               ---platformPlatform for Airflow image.(linux/amd64 | linux/arm64) ---image-fileOptional file to save the image to.(FILE) ---skip-image-file-deletionSkip image deletion after loading. -╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ -╭─ Common options ─────────────────────────────────────────────────────────────────────────────────────────────────────╮ ---verbose-vPrint verbose information about performed steps. ---dry-run-DIf dry-run is set, commands are only printed, not executed. ---help-hShow this message and exit. 
-╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +--python-pPython major/minor version used in Airflow image for images. +(>3.9< | 3.10 | 3.11 | 3.12)                                 +[default: 3.9]                                               +--platformPlatform for Airflow image.(linux/amd64 | linux/arm64) +--image-fileOptional file to save the image to.(FILE) +--github-repository-gGitHub repository used to pull, push run images.(TEXT)[default: apache/airflow] +*--github-tokenThe token used to authenticate to GitHub. You can generate it with                +https://github.com/settings/tokens/new?description=Read%20repo&scopes=public_repo +(TEXT)                                                                            +[required]                                                                        +--from-jobOptional run id of the github action job to load the image from.(TEXT) +--from-prOptional pr number of the github action job to load the image from. loads the     +image from the latest job.                                                        +(TEXT)                                                                            +--skip-image-file-deletionSkip image deletion after loading. +╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Common options ─────────────────────────────────────────────────────────────────────────────────────────────────────╮ +--dry-run-DIf dry-run is set, commands are only printed, not executed. +--verbose-vPrint verbose information about performed steps. +--help-hShow this message and exit. 
+╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ diff --git a/dev/breeze/doc/images/output_prod-image_load.txt b/dev/breeze/doc/images/output_prod-image_load.txt index 973697202b080..e8ade27f6f44e 100644 --- a/dev/breeze/doc/images/output_prod-image_load.txt +++ b/dev/breeze/doc/images/output_prod-image_load.txt @@ -1 +1 @@ -97cb62e20186843f91556c936863a7af +00dd027dac4b09f71ebd07ff826d43f7 diff --git a/dev/breeze/src/airflow_breeze/commands/ci_image_commands.py b/dev/breeze/src/airflow_breeze/commands/ci_image_commands.py index fa088ac968681..f9140ebe6a1c7 100644 --- a/dev/breeze/src/airflow_breeze/commands/ci_image_commands.py +++ b/dev/breeze/src/airflow_breeze/commands/ci_image_commands.py @@ -43,6 +43,9 @@ option_dev_apt_deps, option_disable_airflow_repo_cache, option_docker_cache, + option_from_job, + option_from_pr, + option_github_token_for_images, option_install_mysql_client_type, option_platform_multiple, option_prepare_buildx_cache, @@ -117,22 +120,6 @@ if TYPE_CHECKING: from airflow_breeze.params.shell_params import ShellParams -option_from_job = click.option( - "--from-job", - required=False, - default="", - envvar="FROM_JOB", - help="Optional run id of the github action job to load the image from.", -) - -option_from_pr = click.option( - "--from-pr", - default="", - required=False, - envvar="FROM_PR", - help="Optional pr number of the github action job to load the image from. 
loads the image from the latest job.", -) - @click.group( cls=BreezeGroup, name="ci-image", help="Tools that developers can use to manually manage CI images" @@ -306,7 +293,8 @@ def get_exitcode(status: int) -> int: type=click.Path(exists=True, dir_okay=False, readable=True, path_type=Path, resolve_path=True), envvar="IMAGE_FILE", help="Optional file name to load the image from - name must follow the convention:" - "`ci-image-save-{escaped_platform}-*-{python_version}.tar`.", + "`ci-image-save-{escaped_platform}-*-{python_version}.tar`. where escaped_platform is one of " + "linux_amd64 or linux_arm64.", ) @@ -644,23 +632,25 @@ def save( @ci_image.command(name="load") -@option_python -@option_platform_single -@option_github_repository -@option_skip_image_file_deletion -@option_verbose @option_ci_image_file_to_load +@option_dry_run @option_from_job @option_from_pr -@option_dry_run +@option_github_repository +@option_github_token_for_images +@option_platform_single +@option_python +@option_skip_image_file_deletion +@option_verbose def load( - python: str, - platform: str, + from_job: str | None, + from_pr: str | None, github_repository: str, + github_token: str, image_file: Path | None, + platform: str, + python: str, skip_image_file_deletion: bool, - from_job: str | None, - from_pr: str | None, ): """Load CI image from a file.""" perform_environment_checks() @@ -672,9 +662,9 @@ def load( path = f"/tmp/ci-image-save-{escaped_platform}-{python}.tar" if from_job: - download_artifact_from_run_id(from_job, path) + download_artifact_from_run_id(from_job, path, github_repository, github_token) elif from_pr: - download_artifact_from_pr(from_pr, path) + download_artifact_from_pr(from_pr, path, github_repository, github_token) if not image_file: image_file = Path(path) diff --git a/dev/breeze/src/airflow_breeze/commands/ci_image_commands_config.py b/dev/breeze/src/airflow_breeze/commands/ci_image_commands_config.py index 903604d254b94..d6247a7f46b9b 100644 --- 
a/dev/breeze/src/airflow_breeze/commands/ci_image_commands_config.py +++ b/dev/breeze/src/airflow_breeze/commands/ci_image_commands_config.py @@ -183,9 +183,10 @@ "--platform", "--image-file", "--github-repository", - "--skip-image-file-deletion", + "--github-token", "--from-job", "--from-pr", + "--skip-image-file-deletion", ], }, ], diff --git a/dev/breeze/src/airflow_breeze/commands/common_image_options.py b/dev/breeze/src/airflow_breeze/commands/common_image_options.py index e48c2e9bb8fd5..6f1ca75f98147 100644 --- a/dev/breeze/src/airflow_breeze/commands/common_image_options.py +++ b/dev/breeze/src/airflow_breeze/commands/common_image_options.py @@ -189,3 +189,25 @@ is_flag=True, envvar="SKIP_IMAGE_FILE_DELETION", ) +option_from_job = click.option( + "--from-job", + required=False, + default="", + envvar="FROM_JOB", + help="Optional run id of the github action job to load the image from.", +) + +option_from_pr = click.option( + "--from-pr", + default="", + required=False, + envvar="FROM_PR", + help="Optional pr number of the github action job to load the image from. loads the image from the latest job.", +) +option_github_token_for_images = click.option( + "--github-token", + help="The token used to authenticate to GitHub. 
You can generate it with " + "https://github.com/settings/tokens/new?description=Read%20repo&scopes=public_repo", + envvar="GITHUB_TOKEN", + required=True, +) diff --git a/dev/breeze/src/airflow_breeze/commands/production_image_commands.py b/dev/breeze/src/airflow_breeze/commands/production_image_commands.py index 6033a8cae2e57..1f2a30468cb3a 100644 --- a/dev/breeze/src/airflow_breeze/commands/production_image_commands.py +++ b/dev/breeze/src/airflow_breeze/commands/production_image_commands.py @@ -39,6 +39,9 @@ option_dev_apt_deps, option_disable_airflow_repo_cache, option_docker_cache, + option_from_job, + option_from_pr, + option_github_token_for_images, option_install_mysql_client_type, option_platform_multiple, option_prepare_buildx_cache, @@ -92,6 +95,7 @@ prepare_docker_build_command, warm_up_docker_builder, ) +from airflow_breeze.utils.github import download_artifact_from_pr, download_artifact_from_run_id from airflow_breeze.utils.image import run_pull_image, run_pull_in_parallel from airflow_breeze.utils.parallel import ( DockerBuildxProgressMatcher, @@ -164,7 +168,8 @@ def prepare_for_building_prod_image(params: BuildProdParams): type=click.Path(exists=True, dir_okay=False, readable=True, path_type=Path, resolve_path=True), envvar="IMAGE_FILE", help="Optional file name to load the image from - name must follow the convention:" - "`prod-image-save-{escaped_platform}-*-{python_version}.tar`.", + "`prod-image-save-{escaped_platform}-*-{python_version}.tar` where escaped_platform is one of " + "linux_amd64 or linux_arm64.", ) @@ -631,7 +636,7 @@ def save( ).airflow_image_name with ci_group("Buildx disk usage"): run_command(["docker", "buildx", "du", "--verbose"], check=False) - escaped_platform = platform.replace("/", "-") + escaped_platform = platform.replace("/", "_") if not image_file: image_file = Path(f"/tmp/prod-image-save-{escaped_platform}-{python}.tar") get_console().print(f"[info]Saving Python PROD image {image_name} to {image_file}[/]") @@ -642,23 
+647,36 @@ def save( @prod_image.command(name="load") -@option_python +@option_dry_run +@option_from_job +@option_from_pr +@option_github_repository +@option_github_token_for_images @option_platform_single +@option_prod_image_file_to_save +@option_python @option_skip_image_file_deletion @option_verbose -@option_prod_image_file_to_save -@option_dry_run def load( - python: str, + from_job: str | None, + from_pr: str | None, + github_repository: str, + github_token: str, + image_file: Path | None, platform: str, + python: str, skip_image_file_deletion: bool, - image_file: Path | None, ): """Load PROD image from a file.""" perform_environment_checks() - escaped_platform = platform.replace("/", "-") + escaped_platform = platform.replace("/", "_") + path = f"/tmp/prod-image-save-{escaped_platform}-{python}.tar" + if from_job: + download_artifact_from_run_id(from_job, path, github_repository, github_token) + elif from_pr: + download_artifact_from_pr(from_pr, path, github_repository, github_token) if not image_file: - image_file = Path(f"/tmp/prod-image-save-{escaped_platform}-{python}.tar") + image_file = Path(path) if not image_file.exists(): get_console().print(f"[error]The image {image_file} does not exist.[/]") sys.exit(1) diff --git a/dev/breeze/src/airflow_breeze/commands/production_image_commands_config.py b/dev/breeze/src/airflow_breeze/commands/production_image_commands_config.py index 870655e637c30..19069a5fe803d 100644 --- a/dev/breeze/src/airflow_breeze/commands/production_image_commands_config.py +++ b/dev/breeze/src/airflow_breeze/commands/production_image_commands_config.py @@ -194,6 +194,10 @@ "--python", "--platform", "--image-file", + "--github-repository", + "--github-token", + "--from-job", + "--from-pr", "--skip-image-file-deletion", ], }, diff --git a/dev/breeze/src/airflow_breeze/utils/github.py b/dev/breeze/src/airflow_breeze/utils/github.py index 1396d5692271a..e90b0a801dee6 100644 --- a/dev/breeze/src/airflow_breeze/utils/github.py +++ 
b/dev/breeze/src/airflow_breeze/utils/github.py @@ -182,30 +182,29 @@ def get_tag_date(tag: str) -> str | None: return datetime.fromtimestamp(timestamp, tz=timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") -def download_artifact_from_run_id(run_id: str, output_file: str): +def download_artifact_from_run_id(run_id: str, output_file: str, github_repository: str, github_token: str): """ Downloads a file from GitHub Actions artifact :param run_id: run_id of the workflow :param output_file: Path where the file should be downloaded + :param github_repository: GitHub repository + :param github_token: GitHub token """ import requests from tqdm import tqdm - url = f"https://api.github.com/repos/apache/airflow/actions/runs/{run_id}/artifacts" + url = f"https://api.github.com/repos/{github_repository}/actions/runs/{run_id}/artifacts" headers = {"Accept": "application/vnd.github.v3+json"} session = requests.Session() - if os.getenv("GITHUB_TOKEN"): - headers["Authorization"] = f"Bearer {os.getenv ('GITHUB_TOKEN')}" - + headers["Authorization"] = f"Bearer {github_token}" artifact_response = requests.get(url, headers=headers) if artifact_response.status_code != 200: get_console().print( - "[error]Describing artifacts failed with status code %s, " - "you might need to provide GITHUB_TOKEN, set it as environment variable", - artifact_response.status_code, + "[error]Describing artifacts failed with status code " + f"{artifact_response.status_code}: {artifact_response.text}", ) sys.exit(1) @@ -226,9 +225,8 @@ def download_artifact_from_run_id(run_id: str, output_file: str): if response.status_code != 200: get_console().print( - "[error] Downloading artifact failed with status code %s, " - "you might need to provide GITHUB_TOKEN, set it as environment variable", - response.status_code, + "[error]Downloading artifacts failed with status code " + f"{response.status_code}: {response.text}", ) sys.exit(1) @@ -249,27 +247,24 @@ def download_artifact_from_run_id(run_id: str, output_file: str): 
os.remove(temp_file) -def download_artifact_from_pr(pr: str, output_file: str): +def download_artifact_from_pr(pr: str, output_file: str, github_repository: str, github_token: str): import requests pr_number = pr.lstrip("#") - pr_url = f"https://api.github.com/repos/apache/airflow/pulls/{pr_number}" - workflow_run_url = "https://api.github.com/repos/apache/airflow/actions/runs" + pr_url = f"https://api.github.com/repos/{github_repository}/pulls/{pr_number}" + workflow_run_url = f"https://api.github.com/repos/{github_repository}/actions/runs" headers = {"Accept": "application/vnd.github.v3+json"} session = requests.Session() - if os.getenv("GITHUB_TOKEN"): - headers["Authorization"] = f"Bearer {os.getenv('GITHUB_TOKEN')}" + headers["Authorization"] = f"Bearer {github_token}" pull_response = session.get(pr_url, headers=headers) if pull_response.status_code != 200: get_console().print( - "[error]Fetching PR failed with status code %s, %s, " - "you might need to provide GITHUB_TOKEN, set it as environment variable", - pull_response.status_code, - pull_response.content, + "[error]Fetching PR failed with status codee " + f"{pull_response.status_code}: {pull_response.text}", ) sys.exit(1) @@ -300,4 +295,4 @@ def download_artifact_from_pr(pr: str, output_file: str): get_console().print(f"[info]Found run id {run_id} for PR {pr}") - download_artifact_from_run_id(str(run_id), output_file) + download_artifact_from_run_id(str(run_id), output_file, github_repository, github_token)