Skip to content

Commit

Permalink
Extend --from-job and --from-pr to PROD images as well (apache#45296)
Browse files Browse the repository at this point in the history
Building on top of apache#45287 we also add the same capability for PROD
images. Additionally:

* the token is required, so it is passed as a mandatory --github-token
  option (with the GITHUB_TOKEN env var as its source). The help text
  contains a link for generating the token, so it can be easily created.

* error messages contain the response text in addition to the error code

* the --github-repository is used to determine the right github repo
  to use when calling the API.

* PROD image platform escaping used `-` instead of `_`, which made it
  impossible to find the artifact when looking for it. This has
  been fixed.
  • Loading branch information
potiuk authored Dec 30, 2024
1 parent a152b6a commit 52ed7d7
Show file tree
Hide file tree
Showing 10 changed files with 188 additions and 98 deletions.
64 changes: 43 additions & 21 deletions dev/breeze/doc/images/output_ci-image_load.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 1 addition & 1 deletion dev/breeze/doc/images/output_ci-image_load.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
f0f9a36fe07d5cdaf1b704ac473bb155
31025e189ed595557884247d83aa9783
68 changes: 53 additions & 15 deletions dev/breeze/doc/images/output_prod-image_load.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 1 addition & 1 deletion dev/breeze/doc/images/output_prod-image_load.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
97cb62e20186843f91556c936863a7af
00dd027dac4b09f71ebd07ff826d43f7
48 changes: 19 additions & 29 deletions dev/breeze/src/airflow_breeze/commands/ci_image_commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,9 @@
option_dev_apt_deps,
option_disable_airflow_repo_cache,
option_docker_cache,
option_from_job,
option_from_pr,
option_github_token_for_images,
option_install_mysql_client_type,
option_platform_multiple,
option_prepare_buildx_cache,
Expand Down Expand Up @@ -117,22 +120,6 @@
if TYPE_CHECKING:
from airflow_breeze.params.shell_params import ShellParams

option_from_job = click.option(
"--from-job",
required=False,
default="",
envvar="FROM_JOB",
help="Optional run id of the github action job to load the image from.",
)

option_from_pr = click.option(
"--from-pr",
default="",
required=False,
envvar="FROM_PR",
help="Optional pr number of the github action job to load the image from. loads the image from the latest job.",
)


@click.group(
cls=BreezeGroup, name="ci-image", help="Tools that developers can use to manually manage CI images"
Expand Down Expand Up @@ -306,7 +293,8 @@ def get_exitcode(status: int) -> int:
type=click.Path(exists=True, dir_okay=False, readable=True, path_type=Path, resolve_path=True),
envvar="IMAGE_FILE",
help="Optional file name to load the image from - name must follow the convention:"
"`ci-image-save-{escaped_platform}-*-{python_version}.tar`.",
"`ci-image-save-{escaped_platform}-*-{python_version}.tar`. where escaped_platform is one of "
"linux_amd64 or linux_arm64.",
)


Expand Down Expand Up @@ -644,23 +632,25 @@ def save(


@ci_image.command(name="load")
@option_python
@option_platform_single
@option_github_repository
@option_skip_image_file_deletion
@option_verbose
@option_ci_image_file_to_load
@option_dry_run
@option_from_job
@option_from_pr
@option_dry_run
@option_github_repository
@option_github_token_for_images
@option_platform_single
@option_python
@option_skip_image_file_deletion
@option_verbose
def load(
python: str,
platform: str,
from_job: str | None,
from_pr: str | None,
github_repository: str,
github_token: str,
image_file: Path | None,
platform: str,
python: str,
skip_image_file_deletion: bool,
from_job: str | None,
from_pr: str | None,
):
"""Load CI image from a file."""
perform_environment_checks()
Expand All @@ -672,9 +662,9 @@ def load(
path = f"/tmp/ci-image-save-{escaped_platform}-{python}.tar"

if from_job:
download_artifact_from_run_id(from_job, path)
download_artifact_from_run_id(from_job, path, github_repository, github_token)
elif from_pr:
download_artifact_from_pr(from_pr, path)
download_artifact_from_pr(from_pr, path, github_repository, github_token)

if not image_file:
image_file = Path(path)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -183,9 +183,10 @@
"--platform",
"--image-file",
"--github-repository",
"--skip-image-file-deletion",
"--github-token",
"--from-job",
"--from-pr",
"--skip-image-file-deletion",
],
},
],
Expand Down
22 changes: 22 additions & 0 deletions dev/breeze/src/airflow_breeze/commands/common_image_options.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,3 +189,25 @@
is_flag=True,
envvar="SKIP_IMAGE_FILE_DELETION",
)
# Reusable "--from-job" option: optional run id of the GitHub Actions job to
# load the image from. Not required; defaults to an empty string and can also
# be supplied through the FROM_JOB environment variable.
option_from_job = click.option(
"--from-job",
required=False,
default="",
envvar="FROM_JOB",
help="Optional run id of the github action job to load the image from.",
)

# Reusable "--from-pr" option: optional PR number; per the help text the image
# is loaded from the latest job of that PR. Not required; defaults to an empty
# string and can also be supplied through the FROM_PR environment variable.
option_from_pr = click.option(
"--from-pr",
default="",
required=False,
envvar="FROM_PR",
help="Optional pr number of the github action job to load the image from. loads the image from the latest job.",
)
# Reusable "--github-token" option for image commands. Unlike the two options
# above it is required; the value can come from the GITHUB_TOKEN environment
# variable. The help text links to the GitHub page for creating a new token
# (the URL pre-fills the "public_repo" scope).
option_github_token_for_images = click.option(
"--github-token",
help="The token used to authenticate to GitHub. You can generate it with "
"https://github.com/settings/tokens/new?description=Read%20repo&scopes=public_repo",
envvar="GITHUB_TOKEN",
required=True,
)
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@
option_dev_apt_deps,
option_disable_airflow_repo_cache,
option_docker_cache,
option_from_job,
option_from_pr,
option_github_token_for_images,
option_install_mysql_client_type,
option_platform_multiple,
option_prepare_buildx_cache,
Expand Down Expand Up @@ -92,6 +95,7 @@
prepare_docker_build_command,
warm_up_docker_builder,
)
from airflow_breeze.utils.github import download_artifact_from_pr, download_artifact_from_run_id
from airflow_breeze.utils.image import run_pull_image, run_pull_in_parallel
from airflow_breeze.utils.parallel import (
DockerBuildxProgressMatcher,
Expand Down Expand Up @@ -164,7 +168,8 @@ def prepare_for_building_prod_image(params: BuildProdParams):
type=click.Path(exists=True, dir_okay=False, readable=True, path_type=Path, resolve_path=True),
envvar="IMAGE_FILE",
help="Optional file name to load the image from - name must follow the convention:"
"`prod-image-save-{escaped_platform}-*-{python_version}.tar`.",
"`prod-image-save-{escaped_platform}-*-{python_version}.tar` where escaped_platform is one of "
"linux_amd64 or linux_arm64.",
)


Expand Down Expand Up @@ -631,7 +636,7 @@ def save(
).airflow_image_name
with ci_group("Buildx disk usage"):
run_command(["docker", "buildx", "du", "--verbose"], check=False)
escaped_platform = platform.replace("/", "-")
escaped_platform = platform.replace("/", "_")
if not image_file:
image_file = Path(f"/tmp/prod-image-save-{escaped_platform}-{python}.tar")
get_console().print(f"[info]Saving Python PROD image {image_name} to {image_file}[/]")
Expand All @@ -642,23 +647,36 @@ def save(


@prod_image.command(name="load")
@option_python
@option_dry_run
@option_from_job
@option_from_pr
@option_github_repository
@option_github_token_for_images
@option_platform_single
@option_prod_image_file_to_save
@option_python
@option_skip_image_file_deletion
@option_verbose
@option_prod_image_file_to_save
@option_dry_run
def load(
python: str,
from_job: str | None,
from_pr: str | None,
github_repository: str,
github_token: str,
image_file: Path | None,
platform: str,
python: str,
skip_image_file_deletion: bool,
image_file: Path | None,
):
"""Load PROD image from a file."""
perform_environment_checks()
escaped_platform = platform.replace("/", "-")
escaped_platform = platform.replace("/", "_")
path = f"/tmp/prod-image-save-{escaped_platform}-{python}.tar"
if from_job:
download_artifact_from_run_id(from_job, path, github_repository, github_token)
elif from_pr:
download_artifact_from_pr(from_pr, path, github_repository, github_token)
if not image_file:
image_file = Path(f"/tmp/prod-image-save-{escaped_platform}-{python}.tar")
image_file = Path(path)
if not image_file.exists():
get_console().print(f"[error]The image {image_file} does not exist.[/]")
sys.exit(1)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,10 @@
"--python",
"--platform",
"--image-file",
"--github-repository",
"--github-token",
"--from-job",
"--from-pr",
"--skip-image-file-deletion",
],
},
Expand Down
37 changes: 16 additions & 21 deletions dev/breeze/src/airflow_breeze/utils/github.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,30 +182,29 @@ def get_tag_date(tag: str) -> str | None:
return datetime.fromtimestamp(timestamp, tz=timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")


def download_artifact_from_run_id(run_id: str, output_file: str):
def download_artifact_from_run_id(run_id: str, output_file: str, github_repository: str, github_token: str):
"""
Downloads a file from GitHub Actions artifact
:param run_id: run_id of the workflow
:param output_file: Path where the file should be downloaded
:param github_repository: GitHub repository
:param github_token: GitHub token
"""
import requests
from tqdm import tqdm

url = f"https://api.github.com/repos/apache/airflow/actions/runs/{run_id}/artifacts"
url = f"https://api.github.com/repos/{github_repository}/actions/runs/{run_id}/artifacts"
headers = {"Accept": "application/vnd.github.v3+json"}

session = requests.Session()
if os.getenv("GITHUB_TOKEN"):
headers["Authorization"] = f"Bearer {os.getenv ('GITHUB_TOKEN')}"

headers["Authorization"] = f"Bearer {github_token}"
artifact_response = requests.get(url, headers=headers)

if artifact_response.status_code != 200:
get_console().print(
"[error]Describing artifacts failed with status code %s, "
"you might need to provide GITHUB_TOKEN, set it as environment variable",
artifact_response.status_code,
"[error]Describing artifacts failed with status code "
f"{artifact_response.status_code}: {artifact_response.text}",
)
sys.exit(1)

Expand All @@ -226,9 +225,8 @@ def download_artifact_from_run_id(run_id: str, output_file: str):

if response.status_code != 200:
get_console().print(
"[error] Downloading artifact failed with status code %s, "
"you might need to provide GITHUB_TOKEN, set it as environment variable",
response.status_code,
"[error]Downloading artifacts failed with status code "
f"{response.status_code}: {response.text}",
)
sys.exit(1)

Expand All @@ -249,27 +247,24 @@ def download_artifact_from_run_id(run_id: str, output_file: str):
os.remove(temp_file)


def download_artifact_from_pr(pr: str, output_file: str):
def download_artifact_from_pr(pr: str, output_file: str, github_repository: str, github_token: str):
import requests

pr_number = pr.lstrip("#")
pr_url = f"https://api.github.com/repos/apache/airflow/pulls/{pr_number}"
workflow_run_url = "https://api.github.com/repos/apache/airflow/actions/runs"
pr_url = f"https://api.github.com/repos/{github_repository}/pulls/{pr_number}"
workflow_run_url = f"https://api.github.com/repos/{github_repository}/actions/runs"

headers = {"Accept": "application/vnd.github.v3+json"}

session = requests.Session()
if os.getenv("GITHUB_TOKEN"):
headers["Authorization"] = f"Bearer {os.getenv('GITHUB_TOKEN')}"
headers["Authorization"] = f"Bearer {github_token}"

pull_response = session.get(pr_url, headers=headers)

if pull_response.status_code != 200:
get_console().print(
"[error]Fetching PR failed with status code %s, %s, "
"you might need to provide GITHUB_TOKEN, set it as environment variable",
pull_response.status_code,
pull_response.content,
"[error]Fetching PR failed with status codee "
f"{pull_response.status_code}: {pull_response.text}",
)
sys.exit(1)

Expand Down Expand Up @@ -300,4 +295,4 @@ def download_artifact_from_pr(pr: str, output_file: str):

get_console().print(f"[info]Found run id {run_id} for PR {pr}")

download_artifact_from_run_id(str(run_id), output_file)
download_artifact_from_run_id(str(run_id), output_file, github_repository, github_token)

0 comments on commit 52ed7d7

Please sign in to comment.