Factor out sharding of packed tensors (#2059) #2954
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Workflow identity and the events that start it.
name: Build and push docker image to internal registry

on:
  # Allow manual runs from the Actions tab.
  workflow_dispatch:
  # Every push to main and every version tag (v*).
  push:
    branches:
      - "main"
    tags:
      - "v*"
  # Pull requests targeting main, but only when they touch one of the
  # paths listed below.
  pull_request:
    branches:
      - "main"
    paths:
      - ".github/workflows/build.yaml"
      - "integration-tests/**"
      - "server/**"
      - "proto/**"
      - "router/**"
      - "launcher/**"
      - "Cargo.lock"
      - "rust-toolchain.toml"
      - "Dockerfile"
      - "Dockerfile_amd"
      - "Dockerfile_intel"
jobs:
  # Builds one Docker image per matrix entry (cuda / amd / intel), pushes it,
  # and then runs the integration tests against the cuda image only.
  build-and-push-image:
    # One in-flight run per workflow + matrix entry + PR head ref; for
    # non-PR events head_ref is empty, so the unique run_id is used instead.
    concurrency:
      group: ${{ github.workflow }}-build-and-push-image-${{ matrix.name }}-${{ github.head_ref || github.run_id }}
      cancel-in-progress: true
    runs-on: [self-hosted, nvidia-gpu, multi-gpu, 4-a10, ci]
    strategy:
      matrix:
        # `label` is appended to every image tag and to the cache ref;
        # `dockerfile` selects the file passed to the build step.
        include:
          - name: "cuda"
            label: ""
            dockerfile: "Dockerfile"
          - name: "amd"
            label: "-rocm"
            dockerfile: "Dockerfile_amd"
          - name: "intel"
            label: "-intel"
            dockerfile: "Dockerfile_intel"
    permissions:
      contents: write
      packages: write
      # This is used to complete the identity challenge
      # with sigstore/fulcio when running outside of PRs.
      id-token: write
      security-events: write
    steps:
      - name: Checkout repository
        uses: actions/checkout@v3

      # NOTE(review): GITHUB_SHA_SHORT used below is presumably exported by
      # this action - confirm against its documentation.
      - name: Inject slug/short variables
        uses: rlespinasse/github-slug-action@v4.4.1

      - name: Tailscale
        uses: huggingface/tailscale-action@main
        with:
          authkey: ${{ secrets.TAILSCALE_AUTHKEY }}
          slackChannel: ${{ secrets.SLACK_CIFEEDBACK_CHANNEL }}
          slackToken: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}

      - name: Initialize Docker Buildx
        uses: docker/setup-buildx-action@v2.0.0
        with:
          install: true

      # The public registries (GHCR, Azure) are only logged into outside of
      # pull requests; the internal registry is always logged into.
      - name: Login to GitHub Container Registry
        if: github.event_name != 'pull_request'
        uses: docker/login-action@v2
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Login to internal Container Registry
        uses: docker/login-action@v2.1.0
        with:
          username: ${{ secrets.TAILSCALE_DOCKER_USERNAME }}
          password: ${{ secrets.TAILSCALE_DOCKER_PASSWORD }}
          registry: registry.internal.huggingface.tech

      - name: Login to Azure Container Registry
        if: github.event_name != 'pull_request'
        uses: docker/login-action@v2.1.0
        with:
          username: ${{ secrets.AZURE_DOCKER_USERNAME }}
          password: ${{ secrets.AZURE_DOCKER_PASSWORD }}
          registry: db4c2190dd824d1f950f5d1555fbadf0.azurecr.io

      # If pull request
      - name: Extract metadata (tags, labels) for Docker
        if: ${{ github.event_name == 'pull_request' }}
        id: meta-pr
        uses: docker/metadata-action@v4.3.0
        with:
          images: |
            registry.internal.huggingface.tech/api-inference/community/text-generation-inference
          tags: |
            type=raw,value=sha-${{ env.GITHUB_SHA_SHORT }}${{ matrix.label }}

      # If main, release or tag
      - name: Extract metadata (tags, labels) for Docker
        if: ${{ github.event_name != 'pull_request' }}
        id: meta
        uses: docker/metadata-action@v4.3.0
        with:
          flavor: |
            latest=auto
          images: |
            registry.internal.huggingface.tech/api-inference/community/text-generation-inference
            ghcr.io/huggingface/text-generation-inference
            db4c2190dd824d1f950f5d1555fbadf0.azurecr.io/text-generation-inference
          tags: |
            type=semver,pattern={{version}}${{ matrix.label }}
            type=semver,pattern={{major}}.{{minor}}${{ matrix.label }}
            type=raw,value=latest${{ matrix.label }},enable=${{ github.ref == format('refs/heads/{0}', github.event.repository.default_branch) }}
            type=raw,value=sha-${{ env.GITHUB_SHA_SHORT }}${{ matrix.label }}

      - name: Build and push Docker image
        id: build-and-push
        uses: docker/build-push-action@v4
        with:
          context: .
          file: ${{ matrix.dockerfile }}
          push: true
          platforms: 'linux/amd64'
          build-args: |
            GIT_SHA=${{ env.GITHUB_SHA }}
            DOCKER_LABEL=sha-${{ env.GITHUB_SHA_SHORT }}${{ matrix.label }}
          # Exactly one of the two metadata steps runs per event, so fall
          # back from the non-PR outputs to the PR outputs.
          tags: ${{ steps.meta.outputs.tags || steps.meta-pr.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels || steps.meta-pr.outputs.labels }}
          network: host
          cache-from: type=registry,ref=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:cache${{ matrix.label }},mode=min
          cache-to: type=registry,ref=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:cache${{ matrix.label }},mode=min

      # Integration tests run only for the cuda matrix entry.
      - name: Set up Python
        if: matrix.name == 'cuda'
        uses: actions/setup-python@v4
        with:
          # Quoted so the version is a string, not a YAML float
          # (an unquoted 3.10 would be read as 3.1).
          python-version: "3.9"

      - name: Install
        if: matrix.name == 'cuda'
        run: |
          make install-integration-tests

      - name: Run tests
        if: matrix.name == 'cuda'
        # Values are passed through the step environment instead of being
        # interpolated into the script text, so the secret is never parsed
        # by the shell.
        env:
          DOCKER_VOLUME: /mnt/cache
          # Same tag the build step pushed; matrix.label is empty for cuda.
          DOCKER_IMAGE: registry.internal.huggingface.tech/api-inference/community/text-generation-inference:sha-${{ env.GITHUB_SHA_SHORT }}${{ matrix.label }}
          HUGGING_FACE_HUB_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
          pytest -s -vv integration-tests

      # Runs only when an earlier step failed or the run has debug logging
      # enabled. NOTE(review): presumably keeps the runner reachable over
      # SSH for debugging - confirm against the action's documentation.
      - name: Tailscale Wait
        if: ${{ failure() || runner.debug == '1' }}
        uses: huggingface/tailscale-action@main
        with:
          waitForSSH: true