Release model-runner images for CE, version v1.0.13 and latest #49
Workflow file for this run
```yaml
name: Release model-runner images for CE
run-name: Release model-runner images for CE, version ${{ inputs.releaseTag }}${{ inputs.pushLatest && ' and latest' || '' }}
on:
  workflow_dispatch:
    inputs:
      pushLatest:
        description: "Tag images produced by this job as latest"
        required: false
        type: boolean
        default: false
      releaseTag:
        description: "Release tag"
        required: false
        type: string
        default: "test"
      llamaServerVersion:
        description: "llama-server version"
        required: false
        type: string
        default: "latest"
      vllmVersion:
        description: "vLLM version"
        required: false
        type: string
        default: "0.12.0"
      sglangVersion:
        description: "SGLang version"
        required: false
        type: string
        default: "0.4.0"
      # This can be removed once we have llama.cpp built for MUSA and CANN.
      buildMusaCann:
        description: "Build MUSA and CANN images"
        required: false
        type: boolean
        default: false
jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
      - name: Set up Go
        uses: actions/setup-go@7a3fe6cf4cb3a834922a1244abfce67bcef6a0c5
        with:
          go-version: 1.25.6
          cache: true
      - name: Run tests
        run: go test ./...
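  # The build job is gated on the test job (needs: test), so images are only
  # published when the Go tests pass.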
  build:
    needs: test
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repo
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
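      # Each tag list below is emitted as a multiline step output using the
      # "name<<EOF ... EOF" heredoc syntax written to $GITHUB_OUTPUT; the
      # build steps consume these lists via steps.tags.outputs.<name>.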
      - name: Format tags
        id: tags
        shell: bash
        run: |
          echo "cpu<<EOF" >> "$GITHUB_OUTPUT"
          echo "docker/model-runner:${{ inputs.releaseTag }}" >> "$GITHUB_OUTPUT"
          if [ "${{ inputs.pushLatest }}" == "true" ]; then
            echo "docker/model-runner:latest" >> "$GITHUB_OUTPUT"
          fi
          echo 'EOF' >> "$GITHUB_OUTPUT"
          echo "cuda<<EOF" >> "$GITHUB_OUTPUT"
          echo "docker/model-runner:${{ inputs.releaseTag }}-cuda" >> "$GITHUB_OUTPUT"
          if [ "${{ inputs.pushLatest }}" == "true" ]; then
            echo "docker/model-runner:latest-cuda" >> "$GITHUB_OUTPUT"
          fi
          echo 'EOF' >> "$GITHUB_OUTPUT"
          echo "vllm-cuda<<EOF" >> "$GITHUB_OUTPUT"
          echo "docker/model-runner:${{ inputs.releaseTag }}-vllm-cuda" >> "$GITHUB_OUTPUT"
          if [ "${{ inputs.pushLatest }}" == "true" ]; then
            echo "docker/model-runner:latest-vllm-cuda" >> "$GITHUB_OUTPUT"
          fi
          echo 'EOF' >> "$GITHUB_OUTPUT"
          echo "sglang-cuda<<EOF" >> "$GITHUB_OUTPUT"
          echo "docker/model-runner:${{ inputs.releaseTag }}-sglang-cuda" >> "$GITHUB_OUTPUT"
          if [ "${{ inputs.pushLatest }}" == "true" ]; then
            echo "docker/model-runner:latest-sglang-cuda" >> "$GITHUB_OUTPUT"
          fi
          echo 'EOF' >> "$GITHUB_OUTPUT"
          echo "diffusers<<EOF" >> "$GITHUB_OUTPUT"
          echo "docker/model-runner:${{ inputs.releaseTag }}-diffusers" >> "$GITHUB_OUTPUT"
          if [ "${{ inputs.pushLatest }}" == "true" ]; then
            echo "docker/model-runner:latest-diffusers" >> "$GITHUB_OUTPUT"
          fi
          echo 'EOF' >> "$GITHUB_OUTPUT"
          echo "rocm<<EOF" >> "$GITHUB_OUTPUT"
          echo "docker/model-runner:${{ inputs.releaseTag }}-rocm" >> "$GITHUB_OUTPUT"
          if [ "${{ inputs.pushLatest }}" == "true" ]; then
            echo "docker/model-runner:latest-rocm" >> "$GITHUB_OUTPUT"
          fi
          echo 'EOF' >> "$GITHUB_OUTPUT"
          echo "musa<<EOF" >> "$GITHUB_OUTPUT"
          echo "docker/model-runner:${{ inputs.releaseTag }}-musa" >> "$GITHUB_OUTPUT"
          if [ "${{ inputs.pushLatest }}" == "true" ]; then
            echo "docker/model-runner:latest-musa" >> "$GITHUB_OUTPUT"
          fi
          echo 'EOF' >> "$GITHUB_OUTPUT"
          echo "cann<<EOF" >> "$GITHUB_OUTPUT"
          echo "docker/model-runner:${{ inputs.releaseTag }}-cann" >> "$GITHUB_OUTPUT"
          if [ "${{ inputs.pushLatest }}" == "true" ]; then
            echo "docker/model-runner:latest-cann" >> "$GITHUB_OUTPUT"
          fi
          echo 'EOF' >> "$GITHUB_OUTPUT"
      - name: Log in to DockerHub
        uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef
        with:
          username: "docker"
          password: ${{ secrets.ORG_ACCESS_TOKEN }}
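      # Builds run on Docker Build Cloud (driver: cloud) through the
      # docker/make-product-smarter endpoint, which provides remote builders
      # for the amd64 and arm64 targets used below.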
      - name: Set up Buildx
        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3.12.0
        with:
          version: "lab:latest"
          driver: cloud
          endpoint: "docker/make-product-smarter"
          install: true
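      # Every build step below pushes its tags directly and attaches an SBOM
      # plus max-detail provenance attestations (sbom: true, provenance: mode=max).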
      - name: Build CPU image
        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83
        with:
          file: Dockerfile
          target: final-llamacpp
          platforms: linux/amd64, linux/arm64
          build-args: |
            "LLAMA_SERVER_VERSION=${{ inputs.llamaServerVersion }}"
          push: true
          sbom: true
          provenance: mode=max
          tags: ${{ steps.tags.outputs.cpu }}
      - name: Build CUDA image
        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83
        with:
          file: Dockerfile
          target: final-llamacpp
          platforms: linux/amd64, linux/arm64
          build-args: |
            "LLAMA_SERVER_VERSION=${{ inputs.llamaServerVersion }}"
            "LLAMA_SERVER_VARIANT=cuda"
            "BASE_IMAGE=nvidia/cuda:12.9.0-runtime-ubuntu24.04"
          push: true
          sbom: true
          provenance: mode=max
          tags: ${{ steps.tags.outputs.cuda }}
      - name: Build vLLM CUDA image
        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83
        with:
          file: Dockerfile
          target: final-vllm
          platforms: linux/amd64, linux/arm64
          build-args: |
            "LLAMA_SERVER_VERSION=${{ inputs.llamaServerVersion }}"
            "LLAMA_SERVER_VARIANT=cuda"
            "BASE_IMAGE=nvidia/cuda:13.0.2-runtime-ubuntu24.04"
            "VLLM_VERSION=${{ inputs.vllmVersion }}"
            "VLLM_CUDA_VERSION=cu130"
            "VLLM_PYTHON_TAG=cp38-abi3"
          push: true
          sbom: true
          provenance: mode=max
          tags: ${{ steps.tags.outputs.vllm-cuda }}
      - name: Build SGLang CUDA image
        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83
        with:
          file: Dockerfile
          target: final-sglang
          platforms: linux/amd64
          build-args: |
            "LLAMA_SERVER_VERSION=${{ inputs.llamaServerVersion }}"
            "LLAMA_SERVER_VARIANT=cuda"
            "BASE_IMAGE=nvidia/cuda:12.9.0-runtime-ubuntu24.04"
            "SGLANG_VERSION=${{ inputs.sglangVersion }}"
          push: true
          sbom: true
          provenance: mode=max
          tags: ${{ steps.tags.outputs.sglang-cuda }}
      - name: Build Diffusers image
        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83
        with:
          file: Dockerfile
          target: final-diffusers
          platforms: linux/amd64, linux/arm64
          build-args: |
            "LLAMA_SERVER_VERSION=${{ inputs.llamaServerVersion }}"
          push: true
          sbom: true
          provenance: mode=max
          tags: ${{ steps.tags.outputs.diffusers }}
      - name: Build ROCm image
        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83
        with:
          file: Dockerfile
          target: final-llamacpp
          platforms: linux/amd64
          build-args: |
            "LLAMA_SERVER_VERSION=${{ inputs.llamaServerVersion }}"
            "LLAMA_SERVER_VARIANT=rocm"
            "BASE_IMAGE=rocm/dev-ubuntu-22.04"
          push: true
          sbom: true
          provenance: mode=max
          tags: ${{ steps.tags.outputs.rocm }}
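      # MUSA (Moore Threads) and CANN (Ascend) images are opt-in via the
      # buildMusaCann input until llama.cpp is built for those backends.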
      - name: Build MUSA image
        if: ${{ inputs.buildMusaCann }}
        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83
        with:
          file: Dockerfile
          target: final-llamacpp
          platforms: linux/amd64
          build-args: |
            "LLAMA_SERVER_VERSION=${{ inputs.llamaServerVersion }}"
            "LLAMA_SERVER_VARIANT=musa"
            "BASE_IMAGE=mthreads/musa:rc4.3.0-runtime-ubuntu22.04-amd64"
          push: true
          sbom: true
          provenance: mode=max
          tags: ${{ steps.tags.outputs.musa }}
      - name: Build CANN image
        if: ${{ inputs.buildMusaCann }}
        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83
        with:
          file: Dockerfile
          target: final-llamacpp
          platforms: linux/arm64, linux/amd64
          build-args: |
            "LLAMA_SERVER_VERSION=${{ inputs.llamaServerVersion }}"
            "LLAMA_SERVER_VARIANT=cann"
            "BASE_IMAGE=ascendai/cann:8.2.rc2-910b-ubuntu22.04-py3.11"
          push: true
          sbom: true
          provenance: mode=max
          tags: ${{ steps.tags.outputs.cann }}
```
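For reference, a run like this one ("v1.0.13 and latest") can be dispatched from the command line with the GitHub CLI. A minimal sketch, assuming the workflow file is named release.yml and lives in the docker/model-runner repository (both names are assumptions; the run above does not show them):

```bash
# Dispatch the release workflow with explicit inputs. The workflow file name
# (release.yml) and repository slug (docker/model-runner) are assumptions;
# substitute the actual values for this repo.
gh workflow run release.yml \
  --repo docker/model-runner \
  -f releaseTag=v1.0.13 \
  -f pushLatest=true \
  -f llamaServerVersion=latest \
  -f vllmVersion=0.12.0 \
  -f sglangVersion=0.4.0 \
  -f buildMusaCann=false

# Once the run finishes, inspect a pushed multi-platform manifest and its
# SBOM/provenance attestations:
docker buildx imagetools inspect docker/model-runner:v1.0.13
```

Boolean inputs are passed as the strings true/false, and any omitted input falls back to the default declared under workflow_dispatch.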