Skip to content

v0.2.20: Fused NN Kernels + Flash Attention 3 SM120 + FP8 Block-Scale MMA #277

v0.2.20: Fused NN Kernels + Flash Attention 3 SM120 + FP8 Block-Scale MMA

v0.2.20: Fused NN Kernels + Flash Attention 3 SM120 + FP8 Block-Scale MMA #277

Workflow file for this run

# Continuous-integration workflow. Triggered by pushes to `main` and by pull
# requests that target `main`.
name: CI
on:
  push:
    branches: [main]
  pull_request:
    branches: [main]
# Least privilege: limit the automatically provided GITHUB_TOKEN to read-only
# repository contents. The jobs in this workflow only check out code, build,
# test, and upload artifacts/coverage; none of them appear to need write scope.
permissions:
  contents: read
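# Workflow runs for pull requests from forks (first-time contributors) should
# require approval before they start. That is a repository Actions setting,
# not something configured in this file; see:
# https://docs.github.com/en/actions/managing-workflow-runs/approving-workflow-runs-from-public-forks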
jobs:
# Static checks on Python 3.11: ruff over `src` and `tests`, then mypy over
# `src/pygpukit`.
lint:
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4

    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: '3.11'

    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install ruff mypy numpy psutil

    - name: Lint with ruff
      run: ruff check src tests

    # Folded scalar (`>-`): the lines below are joined with single spaces into
    # one command line. Each --disable-error-code flag switches off one mypy
    # error category for this run.
    - name: Type check with mypy
      run: >-
        mypy src/pygpukit
        --ignore-missing-imports
        --disable-error-code=union-attr
        --disable-error-code=no-redef
        --disable-error-code=no-any-return
        --disable-error-code=attr-defined
        --disable-error-code=assignment
        --disable-error-code=arg-type
        --disable-error-code=index
        --disable-error-code=misc
# Unit tests over an OS x Python-version matrix. Starts only after the `lint`
# job has succeeded.
test:
  runs-on: ${{ matrix.os }}
  needs: lint
  strategy:
    # Let every matrix cell run to completion even when another cell fails.
    fail-fast: false
    matrix:
      os: [ubuntu-latest, windows-latest]
      # Versions are quoted so that "3.10" is not read as the number 3.1.
      python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"]
  steps:
    - uses: actions/checkout@v4

    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v5
      with:
        python-version: ${{ matrix.python-version }}

    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install pytest pytest-cov numpy psutil

    - name: Run tests
      run: pytest tests/ -v --cov=pygpukit --cov-report=xml --cov-report=term-missing
      env:
        # The package itself is not pip-installed in this job; it is imported
        # from the source tree via PYTHONPATH.
        PYTHONPATH: src

    # Coverage is uploaded from exactly one matrix cell (ubuntu / 3.11).
    - name: Upload coverage to Codecov
      if: matrix.os == 'ubuntu-latest' && matrix.python-version == '3.11'
      uses: codecov/codecov-action@v4
      with:
        # codecov-action v4 expects an upload token for runs that are not
        # fork pull requests. If the CODECOV_TOKEN secret is not defined the
        # expression expands to an empty string, i.e. the previous behaviour.
        token: ${{ secrets.CODECOV_TOKEN }}
        file: ./coverage.xml
        # A failed upload is reported but does not fail the job.
        fail_ci_if_error: false
# Compile check for the native module under `native/`: installs pybind11 and
# the CUDA toolkit, then configures and builds with CMake. No tests run here.
cmake-check:
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4
      with:
        fetch-depth: 1  # Shallow clone for faster checkout
        submodules: recursive

    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: '3.11'

    - name: Install pybind11
      run: pip install pybind11

    - name: Install CUDA Toolkit
      id: cuda-toolkit
      uses: Jimver/cuda-toolkit@v0.2.29
      with:
        cuda: '13.0.2'
        method: network
        use-github-cache: true
        use-local-cache: false

    # Compiler cache; both the C++ and the CUDA compiler are launched through
    # ccache in the configure step below.
    - name: Setup ccache
      uses: hendrikmuhs/ccache-action@v1.2
      with:
        key: cuda-build-${{ runner.os }}
        max-size: 500M

    # Out-of-source configure in native/build. pybind11's CMake package
    # directory is looked up from the pybind11 installed by pip above.
    - name: Configure CMake
      working-directory: native
      run: |
        mkdir -p build
        cd build
        cmake .. \
          -DCMAKE_BUILD_TYPE=Release \
          -DCMAKE_CUDA_ARCHITECTURES="80;86;89;90;100;120a" \
          -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
          -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache \
          -Dpybind11_DIR=$(python -c "import pybind11; print(pybind11.get_cmake_dir())")

    # Parallel build using one job per processor reported by `nproc`.
    - name: Build native module
      working-directory: native/build
      run: cmake --build . --config Release -j$(nproc)
# Packaging check: builds the source distribution, runs `twine check` on it,
# and stores it as the `dist` artifact. Waits for `test` and `cmake-check`.
build:
  runs-on: ubuntu-latest
  needs:
    - test
    - cmake-check
  steps:
    - uses: actions/checkout@v4

    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: '3.11'

    - name: Install build dependencies
      run: |
        python -m pip install --upgrade pip
        pip install build twine

    # Only the sdist is produced in this job.
    - name: Build sdist (wheels built in release workflow with CUDA)
      run: python -m build --sdist

    - name: Check package
      run: twine check dist/*

    - name: Upload artifact
      uses: actions/upload-artifact@v4
      with:
        name: dist
        path: dist/