# Build Wheels (CU126) for Linux #78

# Builds CUDA 12.6 Linux wheels for every Python version in the matrix and
# attaches them to a GitHub release. Triggered manually only.
name: Build Wheels (CU126) for Linux

on:
  workflow_dispatch:

# The release step uploads assets, which needs write access to repo contents.
permissions:
  contents: write

jobs:
  build_wheels:
    name: Build Wheel ${{ matrix.os }} py${{ matrix.pyver }} cu126
    # Runner and container image are derived from the matrix so the values
    # cannot drift apart from `matrix.os` / `matrix.cuda`.
    runs-on: ${{ matrix.os }}
    container: nvidia/cuda:${{ matrix.cuda }}-cudnn-devel-ubuntu22.04
    strategy:
      # Let the remaining Python versions finish even if one build fails.
      fail-fast: false
      matrix:
        os: ["ubuntu-22.04"]
        # Quoted so that "3.10" is not parsed as the float 3.1.
        pyver: ["3.10", "3.11", "3.12", "3.13", "3.14"]
        cuda: ["12.6.3"]
        cudaarch: ["70-real;75-real;80-real;86-real;87-real;89-real"]
    defaults:
      run:
        shell: bash
    env:
      CUDAVER: ${{ matrix.cuda }}
      CUDAARCHVER: ${{ matrix.cudaarch }}
      MAX_JOBS: "12"
    steps:
      # Runs before checkout because the container image must have git
      # installed for the checkout step below.
      - name: Install dependencies
        run: |
          # apt-get is the script-safe front end; noninteractive prevents
          # package configuration prompts from blocking the job.
          export DEBIAN_FRONTEND=noninteractive
          apt-get update
          apt-get install -y \
            build-essential \
            ccache \
            cmake \
            curl \
            git \
            libgomp1 \
            libjpeg-dev \
            libssl-dev \
            ninja-build

      - name: Checkout
        uses: actions/checkout@v6
        with:
          submodules: recursive

      - name: Install uv and Python ${{ matrix.pyver }}
        uses: astral-sh/setup-uv@v7
        with:
          python-version: ${{ matrix.pyver }}
          activate-environment: true
          enable-cache: true

      - name: Show CUDA version
        run: nvcc -V

      - name: Build wheel
        env:
          VERBOSE: "1"
          CUDA_HOME: "/usr/local/cuda"
          CUDA_PATH: "/usr/local/cuda"
          CUDA_TOOLKIT_ROOT_DIR: "/usr/local/cuda"
        run: |
          set -euo pipefail

          # Step-level `env:` values are not shell-expanded, so a
          # `${LD_LIBRARY_PATH}` reference there would be passed through as
          # literal text. Prepend the CUDA directories here instead, keeping
          # whatever value the environment already provides (if any).
          export LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/local/cuda/compat:/usr/lib/x86_64-linux-gnu${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
          echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}"

          # Diagnostics only: list the CUDA driver/runtime libraries present.
          find /usr/ -name 'libcuda.so.*' || true
          find /usr/ -name 'libcudart.so.*' || true

          # "12.6.3" -> "126" (major and minor, dot removed).
          cuda_ver_short=$(echo "${CUDAVER}" | cut -d'.' -f 1,2 | sed 's/\.//g')

          # Build one CUDA wheel with dynamic GGML backends:
          # - GGML_BACKEND_DL enables runtime-loadable backend shared libraries.
          # - GGML_CPU_ALL_VARIANTS builds CPU variant backends when supported.
          # - GGML_NATIVE=OFF avoids binding the wheel to the CI runner CPU.
          CMAKE_ARGS_ARRAY=(
            "-G Ninja"
            # Disable non-wheel targets.
            "-DLLAMA_BUILD_EXAMPLES=OFF"
            "-DLLAMA_BUILD_TESTS=OFF"
            "-DLLAMA_BUILD_TOOLS=OFF"
            "-DLLAMA_BUILD_SERVER=OFF"
            "-DLLAMA_BUILD_UI=OFF"
            "-DLLAMA_USE_PREBUILT_UI=OFF"
            "-DLLAMA_CURL=OFF"
            "-DLLAMA_OPENSSL=ON"
            # GGML dynamic backend layout.
            "-DGGML_CPU=ON"
            "-DGGML_CUDA=ON"
            "-DGGML_NATIVE=OFF"
            "-DGGML_BACKEND_DL=ON"
            "-DGGML_CPU_ALL_VARIANTS=ON"
            "-DGGML_OPENMP=ON"
            # CUDA backend.
            "-DCMAKE_CUDA_ARCHITECTURES=${CUDAARCHVER}"
            "-DGGML_CUDA_FORCE_MMQ=ON"
            "-DCUDA_SEPARABLE_COMPILATION=ON"
            "-DCMAKE_CUDA_FLAGS=--diag-suppress=177,221,550"
            # Build behavior.
            # NOTE(review): CMake documents CMAKE_BUILD_PARALLEL_LEVEL as an
            # environment variable; passed as -D it may have no effect.
            # Kept as-is to preserve current build behavior - confirm.
            "-DCMAKE_BUILD_PARALLEL_LEVEL=${MAX_JOBS}"
            "-DGGML_CCACHE=ON"
            "-DENABLE_CCACHE=ON"
          )
          # Flatten the array into the single space-separated string that is
          # handed to the build through the CMAKE_ARGS environment variable.
          CMAKE_ARGS="${CMAKE_ARGS_ARRAY[*]}"
          echo "CMAKE_ARGS=${CMAKE_ARGS}"

          uv pip install --upgrade build setuptools wheel packaging
          CMAKE_ARGS="${CMAKE_ARGS}" uv build --wheel

          # Collect the built wheels; nullglob makes an unmatched pattern
          # expand to an empty array instead of the literal pattern.
          shopt -s nullglob
          wheels=(dist/*.whl)
          shopt -u nullglob
          if (( ${#wheels[@]} == 0 )); then
            echo "No wheel built in dist/ directory"
            exit 1
          fi
          wheel_path="${wheels[0]}"
          filename=$(basename "$wheel_path")

          # Wheel filename format:
          # name-version-python_tag-abi_tag-platform_tag.whl
          # The last field keeps the ".whl" suffix because `read` assigns the
          # remainder of the line to the final variable.
          IFS='-' read -r dist_name version py_tag abi_tag plat_tag <<< "$filename"

          # CPU all-variants is now an internal runtime layout detail, so only
          # the CUDA version is appended as a local version suffix.
          # NOTE(review): only the file name changes here; the metadata inside
          # the wheel still carries the original version - confirm that
          # installers accept the mismatch.
          new_version="${version}+cu${cuda_ver_short}"
          new_filename="${dist_name}-${new_version}-${py_tag}-${abi_tag}-${plat_tag}"
          mv "$wheel_path" "dist/$new_filename"
          echo "Renamed wheel to: $new_filename"

          # Exported for the release step's tag name.
          echo "CUDA_VERSION=$cuda_ver_short" >> "$GITHUB_ENV"
          echo "TAG_VERSION=$version" >> "$GITHUB_ENV"

      - name: Get current date
        id: get-date
        run: |
          currentDate=$(date +%Y%m%d)
          echo "BUILD_DATE=$currentDate" >> "$GITHUB_ENV"

      # TAG_VERSION is only written at the end of the build step, so this
      # step is skipped whenever no wheel was produced.
      - name: Create release
        if: always() && env.TAG_VERSION != ''
        uses: softprops/action-gh-release@v3
        with:
          files: dist/*
          tag_name: v${{ env.TAG_VERSION }}-cu${{ env.CUDA_VERSION }}-linux-${{ env.BUILD_DATE }}
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}