# Build Wheels (CU126) for Linux #78
# This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Workflow that builds CUDA 12.6 Linux wheels and attaches them to a GitHub
# release.
name: Build Wheels (CU126) for Linux

# Manual trigger only: the workflow runs when started from the Actions tab or
# through the workflow_dispatch API.
on:
  workflow_dispatch:

permissions:
  # Write access to repository contents; the job's release step uploads the
  # built wheels with GITHUB_TOKEN.
  contents: write
jobs:
  # Builds one CUDA wheel per Python version inside NVIDIA's CUDA devel
  # container, appends a "+cu<ver>" local version to the wheel filename, and
  # uploads the result to a GitHub release.
  build_wheels:
    name: Build Wheel ${{ matrix.os }} py${{ matrix.pyver }} cu126
    # Runner and toolkit image are taken from the matrix so that the job name,
    # the "+cu<ver>" wheel suffix and the actual CUDA toolkit cannot drift
    # apart when the matrix is edited.
    runs-on: ${{ matrix.os }}
    container: nvidia/cuda:${{ matrix.cuda }}-cudnn-devel-ubuntu22.04
    strategy:
      # Let the remaining Python versions finish even if one of them fails.
      fail-fast: false
      matrix:
        os: ["ubuntu-22.04"]
        pyver: ["3.10", "3.11", "3.12", "3.13", "3.14"]  # Python versions
        cuda: ["12.6.3"]
        # Semicolon-separated list forwarded to CMAKE_CUDA_ARCHITECTURES.
        cudaarch: ["70-real;75-real;80-real;86-real;87-real;89-real"]
    defaults:
      run:
        shell: bash
    env:
      CUDAVER: ${{ matrix.cuda }}
      CUDAARCHVER: ${{ matrix.cudaarch }}
      MAX_JOBS: "12"
    steps:
      - name: Install dependencies
        env:
          # Keep package configuration from prompting in the unattended job.
          DEBIAN_FRONTEND: noninteractive
        # apt-get is used instead of apt because apt does not offer a stable
        # command-line interface for scripts.
        run: |
          apt-get update
          apt-get install -y \
            build-essential \
            ccache \
            cmake \
            curl \
            git \
            libgomp1 \
            libjpeg-dev \
            libssl-dev \
            ninja-build

      - name: Checkout
        uses: actions/checkout@v6
        with:
          submodules: recursive

      - name: Install uv and Python ${{ matrix.pyver }}
        uses: astral-sh/setup-uv@v7
        with:
          python-version: ${{ matrix.pyver }}
          activate-environment: true
          enable-cache: true

      - name: Show CUDA version
        run: nvcc -V

      - name: Build wheel
        env:
          # LD_LIBRARY_PATH is deliberately NOT set here: values in `env:` are
          # not shell-expanded, so "${LD_LIBRARY_PATH}" would be stored as
          # literal text. It is extended inside the script instead.
          VERBOSE: "1"
          CUDA_HOME: "/usr/local/cuda"
          CUDA_PATH: "/usr/local/cuda"
          CUDA_TOOLKIT_ROOT_DIR: "/usr/local/cuda"
        run: |
          set -euo pipefail

          # Prepend the CUDA library directories while keeping whatever the
          # container image already exported. The ":+" form appends the old
          # value only when it is set and non-empty (safe under `set -u`).
          export LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/local/cuda/compat:/usr/lib/x86_64-linux-gnu${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
          echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}"

          # Diagnostics only: list the CUDA driver/runtime libraries present.
          find /usr/ -name 'libcuda.so.*' || true
          find /usr/ -name 'libcudart.so.*' || true

          # "12.6.3" -> "126": keep major.minor and drop the dot.
          cuda_ver_short=$(echo "${CUDAVER}" | cut -d'.' -f 1,2 | sed 's/\.//g')

          # Build one CUDA wheel with dynamic GGML backends:
          # - GGML_BACKEND_DL enables runtime-loadable backend shared libraries.
          # - GGML_CPU_ALL_VARIANTS builds CPU variant backends when supported.
          # - GGML_NATIVE=OFF avoids binding the wheel to the CI runner CPU.
          CMAKE_ARGS_ARRAY=(
            "-G Ninja"
            # Disable non-wheel targets.
            "-DLLAMA_BUILD_EXAMPLES=OFF"
            "-DLLAMA_BUILD_TESTS=OFF"
            "-DLLAMA_BUILD_TOOLS=OFF"
            "-DLLAMA_BUILD_SERVER=OFF"
            "-DLLAMA_BUILD_UI=OFF"
            "-DLLAMA_USE_PREBUILT_UI=OFF"
            "-DLLAMA_CURL=OFF"
            "-DLLAMA_OPENSSL=ON"
            # GGML dynamic backend layout.
            "-DGGML_CPU=ON"
            "-DGGML_CUDA=ON"
            "-DGGML_NATIVE=OFF"
            "-DGGML_BACKEND_DL=ON"
            "-DGGML_CPU_ALL_VARIANTS=ON"
            "-DGGML_OPENMP=ON"
            # CUDA backend.
            "-DCMAKE_CUDA_ARCHITECTURES=${CUDAARCHVER}"
            "-DGGML_CUDA_FORCE_MMQ=ON"
            "-DCUDA_SEPARABLE_COMPILATION=ON"
            "-DCMAKE_CUDA_FLAGS=--diag-suppress=177,221,550"
            # Build behavior.
            # NOTE(review): CMake documents CMAKE_BUILD_PARALLEL_LEVEL as an
            # environment variable; passed as -D it is presumably an unused
            # cache entry. Confirm before relying on MAX_JOBS to limit jobs.
            "-DCMAKE_BUILD_PARALLEL_LEVEL=${MAX_JOBS}"
            "-DGGML_CCACHE=ON"
            "-DENABLE_CCACHE=ON"
          )
          # Flatten the array into the single space-separated string that is
          # handed to the build through the CMAKE_ARGS environment variable.
          CMAKE_ARGS="${CMAKE_ARGS_ARRAY[*]}"
          echo "CMAKE_ARGS=${CMAKE_ARGS}"

          uv pip install --upgrade build setuptools wheel packaging
          CMAKE_ARGS="${CMAKE_ARGS}" uv build --wheel

          if ! ls dist/*.whl >/dev/null 2>&1; then
            echo "No wheel built in dist/ directory"
            exit 1
          fi

          # Only the first wheel found in dist/ is renamed.
          wheel_path=$(ls dist/*.whl | head -n 1)
          filename=$(basename "$wheel_path")

          # Wheel filename format:
          # name-version-python_tag-abi_tag-platform_tag.whl
          # (plat_tag therefore still carries the ".whl" extension.)
          IFS='-' read -r dist_name version py_tag abi_tag plat_tag <<< "$filename"

          # CPU all-variants is now an internal runtime layout detail.
          new_version="${version}+cu${cuda_ver_short}"
          new_filename="${dist_name}-${new_version}-${py_tag}-${abi_tag}-${plat_tag}"
          mv "$wheel_path" "dist/$new_filename"
          echo "Renamed wheel to: $new_filename"

          # Export values used by the release step's tag name and condition.
          echo "CUDA_VERSION=$cuda_ver_short" >> "$GITHUB_ENV"
          echo "TAG_VERSION=$version" >> "$GITHUB_ENV"

      - name: Get current date
        id: get-date
        run: |
          currentDate=$(date +%Y%m%d)
          echo "BUILD_DATE=$currentDate" >> "$GITHUB_ENV"

      - name: Create release
        # TAG_VERSION is written only at the very end of the build step, so
        # this condition is true only after a wheel was built and renamed.
        if: always() && env.TAG_VERSION != ''
        uses: softprops/action-gh-release@v3
        with:
          files: dist/*
          tag_name: v${{ env.TAG_VERSION }}-cu${{ env.CUDA_VERSION }}-linux-${{ env.BUILD_DATE }}
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}