diff --git a/contrib/godbolt/README.md b/contrib/godbolt/README.md new file mode 100644 index 00000000..84947b2e --- /dev/null +++ b/contrib/godbolt/README.md @@ -0,0 +1,132 @@ +# Compiler Explorer (Godbolt) Integration for rust-cuda + +This directory contains everything needed to add rust-cuda as a compiler on +[Compiler Explorer](https://compiler-explorer.com/) so that users can type +Rust GPU kernel code and see the resulting PTX assembly. + +## How it works + +Compiler Explorer expects a single "compiler" binary that reads source on +stdin or from a file and writes assembly to stdout. Since rust-cuda has no +standalone compiler (the pipeline is `rustc` with a custom codegen backend +plus `cargo` for dependency resolution), the integration uses a small Rust +wrapper binary that: + +1. Accepts a `.rs` file containing `#[kernel]` functions. +2. Creates a temporary Cargo project that depends on `cuda_std`. +3. Sets `CARGO_ENCODED_RUSTFLAGS` with the same flags `cuda_builder` uses + (codegen backend path, `no_std` injection, `nvptx64-nvidia-cuda` target, + `-Zbuild-std=core,alloc`, etc.). +4. Runs `cargo build` and parses the JSON output to locate the `.ptx` artifact. +5. Prints the PTX to stdout (or LLVM IR if `--emit=llvm-ir` is passed). +6. Forwards compiler diagnostics to stderr so CE displays them. 
+ +## Files + +| File | Purpose | +|------|---------| +| `rust-cuda-wrapper/` | Rust crate for the wrapper binary CE invokes as the "compiler" | +| `rust-cuda.defaults.properties` | CE configuration (compiler type, flags, defaults) | +| `rust-cuda.amazon.properties` | CE instance-specific overrides for the AWS fleet | +| `install.sh` | Installs the pinned nightly, builds the codegen backend and the wrapper, and lays out the prefix | +| `test-kernel.rs` | Sample kernel with shared memory and thread indexing | + +## Supported flags + +| Flag | Description | +|------|-------------| +| `--emit=ptx` | Output PTX assembly (default) | +| `--emit=llvm-ir` | Output LLVM IR before libnvvm conversion | +| `--opt-level=0` | Disable optimisations | +| `--opt-level=3` | Enable optimisations (default) | +| `--gpu-arch=compute_XX` | Target GPU virtual architecture, passed to libnvvm as `-arch=` (default `compute_75` / Turing) | +| `--version` | Print version info | + +## Testing locally + +### Prerequisites + +- CUDA toolkit installed (the wrapper needs `libnvvm` in `$CUDA_PATH/nvvm/lib64/`) +- The Rust nightly pinned in `rust-toolchain.toml` (`nightly-2026-04-02`) +- A built `librustc_codegen_nvvm.so` + +### Quick test + +```bash +# From the rust-cuda repo root, after building the codegen backend: +export RUST_CUDA_ROOT=/opt/compiler-explorer/rust-cuda # or your install prefix +export CUDA_PATH=/usr/local/cuda + +# Run install.sh first (or manually arrange the prefix): +./contrib/godbolt/install.sh + +# Then test: +$RUST_CUDA_ROOT/bin/rust-cuda-wrapper contrib/godbolt/test-kernel.rs +``` + +You should see PTX assembly printed to stdout. + +### Without install.sh + +If you already have the codegen backend built in the workspace, you can +point the wrapper at the repo tree directly: + +```bash +export RUST_CUDA_ROOT=/path/to/rust-cuda +# Ensure $RUST_CUDA_ROOT/lib/librustc_codegen_nvvm.so exists. 
+ +cd contrib/godbolt/rust-cuda-wrapper +cargo run --release -- ../test-kernel.rs +``` + +### Running the integration test + +The wrapper crate ships a smoke test that compiles `test-kernel.rs` +end-to-end and asserts the output looks like PTX: + +```bash +cd contrib/godbolt/rust-cuda-wrapper +cargo test # skips without RUST_CUDA_ROOT +RUST_CUDA_ROOT=/path/to/rust-cuda cargo test # runs the real build +``` + +## Submitting to Compiler Explorer + +1. Open an issue on [compiler-explorer/compiler-explorer](https://github.com/compiler-explorer/compiler-explorer) + proposing the new compiler, linking to this directory. +2. Open a PR on [compiler-explorer/infra](https://github.com/compiler-explorer/infra) + that adds `install.sh` to the builder configuration. +3. Copy `rust-cuda.defaults.properties` into + `etc/config/` in the compiler-explorer repo. +4. Copy `rust-cuda.amazon.properties` into the appropriate instance + config directory. + +Key things CE maintainers will want to verify: + +- The wrapper is sandboxed (it only writes to `$TMPDIR` and cleans up). +- Build times are acceptable (first build is slow due to `-Zbuild-std`; subsequent + builds reuse the sysroot cache). +- The CUDA toolkit / `libnvvm` licence permits redistribution on CE's + infrastructure (NVIDIA's EULA generally allows this for development tools). + +## Compilation pipeline + +For reference, the full pipeline that the wrapper reproduces: + +``` + User's .rs file + | + v + [cargo build] + | --target=nvptx64-nvidia-cuda + | -Zbuild-std=core,alloc + | CARGO_ENCODED_RUSTFLAGS with -Zcodegen-backend=... 
+ v
+ [rustc + rustc_codegen_nvvm]
+ | Compiles Rust -> NVVM IR (LLVM 7 bitcode dialect)
+ v
+ [libnvvm] (from CUDA toolkit)
+ | Optimises NVVM IR -> PTX
+ v
+ .ptx file (stdout)
+```
diff --git a/contrib/godbolt/install.sh b/contrib/godbolt/install.sh
new file mode 100755
index 00000000..ba17ff82
--- /dev/null
+++ b/contrib/godbolt/install.sh
@@ -0,0 +1,139 @@
+#!/usr/bin/env bash
+#
+# install.sh - Install the rust-cuda toolchain for Compiler Explorer.
+#
+# This script is meant to be run on a CE builder node (or locally for
+# testing). It performs the following:
+#
+#   1. Installs the pinned Rust nightly with required components.
+#   2. Clones the rust-cuda repository (or uses a local checkout).
+#   3. Builds the rustc_codegen_nvvm codegen backend against libnvvm.
+#   4. Copies the backend, cuda_std sources, and the wrapper binary into
+#      a self-contained prefix under /opt/compiler-explorer/rust-cuda/.
+#
+# Prerequisites:
+#   - CUDA toolkit installed (CUDA_PATH or /usr/local/cuda)
+#   - cmake, ninja-build, clang, pkg-config, libssl-dev, zlib1g-dev
+#   - For LLVM 7 path: the prebuilt LLVM archive is downloaded automatically
+#     by the codegen's build.rs, or you can pre-install LLVM 7 and export
+#     LLVM_CONFIG=/path/to/llvm-config-7.
+#
+# Environment variables:
+#   INSTALL_PREFIX - Where to install (default: /opt/compiler-explorer/rust-cuda)
+#   CUDA_PATH      - CUDA toolkit root (default: /usr/local/cuda)
+#   RUST_CUDA_REPO - Path to an existing rust-cuda checkout (skips git clone)
+#   RUST_CUDA_REF  - Branch or tag to clone (default: main); it is passed to
+#                    `git clone --branch`, which does not accept commit hashes
+
+set -euo pipefail
+
+INSTALL_PREFIX="${INSTALL_PREFIX:-/opt/compiler-explorer/rust-cuda}"
+CUDA_PATH="${CUDA_PATH:-/usr/local/cuda}"
+RUST_CUDA_REF="${RUST_CUDA_REF:-main}"
+
+NIGHTLY="nightly-2026-04-02"
+COMPONENTS="rust-src,rustc-dev,llvm-tools-preview"
+
+echo "==> rust-cuda Compiler Explorer installer"
+echo " prefix: ${INSTALL_PREFIX}"
+echo " CUDA: ${CUDA_PATH}"
+echo " nightly: ${NIGHTLY}"
+
+# ---------------------------------------------------------------------------
+# 1. Install the pinned Rust nightly
+# ---------------------------------------------------------------------------
+echo "==> Installing Rust ${NIGHTLY} ..."
+if ! command -v rustup &>/dev/null; then
+    curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --profile minimal --default-toolchain none
+    export PATH="${HOME}/.cargo/bin:${PATH}"
+fi
+
+rustup toolchain install "${NIGHTLY}" --component "${COMPONENTS}"
+# The wrapper spawns plain `cargo` inside a generated crate, so the pinned
+# nightly has to be the default toolchain of the account that runs it.
+rustup default "${NIGHTLY}"
+
+echo " rustc: $(rustc --version)"
+
+# ---------------------------------------------------------------------------
+# 2. Get the rust-cuda source
+# ---------------------------------------------------------------------------
+if [[ -n "${RUST_CUDA_REPO:-}" ]]; then
+    REPO_DIR="${RUST_CUDA_REPO}"
+    echo "==> Using existing checkout at ${REPO_DIR}"
+else
+    REPO_DIR="$(mktemp -d -t rust-cuda-src.XXXXXXXXXX)"
+    echo "==> Cloning rust-cuda (ref: ${RUST_CUDA_REF}) into ${REPO_DIR} ..."
+    git clone --depth 1 --branch "${RUST_CUDA_REF}" \
+        https://github.com/Rust-GPU/rust-cuda.git "${REPO_DIR}"
+fi
+
+# ---------------------------------------------------------------------------
+# 3. Build the codegen backend
+# ---------------------------------------------------------------------------
+echo "==> Building rustc_codegen_nvvm ..."
+# Append the inherited LD_LIBRARY_PATH only when it is non-empty: a trailing
+# ':' is an empty entry, which the loader treats as the current directory.
+export LD_LIBRARY_PATH="${CUDA_PATH}/nvvm/lib64:${CUDA_PATH}/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
+
+cd "${REPO_DIR}"
+cargo build -p rustc_codegen_nvvm --release
+
+# Find the built .so
+CODEGEN_SO="$(find target/release -maxdepth 2 -name 'librustc_codegen_nvvm.so' -print -quit 2>/dev/null || true)"
+if [[ -z "${CODEGEN_SO}" ]]; then
+    # Try the deps directory with hash suffix.
+    CODEGEN_SO="$(find target/release/deps -maxdepth 1 -name 'librustc_codegen_nvvm-*.so' -print -quit 2>/dev/null || true)"
+fi
+if [[ -z "${CODEGEN_SO}" ]]; then
+    echo "error: could not find librustc_codegen_nvvm.so after build" >&2
+    exit 1
+fi
+echo " codegen backend: ${CODEGEN_SO}"
+
+# ---------------------------------------------------------------------------
+# 4. Install into the prefix
+# ---------------------------------------------------------------------------
+echo "==> Installing to ${INSTALL_PREFIX} ..."
+mkdir -p "${INSTALL_PREFIX}"/{bin,lib,crates}
+
+# Backend shared library.
+cp "${CODEGEN_SO}" "${INSTALL_PREFIX}/lib/librustc_codegen_nvvm.so"
+
+# Copy the crates that kernel code depends on at build time. Remove any copy
+# left by a previous run first: `cp -a src dst` nests src inside dst when dst
+# already exists, which would leave the old sources in place.
+for crate in cuda_std cuda_std_macros; do
+    rm -rf -- "${INSTALL_PREFIX}/crates/${crate}"
+    cp -a "${REPO_DIR}/crates/${crate}" "${INSTALL_PREFIX}/crates/${crate}"
+done
+
+# Copy workspace-level files needed by cargo (Cargo.lock is especially
+# important so dependency resolution is reproducible).
+if ! cp "${REPO_DIR}/Cargo.lock" "${INSTALL_PREFIX}/" 2>/dev/null; then
+    echo "warning: could not copy Cargo.lock; dependency resolution will not be pinned" >&2
+fi
+
+# Build and install the wrapper binary.
+(
+    cd "${REPO_DIR}/contrib/godbolt/rust-cuda-wrapper"
+    cargo build --release
+)
+cp "${REPO_DIR}/contrib/godbolt/rust-cuda-wrapper/target/release/rust-cuda-wrapper" \
+    "${INSTALL_PREFIX}/bin/"
+chmod +x "${INSTALL_PREFIX}/bin/rust-cuda-wrapper"
+
+# Version marker.
+echo "${NIGHTLY}" > "${INSTALL_PREFIX}/rust-toolchain-version"
+
+# Also copy any native libs the codegen may need at link time (from the
+# same build directory).
+for lib in "${REPO_DIR}"/target/release/deps/lib*.so; do
+    [[ -f "${lib}" ]] && cp "${lib}" "${INSTALL_PREFIX}/lib/" 2>/dev/null || true
+done
+
+echo "==> Installation complete."
+echo ""
+echo "Test with:"
+echo " RUST_CUDA_ROOT=${INSTALL_PREFIX} CUDA_PATH=${CUDA_PATH} \\"
+echo " ${INSTALL_PREFIX}/bin/rust-cuda-wrapper contrib/godbolt/test-kernel.rs"
diff --git a/contrib/godbolt/rust-cuda-wrapper/.gitignore b/contrib/godbolt/rust-cuda-wrapper/.gitignore
new file mode 100644
index 00000000..ea8c4bf7
--- /dev/null
+++ b/contrib/godbolt/rust-cuda-wrapper/.gitignore
@@ -0,0 +1 @@
+/target
diff --git a/contrib/godbolt/rust-cuda-wrapper/Cargo.lock b/contrib/godbolt/rust-cuda-wrapper/Cargo.lock
new file mode 100644
index 00000000..e3a467be
--- /dev/null
+++ b/contrib/godbolt/rust-cuda-wrapper/Cargo.lock
@@ -0,0 +1,571 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 4 + +[[package]] +name = "anstream" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "824a212faf96e9acacdbd09febd34438f8f711fb84e09a8916013cd7815ca28d" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "940b3a0ca603d1eade50a4846a2afffd5ef57a9feac2c0e2ec2e14f9ead76000" + +[[package]] +name = "anstyle-parse" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52ce7f38b242319f7cabaa6813055467063ecdc9d355bbb4ce0c68908cd8130e" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" +dependencies = [ + "anstyle", + "once_cell_polyfill", + "windows-sys", +] + +[[package]] +name = "anyhow" +version = "1.0.102" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" + +[[package]] +name = "bitflags" +version = "2.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4512299f36f043ab09a583e57bceb5a5aab7a73db1805848e8fef3c9e8c78b3" + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "clap" +version = "4.6.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ddb117e43bbf7dacf0a4190fef4d345b9bad68dfc649cb349e7d17d28428e51" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "714a53001bf66416adb0e2ef5ac857140e7dc3a0c48fb28b2f10762fc4b5069f" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2ce8604710f6733aa641a2b3731eaa1e8b3d9973d5e3565da11800813f997a9" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9" + +[[package]] +name = "colorchoice" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d07550c9036bf2ae0c684c4297d503f838287c83c53686d05370d0e139ae570" + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "errno" +version = "0.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" +dependencies = [ + "libc", + "windows-sys", +] + +[[package]] +name = "fastrand" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f1f227452a390804cdb637b74a86990f2a7d7ba4b7d5693aac9b4dd6defd8d6" + +[[package]] +name = "foldhash" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" + +[[package]] +name = 
"getrandom" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0de51e6874e94e7bf76d726fc5d13ba782deca734ff60d5bb2fb2607c7406555" +dependencies = [ + "cfg-if", + "libc", + "r-efi", + "wasip2", + "wasip3", +] + +[[package]] +name = "hashbrown" +version = "0.15.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" +dependencies = [ + "foldhash", +] + +[[package]] +name = "hashbrown" +version = "0.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed5909b6e89a2db4456e54cd5f673791d7eca6732202bbf2a9cc504fe2f9b84a" + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "id-arena" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" + +[[package]] +name = "indexmap" +version = "2.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d466e9454f08e4a911e14806c24e16fba1b4c121d1ea474396f396069cf949d9" +dependencies = [ + "equivalent", + "hashbrown 0.17.1", + "serde", + "serde_core", +] + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" + +[[package]] +name = "itoa" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682" + +[[package]] +name = "leb128fmt" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" + +[[package]] +name = 
"libc" +version = "0.2.186" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66" + +[[package]] +name = "linux-raw-sys" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53" + +[[package]] +name = "log" +version = "0.4.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" + +[[package]] +name = "memchr" +version = "2.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" + +[[package]] +name = "once_cell" +version = "1.21.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50" + +[[package]] +name = "once_cell_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" + +[[package]] +name = "prettyplease" +version = "0.2.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" +dependencies = [ + "proc-macro2", + "syn", +] + +[[package]] +name = "proc-macro2" +version = "1.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "r-efi" +version = "6.0.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf" + +[[package]] +name = "rust-cuda-wrapper" +version = "0.1.0" +dependencies = [ + "anyhow", + "clap", + "serde_json", + "tempfile", +] + +[[package]] +name = "rustix" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6fe4565b9518b83ef4f91bb47ce29620ca828bd32cb7e408f0062e9930ba190" +dependencies = [ + "bitflags", + "errno", + "libc", + "linux-raw-sys", + "windows-sys", +] + +[[package]] +name = "semver" +version = "1.0.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a7852d02fc848982e0c167ef163aaff9cd91dc640ba85e263cb1ce46fae51cd" + +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.149" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" +dependencies = [ + "itoa", + "memchr", + "serde", + "serde_core", + "zmij", +] + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + +[[package]] +name = "syn" 
+version = "2.0.117" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "tempfile" +version = "3.27.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32497e9a4c7b38532efcdebeef879707aa9f794296a4f0244f6f69e9bc8574bd" +dependencies = [ + "fastrand", + "getrandom", + "once_cell", + "rustix", + "windows-sys", +] + +[[package]] +name = "unicode-ident" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" + +[[package]] +name = "unicode-xid" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "wasip2" +version = "1.0.3+wasi-0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "20064672db26d7cdc89c7798c48a0fdfac8213434a1186e5ef29fd560ae223d6" +dependencies = [ + "wit-bindgen 0.57.1", +] + +[[package]] +name = "wasip3" +version = "0.4.0+wasi-0.3.0-rc-2026-01-06" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5" +dependencies = [ + "wit-bindgen 0.51.0", +] + +[[package]] +name = "wasm-encoder" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "990065f2fe63003fe337b932cfb5e3b80e0b4d0f5ff650e6985b1048f62c8319" +dependencies = [ + "leb128fmt", + "wasmparser", +] + +[[package]] +name = "wasm-metadata" +version = "0.244.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909" +dependencies = [ + "anyhow", + "indexmap", + "wasm-encoder", + "wasmparser", +] + +[[package]] +name = "wasmparser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" +dependencies = [ + "bitflags", + "hashbrown 0.15.5", + "indexmap", + "semver", +] + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-sys" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link", +] + +[[package]] +name = "wit-bindgen" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" +dependencies = [ + "wit-bindgen-rust-macro", +] + +[[package]] +name = "wit-bindgen" +version = "0.57.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ebf944e87a7c253233ad6766e082e3cd714b5d03812acc24c318f549614536e" + +[[package]] +name = "wit-bindgen-core" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc" +dependencies = [ + "anyhow", + "heck", + "wit-parser", +] + +[[package]] +name = "wit-bindgen-rust" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" +dependencies = [ + "anyhow", + "heck", + "indexmap", + "prettyplease", + "syn", + "wasm-metadata", + "wit-bindgen-core", + 
"wit-component", +] + +[[package]] +name = "wit-bindgen-rust-macro" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c0f9bfd77e6a48eccf51359e3ae77140a7f50b1e2ebfe62422d8afdaffab17a" +dependencies = [ + "anyhow", + "prettyplease", + "proc-macro2", + "quote", + "syn", + "wit-bindgen-core", + "wit-bindgen-rust", +] + +[[package]] +name = "wit-component" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" +dependencies = [ + "anyhow", + "bitflags", + "indexmap", + "log", + "serde", + "serde_derive", + "serde_json", + "wasm-encoder", + "wasm-metadata", + "wasmparser", + "wit-parser", +] + +[[package]] +name = "wit-parser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736" +dependencies = [ + "anyhow", + "id-arena", + "indexmap", + "log", + "semver", + "serde", + "serde_derive", + "serde_json", + "unicode-xid", + "wasmparser", +] + +[[package]] +name = "zmij" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" diff --git a/contrib/godbolt/rust-cuda-wrapper/Cargo.toml b/contrib/godbolt/rust-cuda-wrapper/Cargo.toml new file mode 100644 index 00000000..4f77eb17 --- /dev/null +++ b/contrib/godbolt/rust-cuda-wrapper/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "rust-cuda-wrapper" +version = "0.1.0" +edition = "2024" + +# Stand-alone workspace so this crate is independent of the rust-cuda +# workspace's pinned nightly. CE may update it on its own cadence. 
+[workspace]
+
+[dependencies]
+anyhow = "1.0.102"
+clap = { version = "4.6.1", features = ["derive"] }
+serde_json = "1.0.149"
+tempfile = "3.27.0"
diff --git a/contrib/godbolt/rust-cuda-wrapper/src/cargo_toml.template b/contrib/godbolt/rust-cuda-wrapper/src/cargo_toml.template
new file mode 100644
index 00000000..bacc34ec
--- /dev/null
+++ b/contrib/godbolt/rust-cuda-wrapper/src/cargo_toml.template
@@ -0,0 +1,10 @@
+[package]
+name = "gpu_kernel"
+version = "0.1.0"
+edition = "2024"
+
+[dependencies]
+cuda_std = { path = "__CUDA_STD_PATH__" }
+
+[lib]
+crate-type = ["cdylib", "rlib"]
diff --git a/contrib/godbolt/rust-cuda-wrapper/src/main.rs b/contrib/godbolt/rust-cuda-wrapper/src/main.rs
new file mode 100644
index 00000000..ae6b9958
--- /dev/null
+++ b/contrib/godbolt/rust-cuda-wrapper/src/main.rs
@@ -0,0 +1,195 @@
+//! Compiler Explorer wrapper for rust-cuda.
+//!
+//! Accepts a `.rs` file containing a `#[kernel]` function, drops it into a
+//! generated temporary Cargo project that depends on `cuda_std`, invokes
+//! `cargo build` with `rustc_codegen_nvvm` as the codegen backend, and
+//! writes the resulting PTX (or LLVM IR) to stdout. Compiler diagnostics
+//! are relayed to stderr and the process exits with a non-zero status if
+//! no artifact was produced.
+//!
+//! Expects two environment variables:
+//!
+//! * `RUST_CUDA_ROOT`: install prefix containing `lib/librustc_codegen_nvvm.so`
+//!   and `crates/cuda_std/` (required).
+//! * `CUDA_PATH`: CUDA toolkit root, used to locate `libnvvm` at runtime
+//!   (defaults to `/usr/local/cuda` if unset).
+
+use anyhow::{Context, Result};
+use clap::Parser;
+use clap::ValueEnum;
+use std::path::PathBuf;
+
+/// Rustflags applied on every build. These mirror the flags `cuda_builder`
+/// passes when invoking rustc directly, so PTX produced through the wrapper
+/// matches what a normal rust-cuda build would emit.
+const STATIC_RUSTFLAGS: [&str; 6] = [
+    "-Zunstable-options",
+    "-Zcrate-attr=feature(register_tool)",
+    "-Zcrate-attr=register_tool(nvvm_internal)",
+    "-Zcrate-attr=no_std",
+    "-Zsaturating_float_casts=false",
+    "-Cpanic=immediate-abort",
+];
+
+// Output kind selected with `--emit`. (Plain comments are used on the CLI
+// types because clap turns doc comments into help text.)
+#[derive(ValueEnum, Clone, Debug)]
+enum Emit {
+    Ptx,
+    LlvmIr,
+}
+
+// Command-line arguments accepted by the wrapper.
+#[derive(Parser, Debug)]
+#[command(version)]
+struct Args {
+    // 0 selects an unoptimised debug build; any other value a release build.
+    #[arg(long, default_value_t = 3)]
+    opt_level: u8,
+    // Target architecture, spelled `compute_XX` or `sm_XX`.
+    #[arg(long, default_value = "compute_75")]
+    gpu_arch: String,
+    #[arg(long, value_enum, default_value_t = Emit::Ptx)]
+    emit: Emit,
+    // Path of the kernel source file to compile.
+    input: PathBuf,
+}
+
+/// Rewrites an `sm_XX` architecture name to the `compute_XX` spelling that
+/// libnvvm's `-arch` option expects; other values pass through unchanged.
+fn normalize_gpu_arch(arch: &str) -> String {
+    match arch.strip_prefix("sm_") {
+        Some(version) => format!("compute_{}", version),
+        None => arch.to_string(),
+    }
+}
+
+/// Joins the directories the codegen backend needs onto the inherited
+/// `LD_LIBRARY_PATH` value `existing` (which may be empty).
+fn loader_search_path(codegen_dir: &str, cuda_path: &str, existing: &str) -> String {
+    let mut path = format!("{}:{}/nvvm/lib64:{}/lib64", codegen_dir, cuda_path, cuda_path);
+    // A trailing ':' is an empty entry, which the dynamic loader treats as
+    // the current directory, so only append a non-empty inherited value.
+    if !existing.is_empty() {
+        path.push(':');
+        path.push_str(existing);
+    }
+    path
+}
+
+/// Returns the last `.ptx` file named by a `compiler-artifact` message in
+/// cargo's JSON output, if any.
+fn find_ptx_artifact(cargo_stdout: &str) -> Option<String> {
+    let mut ptx_path = None;
+    for line in cargo_stdout.lines() {
+        // Lines that are not valid JSON are skipped.
+        let Ok(json) = serde_json::from_str::<serde_json::Value>(line) else {
+            continue;
+        };
+        if json.get("reason").and_then(|v| v.as_str()) != Some("compiler-artifact") {
+            continue;
+        }
+        let Some(filenames) = json.get("filenames").and_then(|v| v.as_array()) else {
+            continue;
+        };
+        for name in filenames.iter().filter_map(|f| f.as_str()) {
+            if name.ends_with(".ptx") {
+                ptx_path = Some(name.to_string());
+            }
+        }
+    }
+    ptx_path
+}
+
+fn main() -> Result<()> {
+    let args = Args::parse();
+
+    // Lay out a throw-away crate: <tmp>/gpu_kernel/{Cargo.toml,src/lib.rs}.
+    let tmp = tempfile::TempDir::new().context("failed to create temp dir")?;
+    let crate_dir = tmp.path().join("gpu_kernel");
+    let src_dir = crate_dir.join("src");
+    std::fs::create_dir_all(&src_dir).context("failed to create src dir")?;
+
+    let root = std::env::var("RUST_CUDA_ROOT").context("RUST_CUDA_ROOT not set")?;
+    let template = include_str!("cargo_toml.template");
+    let cargo_toml = template.replace("__CUDA_STD_PATH__", &format!("{}/crates/cuda_std", root));
+    std::fs::write(crate_dir.join("Cargo.toml"), &cargo_toml)
+        .context("failed to write cargo toml")?;
+    std::fs::copy(&args.input, src_dir.join("lib.rs")).context("failed to copy input file")?;
+
+    let mut rustflags: Vec<String> = vec![format!(
+        "-Zcodegen-backend={}/lib/librustc_codegen_nvvm.so",
+        root
+    )];
+    rustflags.extend(STATIC_RUSTFLAGS.iter().map(|flag| flag.to_string()));
+
+    // libnvvm only accepts `compute_XX` for `-arch`, while the README and the
+    // CE properties advertise `sm_XX`; accept both spellings.
+    let mut llvm_args = format!(
+        "-arch={} --override-libm",
+        normalize_gpu_arch(&args.gpu_arch)
+    );
+    if args.opt_level == 0 {
+        llvm_args.push_str(" -opt=0");
+    }
+
+    // NOTE(review): artifact discovery below only looks for `.ptx` files, so
+    // the IR requested here does not appear to reach stdout -- confirm where
+    // the backend writes it and read that file instead.
+    if matches!(args.emit, Emit::LlvmIr) {
+        rustflags.push("--emit=llvm-ir".to_string());
+    }
+
+    rustflags.push(format!("-Cllvm-args={}", llvm_args));
+
+    // Cargo reads flags from CARGO_ENCODED_RUSTFLAGS as a list joined by the
+    // ASCII unit-separator (0x1F), which avoids the quoting ambiguity that
+    // RUSTFLAGS has with flags containing spaces (e.g. `-Cllvm-args=...`).
+    let encoded = rustflags.join("\x1f");
+
+    // The codegen backend dlopen()s libnvvm and friends at load time, so
+    // their directories must be on LD_LIBRARY_PATH for rustc to start at all.
+    let cuda_path = std::env::var("CUDA_PATH").unwrap_or_else(|_| "/usr/local/cuda".to_string());
+    let existing_ld = std::env::var("LD_LIBRARY_PATH").unwrap_or_default();
+    let ld_library_path = loader_search_path(&format!("{}/lib", root), &cuda_path, &existing_ld);
+
+    let mut cmd = std::process::Command::new("cargo");
+    cmd.current_dir(&crate_dir)
+        .env("CARGO_ENCODED_RUSTFLAGS", &encoded)
+        // `cuda_std` gates `f16`/`f128` support behind this feature flag; the
+        // nvptx64 target does not support those types natively.
+        .env("CARGO_FEATURE_NO_F16_F128", "1")
+        .env("LD_LIBRARY_PATH", &ld_library_path)
+        .arg("build")
+        .arg("--lib")
+        .arg("--message-format=json-render-diagnostics")
+        .arg("-Zbuild-std=core,alloc")
+        .arg("--target=nvptx64-nvidia-cuda");
+
+    // Any non-zero opt-level maps to a cargo release build; libnvvm performs
+    // its own optimisation level selection via `-Cllvm-args=-opt=N` above.
+    if args.opt_level != 0 {
+        cmd.arg("--release");
+    }
+
+    let output = cmd.output().context("failed to spawn cargo")?;
+
+    // Cargo renders diagnostics on its stderr; forward them on success too so
+    // that warnings are not silently dropped.
+    eprint!("{}", String::from_utf8_lossy(&output.stderr));
+
+    let stdout = String::from_utf8_lossy(&output.stdout);
+    let ptx_path = find_ptx_artifact(&stdout).filter(|_| output.status.success());
+    let Some(path) = ptx_path else {
+        // `process::exit` skips destructors, so remove the temporary crate
+        // explicitly before exiting.
+        drop(tmp);
+        eprintln!("error: compilation failed, no PTX output produced");
+        std::process::exit(1);
+    };
+
+    let contents = std::fs::read_to_string(&path).context("failed to read PTX file")?;
+    print!("{}", contents);
+
+    Ok(())
+}
diff --git a/contrib/godbolt/rust-cuda-wrapper/tests/integration.rs b/contrib/godbolt/rust-cuda-wrapper/tests/integration.rs
new file mode 100644
index 00000000..5e00578f
--- /dev/null
+++ b/contrib/godbolt/rust-cuda-wrapper/tests/integration.rs
@@ -0,0 +1,32 @@
+use std::process::Command;
+
+// End-to-end smoke test: runs the built wrapper on the sample kernel and
+// checks that stdout contains a PTX `.version` directive.
+#[test]
+fn compiles_test_kernel_to_ptx() {
+    if std::env::var("RUST_CUDA_ROOT").is_err() {
+        eprintln!("skipping: RUST_CUDA_ROOT not set");
+        return;
+    }
+
+    let bin = env!("CARGO_BIN_EXE_rust-cuda-wrapper");
+    let kernel = concat!(env!("CARGO_MANIFEST_DIR"), "/../test-kernel.rs");
+
+    let output = Command::new(bin)
+        .arg(kernel)
+        .output()
+        .expect("failed to run wrapper");
+
+    assert!(
+        output.status.success(),
+        "wrapper exited non-zero. stderr:\n{}",
+        String::from_utf8_lossy(&output.stderr)
+    );
+
+    let stdout = String::from_utf8_lossy(&output.stdout);
+    assert!(
+        stdout.contains(".version"),
+        "stdout doesn't look like PTX. got:\n{}",
+        stdout
+    );
+}
diff --git a/contrib/godbolt/rust-cuda.defaults.properties b/contrib/godbolt/rust-cuda.defaults.properties
new file mode 100644
index 00000000..35b4fb9e
--- /dev/null
+++ b/contrib/godbolt/rust-cuda.defaults.properties
@@ -0,0 +1,65 @@
+# Compiler Explorer -- rust-cuda default properties
+#
+# This file defines how the rust-cuda "compiler" appears in the Compiler
+# Explorer UI. It is loaded by CE's configuration layer; see
+# https://github.com/compiler-explorer/compiler-explorer/blob/main/docs/AddingACompiler.md
+
+# ---------------------------------------------------------------------------
+# Compiler identity
+# ---------------------------------------------------------------------------
+compilerType=rust-cuda
+group=rust-cuda
+groupName=Rust CUDA (rust-cuda)
+
+# The wrapper binary that Godbolt executes. install.sh places it here.
+compiler=/opt/compiler-explorer/rust-cuda/bin/rust-cuda-wrapper
+
+# Language shown in the UI dropdown.
+lang=rust
+
+# Human-readable name in the compiler selector.
+name=rust-cuda main (llvm7)
+
+# Semantic version used for sorting in the compiler list.
+semver=main
+
+# ---------------------------------------------------------------------------
+# Behaviour
+# ---------------------------------------------------------------------------
+
+# The wrapper always writes PTX to stdout and diagnostics to stderr.
+compilerShouldNotProduceOutput=true
+
+# Godbolt options pane entries (shown as dropdowns / checkboxes).
+options=--emit=ptx
+
+# Default flags passed on every invocation.
+defaultOptions=--emit=ptx --opt-level=3
+
+# CE will offer these in the "Compiler options" box as suggestions.
+supportedOptions=--emit=ptx --emit=llvm-ir --opt-level=0 --opt-level=3 --gpu-arch=sm_52 --gpu-arch=sm_70 --gpu-arch=sm_75 --gpu-arch=sm_80 --gpu-arch=sm_86 --gpu-arch=sm_89 --gpu-arch=sm_90
+
+# Output is PTX assembly (NVIDIA's ISA-level text format).
+outputIsAsm=true + +# Do not try to demangle -- PTX symbols are already human-readable. +demangler= + +# No binary output to disassemble. +supportsBinary=false + +# No execution support (GPU code cannot run on the CE host). +supportsExecute=false + +# The --version flag is handled by the wrapper. +versionFlag=--version +versionRe=rust-cuda-wrapper.* + +# Instruction-set reference for hover tooltips. +instructionSet=ptx + +# --------------------------------------------------------------------------- +# Source boilerplate +# --------------------------------------------------------------------------- +# The default source shown when a user picks this compiler. +defaultSource=use cuda_std::prelude::*;\n\n#[kernel]\n#[allow(improper_ctypes_definitions, clippy::missing_safety_doc)]\npub unsafe fn vecadd(a: &[f32], b: &[f32], c: *mut f32) {\n let idx = thread::index_1d() as usize;\n if idx < a.len() {\n let elem = unsafe { &mut *c.add(idx) };\n *elem = a[idx] + b[idx];\n }\n} diff --git a/contrib/godbolt/test-kernel.rs b/contrib/godbolt/test-kernel.rs new file mode 100644 index 00000000..54708229 --- /dev/null +++ b/contrib/godbolt/test-kernel.rs @@ -0,0 +1,77 @@ +// A sample Rust CUDA kernel for Compiler Explorer. +// +// This demonstrates shared-memory tiling, thread indexing, and +// synchronisation -- the core patterns used in GPU programming +// with rust-cuda. + +use cuda_std::prelude::*; +use core::mem::MaybeUninit; + +const TILE: usize = 16; + +/// Tiled matrix-vector multiply: y = A * x. +/// +/// Each block collaboratively loads a tile of x into shared memory, +/// then each thread accumulates its dot-product contribution. 
+///
+/// - `a`: row-major matrix, m rows x n cols
+/// - `x`: input vector, length n
+/// - `y`: output vector, length m; every thread with `row < m` overwrites
+///   `y[row]`, so the buffer does not need to be pre-zeroed
+/// - `m`: number of rows
+/// - `n`: number of columns
+///
+/// The shared tile stages `TILE` consecutive elements of `x` per iteration.
+/// It is filled with a block-stride loop, so the kernel is correct for any
+/// `block_dim_x`, not only `block_dim_x == TILE`.
+///
+/// # Safety
+///
+/// `y` must be valid for writes of at least `m` `f32` values.
+#[kernel]
+#[allow(improper_ctypes_definitions, clippy::missing_safety_doc)]
+pub unsafe fn matvec(a: &[f32], x: &[f32], y: *mut f32, m: usize, n: usize) {
+    #[address_space(shared)]
+    static mut X_TILE: [MaybeUninit<f32>; TILE] = [MaybeUninit::uninit(); TILE];
+
+    let tx = thread::thread_idx_x() as usize;
+    let block_dim = thread::block_dim_x() as usize;
+    let row = thread::block_idx_x() as usize * block_dim + tx;
+
+    let mut sum = 0.0f32;
+
+    // Walk across the columns in tiles of size TILE.
+    let mut col = 0usize;
+    while col < n {
+        // Collaboratively stage one tile of x in shared memory, padding the
+        // tail with zeros. Striding by the block size means every slot is
+        // written exactly once whatever the launch configuration is; with
+        // `block_dim == TILE` each thread writes exactly slot `tx`.
+        let mut slot = tx;
+        while slot < TILE {
+            let value = if col + slot < n { x[col + slot] } else { 0.0 };
+            // SAFETY: `slot < TILE`, and slots visited by different threads
+            // of the block are distinct (they differ modulo `block_dim`), so
+            // no two threads write the same element.
+            unsafe {
+                X_TILE[slot].write(value);
+            }
+            slot += block_dim;
+        }
+        thread::sync_threads();
+
+        // Each thread accumulates the dot product for its row.
+        if row < m {
+            let mut k = 0usize;
+            while k < TILE && col + k < n {
+                // SAFETY: every slot of the tile was written before the
+                // barrier above, so `X_TILE[k]` is initialised.
+                sum += a[row * n + (col + k)] * unsafe { X_TILE[k].assume_init() };
+                k += 1;
+            }
+        }
+        // Do not let any thread overwrite the tile while others still read it.
+        thread::sync_threads();
+
+        col += TILE;
+    }
+
+    if row < m {
+        // SAFETY: `row < m` and the caller guarantees `y` is valid for `m`
+        // writes.
+        let out = unsafe { &mut *y.add(row) };
+        *out = sum;
+    }
+}
+
+/// Element-wise vector addition (simple baseline for comparison).
+///
+/// Threads whose 1-D index is not below `a.len()` do nothing.
+///
+/// # Safety
+///
+/// `c` must be valid for writes of at least `a.len()` `f32` values.
+#[kernel]
+#[allow(improper_ctypes_definitions, clippy::missing_safety_doc)]
+pub unsafe fn vecadd(a: &[f32], b: &[f32], c: *mut f32) {
+    let idx = thread::index_1d() as usize;
+    if idx < a.len() {
+        // SAFETY: `idx < a.len()` and the caller guarantees `c` covers that
+        // range.
+        let elem = unsafe { &mut *c.add(idx) };
+        *elem = a[idx] + b[idx];
+    }
+}