From 7ae9aed65ca0a2e5bd5daabb84702ffb21888335 Mon Sep 17 00:00:00 2001 From: George Amponis Date: Wed, 6 May 2026 22:30:42 +0200 Subject: [PATCH 1/4] feat(godbolt): add Compiler Explorer integration for rust-cuda kernels --- contrib/godbolt/README.md | 119 +++++++++ contrib/godbolt/install.sh | 124 +++++++++ contrib/godbolt/rust-cuda-wrapper.sh | 241 ++++++++++++++++++ contrib/godbolt/rust-cuda.defaults.properties | 65 +++++ contrib/godbolt/test-kernel.rs | 77 ++++++ 5 files changed, 626 insertions(+) create mode 100644 contrib/godbolt/README.md create mode 100644 contrib/godbolt/install.sh create mode 100644 contrib/godbolt/rust-cuda-wrapper.sh create mode 100644 contrib/godbolt/rust-cuda.defaults.properties create mode 100644 contrib/godbolt/test-kernel.rs diff --git a/contrib/godbolt/README.md b/contrib/godbolt/README.md new file mode 100644 index 00000000..2dcfd173 --- /dev/null +++ b/contrib/godbolt/README.md @@ -0,0 +1,119 @@ +# Compiler Explorer (Godbolt) Integration for rust-cuda + +This directory contains everything needed to add rust-cuda as a compiler on +[Compiler Explorer](https://compiler-explorer.com/) so that users can type +Rust GPU kernel code and see the resulting PTX assembly. + +## How it works + +Compiler Explorer expects a single "compiler" binary that reads source on +stdin or from a file and writes assembly to stdout. Since rust-cuda has no +standalone compiler (the pipeline is `rustc` with a custom codegen backend +plus `cargo` for dependency resolution), the integration uses a wrapper +script that: + +1. Accepts a `.rs` file containing `#[kernel]` functions. +2. Creates a temporary Cargo project that depends on `cuda_std`. +3. Sets `CARGO_ENCODED_RUSTFLAGS` with the same flags `cuda_builder` uses + (codegen backend path, `no_std` injection, `nvptx64-nvidia-cuda` target, + `-Zbuild-std=core,alloc`, etc.). +4. Runs `cargo build` and parses the JSON output to locate the `.ptx` artifact. +5. 
Prints the PTX to stdout (or LLVM IR if `--emit=llvm-ir` is passed). +6. Forwards compiler diagnostics to stderr so CE displays them. + +## Files + +| File | Purpose | +|------|---------| +| `rust-cuda-wrapper.sh` | The wrapper script CE invokes as the "compiler" | +| `rust-cuda.defaults.properties` | CE configuration (compiler type, flags, defaults) | +| `rust-cuda.amazon.properties` | CE instance-specific overrides for the AWS fleet | +| `install.sh` | Installs the pinned nightly, builds the codegen backend, and lays out the prefix | +| `test-kernel.rs` | Sample kernel with shared memory and thread indexing | + +## Supported flags + +| Flag | Description | +|------|-------------| +| `--emit=ptx` | Output PTX assembly (default) | +| `--emit=llvm-ir` | Output LLVM IR before libnvvm conversion | +| `--opt-level=0` | Disable optimisations | +| `--opt-level=3` | Enable optimisations (default) | +| `--gpu-arch=sm_XX` | Target GPU compute capability (default `sm_75` / Turing) | +| `--version` | Print version info | + +## Testing locally + +### Prerequisites + +- CUDA toolkit installed (need `libnvvm` in `$CUDA_PATH/nvvm/lib64/`) +- The Rust nightly pinned in `rust-toolchain.toml` (`nightly-2026-04-02`) +- A built `librustc_codegen_nvvm.so` + +### Quick test + +```bash +# From the rust-cuda repo root, after building the codegen backend: +export RUST_CUDA_ROOT=/opt/compiler-explorer/rust-cuda # or your install prefix +export CUDA_PATH=/usr/local/cuda + +# Run install.sh first (or manually arrange the prefix): +./contrib/godbolt/install.sh + +# Then test: +./contrib/godbolt/rust-cuda-wrapper.sh contrib/godbolt/test-kernel.rs +``` + +You should see PTX assembly printed to stdout. + +### Without install.sh + +If you already have the codegen backend built in the workspace, you can +point the wrapper at the repo tree directly: + +```bash +export RUST_CUDA_ROOT=/path/to/rust-cuda +# Ensure lib/librustc_codegen_nvvm.so exists at that path, or adjust +# CODEGEN_SO in the script. 
+``` + +## Submitting to Compiler Explorer + +1. Open an issue on [compiler-explorer/compiler-explorer](https://github.com/compiler-explorer/compiler-explorer) + proposing the new compiler, linking to this directory. +2. Open a PR on [compiler-explorer/infra](https://github.com/compiler-explorer/infra) + that adds `install.sh` to the builder configuration. +3. Copy `rust-cuda.defaults.properties` into + `etc/config/` in the compiler-explorer repo. +4. Copy `rust-cuda.amazon.properties` into the appropriate instance + config directory. + +Key things CE maintainers will want to verify: + +- The wrapper is sandboxed (it only writes to `$TMPDIR` and cleans up). +- Build times are acceptable (first build is slow due to `-Zbuild-std`; subsequent + builds reuse the sysroot cache). +- The CUDA toolkit / `libnvvm` licence permits redistribution on CE's + infrastructure (NVIDIA's EULA generally allows this for development tools). + +## Compilation pipeline + +For reference, the full pipeline that the wrapper reproduces: + +``` + User's .rs file + | + v + [cargo build] + | --target=nvptx64-nvidia-cuda + | -Zbuild-std=core,alloc + | CARGO_ENCODED_RUSTFLAGS with -Zcodegen-backend=... + v + [rustc + rustc_codegen_nvvm] + | Compiles Rust -> NVVM IR (LLVM 7 bitcode dialect) + v + [libnvvm] (from CUDA toolkit) + | Optimises NVVM IR -> PTX + v + .ptx file (stdout) +``` diff --git a/contrib/godbolt/install.sh b/contrib/godbolt/install.sh new file mode 100644 index 00000000..565f6295 --- /dev/null +++ b/contrib/godbolt/install.sh @@ -0,0 +1,124 @@ +#!/usr/bin/env bash +# +# install.sh - Install the rust-cuda toolchain for Compiler Explorer. +# +# This script is meant to be run on a CE builder node (or locally for +# testing). It performs the following: +# +# 1. Installs the pinned Rust nightly with required components. +# 2. Clones the rust-cuda repository (or uses a local checkout). +# 3. Builds the rustc_codegen_nvvm codegen backend against libnvvm. +# 4. 
Copies the backend, cuda_std sources, and the wrapper script into +# a self-contained prefix under /opt/compiler-explorer/rust-cuda/. +# +# Prerequisites: +# - CUDA toolkit installed (CUDA_PATH or /usr/local/cuda) +# - cmake, ninja-build, clang, pkg-config, libssl-dev, zlib1g-dev +# - For LLVM 7 path: the prebuilt LLVM archive is downloaded automatically +# by the codegen's build.rs, or you can pre-install LLVM 7 and export +# LLVM_CONFIG=/path/to/llvm-config-7. +# +# Environment variables: +# INSTALL_PREFIX - Where to install (default: /opt/compiler-explorer/rust-cuda) +# CUDA_PATH - CUDA toolkit root (default: /usr/local/cuda) +# RUST_CUDA_REPO - Path to an existing rust-cuda checkout (skips git clone) +# RUST_CUDA_REF - Branch or tag to clone when RUST_CUDA_REPO is unset (default: main) + +set -euo pipefail + +INSTALL_PREFIX="${INSTALL_PREFIX:-/opt/compiler-explorer/rust-cuda}" +CUDA_PATH="${CUDA_PATH:-/usr/local/cuda}" +RUST_CUDA_REF="${RUST_CUDA_REF:-main}" + +NIGHTLY="nightly-2026-04-02" +COMPONENTS="rust-src,rustc-dev,llvm-tools-preview" + +echo "==> rust-cuda Compiler Explorer installer" +echo " prefix: ${INSTALL_PREFIX}" +echo " CUDA: ${CUDA_PATH}" +echo " nightly: ${NIGHTLY}" + +# --------------------------------------------------------------------------- +# 1. Install the pinned Rust nightly and make it the rustup default +# --------------------------------------------------------------------------- +echo "==> Installing Rust ${NIGHTLY} ..." +if ! command -v rustup &>/dev/null; then + curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --profile minimal --default-toolchain none + export PATH="${HOME}/.cargo/bin:${PATH}" +fi + +rustup toolchain install "${NIGHTLY}" --component "${COMPONENTS}" +rustup default "${NIGHTLY}" + +echo " rustc: $(rustc --version)" + +# --------------------------------------------------------------------------- +# 2. 
Get the rust-cuda source +# --------------------------------------------------------------------------- +if [[ -n "${RUST_CUDA_REPO:-}" ]]; then + REPO_DIR="${RUST_CUDA_REPO}" + echo "==> Using existing checkout at ${REPO_DIR}" +else + REPO_DIR="$(mktemp -d -t rust-cuda-src.XXXXXXXXXX)" + echo "==> Cloning rust-cuda (ref: ${RUST_CUDA_REF}) into ${REPO_DIR} ..." + git clone --depth 1 --branch "${RUST_CUDA_REF}" \ + https://github.com/Rust-GPU/rust-cuda.git "${REPO_DIR}" +fi + +# --------------------------------------------------------------------------- +# 3. Build the codegen backend +# --------------------------------------------------------------------------- +echo "==> Building rustc_codegen_nvvm ..." +export LD_LIBRARY_PATH="${CUDA_PATH}/nvvm/lib64:${CUDA_PATH}/lib64:${LD_LIBRARY_PATH:-}" + +cd "${REPO_DIR}" +cargo build -p rustc_codegen_nvvm --release + +# Find the built .so +CODEGEN_SO="$(find target/release -maxdepth 2 -name 'librustc_codegen_nvvm.so' -print -quit 2>/dev/null || true)" +if [[ -z "${CODEGEN_SO}" ]]; then + # Try the deps directory with hash suffix. + CODEGEN_SO="$(find target/release/deps -maxdepth 1 -name 'librustc_codegen_nvvm-*.so' -print -quit 2>/dev/null || true)" +fi +if [[ -z "${CODEGEN_SO}" ]]; then + echo "error: could not find librustc_codegen_nvvm.so after build" >&2 + exit 1 +fi +echo " codegen backend: ${CODEGEN_SO}" + +# --------------------------------------------------------------------------- +# 4. Install into the prefix +# --------------------------------------------------------------------------- +echo "==> Installing to ${INSTALL_PREFIX} ..." +mkdir -p "${INSTALL_PREFIX}"/{bin,lib,crates} + +# Backend shared library. +cp "${CODEGEN_SO}" "${INSTALL_PREFIX}/lib/librustc_codegen_nvvm.so" + +# Copy the crates that kernel code depends on at build time. 
+for crate in cuda_std cuda_std_macros; do + cp -a "${REPO_DIR}/crates/${crate}" "${INSTALL_PREFIX}/crates/${crate}" +done + +# Copy workspace-level files needed by cargo (Cargo.lock is especially +# important so dependency resolution is reproducible). +cp "${REPO_DIR}/Cargo.lock" "${INSTALL_PREFIX}/" 2>/dev/null || true + +# The wrapper script. +cp "${REPO_DIR}/contrib/godbolt/rust-cuda-wrapper.sh" "${INSTALL_PREFIX}/bin/" +chmod +x "${INSTALL_PREFIX}/bin/rust-cuda-wrapper.sh" + +# Version marker. +echo "${NIGHTLY}" > "${INSTALL_PREFIX}/rust-toolchain-version" + +# Also copy any native libs the codegen may need at link time (from the +# same build directory). +for lib in "${REPO_DIR}"/target/release/deps/lib*.so; do + [[ -f "${lib}" ]] && cp "${lib}" "${INSTALL_PREFIX}/lib/" 2>/dev/null || true +done + +echo "==> Installation complete." +echo "" +echo "Test with:" +echo " RUST_CUDA_ROOT=${INSTALL_PREFIX} CUDA_PATH=${CUDA_PATH} \\" +echo " ${INSTALL_PREFIX}/bin/rust-cuda-wrapper.sh contrib/godbolt/test-kernel.rs" diff --git a/contrib/godbolt/rust-cuda-wrapper.sh b/contrib/godbolt/rust-cuda-wrapper.sh new file mode 100644 index 00000000..30dadcc0 --- /dev/null +++ b/contrib/godbolt/rust-cuda-wrapper.sh @@ -0,0 +1,241 @@ +#!/usr/bin/env bash +# +# rust-cuda-wrapper.sh - Compiler Explorer wrapper for rust-cuda. +# +# Godbolt invokes this as the "compiler binary". It accepts a single .rs file +# containing a #[kernel] GPU function, wraps it in a temporary Cargo project +# that depends on cuda_std, builds it with rustc_codegen_nvvm targeting +# nvptx64-nvidia-cuda, and emits the resulting PTX (or LLVM IR) on stdout. +# +# Environment expected to be pre-configured by install.sh: +# RUST_CUDA_ROOT - /opt/compiler-explorer/rust-cuda +# CUDA_PATH - CUDA toolkit root (e.g. 
/usr/local/cuda) +# +# Usage: +# rust-cuda-wrapper.sh [flags] +# +# Flags: +# --emit=ptx Output PTX assembly (default) +# --emit=llvm-ir Output LLVM IR before libnvvm conversion +# --opt-level=N Optimisation level: 0 or 3 (default 3) +# --gpu-arch=smXX Target GPU arch, e.g. sm_75 (default sm_75) +# --version Print version info and exit + +set -euo pipefail + +# --------------------------------------------------------------------------- +# Defaults +# --------------------------------------------------------------------------- +RUST_CUDA_ROOT="${RUST_CUDA_ROOT:-/opt/compiler-explorer/rust-cuda}" +CUDA_PATH="${CUDA_PATH:-/usr/local/cuda}" + +EMIT="ptx" +OPT_LEVEL="3" +# Default to compute_75 (Turing), matching NvvmArch::default() when llvm19 is off. +GPU_ARCH="compute_75" +INPUT_FILE="" + +# --------------------------------------------------------------------------- +# Parse arguments +# --------------------------------------------------------------------------- +while [[ $# -gt 0 ]]; do + case "$1" in + --emit=*) + EMIT="${1#--emit=}" + shift + ;; + --opt-level=*) + OPT_LEVEL="${1#--opt-level=}" + shift + ;; + --gpu-arch=*) + raw="${1#--gpu-arch=}" + # Accept sm_XX shorthand and convert to compute_XX. + GPU_ARCH="${raw/sm_/compute_}" + shift + ;; + --version) + echo "rust-cuda-wrapper for Compiler Explorer" + echo "Toolchain: $(cat "${RUST_CUDA_ROOT}/rust-toolchain-version" 2>/dev/null || echo unknown)" + echo "CUDA: $(${CUDA_PATH}/bin/nvcc --version 2>/dev/null | grep -oP 'release \K[0-9.]+' || echo unknown)" + exit 0 + ;; + -*) + # Silently ignore other flags Godbolt may pass (e.g. -o, -S). + shift + ;; + *) + INPUT_FILE="$1" + shift + ;; + esac +done + +if [[ -z "${INPUT_FILE}" ]]; then + echo "error: no input file" >&2 + exit 1 +fi + +if [[ ! 
-f "${INPUT_FILE}" ]]; then + echo "error: input file '${INPUT_FILE}' not found" >&2 + exit 1 +fi + +# --------------------------------------------------------------------------- +# Create a temporary Cargo project +# --------------------------------------------------------------------------- +WORK_DIR="$(mktemp -d -t rust-cuda-godbolt.XXXXXXXXXX)" +cleanup() { rm -rf "${WORK_DIR}"; } +trap cleanup EXIT + +CRATE_DIR="${WORK_DIR}/gpu_kernel" +mkdir -p "${CRATE_DIR}/src" + +# Cargo.toml for the kernel crate. +cat > "${CRATE_DIR}/Cargo.toml" <<'CARGO_EOF' +[package] +name = "gpu_kernel" +version = "0.1.0" +edition = "2024" + +[dependencies] +cuda_std = { path = "__CUDA_STD_PATH__" } + +[lib] +crate-type = ["cdylib", "rlib"] +CARGO_EOF + +sed -i "s|__CUDA_STD_PATH__|${RUST_CUDA_ROOT}/crates/cuda_std|" "${CRATE_DIR}/Cargo.toml" + +# Copy the user's source file as src/lib.rs. +cp "${INPUT_FILE}" "${CRATE_DIR}/src/lib.rs" + +# --------------------------------------------------------------------------- +# Locate the codegen backend +# --------------------------------------------------------------------------- +CODEGEN_SO="${RUST_CUDA_ROOT}/lib/librustc_codegen_nvvm.so" +if [[ ! 
-f "${CODEGEN_SO}" ]]; then + echo "error: codegen backend not found at ${CODEGEN_SO}" >&2 + exit 1 +fi + +# --------------------------------------------------------------------------- +# Build RUSTFLAGS - mirrors cuda_builder's invoke_rustc() +# --------------------------------------------------------------------------- +RUSTFLAGS_ARRAY=( + "-Zcodegen-backend=${CODEGEN_SO}" + "-Zunstable-options" + "-Zcrate-attr=feature(register_tool)" + "-Zcrate-attr=register_tool(nvvm_internal)" + "-Zcrate-attr=no_std" + "-Zsaturating_float_casts=false" + "-Cpanic=immediate-abort" +) + +# LLVM / libnvvm arguments +LLVM_ARGS="-arch=${GPU_ARCH}" +LLVM_ARGS+=" --override-libm" + +if [[ "${OPT_LEVEL}" == "0" ]]; then + LLVM_ARGS+=" -opt=0" +fi + +# Emit mode +if [[ "${EMIT}" == "llvm-ir" ]]; then + RUSTFLAGS_ARRAY+=("--emit=llvm-ir") +fi + +RUSTFLAGS_ARRAY+=("-Cllvm-args=${LLVM_ARGS}") + +# Join with unit separator (\x1f), the same encoding cargo uses for +# CARGO_ENCODED_RUSTFLAGS to avoid shell quoting issues with spaces. 
+ENCODED="" +for flag in "${RUSTFLAGS_ARRAY[@]}"; do + if [[ -n "${ENCODED}" ]]; then + ENCODED+=$'\x1f' + fi + ENCODED+="${flag}" +done + +# --------------------------------------------------------------------------- +# Set up library paths for the codegen backend +# --------------------------------------------------------------------------- +EXTRA_LD="${CUDA_PATH}/nvvm/lib64:${CUDA_PATH}/lib64" +CODEGEN_DIR="$(dirname "${CODEGEN_SO}")" +export LD_LIBRARY_PATH="${CODEGEN_DIR}:${EXTRA_LD}:${LD_LIBRARY_PATH:-}" + +# --------------------------------------------------------------------------- +# Run cargo build +# --------------------------------------------------------------------------- +RELEASE_FLAG="" +if [[ "${OPT_LEVEL}" != "0" ]]; then + RELEASE_FLAG="--release" +fi + +BUILD_OUTPUT="$( + cd "${CRATE_DIR}" + CARGO_ENCODED_RUSTFLAGS="${ENCODED}" \ + CARGO_FEATURE_NO_F16_F128=1 \ + cargo build \ + --lib \ + --message-format=json-render-diagnostics \ + -Zbuild-std=core,alloc \ + --target=nvptx64-nvidia-cuda \ + ${RELEASE_FLAG} \ + 2>"${WORK_DIR}/stderr.log" || true +)" + +BUILD_EXIT=$? +STDERR_LOG="${WORK_DIR}/stderr.log" + +# --------------------------------------------------------------------------- +# Extract the artifact path from Cargo's JSON output +# --------------------------------------------------------------------------- +PTX_PATH="" +if [[ -n "${BUILD_OUTPUT}" ]]; then + PTX_PATH="$( + echo "${BUILD_OUTPUT}" \ + | grep '"reason":"compiler-artifact"' \ + | tail -1 \ + | python3 -c " +import sys, json +for line in sys.stdin: + line = line.strip() + if not line: + continue + try: + obj = json.loads(line) + except json.JSONDecodeError: + continue + if obj.get('reason') == 'compiler-artifact': + for f in obj.get('filenames', []): + if f.endswith('.ptx'): + print(f) + sys.exit(0) +" 2>/dev/null || true + )" +fi + +# For LLVM IR mode, look for .ll files instead. 
+if [[ "${EMIT}" == "llvm-ir" && -z "${PTX_PATH}" ]]; then + PTX_PATH="$(find "${CRATE_DIR}" -name '*.ll' -path '*/nvptx64-nvidia-cuda/*' 2>/dev/null | head -1 || true)" +fi + +# --------------------------------------------------------------------------- +# Output +# --------------------------------------------------------------------------- +if [[ -n "${PTX_PATH}" && -f "${PTX_PATH}" ]]; then + cat "${PTX_PATH}" +else + # Build failed, relay stderr so Godbolt shows the diagnostics. + if [[ -f "${STDERR_LOG}" ]]; then + cat "${STDERR_LOG}" >&2 + fi + # Also dump any non-JSON lines from stdout (rustc sometimes puts + # diagnostics there). + if [[ -n "${BUILD_OUTPUT}" ]]; then + echo "${BUILD_OUTPUT}" | grep -v '^\s*{' >&2 || true + fi + echo "error: compilation failed, no PTX output produced" >&2 + exit 1 +fi diff --git a/contrib/godbolt/rust-cuda.defaults.properties b/contrib/godbolt/rust-cuda.defaults.properties new file mode 100644 index 00000000..5de19065 --- /dev/null +++ b/contrib/godbolt/rust-cuda.defaults.properties @@ -0,0 +1,65 @@ +# Compiler Explorer -- rust-cuda default properties +# +# This file defines how the rust-cuda "compiler" appears in the Compiler +# Explorer UI. It is loaded by CE's configuration layer; see +# https://github.com/compiler-explorer/compiler-explorer/blob/main/docs/AddingACompiler.md + +# --------------------------------------------------------------------------- +# Compiler identity +# --------------------------------------------------------------------------- +compilerType=rust-cuda +group=rust-cuda +groupName=Rust CUDA (rust-cuda) + +# The wrapper script that Godbolt executes. install.sh places it here. +compiler=/opt/compiler-explorer/rust-cuda/bin/rust-cuda-wrapper.sh + +# Language shown in the UI dropdown. +lang=rust + +# Human-readable name in the compiler selector. +name=rust-cuda (nightly-2026-04-02) + +# Semantic version used for sorting in the compiler list. 
+semver=0.3.0 + +# --------------------------------------------------------------------------- +# Behaviour +# --------------------------------------------------------------------------- + +# The wrapper always writes PTX to stdout and diagnostics to stderr. +compilerShouldNotProduceOutput=true + +# Godbolt options pane entries (shown as dropdowns / checkboxes). +options=--emit=ptx + +# Default flags passed on every invocation. +defaultOptions=--emit=ptx --opt-level=3 + +# CE will offer these in the "Compiler options" box as suggestions. +supportedOptions=--emit=ptx --emit=llvm-ir --opt-level=0 --opt-level=3 --gpu-arch=sm_52 --gpu-arch=sm_70 --gpu-arch=sm_75 --gpu-arch=sm_80 --gpu-arch=sm_86 --gpu-arch=sm_89 --gpu-arch=sm_90 + +# Output is PTX assembly (NVIDIA's ISA-level text format). +outputIsAsm=true + +# Do not try to demangle -- PTX symbols are already human-readable. +demangler= + +# No binary output to disassemble. +supportsBinary=false + +# No execution support (GPU code cannot run on the CE host). +supportsExecute=false + +# The --version flag is handled by the wrapper. +versionFlag=--version +versionRe=rust-cuda-wrapper.* + +# Instruction-set reference for hover tooltips. +instructionSet=ptx + +# --------------------------------------------------------------------------- +# Source boilerplate +# --------------------------------------------------------------------------- +# The default source shown when a user picks this compiler. 
+defaultSource=use cuda_std::prelude::*;\n\n#[kernel]\n#[allow(improper_ctypes_definitions, clippy::missing_safety_doc)]\npub unsafe fn vecadd(a: &[f32], b: &[f32], c: *mut f32) {\n let idx = thread::index_1d() as usize;\n if idx < a.len() {\n let elem = unsafe { &mut *c.add(idx) };\n *elem = a[idx] + b[idx];\n }\n} diff --git a/contrib/godbolt/test-kernel.rs b/contrib/godbolt/test-kernel.rs new file mode 100644 index 00000000..54708229 --- /dev/null +++ b/contrib/godbolt/test-kernel.rs @@ -0,0 +1,77 @@ +// A sample Rust CUDA kernel for Compiler Explorer. +// +// This demonstrates shared-memory tiling, thread indexing, and +// synchronisation -- the core patterns used in GPU programming +// with rust-cuda. + +use cuda_std::prelude::*; +use core::mem::MaybeUninit; + +const TILE: usize = 16; // NOTE(review): matvec indexes TILE_A by thread_idx_x, so it appears to require block_dim_x == TILE -- confirm + +/// Tiled matrix-vector multiply: y = A * x. +/// +/// Each block collaboratively loads a tile of `x` into shared memory, +/// then each thread accumulates its dot-product contribution. +/// +/// - `a`: row-major matrix, m rows x n cols +/// - `x`: input vector, length n +/// - `y`: output vector, length m (assigned, not accumulated into, so pre-zeroing is not required) +/// - `m`: number of rows +/// - `n`: number of columns +#[kernel] +#[allow(improper_ctypes_definitions, clippy::missing_safety_doc)] +pub unsafe fn matvec(a: &[f32], x: &[f32], y: *mut f32, m: usize, n: usize) { + #[address_space(shared)] + static mut TILE_A: [MaybeUninit<f32>; TILE] = [MaybeUninit::uninit(); TILE]; + + let row = thread::block_idx_x() as usize * thread::block_dim_x() as usize + + thread::thread_idx_x() as usize; + let tx = thread::thread_idx_x() as usize; + + let mut sum = 0.0f32; + + // Walk across the columns in tiles of size TILE. + let mut col = 0usize; + while col < n { + // Collaboratively load one tile of x into shared memory. + if col + tx < n { + unsafe { + TILE_A[tx].write(x[col + tx]); + } + } else { + unsafe { + TILE_A[tx].write(0.0); + } + } + thread::sync_threads(); + + // Each thread accumulates the dot product for its row. 
+ if row < m { + let mut k = 0usize; + while k < TILE && col + k < n { + sum += a[row * n + (col + k)] * unsafe { TILE_A[k].assume_init() }; + k += 1; + } + } + thread::sync_threads(); + + col += TILE; + } + + if row < m { + let out = unsafe { &mut *y.add(row) }; + *out = sum; + } +} + +/// Element-wise vector addition (simple baseline for comparison). +#[kernel] +#[allow(improper_ctypes_definitions, clippy::missing_safety_doc)] +pub unsafe fn vecadd(a: &[f32], b: &[f32], c: *mut f32) { + let idx = thread::index_1d() as usize; + if idx < a.len() { + let elem = unsafe { &mut *c.add(idx) }; + *elem = a[idx] + b[idx]; + } +} From b32ed08232d0a704c392d99b4f3c8ecb8ece142c Mon Sep 17 00:00:00 2001 From: George Amponis Date: Thu, 7 May 2026 10:22:46 +0200 Subject: [PATCH 2/4] update CE display name to reflect branch and codegen path --- contrib/godbolt/rust-cuda.defaults.properties | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/godbolt/rust-cuda.defaults.properties b/contrib/godbolt/rust-cuda.defaults.properties index 5de19065..1ed61271 100644 --- a/contrib/godbolt/rust-cuda.defaults.properties +++ b/contrib/godbolt/rust-cuda.defaults.properties @@ -18,7 +18,7 @@ compiler=/opt/compiler-explorer/rust-cuda/bin/rust-cuda-wrapper.sh lang=rust # Human-readable name in the compiler selector. -name=rust-cuda (nightly-2026-04-02) +name=rust-cuda main (llvm7) # Semantic version used for sorting in the compiler list. 
semver=0.3.0 From a4743a8c9c6c771ca1ee801823410750fffd94cc Mon Sep 17 00:00:00 2001 From: George Amponis Date: Thu, 7 May 2026 22:35:08 +0200 Subject: [PATCH 3/4] use semver=main for trunk-first sorting in CE dropdown --- contrib/godbolt/rust-cuda.defaults.properties | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/godbolt/rust-cuda.defaults.properties b/contrib/godbolt/rust-cuda.defaults.properties index 1ed61271..4596aa98 100644 --- a/contrib/godbolt/rust-cuda.defaults.properties +++ b/contrib/godbolt/rust-cuda.defaults.properties @@ -21,7 +21,7 @@ lang=rust name=rust-cuda main (llvm7) # Semantic version used for sorting in the compiler list. -semver=0.3.0 +semver=main # --------------------------------------------------------------------------- # Behaviour From 46e82ecbf38266c71fe052bb4f42f26fbdb9ea5a Mon Sep 17 00:00:00 2001 From: George Amponis Date: Tue, 12 May 2026 00:02:59 +0200 Subject: [PATCH 4/4] refactor(godbolt): rewrite wrapper as a stand-alone rust binary with integration test --- contrib/godbolt/README.md | 27 +- contrib/godbolt/install.sh | 13 +- contrib/godbolt/rust-cuda-wrapper.sh | 241 -------- contrib/godbolt/rust-cuda-wrapper/.gitignore | 1 + contrib/godbolt/rust-cuda-wrapper/Cargo.lock | 571 ++++++++++++++++++ contrib/godbolt/rust-cuda-wrapper/Cargo.toml | 14 + .../rust-cuda-wrapper/src/cargo_toml.template | 10 + contrib/godbolt/rust-cuda-wrapper/src/main.rs | 160 +++++ .../rust-cuda-wrapper/tests/integration.rs | 30 + contrib/godbolt/rust-cuda.defaults.properties | 4 +- 10 files changed, 817 insertions(+), 254 deletions(-) delete mode 100644 contrib/godbolt/rust-cuda-wrapper.sh create mode 100644 contrib/godbolt/rust-cuda-wrapper/.gitignore create mode 100644 contrib/godbolt/rust-cuda-wrapper/Cargo.lock create mode 100644 contrib/godbolt/rust-cuda-wrapper/Cargo.toml create mode 100644 contrib/godbolt/rust-cuda-wrapper/src/cargo_toml.template create mode 100644 contrib/godbolt/rust-cuda-wrapper/src/main.rs create mode 
100644 contrib/godbolt/rust-cuda-wrapper/tests/integration.rs diff --git a/contrib/godbolt/README.md b/contrib/godbolt/README.md index 2dcfd173..84947b2e 100644 --- a/contrib/godbolt/README.md +++ b/contrib/godbolt/README.md @@ -9,8 +9,8 @@ Rust GPU kernel code and see the resulting PTX assembly. Compiler Explorer expects a single "compiler" binary that reads source on stdin or from a file and writes assembly to stdout. Since rust-cuda has no standalone compiler (the pipeline is `rustc` with a custom codegen backend -plus `cargo` for dependency resolution), the integration uses a wrapper -script that: +plus `cargo` for dependency resolution), the integration uses a small Rust +wrapper binary that: 1. Accepts a `.rs` file containing `#[kernel]` functions. 2. Creates a temporary Cargo project that depends on `cuda_std`. @@ -25,10 +25,10 @@ script that: | File | Purpose | |------|---------| -| `rust-cuda-wrapper.sh` | The wrapper script CE invokes as the "compiler" | +| `rust-cuda-wrapper/` | Rust crate for the wrapper binary CE invokes as the "compiler" | | `rust-cuda.defaults.properties` | CE configuration (compiler type, flags, defaults) | | `rust-cuda.amazon.properties` | CE instance-specific overrides for the AWS fleet | -| `install.sh` | Installs the pinned nightly, builds the codegen backend, and lays out the prefix | +| `install.sh` | Installs the pinned nightly, builds the codegen backend and the wrapper, and lays out the prefix | | `test-kernel.rs` | Sample kernel with shared memory and thread indexing | ## Supported flags @@ -61,7 +61,7 @@ export CUDA_PATH=/usr/local/cuda ./contrib/godbolt/install.sh # Then test: -./contrib/godbolt/rust-cuda-wrapper.sh contrib/godbolt/test-kernel.rs +$RUST_CUDA_ROOT/bin/rust-cuda-wrapper contrib/godbolt/test-kernel.rs ``` You should see PTX assembly printed to stdout. 
@@ -73,8 +73,21 @@ point the wrapper at the repo tree directly: ```bash export RUST_CUDA_ROOT=/path/to/rust-cuda -# Ensure lib/librustc_codegen_nvvm.so exists at that path, or adjust -# CODEGEN_SO in the script. +# Ensure $RUST_CUDA_ROOT/lib/librustc_codegen_nvvm.so exists. + +cd contrib/godbolt/rust-cuda-wrapper +cargo run --release -- ../test-kernel.rs +``` + +### Running the integration test + +The wrapper crate ships a smoke test that compiles `test-kernel.rs` +end-to-end and asserts the output looks like PTX: + +```bash +cd contrib/godbolt/rust-cuda-wrapper +cargo test # skips without RUST_CUDA_ROOT +RUST_CUDA_ROOT=/path/to/rust-cuda cargo test # runs the real build ``` ## Submitting to Compiler Explorer diff --git a/contrib/godbolt/install.sh b/contrib/godbolt/install.sh index 565f6295..ba17ff82 100644 --- a/contrib/godbolt/install.sh +++ b/contrib/godbolt/install.sh @@ -104,9 +104,14 @@ done # important so dependency resolution is reproducible). cp "${REPO_DIR}/Cargo.lock" "${INSTALL_PREFIX}/" 2>/dev/null || true -# The wrapper script. -cp "${REPO_DIR}/contrib/godbolt/rust-cuda-wrapper.sh" "${INSTALL_PREFIX}/bin/" -chmod +x "${INSTALL_PREFIX}/bin/rust-cuda-wrapper.sh" +# Build and install the wrapper binary. +( + cd "${REPO_DIR}/contrib/godbolt/rust-cuda-wrapper" + cargo build --release +) +cp "${REPO_DIR}/contrib/godbolt/rust-cuda-wrapper/target/release/rust-cuda-wrapper" \ + "${INSTALL_PREFIX}/bin/" +chmod +x "${INSTALL_PREFIX}/bin/rust-cuda-wrapper" # Version marker. echo "${NIGHTLY}" > "${INSTALL_PREFIX}/rust-toolchain-version" @@ -121,4 +126,4 @@ echo "==> Installation complete." 
echo "" echo "Test with:" echo " RUST_CUDA_ROOT=${INSTALL_PREFIX} CUDA_PATH=${CUDA_PATH} \\" -echo " ${INSTALL_PREFIX}/bin/rust-cuda-wrapper.sh contrib/godbolt/test-kernel.rs" +echo " ${INSTALL_PREFIX}/bin/rust-cuda-wrapper contrib/godbolt/test-kernel.rs" diff --git a/contrib/godbolt/rust-cuda-wrapper.sh b/contrib/godbolt/rust-cuda-wrapper.sh deleted file mode 100644 index 30dadcc0..00000000 --- a/contrib/godbolt/rust-cuda-wrapper.sh +++ /dev/null @@ -1,241 +0,0 @@ -#!/usr/bin/env bash -# -# rust-cuda-wrapper.sh - Compiler Explorer wrapper for rust-cuda. -# -# Godbolt invokes this as the "compiler binary". It accepts a single .rs file -# containing a #[kernel] GPU function, wraps it in a temporary Cargo project -# that depends on cuda_std, builds it with rustc_codegen_nvvm targeting -# nvptx64-nvidia-cuda, and emits the resulting PTX (or LLVM IR) on stdout. -# -# Environment expected to be pre-configured by install.sh: -# RUST_CUDA_ROOT - /opt/compiler-explorer/rust-cuda -# CUDA_PATH - CUDA toolkit root (e.g. /usr/local/cuda) -# -# Usage: -# rust-cuda-wrapper.sh [flags] -# -# Flags: -# --emit=ptx Output PTX assembly (default) -# --emit=llvm-ir Output LLVM IR before libnvvm conversion -# --opt-level=N Optimisation level: 0 or 3 (default 3) -# --gpu-arch=smXX Target GPU arch, e.g. sm_75 (default sm_75) -# --version Print version info and exit - -set -euo pipefail - -# --------------------------------------------------------------------------- -# Defaults -# --------------------------------------------------------------------------- -RUST_CUDA_ROOT="${RUST_CUDA_ROOT:-/opt/compiler-explorer/rust-cuda}" -CUDA_PATH="${CUDA_PATH:-/usr/local/cuda}" - -EMIT="ptx" -OPT_LEVEL="3" -# Default to compute_75 (Turing), matching NvvmArch::default() when llvm19 is off. 
-GPU_ARCH="compute_75" -INPUT_FILE="" - -# --------------------------------------------------------------------------- -# Parse arguments -# --------------------------------------------------------------------------- -while [[ $# -gt 0 ]]; do - case "$1" in - --emit=*) - EMIT="${1#--emit=}" - shift - ;; - --opt-level=*) - OPT_LEVEL="${1#--opt-level=}" - shift - ;; - --gpu-arch=*) - raw="${1#--gpu-arch=}" - # Accept sm_XX shorthand and convert to compute_XX. - GPU_ARCH="${raw/sm_/compute_}" - shift - ;; - --version) - echo "rust-cuda-wrapper for Compiler Explorer" - echo "Toolchain: $(cat "${RUST_CUDA_ROOT}/rust-toolchain-version" 2>/dev/null || echo unknown)" - echo "CUDA: $(${CUDA_PATH}/bin/nvcc --version 2>/dev/null | grep -oP 'release \K[0-9.]+' || echo unknown)" - exit 0 - ;; - -*) - # Silently ignore other flags Godbolt may pass (e.g. -o, -S). - shift - ;; - *) - INPUT_FILE="$1" - shift - ;; - esac -done - -if [[ -z "${INPUT_FILE}" ]]; then - echo "error: no input file" >&2 - exit 1 -fi - -if [[ ! -f "${INPUT_FILE}" ]]; then - echo "error: input file '${INPUT_FILE}' not found" >&2 - exit 1 -fi - -# --------------------------------------------------------------------------- -# Create a temporary Cargo project -# --------------------------------------------------------------------------- -WORK_DIR="$(mktemp -d -t rust-cuda-godbolt.XXXXXXXXXX)" -cleanup() { rm -rf "${WORK_DIR}"; } -trap cleanup EXIT - -CRATE_DIR="${WORK_DIR}/gpu_kernel" -mkdir -p "${CRATE_DIR}/src" - -# Cargo.toml for the kernel crate. -cat > "${CRATE_DIR}/Cargo.toml" <<'CARGO_EOF' -[package] -name = "gpu_kernel" -version = "0.1.0" -edition = "2024" - -[dependencies] -cuda_std = { path = "__CUDA_STD_PATH__" } - -[lib] -crate-type = ["cdylib", "rlib"] -CARGO_EOF - -sed -i "s|__CUDA_STD_PATH__|${RUST_CUDA_ROOT}/crates/cuda_std|" "${CRATE_DIR}/Cargo.toml" - -# Copy the user's source file as src/lib.rs. 
-cp "${INPUT_FILE}" "${CRATE_DIR}/src/lib.rs" - -# --------------------------------------------------------------------------- -# Locate the codegen backend -# --------------------------------------------------------------------------- -CODEGEN_SO="${RUST_CUDA_ROOT}/lib/librustc_codegen_nvvm.so" -if [[ ! -f "${CODEGEN_SO}" ]]; then - echo "error: codegen backend not found at ${CODEGEN_SO}" >&2 - exit 1 -fi - -# --------------------------------------------------------------------------- -# Build RUSTFLAGS - mirrors cuda_builder's invoke_rustc() -# --------------------------------------------------------------------------- -RUSTFLAGS_ARRAY=( - "-Zcodegen-backend=${CODEGEN_SO}" - "-Zunstable-options" - "-Zcrate-attr=feature(register_tool)" - "-Zcrate-attr=register_tool(nvvm_internal)" - "-Zcrate-attr=no_std" - "-Zsaturating_float_casts=false" - "-Cpanic=immediate-abort" -) - -# LLVM / libnvvm arguments -LLVM_ARGS="-arch=${GPU_ARCH}" -LLVM_ARGS+=" --override-libm" - -if [[ "${OPT_LEVEL}" == "0" ]]; then - LLVM_ARGS+=" -opt=0" -fi - -# Emit mode -if [[ "${EMIT}" == "llvm-ir" ]]; then - RUSTFLAGS_ARRAY+=("--emit=llvm-ir") -fi - -RUSTFLAGS_ARRAY+=("-Cllvm-args=${LLVM_ARGS}") - -# Join with unit separator (\x1f), the same encoding cargo uses for -# CARGO_ENCODED_RUSTFLAGS to avoid shell quoting issues with spaces. 
-ENCODED="" -for flag in "${RUSTFLAGS_ARRAY[@]}"; do - if [[ -n "${ENCODED}" ]]; then - ENCODED+=$'\x1f' - fi - ENCODED+="${flag}" -done - -# --------------------------------------------------------------------------- -# Set up library paths for the codegen backend -# --------------------------------------------------------------------------- -EXTRA_LD="${CUDA_PATH}/nvvm/lib64:${CUDA_PATH}/lib64" -CODEGEN_DIR="$(dirname "${CODEGEN_SO}")" -export LD_LIBRARY_PATH="${CODEGEN_DIR}:${EXTRA_LD}:${LD_LIBRARY_PATH:-}" - -# --------------------------------------------------------------------------- -# Run cargo build -# --------------------------------------------------------------------------- -RELEASE_FLAG="" -if [[ "${OPT_LEVEL}" != "0" ]]; then - RELEASE_FLAG="--release" -fi - -BUILD_OUTPUT="$( - cd "${CRATE_DIR}" - CARGO_ENCODED_RUSTFLAGS="${ENCODED}" \ - CARGO_FEATURE_NO_F16_F128=1 \ - cargo build \ - --lib \ - --message-format=json-render-diagnostics \ - -Zbuild-std=core,alloc \ - --target=nvptx64-nvidia-cuda \ - ${RELEASE_FLAG} \ - 2>"${WORK_DIR}/stderr.log" || true -)" - -BUILD_EXIT=$? -STDERR_LOG="${WORK_DIR}/stderr.log" - -# --------------------------------------------------------------------------- -# Extract the artifact path from Cargo's JSON output -# --------------------------------------------------------------------------- -PTX_PATH="" -if [[ -n "${BUILD_OUTPUT}" ]]; then - PTX_PATH="$( - echo "${BUILD_OUTPUT}" \ - | grep '"reason":"compiler-artifact"' \ - | tail -1 \ - | python3 -c " -import sys, json -for line in sys.stdin: - line = line.strip() - if not line: - continue - try: - obj = json.loads(line) - except json.JSONDecodeError: - continue - if obj.get('reason') == 'compiler-artifact': - for f in obj.get('filenames', []): - if f.endswith('.ptx'): - print(f) - sys.exit(0) -" 2>/dev/null || true - )" -fi - -# For LLVM IR mode, look for .ll files instead. 
-if [[ "${EMIT}" == "llvm-ir" && -z "${PTX_PATH}" ]]; then - PTX_PATH="$(find "${CRATE_DIR}" -name '*.ll' -path '*/nvptx64-nvidia-cuda/*' 2>/dev/null | head -1 || true)" -fi - -# --------------------------------------------------------------------------- -# Output -# --------------------------------------------------------------------------- -if [[ -n "${PTX_PATH}" && -f "${PTX_PATH}" ]]; then - cat "${PTX_PATH}" -else - # Build failed, relay stderr so Godbolt shows the diagnostics. - if [[ -f "${STDERR_LOG}" ]]; then - cat "${STDERR_LOG}" >&2 - fi - # Also dump any non-JSON lines from stdout (rustc sometimes puts - # diagnostics there). - if [[ -n "${BUILD_OUTPUT}" ]]; then - echo "${BUILD_OUTPUT}" | grep -v '^\s*{' >&2 || true - fi - echo "error: compilation failed, no PTX output produced" >&2 - exit 1 -fi diff --git a/contrib/godbolt/rust-cuda-wrapper/.gitignore b/contrib/godbolt/rust-cuda-wrapper/.gitignore new file mode 100644 index 00000000..ea8c4bf7 --- /dev/null +++ b/contrib/godbolt/rust-cuda-wrapper/.gitignore @@ -0,0 +1 @@ +/target diff --git a/contrib/godbolt/rust-cuda-wrapper/Cargo.lock b/contrib/godbolt/rust-cuda-wrapper/Cargo.lock new file mode 100644 index 00000000..e3a467be --- /dev/null +++ b/contrib/godbolt/rust-cuda-wrapper/Cargo.lock @@ -0,0 +1,571 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 4 + +[[package]] +name = "anstream" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "824a212faf96e9acacdbd09febd34438f8f711fb84e09a8916013cd7815ca28d" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "940b3a0ca603d1eade50a4846a2afffd5ef57a9feac2c0e2ec2e14f9ead76000" + +[[package]] +name = "anstyle-parse" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52ce7f38b242319f7cabaa6813055467063ecdc9d355bbb4ce0c68908cd8130e" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" +dependencies = [ + "anstyle", + "once_cell_polyfill", + "windows-sys", +] + +[[package]] +name = "anyhow" +version = "1.0.102" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" + +[[package]] +name = "bitflags" +version = "2.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4512299f36f043ab09a583e57bceb5a5aab7a73db1805848e8fef3c9e8c78b3" + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "clap" +version = "4.6.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ddb117e43bbf7dacf0a4190fef4d345b9bad68dfc649cb349e7d17d28428e51" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "714a53001bf66416adb0e2ef5ac857140e7dc3a0c48fb28b2f10762fc4b5069f" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2ce8604710f6733aa641a2b3731eaa1e8b3d9973d5e3565da11800813f997a9" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9" + +[[package]] +name = "colorchoice" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d07550c9036bf2ae0c684c4297d503f838287c83c53686d05370d0e139ae570" + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "errno" +version = "0.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" +dependencies = [ + "libc", + "windows-sys", +] + +[[package]] +name = "fastrand" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f1f227452a390804cdb637b74a86990f2a7d7ba4b7d5693aac9b4dd6defd8d6" + +[[package]] +name = "foldhash" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" + +[[package]] +name = 
"getrandom" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0de51e6874e94e7bf76d726fc5d13ba782deca734ff60d5bb2fb2607c7406555" +dependencies = [ + "cfg-if", + "libc", + "r-efi", + "wasip2", + "wasip3", +] + +[[package]] +name = "hashbrown" +version = "0.15.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" +dependencies = [ + "foldhash", +] + +[[package]] +name = "hashbrown" +version = "0.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed5909b6e89a2db4456e54cd5f673791d7eca6732202bbf2a9cc504fe2f9b84a" + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "id-arena" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" + +[[package]] +name = "indexmap" +version = "2.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d466e9454f08e4a911e14806c24e16fba1b4c121d1ea474396f396069cf949d9" +dependencies = [ + "equivalent", + "hashbrown 0.17.1", + "serde", + "serde_core", +] + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" + +[[package]] +name = "itoa" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682" + +[[package]] +name = "leb128fmt" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" + +[[package]] +name = 
"libc" +version = "0.2.186" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66" + +[[package]] +name = "linux-raw-sys" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53" + +[[package]] +name = "log" +version = "0.4.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" + +[[package]] +name = "memchr" +version = "2.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" + +[[package]] +name = "once_cell" +version = "1.21.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50" + +[[package]] +name = "once_cell_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" + +[[package]] +name = "prettyplease" +version = "0.2.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" +dependencies = [ + "proc-macro2", + "syn", +] + +[[package]] +name = "proc-macro2" +version = "1.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "r-efi" +version = "6.0.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf" + +[[package]] +name = "rust-cuda-wrapper" +version = "0.1.0" +dependencies = [ + "anyhow", + "clap", + "serde_json", + "tempfile", +] + +[[package]] +name = "rustix" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6fe4565b9518b83ef4f91bb47ce29620ca828bd32cb7e408f0062e9930ba190" +dependencies = [ + "bitflags", + "errno", + "libc", + "linux-raw-sys", + "windows-sys", +] + +[[package]] +name = "semver" +version = "1.0.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a7852d02fc848982e0c167ef163aaff9cd91dc640ba85e263cb1ce46fae51cd" + +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.149" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" +dependencies = [ + "itoa", + "memchr", + "serde", + "serde_core", + "zmij", +] + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + +[[package]] +name = "syn" 
+version = "2.0.117" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "tempfile" +version = "3.27.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32497e9a4c7b38532efcdebeef879707aa9f794296a4f0244f6f69e9bc8574bd" +dependencies = [ + "fastrand", + "getrandom", + "once_cell", + "rustix", + "windows-sys", +] + +[[package]] +name = "unicode-ident" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" + +[[package]] +name = "unicode-xid" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "wasip2" +version = "1.0.3+wasi-0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "20064672db26d7cdc89c7798c48a0fdfac8213434a1186e5ef29fd560ae223d6" +dependencies = [ + "wit-bindgen 0.57.1", +] + +[[package]] +name = "wasip3" +version = "0.4.0+wasi-0.3.0-rc-2026-01-06" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5" +dependencies = [ + "wit-bindgen 0.51.0", +] + +[[package]] +name = "wasm-encoder" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "990065f2fe63003fe337b932cfb5e3b80e0b4d0f5ff650e6985b1048f62c8319" +dependencies = [ + "leb128fmt", + "wasmparser", +] + +[[package]] +name = "wasm-metadata" +version = "0.244.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909" +dependencies = [ + "anyhow", + "indexmap", + "wasm-encoder", + "wasmparser", +] + +[[package]] +name = "wasmparser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" +dependencies = [ + "bitflags", + "hashbrown 0.15.5", + "indexmap", + "semver", +] + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-sys" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link", +] + +[[package]] +name = "wit-bindgen" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" +dependencies = [ + "wit-bindgen-rust-macro", +] + +[[package]] +name = "wit-bindgen" +version = "0.57.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ebf944e87a7c253233ad6766e082e3cd714b5d03812acc24c318f549614536e" + +[[package]] +name = "wit-bindgen-core" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc" +dependencies = [ + "anyhow", + "heck", + "wit-parser", +] + +[[package]] +name = "wit-bindgen-rust" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" +dependencies = [ + "anyhow", + "heck", + "indexmap", + "prettyplease", + "syn", + "wasm-metadata", + "wit-bindgen-core", + 
"wit-component", +] + +[[package]] +name = "wit-bindgen-rust-macro" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c0f9bfd77e6a48eccf51359e3ae77140a7f50b1e2ebfe62422d8afdaffab17a" +dependencies = [ + "anyhow", + "prettyplease", + "proc-macro2", + "quote", + "syn", + "wit-bindgen-core", + "wit-bindgen-rust", +] + +[[package]] +name = "wit-component" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" +dependencies = [ + "anyhow", + "bitflags", + "indexmap", + "log", + "serde", + "serde_derive", + "serde_json", + "wasm-encoder", + "wasm-metadata", + "wasmparser", + "wit-parser", +] + +[[package]] +name = "wit-parser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736" +dependencies = [ + "anyhow", + "id-arena", + "indexmap", + "log", + "semver", + "serde", + "serde_derive", + "serde_json", + "unicode-xid", + "wasmparser", +] + +[[package]] +name = "zmij" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" diff --git a/contrib/godbolt/rust-cuda-wrapper/Cargo.toml b/contrib/godbolt/rust-cuda-wrapper/Cargo.toml new file mode 100644 index 00000000..4f77eb17 --- /dev/null +++ b/contrib/godbolt/rust-cuda-wrapper/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "rust-cuda-wrapper" +version = "0.1.0" +edition = "2024" + +# Stand-alone workspace so this crate is independent of the rust-cuda +# workspace's pinned nightly. CE may update it on its own cadence. 
+[workspace]
+
+[dependencies]
+anyhow = "1.0.102"
+clap = { version = "4.6.1", features = ["derive"] }
+serde_json = "1.0.149"
+tempfile = "3.27.0"
diff --git a/contrib/godbolt/rust-cuda-wrapper/src/cargo_toml.template b/contrib/godbolt/rust-cuda-wrapper/src/cargo_toml.template
new file mode 100644
index 00000000..bacc34ec
--- /dev/null
+++ b/contrib/godbolt/rust-cuda-wrapper/src/cargo_toml.template
@@ -0,0 +1,10 @@
+[package]
+name = "gpu_kernel"
+version = "0.1.0"
+edition = "2024"
+
+[dependencies]
+cuda_std = { path = "__CUDA_STD_PATH__" }
+
+[lib]
+crate-type = ["cdylib", "rlib"]
diff --git a/contrib/godbolt/rust-cuda-wrapper/src/main.rs b/contrib/godbolt/rust-cuda-wrapper/src/main.rs
new file mode 100644
index 00000000..ae6b9958
--- /dev/null
+++ b/contrib/godbolt/rust-cuda-wrapper/src/main.rs
@@ -0,0 +1,217 @@
+//! Compiler Explorer wrapper for rust-cuda.
+//!
+//! Accepts a `.rs` file containing a `#[kernel]` function, drops it into a
+//! generated temporary Cargo project that depends on `cuda_std`, invokes
+//! `cargo build` with `rustc_codegen_nvvm` as the codegen backend, and
+//! writes the resulting PTX (or LLVM IR) to stdout. Compiler diagnostics
+//! are relayed to stderr and the process exits with a non-zero status if
+//! no artifact was produced.
+//!
+//! Expects two environment variables:
+//!
+//! * `RUST_CUDA_ROOT`: install prefix containing `lib/librustc_codegen_nvvm.so`
+//!   and `crates/cuda_std/` (required).
+//! * `CUDA_PATH`: CUDA toolkit root, used to locate `libnvvm` at runtime
+//!   (defaults to `/usr/local/cuda` if unset).
+
+use anyhow::{Context, Result};
+use clap::Parser;
+use clap::ValueEnum;
+use std::path::{Path, PathBuf};
+
+/// Rustflags applied on every build. These mirror the flags `cuda_builder`
+/// passes when invoking rustc directly, so PTX produced through the wrapper
+/// matches what a normal rust-cuda build would emit.
+const STATIC_RUSTFLAGS: [&str; 6] = [
+    "-Zunstable-options",
+    "-Zcrate-attr=feature(register_tool)",
+    "-Zcrate-attr=register_tool(nvvm_internal)",
+    "-Zcrate-attr=no_std",
+    "-Zsaturating_float_casts=false",
+    "-Cpanic=immediate-abort",
+];
+
+/// Name of the generated kernel crate; must match `name` in
+/// `cargo_toml.template`.
+const KERNEL_CRATE: &str = "gpu_kernel";
+
+/// Artifact kind printed on stdout.
+#[derive(ValueEnum, Clone, Debug)]
+enum Emit {
+    /// PTX assembly.
+    Ptx,
+    /// LLVM IR, requested from rustc with `--emit=llvm-ir`.
+    LlvmIr,
+}
+
+/// Command-line arguments accepted by the wrapper.
+#[derive(Parser, Debug)]
+#[command(version)]
+struct Args {
+    /// Optimisation level: 0 builds unoptimised, any other value builds with `--release`.
+    #[arg(long, default_value_t = 3)]
+    opt_level: u8,
+    /// Target GPU architecture, as `compute_XX` or the `sm_XX` shorthand.
+    #[arg(long, default_value = "compute_75")]
+    gpu_arch: String,
+    /// Artifact to print on stdout.
+    #[arg(long, value_enum, default_value_t = Emit::Ptx)]
+    emit: Emit,
+    /// Rust source file containing the kernel(s) to compile.
+    input: PathBuf,
+}
+
+/// Maps the `sm_XX` shorthand to the `compute_XX` spelling used for the
+/// `-arch=` argument; any other value is returned unchanged.
+fn normalize_gpu_arch(arch: &str) -> String {
+    match arch.strip_prefix("sm_") {
+        Some(capability) => format!("compute_{capability}"),
+        None => arch.to_string(),
+    }
+}
+
+/// Looks for the kernel crate's `.ll` file next to the PTX artifact.
+///
+/// Searches the directory holding `ptx` and its `deps/` subdirectory for
+/// files named `gpu_kernel*.ll` and returns the first one in path order.
+// NOTE(review): assumes the backend honours `--emit=llvm-ir` and that rustc
+// writes the file into cargo's usual output layout; confirm on a real build.
+fn find_llvm_ir(ptx: &Path) -> Option<PathBuf> {
+    let out_dir = ptx.parent()?;
+    [out_dir.join("deps"), out_dir.to_path_buf()]
+        .into_iter()
+        .filter_map(|dir| std::fs::read_dir(dir).ok())
+        .flatten()
+        .filter_map(|entry| entry.ok())
+        .map(|entry| entry.path())
+        .filter(|path| {
+            path.extension().is_some_and(|ext| ext == "ll")
+                && path
+                    .file_name()
+                    .and_then(|name| name.to_str())
+                    .is_some_and(|name| name.starts_with(KERNEL_CRATE))
+        })
+        .min()
+}
+
+/// Builds the kernel in a throw-away Cargo project and prints the artifact.
+///
+/// Exits with status 1, after relaying cargo's stderr, when the build did
+/// not report a `.ptx` artifact.
+fn main() -> Result<()> {
+    let args = Args::parse();
+
+    let root = std::env::var("RUST_CUDA_ROOT").context("RUST_CUDA_ROOT not set")?;
+
+    let tmp = tempfile::TempDir::new().context("failed to create temp dir")?;
+    let crate_dir = tmp.path().join(KERNEL_CRATE);
+    let src_dir = crate_dir.join("src");
+    std::fs::create_dir_all(&src_dir).context("failed to create src dir")?;
+
+    let cargo_toml = include_str!("cargo_toml.template")
+        .replace("__CUDA_STD_PATH__", &format!("{root}/crates/cuda_std"));
+    std::fs::write(crate_dir.join("Cargo.toml"), cargo_toml)
+        .context("failed to write cargo toml")?;
+    std::fs::copy(&args.input, src_dir.join("lib.rs")).context("failed to copy input file")?;
+
+    let mut rustflags = vec![format!("-Zcodegen-backend={root}/lib/librustc_codegen_nvvm.so")];
+    rustflags.extend(STATIC_RUSTFLAGS.iter().map(|flag| flag.to_string()));
+
+    // `--gpu-arch` is documented as `sm_XX`; `-arch=` is given `compute_XX`.
+    let gpu_arch = normalize_gpu_arch(&args.gpu_arch);
+    let mut llvm_args = format!("-arch={gpu_arch} --override-libm");
+    if args.opt_level == 0 {
+        llvm_args.push_str(" -opt=0");
+    }
+
+    if matches!(args.emit, Emit::LlvmIr) {
+        rustflags.push("--emit=llvm-ir".to_string());
+    }
+
+    rustflags.push(format!("-Cllvm-args={llvm_args}"));
+
+    // Cargo reads flags from CARGO_ENCODED_RUSTFLAGS as a list joined by the
+    // ASCII unit-separator (0x1F), which avoids the quoting ambiguity that
+    // RUSTFLAGS has with flags containing spaces (e.g. `-Cllvm-args=...`).
+    let encoded = rustflags.join("\x1f");
+
+    // The codegen backend dlopen()s libnvvm and friends at load time, so
+    // their directories must be on LD_LIBRARY_PATH for rustc to start at all.
+    let cuda_path = std::env::var("CUDA_PATH").unwrap_or_else(|_| "/usr/local/cuda".to_string());
+    let mut ld_library_path = format!("{root}/lib:{cuda_path}/nvvm/lib64:{cuda_path}/lib64");
+    // Append the inherited value only when non-empty: a trailing `:` is an
+    // empty entry, which the dynamic loader treats as the current directory.
+    let existing_ld = std::env::var("LD_LIBRARY_PATH").unwrap_or_default();
+    if !existing_ld.is_empty() {
+        ld_library_path.push(':');
+        ld_library_path.push_str(&existing_ld);
+    }
+
+    let mut cmd = std::process::Command::new("cargo");
+    cmd.current_dir(&crate_dir)
+        .env("CARGO_ENCODED_RUSTFLAGS", &encoded)
+        // `cuda_std` gates `f16`/`f128` support behind this feature flag; the
+        // nvptx64 target does not support those types natively.
+        .env("CARGO_FEATURE_NO_F16_F128", "1")
+        .env("LD_LIBRARY_PATH", &ld_library_path)
+        .arg("build")
+        .arg("--lib")
+        .arg("--message-format=json-render-diagnostics")
+        .arg("-Zbuild-std=core,alloc")
+        .arg("--target=nvptx64-nvidia-cuda");
+
+    // Any non-zero opt-level maps to a cargo release build; libnvvm performs
+    // its own optimisation level selection via `-Cllvm-args=-opt=N` above.
+    if args.opt_level != 0 {
+        cmd.arg("--release");
+    }
+
+    let output = cmd.output().context("failed to spawn cargo")?;
+    let stdout = String::from_utf8_lossy(&output.stdout);
+    let mut ptx_path: Option<PathBuf> = None;
+
+    // Scan cargo's JSON messages; the last `.ptx` listed by a
+    // `compiler-artifact` message wins.
+    for line in stdout.lines() {
+        let json: serde_json::Value = match serde_json::from_str(line) {
+            Ok(v) => v,
+            Err(_) => continue,
+        };
+
+        if json.get("reason").and_then(|v| v.as_str()) != Some("compiler-artifact") {
+            continue;
+        }
+
+        if let Some(filenames) = json.get("filenames").and_then(|v| v.as_array()) {
+            for f in filenames {
+                if let Some(s) = f.as_str()
+                    && s.ends_with(".ptx")
+                {
+                    ptx_path = Some(PathBuf::from(s));
+                }
+            }
+        }
+    }
+
+    // A reported `.ptx` is the success criterion in both emit modes. In
+    // LLVM IR mode prefer the `.ll` file and fall back to the PTX.
+    let artifact = ptx_path.map(|ptx| match args.emit {
+        Emit::Ptx => ptx,
+        Emit::LlvmIr => find_llvm_ir(&ptx).unwrap_or(ptx),
+    });
+
+    let Some(path) = artifact else {
+        eprintln!("{}", String::from_utf8_lossy(&output.stderr));
+        eprintln!("error: compilation failed, no PTX output produced");
+        // `process::exit` runs no destructors, so remove the temporary
+        // project explicitly instead of leaking it on every failed build.
+        drop(tmp);
+        std::process::exit(1);
+    };
+
+    let contents = std::fs::read_to_string(&path)
+        .with_context(|| format!("failed to read {}", path.display()))?;
+    print!("{contents}");
+
+    Ok(())
+}
diff --git a/contrib/godbolt/rust-cuda-wrapper/tests/integration.rs b/contrib/godbolt/rust-cuda-wrapper/tests/integration.rs
new file mode 100644
index 00000000..5e00578f
--- /dev/null
+++ b/contrib/godbolt/rust-cuda-wrapper/tests/integration.rs
@@ -0,0 +1,30 @@
+use std::process::Command;
+
+#[test]
+fn compiles_test_kernel_to_ptx() {
+    if std::env::var("RUST_CUDA_ROOT").is_err() {
+        eprintln!("skipping: RUST_CUDA_ROOT not set");
+        return;
+    }
+
+    let bin = env!("CARGO_BIN_EXE_rust-cuda-wrapper");
+    let kernel = concat!(env!("CARGO_MANIFEST_DIR"), "/../test-kernel.rs");
+
+    let output = Command::new(bin)
+        .arg(kernel)
+        .output()
+        .expect("failed to run wrapper");
+
+    assert!(
+        output.status.success(),
+        "wrapper exited non-zero. stderr:\n{}",
+        String::from_utf8_lossy(&output.stderr)
+    );
+
+    let stdout = String::from_utf8_lossy(&output.stdout);
+    assert!(
+        stdout.contains(".version"),
+        "stdout doesn't look like PTX. got:\n{}",
+        stdout
+    );
+}
diff --git a/contrib/godbolt/rust-cuda.defaults.properties b/contrib/godbolt/rust-cuda.defaults.properties
index 4596aa98..35b4fb9e 100644
--- a/contrib/godbolt/rust-cuda.defaults.properties
+++ b/contrib/godbolt/rust-cuda.defaults.properties
@@ -11,8 +11,8 @@ compilerType=rust-cuda
 group=rust-cuda
 groupName=Rust CUDA (rust-cuda)
 
-# The wrapper script that Godbolt executes. install.sh places it here.
-compiler=/opt/compiler-explorer/rust-cuda/bin/rust-cuda-wrapper.sh
+# The wrapper binary that Godbolt executes. install.sh places it here.
+compiler=/opt/compiler-explorer/rust-cuda/bin/rust-cuda-wrapper
 
 # Language shown in the UI dropdown.
 lang=rust