From 3108eeb15adf94eec84d5859da66c3301b5cc106 Mon Sep 17 00:00:00 2001
From: Valentin Churavy <v.churavy@gmail.com>
Date: Mon, 22 Jun 2026 14:37:21 +0200
Subject: [PATCH 1/5] GCN: generate machine code via external
 AMDGPU_LLVM_Backend_jll

Mirror the NVPTX_LLVM_Backend_jll approach for AMDGPU: override `mcgen`
for `GCNCompilerTarget` to emit machine code through the external, up-to-date
`llc` from AMDGPU_LLVM_Backend_jll instead of the in-process LLVM back-end.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 Project.toml      |  2 ++
 src/gcn.jl        | 55 +++++++++++++++++++++++++++++++++++++++++++++++
 test/Project.toml |  1 +
 test/runtests.jl  |  6 ++++++
 4 files changed, 64 insertions(+)

diff --git a/Project.toml b/Project.toml
index d752ddba..8f6a97cb 100644
--- a/Project.toml
+++ b/Project.toml
@@ -22,10 +22,12 @@ Tracy = "e689c965-62c8-4b79-b2c5-8359227902fd"
 UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
 
 [weakdeps]
+AMDGPU_LLVM_Backend_jll = "cc5c0156-bd05-5a77-8a68-bb0aafb29019"
 LLVMDowngrader_jll = "f52de702-fb25-5922-94ba-81dd59b07444"
 NVPTX_LLVM_Backend_jll = "ef6e0fe3-e6ef-59c0-bde6-4989574699e0"
 
 [compat]
+AMDGPU_LLVM_Backend_jll = "22"
 ExprTools = "0.1"
 InteractiveUtils = "1"
 LLVM = "9.9"
diff --git a/src/gcn.jl b/src/gcn.jl
index e32b3c21..a2b40828 100644
--- a/src/gcn.jl
+++ b/src/gcn.jl
@@ -1,5 +1,10 @@
 # implementation of the GPUCompiler interfaces for generating GCN code
 
+const AMDGPU_LLVM_Backend_jll =
+    LazyModule("AMDGPU_LLVM_Backend_jll",
+               UUID("cc5c0156-bd05-5a77-8a68-bb0aafb29019"))
+
+
 ## target
 
 export GCNCompilerTarget
@@ -153,6 +158,56 @@ function add_kernarg_address_spaces!(
     return new_f
 end
 
+@unlocked function mcgen(@nospecialize(job::CompilerJob{GCNCompilerTarget}),
+                         mod::LLVM.Module, format=LLVM.API.LLVMAssemblyFile)
+    if !isavailable(AMDGPU_LLVM_Backend_jll) || !AMDGPU_LLVM_Backend_jll.is_available()
+        error("AMDGPU LLVM back-end not loaded; cannot compile to GCN.")
+    end
+
+    target = job.config.target
+    filetype = if format == LLVM.API.LLVMAssemblyFile
+        "asm"
+    elseif format == LLVM.API.LLVMObjectFile
+        "obj"
+    else
+        error("Unsupported GCN output format $format")
+    end
+    # Compile via the external llc: `mod` is written to a file and the output read back.
+    # Temporaries opt out of automatic cleanup, so they are removed in `finally` below.
+    input  = tempname(cleanup=false) * ".bc"
+    output = tempname(cleanup=false) * (filetype == "asm" ? ".s" : ".o")
+    keep_input = false
+    try
+        write(input, mod)
+        cmd = `$(AMDGPU_LLVM_Backend_jll.llc()) $input
+                  -mtriple=$(llvm_triple(target))
+                  -mcpu=$(target.dev_isa)
+                  -mattr=$(target.features)
+                  --relocation-model=pic
+                  -filetype=$filetype
+                  -o $output`
+        out = Pipe()
+        proc = run(pipeline(ignorestatus(cmd); stdout=out, stderr=out); wait=false)
+        close(out.in)
+        log = strip(read(out, String))
+        wait(proc)
+        if !success(proc)
+            keep_input = true  # the error below asks the user to attach this file
+            msg = "Failed to compile to GCN with external llc"
+            isempty(log) || (msg *= ":\n" * log)
+            msg *= "\nIf you think this is a bug, please file an issue and attach $(input)."
+            error(msg)
+        elseif !isempty(log)
+            # llc exited successfully but still printed something; surface it as a warning
+            @warn "External llc reported:\n$log"
+        end
+        return read(output, String)
+    finally
+        keep_input || rm(input; force=true)
+        rm(output; force=true)
+    end
+end
+
 
 ## LLVM passes
 
diff --git a/test/Project.toml b/test/Project.toml
index 511edb16..d01c6e6e 100644
--- a/test/Project.toml
+++ b/test/Project.toml
@@ -1,4 +1,5 @@
 [deps]
+AMDGPU_LLVM_Backend_jll = "cc5c0156-bd05-5a77-8a68-bb0aafb29019"
 Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
 FileCheck = "4e644321-382b-4b05-b0b6-5d23c3d944fb"
 GPUCompiler = "61eb1bfa-7361-4325-ad38-22787b887f55"
diff --git a/test/runtests.jl b/test/runtests.jl
index 19c23e05..75838c96 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -3,12 +3,14 @@ import GPUCompiler, LLVM
 using GPUCompiler, LLVM
 using SPIRV_LLVM_Backend_jll, SPIRV_LLVM_Translator_jll, SPIRV_Tools_jll
 using NVPTX_LLVM_Backend_jll
+using AMDGPU_LLVM_Backend_jll
 
 const init_code = quote
     using GPUCompiler, LLVM
     using SPIRV_LLVM_Backend_jll, SPIRV_LLVM_Translator_jll, SPIRV_Tools_jll
     using LLVMDowngrader_jll
     using NVPTX_LLVM_Backend_jll
+    using AMDGPU_LLVM_Backend_jll
 
     # include all helpers
     include(joinpath(@__DIR__, "helpers", "runtime.jl"))
@@ -53,6 +55,10 @@ if filter_tests!(testsuite, args)
             startswith(key, "ptx") && delete!(testsuite, key)
         end
     end
+    if !AMDGPU_LLVM_Backend_jll.is_available()
+        @warn "AMDGPU back-end not available; skipping GCN tests"
+        delete!(testsuite, "gcn")
+    end
 end
 
 runtests(GPUCompiler, args; testsuite, init_code)

From 2744deb63f5dcaaa0e58f058a4b018f6baa8b8fd Mon Sep 17 00:00:00 2001
From: Valentin Churavy <v.churavy@gmail.com>
Date: Mon, 22 Jun 2026 17:15:20 +0200
Subject: [PATCH 2/5] GCN: fall back to in-process LLVM back-end when jll
 unavailable

Instead of erroring when AMDGPU_LLVM_Backend_jll is not loaded, fall back
to the (deprecated) in-process LLVM back-end. This keeps existing
consumers working until the external back-end can be required in the next
breaking release. A deprecation warning nudges users to load the jll.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 src/gcn.jl  | 13 ++++++++++++-
 test/gcn.jl | 27 +++++++++++++++++++++++++++
 2 files changed, 39 insertions(+), 1 deletion(-)

diff --git a/src/gcn.jl b/src/gcn.jl
index a2b40828..e8a93b6c 100644
--- a/src/gcn.jl
+++ b/src/gcn.jl
@@ -161,7 +161,18 @@ end
 @unlocked function mcgen(@nospecialize(job::CompilerJob{GCNCompilerTarget}),
                          mod::LLVM.Module, format=LLVM.API.LLVMAssemblyFile)
     if !isavailable(AMDGPU_LLVM_Backend_jll) || !AMDGPU_LLVM_Backend_jll.is_available()
-        error("AMDGPU LLVM back-end not loaded; cannot compile to GCN.")
+        # fall back to the in-process LLVM back-end, which is deprecated and will be
+        # removed in the next breaking release in favor of AMDGPU_LLVM_Backend_jll.
+        safe_depwarn(
+            "Generating GCN machine code with the in-process LLVM is deprecated; " *
+            "load AMDGPU_LLVM_Backend_jll to use the external back-end instead.",
+            :mcgen)
+        if :AMDGPU ∉ LLVM.backends()
+            error("AMDGPU LLVM back-end not loaded and the in-process LLVM lacks the " *
+                  "AMDGPU target; cannot compile to GCN.")
+        end
+        return invoke(mcgen, Tuple{CompilerJob, LLVM.Module, typeof(format)},
+                      job, mod, format)
     end
 
     target = job.config.target
diff --git a/test/gcn.jl b/test/gcn.jl
index ddfd899f..03ea3e8e 100644
--- a/test/gcn.jl
+++ b/test/gcn.jl
@@ -442,5 +442,32 @@ end
     GCN.code_native(devnull, mod.kernel, Tuple{Float32,Ptr{Float32}})
 end
 
+@testset "in-process fallback" begin
+    # when AMDGPU_LLVM_Backend_jll is unavailable, GCN machine-code generation should
+    # fall back to the (deprecated) in-process LLVM back-end instead of erroring.
+    # this whole file is gated on the in-process AMDGPU back-end being available, so
+    # the fallback path is exercisable here.
+    mod = @eval module $(gensym())
+        kernel() = return
+    end
+
+    # simulate AMDGPU_LLVM_Backend_jll not being loaded
+    pkg = Base.PkgId(Base.UUID("cc5c0156-bd05-5a77-8a68-bb0aafb29019"),
+                     "AMDGPU_LLVM_Backend_jll")
+    saved = get(Base.loaded_modules, pkg, nothing)
+    try
+        delete!(Base.loaded_modules, pkg)
+        @test !GPUCompiler.isavailable(GPUCompiler.AMDGPU_LLVM_Backend_jll)
+
+        # the in-process back-end should still produce valid GCN assembly
+        @test @filecheck begin
+            @check "s_endpgm"
+            GCN.code_native(mod.kernel, Tuple{}; kernel=true)
+        end
+    finally
+        saved === nothing || (Base.loaded_modules[pkg] = saved)
+    end
+end
+
 end
 end # :AMDGPU in LLVM.backends()

From 6217a934394403dd08760c6b168b6dfc4cac29b8 Mon Sep 17 00:00:00 2001
From: Valentin Churavy <v.churavy@gmail.com>
Date: Mon, 29 Jun 2026 12:45:42 +0200
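Subject: [PATCH 3/5] GCN: remove test that modifies Julia internals

The "in-process fallback" testset simulated AMDGPU_LLVM_Backend_jll not being
loaded by deleting its entry from `Base.loaded_modules`. Drop the test rather
than mutate Julia internals from the test suite.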

---
 test/gcn.jl | 27 ---------------------------
 1 file changed, 27 deletions(-)

diff --git a/test/gcn.jl b/test/gcn.jl
index 03ea3e8e..ddfd899f 100644
--- a/test/gcn.jl
+++ b/test/gcn.jl
@@ -442,32 +442,5 @@ end
     GCN.code_native(devnull, mod.kernel, Tuple{Float32,Ptr{Float32}})
 end
 
-@testset "in-process fallback" begin
-    # when AMDGPU_LLVM_Backend_jll is unavailable, GCN machine-code generation should
-    # fall back to the (deprecated) in-process LLVM back-end instead of erroring.
-    # this whole file is gated on the in-process AMDGPU back-end being available, so
-    # the fallback path is exercisable here.
-    mod = @eval module $(gensym())
-        kernel() = return
-    end
-
-    # simulate AMDGPU_LLVM_Backend_jll not being loaded
-    pkg = Base.PkgId(Base.UUID("cc5c0156-bd05-5a77-8a68-bb0aafb29019"),
-                     "AMDGPU_LLVM_Backend_jll")
-    saved = get(Base.loaded_modules, pkg, nothing)
-    try
-        delete!(Base.loaded_modules, pkg)
-        @test !GPUCompiler.isavailable(GPUCompiler.AMDGPU_LLVM_Backend_jll)
-
-        # the in-process back-end should still produce valid GCN assembly
-        @test @filecheck begin
-            @check "s_endpgm"
-            GCN.code_native(mod.kernel, Tuple{}; kernel=true)
-        end
-    finally
-        saved === nothing || (Base.loaded_modules[pkg] = saved)
-    end
-end
-
 end
 end # :AMDGPU in LLVM.backends()

From 2ab077c99e0dbef34817ed371d087114b06acd3e Mon Sep 17 00:00:00 2001
From: Valentin Churavy <v.churavy@gmail.com>
Date: Mon, 29 Jun 2026 20:52:54 +0200
Subject: [PATCH 4/5] GCN: add a back-end selector to GCNCompilerTarget

Mirror the SPIRVCompilerTarget approach: add a `backend::Symbol` field
selecting between `:external` (the up-to-date `llc` from
AMDGPU_LLVM_Backend_jll) and `:inprocess` (the deprecated in-process LLVM
back-end). The default is `:external` when the jll is loaded, otherwise
`:inprocess`. `mcgen` now dispatches on this selector instead of probing
jll availability, the back-end participates in `runtime_slug`, and an
unknown back-end is rejected with a clear error.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 src/gcn.jl          | 29 ++++++++++++++++++++---------
 test/gcn.jl         | 31 +++++++++++++++++++++++++++++++
 test/helpers/gcn.jl |  4 ++--
 3 files changed, 53 insertions(+), 11 deletions(-)

diff --git a/src/gcn.jl b/src/gcn.jl
index e8a93b6c..33e3860d 100644
--- a/src/gcn.jl
+++ b/src/gcn.jl
@@ -12,8 +12,10 @@ export GCNCompilerTarget
 Base.@kwdef struct GCNCompilerTarget <: AbstractCompilerTarget
     dev_isa::String
     features::String=""
+    # default to :external only if the jll is loaded and reports itself available; `mcgen` rejects :external otherwise
+    backend::Symbol = (isavailable(AMDGPU_LLVM_Backend_jll) && AMDGPU_LLVM_Backend_jll.is_available()) ? :external : :inprocess
 end
-GCNCompilerTarget(dev_isa; features="") = GCNCompilerTarget(dev_isa, features)
+GCNCompilerTarget(dev_isa; kwargs...) = GCNCompilerTarget(; dev_isa, kwargs...)
 
 llvm_triple(::GCNCompilerTarget) = "amdgcn-amd-amdhsa"
 
@@ -40,7 +42,7 @@ end
 
 # TODO: encode debug build or not in the compiler job
 #       https://github.com/JuliaGPU/CUDAnative.jl/issues/368
-runtime_slug(job::CompilerJob{GCNCompilerTarget}) = "gcn-$(job.config.target.dev_isa)$(job.config.target.features)"
+runtime_slug(job::CompilerJob{GCNCompilerTarget}) = "gcn-$(job.config.target.dev_isa)$(job.config.target.features)-$(job.config.target.backend)"
 
 const gcn_intrinsics = () # TODO: ("vprintf", "__assertfail", "malloc", "free")
 isintrinsic(::CompilerJob{GCNCompilerTarget}, fn::String) = in(fn, gcn_intrinsics)
@@ -160,22 +162,31 @@ end
 
 @unlocked function mcgen(@nospecialize(job::CompilerJob{GCNCompilerTarget}),
                          mod::LLVM.Module, format=LLVM.API.LLVMAssemblyFile)
-    if !isavailable(AMDGPU_LLVM_Backend_jll) || !AMDGPU_LLVM_Backend_jll.is_available()
-        # fall back to the in-process LLVM back-end, which is deprecated and will be
-        # removed in the next breaking release in favor of AMDGPU_LLVM_Backend_jll.
+    target = job.config.target
+
+    if target.backend === :inprocess
+        # the in-process LLVM back-end is deprecated and will be removed in the next
+        # breaking release in favor of AMDGPU_LLVM_Backend_jll (backend=:external).
         safe_depwarn(
             "Generating GCN machine code with the in-process LLVM is deprecated; " *
-            "load AMDGPU_LLVM_Backend_jll to use the external back-end instead.",
+            "load AMDGPU_LLVM_Backend_jll and use `backend=:external` instead.",
             :mcgen)
         if :AMDGPU ∉ LLVM.backends()
-            error("AMDGPU LLVM back-end not loaded and the in-process LLVM lacks the " *
-                  "AMDGPU target; cannot compile to GCN.")
+            error("The in-process LLVM lacks the AMDGPU target; cannot compile to GCN. " *
+                  "Load AMDGPU_LLVM_Backend_jll and use `backend=:external` instead.")
         end
         return invoke(mcgen, Tuple{CompilerJob, LLVM.Module, typeof(format)},
                       job, mod, format)
+    elseif target.backend !== :external
+        error("Unsupported GCN back-end $(repr(target.backend)); " *
+              "expected :external or :inprocess.")
+    end
+
+    if !isavailable(AMDGPU_LLVM_Backend_jll) || !AMDGPU_LLVM_Backend_jll.is_available()
+        error("The :external GCN back-end requires AMDGPU_LLVM_Backend_jll, which " *
+              "should be installed and loaded first.")
     end
 
-    target = job.config.target
     filetype = if format == LLVM.API.LLVMAssemblyFile
         "asm"
     elseif format == LLVM.API.LLVMObjectFile
diff --git a/test/gcn.jl b/test/gcn.jl
index ddfd899f..e0f4c478 100644
--- a/test/gcn.jl
+++ b/test/gcn.jl
@@ -3,6 +3,37 @@ if :AMDGPU in LLVM.backends()
 # XXX: generic `sink` generates an instruction selection error
 sink_gcn(i) = sink(i, Val(5))
 
+@testset "backend selector" begin
+    # in the test environment AMDGPU_LLVM_Backend_jll is loaded, so the default is :external
+    @test GCNCompilerTarget(dev_isa="gfx900").backend === :external
+
+    # both constructor forms accept an explicit backend, alongside the other options
+    @test GCNCompilerTarget(dev_isa="gfx900"; backend=:inprocess).backend === :inprocess
+    @test GCNCompilerTarget("gfx900"; backend=:inprocess).backend === :inprocess
+    let target = GCNCompilerTarget("gfx900"; features="+wavefrontsize64", backend=:external)
+        @test target.dev_isa == "gfx900"
+        @test target.features == "+wavefrontsize64"
+        @test target.backend === :external
+    end
+
+    mod = @eval module $(gensym())
+        kernel() = return
+    end
+
+    # the backend participates in the runtime slug, so different back-ends don't share a cache
+    job_ext, _ = GCN.create_job(mod.kernel, Tuple{}; backend=:external)
+    job_inp, _ = GCN.create_job(mod.kernel, Tuple{}; backend=:inprocess)
+    @test endswith(GPUCompiler.runtime_slug(job_ext), "-external")
+    @test endswith(GPUCompiler.runtime_slug(job_inp), "-inprocess")
+    @test GPUCompiler.runtime_slug(job_ext) != GPUCompiler.runtime_slug(job_inp)
+
+    # the explicit :external backend generates machine code through the external llc
+    @test (GCN.code_native(devnull, mod.kernel, Tuple{}; backend=:external); true)
+
+    # an unknown back-end is rejected at machine-code generation
+    @test_throws "Unsupported GCN back-end" GCN.code_native(devnull, mod.kernel, Tuple{}; backend=:bogus)
+end
+
 @testset "IR" begin
 
 @testset "kernel calling convention" begin
diff --git a/test/helpers/gcn.jl b/test/helpers/gcn.jl
index c894fbd3..b5745b56 100644
--- a/test/helpers/gcn.jl
+++ b/test/helpers/gcn.jl
@@ -6,10 +6,10 @@ import ..TestRuntime
 struct CompilerParams <: AbstractCompilerParams end
 GPUCompiler.runtime_module(::CompilerJob{<:Any,CompilerParams}) = TestRuntime
 
-function create_job(@nospecialize(func), @nospecialize(types); kwargs...)
+function create_job(@nospecialize(func), @nospecialize(types); backend::Symbol=:external, kwargs...)
     config_kwargs, kwargs = split_kwargs(kwargs, GPUCompiler.CONFIG_KWARGS)
     source = methodinstance(typeof(func), Base.to_tuple_type(types), Base.get_world_counter())
-    target = GCNCompilerTarget(dev_isa="gfx900")
+    target = GCNCompilerTarget(dev_isa="gfx900"; backend)
     params = CompilerParams()
     config = CompilerConfig(target, params; kernel=false, config_kwargs...)
     CompilerJob(source, config), kwargs

From 24cfac312ccfc64a4adf737f3dbf7883cbd28a57 Mon Sep 17 00:00:00 2001
From: Valentin Churavy <v.churavy@gmail.com>
Date: Tue, 30 Jun 2026 11:54:35 +0200
Subject: [PATCH 5/5] GCN: address review on back-end selector

- drop the redundant blank line after the AMDGPU_LLVM_Backend_jll const
- remove the deprecation warning from the :inprocess mcgen path; the
  selector makes the choice explicit, and dropping it lets the in-process
  path run under --depwarn=error
- test machine-code generation for the :inprocess back-end too

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 src/gcn.jl  | 7 -------
 test/gcn.jl | 3 +++
 2 files changed, 3 insertions(+), 7 deletions(-)

diff --git a/src/gcn.jl b/src/gcn.jl
index 33e3860d..be047143 100644
--- a/src/gcn.jl
+++ b/src/gcn.jl
@@ -4,7 +4,6 @@ const AMDGPU_LLVM_Backend_jll =
     LazyModule("AMDGPU_LLVM_Backend_jll",
                UUID("cc5c0156-bd05-5a77-8a68-bb0aafb29019"))
 
-
 ## target
 
 export GCNCompilerTarget
@@ -165,12 +164,6 @@ end
     target = job.config.target
 
     if target.backend === :inprocess
-        # the in-process LLVM back-end is deprecated and will be removed in the next
-        # breaking release in favor of AMDGPU_LLVM_Backend_jll (backend=:external).
-        safe_depwarn(
-            "Generating GCN machine code with the in-process LLVM is deprecated; " *
-            "load AMDGPU_LLVM_Backend_jll and use `backend=:external` instead.",
-            :mcgen)
         if :AMDGPU ∉ LLVM.backends()
             error("The in-process LLVM lacks the AMDGPU target; cannot compile to GCN. " *
                   "Load AMDGPU_LLVM_Backend_jll and use `backend=:external` instead.")
diff --git a/test/gcn.jl b/test/gcn.jl
index e0f4c478..99dae742 100644
--- a/test/gcn.jl
+++ b/test/gcn.jl
@@ -30,6 +30,9 @@ sink_gcn(i) = sink(i, Val(5))
     # the explicit :external backend generates machine code through the external llc
     @test (GCN.code_native(devnull, mod.kernel, Tuple{}; backend=:external); true)
 
+    # the :inprocess backend generates machine code through the in-process LLVM back-end
+    @test (GCN.code_native(devnull, mod.kernel, Tuple{}; backend=:inprocess); true)
+
     # an unknown back-end is rejected at machine-code generation
     @test_throws "Unsupported GCN back-end" GCN.code_native(devnull, mod.kernel, Tuple{}; backend=:bogus)
 end