diff --git a/Project.toml b/Project.toml
index d752ddba..8f6a97cb 100644
--- a/Project.toml
+++ b/Project.toml
@@ -22,10 +22,12 @@ Tracy = "e689c965-62c8-4b79-b2c5-8359227902fd"
 UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
 
 [weakdeps]
+AMDGPU_LLVM_Backend_jll = "cc5c0156-bd05-5a77-8a68-bb0aafb29019"
 LLVMDowngrader_jll = "f52de702-fb25-5922-94ba-81dd59b07444"
 NVPTX_LLVM_Backend_jll = "ef6e0fe3-e6ef-59c0-bde6-4989574699e0"
 
 [compat]
+AMDGPU_LLVM_Backend_jll = "22"
 ExprTools = "0.1"
 InteractiveUtils = "1"
 LLVM = "9.9"
diff --git a/src/gcn.jl b/src/gcn.jl
index e32b3c21..be047143 100644
--- a/src/gcn.jl
+++ b/src/gcn.jl
@@ -1,5 +1,11 @@
 # implementation of the GPUCompiler interfaces for generating GCN code
 
+# weak dependency that ships the `llc` used by the `:external` back-end; it is referenced
+# through a LazyModule and probed with `isavailable` before use
+const AMDGPU_LLVM_Backend_jll =
+    LazyModule("AMDGPU_LLVM_Backend_jll",
+               UUID("cc5c0156-bd05-5a77-8a68-bb0aafb29019"))
+
 ## target
 
 export GCNCompilerTarget
@@ -7,8 +13,16 @@ export GCNCompilerTarget
 Base.@kwdef struct GCNCompilerTarget <: AbstractCompilerTarget
     dev_isa::String
     features::String=""
+
+    # how machine code is generated: `:external` runs the `llc` from
+    # AMDGPU_LLVM_Backend_jll, `:inprocess` uses the generic in-process `mcgen`
+    backend::Symbol = isavailable(AMDGPU_LLVM_Backend_jll) ? :external : :inprocess
 end
-GCNCompilerTarget(dev_isa; features="") = GCNCompilerTarget(dev_isa, features)
+GCNCompilerTarget(dev_isa; kwargs...) = GCNCompilerTarget(; dev_isa, kwargs...)
+# adding the `backend` field removed the implicit two-argument constructor; keep
+# `GCNCompilerTarget(dev_isa, features)` working for existing callers
+GCNCompilerTarget(dev_isa, features; kwargs...) =
+    GCNCompilerTarget(; dev_isa, features, kwargs...)
 
 llvm_triple(::GCNCompilerTarget) = "amdgcn-amd-amdhsa"
 
@@ -35,7 +49,7 @@ end
 
 # TODO: encode debug build or not in the compiler job
 #       https://github.com/JuliaGPU/CUDAnative.jl/issues/368
-runtime_slug(job::CompilerJob{GCNCompilerTarget}) = "gcn-$(job.config.target.dev_isa)$(job.config.target.features)"
+runtime_slug(job::CompilerJob{GCNCompilerTarget}) = "gcn-$(job.config.target.dev_isa)$(job.config.target.features)-$(job.config.target.backend)"
 
 const gcn_intrinsics = () # TODO: ("vprintf", "__assertfail", "malloc", "free")
 isintrinsic(::CompilerJob{GCNCompilerTarget}, fn::String) = in(fn, gcn_intrinsics)
@@ -153,6 +167,79 @@ function add_kernarg_address_spaces!(
     return new_f
 end
 
+# Generate machine code for a GCN job. With `backend=:inprocess` this defers to the
+# generic `mcgen` method; with `backend=:external` the module is written to a temporary
+# bitcode file and compiled by the `llc` from AMDGPU_LLVM_Backend_jll. Returns the
+# contents of the generated assembly or object file as a `String`.
+@unlocked function mcgen(@nospecialize(job::CompilerJob{GCNCompilerTarget}),
+                         mod::LLVM.Module, format=LLVM.API.LLVMAssemblyFile)
+    target = job.config.target
+
+    if target.backend === :inprocess
+        if :AMDGPU ∉ LLVM.backends()
+            error("The in-process LLVM lacks the AMDGPU target; cannot compile to GCN. " *
+                  "Load AMDGPU_LLVM_Backend_jll and use `backend=:external` instead.")
+        end
+        return invoke(mcgen, Tuple{CompilerJob, LLVM.Module, typeof(format)},
+                      job, mod, format)
+    elseif target.backend !== :external
+        error("Unsupported GCN back-end $(repr(target.backend)); " *
+              "expected :external or :inprocess.")
+    end
+
+    if !isavailable(AMDGPU_LLVM_Backend_jll) || !AMDGPU_LLVM_Backend_jll.is_available()
+        error("The :external GCN back-end requires AMDGPU_LLVM_Backend_jll, which " *
+              "should be installed and loaded first.")
+    end
+
+    filetype = if format == LLVM.API.LLVMAssemblyFile
+        "asm"
+    elseif format == LLVM.API.LLVMObjectFile
+        "obj"
+    else
+        error("Unsupported GCN output format $format")
+    end
+
+    input = tempname(cleanup=false) * ".bc"
+    output = tempname(cleanup=false) * (filetype == "asm" ? ".s" : ".o")
+    keep_input = false
+    try
+        write(input, mod)
+
+        cmd = `$(AMDGPU_LLVM_Backend_jll.llc()) $input
+                    -mtriple=$(llvm_triple(target))
+                    -mcpu=$(target.dev_isa)
+                    -mattr=$(target.features)
+                    --relocation-model=pic
+                    -filetype=$filetype
+                    -o $output`
+        out = Pipe()
+        proc = run(pipeline(ignorestatus(cmd); stdout=out, stderr=out); wait=false)
+        close(out.in)
+        diagnostics = strip(read(out, String))
+        wait(proc)
+        if !success(proc)
+            # keep the input around for debugging
+            keep_input = true
+            msg = "Failed to compile to GCN with external llc"
+            isempty(diagnostics) || (msg *= ":\n" * diagnostics)
+            msg *= "\nIf you think this is a bug, please file an issue and attach $(input)."
+            error(msg)
+        elseif !isempty(diagnostics)
+            # llc only diagnoses on stderr; even successful compilation may e.g. have
+            # ignored an unrecognized CPU or feature, so make sure this surfaces.
+            @warn "External llc reported:\n$diagnostics"
+        end
+
+        return read(output, String)
+    finally
+        # the temporaries are created with `cleanup=false`, so remove them on every exit
+        # path, including exceptions thrown by `write`, `run` or `read`
+        keep_input || rm(input; force=true)
+        rm(output; force=true)
+    end
+end
+
 
 ## LLVM passes
 
diff --git a/test/Project.toml b/test/Project.toml
index 511edb16..d01c6e6e 100644
--- a/test/Project.toml
+++ b/test/Project.toml
@@ -1,4 +1,5 @@
 [deps]
+AMDGPU_LLVM_Backend_jll = "cc5c0156-bd05-5a77-8a68-bb0aafb29019"
 Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
 FileCheck = "4e644321-382b-4b05-b0b6-5d23c3d944fb"
 GPUCompiler = "61eb1bfa-7361-4325-ad38-22787b887f55"
diff --git a/test/gcn.jl b/test/gcn.jl
index ddfd899f..99dae742 100644
--- a/test/gcn.jl
+++ b/test/gcn.jl
@@ -3,6 +3,43 @@ if :AMDGPU in LLVM.backends()
 # XXX: generic `sink` generates an instruction selection error
 sink_gcn(i) = sink(i, Val(5))
 
+@testset "backend selector" begin
+    # in the test environment AMDGPU_LLVM_Backend_jll is loaded, so the default is :external
+    @test GCNCompilerTarget(dev_isa="gfx900").backend === :external
+
+    # both constructor forms accept an explicit backend, alongside the other options
+    @test GCNCompilerTarget(dev_isa="gfx900"; backend=:inprocess).backend === :inprocess
+    @test GCNCompilerTarget("gfx900"; backend=:inprocess).backend === :inprocess
+    let target = GCNCompilerTarget("gfx900"; features="+wavefrontsize64", backend=:external)
+        @test target.dev_isa == "gfx900"
+        @test target.features == "+wavefrontsize64"
+        @test target.backend === :external
+    end
+
+    # the two-positional-argument form that predates the `backend` field keeps working
+    @test GCNCompilerTarget("gfx900", "+wavefrontsize64").features == "+wavefrontsize64"
+
+    mod = @eval module $(gensym())
+        kernel() = return
+    end
+
+    # the backend participates in the runtime slug, so different back-ends don't share a cache
+    job_ext, _ = GCN.create_job(mod.kernel, Tuple{}; backend=:external)
+    job_inp, _ = GCN.create_job(mod.kernel, Tuple{}; backend=:inprocess)
+    @test endswith(GPUCompiler.runtime_slug(job_ext), "-external")
+    @test endswith(GPUCompiler.runtime_slug(job_inp), "-inprocess")
+    @test GPUCompiler.runtime_slug(job_ext) != GPUCompiler.runtime_slug(job_inp)
+
+    # the explicit :external backend generates machine code through the external llc
+    @test (GCN.code_native(devnull, mod.kernel, Tuple{}; backend=:external); true)
+
+    # the :inprocess backend generates machine code through the in-process LLVM back-end
+    @test (GCN.code_native(devnull, mod.kernel, Tuple{}; backend=:inprocess); true)
+
+    # an unknown back-end is rejected at machine-code generation
+    @test_throws "Unsupported GCN back-end" GCN.code_native(devnull, mod.kernel, Tuple{}; backend=:bogus)
+end
+
 @testset "IR" begin
 
 @testset "kernel calling convention" begin
diff --git a/test/helpers/gcn.jl b/test/helpers/gcn.jl
index c894fbd3..b5745b56 100644
--- a/test/helpers/gcn.jl
+++ b/test/helpers/gcn.jl
@@ -6,10 +6,10 @@ import ..TestRuntime
 struct CompilerParams <: AbstractCompilerParams end
 GPUCompiler.runtime_module(::CompilerJob{<:Any,CompilerParams}) = TestRuntime
 
-function create_job(@nospecialize(func), @nospecialize(types); kwargs...)
+function create_job(@nospecialize(func), @nospecialize(types); backend::Symbol=:external, kwargs...)
     config_kwargs, kwargs = split_kwargs(kwargs, GPUCompiler.CONFIG_KWARGS)
     source = methodinstance(typeof(func), Base.to_tuple_type(types), Base.get_world_counter())
-    target = GCNCompilerTarget(dev_isa="gfx900")
+    target = GCNCompilerTarget(dev_isa="gfx900"; backend)
     params = CompilerParams()
     config = CompilerConfig(target, params; kernel=false, config_kwargs...)
     CompilerJob(source, config), kwargs
diff --git a/test/runtests.jl b/test/runtests.jl
index 19c23e05..75838c96 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -3,12 +3,14 @@ import GPUCompiler, LLVM
 using GPUCompiler, LLVM
 using SPIRV_LLVM_Backend_jll, SPIRV_LLVM_Translator_jll, SPIRV_Tools_jll
 using NVPTX_LLVM_Backend_jll
+using AMDGPU_LLVM_Backend_jll
 
 const init_code = quote
     using GPUCompiler, LLVM
     using SPIRV_LLVM_Backend_jll, SPIRV_LLVM_Translator_jll, SPIRV_Tools_jll
     using LLVMDowngrader_jll
     using NVPTX_LLVM_Backend_jll
+    using AMDGPU_LLVM_Backend_jll
 
     # include all helpers
     include(joinpath(@__DIR__, "helpers", "runtime.jl"))
@@ -53,6 +55,10 @@ if filter_tests!(testsuite, args)
             startswith(key, "ptx") && delete!(testsuite, key)
         end
     end
+    if !AMDGPU_LLVM_Backend_jll.is_available()
+        @warn "AMDGPU back-end not available; skipping GCN tests"
+        delete!(testsuite, "gcn")
+    end
 end
 
 runtests(GPUCompiler, args; testsuite, init_code)