This repository has been archived by the owner on May 27, 2021. It is now read-only.

Use new non-recursive codegen #162

Merged 2 commits on Mar 27, 2020
17 changes: 7 additions & 10 deletions .gitlab-ci.yml
@@ -42,16 +42,13 @@ julia:1.4-debug:
CI_CLONE_ARGS: '-b release-1.4'
CI_BUILD_ARGS: 'BINARYBUILDER_LLVM_ASSERTS=1 debug'

# julia:nightly:
# extends:
# - .julia:nightly
# - .test
# tags:
# - nvidia
# - sm_75
# variables:
# CI_THOROUGH: 'true'
# allow_failure: true
julia:nightly:
extends:
- .julia:nightly
- .test
tags:
- nvidia
allow_failure: true


# CUDA versions
2 changes: 1 addition & 1 deletion Project.toml
@@ -26,7 +26,7 @@ CUDAapi = "3.1, 4.0"
CUDAdrv = "6.2.1"
Cthulhu = "1.0"
DataStructures = "0.15, 0.16, 0.17"
LLVM = "1.2"
LLVM = "1.3.4"
MacroTools = "0.5"
TimerOutputs = "0.5"
julia = "1.3"
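The compat bump to LLVM.jl 1.3.4 presumably accompanies the new codegen path in irgen.jl below. As a purely illustrative sanity check of the environment (not part of this PR), one could verify both the package version and the underlying library version that the v"8.0" gate in irgen.jl compares against:

using Pkg, LLVM
Pkg.status("LLVM")     # should report LLVM.jl ≥ 1.3.4 after this change
LLVM.version()         # the LLVM library version compared against v"8.0" in irgen.jl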
181 changes: 158 additions & 23 deletions src/compiler/irgen.jl
@@ -47,6 +47,159 @@ Base.showerror(io::IO, err::MethodSubstitutionWarning) =
print(io, "You called $(err.original), maybe you intended to call $(err.substitute) instead?")
const method_substitution_whitelist = [:hypot]

if VERSION >= v"1.5.0-DEV.393"

# JuliaLang/julia#25984 significantly restructured the compiler

# TODO: deduplicate some code

function compile_method_instance(job::CompilerJob, method_instance::Core.MethodInstance, world)
# set-up the compiler interface
call_stack = [method_instance]
function hook_emit_function(method_instance, code)
push!(call_stack, method_instance)

# check for Base functions that exist in CUDAnative too
# FIXME: this might be too coarse
method = method_instance.def
if Base.moduleroot(method.module) == Base &&
isdefined(CUDAnative, method_instance.def.name) &&
!in(method_instance.def.name, method_substitution_whitelist)
substitute_function = getfield(CUDAnative, method.name)
tt = Tuple{method_instance.specTypes.parameters[2:end]...}
if hasmethod(substitute_function, tt)
method′ = which(substitute_function, tt)
if Base.moduleroot(method′.module) == CUDAnative
@warn "calls to Base intrinsics might be GPU incompatible" exception=(MethodSubstitutionWarning(method, method′), backtrace(job, call_stack))
end
end
end
end
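# NOTE: as a hypothetical example, emitting device code that calls Base.sin (for which
#       CUDAnative also provides a definition) would trigger the warning above, while
#       names in method_substitution_whitelist such as hypot are exempt.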
function hook_emitted_function(method, code)
@compiler_assert last(call_stack) == method job
pop!(call_stack)
end
param_kwargs = [:track_allocations => false,
:code_coverage => false,
:static_alloc => false,
:prefer_specsig => true,
:emit_function => hook_emit_function,
:emitted_function => hook_emitted_function]
if LLVM.version() >= v"8.0" && VERSION >= v"1.3.0-DEV.547"
push!(param_kwargs, :gnu_pubnames => false)

debug_info_kind = if Base.JLOptions().debug_level == 0
LLVM.API.LLVMDebugEmissionKindNoDebug
elseif Base.JLOptions().debug_level == 1
LLVM.API.LLVMDebugEmissionKindLineTablesOnly
elseif Base.JLOptions().debug_level >= 2
LLVM.API.LLVMDebugEmissionKindFullDebug
end
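# (for instance, starting Julia with `-g2` sets debug_level to 2 and would request
# full debug info here, were it not overridden below due to the CUDA incompatibility)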

#if CUDAdrv.release() < v"10.2"
# FIXME: LLVM's debug info crashes CUDA
# FIXME: this ought to be fixed on 10.2?
@debug "Incompatibility detected between CUDA and LLVM 8.0+; disabling debug info emission" maxlog=1
debug_info_kind = LLVM.API.LLVMDebugEmissionKindNoDebug
#end

push!(param_kwargs, :debug_info_kind => Cint(debug_info_kind))
end
params = Base.CodegenParams(;param_kwargs...)

# generate IR
native_code = ccall(:jl_create_native, Ptr{Cvoid},
(Vector{Core.MethodInstance}, Base.CodegenParams),
[method_instance], params)
@assert native_code != C_NULL
llvm_mod_ref = ccall(:jl_get_llvm_module, LLVM.API.LLVMModuleRef,
(Ptr{Cvoid},), native_code)
@assert llvm_mod_ref != C_NULL
llvm_mod = LLVM.Module(llvm_mod_ref)

# get the top-level code
code = Core.Compiler.inf_for_methodinstance(method_instance, world, world)

# get the top-level function index
llvm_func_idx = Ref{Int32}(-1)
llvm_specfunc_idx = Ref{Int32}(-1)
ccall(:jl_breakpoint, Nothing, ())
ccall(:jl_get_function_id, Nothing,
(Ptr{Cvoid}, Any, Ptr{Int32}, Ptr{Int32}),
native_code, code, llvm_func_idx, llvm_specfunc_idx)
@assert llvm_func_idx[] != -1
@assert llvm_specfunc_idx[] != -1

# get the top-level function
llvm_func_ref = ccall(:jl_get_llvm_function, LLVM.API.LLVMValueRef,
(Ptr{Cvoid}, UInt32), native_code, llvm_func_idx[]-1)
@assert llvm_func_ref != C_NULL
llvm_func = LLVM.Function(llvm_func_ref)
llvm_specfunc_ref = ccall(:jl_get_llvm_function, LLVM.API.LLVMValueRef,
(Ptr{Cvoid}, UInt32), native_code, llvm_specfunc_idx[]-1)
@assert llvm_specfunc_ref != C_NULL
llvm_specfunc = LLVM.Function(llvm_specfunc_ref)

# configure the module
# NOTE: NVPTX::TargetMachine's data layout doesn't match the NVPTX user guide,
# so we specify it ourselves
if Int === Int64
triple!(llvm_mod, "nvptx64-nvidia-cuda")
datalayout!(llvm_mod, "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64")
else
triple!(llvm_mod, "nvptx-nvidia-cuda")
datalayout!(llvm_mod, "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64")
end

return llvm_specfunc, llvm_mod
end
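# In rough terms, the 1.5+ flow above amounts to (simplified sketch, not literal code):
#
#   native_code    = jl_create_native([method_instance], params)  # emit the entry and all callees at once
#   llvm_mod       = jl_get_llvm_module(native_code)              # a single module containing everything
#   func, specfunc = jl_get_function_id(native_code, code)        # locate the entry points in that module
#
# whereas the pre-1.5 path below relies on Julia's older recursive emission, roughly
# collecting and linking a separate module per method instance as the hooks fire.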

function irgen(job::CompilerJob, method_instance::Core.MethodInstance, world)
entry, mod = @timeit_debug to "emission" compile_method_instance(job, method_instance, world)

# clean up incompatibilities
@timeit_debug to "clean-up" for llvmf in functions(mod)
# only occurs in debug builds
delete!(function_attributes(llvmf), EnumAttribute("sspstrong", 0, JuliaContext()))
end

# add the global exception indicator flag
emit_exception_flag!(mod)

# rename the entry point
if job.name !== nothing
llvmfn = safe_name(string("julia_", job.name))
else
# strip the globalUnique counter
llvmfn = LLVM.name(entry)
end
LLVM.name!(entry, llvmfn)

# promote entry-points to kernels and mangle their names
if job.kernel
entry = promote_kernel!(job, mod, entry)
LLVM.name!(entry, mangle_call(entry, job.tt))
end

# minimal required optimization
@timeit_debug to "rewrite" ModulePassManager() do pm
global current_job
current_job = job

linkage!(entry, LLVM.API.LLVMExternalLinkage)
internalize!(pm, [LLVM.name(entry)])

add!(pm, ModulePass("LowerThrow", lower_throw!))
add!(pm, FunctionPass("HideUnreachable", hide_unreachable!))
add!(pm, ModulePass("HideTrap", hide_trap!))
run!(pm, mod)
end

return mod, entry
end

else

function compile_method_instance(job::CompilerJob, method_instance::Core.MethodInstance, world)
function postprocess(ir)
# get rid of jfptr wrappers
@@ -210,33 +363,13 @@ function irgen(job::CompilerJob, method_instance::Core.MethodInstance, world)

# clean up incompatibilities
@timeit_debug to "clean-up" for llvmf in functions(mod)
llvmfn = LLVM.name(llvmf)

# only occurs in debug builds
delete!(function_attributes(llvmf), EnumAttribute("sspstrong", 0, JuliaContext()))

# rename functions
# make function names safe for ptxas
# (LLVM should do this, but fails; see e.g. D17738 and D19126)
llvmfn = LLVM.name(llvmf)
if !isdeclaration(llvmf)
# Julia disambiguates local functions by prefixing with `#\d#`.
# since we don't use a global function namespace, get rid of those tags.
if occursin(r"^julia_#\d+#", llvmfn)
llvmfn′ = replace(llvmfn, r"#\d+#"=>"")
if !haskey(functions(mod), llvmfn′)
LLVM.name!(llvmf, llvmfn′)
llvmfn = llvmfn′
end
end

# anonymous functions are just named `#\d`, make that somewhat more readable
m = match(r"_#(\d+)_", llvmfn)
if m !== nothing
llvmfn′ = replace(llvmfn, m.match=>"_anonymous$(m.captures[1])_")
LLVM.name!(llvmf, llvmfn′)
llvmfn = llvmfn′
end

# finally, make function names safe for ptxas
# (LLVM should do this, but fails; see e.g. D17738 and D19126)
llvmfn′ = safe_name(llvmfn)
if llvmfn != llvmfn′
LLVM.name!(llvmf, llvmfn′)
@@ -280,6 +413,8 @@ function irgen(job::CompilerJob, method_instance::Core.MethodInstance, world)
return mod, entry
end

end


## name mangling

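For reference, the reworked irgen path is still driven through the unchanged reflection entry points; a minimal smoke test on Julia ≥ 1.5.0-DEV.393 might look like the sketch below (the empty dummy_kernel is an assumption for illustration and does not appear in the diff):

using CUDAnative

dummy_kernel() = return    # hypothetical empty kernel

# unoptimized module IR, as produced by compile_method_instance/irgen above
CUDAnative.code_llvm(stdout, dummy_kernel, Tuple{}; optimize=false, dump_module=true)

# PTX for the promoted kernel entry point
CUDAnative.code_ptx(stdout, dummy_kernel, Tuple{}; kernel=true)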
2 changes: 2 additions & 0 deletions test/base.jl
@@ -2,6 +2,7 @@

############################################################################################

if VERSION < v"1.5.0-DEV.393"
@testset "method caching" begin

import InteractiveUtils: _dump_function
@@ -27,6 +28,7 @@ else
params)
end

end
end

############################################################################################
50 changes: 4 additions & 46 deletions test/codegen.jl
@@ -11,7 +11,7 @@
ir = sprint(io->CUDAnative.code_llvm(io, valid_kernel, Tuple{}; optimize=false, dump_module=true))

# module should contain our function + a generic call wrapper
@test occursin(r"define void @.*julia_valid_kernel.*\(\)", ir)
@test occursin(r"define\ .* void\ @.*julia_valid_kernel.*\(\)"x, ir)
@test !occursin("define %jl_value_t* @jlcall_", ir)

# there should be no debug metadata
@@ -130,21 +130,6 @@ end
CUDAnative.code_llvm(devnull, D32593, Tuple{CuDeviceVector{D32593_struct,AS.Global}})
end

@testset "kernel names" begin
regular() = return
closure = ()->return

function test_name(f, name; kwargs...)
code = sprint(io->CUDAnative.code_llvm(io, f, Tuple{}; kwargs...))
@test occursin(name, code)
end

test_name(regular, "julia_regular")
test_name(regular, "julia_regular"; kernel=true)
test_name(closure, "julia_anonymous")
test_name(closure, "julia_anonymous"; kernel=true)
end

@testset "PTX TBAA" begin
load(ptr) = unsafe_load(ptr)
store(ptr) = unsafe_store!(ptr, 0)
@@ -256,7 +241,7 @@ end
end

asm = sprint(io->CUDAnative.code_ptx(io, parent, Tuple{Int64}))
@test occursin(r"call.uni\s+julia_child_"m, asm)
@test occursin(r"call.uni\s+julia_.*child_"m, asm)
end

@testset "kernel functions" begin
@@ -314,15 +299,15 @@ end
end

asm = sprint(io->CUDAnative.code_ptx(io, parent1, Tuple{Int}))
@test occursin(r".func julia_child_", asm)
@test occursin(r".func julia_.*child_", asm)

function parent2(i)
child(i+1)
return
end

asm = sprint(io->CUDAnative.code_ptx(io, parent2, Tuple{Int}))
@test occursin(r".func julia_child_", asm)
@test occursin(r".func julia_.*child_", asm)
end

@testset "child function reuse bis" begin
@@ -386,21 +371,6 @@ end
CUDAnative.code_ptx(devnull, kernel, Tuple{Float64})
end

@testset "kernel names" begin
regular() = nothing
closure = ()->nothing

function test_name(f, name; kwargs...)
code = sprint(io->CUDAnative.code_ptx(io, f, Tuple{}; kwargs...))
@test occursin(name, code)
end

test_name(regular, "julia_regular")
test_name(regular, "julia_regular"; kernel=true)
test_name(closure, "julia_anonymous")
test_name(closure, "julia_anonymous"; kernel=true)
end

@testset "exception arguments" begin
function kernel(a)
unsafe_store!(a, trunc(Int, unsafe_load(a)))
@@ -478,18 +448,6 @@ end

# some validation happens in the emit_function hook, which is called by code_llvm

@testset "recursion" begin
@eval recurse_outer(i) = i > 0 ? i : recurse_inner(i)
@eval @noinline recurse_inner(i) = i < 0 ? i : recurse_outer(i)

@test_throws_message(CUDAnative.KernelError, CUDAnative.code_llvm(devnull, recurse_outer, Tuple{Int})) do msg
occursin("recursion is currently not supported", msg) &&
occursin("[1] recurse_outer", msg) &&
occursin("[2] recurse_inner", msg) &&
occursin("[3] recurse_outer", msg)
end
end

@testset "base intrinsics" begin
foobar(i) = sin(i)

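The regex changes above reflect that the new codegen can, presumably, insert extra components between the julia_ prefix and the original function name, so the tests now only require the name to appear somewhere after the prefix. An illustrative check against an assumed (made-up) PTX fragment:

using Test

asm = ".func julia_YY.child_12345"           # hypothetical mangled name, not taken from real output
@test occursin(r".func julia_.*child_", asm)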