This repository has been archived by the owner on May 27, 2021. It is now read-only.

Merge pull request #162 from JuliaGPU/tb/codegen-norecursion
Use new non-recursive codegen
maleadt authored Mar 27, 2020
2 parents 5f0d25b + 50b7f30 commit 0a22ed5
Showing 7 changed files with 201 additions and 96 deletions.
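
In short: on Julia ≥ v"1.5.0-DEV.393", CUDAnative obtains IR through Julia's batch codegen entry points instead of recursively hooking `emit_function`. A minimal sketch of the new flow, condensed from the `compile_method_instance` added to src/compiler/irgen.jl below (hooks, debug info, and error handling elided; assumes a `method_instance::Core.MethodInstance` to compile):

    params = Base.CodegenParams(track_allocations=false, code_coverage=false,
                                static_alloc=false, prefer_specsig=true)

    # compile the method instance and all of its callees in one batch
    native_code = ccall(:jl_create_native, Ptr{Cvoid},
                        (Vector{Core.MethodInstance}, Base.CodegenParams),
                        [method_instance], params)

    # pull out the LLVM module containing the generated functions
    llvm_mod = LLVM.Module(ccall(:jl_get_llvm_module, LLVM.API.LLVMModuleRef,
                                 (Ptr{Cvoid},), native_code))

The entry point is then looked up by index via `jl_get_function_id` and `jl_get_llvm_function`, as the full diff shows.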
17 changes: 7 additions & 10 deletions .gitlab-ci.yml
@@ -42,16 +42,13 @@ julia:1.4-debug:
CI_CLONE_ARGS: '-b release-1.4'
CI_BUILD_ARGS: 'BINARYBUILDER_LLVM_ASSERTS=1 debug'

# julia:nightly:
# extends:
# - .julia:nightly
# - .test
# tags:
# - nvidia
# - sm_75
# variables:
# CI_THOROUGH: 'true'
# allow_failure: true
julia:nightly:
extends:
- .julia:nightly
- .test
tags:
- nvidia
allow_failure: true


# CUDA versions
2 changes: 1 addition & 1 deletion Project.toml
@@ -26,7 +26,7 @@ CUDAapi = "3.1, 4.0"
CUDAdrv = "6.2.1"
Cthulhu = "1.0"
DataStructures = "0.15, 0.16, 0.17"
LLVM = "1.2"
LLVM = "1.3.4"
MacroTools = "0.5"
TimerOutputs = "0.5"
julia = "1.3"
181 changes: 158 additions & 23 deletions src/compiler/irgen.jl
@@ -47,6 +47,159 @@ Base.showerror(io::IO, err::MethodSubstitutionWarning) =
print(io, "You called $(err.original), maybe you intended to call $(err.substitute) instead?")
const method_substitution_whitelist = [:hypot]

if VERSION >= v"1.5.0-DEV.393"

# JuliaLang/julia#25984 significantly restructured the compiler

# TODO: deduplicate some code

function compile_method_instance(job::CompilerJob, method_instance::Core.MethodInstance, world)
# set up the compiler interface
call_stack = [method_instance]
function hook_emit_function(method_instance, code)
push!(call_stack, method_instance)

# check for Base functions that exist in CUDAnative too
# FIXME: this might be too coarse
method = method_instance.def
if Base.moduleroot(method.module) == Base &&
isdefined(CUDAnative, method_instance.def.name) &&
!in(method_instance.def.name, method_substitution_whitelist)
substitute_function = getfield(CUDAnative, method.name)
tt = Tuple{method_instance.specTypes.parameters[2:end]...}
if hasmethod(substitute_function, tt)
method′ = which(substitute_function, tt)
if Base.moduleroot(method′.module) == CUDAnative
@warn "calls to Base intrinsics might be GPU incompatible" exception=(MethodSubstitutionWarning(method, method′), backtrace(job, call_stack))
end
end
end
end
function hook_emitted_function(method, code)
@compiler_assert last(call_stack) == method job
pop!(call_stack)
end
param_kwargs = [:track_allocations => false,
:code_coverage => false,
:static_alloc => false,
:prefer_specsig => true,
:emit_function => hook_emit_function,
:emitted_function => hook_emitted_function]
if LLVM.version() >= v"8.0" && VERSION >= v"1.3.0-DEV.547"
push!(param_kwargs, :gnu_pubnames => false)

debug_info_kind = if Base.JLOptions().debug_level == 0
LLVM.API.LLVMDebugEmissionKindNoDebug
elseif Base.JLOptions().debug_level == 1
LLVM.API.LLVMDebugEmissionKindLineTablesOnly
elseif Base.JLOptions().debug_level >= 2
LLVM.API.LLVMDebugEmissionKindFullDebug
end

#if CUDAdrv.release() < v"10.2"
# FIXME: LLVM's debug info crashes CUDA
# FIXME: this ought to be fixed on 10.2?
@debug "Incompatibility detected between CUDA and LLVM 8.0+; disabling debug info emission" maxlog=1
debug_info_kind = LLVM.API.LLVMDebugEmissionKindNoDebug
#end

push!(param_kwargs, :debug_info_kind => Cint(debug_info_kind))
end
params = Base.CodegenParams(;param_kwargs...)

# generate IR
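# (jl_create_native compiles the method instance together with everything it
#  calls into a single LLVM module; this is the "non-recursive" part of this change)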
native_code = ccall(:jl_create_native, Ptr{Cvoid},
(Vector{Core.MethodInstance}, Base.CodegenParams),
[method_instance], params)
@assert native_code != C_NULL
llvm_mod_ref = ccall(:jl_get_llvm_module, LLVM.API.LLVMModuleRef,
(Ptr{Cvoid},), native_code)
@assert llvm_mod_ref != C_NULL
llvm_mod = LLVM.Module(llvm_mod_ref)

# get the top-level code
code = Core.Compiler.inf_for_methodinstance(method_instance, world, world)

# get the top-level function index
llvm_func_idx = Ref{Int32}(-1)
llvm_specfunc_idx = Ref{Int32}(-1)
ccall(:jl_get_function_id, Nothing,
(Ptr{Cvoid}, Any, Ptr{Int32}, Ptr{Int32}),
native_code, code, llvm_func_idx, llvm_specfunc_idx)
@assert llvm_func_idx[] != -1
@assert llvm_specfunc_idx[] != -1

# get the top-level function
llvm_func_ref = ccall(:jl_get_llvm_function, LLVM.API.LLVMValueRef,
(Ptr{Cvoid}, UInt32), native_code, llvm_func_idx[]-1)
@assert llvm_func_ref != C_NULL
llvm_func = LLVM.Function(llvm_func_ref)
llvm_specfunc_ref = ccall(:jl_get_llvm_function, LLVM.API.LLVMValueRef,
(Ptr{Cvoid}, UInt32), native_code, llvm_specfunc_idx[]-1)
@assert llvm_specfunc_ref != C_NULL
llvm_specfunc = LLVM.Function(llvm_specfunc_ref)
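# note: `llvm_func` is the generic, boxed-argument wrapper; `llvm_specfunc`
# is the specialized function, and the one returned below for PTX emission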

# configure the module
# NOTE: NVPTX::TargetMachine's data layout doesn't match the NVPTX user guide,
# so we specify it ourselves
if Int === Int64
triple!(llvm_mod, "nvptx64-nvidia-cuda")
datalayout!(llvm_mod, "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64")
else
triple!(llvm_mod, "nvptx-nvidia-cuda")
datalayout!(llvm_mod, "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64")
end

return llvm_specfunc, llvm_mod
end

function irgen(job::CompilerJob, method_instance::Core.MethodInstance, world)
entry, mod = @timeit_debug to "emission" compile_method_instance(job, method_instance, world)

# clean up incompatibilities
@timeit_debug to "clean-up" for llvmf in functions(mod)
# only occurs in debug builds
delete!(function_attributes(llvmf), EnumAttribute("sspstrong", 0, JuliaContext()))
end

# add the global exception indicator flag
emit_exception_flag!(mod)

# rename the entry point
if job.name !== nothing
llvmfn = safe_name(string("julia_", job.name))
else
# strip the globalUnique counter
llvmfn = LLVM.name(entry)
end
LLVM.name!(entry, llvmfn)

# promote the entry point to a kernel and mangle its name
if job.kernel
entry = promote_kernel!(job, mod, entry)
LLVM.name!(entry, mangle_call(entry, job.tt))
end

# minimal required optimization
@timeit_debug to "rewrite" ModulePassManager() do pm
global current_job
current_job = job

linkage!(entry, LLVM.API.LLVMExternalLinkage)
internalize!(pm, [LLVM.name(entry)])

add!(pm, ModulePass("LowerThrow", lower_throw!))
add!(pm, FunctionPass("HideUnreachable", hide_unreachable!))
add!(pm, ModulePass("HideTrap", hide_trap!))
run!(pm, mod)
end

return mod, entry
end

else

function compile_method_instance(job::CompilerJob, method_instance::Core.MethodInstance, world)
function postprocess(ir)
# get rid of jfptr wrappers
@@ -210,33 +363,13 @@ function irgen(job::CompilerJob, method_instance::Core.MethodInstance, world)

# clean up incompatibilities
@timeit_debug to "clean-up" for llvmf in functions(mod)
llvmfn = LLVM.name(llvmf)

# only occurs in debug builds
delete!(function_attributes(llvmf), EnumAttribute("sspstrong", 0, JuliaContext()))

# rename functions
# make function names safe for ptxas
# (LLVM should do this but fails; see e.g. D17738 and D19126)
llvmfn = LLVM.name(llvmf)
if !isdeclaration(llvmf)
# Julia disambiguates local functions by prefixing with `#\d#`.
# since we don't use a global function namespace, get rid of those tags.
if occursin(r"^julia_#\d+#", llvmfn)
llvmfn′ = replace(llvmfn, r"#\d+#"=>"")
if !haskey(functions(mod), llvmfn′)
LLVM.name!(llvmf, llvmfn′)
llvmfn = llvmfn′
end
end

# anonymous functions are just named `#\d`, make that somewhat more readable
m = match(r"_#(\d+)_", llvmfn)
if m !== nothing
llvmfn′ = replace(llvmfn, m.match=>"_anonymous$(m.captures[1])_")
LLVM.name!(llvmf, llvmfn′)
llvmfn = llvmfn′
end

# finally, make function names safe for ptxas
# (LLVM should do this but fails; see e.g. D17738 and D19126)
llvmfn′ = safe_name(llvmfn)
if llvmfn != llvmfn′
LLVM.name!(llvmf, llvmfn′)
Expand Down Expand Up @@ -280,6 +413,8 @@ function irgen(job::CompilerJob, method_instance::Core.MethodInstance, world)
return mod, entry
end

end


## name mangling

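Aside: the `hook_emit_function` added above warns when device code calls a Base function that CUDAnative also defines, unless the name is whitelisted (like `hypot`). A minimal sketch of that lookup, using `sin` (which the "base intrinsics" test below compiles) and assuming `CUDAnative.sin` has a `Float64` method:

    # does CUDAnative override this Base function for these argument types?
    substitute_function = getfield(CUDAnative, :sin)
    tt = Tuple{Float64}
    if hasmethod(substitute_function, tt)
        method′ = which(substitute_function, tt)
        if Base.moduleroot(method′.module) == CUDAnative
            @warn "calls to Base intrinsics might be GPU incompatible"
        end
    end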
2 changes: 2 additions & 0 deletions test/base.jl
@@ -2,6 +2,7 @@

############################################################################################

if VERSION < v"1.5.0-DEV.393"
@testset "method caching" begin

import InteractiveUtils: _dump_function
@@ -27,6 +28,7 @@ else
params)
end

end
end

############################################################################################
50 changes: 4 additions & 46 deletions test/codegen.jl
@@ -11,7 +11,7 @@
ir = sprint(io->CUDAnative.code_llvm(io, valid_kernel, Tuple{}; optimize=false, dump_module=true))

# module should contain our function + a generic call wrapper
@test occursin(r"define void @.*julia_valid_kernel.*\(\)", ir)
@test occursin(r"define\ .* void\ @.*julia_valid_kernel.*\(\)"x, ir)
@test !occursin("define %jl_value_t* @jlcall_", ir)

# there should be no debug metadata
@@ -130,21 +130,6 @@ end
CUDAnative.code_llvm(devnull, D32593, Tuple{CuDeviceVector{D32593_struct,AS.Global}})
end

@testset "kernel names" begin
regular() = return
closure = ()->return

function test_name(f, name; kwargs...)
code = sprint(io->CUDAnative.code_llvm(io, f, Tuple{}; kwargs...))
@test occursin(name, code)
end

test_name(regular, "julia_regular")
test_name(regular, "julia_regular"; kernel=true)
test_name(closure, "julia_anonymous")
test_name(closure, "julia_anonymous"; kernel=true)
end

@testset "PTX TBAA" begin
load(ptr) = unsafe_load(ptr)
store(ptr) = unsafe_store!(ptr, 0)
@@ -256,7 +241,7 @@ end
end

asm = sprint(io->CUDAnative.code_ptx(io, parent, Tuple{Int64}))
@test occursin(r"call.uni\s+julia_child_"m, asm)
@test occursin(r"call.uni\s+julia_.*child_"m, asm)
end

@testset "kernel functions" begin
@@ -314,15 +299,15 @@ end
end

asm = sprint(io->CUDAnative.code_ptx(io, parent1, Tuple{Int}))
@test occursin(r".func julia_child_", asm)
@test occursin(r".func julia_.*child_", asm)

function parent2(i)
child(i+1)
return
end

asm = sprint(io->CUDAnative.code_ptx(io, parent2, Tuple{Int}))
@test occursin(r".func julia_child_", asm)
@test occursin(r".func julia_.*child_", asm)
end

@testset "child function reuse bis" begin
@@ -386,21 +371,6 @@ end
CUDAnative.code_ptx(devnull, kernel, Tuple{Float64})
end

@testset "kernel names" begin
regular() = nothing
closure = ()->nothing

function test_name(f, name; kwargs...)
code = sprint(io->CUDAnative.code_ptx(io, f, Tuple{}; kwargs...))
@test occursin(name, code)
end

test_name(regular, "julia_regular")
test_name(regular, "julia_regular"; kernel=true)
test_name(closure, "julia_anonymous")
test_name(closure, "julia_anonymous"; kernel=true)
end

@testset "exception arguments" begin
function kernel(a)
unsafe_store!(a, trunc(Int, unsafe_load(a)))
@@ -478,18 +448,6 @@ end

# some validation happens in the emit_function hook, which is called by code_llvm

@testset "recursion" begin
@eval recurse_outer(i) = i > 0 ? i : recurse_inner(i)
@eval @noinline recurse_inner(i) = i < 0 ? i : recurse_outer(i)

@test_throws_message(CUDAnative.KernelError, CUDAnative.code_llvm(devnull, recurse_outer, Tuple{Int})) do msg
occursin("recursion is currently not supported", msg) &&
occursin("[1] recurse_outer", msg) &&
occursin("[2] recurse_inner", msg) &&
occursin("[3] recurse_outer", msg)
end
end

@testset "base intrinsics" begin
foobar(i) = sin(i)

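Note that the name-matching tests above were loosened (e.g. `julia_child_` became `julia_.*child_`): the new codegen path can emit names with extra qualifiers that the old renaming pass used to strip. To inspect the names actually produced, something like the following works (a sketch; `child` and `parent` are stand-ins):

    child(i) = i + 1
    function parent(i)
        child(i)
        return
    end

    asm = sprint(io->CUDAnative.code_ptx(io, parent, Tuple{Int}))
    print(asm)   # look for the emitted `.func julia_*child_*` names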
