-
Notifications
You must be signed in to change notification settings - Fork 52
/
interface.jl
299 lines (223 loc) · 10.5 KB
/
interface.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
# interfaces for defining new compilers
# the definition of a new GPU compiler is typically split in two:
# - a generic compiler that lives in GPUCompiler.jl (e.g., emitting PTX, SPIR-V, etc)
# - a more specific version in a package that targets an environment (e.g. CUDA, ROCm, etc)
#
# the first level of customizability is found in the AbstractCompilerTarget hierarchy,
# with methods and interfaces that can only be implemented within GPUCompiler.jl.
#
# further customization should be put in a concrete instance of the AbstractCompilerParams
# type, and can be used to customize interfaces defined on CompilerJob.
## target
export AbstractCompilerTarget
# container for state handled by targets defined in GPUCompiler.jl
abstract type AbstractCompilerTarget end
source_code(@nospecialize(target::AbstractCompilerTarget)) = "text"
llvm_triple(@nospecialize(target::AbstractCompilerTarget)) = error("Not implemented")
function llvm_machine(@nospecialize(target::AbstractCompilerTarget))
triple = llvm_triple(target)
t = Target(triple=triple)
tm = TargetMachine(t, triple)
asm_verbosity!(tm, true)
return tm
end
llvm_datalayout(target::AbstractCompilerTarget) = DataLayout(llvm_machine(target))
# the target's datalayout, with Julia's non-integral address spaces added to it
function julia_datalayout(@nospecialize(target::AbstractCompilerTarget))
dl = llvm_datalayout(target)
dl === nothing && return nothing
DataLayout(string(dl) * "-ni:10:11:12:13")
end
have_fma(@nospecialize(target::AbstractCompilerTarget), T::Type) = false
## params
export AbstractCompilerParams
# container for state handled by external users of GPUCompiler.jl
abstract type AbstractCompilerParams end
## config
export CompilerConfig
# the configuration of the compiler
"""
CompilerConfig(target, params; kernel=true, entry_abi=:specfunc, name=nothing,
always_inline=false)
Construct a `CompilerConfig` that will be used to drive compilation for the given `target`
and `params`.
Several keyword arguments can be used to customize the compilation process:
- `kernel`: specifies if the function should be compiled as a kernel, or as a regular
function. This is used to determine the calling convention and for validation purposes.
- `entry_abi`: can be either `:specfunc` the default, or `:func`. `:specfunc` expects the
arguments to be passed in registers, simple return values are returned in registers as
well, and complex return values are returned on the stack using `sret`, the calling
convention is `fastcc`. The `:func` abi is simpler with a calling convention of the first
argument being the function itself (to support closures), the second argument being a
pointer to a vector of boxed Julia values and the third argument being the number of
values, the return value will also be boxed. The `:func` abi will internally call the
`:specfunc` abi, but is generally easier to invoke directly.
- `name`: the name that will be used for the entrypoint function. If `nothing` (the
default), the name will be generated automatically.
- `always_inline` specifies if the Julia front-end should inline all functions into one if
possible.
"""
struct CompilerConfig{T,P}
target::T
params::P
kernel::Bool
name::Union{Nothing,String}
entry_abi::Symbol
always_inline::Bool
function CompilerConfig(target::AbstractCompilerTarget,
params::AbstractCompilerParams;
kernel=true,
name=nothing,
entry_abi=:specfunc,
always_inline=false)
if entry_abi ∉ (:specfunc, :func)
error("Unknown entry_abi=$entry_abi")
end
new{typeof(target), typeof(params)}(target, params, kernel, name, entry_abi,
always_inline)
end
end
# copy constructor
CompilerConfig(cfg::CompilerConfig; target=cfg.target, params=cfg.params,
kernel=cfg.kernel, name=cfg.name, entry_abi=cfg.entry_abi,
always_inline=cfg.always_inline) =
CompilerConfig(target, params; kernel, entry_abi, name, always_inline)
function Base.show(io::IO, @nospecialize(cfg::CompilerConfig{T})) where {T}
print(io, "CompilerConfig for ", T)
end
function Base.hash(cfg::CompilerConfig, h::UInt)
h = hash(cfg.target, h)
h = hash(cfg.params, h)
h = hash(cfg.kernel, h)
h = hash(cfg.name, h)
h = hash(cfg.entry_abi, h)
h = hash(cfg.always_inline, h)
return h
end
## job
export CompilerJob
using Core: MethodInstance
# a specific invocation of the compiler, bundling everything needed to generate code
struct CompilerJob{T,P}
source::MethodInstance
config::CompilerConfig{T,P}
world::UInt
CompilerJob(src::MethodInstance, cfg::CompilerConfig{T,P},
world=tls_world_age()) where {T,P} =
new{T,P}(src, cfg, world)
end
## contexts
if VERSION >= v"1.9.0-DEV.516"
const JuliaContextType = ThreadSafeContext
else
const JuliaContextType = Context
end
## interfaces and fallback definitions
# Has the runtime available and does not require special handling
uses_julia_runtime(@nospecialize(job::CompilerJob)) = false
# Should emit PTLS lookup that can be relocated
dump_native(@nospecialize(job::CompilerJob)) = false
# the Julia module to look up target-specific runtime functions in (this includes both
# target-specific functions from the GPU runtime library, like `malloc`, but also
# replacements functions for operations like `Base.sin`)
runtime_module(@nospecialize(job::CompilerJob)) = error("Not implemented")
# check if a function is an intrinsic that can assumed to be always available
isintrinsic(@nospecialize(job::CompilerJob), fn::String) = false
# provide a specific interpreter to use.
get_interpreter(@nospecialize(job::CompilerJob)) =
GPUInterpreter(ci_cache(job), method_table(job), job.world,
inference_params(job), optimization_params(job))
# does this target support throwing Julia exceptions with jl_throw?
# if not, calls to throw will be replaced with calls to the GPU runtime
can_throw(@nospecialize(job::CompilerJob)) = uses_julia_runtime(job)
# does this target support loading from Julia safepoints?
# if not, safepoints at function entry will not be emitted
can_safepoint(@nospecialize(job::CompilerJob)) = uses_julia_runtime(job)
# generate a string that represents the type of compilation, for selecting a compiled
# instance of the runtime library. this slug should encode everything that affects
# the generated code of this compiler job (with exception of the function source)
runtime_slug(@nospecialize(job::CompilerJob)) = error("Not implemented")
# early processing of the newly generated LLVM IR module
process_module!(@nospecialize(job::CompilerJob), mod::LLVM.Module) = return
# the type of the kernel state object, or Nothing if this back-end doesn't need one.
#
# the generated code will be rewritten to include an object of this type as the first
# argument to each kernel, and pass that object to every function that accesses the kernel
# state (possibly indirectly) via the `kernel_state_pointer` function.
kernel_state_type(@nospecialize(job::CompilerJob)) = Nothing
# Does the target need to pass kernel arguments by value?
needs_byval(@nospecialize(job::CompilerJob)) = true
# early processing of the newly identified LLVM kernel function
function process_entry!(@nospecialize(job::CompilerJob), mod::LLVM.Module,
entry::LLVM.Function)
ctx = context(mod)
if job.config.kernel && needs_byval(job)
# pass all bitstypes by value; by default Julia passes aggregates by reference
# (this improves performance, and is mandated by certain back-ends like SPIR-V).
args = classify_arguments(job, function_type(entry))
for arg in args
if arg.cc == BITS_REF
attr = if LLVM.version() >= v"12"
TypeAttribute("byval", eltype(arg.codegen.typ); ctx)
else
EnumAttribute("byval", 0; ctx)
end
push!(parameter_attributes(entry, arg.codegen.i), attr)
end
end
end
return entry
end
# post-Julia optimization processing of the module
optimize_module!(@nospecialize(job::CompilerJob), mod::LLVM.Module) = return
# whether an LLVM function is valid for this back-end
validate_module(@nospecialize(job::CompilerJob), mod::LLVM.Module) = IRError[]
# finalization of the module, before deferred codegen and optimization
function finish_module!(@nospecialize(job::CompilerJob), mod::LLVM.Module, entry::LLVM.Function)
return entry
end
# final processing of the IR, right before validation and machine-code generation
function finish_ir!(@nospecialize(job::CompilerJob), mod::LLVM.Module, entry::LLVM.Function)
return entry
end
add_lowering_passes!(@nospecialize(job::CompilerJob), pm::LLVM.PassManager) = return
link_libraries!(@nospecialize(job::CompilerJob), mod::LLVM.Module,
undefined_fns::Vector{String}) = return
# whether pointer is a valid call target
valid_function_pointer(@nospecialize(job::CompilerJob), ptr::Ptr{Cvoid}) = false
# the codeinfo cache to use
function ci_cache(@nospecialize(job::CompilerJob))
lock(GLOBAL_CI_CACHES_LOCK) do
cache = get!(GLOBAL_CI_CACHES, (typeof(job.config.target), inference_params(job), optimization_params(job))) do
CodeCache()
end
return cache
end
end
# the method table to use
method_table(@nospecialize(job::CompilerJob)) = GLOBAL_METHOD_TABLE
# the inference parameters to use when constructing the GPUInterpreter
function inference_params(@nospecialize(job::CompilerJob))
return InferenceParams(;unoptimize_throw_blocks=false)
end
# the optimization parameters to use when constructing the GPUInterpreter
function optimization_params(@nospecialize(job::CompilerJob))
kwargs = NamedTuple()
if VERSION < v"1.8.0-DEV.486"
kwargs = (kwargs..., unoptimize_throw_blocks=false)
end
if job.config.always_inline
kwargs = (kwargs..., inline_cost_threshold=typemax(Int))
end
return OptimizationParams(;kwargs...)
end
# how much debuginfo to emit
function llvm_debug_info(@nospecialize(job::CompilerJob))
if Base.JLOptions().debug_level == 0
LLVM.API.LLVMDebugEmissionKindNoDebug
elseif Base.JLOptions().debug_level == 1
LLVM.API.LLVMDebugEmissionKindLineTablesOnly
elseif Base.JLOptions().debug_level >= 2
LLVM.API.LLVMDebugEmissionKindFullDebug
end
end