Line | Exclusive | Inclusive | Code |
---|---|---|---|
1 | # This file is a part of Julia. License is MIT: https://julialang.org/license | ||
2 | |||
3 | ##################### | ||
4 | # OptimizationState # | ||
5 | ##################### | ||
6 | |||
7 | mutable struct OptimizationState | ||
8 | linfo::MethodInstance | ||
9 | result_vargs::Vector{Any} | ||
10 | calledges::Vector{Any} | ||
11 | src::CodeInfo | ||
12 | mod::Module | ||
13 | nargs::Int | ||
14 | min_valid::UInt | ||
15 | max_valid::UInt | ||
16 | params::Params | ||
17 | sp::SimpleVector # static parameters | ||
18 | slottypes::Vector{Any} | ||
19 | const_api::Bool | ||
20 | function OptimizationState(frame::InferenceState) | ||
21 | s_edges = frame.stmt_edges[1] | ||
22 | if s_edges === () | ||
23 | s_edges = [] | ||
24 | frame.stmt_edges[1] = s_edges | ||
25 | end | ||
26 | src = frame.src | ||
27 | return new(frame.linfo, frame.result.vargs, | ||
28 | s_edges::Vector{Any}, | ||
29 | src, frame.mod, frame.nargs, | ||
30 | frame.min_valid, frame.max_valid, | ||
31 | frame.params, frame.sp, frame.slottypes, false) | ||
32 | end | ||
33 | function OptimizationState(linfo::MethodInstance, src::CodeInfo, | ||
34 | params::Params) | ||
35 | # prepare src for running optimization passes | ||
36 | # if it isn't already | ||
37 | nssavalues = src.ssavaluetypes | ||
38 | if nssavalues isa Int | ||
39 | src.ssavaluetypes = Any[ Any for i = 1:nssavalues ] | ||
40 | end | ||
41 | nslots = length(src.slotnames) | ||
42 | slottypes = Any[ Any for i = 1:nslots ] | ||
43 | s_edges = [] | ||
44 | # cache some useful state computations | ||
45 | toplevel = !isa(linfo.def, Method) | ||
46 | if !toplevel | ||
47 | meth = linfo.def | ||
48 | inmodule = meth.module | ||
49 | nargs = meth.nargs | ||
50 | else | ||
51 | inmodule = linfo.def::Module | ||
52 | nargs = 0 | ||
53 | end | ||
54 | result_vargs = Any[] # if you want something more accurate, set it yourself :P | ||
55 | return new(linfo, result_vargs, | ||
56 | s_edges::Vector{Any}, | ||
57 | src, inmodule, nargs, | ||
58 | min_world(linfo), max_world(linfo), | ||
59 | params, spvals_from_meth_instance(linfo), slottypes, false) | ||
60 | end | ||
61 | end | ||
62 | |||
63 | function OptimizationState(linfo::MethodInstance, params::Params) | ||
64 | src = retrieve_code_info(linfo) | ||
65 | src === nothing && return nothing | ||
66 | return OptimizationState(linfo, src, params) | ||
67 | end | ||
68 | |||
69 | |||
70 | ############# | ||
71 | # constants # | ||
72 | ############# | ||
73 | |||
74 | # The slot has uses that are not statically dominated by any assignment | ||
75 | # This is implied by `SLOT_USEDUNDEF`. | ||
76 | # If this is not set, all the uses are (statically) dominated by the defs. | ||
77 | # In particular, if a slot has `AssignedOnce && !StaticUndef`, it is an SSA. | ||
78 | const SLOT_STATICUNDEF = 1 | ||
79 | const SLOT_ASSIGNEDONCE = 16 # slot is assigned to only once | ||
80 | const SLOT_USEDUNDEF = 32 # slot has uses that might raise UndefVarError | ||
81 | # const SLOT_CALLED = 64 | ||
82 | |||
83 | const IR_FLAG_INBOUNDS = 0x01 | ||
84 | |||
85 | # known to be always effect-free (in particular nothrow) | ||
86 | const _PURE_BUILTINS = Any[tuple, svec, ===, typeof, nfields] | ||
87 | |||
88 | # known to be effect-free if the are nothrow | ||
89 | const _PURE_OR_ERROR_BUILTINS = [ | ||
90 | fieldtype, apply_type, isa, UnionAll, | ||
91 | getfield, arrayref, isdefined, Core.sizeof, | ||
92 | Core.kwfunc | ||
93 | ] | ||
94 | |||
95 | const TOP_TUPLE = GlobalRef(Core, :tuple) | ||
96 | |||
97 | ######### | ||
98 | # logic # | ||
99 | ######### | ||
100 | |||
101 | _topmod(sv::OptimizationState) = _topmod(sv.mod) | ||
102 | |||
103 | function update_valid_age!(min_valid::UInt, max_valid::UInt, sv::OptimizationState) | ||
104 | sv.min_valid = max(sv.min_valid, min_valid) | ||
105 | sv.max_valid = min(sv.max_valid, max_valid) | ||
106 | @assert(!isa(sv.linfo.def, Method) || | ||
107 | (sv.min_valid == typemax(UInt) && sv.max_valid == typemin(UInt)) || | ||
108 | sv.min_valid <= sv.params.world <= sv.max_valid, | ||
109 | "invalid age range update") | ||
110 | nothing | ||
111 | end | ||
112 | |||
113 | update_valid_age!(li::MethodInstance, sv::OptimizationState) = update_valid_age!(min_world(li), max_world(li), sv) | ||
114 | |||
115 | function add_backedge!(li::MethodInstance, caller::OptimizationState) | ||
116 | isa(caller.linfo.def, Method) || return # don't add backedges to toplevel exprs | ||
117 | push!(caller.calledges, li) | ||
118 | update_valid_age!(li, caller) | ||
119 | nothing | ||
120 | end | ||
121 | |||
122 | function isinlineable(m::Method, me::OptimizationState, bonus::Int=0) | ||
123 | # compute the cost (size) of inlining this code | ||
124 | inlineable = false | ||
125 | cost_threshold = me.params.inline_cost_threshold | ||
126 | if m.module === _topmod(m.module) | ||
127 | # a few functions get special treatment | ||
128 | name = m.name | ||
129 | sig = m.sig | ||
130 | if ((name === :+ || name === :* || name === :min || name === :max) && | ||
131 | isa(sig,DataType) && | ||
132 | sig == Tuple{sig.parameters[1],Any,Any,Any,Vararg{Any}}) | ||
133 | inlineable = true | ||
134 | elseif (name === :iterate || name === :unsafe_convert || | ||
135 | name === :cconvert) | ||
136 | cost_threshold *= 4 | ||
137 | end | ||
138 | end | ||
139 | if !inlineable | ||
140 | inlineable = inline_worthy(me.src.code, me.src, me.sp, me.slottypes, me.params, cost_threshold + bonus) | ||
141 | end | ||
142 | return inlineable | ||
143 | end | ||
144 | |||
145 | # These affect control flow within the function (so may not be removed | ||
146 | # if there is no usage within the function), but don't affect the purity | ||
147 | # of the function as a whole. | ||
148 | function stmt_affects_purity(@nospecialize(stmt), ir) | ||
149 | if isa(stmt, GotoNode) || isa(stmt, ReturnNode) | ||
150 | return false | ||
151 | end | ||
152 | if isa(stmt, GotoIfNot) | ||
153 | t = argextype(stmt.cond, ir, ir.spvals) | ||
154 | return !(t ⊑ Bool) | ||
155 | end | ||
156 | if isa(stmt, Expr) | ||
157 | return stmt.head != :simdloop && stmt.head != :enter | ||
158 | end | ||
159 | return true | ||
160 | end | ||
161 | |||
162 | # run the optimization work | ||
163 | function optimize(opt::OptimizationState, @nospecialize(result)) | ||
164 | def = opt.linfo.def | ||
165 | nargs = Int(opt.nargs) - 1 | ||
166 | 1 (2.70%) |
1 (100.00%)
samples spent calling
run_passes
@timeit "optimizer" ir = run_passes(opt.src, nargs, opt)
|
|
167 | force_noinline = _any(@nospecialize(x) -> isexpr(x, :meta) && x.args[1] == :noinline, ir.meta) | ||
168 | |||
169 | # compute inlining and other related optimizations | ||
170 | if (isa(result, Const) || isconstType(result)) | ||
171 | proven_pure = false | ||
172 | # must be proven pure to use const_api; otherwise we might skip throwing errors | ||
173 | # (issue #20704) | ||
174 | # TODO: Improve this analysis; if a function is marked @pure we should really | ||
175 | # only care about certain errors (e.g. method errors and type errors). | ||
176 | if length(ir.stmts) < 10 | ||
177 | proven_pure = true | ||
178 | for i in 1:length(ir.stmts) | ||
179 | stmt = ir.stmts[i] | ||
180 | if stmt_affects_purity(stmt, ir) && !stmt_effect_free(stmt, ir.types[i], ir, ir.spvals) | ||
181 | proven_pure = false | ||
182 | break | ||
183 | end | ||
184 | end | ||
185 | if proven_pure | ||
186 | for fl in opt.src.slotflags | ||
187 | if (fl & SLOT_USEDUNDEF) != 0 | ||
188 | proven_pure = false | ||
189 | break | ||
190 | end | ||
191 | end | ||
192 | end | ||
193 | end | ||
194 | if proven_pure | ||
195 | opt.src.pure = true | ||
196 | end | ||
197 | |||
198 | if proven_pure && !coverage_enabled() | ||
199 | # use constant calling convention | ||
200 | # Do not emit `jl_fptr_const_return` if coverage is enabled | ||
201 | # so that we don't need to add coverage support | ||
202 | # to the `jl_call_method_internal` fast path | ||
203 | # Still set pure flag to make sure `inference` tests pass | ||
204 | # and to possibly enable more optimization in the future | ||
205 | if !(isa(result, Const) && !is_inlineable_constant(result.val)) | ||
206 | opt.const_api = true | ||
207 | end | ||
208 | force_noinline || (opt.src.inlineable = true) | ||
209 | end | ||
210 | end | ||
211 | |||
212 | replace_code_newstyle!(opt.src, ir, nargs) | ||
213 | |||
214 | # determine and cache inlineability | ||
215 | if !force_noinline | ||
216 | # don't keep ASTs for functions specialized on a Union argument | ||
217 | # TODO: this helps avoid a type-system bug mis-computing sparams during intersection | ||
218 | sig = unwrap_unionall(opt.linfo.specTypes) | ||
219 | if isa(sig, DataType) && sig.name === Tuple.name | ||
220 | for P in sig.parameters | ||
221 | P = unwrap_unionall(P) | ||
222 | if isa(P, Union) | ||
223 | force_noinline = true | ||
224 | break | ||
225 | end | ||
226 | end | ||
227 | else | ||
228 | force_noinline = true | ||
229 | end | ||
230 | if !opt.src.inlineable && result === Union{} | ||
231 | force_noinline = true | ||
232 | end | ||
233 | end | ||
234 | if force_noinline | ||
235 | opt.src.inlineable = false | ||
236 | elseif isa(def, Method) | ||
237 | if opt.src.inlineable && isdispatchtuple(opt.linfo.specTypes) | ||
238 | # obey @inline declaration if a dispatch barrier would not help | ||
239 | else | ||
240 | bonus = 0 | ||
241 | if result ⊑ Tuple && !isbitstype(widenconst(result)) | ||
242 | bonus = opt.params.inline_tupleret_bonus | ||
243 | end | ||
244 | if opt.src.inlineable | ||
245 | # For functions declared @inline, increase the cost threshold 20x | ||
246 | bonus += opt.params.inline_cost_threshold*19 | ||
247 | end | ||
248 | opt.src.inlineable = isinlineable(def, opt, bonus) | ||
249 | end | ||
250 | end | ||
251 | nothing | ||
252 | end | ||
253 | |||
254 | |||
255 | # whether `f` is pure for inference | ||
256 | function is_pure_intrinsic_infer(f::IntrinsicFunction) | ||
257 | return !(f === Intrinsics.pointerref || # this one is volatile | ||
258 | f === Intrinsics.pointerset || # this one is never effect-free | ||
259 | f === Intrinsics.llvmcall || # this one is never effect-free | ||
260 | f === Intrinsics.arraylen || # this one is volatile | ||
261 | f === Intrinsics.sqrt_llvm || # this one may differ at runtime (by a few ulps) | ||
262 | f === Intrinsics.cglobal) # cglobal lookup answer changes at runtime | ||
263 | end | ||
264 | |||
265 | # whether `f` is pure for optimizations | ||
266 | function is_pure_intrinsic_optim(f::IntrinsicFunction) | ||
267 | return !(f === Intrinsics.pointerref || # this one is volatile | ||
268 | f === Intrinsics.pointerset || # this one is never effect-free | ||
269 | f === Intrinsics.llvmcall || # this one is never effect-free | ||
270 | f === Intrinsics.arraylen || # this one is volatile | ||
271 | f === Intrinsics.checked_sdiv_int || # these may throw errors | ||
272 | f === Intrinsics.checked_udiv_int || | ||
273 | f === Intrinsics.checked_srem_int || | ||
274 | f === Intrinsics.checked_urem_int || | ||
275 | f === Intrinsics.cglobal) # cglobal throws an error for symbol-not-found | ||
276 | end | ||
277 | |||
278 | ## Computing the cost of a function body | ||
279 | |||
280 | # saturating sum (inputs are nonnegative), prevents overflow with typemax(Int) below | ||
281 | plus_saturate(x::Int, y::Int) = max(x, y, x+y) | ||
282 | |||
283 | # known return type | ||
284 | isknowntype(@nospecialize T) = (T == Union{}) || isconcretetype(T) | ||
285 | |||
286 | function statement_cost(ex::Expr, line::Int, src::CodeInfo, spvals::SimpleVector, slottypes::Vector{Any}, params::Params) | ||
287 | head = ex.head | ||
288 | if is_meta_expr_head(head) | ||
289 | return 0 | ||
290 | elseif head === :call | ||
291 | farg = ex.args[1] | ||
292 | ftyp = argextype(farg, src, spvals, slottypes) | ||
293 | if ftyp === IntrinsicFunction && farg isa SSAValue | ||
294 | # if this comes from code that was already inlined into another function, | ||
295 | # Consts have been widened. try to recover in simple cases. | ||
296 | farg = src.code[farg.id] | ||
297 | if isa(farg, GlobalRef) || isa(farg, QuoteNode) || isa(farg, IntrinsicFunction) || isexpr(farg, :static_parameter) | ||
298 | ftyp = argextype(farg, src, spvals, slottypes) | ||
299 | end | ||
300 | end | ||
301 | f = singleton_type(ftyp) | ||
302 | if isa(f, IntrinsicFunction) | ||
303 | iidx = Int(reinterpret(Int32, f::IntrinsicFunction)) + 1 | ||
304 | if !isassigned(T_IFUNC_COST, iidx) | ||
305 | # unknown/unhandled intrinsic | ||
306 | return params.inline_nonleaf_penalty | ||
307 | end | ||
308 | return T_IFUNC_COST[iidx] | ||
309 | end | ||
310 | if isa(f, Builtin) | ||
311 | # The efficiency of operations like a[i] and s.b | ||
312 | # depend strongly on whether the result can be | ||
313 | # inferred, so check the type of ex | ||
314 | if f === Main.Core.getfield || f === Main.Core.tuple | ||
315 | # we might like to penalize non-inferrability, but | ||
316 | # tuple iteration/destructuring makes that impossible | ||
317 | # return plus_saturate(argcost, isknowntype(extyp) ? 1 : params.inline_nonleaf_penalty) | ||
318 | return 0 | ||
319 | elseif f === Main.Core.arrayref && length(ex.args) >= 3 | ||
320 | atyp = argextype(ex.args[3], src, spvals, slottypes) | ||
321 | return isknowntype(atyp) ? 4 : params.inline_nonleaf_penalty | ||
322 | end | ||
323 | fidx = find_tfunc(f) | ||
324 | if fidx === nothing | ||
325 | # unknown/unhandled builtin or anonymous function | ||
326 | # Use the generic cost of a direct function call | ||
327 | return 20 | ||
328 | end | ||
329 | return T_FFUNC_COST[fidx] | ||
330 | end | ||
331 | return params.inline_nonleaf_penalty | ||
332 | elseif head === :foreigncall || head === :invoke | ||
333 | # Calls whose "return type" is Union{} do not actually return: | ||
334 | # they are errors. Since these are not part of the typical | ||
335 | # run-time of the function, we omit them from | ||
336 | # consideration. This way, non-inlined error branches do not | ||
337 | # prevent inlining. | ||
338 | extyp = line == -1 ? Any : src.ssavaluetypes[line] | ||
339 | return extyp === Union{} ? 0 : 20 | ||
340 | elseif head === :return | ||
341 | a = ex.args[1] | ||
342 | if a isa Expr | ||
343 | return statement_cost(a, -1, src, spvals, slottypes, params) | ||
344 | end | ||
345 | return 0 | ||
346 | elseif head === :(=) | ||
347 | if ex.args[1] isa GlobalRef | ||
348 | cost = 20 | ||
349 | else | ||
350 | cost = 0 | ||
351 | end | ||
352 | a = ex.args[2] | ||
353 | if a isa Expr | ||
354 | cost = plus_saturate(cost, statement_cost(a, -1, src, spvals, slottypes, params)) | ||
355 | end | ||
356 | return cost | ||
357 | elseif head === :copyast | ||
358 | return 100 | ||
359 | elseif head === :enter | ||
360 | # try/catch is a couple function calls, | ||
361 | # but don't inline functions with try/catch | ||
362 | # since these aren't usually performance-sensitive functions, | ||
363 | # and llvm is more likely to miscompile them when these functions get large | ||
364 | return typemax(Int) | ||
365 | elseif head === :gotoifnot | ||
366 | target = ex.args[2]::Int | ||
367 | # loops are generally always expensive | ||
368 | # but assume that forward jumps are already counted for from | ||
369 | # summing the cost of the not-taken branch | ||
370 | return target < line ? 40 : 0 | ||
371 | end | ||
372 | return 0 | ||
373 | end | ||
374 | |||
375 | function inline_worthy(body::Array{Any,1}, src::CodeInfo, spvals::SimpleVector, slottypes::Vector{Any}, | ||
376 | params::Params, cost_threshold::Integer=params.inline_cost_threshold) | ||
377 | bodycost::Int = 0 | ||
378 | for line = 1:length(body) | ||
379 | stmt = body[line] | ||
380 | if stmt isa Expr | ||
381 | thiscost = statement_cost(stmt, line, src, spvals, slottypes, params)::Int | ||
382 | elseif stmt isa GotoNode | ||
383 | # loops are generally always expensive | ||
384 | # but assume that forward jumps are already counted for from | ||
385 | # summing the cost of the not-taken branch | ||
386 | thiscost = stmt.label < line ? 40 : 0 | ||
387 | else | ||
388 | continue | ||
389 | end | ||
390 | bodycost = plus_saturate(bodycost, thiscost) | ||
391 | bodycost > cost_threshold && return false | ||
392 | end | ||
393 | return true | ||
394 | end | ||
395 | |||
396 | function is_known_call(e::Expr, @nospecialize(func), src, spvals::SimpleVector, slottypes::Vector{Any} = empty_slottypes) | ||
397 | if e.head !== :call | ||
398 | return false | ||
399 | end | ||
400 | f = argextype(e.args[1], src, spvals, slottypes) | ||
401 | return isa(f, Const) && f.val === func | ||
402 | end | ||
403 | |||
404 | function renumber_ir_elements!(body::Vector{Any}, changemap::Vector{Int}) | ||
405 | return renumber_ir_elements!(body, changemap, changemap) | ||
406 | end | ||
407 | |||
408 | function renumber_ir_elements!(body::Vector{Any}, ssachangemap::Vector{Int}, labelchangemap::Vector{Int}) | ||
409 | for i = 2:length(labelchangemap) | ||
410 | labelchangemap[i] += labelchangemap[i - 1] | ||
411 | end | ||
412 | if ssachangemap !== labelchangemap | ||
413 | for i = 2:length(ssachangemap) | ||
414 | ssachangemap[i] += ssachangemap[i - 1] | ||
415 | end | ||
416 | end | ||
417 | (labelchangemap[end] != 0 && ssachangemap[end] != 0) || return | ||
418 | for i = 1:length(body) | ||
419 | el = body[i] | ||
420 | if isa(el, GotoNode) | ||
421 | body[i] = GotoNode(el.label + labelchangemap[el.label]) | ||
422 | elseif isa(el, SSAValue) | ||
423 | body[i] = SSAValue(el.id + ssachangemap[el.id]) | ||
424 | elseif isa(el, Expr) | ||
425 | if el.head === :gotoifnot | ||
426 | cond = el.args[1] | ||
427 | if isa(cond, SSAValue) | ||
428 | el.args[1] = SSAValue(cond.id + ssachangemap[cond.id]) | ||
429 | end | ||
430 | tgt = el.args[2]::Int | ||
431 | el.args[2] = tgt + labelchangemap[tgt] | ||
432 | elseif el.head === :enter | ||
433 | tgt = el.args[1]::Int | ||
434 | el.args[1] = tgt + labelchangemap[tgt] | ||
435 | elseif !is_meta_expr_head(el.head) | ||
436 | if el.head === :(=) && el.args[2] isa Expr && !is_meta_expr_head(el.args[2].head) | ||
437 | el = el.args[2]::Expr | ||
438 | end | ||
439 | args = el.args | ||
440 | for i = 1:length(args) | ||
441 | el = args[i] | ||
442 | if isa(el, SSAValue) | ||
443 | args[i] = SSAValue(el.id + ssachangemap[el.id]) | ||
444 | end | ||
445 | end | ||
446 | end | ||
447 | end | ||
448 | end | ||
449 | end | ||
450 | |||
451 | include("compiler/ssair/driver.jl") |