StatProfilerHTML.jl report
Generated on ons 9 okt 2019 08:41:28
File source code
Line Exclusive Inclusive Code
1 # This file is a part of Julia. License is MIT: https://julialang.org/license
2
3 #####################
4 # OptimizationState #
5 #####################
6
7 mutable struct OptimizationState
8 linfo::MethodInstance
9 result_vargs::Vector{Any}
10 calledges::Vector{Any}
11 src::CodeInfo
12 mod::Module
13 nargs::Int
14 min_valid::UInt
15 max_valid::UInt
16 params::Params
17 sp::SimpleVector # static parameters
18 slottypes::Vector{Any}
19 const_api::Bool
20 function OptimizationState(frame::InferenceState)
21 s_edges = frame.stmt_edges[1]
22 if s_edges === ()
23 s_edges = []
24 frame.stmt_edges[1] = s_edges
25 end
26 src = frame.src
27 return new(frame.linfo, frame.result.vargs,
28 s_edges::Vector{Any},
29 src, frame.mod, frame.nargs,
30 frame.min_valid, frame.max_valid,
31 frame.params, frame.sp, frame.slottypes, false)
32 end
33 function OptimizationState(linfo::MethodInstance, src::CodeInfo,
34 params::Params)
35 # prepare src for running optimization passes
36 # if it isn't already
37 nssavalues = src.ssavaluetypes
38 if nssavalues isa Int
39 src.ssavaluetypes = Any[ Any for i = 1:nssavalues ]
40 end
41 nslots = length(src.slotnames)
42 slottypes = Any[ Any for i = 1:nslots ]
43 s_edges = []
44 # cache some useful state computations
45 toplevel = !isa(linfo.def, Method)
46 if !toplevel
47 meth = linfo.def
48 inmodule = meth.module
49 nargs = meth.nargs
50 else
51 inmodule = linfo.def::Module
52 nargs = 0
53 end
54 result_vargs = Any[] # if you want something more accurate, set it yourself :P
55 return new(linfo, result_vargs,
56 s_edges::Vector{Any},
57 src, inmodule, nargs,
58 min_world(linfo), max_world(linfo),
59 params, spvals_from_meth_instance(linfo), slottypes, false)
60 end
61 end
62
63 function OptimizationState(linfo::MethodInstance, params::Params)
64 src = retrieve_code_info(linfo)
65 src === nothing && return nothing
66 return OptimizationState(linfo, src, params)
67 end
68
69
70 #############
71 # constants #
72 #############
73
74 # The slot has uses that are not statically dominated by any assignment
75 # This is implied by `SLOT_USEDUNDEF`.
76 # If this is not set, all the uses are (statically) dominated by the defs.
77 # In particular, if a slot has `AssignedOnce && !StaticUndef`, it is an SSA.
78 const SLOT_STATICUNDEF = 1
79 const SLOT_ASSIGNEDONCE = 16 # slot is assigned to only once
80 const SLOT_USEDUNDEF = 32 # slot has uses that might raise UndefVarError
81 # const SLOT_CALLED = 64
82
83 const IR_FLAG_INBOUNDS = 0x01
84
85 # known to be always effect-free (in particular nothrow)
86 const _PURE_BUILTINS = Any[tuple, svec, ===, typeof, nfields]
87
88 # known to be effect-free if the are nothrow
89 const _PURE_OR_ERROR_BUILTINS = [
90 fieldtype, apply_type, isa, UnionAll,
91 getfield, arrayref, isdefined, Core.sizeof,
92 Core.kwfunc
93 ]
94
95 const TOP_TUPLE = GlobalRef(Core, :tuple)
96
97 #########
98 # logic #
99 #########
100
101 _topmod(sv::OptimizationState) = _topmod(sv.mod)
102
103 function update_valid_age!(min_valid::UInt, max_valid::UInt, sv::OptimizationState)
104 sv.min_valid = max(sv.min_valid, min_valid)
105 sv.max_valid = min(sv.max_valid, max_valid)
106 @assert(!isa(sv.linfo.def, Method) ||
107 (sv.min_valid == typemax(UInt) && sv.max_valid == typemin(UInt)) ||
108 sv.min_valid <= sv.params.world <= sv.max_valid,
109 "invalid age range update")
110 nothing
111 end
112
113 update_valid_age!(li::MethodInstance, sv::OptimizationState) = update_valid_age!(min_world(li), max_world(li), sv)
114
115 function add_backedge!(li::MethodInstance, caller::OptimizationState)
116 isa(caller.linfo.def, Method) || return # don't add backedges to toplevel exprs
117 push!(caller.calledges, li)
118 update_valid_age!(li, caller)
119 nothing
120 end
121
122 function isinlineable(m::Method, me::OptimizationState, bonus::Int=0)
123 # compute the cost (size) of inlining this code
124 inlineable = false
125 cost_threshold = me.params.inline_cost_threshold
126 if m.module === _topmod(m.module)
127 # a few functions get special treatment
128 name = m.name
129 sig = m.sig
130 if ((name === :+ || name === :* || name === :min || name === :max) &&
131 isa(sig,DataType) &&
132 sig == Tuple{sig.parameters[1],Any,Any,Any,Vararg{Any}})
133 inlineable = true
134 elseif (name === :iterate || name === :unsafe_convert ||
135 name === :cconvert)
136 cost_threshold *= 4
137 end
138 end
139 if !inlineable
140 inlineable = inline_worthy(me.src.code, me.src, me.sp, me.slottypes, me.params, cost_threshold + bonus)
141 end
142 return inlineable
143 end
144
145 # These affect control flow within the function (so may not be removed
146 # if there is no usage within the function), but don't affect the purity
147 # of the function as a whole.
148 function stmt_affects_purity(@nospecialize(stmt), ir)
149 if isa(stmt, GotoNode) || isa(stmt, ReturnNode)
150 return false
151 end
152 if isa(stmt, GotoIfNot)
153 t = argextype(stmt.cond, ir, ir.spvals)
154 return !(t ⊑ Bool)
155 end
156 if isa(stmt, Expr)
157 return stmt.head != :simdloop && stmt.head != :enter
158 end
159 return true
160 end
161
162 # run the optimization work
163
1 (2.70%) samples spent in optimize
0 (ex.), 1 (100.00%) (incl.) when called from typeinf line 35
function optimize(opt::OptimizationState, @nospecialize(result))
164 def = opt.linfo.def
165 nargs = Int(opt.nargs) - 1
166 1 (2.70%)
1 (100.00%) samples spent calling run_passes
@timeit "optimizer" ir = run_passes(opt.src, nargs, opt)
167 force_noinline = _any(@nospecialize(x) -> isexpr(x, :meta) && x.args[1] == :noinline, ir.meta)
168
169 # compute inlining and other related optimizations
170 if (isa(result, Const) || isconstType(result))
171 proven_pure = false
172 # must be proven pure to use const_api; otherwise we might skip throwing errors
173 # (issue #20704)
174 # TODO: Improve this analysis; if a function is marked @pure we should really
175 # only care about certain errors (e.g. method errors and type errors).
176 if length(ir.stmts) < 10
177 proven_pure = true
178 for i in 1:length(ir.stmts)
179 stmt = ir.stmts[i]
180 if stmt_affects_purity(stmt, ir) && !stmt_effect_free(stmt, ir.types[i], ir, ir.spvals)
181 proven_pure = false
182 break
183 end
184 end
185 if proven_pure
186 for fl in opt.src.slotflags
187 if (fl & SLOT_USEDUNDEF) != 0
188 proven_pure = false
189 break
190 end
191 end
192 end
193 end
194 if proven_pure
195 opt.src.pure = true
196 end
197
198 if proven_pure && !coverage_enabled()
199 # use constant calling convention
200 # Do not emit `jl_fptr_const_return` if coverage is enabled
201 # so that we don't need to add coverage support
202 # to the `jl_call_method_internal` fast path
203 # Still set pure flag to make sure `inference` tests pass
204 # and to possibly enable more optimization in the future
205 if !(isa(result, Const) && !is_inlineable_constant(result.val))
206 opt.const_api = true
207 end
208 force_noinline || (opt.src.inlineable = true)
209 end
210 end
211
212 replace_code_newstyle!(opt.src, ir, nargs)
213
214 # determine and cache inlineability
215 if !force_noinline
216 # don't keep ASTs for functions specialized on a Union argument
217 # TODO: this helps avoid a type-system bug mis-computing sparams during intersection
218 sig = unwrap_unionall(opt.linfo.specTypes)
219 if isa(sig, DataType) && sig.name === Tuple.name
220 for P in sig.parameters
221 P = unwrap_unionall(P)
222 if isa(P, Union)
223 force_noinline = true
224 break
225 end
226 end
227 else
228 force_noinline = true
229 end
230 if !opt.src.inlineable && result === Union{}
231 force_noinline = true
232 end
233 end
234 if force_noinline
235 opt.src.inlineable = false
236 elseif isa(def, Method)
237 if opt.src.inlineable && isdispatchtuple(opt.linfo.specTypes)
238 # obey @inline declaration if a dispatch barrier would not help
239 else
240 bonus = 0
241 if result ⊑ Tuple && !isbitstype(widenconst(result))
242 bonus = opt.params.inline_tupleret_bonus
243 end
244 if opt.src.inlineable
245 # For functions declared @inline, increase the cost threshold 20x
246 bonus += opt.params.inline_cost_threshold*19
247 end
248 opt.src.inlineable = isinlineable(def, opt, bonus)
249 end
250 end
251 nothing
252 end
253
254
255 # whether `f` is pure for inference
256 function is_pure_intrinsic_infer(f::IntrinsicFunction)
257 return !(f === Intrinsics.pointerref || # this one is volatile
258 f === Intrinsics.pointerset || # this one is never effect-free
259 f === Intrinsics.llvmcall || # this one is never effect-free
260 f === Intrinsics.arraylen || # this one is volatile
261 f === Intrinsics.sqrt_llvm || # this one may differ at runtime (by a few ulps)
262 f === Intrinsics.cglobal) # cglobal lookup answer changes at runtime
263 end
264
265 # whether `f` is pure for optimizations
266 function is_pure_intrinsic_optim(f::IntrinsicFunction)
267 return !(f === Intrinsics.pointerref || # this one is volatile
268 f === Intrinsics.pointerset || # this one is never effect-free
269 f === Intrinsics.llvmcall || # this one is never effect-free
270 f === Intrinsics.arraylen || # this one is volatile
271 f === Intrinsics.checked_sdiv_int || # these may throw errors
272 f === Intrinsics.checked_udiv_int ||
273 f === Intrinsics.checked_srem_int ||
274 f === Intrinsics.checked_urem_int ||
275 f === Intrinsics.cglobal) # cglobal throws an error for symbol-not-found
276 end
277
278 ## Computing the cost of a function body
279
280 # saturating sum (inputs are nonnegative), prevents overflow with typemax(Int) below
281 plus_saturate(x::Int, y::Int) = max(x, y, x+y)
282
283 # known return type
284 isknowntype(@nospecialize T) = (T == Union{}) || isconcretetype(T)
285
286 function statement_cost(ex::Expr, line::Int, src::CodeInfo, spvals::SimpleVector, slottypes::Vector{Any}, params::Params)
287 head = ex.head
288 if is_meta_expr_head(head)
289 return 0
290 elseif head === :call
291 farg = ex.args[1]
292 ftyp = argextype(farg, src, spvals, slottypes)
293 if ftyp === IntrinsicFunction && farg isa SSAValue
294 # if this comes from code that was already inlined into another function,
295 # Consts have been widened. try to recover in simple cases.
296 farg = src.code[farg.id]
297 if isa(farg, GlobalRef) || isa(farg, QuoteNode) || isa(farg, IntrinsicFunction) || isexpr(farg, :static_parameter)
298 ftyp = argextype(farg, src, spvals, slottypes)
299 end
300 end
301 f = singleton_type(ftyp)
302 if isa(f, IntrinsicFunction)
303 iidx = Int(reinterpret(Int32, f::IntrinsicFunction)) + 1
304 if !isassigned(T_IFUNC_COST, iidx)
305 # unknown/unhandled intrinsic
306 return params.inline_nonleaf_penalty
307 end
308 return T_IFUNC_COST[iidx]
309 end
310 if isa(f, Builtin)
311 # The efficiency of operations like a[i] and s.b
312 # depend strongly on whether the result can be
313 # inferred, so check the type of ex
314 if f === Main.Core.getfield || f === Main.Core.tuple
315 # we might like to penalize non-inferrability, but
316 # tuple iteration/destructuring makes that impossible
317 # return plus_saturate(argcost, isknowntype(extyp) ? 1 : params.inline_nonleaf_penalty)
318 return 0
319 elseif f === Main.Core.arrayref && length(ex.args) >= 3
320 atyp = argextype(ex.args[3], src, spvals, slottypes)
321 return isknowntype(atyp) ? 4 : params.inline_nonleaf_penalty
322 end
323 fidx = find_tfunc(f)
324 if fidx === nothing
325 # unknown/unhandled builtin or anonymous function
326 # Use the generic cost of a direct function call
327 return 20
328 end
329 return T_FFUNC_COST[fidx]
330 end
331 return params.inline_nonleaf_penalty
332 elseif head === :foreigncall || head === :invoke
333 # Calls whose "return type" is Union{} do not actually return:
334 # they are errors. Since these are not part of the typical
335 # run-time of the function, we omit them from
336 # consideration. This way, non-inlined error branches do not
337 # prevent inlining.
338 extyp = line == -1 ? Any : src.ssavaluetypes[line]
339 return extyp === Union{} ? 0 : 20
340 elseif head === :return
341 a = ex.args[1]
342 if a isa Expr
343 return statement_cost(a, -1, src, spvals, slottypes, params)
344 end
345 return 0
346 elseif head === :(=)
347 if ex.args[1] isa GlobalRef
348 cost = 20
349 else
350 cost = 0
351 end
352 a = ex.args[2]
353 if a isa Expr
354 cost = plus_saturate(cost, statement_cost(a, -1, src, spvals, slottypes, params))
355 end
356 return cost
357 elseif head === :copyast
358 return 100
359 elseif head === :enter
360 # try/catch is a couple function calls,
361 # but don't inline functions with try/catch
362 # since these aren't usually performance-sensitive functions,
363 # and llvm is more likely to miscompile them when these functions get large
364 return typemax(Int)
365 elseif head === :gotoifnot
366 target = ex.args[2]::Int
367 # loops are generally always expensive
368 # but assume that forward jumps are already counted for from
369 # summing the cost of the not-taken branch
370 return target < line ? 40 : 0
371 end
372 return 0
373 end
374
375 function inline_worthy(body::Array{Any,1}, src::CodeInfo, spvals::SimpleVector, slottypes::Vector{Any},
376 params::Params, cost_threshold::Integer=params.inline_cost_threshold)
377 bodycost::Int = 0
378 for line = 1:length(body)
379 stmt = body[line]
380 if stmt isa Expr
381 thiscost = statement_cost(stmt, line, src, spvals, slottypes, params)::Int
382 elseif stmt isa GotoNode
383 # loops are generally always expensive
384 # but assume that forward jumps are already counted for from
385 # summing the cost of the not-taken branch
386 thiscost = stmt.label < line ? 40 : 0
387 else
388 continue
389 end
390 bodycost = plus_saturate(bodycost, thiscost)
391 bodycost > cost_threshold && return false
392 end
393 return true
394 end
395
396 function is_known_call(e::Expr, @nospecialize(func), src, spvals::SimpleVector, slottypes::Vector{Any} = empty_slottypes)
397 if e.head !== :call
398 return false
399 end
400 f = argextype(e.args[1], src, spvals, slottypes)
401 return isa(f, Const) && f.val === func
402 end
403
404 function renumber_ir_elements!(body::Vector{Any}, changemap::Vector{Int})
405 return renumber_ir_elements!(body, changemap, changemap)
406 end
407
408 function renumber_ir_elements!(body::Vector{Any}, ssachangemap::Vector{Int}, labelchangemap::Vector{Int})
409 for i = 2:length(labelchangemap)
410 labelchangemap[i] += labelchangemap[i - 1]
411 end
412 if ssachangemap !== labelchangemap
413 for i = 2:length(ssachangemap)
414 ssachangemap[i] += ssachangemap[i - 1]
415 end
416 end
417 (labelchangemap[end] != 0 && ssachangemap[end] != 0) || return
418 for i = 1:length(body)
419 el = body[i]
420 if isa(el, GotoNode)
421 body[i] = GotoNode(el.label + labelchangemap[el.label])
422 elseif isa(el, SSAValue)
423 body[i] = SSAValue(el.id + ssachangemap[el.id])
424 elseif isa(el, Expr)
425 if el.head === :gotoifnot
426 cond = el.args[1]
427 if isa(cond, SSAValue)
428 el.args[1] = SSAValue(cond.id + ssachangemap[cond.id])
429 end
430 tgt = el.args[2]::Int
431 el.args[2] = tgt + labelchangemap[tgt]
432 elseif el.head === :enter
433 tgt = el.args[1]::Int
434 el.args[1] = tgt + labelchangemap[tgt]
435 elseif !is_meta_expr_head(el.head)
436 if el.head === :(=) && el.args[2] isa Expr && !is_meta_expr_head(el.args[2].head)
437 el = el.args[2]::Expr
438 end
439 args = el.args
440 for i = 1:length(args)
441 el = args[i]
442 if isa(el, SSAValue)
443 args[i] = SSAValue(el.id + ssachangemap[el.id])
444 end
445 end
446 end
447 end
448 end
449 end
450
451 include("compiler/ssair/driver.jl")