Skip to content

Commit da3f4e2

Browse files
committed
Merge branch 'nhd-snoopi-inferrable-set--TimerOutputs-profile' into nhd-snoop-all
2 parents a99b8fc + 67c447c commit da3f4e2

File tree

8 files changed

+291
-3
lines changed

8 files changed

+291
-3
lines changed

Project.toml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ version = "2.1.2"
66
[deps]
77
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
88
Cthulhu = "f68482b8-f384-11e8-15f7-abe071a5a75f"
9+
FlameGraphs = "08572546-2f56-4bcf-ba4e-bab62c3a3f89"
910
OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
1011
Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
1112
Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
@@ -15,12 +16,14 @@ YAML = "ddb6d928-2868-570f-bddf-ab3f9cf99eb6"
1516
[compat]
1617
Cthulhu = "1.2"
1718
CSV = "0.7"
19+
FlameGraphs = "0.2"
1820
OrderedCollections = "1"
1921
YAML = "0.4"
2022
SnoopCompileCore = "~2.1.2"
2123
julia = "1"
2224

2325
[extras]
26+
AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c"
2427
ColorTypes = "3da002f7-5984-5a60-b8a6-cbb66c0b333f"
2528
Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
2629
FixedPointNumbers = "53c48c17-4a7d-5ca2-90c5-79b7896eea93"
@@ -32,4 +35,4 @@ SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
3235
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
3336

3437
[targets]
35-
test = ["ColorTypes", "Documenter", "FixedPointNumbers", "InteractiveUtils", "JLD", "MethodAnalysis", "Pkg", "SparseArrays", "Test"]
38+
test = ["AbstractTrees", "ColorTypes", "Documenter", "FixedPointNumbers", "InteractiveUtils", "JLD", "MethodAnalysis", "Pkg", "SparseArrays", "Test"]

SnoopCompileCore/src/SnoopCompileCore.jl

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@ if VERSION >= v"1.2.0-DEV.573"
1111
include("snoopi.jl")
1212
end
1313

14+
if VERSION >= v"1.6.0-DEV.1190" # https://github.com/JuliaLang/julia/pull/37749
15+
include("snoopi_deep.jl")
16+
end
1417

1518
if VERSION >= v"1.6.0-DEV.154"
1619
include("snoopr.jl")

SnoopCompileCore/src/snoopi.jl

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
export @snoopi
1+
export @snoopi, @snoopi_deep
22

33
const __inf_timing__ = Tuple{Float64,MethodInstance}[]
44

@@ -51,7 +51,7 @@ end
5151
inf_timing = @snoopi tmin=0.0 commands
5252
5353
Execute `commands` while snooping on inference. Returns an array of `(t, linfo)`
54-
tuples, where `t` is the amount of time spent infering `linfo` (a `MethodInstance`).
54+
tuples, where `t` is the amount of time spent inferring `linfo` (a `MethodInstance`).
5555
5656
Methods that take less time than `tmin` will not be reported.
5757
"""
@@ -99,5 +99,6 @@ function __init__()
9999
end
100100
precompile(start_timing, ())
101101
precompile(stop_timing, ())
102+
102103
nothing
103104
end
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
function start_deep_timing()
2+
Core.Compiler.Timings.reset_timings()
3+
Core.Compiler.__set_measure_typeinf(true)
4+
end
5+
function stop_deep_timing()
6+
Core.Compiler.__set_measure_typeinf(false)
7+
Core.Compiler.Timings.close_current_timer()
8+
end
9+
10+
function finish_snoopi_deep()
11+
return Core.Compiler.Timings._timings[1]
12+
end
13+
14+
function _snoopi_deep(cmd::Expr)
15+
return quote
16+
start_deep_timing()
17+
try
18+
$(esc(cmd))
19+
finally
20+
stop_deep_timing()
21+
end
22+
finish_snoopi_deep()
23+
end
24+
end
25+
26+
"""
27+
timing_tree = @snoopi_deep commands
28+
29+
Produce a profile of julia's type inference, containing the amount of time spent inferring
30+
every `MethodInstance` processed while executing `commands`.
31+
32+
The top-level node in this profile tree is `ROOT`, which contains the time spent _not_ in
33+
julia's type inference (codegen, llvm_opt, runtime, etc).
34+
35+
To make use of these results, see the processing functions in SnoopCompile:
36+
- [`SnoopCompile.flatten_times(timing_tree)`](@ref)
37+
- [`SnoopCompile.to_flamegraph(timing_tree)`](@ref)
38+
39+
# Examples
40+
```julia
41+
julia> timing = SnoopCompileCore.@snoopi_deep begin
42+
@eval sort(rand(100)) # Evaluate some code and profile julia's type inference
43+
end;
44+
45+
julia> using SnoopCompile, ProfileView
46+
47+
julia> times = SnoopCompile.flatten_times(timing, tmin_secs=0.001)
48+
4-element Vector{Any}:
49+
0.001088448 => Core.Compiler.Timings.InferenceFrameInfo(MethodInstance for fpsort!(...
50+
0.001618478 => Core.Compiler.Timings.InferenceFrameInfo(MethodInstance for rand!(...
51+
0.002289655 => Core.Compiler.Timings.InferenceFrameInfo(MethodInstance for _rand_max383!(...
52+
0.093143594 => Core.Compiler.Timings.InferenceFrameInfo(MethodInstance for ROOT(), ...
53+
54+
julia> fg = SnoopCompile.to_flamegraph(timing)
55+
Node(FlameGraphs.NodeData(ROOT() at typeinfer.jl:70, 0x00, 0:15355670))
56+
57+
julia> ProfileView.view(fg); # Display the FlameGraph in a package that supports it
58+
59+
julia> fg = SnoopCompile.to_flamegraph(timing; tmin_secs=0.0001) # Skip very tiny frames
60+
Node(FlameGraphs.NodeData(ROOT() at typeinfer.jl:70, 0x00, 0:15355670))
61+
```
62+
63+
"""
64+
macro snoopi_deep(cmd)
65+
return _snoopi_deep(cmd)
66+
end
67+
68+
# These are okay to come at the top-level because we're only measuring inference, and
69+
# inference results will be cached in a `.ji` file.
70+
@assert precompile(Core.Compiler.Timings.reset_timings, ())
71+
@assert precompile(start_deep_timing, ())
72+
@assert precompile(stop_deep_timing, ())
73+
@assert precompile(finish_snoopi_deep, ())

src/SnoopCompile.jl

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,10 @@ if VERSION >= v"1.2.0-DEV.573"
2525
include("parcel_snoopi.jl")
2626
end
2727

28+
if VERSION >= v"1.6.0-DEV.1190" # https://github.com/JuliaLang/julia/pull/37749
29+
include("parcel_snoopi_deep.jl")
30+
end
31+
2832
# TODO(PR): It seems reasonable to me to leave this alaways available, since even if you're
2933
# using a version of julia that doesn't support `@snoopl`, it seems like it's still nice to
3034
# let you _read and reason about_ the results of a snoopl run?

src/parcel_snoopi_deep.jl

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
import FlameGraphs
2+
3+
using Base.StackTraces: StackFrame
4+
using FlameGraphs.LeftChildRightSiblingTrees: Node, addchild
5+
using Core.Compiler.Timings: Timing
6+
7+
"""
8+
flatten_times(timing::Core.Compiler.Timings.Timing; tmin_secs = 0.0)
9+
10+
Flatten the execution graph of Timings returned from `@snoopi_deep` into a Vector of pairs,
11+
with the exclusive time for each invocation of type inference, skipping any frames that
12+
took less than `tmin_secs` seconds. Results are sorted by time.
13+
"""
14+
function flatten_times(timing::Core.Compiler.Timings.Timing; tmin_secs = 0.0)
15+
out = Pair{Float64,Core.Compiler.Timings.InferenceFrameInfo}[]
16+
frontier = [timing]
17+
while !isempty(frontier)
18+
t = popfirst!(frontier)
19+
exclusive_time = (t.time / 1e9)
20+
if exclusive_time >= tmin_secs
21+
push!(out, exclusive_time => t.mi_info)
22+
end
23+
append!(frontier, t.children)
24+
end
25+
return sort(out; by=tl->tl[1])
26+
end
27+
28+
struct InclusiveTiming
29+
mi_info::Core.Compiler.Timings.InferenceFrameInfo
30+
inclusive_time::UInt64
31+
start_time::UInt64
32+
children::Vector{InclusiveTiming}
33+
end
34+
35+
inclusive_time(t::InclusiveTiming) = t.inclusive_time
36+
37+
function build_inclusive_times(t::Timing)
38+
child_times = InclusiveTiming[
39+
build_inclusive_times(child)
40+
for child in t.children
41+
]
42+
incl_time = t.time + sum(inclusive_time, child_times; init=UInt64(0))
43+
return InclusiveTiming(t.mi_info, incl_time, t.start_time, child_times)
44+
end
45+
46+
"""
47+
to_flamegraph(t::Core.Compiler.Timings.Timing; tmin_secs=0.0)
48+
to_flamegraph(t::SnoopCompile.InclusiveTiming; tmin_secs=0.0)
49+
50+
Convert the call tree of inference timings returned from `@snoopi_deep` into a FlameGraph.
51+
Returns a FlameGraphs.FlameGraph structure that represents the timing trace recorded for
52+
type inference.
53+
54+
Frames that take less than `tmin_secs` seconds of _inclusive time_ will not be included
55+
in the resultant FlameGraph (meaning total time including it and all of its children).
56+
This can be helpful if you have a very big profile, to save on processing time.
57+
58+
# Examples
59+
```julia
60+
julia> timing = SnoopCompileCore.@snoopi_deep begin
61+
@eval sort(rand(100)) # Evaluate some code and profile julia's type inference
62+
end;
63+
64+
julia> fg = SnoopCompile.to_flamegraph(timing)
65+
Node(FlameGraphs.NodeData(ROOT() at typeinfer.jl:70, 0x00, 0:15355670))
66+
67+
julia> ProfileView.view(fg); # Display the FlameGraph in a package that supports it
68+
69+
julia> fg = SnoopCompile.to_flamegraph(timing; tmin_secs=0.0001) # Skip very tiny frames
70+
Node(FlameGraphs.NodeData(ROOT() at typeinfer.jl:70, 0x00, 0:15355670))
71+
```
72+
73+
NOTE: This function must touch every frame in the provided `Timing` to build inclusive
74+
timing information (`InclusiveTiming`). If you have a very large profile, and you plan to
75+
call this function multiple times (say with different values for `tmin_secs`), you can save
76+
some intermediate time by first calling [`build_inclusive_times(t)`](@ref), only once,
77+
and then passing in the `InclusiveTiming` object for all subsequent calls.
78+
"""
79+
function to_flamegraph(t::Timing; tmin_secs = 0.0)
80+
it = build_inclusive_times(t)
81+
to_flamegraph(it; tmin_secs=tmin_secs)
82+
end
83+
84+
function to_flamegraph(to::InclusiveTiming; tmin_secs = 0.0)
85+
tmin_ns = UInt64(round(tmin_secs * 1e9))
86+
87+
# Compute a "root" frame for the top-level node, to cover the whole profile
88+
node_data = _flamegraph_frame(to, to.start_time; toplevel=true)
89+
root = Node(node_data)
90+
return _build_flamegraph!(root, to, to.start_time, tmin_ns)
91+
end
92+
function _build_flamegraph!(root, to::InclusiveTiming, start_ns, tmin_ns)
93+
for child in to.children
94+
if child.inclusive_time > tmin_ns
95+
node_data = _flamegraph_frame(child, start_ns; toplevel=false)
96+
node = addchild(root, node_data)
97+
_build_flamegraph!(node, child, start_ns, tmin_ns)
98+
end
99+
end
100+
return root
101+
end
102+
103+
function frame_name(mi_info::Core.Compiler.Timings.InferenceFrameInfo)
104+
frame_name(mi_info.mi::Core.Compiler.MethodInstance)
105+
end
106+
function frame_name(mi::Core.Compiler.MethodInstance)
107+
frame_name(mi.def.name, mi.specTypes)
108+
end
109+
# Special printing for Type Tuples so they're less ugly in the FlameGraph
110+
function frame_name(name, ::Type{TT}) where TT<:Tuple
111+
io = IOBuffer()
112+
Base.show_tuple_as_call(io, name, TT)
113+
v = String(take!(io))
114+
return v
115+
end
116+
117+
# NOTE: The "root" node doesn't cover the whole profile, because it's only the _complement_
118+
# of the inference times (so it's missing the _overhead_ from the measurement).
119+
# SO we need to manually create a root node that covers the whole thing.
120+
function max_end_time(t::InclusiveTiming)
121+
return maximum(child.start_time + child.inclusive_time for child in t.children;
122+
init = UInt64(t.start_time + t.inclusive_time))
123+
end
124+
125+
# Make a flat frame for this Timing
126+
function _flamegraph_frame(to::InclusiveTiming, start_ns; toplevel)
127+
# TODO: Use a better conversion to a StackFrame so this contains the right kind of data
128+
mi = to.mi_info.mi
129+
tt = Symbol(frame_name(to.mi_info))
130+
sf = StackFrame(tt, mi.def.file, mi.def.line, mi, false, false, UInt64(0x0))
131+
status = 0x0 # "default" status -- See FlameGraphs.jl
132+
start = to.start_time - start_ns
133+
if toplevel
134+
# Compute a range over the whole profile for the top node.
135+
range = Int(start) : Int(max_end_time(to) - start_ns)
136+
else
137+
range = Int(start) : Int(start + to.inclusive_time)
138+
end
139+
return FlameGraphs.NodeData(sf, status, range)
140+
end

test/runtests.jl

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,12 @@ if VERSION >= v"1.2.0-DEV.573"
44
include("snoopi.jl")
55
end
66

7+
if VERSION >= v"1.6.0-DEV.1190" # https://github.com/JuliaLang/julia/pull/37749
8+
@testset "snoopi_deep" begin
9+
include("snoopi_deep.jl")
10+
end
11+
end
12+
713
if VERSION >= v"1.6.0-DEV.1192" # https://github.com/JuliaLang/julia/pull/37136
814
@testset "snoopl" begin
915
include("snoopl.jl")

test/snoopi_deep.jl

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
using SnoopCompile
2+
using SnoopCompile.SnoopCompileCore
3+
using Test
4+
5+
using AbstractTrees # For FlameGraphs tests
6+
7+
@testset "@snoopi_deep" begin
8+
# WARMUP (to compile all the small, reachable methods)
9+
@eval module M # Example with some functions that include type instability
10+
i(x) = x+5
11+
h(a::Array) = i(a[1]::Integer) + 2
12+
g(y::Integer) = h(Any[y])
13+
end
14+
M.g(2) # Warmup all deeply reachable functions
15+
16+
# Redefine the module, so the snoop will only show these functions:
17+
@eval module M # Example with some functions that include type instability
18+
i(x) = x+5
19+
h(a::Array) = i(a[1]::Integer) + 2
20+
g(y::Integer) = h(Any[y])
21+
end
22+
23+
timing = SnoopCompileCore.@snoopi_deep begin
24+
M.g(2)
25+
end
26+
times = SnoopCompile.flatten_times(timing)
27+
@test length(times) == 5 # ROOT, g(...), h(...), i(::Integer), i(::Int)
28+
names = [mi_info.mi.def.name for (time, mi_info) in times]
29+
@test sort(names) == [:ROOT, :g, :h, :i, :i]
30+
31+
longest_frame_time = times[end][1]
32+
@test length(SnoopCompile.flatten_times(timing, tmin_secs=longest_frame_time)) == 1
33+
end
34+
35+
@testset "flamegraph_export" begin
36+
@eval module M # Take another timing
37+
i(x) = x+5
38+
h(a::Array) = i(a[1]::Integer) + 2
39+
g(y::Integer) = h(Any[y])
40+
end
41+
42+
timing = SnoopCompileCore.@snoopi_deep begin
43+
M.g(2)
44+
end
45+
times = SnoopCompile.flatten_times(timing)
46+
47+
fg = SnoopCompile.to_flamegraph(timing)
48+
@test length(collect(AbstractTrees.PreOrderDFS(fg))) == 5
49+
# Test that the span covers the whole tree.
50+
for leaf in AbstractTrees.PreOrderDFS(fg)
51+
@test leaf.data.span.start in fg.data.span
52+
@test leaf.data.span.stop in fg.data.span
53+
end
54+
55+
cutoff_bottom_frame = (times[1][1] + times[2][1]) / 2
56+
fg2 = SnoopCompile.to_flamegraph(timing, tmin_secs = cutoff_bottom_frame)
57+
@test length(collect(AbstractTrees.PreOrderDFS(fg2))) == (length(collect(AbstractTrees.PreOrderDFS(fg))) - 1)
58+
end

0 commit comments

Comments
 (0)