Skip to content

Commit 1ca4fa3

Browse files
author
Aravind Sankaran
committed
Added gls algorithms from linnea
1 parent 4563954 commit 1ca4fa3

33 files changed

+2172
-0
lines changed
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
## Installation of Linnea

```
git clone https://github.com/HPAC/linnea.git
cd linnea/
git checkout traces
pip install .
```

## Algorithm Generation

```
python gls.py
```

## Running the experiments

```
cd experiments
./run.sh
```
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
using LinearAlgebra.BLAS
2+
using LinearAlgebra
3+
"""
    algorithm0(ml0::Array{Float64,2}, ml1::Array{Float64,2}, ml2::Array{Float64,1})

Compute

    b = ((X^T M^-1 X)^-1 X^T M^-1 y).

Kernel sequence: Cholesky factorization `M = L2 L2^T`, triangular solves `L2^-1 X` and
`L2^-1 y`, the products `tmp14 = tmp12^T tmp12` (`syrk!`) and `tmp21 = tmp12^T tmp38`
(`gemv!`), a Cholesky factorization of `tmp14`, and two triangular solves with its factor.

Each kernel call is timed with `time_ns()` and reported on standard output as
`<run id> <start timestamp> <kernel> <FLOPs> <elapsed ns>`; a final line reports the whole
algorithm using the end timestamp. The run id is read from the environment variable
`LINNEA_RUN_ID` (`-1` if unset). The FLOP figures are the constants generated for the
1000 x 100 problem and are not rescaled for other sizes.

Requires at least Julia v1.0.

# Arguments
- `ml0::Array{Float64,2}`: Matrix X (generated for size 1000 x 100) with property FullRank.
  Overwritten with `L2^-1 X`.
- `ml1::Array{Float64,2}`: Matrix M (generated for size 1000 x 1000) with property SPD.
  Its lower triangle is overwritten with the Cholesky factor `L2`.
- `ml2::Array{Float64,1}`: Vector y (generated for size 1000). Overwritten with `L2^-1 y`.

# Returns
A newly allocated vector `b` of length `size(ml0, 2)`.

# Throws
- `PosDefException`: if either Cholesky factorization reports a matrix that is not
  positive definite.
"""
function algorithm0(ml0::Array{Float64,2}, ml1::Array{Float64,2}, ml2::Array{Float64,1})
    # cost: 4.45e+08 FLOPs (for the 1000 x 100 problem)
    # The default is the string "-1" so that `run_id` always has type `String`;
    # it is printed exactly like the integer -1.
    run_id = get(ENV, "LINNEA_RUN_ID", "-1")
    # Number of columns of X; sizes the temporaries instead of a hard-coded 100.
    n = size(ml0, 2)
    t_start = time_ns()

    # X: ml0, full, M: ml1, full, y: ml2, full
    # (L2 L2^T) = M
    t1 = time_ns()
    _, info = LAPACK.potrf!('L', ml1)
    t2 = time_ns()
    println("$(run_id) $(t1) LAPACK.potrf 333333333.3333333 $(t2-t1)")
    # potrf! returns a positive `info` for a non-positive-definite matrix rather
    # than throwing, so the check is done here, outside the timed region.
    info == 0 || throw(PosDefException(info))

    # X: ml0, full, y: ml2, full, L2: ml1, lower_triangular
    # tmp12 = (L2^-1 X)
    t1 = time_ns()
    trsm!('L', 'L', 'N', 'N', 1.0, ml1, ml0)
    t2 = time_ns()
    println("$(run_id) $(t1) trsm 100000000 $(t2-t1)")

    # y: ml2, full, L2: ml1, lower_triangular, tmp12: ml0, full
    # tmp38 = (L2^-1 y)
    t1 = time_ns()
    trsv!('L', 'N', 'N', ml1, ml2)
    t2 = time_ns()
    println("$(run_id) $(t1) trsv 1000000 $(t2-t1)")

    # tmp12: ml0, full, tmp38: ml2, full
    ml3 = Array{Float64}(undef, n, n)
    # tmp14 = (tmp12^T tmp12)
    # Only the lower triangle of ml3 is written; the later kernels read only that part.
    t1 = time_ns()
    syrk!('L', 'T', 1.0, ml0, 0.0, ml3)
    t2 = time_ns()
    println("$(run_id) $(t1) syrk 10000000 $(t2-t1)")

    # tmp12: ml0, full, tmp38: ml2, full, tmp14: ml3, symmetric_lower_triangular
    ml4 = Array{Float64}(undef, n)
    # tmp21 = (tmp12^T tmp38)
    t1 = time_ns()
    gemv!('T', 1.0, ml0, ml2, 0.0, ml4)
    t2 = time_ns()
    println("$(run_id) $(t1) gemv 200000 $(t2-t1)")

    # tmp14: ml3, symmetric_lower_triangular, tmp21: ml4, full
    # (L15 L15^T) = tmp14
    t1 = time_ns()
    _, info = LAPACK.potrf!('L', ml3)
    t2 = time_ns()
    println("$(run_id) $(t1) LAPACK.potrf 333333.3333333333 $(t2-t1)")
    info == 0 || throw(PosDefException(info))

    # tmp21: ml4, full, L15: ml3, lower_triangular
    # tmp23 = (L15^-1 tmp21)
    t1 = time_ns()
    trsv!('L', 'N', 'N', ml3, ml4)
    t2 = time_ns()
    println("$(run_id) $(t1) trsv 10000 $(t2-t1)")

    # L15: ml3, lower_triangular, tmp23: ml4, full
    # tmp24 = (L15^-T tmp23)
    t1 = time_ns()
    trsv!('L', 'T', 'N', ml3, ml4)
    t2 = time_ns()
    println("$(run_id) $(t1) trsv 10000 $(t2-t1)")

    t_end = time_ns()
    println("$(run_id) $(t_end) algorithm0 444886666.6666666 $(t_end-t_start)")
    # tmp24: ml4, full
    # b = tmp24
    return ml4
end
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
using LinearAlgebra.BLAS
2+
using LinearAlgebra
3+
"""
    algorithm1(ml0::Array{Float64,2}, ml1::Array{Float64,2}, ml2::Array{Float64,1})

Compute

    b = ((X^T M^-1 X)^-1 X^T M^-1 y).

Kernel sequence: Cholesky factorization `M = L2 L2^T`, triangular solves `L2^-1 y` and
`L2^-1 X`, the product `tmp14 = tmp12^T tmp12` (`syrk!`), a Cholesky factorization of
`tmp14`, the product `tmp21 = tmp12^T tmp38` (`gemv!`), and two triangular solves with
the factor of `tmp14`.

Each kernel call is timed with `time_ns()` and reported on standard output as
`<run id> <start timestamp> <kernel> <FLOPs> <elapsed ns>`; a final line reports the whole
algorithm using the end timestamp. The run id is read from the environment variable
`LINNEA_RUN_ID` (`-1` if unset). The FLOP figures are the constants generated for the
1000 x 100 problem and are not rescaled for other sizes.

Requires at least Julia v1.0.

# Arguments
- `ml0::Array{Float64,2}`: Matrix X (generated for size 1000 x 100) with property FullRank.
  Overwritten with `L2^-1 X`.
- `ml1::Array{Float64,2}`: Matrix M (generated for size 1000 x 1000) with property SPD.
  Its lower triangle is overwritten with the Cholesky factor `L2`.
- `ml2::Array{Float64,1}`: Vector y (generated for size 1000). Overwritten with `L2^-1 y`.

# Returns
A newly allocated vector `b` of length `size(ml0, 2)`.

# Throws
- `PosDefException`: if either Cholesky factorization reports a matrix that is not
  positive definite.
"""
function algorithm1(ml0::Array{Float64,2}, ml1::Array{Float64,2}, ml2::Array{Float64,1})
    # cost: 4.45e+08 FLOPs (for the 1000 x 100 problem)
    # The default is the string "-1" so that `run_id` always has type `String`;
    # it is printed exactly like the integer -1.
    run_id = get(ENV, "LINNEA_RUN_ID", "-1")
    # Number of columns of X; sizes the temporaries instead of a hard-coded 100.
    n = size(ml0, 2)
    t_start = time_ns()

    # X: ml0, full, M: ml1, full, y: ml2, full
    # (L2 L2^T) = M
    t1 = time_ns()
    _, info = LAPACK.potrf!('L', ml1)
    t2 = time_ns()
    println("$(run_id) $(t1) LAPACK.potrf 333333333.3333333 $(t2-t1)")
    # potrf! returns a positive `info` for a non-positive-definite matrix rather
    # than throwing, so the check is done here, outside the timed region.
    info == 0 || throw(PosDefException(info))

    # X: ml0, full, y: ml2, full, L2: ml1, lower_triangular
    # tmp38 = (L2^-1 y)
    t1 = time_ns()
    trsv!('L', 'N', 'N', ml1, ml2)
    t2 = time_ns()
    println("$(run_id) $(t1) trsv 1000000 $(t2-t1)")

    # X: ml0, full, L2: ml1, lower_triangular, tmp38: ml2, full
    # tmp12 = (L2^-1 X)
    t1 = time_ns()
    trsm!('L', 'L', 'N', 'N', 1.0, ml1, ml0)
    t2 = time_ns()
    println("$(run_id) $(t1) trsm 100000000 $(t2-t1)")

    # tmp38: ml2, full, tmp12: ml0, full
    ml3 = Array{Float64}(undef, n, n)
    # tmp14 = (tmp12^T tmp12)
    # Only the lower triangle of ml3 is written; the later kernels read only that part.
    t1 = time_ns()
    syrk!('L', 'T', 1.0, ml0, 0.0, ml3)
    t2 = time_ns()
    println("$(run_id) $(t1) syrk 10000000 $(t2-t1)")

    # tmp38: ml2, full, tmp12: ml0, full, tmp14: ml3, symmetric_lower_triangular
    # (L15 L15^T) = tmp14
    t1 = time_ns()
    _, info = LAPACK.potrf!('L', ml3)
    t2 = time_ns()
    println("$(run_id) $(t1) LAPACK.potrf 333333.3333333333 $(t2-t1)")
    info == 0 || throw(PosDefException(info))

    # tmp38: ml2, full, tmp12: ml0, full, L15: ml3, lower_triangular
    ml4 = Array{Float64}(undef, n)
    # tmp21 = (tmp12^T tmp38)
    t1 = time_ns()
    gemv!('T', 1.0, ml0, ml2, 0.0, ml4)
    t2 = time_ns()
    println("$(run_id) $(t1) gemv 200000 $(t2-t1)")

    # L15: ml3, lower_triangular, tmp21: ml4, full
    # tmp23 = (L15^-1 tmp21)
    t1 = time_ns()
    trsv!('L', 'N', 'N', ml3, ml4)
    t2 = time_ns()
    println("$(run_id) $(t1) trsv 10000 $(t2-t1)")

    # L15: ml3, lower_triangular, tmp23: ml4, full
    # tmp24 = (L15^-T tmp23)
    t1 = time_ns()
    trsv!('L', 'T', 'N', ml3, ml4)
    t2 = time_ns()
    println("$(run_id) $(t1) trsv 10000 $(t2-t1)")

    t_end = time_ns()
    println("$(run_id) $(t_end) algorithm1 444886666.6666666 $(t_end-t_start)")
    # tmp24: ml4, full
    # b = tmp24
    return ml4
end
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
using LinearAlgebra.BLAS
2+
using LinearAlgebra
3+
"""
    algorithm2(ml0::Array{Float64,2}, ml1::Array{Float64,2}, ml2::Array{Float64,1})

Compute

    b = ((X^T M^-1 X)^-1 X^T M^-1 y).

Kernel sequence: Cholesky factorization `M = L2 L2^T`, triangular solves `L2^-1 X` and
`L2^-1 y`, the products `tmp14 = tmp12^T tmp12` (`syrk!`) and `tmp21 = tmp12^T tmp67`
(`gemv!`), a copy of the lower triangle of `tmp14` into its upper triangle, a QR
factorization `tmp14 = Q16 R17`, the product `Q16^T tmp21`, and a triangular solve with
`R17`.

Each kernel call is timed with `time_ns()` and reported on standard output as
`<run id> <start timestamp> <kernel> <FLOPs> <elapsed ns>`; a final line reports the whole
algorithm using the end timestamp. The run id is read from the environment variable
`LINNEA_RUN_ID` (`-1` if unset). The FLOP figures are the constants generated for the
1000 x 100 problem and are not rescaled for other sizes.

Requires at least Julia v1.0.

# Arguments
- `ml0::Array{Float64,2}`: Matrix X (generated for size 1000 x 100) with property FullRank.
  Overwritten with `L2^-1 X`.
- `ml1::Array{Float64,2}`: Matrix M (generated for size 1000 x 1000) with property SPD.
  Its lower triangle is overwritten with the Cholesky factor `L2`.
- `ml2::Array{Float64,1}`: Vector y (generated for size 1000). Overwritten with `L2^-1 y`.

# Returns
A newly allocated vector `b` of length `size(ml0, 2)`.

# Throws
- `PosDefException`: if the Cholesky factorization of `M` reports a matrix that is not
  positive definite.
"""
function algorithm2(ml0::Array{Float64,2}, ml1::Array{Float64,2}, ml2::Array{Float64,1})
    # cost: 4.47e+08 FLOPs (for the 1000 x 100 problem)
    # The default is the string "-1" so that `run_id` always has type `String`;
    # it is printed exactly like the integer -1.
    run_id = get(ENV, "LINNEA_RUN_ID", "-1")
    # Number of columns of X; sizes the temporaries instead of a hard-coded 100.
    n = size(ml0, 2)
    t_start = time_ns()

    # X: ml0, full, M: ml1, full, y: ml2, full
    # (L2 L2^T) = M
    t1 = time_ns()
    _, info = LAPACK.potrf!('L', ml1)
    t2 = time_ns()
    println("$(run_id) $(t1) LAPACK.potrf 333333333.3333333 $(t2-t1)")
    # potrf! returns a positive `info` for a non-positive-definite matrix rather
    # than throwing, so the check is done here, outside the timed region.
    info == 0 || throw(PosDefException(info))

    # X: ml0, full, y: ml2, full, L2: ml1, lower_triangular
    # tmp12 = (L2^-1 X)
    t1 = time_ns()
    trsm!('L', 'L', 'N', 'N', 1.0, ml1, ml0)
    t2 = time_ns()
    println("$(run_id) $(t1) trsm 100000000 $(t2-t1)")

    # y: ml2, full, L2: ml1, lower_triangular, tmp12: ml0, full
    # tmp67 = (L2^-1 y)
    t1 = time_ns()
    trsv!('L', 'N', 'N', ml1, ml2)
    t2 = time_ns()
    println("$(run_id) $(t1) trsv 1000000 $(t2-t1)")

    # tmp12: ml0, full, tmp67: ml2, full
    ml3 = Array{Float64}(undef, n, n)
    # tmp14 = (tmp12^T tmp12)
    # Only the lower triangle of ml3 is written by this call.
    t1 = time_ns()
    syrk!('L', 'T', 1.0, ml0, 0.0, ml3)
    t2 = time_ns()
    println("$(run_id) $(t1) syrk 10000000 $(t2-t1)")

    # tmp12: ml0, full, tmp67: ml2, full, tmp14: ml3, symmetric_lower_triangular
    ml4 = Array{Float64}(undef, n)
    # tmp21 = (tmp12^T tmp67)
    t1 = time_ns()
    gemv!('T', 1.0, ml0, ml2, 0.0, ml4)
    t2 = time_ns()
    println("$(run_id) $(t1) gemv 200000 $(t2-t1)")

    # tmp14: ml3, symmetric_lower_triangular, tmp21: ml4, full
    # Copy the lower triangle into the upper triangle so that the QR
    # factorization below operates on the full symmetric matrix.
    t1 = time_ns()
    for i in 1:n-1
        @views ml3[i, i+1:n] .= ml3[i+1:n, i]
    end
    t2 = time_ns()
    println("$(run_id) $(t1) view 1000000 $(t2-t1)")

    # (Q16 R17) = tmp14
    # The factorization gets its own name so that `ml3` keeps a single type.
    t1 = time_ns()
    F = qr!(ml3)
    t2 = time_ns()
    println("$(run_id) $(t1) qr 2666666.6666666665 $(t2-t1)")

    # tmp21: ml4, full, Q16: F, QRfact_Q, R17: F, QRfact_R
    ml5 = Array(F.Q)
    ml6 = Array{Float64}(undef, n)
    # tmp25 = (Q16^T tmp21)
    t1 = time_ns()
    gemv!('T', 1.0, ml5, ml4, 0.0, ml6)
    t2 = time_ns()
    println("$(run_id) $(t1) gemv 20000 $(t2-t1)")

    # R17: F, QRfact_R, tmp25: ml6, full
    ml7 = F.R
    # tmp24 = (R17^-1 tmp25)
    t1 = time_ns()
    trsv!('U', 'N', 'N', ml7, ml6)
    t2 = time_ns()
    println("$(run_id) $(t1) trsv 10000 $(t2-t1)")

    t_end = time_ns()
    println("$(run_id) $(t_end) algorithm2 447230000.0 $(t_end-t_start)")
    # tmp24: ml6, full
    # b = tmp24
    return ml6
end
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
using LinearAlgebra.BLAS
2+
using LinearAlgebra
3+
"""
    algorithm3(ml0::Array{Float64,2}, ml1::Array{Float64,2}, ml2::Array{Float64,1})

Compute

    b = ((X^T M^-1 X)^-1 X^T M^-1 y).

Kernel sequence: Cholesky factorization `M = L2 L2^T`, triangular solves `L2^-1 y` and
`L2^-1 X`, the product `tmp14 = tmp12^T tmp12` (`gemm!`), a Cholesky factorization of
`tmp14`, the product `tmp21 = tmp12^T tmp67` (`gemv!`), and two triangular solves with
the factor of `tmp14`.

Each kernel call is timed with `time_ns()` and reported on standard output as
`<run id> <start timestamp> <kernel> <FLOPs> <elapsed ns>`; a final line reports the whole
algorithm using the end timestamp. The run id is read from the environment variable
`LINNEA_RUN_ID` (`-1` if unset). The FLOP figures are the constants generated for the
1000 x 100 problem and are not rescaled for other sizes.

Requires at least Julia v1.0.

# Arguments
- `ml0::Array{Float64,2}`: Matrix X (generated for size 1000 x 100) with property FullRank.
  Overwritten with `L2^-1 X`.
- `ml1::Array{Float64,2}`: Matrix M (generated for size 1000 x 1000) with property SPD.
  Its lower triangle is overwritten with the Cholesky factor `L2`.
- `ml2::Array{Float64,1}`: Vector y (generated for size 1000). Overwritten with `L2^-1 y`.

# Returns
A newly allocated vector `b` of length `size(ml0, 2)`.

# Throws
- `PosDefException`: if either Cholesky factorization reports a matrix that is not
  positive definite.
"""
function algorithm3(ml0::Array{Float64,2}, ml1::Array{Float64,2}, ml2::Array{Float64,1})
    # cost: 4.55e+08 FLOPs (for the 1000 x 100 problem)
    # The default is the string "-1" so that `run_id` always has type `String`;
    # it is printed exactly like the integer -1.
    run_id = get(ENV, "LINNEA_RUN_ID", "-1")
    # Number of columns of X; sizes the temporaries instead of a hard-coded 100.
    n = size(ml0, 2)
    t_start = time_ns()

    # X: ml0, full, M: ml1, full, y: ml2, full
    # (L2 L2^T) = M
    t1 = time_ns()
    _, info = LAPACK.potrf!('L', ml1)
    t2 = time_ns()
    println("$(run_id) $(t1) LAPACK.potrf 333333333.3333333 $(t2-t1)")
    # potrf! returns a positive `info` for a non-positive-definite matrix rather
    # than throwing, so the check is done here, outside the timed region.
    info == 0 || throw(PosDefException(info))

    # X: ml0, full, y: ml2, full, L2: ml1, lower_triangular
    # tmp67 = (L2^-1 y)
    t1 = time_ns()
    trsv!('L', 'N', 'N', ml1, ml2)
    t2 = time_ns()
    println("$(run_id) $(t1) trsv 1000000 $(t2-t1)")

    # X: ml0, full, L2: ml1, lower_triangular, tmp67: ml2, full
    # tmp12 = (L2^-1 X)
    t1 = time_ns()
    trsm!('L', 'L', 'N', 'N', 1.0, ml1, ml0)
    t2 = time_ns()
    println("$(run_id) $(t1) trsm 100000000 $(t2-t1)")

    # tmp67: ml2, full, tmp12: ml0, full
    ml3 = Array{Float64}(undef, n, n)
    # tmp14 = (tmp12^T tmp12)
    t1 = time_ns()
    gemm!('T', 'N', 1.0, ml0, ml0, 0.0, ml3)
    t2 = time_ns()
    println("$(run_id) $(t1) gemm 20000000 $(t2-t1)")

    # tmp67: ml2, full, tmp12: ml0, full, tmp14: ml3, full
    # (L15 L15^T) = tmp14
    t1 = time_ns()
    _, info = LAPACK.potrf!('L', ml3)
    t2 = time_ns()
    println("$(run_id) $(t1) LAPACK.potrf 333333.3333333333 $(t2-t1)")
    info == 0 || throw(PosDefException(info))

    # tmp67: ml2, full, tmp12: ml0, full, L15: ml3, lower_triangular
    ml4 = Array{Float64}(undef, n)
    # tmp21 = (tmp12^T tmp67)
    t1 = time_ns()
    gemv!('T', 1.0, ml0, ml2, 0.0, ml4)
    t2 = time_ns()
    println("$(run_id) $(t1) gemv 200000 $(t2-t1)")

    # L15: ml3, lower_triangular, tmp21: ml4, full
    # tmp23 = (L15^-1 tmp21)
    t1 = time_ns()
    trsv!('L', 'N', 'N', ml3, ml4)
    t2 = time_ns()
    println("$(run_id) $(t1) trsv 10000 $(t2-t1)")

    # L15: ml3, lower_triangular, tmp23: ml4, full
    # tmp24 = (L15^-T tmp23)
    t1 = time_ns()
    trsv!('L', 'T', 'N', ml3, ml4)
    t2 = time_ns()
    println("$(run_id) $(t1) trsv 10000 $(t2-t1)")

    t_end = time_ns()
    println("$(run_id) $(t_end) algorithm3 454886666.6666666 $(t_end-t_start)")
    # tmp24: ml4, full
    # b = tmp24
    return ml4
end

0 commit comments

Comments
 (0)