Skip to content

Commit 0ced574

Browse files
Fix scalar indexing issue and add sparse+sparse benchmarks
Co-authored-by: albertomercurio <61953577+albertomercurio@users.noreply.github.com>
1 parent 168dede commit 0ced574

File tree

5 files changed

+77
-6
lines changed

5 files changed

+77
-6
lines changed

benchmarks/matrix_benchmarks.jl

Lines changed: 60 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -228,6 +228,66 @@ function benchmark_sparse_dense_add!(
228228
return nothing
229229
end
230230

"""
    benchmark_sparse_sparse_add!(SUITE, array_constructor, array_type_name; N=10000, T=Float64)

Register sparse + sparse matrix addition benchmarks for the CSC, CSR, and COO formats.

Two random `N`×`N` sparse matrices with 1% density and element type `T` are generated,
converted to each device sparse format, adapted with `array_constructor`, and the
addition of each same-format pair is stored under
`SUITE["Sparse + Sparse Addition"][array_type_name][format]`.

# Arguments
- `SUITE`: The BenchmarkGroup that receives the new benchmarks
- `array_constructor`: Function used to construct arrays (e.g., `Array`, `JLArray`)
- `array_type_name`: String name for the array type (used as a benchmark key)

# Keyword Arguments
- `N`: Size of the matrix (default: 10000)
- `T`: Element type (default: Float64)
"""
function benchmark_sparse_sparse_add!(
    SUITE,
    array_constructor,
    array_type_name;
    N = 10000,
    T = Float64,
)
    # Host-side operands: two independent random matrices with 1% density
    density = 0.01
    lhs_std = sprand(T, N, N, density)
    rhs_std = sprand(T, N, N, density)

    group_name = "Sparse + Sparse Addition"

    # Level 3: Format (CSC, CSR, COO - will be plotted together)
    formats = (
        ("CSC", DeviceSparseMatrixCSC),
        ("CSR", DeviceSparseMatrixCSR),
        ("COO", DeviceSparseMatrixCOO),
    )

    for (label, to_format) in formats
        # Convert to the target format, then adapt to the requested array type
        lhs = adapt(array_constructor, to_format(lhs_std))
        rhs = adapt(array_constructor, to_format(rhs_std))

        SUITE[group_name][array_type_name][label] = @benchmarkable begin
            $lhs + $rhs
            _synchronize_backend($lhs)
        end
    end

    return nothing
end
290+
231291
"""
232292
benchmark_kron!(SUITE, array_constructor, array_type_name; N=100, T=Float64)
233293

benchmarks/runbenchmarks.jl

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,7 @@ benchmark_matrix_vector_mul!(SUITE, Array, "Array")
2626
benchmark_matrix_matrix_mul!(SUITE, Array, "Array")
2727
benchmark_three_arg_dot!(SUITE, Array, "Array")
2828
benchmark_sparse_dense_add!(SUITE, Array, "Array")
29+
benchmark_sparse_sparse_add!(SUITE, Array, "Array")
2930
benchmark_kron!(SUITE, Array, "Array")
3031
benchmark_conversions!(SUITE, Array, "Array")
3132

@@ -37,6 +38,7 @@ benchmark_matrix_vector_mul!(SUITE, JLArray, "JLArray")
3738
benchmark_matrix_matrix_mul!(SUITE, JLArray, "JLArray")
3839
benchmark_three_arg_dot!(SUITE, JLArray, "JLArray")
3940
benchmark_sparse_dense_add!(SUITE, JLArray, "JLArray")
41+
benchmark_sparse_sparse_add!(SUITE, JLArray, "JLArray")
4042
benchmark_kron!(SUITE, JLArray, "JLArray")
4143
benchmark_conversions!(SUITE, JLArray, "JLArray")
4244

src/matrix_csc/matrix_csc.jl

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -368,11 +368,15 @@ function Base.:+(A::DeviceSparseMatrixCSC, B::DeviceSparseMatrixCSC)
368368
ndrange = (n,),
369369
)
370370

371-
# Build colptr for result matrix
371+
# Build colptr for result matrix using cumsum
372+
# colptr_C[i+1] = 1 + sum(nnz_per_col[1:i])
373+
cumsum_nnz = _cumsum_AK(nnz_per_col)
372374
colptr_C = similar(getcolptr(A), n + 1)
373-
colptr_C[1] = one(Ti)
374-
colptr_C[2:end] .= _cumsum_AK(nnz_per_col)
375+
# Set colptr_C[2:end] to cumsum + 1
376+
colptr_C[2:end] .= cumsum_nnz
375377
colptr_C[2:end] .+= one(Ti)
378+
# Set colptr_C[1] to 1 using broadcasting
379+
colptr_C[1:1] .= one(Ti)
376380

377381
# Allocate result arrays
378382
nnz_total = allowed_getindex(colptr_C, n + 1) - one(Ti)

src/matrix_csr/matrix_csr.jl

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -365,11 +365,15 @@ function Base.:+(A::DeviceSparseMatrixCSR, B::DeviceSparseMatrixCSR)
365365
ndrange = (m,),
366366
)
367367

368-
# Build rowptr for result matrix
368+
# Build rowptr for result matrix using cumsum
369+
# rowptr_C[i+1] = 1 + sum(nnz_per_row[1:i])
370+
cumsum_nnz = _cumsum_AK(nnz_per_row)
369371
rowptr_C = similar(getrowptr(A), m + 1)
370-
rowptr_C[1] = one(Ti)
371-
rowptr_C[2:end] .= _cumsum_AK(nnz_per_row)
372+
# Set rowptr_C[2:end] to cumsum + 1
373+
rowptr_C[2:end] .= cumsum_nnz
372374
rowptr_C[2:end] .+= one(Ti)
375+
# Set rowptr_C[1] to 1 using broadcasting
376+
rowptr_C[1:1] .= one(Ti)
373377

374378
# Allocate result arrays
375379
nnz_total = allowed_getindex(rowptr_C, m + 1) - one(Ti)

test/Project.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
[deps]
22
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
33
Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
4+
DeviceSparseArrays = "da3fe0eb-88a8-4d14-ae1a-857c283e9c70"
45
JET = "c3a54625-cd67-489e-a8e7-0a5a0ff4e31b"
56
JLArrays = "27aeb0d3-9eb9-45fb-866b-73c2ecf80fcb"
67
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"

0 commit comments

Comments
 (0)