From de0abae44f7fa34f82bbafdb9eb623ce9a08ad70 Mon Sep 17 00:00:00 2001 From: AhmedSalih3d <36305327+AhmedSalih3d@users.noreply.github.com> Date: Wed, 21 Jan 2026 01:12:45 +0100 Subject: [PATCH 1/3] Avoid AccelerationMax buffer --- src/SPHCellList.jl | 23 +++++++++-------------- src/TimeStepping.jl | 14 ++++++++++---- 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/src/SPHCellList.jl b/src/SPHCellList.jl index 692285ce..da852ce0 100644 --- a/src/SPHCellList.jl +++ b/src/SPHCellList.jl @@ -37,7 +37,7 @@ using LinearAlgebra SimConstants, SimParticles, ParticleRanges, CellDict, NeighborCellLists, dρdtI, Acceleration, ∇Cᵢ, - ∇◌rᵢ, AccelerationMax; + ∇◌rᵢ; Position = SimParticles.Position, Density = SimParticles.Density, Pressure = SimParticles.Pressure, @@ -84,7 +84,6 @@ using LinearAlgebra dρdtI[i] = dρdt_acc Acceleration[i] = acc_acc - AccelerationMax[i] = norm(acc_acc) end return nothing @@ -95,7 +94,7 @@ using LinearAlgebra SimConstants, SimParticles, ParticleRanges, CellDict, NeighborCellLists, dρdtI, Acceleration, ∇Cᵢ, - ∇◌rᵢ, AccelerationMax; + ∇◌rᵢ; Position = SimParticles.Position, Density = SimParticles.Density, Pressure = SimParticles.Pressure, @@ -153,7 +152,6 @@ using LinearAlgebra Acceleration[i] = acc_acc Kernel[i] = kernel_acc KernelGradient[i] = kernel_grad_acc - AccelerationMax[i] = norm(acc_acc) end return nothing @@ -164,7 +162,7 @@ using LinearAlgebra SimConstants, SimParticles, ParticleRanges, CellDict, NeighborCellLists, dρdtI, Acceleration, ∇Cᵢ, - ∇◌rᵢ, AccelerationMax; + ∇◌rᵢ; Position = SimParticles.Position, Density = SimParticles.Density, Pressure = SimParticles.Pressure, @@ -221,7 +219,6 @@ using LinearAlgebra Acceleration[i] = acc_acc ∇Cᵢ[i] = shift_c_acc ∇◌rᵢ[i] = shift_r_acc - AccelerationMax[i] = norm(acc_acc) end return nothing @@ -232,7 +229,7 @@ using LinearAlgebra SimConstants, SimParticles, ParticleRanges, CellDict, NeighborCellLists, dρdtI, Acceleration, ∇Cᵢ, - ∇◌rᵢ, AccelerationMax; + ∇◌rᵢ; Position = SimParticles.Position, Density = SimParticles.Density, Pressure = SimParticles.Pressure, @@ -295,7 +292,6 @@ using LinearAlgebra KernelGradient[i] = kernel_grad_acc ∇Cᵢ[i] = shift_c_acc ∇◌rᵢ[i] = shift_r_acc - AccelerationMax[i] = norm(acc_acc) end return nothing @@ -721,7 +717,6 @@ using LinearAlgebra TimeSteppingMode = SimMetaData.TimeSteppingMode @no_escape begin - AccelerationMax = @alloc(FloatType, length(Position)) dt₂ = dt * 0.5 SimMetaData.IndexCounter = UpdateNeighbors!(SimParticles, SimKernel.H⁻¹, SortingScratchSpace, ParticleRanges, UniqueCells, CellDict) @@ -737,7 +732,7 @@ using LinearAlgebra @timeit SimMetaData.HourGlass "00b Init NeighborLoop" NeighborLoopPerParticle!( SimDensityDiffusion, SimViscosity, SimKernel, SimMetaData, SimConstants, SimParticles, ParticleRanges, CellDict, - NeighborCellLists, dρdtI, Acceleration, ∇Cᵢ, ∇◌rᵢ, AccelerationMax, + NeighborCellLists, dρdtI, Acceleration, ∇Cᵢ, ∇◌rᵢ, ) end @@ -775,7 +770,7 @@ using LinearAlgebra @timeit SimMetaData.HourGlass "04 First NeighborLoop" NeighborLoopPerParticle!( SimDensityDiffusion, SimViscosity, SimKernel, SimMetaData, SimConstants, SimParticles, ParticleRanges, CellDict, - NeighborCellLists, dρdtI, Acceleration, ∇Cᵢ, ∇◌rᵢ, AccelerationMax, + NeighborCellLists, dρdtI, Acceleration, ∇Cᵢ, ∇◌rᵢ, ) @timeit SimMetaData.HourGlass "05 Update To Half TimeStep" HalfTimeStep(SimMetaData, SimConstants, SimParticles, Positionₙ⁺, Velocityₙ⁺, ρₙ⁺, dρdtI, dt₂) @@ -788,7 +783,7 @@ using LinearAlgebra @timeit SimMetaData.HourGlass "08 Second NeighborLoop" NeighborLoopPerParticle!( SimDensityDiffusion, SimViscosity, SimKernel, SimMetaData, SimConstants, SimParticles, ParticleRanges, CellDict, - NeighborCellLists, dρdtI, Acceleration, ∇Cᵢ, ∇◌rᵢ, AccelerationMax, + NeighborCellLists, dρdtI, Acceleration, ∇Cᵢ, ∇◌rᵢ, Position = Positionₙ⁺, Density = ρₙ⁺, Velocity = Velocityₙ⁺, @@ -809,7 +804,7 @@ using LinearAlgebra @timeit SimMetaData.HourGlass "06 NeighborLoop" NeighborLoopPerParticle!( SimDensityDiffusion, SimViscosity, SimKernel, SimMetaData, SimConstants, SimParticles, ParticleRanges, CellDict, - NeighborCellLists, dρdtI, Acceleration, ∇Cᵢ, ∇◌rᵢ, AccelerationMax, + NeighborCellLists, dρdtI, Acceleration, ∇Cᵢ, ∇◌rᵢ, Position = Positionₙ⁺, Density = ρₙ⁺, Velocity = Velocityₙ⁺, @@ -824,7 +819,7 @@ using LinearAlgebra @timeit SimMetaData.HourGlass "10 Update MetaData" UpdateMetaData!(SimMetaData, dt) - @timeit SimMetaData.HourGlass "11 Update TimeStep" dt = UpdateTimeStep(AccelerationMax, SimConstants, SimKernel) + @timeit SimMetaData.HourGlass "11 Update TimeStep" dt = UpdateTimeStep(Acceleration, SimConstants, SimKernel) end end diff --git a/src/TimeStepping.jl b/src/TimeStepping.jl index 7973253b..f1383a5c 100644 --- a/src/TimeStepping.jl +++ b/src/TimeStepping.jl @@ -34,20 +34,26 @@ function Δt(max_acceleration, SimulationConstants, SPHKernel) end """ - UpdateTimeStep(AccelerationMax, SimConstants, SimKernel) + UpdateTimeStep(Acceleration, SimConstants, SimKernel) Computes and returns the updated time step based on maximum acceleration across all particles. # Arguments -- `AccelerationMax`: Array of acceleration magnitudes for each particle. +- `Acceleration`: Vector of per-particle acceleration vectors. - `SimConstants`: Struct containing simulation parameters. - `SimKernel`: Struct containing kernel parameters. # Returns - The calculated adaptive time step `dt`. """ -function UpdateTimeStep(AccelerationMax, SimConstants, SimKernel) - max_acceleration = maximum(AccelerationMax) +function UpdateTimeStep(Acceleration, SimConstants, SimKernel) + max_acceleration = zero(eltype(eltype(Acceleration))) + @inbounds @simd for i in eachindex(Acceleration) + acc_norm = norm(Acceleration[i]) + if acc_norm > max_acceleration + max_acceleration = acc_norm + end + end return Δt(max_acceleration, SimConstants, SimKernel) end From af325aa81b939f8d0f3912194768cab4653f0a52 Mon Sep 17 00:00:00 2001 From: AhmedSalih3d <36305327+AhmedSalih3d@users.noreply.github.com> Date: Wed, 21 Jan 2026 01:19:41 +0100 Subject: [PATCH 2/3] Thread max acceleration reduction --- src/TimeStepping.jl | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/src/TimeStepping.jl b/src/TimeStepping.jl index f1383a5c..edd7f2d9 100644 --- a/src/TimeStepping.jl +++ b/src/TimeStepping.jl @@ -48,10 +48,20 @@ Computes and returns the updated time step based on maximum acceleration across """ function UpdateTimeStep(Acceleration, SimConstants, SimKernel) max_acceleration = zero(eltype(eltype(Acceleration))) - @inbounds @simd for i in eachindex(Acceleration) - acc_norm = norm(Acceleration[i]) - if acc_norm > max_acceleration - max_acceleration = acc_norm + @no_escape begin + thread_max = @alloc(eltype(eltype(Acceleration)), nthreads()) + fill!(thread_max, max_acceleration) + @inbounds Threads.@threads for i in eachindex(Acceleration) + acc_norm = norm(Acceleration[i]) + tid = threadid() + if acc_norm > thread_max[tid] + thread_max[tid] = acc_norm + end + end + @inbounds for i in eachindex(thread_max) + if thread_max[i] > max_acceleration + max_acceleration = thread_max[i] + end end end return Δt(max_acceleration, SimConstants, SimKernel) From e81f92356b01e7c27b4711f939609cad9838dec7 Mon Sep 17 00:00:00 2001 From: AhmedSalih3d <36305327+AhmedSalih3d@users.noreply.github.com> Date: Wed, 21 Jan 2026 01:19:57 +0100 Subject: [PATCH 3/3] Avoid threadid in max reduction --- src/TimeStepping.jl | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/TimeStepping.jl b/src/TimeStepping.jl index edd7f2d9..8b58cb97 100644 --- a/src/TimeStepping.jl +++ b/src/TimeStepping.jl @@ -49,20 +49,20 @@ Computes and returns the updated time step based on maximum acceleration across function UpdateTimeStep(Acceleration, SimConstants, SimKernel) max_acceleration = zero(eltype(eltype(Acceleration))) @no_escape begin - thread_max = @alloc(eltype(eltype(Acceleration)), nthreads()) - fill!(thread_max, max_acceleration) + max_storage = @alloc(eltype(eltype(Acceleration)), 1) + max_storage[1] = max_acceleration + max_lock = Threads.SpinLock() @inbounds Threads.@threads for i in eachindex(Acceleration) acc_norm = norm(Acceleration[i]) - tid = threadid() - if acc_norm > thread_max[tid] - thread_max[tid] = acc_norm - end - end - @inbounds for i in eachindex(thread_max) - if thread_max[i] > max_acceleration - max_acceleration = thread_max[i] + if acc_norm > max_storage[1] + lock(max_lock) + if acc_norm > max_storage[1] + max_storage[1] = acc_norm + end + unlock(max_lock) end end + max_acceleration = max_storage[1] end return Δt(max_acceleration, SimConstants, SimKernel) end