We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent eae1659 commit bbfe28b — Copy full SHA for bbfe28b
python/cuda_cccl/benchmarks/compute/bench_reduce.py
@@ -217,12 +217,13 @@ def reduce_pointer_lambda(input_array, build_only):
217
h_init = np.zeros(1, dtype=input_array.dtype)
218
219
# Use a lambda function directly as the reducer
220
- op = lambda a, b: a + b
221
- alg = cuda.compute.make_reduce_into(input_array, res, op, h_init)
+ alg = cuda.compute.make_reduce_into(input_array, res, lambda x, y: x + y, h_init)
222
if not build_only:
223
- temp_storage_bytes = alg(None, input_array, res, op, size, h_init)
+ temp_storage_bytes = alg(
+ None, input_array, res, lambda x, y: x + y, size, h_init
224
+ )
225
temp_storage = cp.empty(temp_storage_bytes, dtype=np.uint8)
- alg(temp_storage, input_array, res, op, size, h_init)
226
+ alg(temp_storage, input_array, res, lambda x, y: x + y, size, h_init)
227
228
cp.cuda.runtime.deviceSynchronize()
229
0 commit comments