Skip to content

Commit 3d9be2f

Browse files
committed
WIP add test
1 parent ee050f2 commit 3d9be2f

File tree

3 files changed

+72
-0
lines changed

3 files changed

+72
-0
lines changed

thunder/tests/opinfos.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4473,6 +4473,28 @@ def make_nd_idx(dim_length: int, indices: int, ndim: int):
44734473
shape_ops.append(getitem_opinfo)
44744474

44754475

4476+
def setitem_sample_generator(op, device, dtype, requires_grad, **kwargs):
    """Yields SampleInputs for __setitem__ by reusing the getitem samples.

    Every (tensor, key) pair from the getitem generator produces two setitem
    samples: one whose value matches the indexed shape exactly, and one whose
    value must broadcast into place.
    """
    for getitem_sample in getitem_sample_generator(op, device, dtype, requires_grad, **kwargs):
        base, index = getitem_sample.args

        # Reading back through the same index tells us what shape of value a
        # setitem at that index accepts.
        selected = base[index]
        exact_value = make_tensor(selected.shape, device=device, dtype=dtype, requires_grad=requires_grad)
        yield SampleInput(base, index, exact_value)

        # Exercise broadcasting on assignment: each dimension of the value is
        # randomly kept or squeezed to 1.
        squeezed_shape = tuple(random.choice((dim, 1)) for dim in selected.shape)
        broadcast_value = make_tensor(squeezed_shape, device=device, dtype=dtype, requires_grad=requires_grad)
        yield SampleInput(base, index, broadcast_value)


setitem_opinfo = OpInfo(
    operator.setitem,
    sample_input_generator=setitem_sample_generator,
    torch_reference=operator.setitem,
    numpy_reference=operator.setitem,
)
shape_ops.append(setitem_opinfo)
4496+
4497+
44764498
def movedim_sample_generator(op, device, dtype, requires_grad, **kwargs):
44774499
make = partial(make_tensor, device=device, dtype=dtype, requires_grad=requires_grad)
44784500

thunder/tests/test_grad.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
"index_select",
4343
# Finite difference approximation doesn't work for this function
4444
"embedding",
45+
"setitem",
4546
"index_put",
4647
"batch_norm",
4748
"instance_norm",
@@ -689,6 +690,34 @@ def test_vjp_correctness_embedding_manual(op, device, dtype, executor, comp):
689690
comp(actual_out, out)
690691

691692

693+
@ops((get_opinfo("setitem"),), supported_dtypes=(dtypes.float64,))
def test_vjp_correctness_setitem_manual(op, device, dtype, executor, comp):
    """Checks the setitem VJP against torch.autograd.grad on every sample."""
    for sample in op.sample_inputs(device, dtype, requires_grad=True):

        def torch_reference(tensor, idx, value):
            # `tensor * 1` makes a differentiable non-leaf copy so the
            # in-place setitem is legal under autograd.
            result = tensor * 1
            op.torch_reference(result, idx, value)
            return result

        def op_fn(tensor, idx, value):
            result = tensor * 1
            op.op(result, idx, value)
            return result

        expected_out = torch_reference(*sample.args, **sample.kwargs)
        cotangent = make_tensor_like(expected_out)
        # Grads w.r.t. the indexed tensor (args[0]) and the assigned value (args[2]).
        expected_grads = torch.autograd.grad(expected_out, (sample.args[0], sample.args[2]), cotangent)

        # Compute the vjp result using Thunder.
        flat_op, flat_args, spec = flatten_func(op_fn, sample.args, sample.kwargs)
        initial_trace = thunder.trace()(vjp(flat_op), flat_args, (cotangent,))
        jfn = executor.make_callable(initial_trace.python_callable(), disable_torch_autograd=True)
        actual_out, actual_grad = jfn(flat_args, (cotangent,))

        comp(actual_out, expected_out)
        comp(actual_grad[0], expected_grads[0])
        comp(actual_grad[-1], expected_grads[1])
719+
720+
692721
@ops((op for op in opinfos if op.name == "type_as"), supported_dtypes=(dtypes.float64,))
693722
def test_vjp_correctness_type_as_manual(op, device, dtype, executor, comp):
694723
for sample in op.sample_inputs(device, dtype, requires_grad=True):

thunder/torch/__init__.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -269,6 +269,13 @@ def _copy_(a, b, /):
269269
return prims.copy_(b, a, grad_enabled=cd.is_grad_enabled if cd is not None else False)
270270

271271

272+
def _clone_via_copy(t: TensorProxy) -> TensorProxy:
    """Returns a functional clone of ``t`` built from prims.empty + prims.copy_.

    Used where an explicit copy is preferred over prims.clone; the copy's
    grad-enabled flag mirrors the current compile data (defaults to False when
    no compile data is available).
    """
    compile_data = get_compile_data()
    grad_enabled = False if compile_data is None else compile_data.is_grad_enabled
    dest = prims.empty(t.shape, device=t.device, dtype=t.dtype)
    return prims.copy_(t, dest, grad_enabled=grad_enabled)
277+
278+
272279
@torchsymbol(torch.Tensor.copy_, is_method=True)  # , tags=(prims.OpTags.IN_PLACE,))
def copy_(a, b, /):
    """Copies the values of tensor ``b`` into tensor ``a`` in place (torch.Tensor.copy_).

    Thin wrapper delegating to ``_copy_``, which issues ``prims.copy_`` with
    the current grad-enabled state.
    """
    return _copy_(a, b)
@@ -2609,6 +2616,20 @@ def mod_(a, b):
26092616

26102617
@torchsymbol(torch.mul, is_method=True)
def mul(a, b, /):
    """Elementwise multiplication with a fast path for multiplication by one.

    ``tensor * 1`` is emitted as a functional clone (via ``_clone_via_copy``)
    instead of a real multiplication. The shortcut is only taken when it
    cannot change the result dtype under type promotion:

    * the scalar must be a plain ``int`` equal to 1 — ``1.0`` would promote an
      integer tensor to floating point, and ``True`` is a bool with its own
      promotion rules, so both must go through ``clang.mul``;
    * the tensor must not be boolean — ``bool_tensor * 1`` promotes to an
      integer dtype, so a dtype-preserving clone would be wrong.
    """
    # Local import keeps the fast path self-contained; the module belongs to
    # the same package.
    import thunder.core.dtypes as dtypes

    def _is_int_one(x):
        # Unwrap proxies; a proxy without a concrete value cannot be proven
        # to be 1, so fall back to the generic multiplication.
        if isinstance(x, NumberProxy):
            try:
                x = pyval(x)
            except Exception:
                return False
        # `type(x) is int` deliberately rejects bool (a subclass of int) and
        # float, both of which can trigger dtype promotion.
        return type(x) is int and x == 1

    if isinstance(a, TensorProxy) and _is_int_one(b) and not dtypes.is_boolean_dtype(a.dtype):
        return _clone_via_copy(a)
    if isinstance(b, TensorProxy) and _is_int_one(a) and not dtypes.is_boolean_dtype(b.dtype):
        return _clone_via_copy(b)
    return clang.mul(a, b)
26132634

26142635

0 commit comments

Comments
 (0)