grad outerworld test

2026-06-24 02:14:17 +00:00 · 2025-11-16 07:43:51 -08:00
2 changed files with 46 additions and 0 deletions
--- a/test/test_outerworld.py
+++ b/test/test_outerworld.py
@ -57,6 +57,41 @@ class TestOuterRange(unittest.TestCase):
    # TODO: testing allclose
    assert Tensor.allclose(ref, out, atol=1e-6), f"{ref.numpy()=}, {out.numpy()=}"

+  def test_range_grad(self):
+    def range_matmul(vec, mats):
+      # vec: (1, 10), mats: (3, 10, 10)
+      # assume vec, mats already have requires_grad set however you like
+
+      i = UOp.range(3, -100, AxisType.OUTER)      # loop axis
+      vec_i = Tensor(vec.uop.after(i))            # "loop-carried" vector
+      vi = UOp.variable("i", i.vmin, i.vmax).bind(i)
+
+      body = (vec_i.contiguous() @ mats[vi])      # matmul using loop index
+      out = Tensor(vec.uop.after(vec_i.uop.store(body.uop).end(i)))
+      return out
+
+    vec = Tensor.randn(1, 3, requires_grad=True)
+    mats = Tensor.randn(3, 3, 3, requires_grad=True)
+    Tensor.realize(vec, mats)
+
+    ref = ((vec @ mats[0]) @ mats[1]) @ mats[2]
+    loss = (1.0 - ref).square().mean()
+    loss.backward()
+    Tensor.realize(vec.grad, mats.grad)
+    print(vec.grad.numpy())
+    print(mats.grad.numpy())
+    vec.grad = None
+    mats.grad = None
+
+    out = range_matmul(vec, mats)
+    loss = (1.0 - out).square().mean()
+    loss.backward()
+    Tensor.realize(vec.grad, mats.grad)
+
+    print(vec.grad, mats.grad)   # should be non-None and finite
+    print(vec.grad.numpy())
+    print(mats.grad.numpy())
+
 class TestOuterworld(unittest.TestCase):
  def test_range_plus_1(self):
    t = Tensor.arange(100).reshape(10,10).realize()
--- a/tinygrad/gradient.py
+++ b/tinygrad/gradient.py
@ -43,6 +43,17 @@ pm_gradient = PatternMatcher([
  (UPat(Ops.KERNEL, name="k"), lambda ctx, k: k.arg.grad_fxn(ctx, k)),
  # there's no gradient for bitcast
  (UPat(Ops.BITCAST), lambda: (None,)),
+
+  # RANGE: loop index / axis, not a differentiable quantity
+  (UPat(Ops.RANGE), lambda: (None,)),
+
+  # STORE: buffer write. Gradient flows only into the value being stored.
+  # src layout is roughly (buffer, value, *axes_or_indices)
+  (UPat(Ops.STORE), lambda ctx: (None, ctx)),
+
+  # END: loop terminator / "end of range" node.
+  # Just pass the gradient into the body (first src), ignore the ranges.
+  (UPat(Ops.END, name="ret"), lambda ctx, ret: (ctx, *[None]*(len(ret.src) - 1))),
 ])

 def _deepwalk(root:UOp, targets:set[UOp]) -> list[UOp]: