remove old schedule (#15930)

* remove old schedule * tests * r * x
2026-06-24 02:14:17 +00:00 · 2026-04-25 16:46:36 +03:00 · 2026-04-25 16:46:36 +03:00 · e0ff6cc15c
commit e0ff6cc15c
parent 9a23de7d27
7 changed files with 54 additions and 86 deletions
--- a/docs/developer/developer.md
+++ b/docs/developer/developer.md
@@ -19,7 +19,7 @@ The `UOp` graph specifies the compute in terms of low level tinygrad ops. Not al

 The [scheduler](https://github.com/tinygrad/tinygrad/tree/master/tinygrad/schedule/__init__.py) converts the graph of UOps into a list of `ExecItem`. One `ExecItem` is one kernel on the GPU, and the scheduler is responsible for breaking the large compute graph into subgraphs that can fit in a kernel. `ast` specifies what compute to run, and `bufs` specifies what buffers to run it on.

-::: tinygrad.schedule.ExecItem
+::: tinygrad.engine.realize.ExecItem

 ## Lowering

--- a/docs/tensor/properties.md
+++ b/docs/tensor/properties.md
@@ -19,8 +19,8 @@

 ## tinygrad ops

-::: tinygrad.Tensor.schedule_with_vars
-::: tinygrad.Tensor.schedule
+::: tinygrad.Tensor.linear_with_vars
+::: tinygrad.Tensor.schedule_linear
 ::: tinygrad.Tensor.realize
 ::: tinygrad.Tensor.replace
 ::: tinygrad.Tensor.assign
--- a/test/backend/test_uops.py
+++ b/test/backend/test_uops.py
@@ -7,8 +7,7 @@ from tinygrad.dtype import dtypes, DType, AddrSpace, ConstFloat  # noqa: F401
 from tinygrad.device import Buffer, Device
 from tinygrad.uop.ops import Ops, UOp, KernelInfo, AxisType
 from tinygrad.renderer.cstyle import CStyleLanguage
-from tinygrad.engine.realize import CompiledRunner, get_program, get_runner
-from tinygrad.schedule import ExecItem
+from tinygrad.engine.realize import CompiledRunner, get_program, run_linear
 from tinygrad.device import is_dtype_supported
 from tinygrad.codegen.opt import Opt, OptOps
 from tinygrad.renderer.ptx import PTXRenderer
@@ -281,7 +280,7 @@ class TestZeroRange(unittest.TestCase):

 class TestUOpPrograms(unittest.TestCase):
  def _run(self, prog:UOp, *tensors:Tensor):
-    ExecItem(prog, [t.uop.buffer for t in tensors], prg=get_runner(Device.DEFAULT, prog)).run(wait=True)
+    run_linear(UOp(Ops.LINEAR, src=(prog.call(*[t.uop.buf_uop for t in tensors]),)), do_update_stats=False)

  def test_simple(self):
    out = Tensor.empty(10,10,dtype=dtypes.int)
--- a/test/external/fuzz_graph.py
+++ b/test/external/fuzz_graph.py
@@ -4,8 +4,7 @@ from tinygrad.device import Buffer, Device
 from tinygrad.helpers import Context, getenv, from_mv
 from tinygrad.dtype import dtypes
 from tinygrad.tensor import Tensor, _to_np_dtype
-from tinygrad.engine.realize import BufferXfer, get_runner
-from tinygrad.schedule import ExecItem
+from tinygrad.engine.realize import BufferXfer, get_runner, ExecItem
 from tinygrad.uop.ops import UOp, Ops
 from tinygrad.engine.jit import apply_graph_to_jit

--- a/test/testextra/test_tk.py
+++ b/test/testextra/test_tk.py
@@ -1,9 +1,8 @@
 import unittest, math, time

-from tinygrad import Tensor, Device, dtypes, Context
+from tinygrad import Tensor, Device, dtypes, Context, GlobalCounters
 from tinygrad.uop.ops import UOp, Ops
-from tinygrad.engine.realize import get_runner
-from tinygrad.schedule import ExecItem
+from tinygrad.engine.realize import run_linear
 from tinygrad.engine.jit import TinyJit
 import numpy as np

@@ -67,8 +66,9 @@ class TestTK(unittest.TestCase):
      c = Tensor.empty(1, 1, N, N, dtype="float32")
      Tensor.realize(a, b, c)

-    ei = ExecItem(sink, [t.uop.buffer for t in (c, a, b)], prg=get_runner(Device.DEFAULT, sink))
-    for _ in range(5): ei.run(wait=True)
+    linear = UOp(Ops.LINEAR, src=(sink.call(*[t.uop.buf_uop for t in (c, a, b)]),))
+
+    for _ in range(5): run_linear(linear, do_update_stats=False)
    c = c.float()

    ref = a.matmul(b, dtype=dtypes.float32).float()
@@ -115,8 +115,9 @@ class TestTK(unittest.TestCase):
      c = Tensor.empty(1, 1, N, N, dtype="float32")
      Tensor.realize(a, b, c)

-    ei = ExecItem(sink, [t.uop.buffer for t in (c, a, b)], prg=get_runner(Device.DEFAULT, sink))
-    for _ in range(5): ei.run(wait=True)
+    linear = UOp(Ops.LINEAR, src=(sink.call(*[t.uop.buf_uop for t in (c, a, b)]),))
+
+    for _ in range(5): run_linear(linear, do_update_stats=False)
    c = c.float()

    ref = a.matmul(b.transpose(2, 3), dtype=dtypes.float32).float()
@@ -151,8 +152,9 @@ class TestTK(unittest.TestCase):
      b = Tensor.empty(1, 1, N, N, dtype="float32")
      Tensor.realize(a, b)

-    ei = ExecItem(sink, [t.uop.buffer for t in (b, a)], prg=get_runner(Device.DEFAULT, sink))
-    for _ in range(5): ei.run(wait=True)
+    linear = UOp(Ops.LINEAR, src=(sink.call(*[t.uop.buf_uop for t in (b, a)]),))
+
+    for _ in range(5): run_linear(linear, do_update_stats=False)
    b = b.float()

    ref = a.float()
@@ -190,8 +192,9 @@ class TestTK(unittest.TestCase):
      b = Tensor.empty(1, 1, N, N, dtype="float32")
      Tensor.realize(a, b)

-    ei = ExecItem(sink, [t.uop.buffer for t in (b, a)], prg=get_runner(Device.DEFAULT, sink))
-    for _ in range(5): ei.run(wait=True)
+    linear = UOp(Ops.LINEAR, src=(sink.call(*[t.uop.buf_uop for t in (b, a)]),))
+
+    for _ in range(5): run_linear(linear, do_update_stats=False)
    b = b.float()

    ref = a.float()
@@ -232,8 +235,9 @@ class TestTK(unittest.TestCase):
      c = Tensor.empty(1, 1, N, N, dtype="float32")
      Tensor.realize(a, b, c)

-    ei = ExecItem(sink, [t.uop.buffer for t in (b, c, a)], prg=get_runner(Device.DEFAULT, sink))
-    for _ in range(5): ei.run(wait=True)
+    linear = UOp(Ops.LINEAR, src=(sink.call(*[t.uop.buf_uop for t in (b, c, a)]),))
+
+    for _ in range(5): run_linear(linear, do_update_stats=False)
    b = b.float()
    c = c.float()

@@ -272,8 +276,9 @@ class TestTK(unittest.TestCase):
      b = Tensor.empty(1, 1, N, N, dtype="float32")
      Tensor.realize(a, b)

-    ei = ExecItem(sink, [t.uop.buffer for t in (b, a)], prg=get_runner(Device.DEFAULT, sink))
-    for _ in range(5): ei.run(wait=True)
+    linear = UOp(Ops.LINEAR, src=(sink.call(*[t.uop.buf_uop for t in (b, a)]),))
+
+    for _ in range(5): run_linear(linear, do_update_stats=False)
    b = b.float()

    ref = a.float()
@@ -309,8 +314,9 @@ class TestTK(unittest.TestCase):
        b = Tensor.empty(1, 1, N, N, dtype="float32")
        Tensor.realize(a, b)

-      ei = ExecItem(sink, [t.uop.buffer for t in (b, a)], prg=get_runner(Device.DEFAULT, sink))
-      for _ in range(5): ei.run(wait=True)
+      linear = UOp(Ops.LINEAR, src=(sink.call(*[t.uop.buf_uop for t in (b, a)]),))
+
+      for _ in range(5): run_linear(linear, do_update_stats=False)
      b = b.float()

      ref = a.float() + 1
@@ -354,8 +360,9 @@ class TestTK(unittest.TestCase):
      b = Tensor.empty(1, 1, N, N, dtype="float32")
      Tensor.realize(a, b)

-    ei = ExecItem(sink, [t.uop.buffer for t in (b, a)], prg=get_runner(Device.DEFAULT, sink))
-    for _ in range(5): ei.run(wait=True)
+    linear = UOp(Ops.LINEAR, src=(sink.call(*[t.uop.buf_uop for t in (b, a)]),))
+
+    for _ in range(5): run_linear(linear, do_update_stats=False)
    b = b.float()

    ref = a.float().max(axis=2, keepdim=True).expand(a.shape)
@@ -399,8 +406,9 @@ class TestTK(unittest.TestCase):
      b = Tensor.empty(1, 1, N, M, dtype="float32")
      Tensor.realize(a, b)

-    ei = ExecItem(sink, [t.uop.buffer for t in (b, a)], prg=get_runner(Device.DEFAULT, sink))
-    for _ in range(5): ei.run(wait=True)
+    linear = UOp(Ops.LINEAR, src=(sink.call(*[t.uop.buf_uop for t in (b, a)]),))
+
+    for _ in range(5): run_linear(linear, do_update_stats=False)
    b = b.float()

    ref = a.float().max(axis=2, keepdim=True).expand(a.shape)
@@ -444,8 +452,9 @@ class TestTK(unittest.TestCase):
      b = Tensor.empty(1, 1, N, N, dtype="float32")
      Tensor.realize(a, b)

-    ei = ExecItem(sink, [t.uop.buffer for t in (b, a)], prg=get_runner(Device.DEFAULT, sink))
-    for _ in range(5): ei.run(wait=True)
+    linear = UOp(Ops.LINEAR, src=(sink.call(*[t.uop.buf_uop for t in (b, a)]),))
+
+    for _ in range(5): run_linear(linear, do_update_stats=False)
    b = b.float()

    ref = a.float().sum(axis=2, keepdim=True).expand(a.shape)
@@ -489,8 +498,9 @@ class TestTK(unittest.TestCase):
      b = Tensor.empty(1, 1, N, M, dtype="float32")
      Tensor.realize(a, b)

-    ei = ExecItem(sink, [t.uop.buffer for t in (b, a)], prg=get_runner(Device.DEFAULT, sink))
-    for _ in range(5): ei.run(wait=True)
+    linear = UOp(Ops.LINEAR, src=(sink.call(*[t.uop.buf_uop for t in (b, a)]),))
+
+    for _ in range(5): run_linear(linear, do_update_stats=False)
    b = b.float()

    ref = a.float().sum(axis=2, keepdim=True).expand(a.shape)
@@ -549,8 +559,9 @@ class TestTK(unittest.TestCase):
      b = Tensor.empty(1, 1, BLOCK_SIZE, N, dtype="float32")
      Tensor.realize(a, b)

-    ei = ExecItem(sink, [t.uop.buffer for t in (b, a)], prg=get_runner(Device.DEFAULT, sink))
-    for _ in range(5): ei.run(wait=True)
+    linear = UOp(Ops.LINEAR, src=(sink.call(*[t.uop.buf_uop for t in (b, a)]),))
+
+    for _ in range(5): run_linear(linear, do_update_stats=False)
    b = b.float()

    ref = a.float().softmax(axis=3)
@@ -609,8 +620,9 @@ class TestTK(unittest.TestCase):
      b = Tensor.empty(1, 1, N, BLOCK_SIZE, dtype="float32")
      Tensor.realize(a, b)

-    ei = ExecItem(sink, [t.uop.buffer for t in (b, a)], prg=get_runner(Device.DEFAULT, sink))
-    for _ in range(5): ei.run(wait=True)
+    linear = UOp(Ops.LINEAR, src=(sink.call(*[t.uop.buf_uop for t in (b, a)]),))
+
+    for _ in range(5): run_linear(linear, do_update_stats=False)
    b = b.float()

    ref = a.float().softmax(axis=2)
@@ -719,9 +731,11 @@ class TestTK(unittest.TestCase):
      out = Tensor.empty(B, N, H, D, dtype=dtypes.bfloat16)
      Tensor.realize(q, k, v, out)

-    ei = ExecItem(sink, [t.uop.buffer for t in (out, q, k, v)], prg=get_runner(Device.DEFAULT, sink))
+    linear = UOp(Ops.LINEAR, src=(sink.call(*[t.uop.buf_uop for t in (out, q, k, v)]),))
    for _ in range(5):
-      et = ei.run(wait=True)
+      GlobalCounters.reset()
+      with Context(DEBUG=2): run_linear(linear)
+      et = GlobalCounters.time_sum_s
      attn_flops = 2 * B * H * N * N * D + \
                   4 * B * H * N * N + \
                   2 * B * H * N * N * D
--- a/tinygrad/schedule/__init__.py
+++ b/tinygrad/schedule/__init__.py
@@ -1,12 +1,8 @@
 import time, inspect
-from typing import cast
 from collections import deque
-from dataclasses import replace
-from tinygrad.uop.ops import UOp, Ops, buffers, UOpMetaClass, track_rewrites, graph_rewrite, gate_kernel_sink, KernelInfo
+from tinygrad.uop.ops import UOp, Ops, UOpMetaClass, track_rewrites, graph_rewrite, gate_kernel_sink, KernelInfo
 from tinygrad.uop.spec import type_verify, tensor_spec
-from tinygrad.device import Buffer, MultiBuffer
-from tinygrad.helpers import DEBUG, cpu_profile, TracingKey, SPEC, pluralize, SCACHE, BASEDIR, flatten, BEAM, partition
-from tinygrad.engine.realize import ExecItem
+from tinygrad.helpers import DEBUG, cpu_profile, TracingKey, SPEC, pluralize, SCACHE, BASEDIR, flatten, partition

 # **** schedule linearizer

@@ -70,31 +66,6 @@ def create_schedule(sched_sink:UOp) -> UOp:
        if in_degree[x] == 0: queue.append(x)
  return UOp(Ops.LINEAR, src=tuple(linearized))

-def linear_to_schedule(linear:UOp) -> list[ExecItem]:
-  """Convert a LINEAR UOp to a list of ExecItems."""
-  schedule: list[ExecItem] = []
-  for si in linear.src:
-    ast, buf_uops = si.src[0], si.src[1:]
-    # create subbuffers if needed
-    if ast.op is Ops.BUFFER_VIEW:
-      base = buf_uops[1].buffer
-      assert isinstance(base, Buffer), "base can't be MultiBuffer"
-      buffers[buf_uops[0]] = base.view(buf_uops[0].arg, ast.dtype, ast.arg[1]*base.dtype.itemsize)
-    # set beam on KernelInfo when beam search is enabled
-    if ast.op is Ops.SINK and BEAM >= 1 and ast.arg.beam == 0: ast = ast.replace(arg=replace(ast.arg, beam=BEAM.value))
-    ubufs = [b.buffer for b in buf_uops if b.op is not Ops.BIND]
-    metadata = si.arg.metadata
-    if ast.op is Ops.CUSTOM_FUNCTION and ast.arg == "graph":
-      schedule.append(ExecItem(ast, flatten([b.bufs if isinstance(b, MultiBuffer) else [b] for b in ubufs]), metadata))
-    elif any(isinstance(x, MultiBuffer) for x in ubufs):
-      assert all(isinstance(x, MultiBuffer) for x in ubufs), "kernel must all be multibuffer"
-      dnums = [x for x in ast.variables() if x.expr == '_device_num']
-      for j, bufs in enumerate(zip(*[x.bufs for x in cast(tuple[MultiBuffer, ...], ubufs)])):
-        schedule.append(ExecItem(ast, list(bufs), metadata, {dnums[0].expr:j} if len(dnums) else {}))
-    else:
-      schedule.append(ExecItem(ast, cast(list[Buffer|None], ubufs), metadata))
-  return schedule
-
 from tinygrad.schedule.memory import memory_plan_rewrite
 from tinygrad.engine.realize import capturing
 from tinygrad.schedule.rangeify import get_kernel_graph
--- a/tinygrad/tensor.py
+++ b/tinygrad/tensor.py
@@ -12,7 +12,7 @@ from tinygrad.helpers import suppress_finalizing, disable_gc
 from tinygrad.gradient import compute_gradient
 from tinygrad.mixin import OpMixin
 from tinygrad.uop.ops import smax, UOp, Ops, sint, all_metadata, _index_to_concrete_int, Variable, _broadcast_shape
-from tinygrad.schedule import ExecItem, create_linear_with_vars, linear_to_schedule
+from tinygrad.schedule import create_linear_with_vars
 from tinygrad.device import Buffer, canonicalize_device
 from tinygrad.engine.realize import run_linear
 from tinygrad.callify import transform_to_call
@@ -232,21 +232,6 @@ class Tensor(OpMixin):
    _apply_map_to_tensors(becomes_map, name="buffers")
    return create_linear_with_vars(big_sink)

-  def schedule_with_vars(self, *lst:Tensor) -> tuple[list[ExecItem], dict[str, int]]:
-    """
-    Creates the schedule needed to realize these Tensor(s), with Variables.
-
-    NOTE: A Tensor can only be scheduled once.
-    """
-    linear, var_vals = self.linear_with_vars(*lst)
-    return linear_to_schedule(linear), var_vals
-
-  def schedule(self, *lst:Tensor) -> list[ExecItem]:
-    """Creates the schedule needed to realize these Tensor(s)."""
-    schedule, var_vals = self.schedule_with_vars(*lst)
-    assert len(var_vals) == 0
-    return schedule
-
  def schedule_linear(self, *lst:Tensor) -> UOp:
    """Creates the schedule needed to realize these Tensor(s)."""
    linear, var_vals = self.linear_with_vars(*lst)