remove old schedule (#15930)

* remove old schedule

* tests

* r

* x
This commit is contained in:
nimlgen 2026-04-25 16:46:36 +03:00 committed by GitHub
commit e0ff6cc15c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 54 additions and 86 deletions

View file

@ -19,7 +19,7 @@ The `UOp` graph specifies the compute in terms of low level tinygrad ops. Not al
The [scheduler](https://github.com/tinygrad/tinygrad/tree/master/tinygrad/schedule/__init__.py) converts the graph of UOps into a list of `ExecItem`. One `ExecItem` is one kernel on the GPU, and the scheduler is responsible for breaking the large compute graph into subgraphs that can fit in a kernel. `ast` specifies what compute to run, and `bufs` specifies what buffers to run it on.
::: tinygrad.schedule.ExecItem
::: tinygrad.engine.realize.ExecItem
## Lowering

View file

@ -19,8 +19,8 @@
## tinygrad ops
::: tinygrad.Tensor.schedule_with_vars
::: tinygrad.Tensor.schedule
::: tinygrad.Tensor.linear_with_vars
::: tinygrad.Tensor.schedule_linear
::: tinygrad.Tensor.realize
::: tinygrad.Tensor.replace
::: tinygrad.Tensor.assign

View file

@ -7,8 +7,7 @@ from tinygrad.dtype import dtypes, DType, AddrSpace, ConstFloat # noqa: F401
from tinygrad.device import Buffer, Device
from tinygrad.uop.ops import Ops, UOp, KernelInfo, AxisType
from tinygrad.renderer.cstyle import CStyleLanguage
from tinygrad.engine.realize import CompiledRunner, get_program, get_runner
from tinygrad.schedule import ExecItem
from tinygrad.engine.realize import CompiledRunner, get_program, run_linear
from tinygrad.device import is_dtype_supported
from tinygrad.codegen.opt import Opt, OptOps
from tinygrad.renderer.ptx import PTXRenderer
@ -281,7 +280,7 @@ class TestZeroRange(unittest.TestCase):
class TestUOpPrograms(unittest.TestCase):
def _run(self, prog:UOp, *tensors:Tensor):
ExecItem(prog, [t.uop.buffer for t in tensors], prg=get_runner(Device.DEFAULT, prog)).run(wait=True)
run_linear(UOp(Ops.LINEAR, src=(prog.call(*[t.uop.buf_uop for t in tensors]),)), do_update_stats=False)
def test_simple(self):
out = Tensor.empty(10,10,dtype=dtypes.int)

View file

@ -4,8 +4,7 @@ from tinygrad.device import Buffer, Device
from tinygrad.helpers import Context, getenv, from_mv
from tinygrad.dtype import dtypes
from tinygrad.tensor import Tensor, _to_np_dtype
from tinygrad.engine.realize import BufferXfer, get_runner
from tinygrad.schedule import ExecItem
from tinygrad.engine.realize import BufferXfer, get_runner, ExecItem
from tinygrad.uop.ops import UOp, Ops
from tinygrad.engine.jit import apply_graph_to_jit

View file

@ -1,9 +1,8 @@
import unittest, math, time
from tinygrad import Tensor, Device, dtypes, Context
from tinygrad import Tensor, Device, dtypes, Context, GlobalCounters
from tinygrad.uop.ops import UOp, Ops
from tinygrad.engine.realize import get_runner
from tinygrad.schedule import ExecItem
from tinygrad.engine.realize import run_linear
from tinygrad.engine.jit import TinyJit
import numpy as np
@ -67,8 +66,9 @@ class TestTK(unittest.TestCase):
c = Tensor.empty(1, 1, N, N, dtype="float32")
Tensor.realize(a, b, c)
ei = ExecItem(sink, [t.uop.buffer for t in (c, a, b)], prg=get_runner(Device.DEFAULT, sink))
for _ in range(5): ei.run(wait=True)
linear = UOp(Ops.LINEAR, src=(sink.call(*[t.uop.buf_uop for t in (c, a, b)]),))
for _ in range(5): run_linear(linear, do_update_stats=False)
c = c.float()
ref = a.matmul(b, dtype=dtypes.float32).float()
@ -115,8 +115,9 @@ class TestTK(unittest.TestCase):
c = Tensor.empty(1, 1, N, N, dtype="float32")
Tensor.realize(a, b, c)
ei = ExecItem(sink, [t.uop.buffer for t in (c, a, b)], prg=get_runner(Device.DEFAULT, sink))
for _ in range(5): ei.run(wait=True)
linear = UOp(Ops.LINEAR, src=(sink.call(*[t.uop.buf_uop for t in (c, a, b)]),))
for _ in range(5): run_linear(linear, do_update_stats=False)
c = c.float()
ref = a.matmul(b.transpose(2, 3), dtype=dtypes.float32).float()
@ -151,8 +152,9 @@ class TestTK(unittest.TestCase):
b = Tensor.empty(1, 1, N, N, dtype="float32")
Tensor.realize(a, b)
ei = ExecItem(sink, [t.uop.buffer for t in (b, a)], prg=get_runner(Device.DEFAULT, sink))
for _ in range(5): ei.run(wait=True)
linear = UOp(Ops.LINEAR, src=(sink.call(*[t.uop.buf_uop for t in (b, a)]),))
for _ in range(5): run_linear(linear, do_update_stats=False)
b = b.float()
ref = a.float()
@ -190,8 +192,9 @@ class TestTK(unittest.TestCase):
b = Tensor.empty(1, 1, N, N, dtype="float32")
Tensor.realize(a, b)
ei = ExecItem(sink, [t.uop.buffer for t in (b, a)], prg=get_runner(Device.DEFAULT, sink))
for _ in range(5): ei.run(wait=True)
linear = UOp(Ops.LINEAR, src=(sink.call(*[t.uop.buf_uop for t in (b, a)]),))
for _ in range(5): run_linear(linear, do_update_stats=False)
b = b.float()
ref = a.float()
@ -232,8 +235,9 @@ class TestTK(unittest.TestCase):
c = Tensor.empty(1, 1, N, N, dtype="float32")
Tensor.realize(a, b, c)
ei = ExecItem(sink, [t.uop.buffer for t in (b, c, a)], prg=get_runner(Device.DEFAULT, sink))
for _ in range(5): ei.run(wait=True)
linear = UOp(Ops.LINEAR, src=(sink.call(*[t.uop.buf_uop for t in (b, c, a)]),))
for _ in range(5): run_linear(linear, do_update_stats=False)
b = b.float()
c = c.float()
@ -272,8 +276,9 @@ class TestTK(unittest.TestCase):
b = Tensor.empty(1, 1, N, N, dtype="float32")
Tensor.realize(a, b)
ei = ExecItem(sink, [t.uop.buffer for t in (b, a)], prg=get_runner(Device.DEFAULT, sink))
for _ in range(5): ei.run(wait=True)
linear = UOp(Ops.LINEAR, src=(sink.call(*[t.uop.buf_uop for t in (b, a)]),))
for _ in range(5): run_linear(linear, do_update_stats=False)
b = b.float()
ref = a.float()
@ -309,8 +314,9 @@ class TestTK(unittest.TestCase):
b = Tensor.empty(1, 1, N, N, dtype="float32")
Tensor.realize(a, b)
ei = ExecItem(sink, [t.uop.buffer for t in (b, a)], prg=get_runner(Device.DEFAULT, sink))
for _ in range(5): ei.run(wait=True)
linear = UOp(Ops.LINEAR, src=(sink.call(*[t.uop.buf_uop for t in (b, a)]),))
for _ in range(5): run_linear(linear, do_update_stats=False)
b = b.float()
ref = a.float() + 1
@ -354,8 +360,9 @@ class TestTK(unittest.TestCase):
b = Tensor.empty(1, 1, N, N, dtype="float32")
Tensor.realize(a, b)
ei = ExecItem(sink, [t.uop.buffer for t in (b, a)], prg=get_runner(Device.DEFAULT, sink))
for _ in range(5): ei.run(wait=True)
linear = UOp(Ops.LINEAR, src=(sink.call(*[t.uop.buf_uop for t in (b, a)]),))
for _ in range(5): run_linear(linear, do_update_stats=False)
b = b.float()
ref = a.float().max(axis=2, keepdim=True).expand(a.shape)
@ -399,8 +406,9 @@ class TestTK(unittest.TestCase):
b = Tensor.empty(1, 1, N, M, dtype="float32")
Tensor.realize(a, b)
ei = ExecItem(sink, [t.uop.buffer for t in (b, a)], prg=get_runner(Device.DEFAULT, sink))
for _ in range(5): ei.run(wait=True)
linear = UOp(Ops.LINEAR, src=(sink.call(*[t.uop.buf_uop for t in (b, a)]),))
for _ in range(5): run_linear(linear, do_update_stats=False)
b = b.float()
ref = a.float().max(axis=2, keepdim=True).expand(a.shape)
@ -444,8 +452,9 @@ class TestTK(unittest.TestCase):
b = Tensor.empty(1, 1, N, N, dtype="float32")
Tensor.realize(a, b)
ei = ExecItem(sink, [t.uop.buffer for t in (b, a)], prg=get_runner(Device.DEFAULT, sink))
for _ in range(5): ei.run(wait=True)
linear = UOp(Ops.LINEAR, src=(sink.call(*[t.uop.buf_uop for t in (b, a)]),))
for _ in range(5): run_linear(linear, do_update_stats=False)
b = b.float()
ref = a.float().sum(axis=2, keepdim=True).expand(a.shape)
@ -489,8 +498,9 @@ class TestTK(unittest.TestCase):
b = Tensor.empty(1, 1, N, M, dtype="float32")
Tensor.realize(a, b)
ei = ExecItem(sink, [t.uop.buffer for t in (b, a)], prg=get_runner(Device.DEFAULT, sink))
for _ in range(5): ei.run(wait=True)
linear = UOp(Ops.LINEAR, src=(sink.call(*[t.uop.buf_uop for t in (b, a)]),))
for _ in range(5): run_linear(linear, do_update_stats=False)
b = b.float()
ref = a.float().sum(axis=2, keepdim=True).expand(a.shape)
@ -549,8 +559,9 @@ class TestTK(unittest.TestCase):
b = Tensor.empty(1, 1, BLOCK_SIZE, N, dtype="float32")
Tensor.realize(a, b)
ei = ExecItem(sink, [t.uop.buffer for t in (b, a)], prg=get_runner(Device.DEFAULT, sink))
for _ in range(5): ei.run(wait=True)
linear = UOp(Ops.LINEAR, src=(sink.call(*[t.uop.buf_uop for t in (b, a)]),))
for _ in range(5): run_linear(linear, do_update_stats=False)
b = b.float()
ref = a.float().softmax(axis=3)
@ -609,8 +620,9 @@ class TestTK(unittest.TestCase):
b = Tensor.empty(1, 1, N, BLOCK_SIZE, dtype="float32")
Tensor.realize(a, b)
ei = ExecItem(sink, [t.uop.buffer for t in (b, a)], prg=get_runner(Device.DEFAULT, sink))
for _ in range(5): ei.run(wait=True)
linear = UOp(Ops.LINEAR, src=(sink.call(*[t.uop.buf_uop for t in (b, a)]),))
for _ in range(5): run_linear(linear, do_update_stats=False)
b = b.float()
ref = a.float().softmax(axis=2)
@ -719,9 +731,11 @@ class TestTK(unittest.TestCase):
out = Tensor.empty(B, N, H, D, dtype=dtypes.bfloat16)
Tensor.realize(q, k, v, out)
ei = ExecItem(sink, [t.uop.buffer for t in (out, q, k, v)], prg=get_runner(Device.DEFAULT, sink))
linear = UOp(Ops.LINEAR, src=(sink.call(*[t.uop.buf_uop for t in (out, q, k, v)]),))
for _ in range(5):
et = ei.run(wait=True)
GlobalCounters.reset()
with Context(DEBUG=2): run_linear(linear)
et = GlobalCounters.time_sum_s
attn_flops = 2 * B * H * N * N * D + \
4 * B * H * N * N + \
2 * B * H * N * N * D

View file

@ -1,12 +1,8 @@
import time, inspect
from typing import cast
from collections import deque
from dataclasses import replace
from tinygrad.uop.ops import UOp, Ops, buffers, UOpMetaClass, track_rewrites, graph_rewrite, gate_kernel_sink, KernelInfo
from tinygrad.uop.ops import UOp, Ops, UOpMetaClass, track_rewrites, graph_rewrite, gate_kernel_sink, KernelInfo
from tinygrad.uop.spec import type_verify, tensor_spec
from tinygrad.device import Buffer, MultiBuffer
from tinygrad.helpers import DEBUG, cpu_profile, TracingKey, SPEC, pluralize, SCACHE, BASEDIR, flatten, BEAM, partition
from tinygrad.engine.realize import ExecItem
from tinygrad.helpers import DEBUG, cpu_profile, TracingKey, SPEC, pluralize, SCACHE, BASEDIR, flatten, partition
# **** schedule linearizer
@ -70,31 +66,6 @@ def create_schedule(sched_sink:UOp) -> UOp:
if in_degree[x] == 0: queue.append(x)
return UOp(Ops.LINEAR, src=tuple(linearized))
def linear_to_schedule(linear:UOp) -> list[ExecItem]:
"""Convert a LINEAR UOp to a list of ExecItems."""
schedule: list[ExecItem] = []
for si in linear.src:
ast, buf_uops = si.src[0], si.src[1:]
# create subbuffers if needed
if ast.op is Ops.BUFFER_VIEW:
base = buf_uops[1].buffer
assert isinstance(base, Buffer), "base can't be MultiBuffer"
buffers[buf_uops[0]] = base.view(buf_uops[0].arg, ast.dtype, ast.arg[1]*base.dtype.itemsize)
# set beam on KernelInfo when beam search is enabled
if ast.op is Ops.SINK and BEAM >= 1 and ast.arg.beam == 0: ast = ast.replace(arg=replace(ast.arg, beam=BEAM.value))
ubufs = [b.buffer for b in buf_uops if b.op is not Ops.BIND]
metadata = si.arg.metadata
if ast.op is Ops.CUSTOM_FUNCTION and ast.arg == "graph":
schedule.append(ExecItem(ast, flatten([b.bufs if isinstance(b, MultiBuffer) else [b] for b in ubufs]), metadata))
elif any(isinstance(x, MultiBuffer) for x in ubufs):
assert all(isinstance(x, MultiBuffer) for x in ubufs), "kernel must all be multibuffer"
dnums = [x for x in ast.variables() if x.expr == '_device_num']
for j, bufs in enumerate(zip(*[x.bufs for x in cast(tuple[MultiBuffer, ...], ubufs)])):
schedule.append(ExecItem(ast, list(bufs), metadata, {dnums[0].expr:j} if len(dnums) else {}))
else:
schedule.append(ExecItem(ast, cast(list[Buffer|None], ubufs), metadata))
return schedule
from tinygrad.schedule.memory import memory_plan_rewrite
from tinygrad.engine.realize import capturing
from tinygrad.schedule.rangeify import get_kernel_graph

View file

@ -12,7 +12,7 @@ from tinygrad.helpers import suppress_finalizing, disable_gc
from tinygrad.gradient import compute_gradient
from tinygrad.mixin import OpMixin
from tinygrad.uop.ops import smax, UOp, Ops, sint, all_metadata, _index_to_concrete_int, Variable, _broadcast_shape
from tinygrad.schedule import ExecItem, create_linear_with_vars, linear_to_schedule
from tinygrad.schedule import create_linear_with_vars
from tinygrad.device import Buffer, canonicalize_device
from tinygrad.engine.realize import run_linear
from tinygrad.callify import transform_to_call
@ -232,21 +232,6 @@ class Tensor(OpMixin):
_apply_map_to_tensors(becomes_map, name="buffers")
return create_linear_with_vars(big_sink)
def schedule_with_vars(self, *lst:Tensor) -> tuple[list[ExecItem], dict[str, int]]:
"""
Creates the schedule needed to realize these Tensor(s), with Variables.
NOTE: A Tensor can only be scheduled once.
"""
linear, var_vals = self.linear_with_vars(*lst)
return linear_to_schedule(linear), var_vals
def schedule(self, *lst:Tensor) -> list[ExecItem]:
"""Creates the schedule needed to realize these Tensor(s)."""
schedule, var_vals = self.schedule_with_vars(*lst)
assert len(var_vals) == 0
return schedule
def schedule_linear(self, *lst:Tensor) -> UOp:
"""Creates the schedule needed to realize these Tensor(s)."""
linear, var_vals = self.linear_with_vars(*lst)