mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-06-24 02:14:17 +00:00
Compare commits
3 commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
17acc48b3e | ||
|
|
f7c0a3239d |
||
|
|
0563aebd9e |
3 changed files with 20 additions and 3 deletions
|
|
@ -29,8 +29,7 @@ if __name__ == "__main__":
|
||||||
opt.zero_grad()
|
opt.zero_grad()
|
||||||
samples = Tensor.randint(getenv("BS", 512), high=X_train.shape[0])
|
samples = Tensor.randint(getenv("BS", 512), high=X_train.shape[0])
|
||||||
loss = model(X_train[samples]).sparse_categorical_crossentropy(Y_train[samples]).backward()
|
loss = model(X_train[samples]).sparse_categorical_crossentropy(Y_train[samples]).backward()
|
||||||
opt.step()
|
return loss.realize(*opt.schedule_step())
|
||||||
return loss
|
|
||||||
|
|
||||||
@TinyJit
|
@TinyJit
|
||||||
def get_test_acc() -> Tensor: return (model(X_test).argmax(axis=1) == Y_test).mean()*100
|
def get_test_acc() -> Tensor: return (model(X_test).argmax(axis=1) == Y_test).mean()*100
|
||||||
|
|
|
||||||
|
|
@ -106,6 +106,24 @@ class TestTiny(unittest.TestCase):
|
||||||
probs = Tensor.rand(1, 1, 28, 28).sequential(layers).tolist()
|
probs = Tensor.rand(1, 1, 28, 28).sequential(layers).tolist()
|
||||||
self.assertEqual(len(probs[0]), 10)
|
self.assertEqual(len(probs[0]), 10)
|
||||||
|
|
||||||
|
# TODO: this is failing because of how swizzling rewrites the ShapeTracker of the final STORE
|
||||||
|
@unittest.skipIf(IMAGE>0 or (CI and Device.DEFAULT == "DSP"), "failing because of make things that can't be images not images")
|
||||||
|
def test_mnist_backward(self):
|
||||||
|
# NOTE: we don't have the whole model here for speed
|
||||||
|
layers = [
|
||||||
|
nn.Conv2d(1, 32, 5), Tensor.relu,
|
||||||
|
nn.Conv2d(32, 32, 5), Tensor.relu]
|
||||||
|
|
||||||
|
# replace random weights with ones
|
||||||
|
# TODO: there's a bug here where it's tying two of the biases together. we need UNIQUE const
|
||||||
|
#Tensor.realize(*[p.replace(Tensor.ones_like(p).contiguous()) for p in nn.state.get_parameters(layers)])
|
||||||
|
for p in nn.state.get_parameters(layers): p.replace(Tensor.empty(p.shape))
|
||||||
|
|
||||||
|
# realize gradients
|
||||||
|
for x in nn.state.get_parameters(layers): x.requires_grad_()
|
||||||
|
Tensor.empty(4, 1, 28, 28).sequential(layers).sum().backward()
|
||||||
|
Tensor.realize(*[x.grad for x in nn.state.get_parameters(layers) if x.grad is not None])
|
||||||
|
|
||||||
# *** image ***
|
# *** image ***
|
||||||
|
|
||||||
@unittest.skipIf(Device.DEFAULT != "GPU", "image only supported on GPU")
|
@unittest.skipIf(Device.DEFAULT != "GPU", "image only supported on GPU")
|
||||||
|
|
|
||||||
|
|
@ -252,7 +252,7 @@ class Tensor(MathTrait):
|
||||||
# create the schedule
|
# create the schedule
|
||||||
schedule, var_vals = create_schedule_with_vars(sink)
|
schedule, var_vals = create_schedule_with_vars(sink)
|
||||||
schedule = memory_planner(schedule)
|
schedule = memory_planner(schedule)
|
||||||
if DEBUG >= 1 and len(schedule) >= 10: print(f"scheduled {len(schedule)} kernels in {(time.perf_counter()-st)*1000:.2f} ms")
|
if DEBUG >= 1 and len(schedule) > 1: print(f"scheduled {len(schedule)} kernels in {(time.perf_counter()-st)*1000:.2f} ms")
|
||||||
return schedule, var_vals
|
return schedule, var_vals
|
||||||
|
|
||||||
def schedule(self, *lst:Tensor) -> list[ScheduleItem]:
|
def schedule(self, *lst:Tensor) -> list[ScheduleItem]:
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue