multicore all the way

This commit is contained in:
George Hotz 2025-03-31 14:17:19 +08:00
commit 996d0ac1d2

View file

@ -482,9 +482,9 @@ class Kernel:
k.apply_opt(Opt(OptOps.UPCAST, 0, 128))
# make all non-first dimensions local
if getenv("MULTICORE", 1):
if getenv("MULTICORE", 0) and len(k.full_shape) >= 1 and k.full_shape[0] > 1:
if k.full_shape[0]%2 == 1: k.apply_opt(Opt(OptOps.PADTO, 0, 2))
k.apply_opt(Opt(OptOps.LOCAL, 0, k.full_shape[0]//2))
if k.full_shape[0] > 2: k.apply_opt(Opt(OptOps.LOCAL, 0, k.full_shape[0]//2))
for i in range(1, k.first_reduce-1): k.apply_opt(Opt(OptOps.LOCAL, 1, 0))
else:
for i in range(1, k.first_reduce): k.apply_opt(Opt(OptOps.LOCAL, 1, 0))