This commit is contained in:
George Hotz 2025-03-25 19:14:57 +08:00
commit 311df3ff21
2 changed files with 4 additions and 3 deletions

View file

@@ -122,11 +122,12 @@ if __name__ == "__main__":
elif knum == 15:
# 28x28, 192 chan, 3x3 dwconv
k.apply_opt(Opt(OptOps.UNROLL, 1, 0))
k.apply_opt(Opt(OptOps.PADTO, 2, 128))
k.apply_opt(Opt(OptOps.UPCAST, 2, 128))
#k.apply_opt(Opt(OptOps.PADTO, 2, 128))
k.apply_opt(Opt(OptOps.UPCAST, 2, 64))
#k.apply_opt(Opt(OptOps.UNROLL, 0, 0))
#k.apply_opt(Opt(OptOps.UPCAST, 2, 32))
#k.apply_opt(Opt(OptOps.UPCAST, 1, 4))
pass
elif knum == 3:
k.apply_opt(Opt(OptOps.UNROLL, 0, 8))
k.apply_opt(Opt(OptOps.UPCAST, 1, 16))

View file

@@ -158,7 +158,7 @@ def split_load_store(ctx:Renderer|None, ls:UOp, idx:UOp):
must_divide = True
if ctx is not None and ctx.device == "DSP":
lengths = [128,64,32,16,8,4]
if ls.dtype.count in [128+64, 128*2+64, 128*4+64]: return None # leave 192 alone
#if ls.dtype.count in [128+64, 128*2+64, 128*4+64]: return None # leave 192 alone
# we really want stores to be 128 for fast casting
#if ls.op is Ops.LOAD: lengths = [192]+lengths
#if ls.op is Ops.LOAD: lengths = [1536,1024,512,384,256,192,96]+lengths