less terrible first

This commit is contained in:
George Hotz 2025-03-26 15:21:38 +08:00
commit 2e4cae342b
2 changed files with 5 additions and 2 deletions

View file

@@ -106,7 +106,10 @@ if __name__ == "__main__":
#k.apply_opt(Opt(OptOps.UPCAST, 0, 2))
"""
# blocked
if k.full_shape[-3:] == (32,3,3):
if len(k.full_shape) == 6 and k.full_shape[-4:] == (32,3,3,3):
k.apply_opt(Opt(OptOps.UPCAST, 2, 32))
k.apply_opt(Opt(OptOps.UPCAST, 1, 4))
elif k.full_shape[-3:] == (32,3,3):
#if k.full_shape[-4]%4 != 0: k.apply_opt(Opt(OptOps.PADTO, len(k.full_shape)-4, 4))
# 3x3 dwconv
k.apply_opt(Opt(OptOps.UNROLL, 0, 0))

View file

@@ -159,7 +159,7 @@ def split_load_store(ctx:Renderer|None, ls:UOp, idx:UOp):
if ctx is not None and ctx.device == "DSP":
lengths = [128,64,32,16,8,4]
#if ls.dtype.count in [128+64, 128*2+64, 128*4+64]: return None # leave 192 alone
if ls.dtype.count in [192, 288]: return None # leave 192 alone
if ls.dtype.count in [192, 288, 160, 96]: return None # leave 192 alone
# we really want stores to be 128 for fast casting
#if ls.op is Ops.LOAD: lengths = [192]+lengths
#if ls.op is Ops.LOAD: lengths = [1536,1024,512,384,256,192,96]+lengths