mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-06-24 02:14:17 +00:00
opts back
This commit is contained in:
parent
1e2becfeae
commit
95261b6193
2 changed files with 4 additions and 4 deletions
|
|
@ -454,7 +454,7 @@ class Kernel:
|
|||
# if this is small, swap it
|
||||
# NOTE: this is breaking something (should be fixed w/o padto)
|
||||
# kernel 23 is broken with this
|
||||
#if k.full_shape[0] <= 6: k.apply_opt(Opt(OptOps.SWAP, 0, 1))
|
||||
if k.full_shape[0] <= 6: k.apply_opt(Opt(OptOps.SWAP, 0, 1))
|
||||
elif k.full_shape[-4:] == (32,3,3,3):
|
||||
# 3x3 normal conv
|
||||
k.apply_opt(Opt(OptOps.UNROLL, 2, 0))
|
||||
|
|
@ -478,7 +478,7 @@ class Kernel:
|
|||
k.apply_opt(Opt(OptOps.UPCAST, 2, 32))
|
||||
if k.full_shape[1]%4 == 0: k.apply_opt(Opt(OptOps.UPCAST, 1, 4))
|
||||
# if the more is small, upcast it (kernel 50 is broken with this)
|
||||
#if k.full_shape[0] <= 6: k.apply_opt(Opt(OptOps.UPCAST, 0, 0))
|
||||
if k.full_shape[0] <= 6: k.apply_opt(Opt(OptOps.UPCAST, 0, 0))
|
||||
elif len(k.full_shape) == 2 and k.first_reduce == 1:
|
||||
# unroll to 4 if we can
|
||||
if k.full_shape[k.first_reduce]%4 == 0: k.apply_opt(Opt(OptOps.UNROLL, 0, 4))
|
||||
|
|
|
|||
|
|
@ -373,8 +373,8 @@ dsp_pm_late = PatternMatcher([
|
|||
|
||||
#(UPat(Ops.BITCAST, src=(UPat(Ops.LOAD, name="ld"),), name="bc"),
|
||||
# lambda ld, bc: ld.src[0].src[0].cast(bc.dtype.ptr(ld.src[0].dtype.size)).load(dtype=bc.dtype)),
|
||||
#(UPat(Ops.GEP, name="x"), lambda x: UOp(Ops.CUSTOM, x.dtype, x.src,
|
||||
# "__builtin_shufflevector({0}, {0}, "+','.join([f'{y:4d}' for y in x.arg])+")") if len(x.arg) > 1 and x.src[0].dtype.count > 1 else None),
|
||||
(UPat(Ops.GEP, name="x"), lambda x: UOp(Ops.CUSTOM, x.dtype, x.src,
|
||||
"__builtin_shufflevector({0}, {0}, "+','.join([f'{y:4d}' for y in x.arg])+")") if len(x.arg) > 1 and x.src[0].dtype.count > 4 else None),
|
||||
(UPat.var("x")+UPat(Ops.VECTORIZE,src=UPat.var("y")),
|
||||
lambda x,y: x+UOp(Ops.CUSTOMI,x.dtype,(y,),arg="{0}") if x.op is not Ops.CUSTOMI or x.arg != "{0}" else None),
|
||||
(UPat.var("x")*UPat(Ops.VECTORIZE,src=UPat.var("y")),
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue