mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-06-24 02:14:17 +00:00
Compare commits
1 commit
master
...
heur_group
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
4ec83a47eb |
1 changed file with 11 additions and 0 deletions
|
|
@ -48,6 +48,17 @@ def hand_coded_optimizations(k:Scheduler) -> Scheduler:
|
|||
# make a copy so it does not mutate the input
|
||||
k = k.copy()
|
||||
|
||||
# when TC fails for kernels with large reductions, try GROUP to parallelize the reduction
|
||||
if k.ren.has_local and k.ren.has_shared and k.reduceop is not None:
|
||||
reduce_axes = k.axes_of(AxisType.REDUCE)
|
||||
if reduce_axes and resolve(k.full_shape[reduce_axes[0]] >= 256, False):
|
||||
for amt in [16, 8]:
|
||||
if k.full_shape[reduce_axes[0]] % amt == 0:
|
||||
try:
|
||||
k.apply_opt(Opt(OptOps.GROUP, 0, amt))
|
||||
break
|
||||
except KernelOptError: pass
|
||||
|
||||
# upcast float4 images, this must be early so we don't accidentally add locals before the upcast
|
||||
for buf_index,buf in enumerate(k.bufs):
|
||||
if isinstance(buf.src[0].dtype, ImageDType):
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue