mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-06-24 02:14:17 +00:00
IMAGE hand_coded_optimizations update
This commit is contained in:
parent
0a8e61d0c5
commit
adeeb33dc4
2 changed files with 7 additions and 1 deletions
2
.github/workflows/test.yml
vendored
2
.github/workflows/test.yml
vendored
|
|
@ -327,7 +327,7 @@ jobs:
|
|||
llvm: 'true'
|
||||
- name: Test openpilot model kernel count and gate usage
|
||||
run: |
|
||||
ALLOWED_KERNEL_COUNT=123 ALLOWED_READ_IMAGE=1468 ALLOWED_GATED_READ_IMAGE=4 FLOAT16=1 DEV=CL IMAGE=1 python examples/openpilot/compile3.py https://gitlab.com/commaai/openpilot-lfs.git/gitlab-lfs/objects/cf6376aa9a090f0da26c280ef69eabf9bbdd51d1faac9ed392919c3db69be916
|
||||
ALLOWED_KERNEL_COUNT=123 ALLOWED_READ_IMAGE=1361 ALLOWED_GATED_READ_IMAGE=55 FLOAT16=1 DEV=CL IMAGE=1 python examples/openpilot/compile3.py https://gitlab.com/commaai/openpilot-lfs.git/gitlab-lfs/objects/cf6376aa9a090f0da26c280ef69eabf9bbdd51d1faac9ed392919c3db69be916
|
||||
- name: Test openpilot CL compile fp32 (test correctness)
|
||||
run: |
|
||||
DEV=CL IMAGE=1 SELFTEST=1 python examples/openpilot/compile3.py https://github.com/haraschax/filedump/raw/refs/heads/master/driving_vision_fp32.onnx
|
||||
|
|
|
|||
|
|
@ -101,6 +101,12 @@ def hand_coded_optimizations(k:Scheduler) -> Scheduler:
|
|||
# for Schedule, we check if the range is used in INDEX gates or WHERE gates
|
||||
is_masked = k.rngs[axis] in where_gate_rngs
|
||||
if k.full_shape[axis] <= 7 and is_masked and prod(k.full_shape[j] for j in to_upcast) * k.full_shape[axis] <= 7 * 7:
|
||||
# upcasting a masked global axis moves that range out of the launch grid into each work-item
|
||||
# under IMAGE, skip the upcast unless enough global work-items remain after it to hide memory latency
|
||||
if IMAGE and k.axis_types[axis] is AxisType.GLOBAL:
|
||||
global_upcast = prod(k.full_shape[i] for i in to_upcast if k.axis_types[i] is AxisType.GLOBAL) * k.full_shape[axis]
|
||||
global_items_after = prod(k.full_shape[i] for i in k.axes_of(AxisType.GLOBAL)) // global_upcast
|
||||
if resolve(global_items_after < getenv("OCCUPANCY_FLOOR", 4096), False): continue
|
||||
if DEBUG >= 4: print(f"upcasting masked axis : {axis}")
|
||||
to_upcast.append(axis)
|
||||
for axis in to_upcast[::-1]: k.apply_opt(Opt(OptOps.UPCAST, axis, 0))
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue