mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-06-24 02:14:17 +00:00
Compare commits
1 commit
update_ben
...
master
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
687ade119e |
2 changed files with 7 additions and 1 deletions
2
.github/workflows/test.yml
vendored
2
.github/workflows/test.yml
vendored
|
|
@@ -327,7 +327,7 @@ jobs:
|
||||||
llvm: 'true'
|
llvm: 'true'
|
||||||
- name: Test openpilot model kernel count and gate usage
|
- name: Test openpilot model kernel count and gate usage
|
||||||
run: |
|
run: |
|
||||||
ALLOWED_KERNEL_COUNT=123 ALLOWED_READ_IMAGE=1468 ALLOWED_GATED_READ_IMAGE=4 FLOAT16=1 DEV=CL IMAGE=1 python examples/openpilot/compile3.py https://gitlab.com/commaai/openpilot-lfs.git/gitlab-lfs/objects/cf6376aa9a090f0da26c280ef69eabf9bbdd51d1faac9ed392919c3db69be916
|
ALLOWED_KERNEL_COUNT=123 ALLOWED_READ_IMAGE=1361 ALLOWED_GATED_READ_IMAGE=55 FLOAT16=1 DEV=CL IMAGE=1 python examples/openpilot/compile3.py https://gitlab.com/commaai/openpilot-lfs.git/gitlab-lfs/objects/cf6376aa9a090f0da26c280ef69eabf9bbdd51d1faac9ed392919c3db69be916
|
||||||
- name: Test openpilot CL compile fp32 (test correctness)
|
- name: Test openpilot CL compile fp32 (test correctness)
|
||||||
run: |
|
run: |
|
||||||
DEV=CL IMAGE=1 SELFTEST=1 python examples/openpilot/compile3.py https://github.com/haraschax/filedump/raw/refs/heads/master/driving_vision_fp32.onnx
|
DEV=CL IMAGE=1 SELFTEST=1 python examples/openpilot/compile3.py https://github.com/haraschax/filedump/raw/refs/heads/master/driving_vision_fp32.onnx
|
||||||
|
|
|
||||||
|
|
@@ -101,6 +101,12 @@ def hand_coded_optimizations(k:Scheduler) -> Scheduler:
|
||||||
# for Schedule, we check if the range is used in INDEX gates or WHERE gates
|
# for Schedule, we check if the range is used in INDEX gates or WHERE gates
|
||||||
is_masked = k.rngs[axis] in where_gate_rngs
|
is_masked = k.rngs[axis] in where_gate_rngs
|
||||||
if k.full_shape[axis] <= 7 and is_masked and prod(k.full_shape[j] for j in to_upcast) * k.full_shape[axis] <= 7 * 7:
|
if k.full_shape[axis] <= 7 and is_masked and prod(k.full_shape[j] for j in to_upcast) * k.full_shape[axis] <= 7 * 7:
|
||||||
|
# upcasting a masked global axis moves that range out of the launch grid into each work-item
|
||||||
|
# under IMAGE, skip the upcast unless enough global work-items remain after it to hide memory latency
|
||||||
|
if IMAGE and k.axis_types[axis] is AxisType.GLOBAL:
|
||||||
|
global_upcast = prod(k.full_shape[i] for i in to_upcast if k.axis_types[i] is AxisType.GLOBAL) * k.full_shape[axis]
|
||||||
|
global_items_after = prod(k.full_shape[i] for i in k.axes_of(AxisType.GLOBAL)) // global_upcast
|
||||||
|
if resolve(global_items_after < getenv("OCCUPANCY_FLOOR", 4096), False): continue
|
||||||
if DEBUG >= 4: print(f"upcasting masked axis : {axis}")
|
if DEBUG >= 4: print(f"upcasting masked axis : {axis}")
|
||||||
to_upcast.append(axis)
|
to_upcast.append(axis)
|
||||||
for axis in to_upcast[::-1]: k.apply_opt(Opt(OptOps.UPCAST, axis, 0))
|
for axis in to_upcast[::-1]: k.apply_opt(Opt(OptOps.UPCAST, axis, 0))
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue