add TestKernelOpts.test_tensor_core_opts to the tensor core test runs in benchmarks

This commit is contained in:
ignaciosica 2025-06-20 20:10:44 -03:00
commit 04a8eca8e3

View file

@@ -67,7 +67,7 @@ jobs:
- name: Test speed vs torch
run: BIG=2 MPS=1 python3.11 test/test_speed_v_torch.py | tee torch_speed.txt
- name: Test tensor cores
- run: METAL=1 python3.11 test/test_linearizer.py TestLinearizer.test_tensor_cores TestLinearizer.test_tensor_cores_emulation TestLinearizer.test_tensor_cores_padded TestLinearizer.test_tensor_cores_padded_uops
+ run: METAL=1 python3.11 test/test_linearizer.py TestLinearizer.test_tensor_cores TestLinearizer.test_tensor_cores_emulation TestLinearizer.test_tensor_cores_padded TestLinearizer.test_tensor_cores_padded_uops TestKernelOpts.test_tensor_core_opts
- name: Test AMX tensor cores
run: |
DEBUG=2 CPU=1 AMX=1 python3.11 test/test_linearizer.py TestLinearizer.test_tensor_cores TestLinearizer.test_tensor_cores_emulation TestLinearizer.test_tensor_cores_padded TestLinearizer.test_tensor_cores_padded_uops
@@ -196,8 +196,8 @@ jobs:
run: NV=1 python test/external/external_benchmark_multitensor_allreduce.py
- name: Test tensor cores
run: |
- NV=1 ALLOW_TF32=1 python3 test/test_linearizer.py TestLinearizer.test_tensor_cores TestLinearizer.test_tensor_cores_emulation TestLinearizer.test_tensor_cores_padded TestLinearizer.test_tensor_cores_padded_uops
- PTX=1 ALLOW_TF32=1 NV=1 python3 test/test_linearizer.py TestLinearizer.test_tensor_cores TestLinearizer.test_tensor_cores_emulation TestLinearizer.test_tensor_cores_padded TestLinearizer.test_tensor_cores_padded_uops
+ NV=1 ALLOW_TF32=1 python3 test/test_linearizer.py TestLinearizer.test_tensor_cores TestLinearizer.test_tensor_cores_emulation TestLinearizer.test_tensor_cores_padded TestLinearizer.test_tensor_cores_padded_uops TestKernelOpts.test_tensor_core_opts
+ PTX=1 ALLOW_TF32=1 NV=1 python3 test/test_linearizer.py TestLinearizer.test_tensor_cores TestLinearizer.test_tensor_cores_emulation TestLinearizer.test_tensor_cores_padded TestLinearizer.test_tensor_cores_padded_uops TestKernelOpts.test_tensor_core_opts
- name: Run Tensor Core GEMM (CUDA)
run: |
CUDA=1 SHOULD_USE_TC=1 HALF=1 DEBUG=2 python3 extra/gemm/simple_matmul.py | tee matmul.txt
@@ -396,8 +396,8 @@ jobs:
run: AMD=1 IGNORE_BEAM_CACHE=1 BEAM_DEBUG=1 DEBUG=1 python -m pytest -rA test/external/speed_v_theoretical.py --durations=20
- name: Test tensor cores
run: |
- AMD=1 python3 test/test_linearizer.py TestLinearizer.test_tensor_cores TestLinearizer.test_tensor_cores_emulation TestLinearizer.test_tensor_cores_padded_amd TestLinearizer.test_tensor_cores_padded_uops
- AMD=1 AMD_LLVM=1 python3 test/test_linearizer.py TestLinearizer.test_tensor_cores TestLinearizer.test_tensor_cores_emulation TestLinearizer.test_tensor_cores_padded_amd TestLinearizer.test_tensor_cores_padded_uops
+ AMD=1 python3 test/test_linearizer.py TestLinearizer.test_tensor_cores TestLinearizer.test_tensor_cores_emulation TestLinearizer.test_tensor_cores_padded_amd TestLinearizer.test_tensor_cores_padded_uops TestKernelOpts.test_tensor_core_opts
+ AMD=1 AMD_LLVM=1 python3 test/test_linearizer.py TestLinearizer.test_tensor_cores TestLinearizer.test_tensor_cores_emulation TestLinearizer.test_tensor_cores_padded_amd TestLinearizer.test_tensor_cores_padded_uops TestKernelOpts.test_tensor_core_opts
AMD=1 SHOULD_USE_TC=1 BFLOAT16=1 DEBUG=2 python3 extra/gemm/simple_matmul.py
- name: Run Tensor Core GEMM (AMD)
run: AMD=1 SHOULD_USE_TC=1 HALF=1 DEBUG=2 ATOL=2e-2 python3 extra/gemm/simple_matmul.py | tee matmul_amd.txt