mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-06-24 02:14:17 +00:00
NV tensor_cores in kernel.py (#4399)
This commit is contained in:
parent
0deaaf2bc8
commit
d4062cb6fc
2 changed files with 4 additions and 0 deletions
3
.github/workflows/benchmark.yml
vendored
3
.github/workflows/benchmark.yml
vendored
|
|
@ -123,6 +123,8 @@ jobs:
|
|||
CUDA=1 BFLOAT16=1 DEBUG=2 python3 extra/gemm/simple_matmul.py | tee matmul_bfloat16.txt
|
||||
- name: Run Tensor Core GEMM (PTX)
|
||||
run: CUDA=1 PTX=1 HALF=1 DEBUG=2 python3 extra/gemm/simple_matmul.py | tee matmul_ptx.txt
|
||||
- name: Run Tensor Core GEMM (NV)
|
||||
run: NV=1 HALF=1 DEBUG=2 python3 extra/gemm/simple_matmul.py | tee matmul_nv.txt
|
||||
- name: Fuzz Padded Tensor Core GEMM(CUDA)
|
||||
run: CUDA=1 M_START=12 M_STOP=20 M_STEP=1 N_START=6 N_STOP=10 N_STEP=1 K_START=28 K_STOP=36 K_STEP=1 HALF=1 TC_OPT=2 python3 ./extra/gemm/fuzz_matmul.py
|
||||
- name: Fuzz Padded Tensor Core GEMM(PTX)
|
||||
|
|
@ -160,6 +162,7 @@ jobs:
|
|||
matmul.txt
|
||||
matmul_bfloat16.txt
|
||||
matmul_ptx.txt
|
||||
matmul_nv.txt
|
||||
llama_unjitted.txt
|
||||
llama_jitted.txt
|
||||
llama_beam.txt
|
||||
|
|
|
|||
|
|
@ -59,6 +59,7 @@ tensor_cores: Dict[str, List[TensorCore]] = {
|
|||
"CUDA": [TensorCore(dims=(8,16,16), threads=[(0,2),(0,2),(1,2),(1,2),(0,2)], thread_local_sizes=[[2,2,2],[2,2],[2,2]], thread_local_aliases=[ [[0],[-2],[5],[0],[0],[-1,1,2,-3],[3,4]], [[5],[0],[0],[4],[3],[-1,1,2,-2],[0]], [[2],[-2],[5],[1],[-1],[0],[3,4]] ], dtype_in=di, dtype_out=do) for (di, do) in ([(dtypes.half, dtypes.float)] if getenv("PTX") else [(dtypes.half, dtypes.float), (dtypes.bfloat16, dtypes.float)])], # noqa: E501
|
||||
}
|
||||
tensor_cores["AMD"] = tensor_cores["HSA"]
|
||||
tensor_cores["NV"] = tensor_cores["CUDA"]
|
||||
|
||||
class LocalBuffer(NamedTuple):
|
||||
name: str
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue