NV tensor_cores in kernel.py (#4399)

This commit is contained in:
chenyu 2024-05-02 22:33:08 -04:00 committed by GitHub
commit d4062cb6fc
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 4 additions and 0 deletions

View file

@ -123,6 +123,8 @@ jobs:
CUDA=1 BFLOAT16=1 DEBUG=2 python3 extra/gemm/simple_matmul.py | tee matmul_bfloat16.txt
- name: Run Tensor Core GEMM (PTX)
run: CUDA=1 PTX=1 HALF=1 DEBUG=2 python3 extra/gemm/simple_matmul.py | tee matmul_ptx.txt
- name: Run Tensor Core GEMM (NV)
run: NV=1 HALF=1 DEBUG=2 python3 extra/gemm/simple_matmul.py | tee matmul_nv.txt
- name: Fuzz Padded Tensor Core GEMM(CUDA)
run: CUDA=1 M_START=12 M_STOP=20 M_STEP=1 N_START=6 N_STOP=10 N_STEP=1 K_START=28 K_STOP=36 K_STEP=1 HALF=1 TC_OPT=2 python3 ./extra/gemm/fuzz_matmul.py
- name: Fuzz Padded Tensor Core GEMM(PTX)
@ -160,6 +162,7 @@ jobs:
matmul.txt
matmul_bfloat16.txt
matmul_ptx.txt
matmul_nv.txt
llama_unjitted.txt
llama_jitted.txt
llama_beam.txt

View file

@ -59,6 +59,7 @@ tensor_cores: Dict[str, List[TensorCore]] = {
"CUDA": [TensorCore(dims=(8,16,16), threads=[(0,2),(0,2),(1,2),(1,2),(0,2)], thread_local_sizes=[[2,2,2],[2,2],[2,2]], thread_local_aliases=[ [[0],[-2],[5],[0],[0],[-1,1,2,-3],[3,4]], [[5],[0],[0],[4],[3],[-1,1,2,-2],[0]], [[2],[-2],[5],[1],[-1],[0],[3,4]] ], dtype_in=di, dtype_out=do) for (di, do) in ([(dtypes.half, dtypes.float)] if getenv("PTX") else [(dtypes.half, dtypes.float), (dtypes.bfloat16, dtypes.float)])], # noqa: E501
}
# Backend aliases: "AMD" and "NV" are bound to the very same list objects as "HSA" and "CUDA"
# (no copy is made), so each alias always sees the same TensorCore entries as its source key.
tensor_cores["AMD"] = tensor_cores["HSA"]
tensor_cores["NV"] = tensor_cores["CUDA"]
class LocalBuffer(NamedTuple):
name: str