gemm tile-multiple shape requirement is only for cdna4 asm (#14814)

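* gemm tile-multiple shape requirement is only for cdna4 asm * move the asm gemm test to test/backend * also check arch in the shape condition * update the test path in the CI workflow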
2026-06-24 02:14:17 +00:00 · 2026-02-17 13:00:02 +08:00 · 2026-02-17 13:00:02 +08:00 · f590564bf7
commit f590564bf7
parent 5bd2862d1a
3 changed files with 2 additions and 2 deletions
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -658,7 +658,7 @@ jobs:
          AMD: 0
        run: |
          PYTHONPATH=. NULL=1 EMULATE=AMD python extra/mmapeak/mmapeak.py
-          PYTHONPATH=. NULL=1 EMULATE=AMD_CDNA4 python3 -m pytest -n=auto test/testextra/test_tk.py test/testextra/test_asm_gemm.py
+          PYTHONPATH=. NULL=1 EMULATE=AMD_CDNA4 python3 -m pytest -n=auto test/testextra/test_tk.py test/backend/test_asm_gemm.py
      - name: Run ASM matmul on MOCKGPU
        run: PYTHONPATH="." AMD=1 MOCKGPU=1 N=256 python3 extra/gemm/amd_asm_matmul.py
      - name: Run LLVM test
--- a/extra/gemm/asm/cdna/gemm.py
+++ b/extra/gemm/asm/cdna/gemm.py
@@ -42,7 +42,7 @@ def can_use_asm_gemm(a:Tensor, b:Tensor) -> bool:
  else: dname = a.device
  arch = getattr(Device[dname].renderer, "arch", "")
  if batch not in {1, 2}: return todo(f"GEMM batch size {batch}")
-  if M % TILE_M != 0 or N % TILE_N != 0 or K % TILE_K != 0:
+  if (M % TILE_M != 0 or N % TILE_N != 0 or K % TILE_K != 0) and arch == "gfx950":
    return todo(f"GEMM shape ({M},{N},{K}) not a multiple of ({TILE_M},{TILE_N},{TILE_K})")
  return True

--- a/test/testextra/test_asm_gemm.py
+++ b/test/testextra/test_asm_gemm.py