MOCK+AMD and MOCK+NV interfaces (#15858)

MOCK+AMD is an alias for MOCKKFD+AMD, MOCKNVK+NV is renamed to MOCK+NV
2026-06-24 02:14:17 +00:00 · 2026-04-21 15:22:16 -07:00 · 2026-04-21 15:22:16 -07:00 · 697e7aa819
commit 697e7aa819
parent 75ee51a446
6 changed files with 21 additions and 20 deletions
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@ -773,7 +773,7 @@ jobs:
          cuda: 'true'
          ocelot: 'true'
      - name: Set env
-        run: printf "${{ matrix.backend == 'ptx' && 'DEV=MOCK+CUDA:PTX' || matrix.backend == 'nv' && 'DEV=MOCKNVK+NV\nSKIP_SLOW_TEST=1' }}" >> $GITHUB_ENV
+        run: printf "${{ matrix.backend == 'ptx' && 'DEV=MOCK+CUDA:PTX' || matrix.backend == 'nv' && 'DEV=MOCK+NV\nSKIP_SLOW_TEST=1' }}" >> $GITHUB_ENV
      - name: Check Device.DEFAULT and print some source
        run: |
          python3 -c "from tinygrad import Device; assert Device.DEFAULT in ['CUDA','NV'], Device.DEFAULT"
@ -870,7 +870,7 @@ jobs:
        python -m pytest -n=auto test/device/test_hcq.py test/test_tiny.py test/device/test_amd_llvm.py --durations=20
    - name: Run pytest (ptx)
      env:
-        DEV: "MOCKNVK+NV:PTX"
+        DEV: "MOCK+NV:PTX"
        FORWARD_ONLY: 1
        # TODO: failing due to library loading error
        CAPTURE_PROCESS_REPLAY: 0
--- a/test/amd/test_sqtt_encoder.py
+++ b/test/amd/test_sqtt_encoder.py
@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 """Tests for SQTT encoder: verifies the emulator produces correct SQTT traces for known kernels.

-Run with: DEV=MOCKKFD+AMD python -m pytest test/amd/test_sqtt_encoder.py -v
+Run with: DEV=MOCK+AMD python -m pytest test/amd/test_sqtt_encoder.py -v
 """
 import ctypes, unittest
 from tinygrad.helpers import Context
--- a/test/mockgpu/amd/README
+++ b/test/mockgpu/amd/README
@ -20,17 +20,17 @@ test_llvm.py tests asm/disasm on the LLVM tests, confirming it behaves the same

 tinygrad's dtype tests should pass with and without LLVM. they run in about 12 seconds.

-`DEV=MOCKKFD+AMD pytest -n=12 test/backend/test_dtype_alu.py test/backend/test_dtype.py`
-`DEV=MOCKKFD+AMD:LLVM pytest -n=12 test/backend/test_dtype_alu.py test/backend/test_dtype.py`
+`DEV=MOCK+AMD pytest -n=12 test/backend/test_dtype_alu.py test/backend/test_dtype.py`
+`DEV=MOCK+AMD:LLVM pytest -n=12 test/backend/test_dtype_alu.py test/backend/test_dtype.py`

 The ops tests also pass, but they are very slow, so you should run them one at a time.

-`SKIP_SLOW_TEST=1 DEV=MOCKKFD+AMD pytest -n=12 test/backend/test_ops.py`
-`SKIP_SLOW_TEST=1 DEV=NOCKKFD+AMD:LLVM pytest -n=12 test/backend/test_ops.py`
+`SKIP_SLOW_TEST=1 DEV=MOCK+AMD pytest -n=12 test/backend/test_ops.py`
+`SKIP_SLOW_TEST=1 DEV=MOCK+AMD:LLVM pytest -n=12 test/backend/test_ops.py`

 When something is caught by main tinygrad tests, a local regression test should be added to `test/amd`.
 While working with tinygrad, you can dump the assembly with `DEBUG=7`. These tests all pass on real hardware
-If a test is failing with `DEV=MOCKKFD+AMD` it's because an instruction is emulated incorrectly.
+If a test is failing with `DEV=MOCK+AMD` it's because an instruction is emulated incorrectly.
 You can test with just `DEV=AMD` to test on real hardware, if it works on real hardware there's a bug in the emulator.
 IMPORTANT: if a test is failing in the emulator, it's an instruction bug. Use DEBUG=7, get the instructions, and debug.

--- a/test/mockgpu/mockgpu.py
+++ b/test/mockgpu/mockgpu.py
@ -11,8 +11,8 @@ libc = ctypes.CDLL(ctypes.util.find_library("c"))
 libc.mmap.argtypes = [ctypes.c_void_p, ctypes.c_size_t, ctypes.c_int, ctypes.c_int, ctypes.c_int, ctypes.c_long]
 libc.mmap.restype = ctypes.c_void_p

-drivers = [cls() for t in DEV.value if (cls:={"MOCKPCI+AMD": AMDriver, "MOCKKFD+AMD": AMDDriver, "MOCKUSB+AMD": AMUSBDriver,
-                                              "MOCKNVK+NV": NVDriver}.get(f"{t.interface}+{t.device}"))]
+drivers = [cls() for t in DEV.value if (cls:={"MOCKPCI+AMD": AMDriver, "MOCKKFD+AMD": AMDDriver, "MOCK+AMD": AMDDriver, "MOCKUSB+AMD": AMUSBDriver,
+                                              "MOCK+NV": NVDriver}.get(f"{t.interface}+{t.device}"))]
 tracked_fds = {}

 original_memoryview = builtins.memoryview
@ -67,7 +67,7 @@ class MockFileIOInterface(FileIOInterface):
    if self.fd in tracked_fds:
      tracked_fds[self.fd].close(self.fd)
      tracked_fds.pop(self.fd)
-    else: os.close(self.fd)
+    elif self.fd is not None: os.close(self.fd)

  def ioctl(self, request, arg):
    if self.fd in tracked_fds:
--- a/tinygrad/runtime/ops_amd.py
+++ b/tinygrad/runtime/ops_amd.py
@ -945,15 +945,16 @@ class USBIface(PCIIface):

  def sleep(self, timeout): pass

-def mock_iface(iface): return type(f"MOCK{iface.__name__}", (iface,), {})
-
 class AMDDevice(HCQCompiled):
  def is_am(self) -> bool: return isinstance(self.iface, (PCIIface, USBIface))
  def is_usb(self) -> bool: return isinstance(self.iface, USBIface)

  def __init__(self, device:str=""):
    self.device_id = int(device.split(":")[1]) if ":" in device else 0
-    self.iface = self._select_iface(KFDIface, PCIIface, USBIface, mock_iface(KFDIface), mock_iface(PCIIface), mock_iface(USBIface))
+
+    def mock(iface, name=None): return type(name or f"MOCK{iface.__name__}", (iface,), {})
+    self.iface = self._select_iface(KFDIface, PCIIface, USBIface, mock(KFDIface, "MOCKIface"), mock(KFDIface), mock(PCIIface), mock(USBIface))
+
    self.target:tuple[int, ...] = ((trgt:=self.iface.props['gfx_target_version']) // 10000, (trgt // 100) % 100, trgt % 100)
    self.arch = "gfx%d%x%x" % self.target
    if self.target < (9,4,2) or self.target >= (13,0,0): raise RuntimeError(f"Unsupported arch: {self.arch}")
--- a/tinygrad/runtime/ops_nv.py
+++ b/tinygrad/runtime/ops_nv.py
@ -240,7 +240,7 @@ class NVVideoQueue(NVCommandQueue):

 class NVArgsState(CLikeArgsState):
  def __init__(self, buf:HCQBuffer, prg:NVProgram, bufs:tuple[HCQBuffer, ...], vals:tuple[int, ...]=()):
-    if isinstance(prg.dev.iface, MOCKNVKIface): prg.cbuf_0[80:82] = [len(bufs), len(vals)]
+    if isinstance(prg.dev.iface, MOCKIface): prg.cbuf_0[80:82] = [len(bufs), len(vals)]
    super().__init__(buf, prg, bufs, vals=vals, prefix=prg.cbuf_0 or None)

 class NVProgram(HCQProgram):
@ -251,14 +251,14 @@ class NVProgram(HCQProgram):
    if (NAK:=isinstance(dev.renderer, NAKRenderer)):
      image, self.cbuf_0 = memoryview(bytearray(lib[ctypes.sizeof(info:=mesa.struct_nak_shader_info.from_buffer_copy(lib)):])), []
      self.regs_usage, self.shmem_usage, self.lcmem_usage = info.num_gprs, round_up(info.cs.smem_size, 128), round_up(info.slm_size, 16)
-    elif isinstance(dev.iface, MOCKNVKIface): image, sections, relocs = memoryview(bytearray(lib) + b'\x00' * (4 - len(lib)%4)).cast("I"), [], [] # type: ignore
+    elif isinstance(dev.iface, MOCKIface): image, sections, relocs = memoryview(bytearray(lib) + b'\x00' * (4 - len(lib)%4)).cast("I"), [], [] # type: ignore
    else: image, sections, relocs = elf_loader(self.lib, force_section_align=128)
    # NOTE: Ensure at least 4KB of space after the program to mitigate prefetch memory faults.
    self.lib_gpu = self.dev.allocator.alloc(round_up((prog_sz:=image.nbytes), 0x1000) + 0x1000, buf_spec:=BufferSpec(nolru=True))
    prog_addr = self.lib_gpu.va_addr
    if not NAK:
      # For MOCKGPU, the lib is PTX code, so some values are emulated.
-      self.regs_usage, self.shmem_usage, self.lcmem_usage, cbuf0_size = 0, 0x400, 0x240, 0x160 if isinstance(dev.iface, MOCKNVKIface) else 0
+      self.regs_usage, self.shmem_usage, self.lcmem_usage, cbuf0_size = 0, 0x400, 0x240, 0x160 if isinstance(dev.iface, MOCKIface) else 0
      for sh in sections: # pylint: disable=possibly-used-before-assignment
        if sh.name == f".nv.shared.{self.name}": self.shmem_usage = round_up(0x400 + sh.header.sh_size, 128)
        if sh.name == f".text.{self.name}": prog_addr, prog_sz = self.lib_gpu.va_addr+sh.header.sh_addr, sh.header.sh_size
@ -474,7 +474,7 @@ class NVKIface:

  def alloc(self, size:int, host=False, uncached=False, cpu_access=False, contiguous=False, map_flags=0, cpu_addr=None, **kwargs) -> HCQBuffer:
    # Uncached memory is "system". Use huge pages only for gpu memory.
-    page_size = mmap.PAGESIZE if uncached or host else ((2 << 20) if size >= (8 << 20) else (mmap.PAGESIZE if isinstance(self, MOCKNVKIface) else
+    page_size = mmap.PAGESIZE if uncached or host else ((2 << 20) if size >= (8 << 20) else (mmap.PAGESIZE if isinstance(self, MOCKIface) else
                                                                                             4 << 10))
    size = round_up(size, page_size)
    va_addr = self._alloc_gpu_vaddr(size, alignment=page_size, force_low=cpu_access) if (alloced:=cpu_addr is None) else cpu_addr
@ -578,14 +578,14 @@ class PCIIface(PCIIfaceBase):
    for _ in self.dev_impl.gsp.stat_q.read_resp(): pass
    if self.dev_impl.is_err_state: raise RuntimeError("Device fault detected")

-class MOCKNVKIface(NVKIface): count = 1
+class MOCKIface(NVKIface): count = 1

 class NVDevice(HCQCompiled[NVSignal]):
  def is_nvd(self) -> bool: return isinstance(self.iface, PCIIface)

  def __init__(self, device:str=""):
    self.device_id = int(device.split(":")[1]) if ":" in device else 0
-    self.iface = self._select_iface(NVKIface, PCIIface, MOCKNVKIface)
+    self.iface = self._select_iface(NVKIface, PCIIface, MOCKIface)

    device_params = nv_gpu.NV0080_ALLOC_PARAMETERS(deviceId=self.iface.gpu_instance, hClientShare=self.iface.root,
                                                   vaMode=nv_gpu.NV_DEVICE_ALLOCATION_VAMODE_OPTIONAL_MULTIPLE_VASPACES)