MOCK+AMD and MOCK+NV interfaces (#15858)

MOCK+AMD is an alias for MOCKKFD+AMD, MOCKNVK+NV is renamed to MOCK+NV
This commit is contained in:
Christopher Milan 2026-04-21 15:22:16 -07:00 committed by GitHub
commit 697e7aa819
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 21 additions and 20 deletions

View file

@ -773,7 +773,7 @@ jobs:
cuda: 'true'
ocelot: 'true'
- name: Set env
run: printf "${{ matrix.backend == 'ptx' && 'DEV=MOCK+CUDA:PTX' || matrix.backend == 'nv' && 'DEV=MOCKNVK+NV\nSKIP_SLOW_TEST=1' }}" >> $GITHUB_ENV
run: printf "${{ matrix.backend == 'ptx' && 'DEV=MOCK+CUDA:PTX' || matrix.backend == 'nv' && 'DEV=MOCK+NV\nSKIP_SLOW_TEST=1' }}" >> $GITHUB_ENV
- name: Check Device.DEFAULT and print some source
run: |
python3 -c "from tinygrad import Device; assert Device.DEFAULT in ['CUDA','NV'], Device.DEFAULT"
@ -870,7 +870,7 @@ jobs:
python -m pytest -n=auto test/device/test_hcq.py test/test_tiny.py test/device/test_amd_llvm.py --durations=20
- name: Run pytest (ptx)
env:
DEV: "MOCKNVK+NV:PTX"
DEV: "MOCK+NV:PTX"
FORWARD_ONLY: 1
# TODO: failing due to library loading error
CAPTURE_PROCESS_REPLAY: 0

View file

@ -1,7 +1,7 @@
#!/usr/bin/env python3
"""Tests for SQTT encoder: verifies the emulator produces correct SQTT traces for known kernels.
Run with: DEV=MOCKKFD+AMD python -m pytest test/amd/test_sqtt_encoder.py -v
Run with: DEV=MOCK+AMD python -m pytest test/amd/test_sqtt_encoder.py -v
"""
import ctypes, unittest
from tinygrad.helpers import Context

View file

@ -20,17 +20,17 @@ test_llvm.py tests asm/disasm on the LLVM tests, confirming it behaves the same
tinygrad's dtype tests should pass with and without LLVM. they run in about 12 seconds.
`DEV=MOCKKFD+AMD pytest -n=12 test/backend/test_dtype_alu.py test/backend/test_dtype.py`
`DEV=MOCKKFD+AMD:LLVM pytest -n=12 test/backend/test_dtype_alu.py test/backend/test_dtype.py`
`DEV=MOCK+AMD pytest -n=12 test/backend/test_dtype_alu.py test/backend/test_dtype.py`
`DEV=MOCK+AMD:LLVM pytest -n=12 test/backend/test_dtype_alu.py test/backend/test_dtype.py`
The ops tests also pass, but they are very slow, so you should run them one at a time.
`SKIP_SLOW_TEST=1 DEV=MOCKKFD+AMD pytest -n=12 test/backend/test_ops.py`
`SKIP_SLOW_TEST=1 DEV=MOCKKFD+AMD:LLVM pytest -n=12 test/backend/test_ops.py`
`SKIP_SLOW_TEST=1 DEV=MOCK+AMD pytest -n=12 test/backend/test_ops.py`
`SKIP_SLOW_TEST=1 DEV=MOCK+AMD:LLVM pytest -n=12 test/backend/test_ops.py`
When something is caught by main tinygrad tests, a local regression test should be added to `test/amd`.
While working with tinygrad, you can dump the assembly with `DEBUG=7`. These tests all pass on real hardware.
If a test is failing with `DEV=MOCKKFD+AMD` it's because an instruction is emulated incorrectly.
If a test is failing with `DEV=MOCK+AMD` it's because an instruction is emulated incorrectly.
You can test with just `DEV=AMD` to test on real hardware, if it works on real hardware there's a bug in the emulator.
IMPORTANT: if a test is failing in the emulator, it's an instruction bug. Use DEBUG=7, get the instructions, and debug.

View file

@ -11,8 +11,8 @@ libc = ctypes.CDLL(ctypes.util.find_library("c"))
libc.mmap.argtypes = [ctypes.c_void_p, ctypes.c_size_t, ctypes.c_int, ctypes.c_int, ctypes.c_int, ctypes.c_long]
libc.mmap.restype = ctypes.c_void_p
drivers = [cls() for t in DEV.value if (cls:={"MOCKPCI+AMD": AMDriver, "MOCKKFD+AMD": AMDDriver, "MOCKUSB+AMD": AMUSBDriver,
"MOCKNVK+NV": NVDriver}.get(f"{t.interface}+{t.device}"))]
drivers = [cls() for t in DEV.value if (cls:={"MOCKPCI+AMD": AMDriver, "MOCKKFD+AMD": AMDDriver, "MOCK+AMD": AMDDriver, "MOCKUSB+AMD": AMUSBDriver,
"MOCK+NV": NVDriver}.get(f"{t.interface}+{t.device}"))]
tracked_fds = {}
original_memoryview = builtins.memoryview
@ -67,7 +67,7 @@ class MockFileIOInterface(FileIOInterface):
if self.fd in tracked_fds:
tracked_fds[self.fd].close(self.fd)
tracked_fds.pop(self.fd)
else: os.close(self.fd)
elif self.fd is not None: os.close(self.fd)
def ioctl(self, request, arg):
if self.fd in tracked_fds:

View file

@ -945,15 +945,16 @@ class USBIface(PCIIface):
def sleep(self, timeout): pass
def mock_iface(iface): return type(f"MOCK{iface.__name__}", (iface,), {})
class AMDDevice(HCQCompiled):
def is_am(self) -> bool: return isinstance(self.iface, (PCIIface, USBIface))
def is_usb(self) -> bool: return isinstance(self.iface, USBIface)
def __init__(self, device:str=""):
self.device_id = int(device.split(":")[1]) if ":" in device else 0
self.iface = self._select_iface(KFDIface, PCIIface, USBIface, mock_iface(KFDIface), mock_iface(PCIIface), mock_iface(USBIface))
def mock(iface, name=None): return type(name or f"MOCK{iface.__name__}", (iface,), {})
self.iface = self._select_iface(KFDIface, PCIIface, USBIface, mock(KFDIface, "MOCKIface"), mock(KFDIface), mock(PCIIface), mock(USBIface))
self.target:tuple[int, ...] = ((trgt:=self.iface.props['gfx_target_version']) // 10000, (trgt // 100) % 100, trgt % 100)
self.arch = "gfx%d%x%x" % self.target
if self.target < (9,4,2) or self.target >= (13,0,0): raise RuntimeError(f"Unsupported arch: {self.arch}")

View file

@ -240,7 +240,7 @@ class NVVideoQueue(NVCommandQueue):
class NVArgsState(CLikeArgsState):
def __init__(self, buf:HCQBuffer, prg:NVProgram, bufs:tuple[HCQBuffer, ...], vals:tuple[int, ...]=()):
if isinstance(prg.dev.iface, MOCKNVKIface): prg.cbuf_0[80:82] = [len(bufs), len(vals)]
if isinstance(prg.dev.iface, MOCKIface): prg.cbuf_0[80:82] = [len(bufs), len(vals)]
super().__init__(buf, prg, bufs, vals=vals, prefix=prg.cbuf_0 or None)
class NVProgram(HCQProgram):
@ -251,14 +251,14 @@ class NVProgram(HCQProgram):
if (NAK:=isinstance(dev.renderer, NAKRenderer)):
image, self.cbuf_0 = memoryview(bytearray(lib[ctypes.sizeof(info:=mesa.struct_nak_shader_info.from_buffer_copy(lib)):])), []
self.regs_usage, self.shmem_usage, self.lcmem_usage = info.num_gprs, round_up(info.cs.smem_size, 128), round_up(info.slm_size, 16)
elif isinstance(dev.iface, MOCKNVKIface): image, sections, relocs = memoryview(bytearray(lib) + b'\x00' * (4 - len(lib)%4)).cast("I"), [], [] # type: ignore
elif isinstance(dev.iface, MOCKIface): image, sections, relocs = memoryview(bytearray(lib) + b'\x00' * (4 - len(lib)%4)).cast("I"), [], [] # type: ignore
else: image, sections, relocs = elf_loader(self.lib, force_section_align=128)
# NOTE: Ensure at least 4KB of space after the program to mitigate prefetch memory faults.
self.lib_gpu = self.dev.allocator.alloc(round_up((prog_sz:=image.nbytes), 0x1000) + 0x1000, buf_spec:=BufferSpec(nolru=True))
prog_addr = self.lib_gpu.va_addr
if not NAK:
# For MOCKGPU, the lib is PTX code, so some values are emulated.
self.regs_usage, self.shmem_usage, self.lcmem_usage, cbuf0_size = 0, 0x400, 0x240, 0x160 if isinstance(dev.iface, MOCKNVKIface) else 0
self.regs_usage, self.shmem_usage, self.lcmem_usage, cbuf0_size = 0, 0x400, 0x240, 0x160 if isinstance(dev.iface, MOCKIface) else 0
for sh in sections: # pylint: disable=possibly-used-before-assignment
if sh.name == f".nv.shared.{self.name}": self.shmem_usage = round_up(0x400 + sh.header.sh_size, 128)
if sh.name == f".text.{self.name}": prog_addr, prog_sz = self.lib_gpu.va_addr+sh.header.sh_addr, sh.header.sh_size
@ -474,7 +474,7 @@ class NVKIface:
def alloc(self, size:int, host=False, uncached=False, cpu_access=False, contiguous=False, map_flags=0, cpu_addr=None, **kwargs) -> HCQBuffer:
# Uncached memory is "system". Use huge pages only for gpu memory.
page_size = mmap.PAGESIZE if uncached or host else ((2 << 20) if size >= (8 << 20) else (mmap.PAGESIZE if isinstance(self, MOCKNVKIface) else
page_size = mmap.PAGESIZE if uncached or host else ((2 << 20) if size >= (8 << 20) else (mmap.PAGESIZE if isinstance(self, MOCKIface) else
4 << 10))
size = round_up(size, page_size)
va_addr = self._alloc_gpu_vaddr(size, alignment=page_size, force_low=cpu_access) if (alloced:=cpu_addr is None) else cpu_addr
@ -578,14 +578,14 @@ class PCIIface(PCIIfaceBase):
for _ in self.dev_impl.gsp.stat_q.read_resp(): pass
if self.dev_impl.is_err_state: raise RuntimeError("Device fault detected")
class MOCKNVKIface(NVKIface): count = 1
class MOCKIface(NVKIface): count = 1
class NVDevice(HCQCompiled[NVSignal]):
def is_nvd(self) -> bool: return isinstance(self.iface, PCIIface)
def __init__(self, device:str=""):
self.device_id = int(device.split(":")[1]) if ":" in device else 0
self.iface = self._select_iface(NVKIface, PCIIface, MOCKNVKIface)
self.iface = self._select_iface(NVKIface, PCIIface, MOCKIface)
device_params = nv_gpu.NV0080_ALLOC_PARAMETERS(deviceId=self.iface.gpu_instance, hClientShare=self.iface.root,
vaMode=nv_gpu.NV_DEVICE_ALLOCATION_VAMODE_OPTIONAL_MULTIPLE_VASPACES)