mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-06-24 02:14:17 +00:00
MOCK+AMD and MOCK+NV interfaces (#15858)
MOCK+AMD is an alias for MOCKKFD+AMD, MOCKNVK+NV is renamed to MOCK+NV
This commit is contained in:
parent
75ee51a446
commit
697e7aa819
6 changed files with 21 additions and 20 deletions
4
.github/workflows/test.yml
vendored
4
.github/workflows/test.yml
vendored
|
|
@ -773,7 +773,7 @@ jobs:
|
|||
cuda: 'true'
|
||||
ocelot: 'true'
|
||||
- name: Set env
|
||||
run: printf "${{ matrix.backend == 'ptx' && 'DEV=MOCK+CUDA:PTX' || matrix.backend == 'nv' && 'DEV=MOCKNVK+NV\nSKIP_SLOW_TEST=1' }}" >> $GITHUB_ENV
|
||||
run: printf "${{ matrix.backend == 'ptx' && 'DEV=MOCK+CUDA:PTX' || matrix.backend == 'nv' && 'DEV=MOCK+NV\nSKIP_SLOW_TEST=1' }}" >> $GITHUB_ENV
|
||||
- name: Check Device.DEFAULT and print some source
|
||||
run: |
|
||||
python3 -c "from tinygrad import Device; assert Device.DEFAULT in ['CUDA','NV'], Device.DEFAULT"
|
||||
|
|
@ -870,7 +870,7 @@ jobs:
|
|||
python -m pytest -n=auto test/device/test_hcq.py test/test_tiny.py test/device/test_amd_llvm.py --durations=20
|
||||
- name: Run pytest (ptx)
|
||||
env:
|
||||
DEV: "MOCKNVK+NV:PTX"
|
||||
DEV: "MOCK+NV:PTX"
|
||||
FORWARD_ONLY: 1
|
||||
# TODO: failing due to library loading error
|
||||
CAPTURE_PROCESS_REPLAY: 0
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
#!/usr/bin/env python3
|
||||
"""Tests for SQTT encoder: verifies the emulator produces correct SQTT traces for known kernels.
|
||||
|
||||
Run with: DEV=MOCKKFD+AMD python -m pytest test/amd/test_sqtt_encoder.py -v
|
||||
Run with: DEV=MOCK+AMD python -m pytest test/amd/test_sqtt_encoder.py -v
|
||||
"""
|
||||
import ctypes, unittest
|
||||
from tinygrad.helpers import Context
|
||||
|
|
|
|||
|
|
@ -20,17 +20,17 @@ test_llvm.py tests asm/disasm on the LLVM tests, confirming it behaves the same
|
|||
|
||||
tinygrad's dtype tests should pass with and without LLVM. they run in about 12 seconds.
|
||||
|
||||
`DEV=MOCKKFD+AMD pytest -n=12 test/backend/test_dtype_alu.py test/backend/test_dtype.py`
|
||||
`DEV=MOCKKFD+AMD:LLVM pytest -n=12 test/backend/test_dtype_alu.py test/backend/test_dtype.py`
|
||||
`DEV=MOCK+AMD pytest -n=12 test/backend/test_dtype_alu.py test/backend/test_dtype.py`
|
||||
`DEV=MOCK+AMD:LLVM pytest -n=12 test/backend/test_dtype_alu.py test/backend/test_dtype.py`
|
||||
|
||||
The ops tests also pass, but they are very slow, so you should run them one at a time.
|
||||
|
||||
`SKIP_SLOW_TEST=1 DEV=MOCKKFD+AMD pytest -n=12 test/backend/test_ops.py`
|
||||
`SKIP_SLOW_TEST=1 DEV=MOCKKFD+AMD:LLVM pytest -n=12 test/backend/test_ops.py`
|
||||
`SKIP_SLOW_TEST=1 DEV=MOCK+AMD pytest -n=12 test/backend/test_ops.py`
|
||||
`SKIP_SLOW_TEST=1 DEV=MOCK+AMD:LLVM pytest -n=12 test/backend/test_ops.py`
|
||||
|
||||
When something is caught by main tinygrad tests, a local regression test should be added to `test/amd`.
|
||||
While working with tinygrad, you can dump the assembly with `DEBUG=7`. These tests all pass on real hardware.
|
||||
If a test is failing with `DEV=MOCKKFD+AMD` it's because an instruction is emulated incorrectly.
|
||||
If a test is failing with `DEV=MOCK+AMD` it's because an instruction is emulated incorrectly.
|
||||
You can test with just `DEV=AMD` to test on real hardware, if it works on real hardware there's a bug in the emulator.
|
||||
IMPORTANT: if a test is failing in the emulator, it's an instruction bug. Use DEBUG=7, get the instructions, and debug.
|
||||
|
||||
|
|
|
|||
|
|
@ -11,8 +11,8 @@ libc = ctypes.CDLL(ctypes.util.find_library("c"))
|
|||
libc.mmap.argtypes = [ctypes.c_void_p, ctypes.c_size_t, ctypes.c_int, ctypes.c_int, ctypes.c_int, ctypes.c_long]
|
||||
libc.mmap.restype = ctypes.c_void_p
|
||||
|
||||
drivers = [cls() for t in DEV.value if (cls:={"MOCKPCI+AMD": AMDriver, "MOCKKFD+AMD": AMDDriver, "MOCKUSB+AMD": AMUSBDriver,
|
||||
"MOCKNVK+NV": NVDriver}.get(f"{t.interface}+{t.device}"))]
|
||||
drivers = [cls() for t in DEV.value if (cls:={"MOCKPCI+AMD": AMDriver, "MOCKKFD+AMD": AMDDriver, "MOCK+AMD": AMDDriver, "MOCKUSB+AMD": AMUSBDriver,
|
||||
"MOCK+NV": NVDriver}.get(f"{t.interface}+{t.device}"))]
|
||||
tracked_fds = {}
|
||||
|
||||
original_memoryview = builtins.memoryview
|
||||
|
|
@ -67,7 +67,7 @@ class MockFileIOInterface(FileIOInterface):
|
|||
if self.fd in tracked_fds:
|
||||
tracked_fds[self.fd].close(self.fd)
|
||||
tracked_fds.pop(self.fd)
|
||||
else: os.close(self.fd)
|
||||
elif self.fd is not None: os.close(self.fd)
|
||||
|
||||
def ioctl(self, request, arg):
|
||||
if self.fd in tracked_fds:
|
||||
|
|
|
|||
|
|
@ -945,15 +945,16 @@ class USBIface(PCIIface):
|
|||
|
||||
def sleep(self, timeout): pass
|
||||
|
||||
def mock_iface(iface): return type(f"MOCK{iface.__name__}", (iface,), {})
|
||||
|
||||
class AMDDevice(HCQCompiled):
|
||||
def is_am(self) -> bool: return isinstance(self.iface, (PCIIface, USBIface))
|
||||
def is_usb(self) -> bool: return isinstance(self.iface, USBIface)
|
||||
|
||||
def __init__(self, device:str=""):
|
||||
self.device_id = int(device.split(":")[1]) if ":" in device else 0
|
||||
self.iface = self._select_iface(KFDIface, PCIIface, USBIface, mock_iface(KFDIface), mock_iface(PCIIface), mock_iface(USBIface))
|
||||
|
||||
def mock(iface, name=None): return type(name or f"MOCK{iface.__name__}", (iface,), {})
|
||||
self.iface = self._select_iface(KFDIface, PCIIface, USBIface, mock(KFDIface, "MOCKIface"), mock(KFDIface), mock(PCIIface), mock(USBIface))
|
||||
|
||||
self.target:tuple[int, ...] = ((trgt:=self.iface.props['gfx_target_version']) // 10000, (trgt // 100) % 100, trgt % 100)
|
||||
self.arch = "gfx%d%x%x" % self.target
|
||||
if self.target < (9,4,2) or self.target >= (13,0,0): raise RuntimeError(f"Unsupported arch: {self.arch}")
|
||||
|
|
|
|||
|
|
@ -240,7 +240,7 @@ class NVVideoQueue(NVCommandQueue):
|
|||
|
||||
class NVArgsState(CLikeArgsState):
|
||||
def __init__(self, buf:HCQBuffer, prg:NVProgram, bufs:tuple[HCQBuffer, ...], vals:tuple[int, ...]=()):
|
||||
if isinstance(prg.dev.iface, MOCKNVKIface): prg.cbuf_0[80:82] = [len(bufs), len(vals)]
|
||||
if isinstance(prg.dev.iface, MOCKIface): prg.cbuf_0[80:82] = [len(bufs), len(vals)]
|
||||
super().__init__(buf, prg, bufs, vals=vals, prefix=prg.cbuf_0 or None)
|
||||
|
||||
class NVProgram(HCQProgram):
|
||||
|
|
@ -251,14 +251,14 @@ class NVProgram(HCQProgram):
|
|||
if (NAK:=isinstance(dev.renderer, NAKRenderer)):
|
||||
image, self.cbuf_0 = memoryview(bytearray(lib[ctypes.sizeof(info:=mesa.struct_nak_shader_info.from_buffer_copy(lib)):])), []
|
||||
self.regs_usage, self.shmem_usage, self.lcmem_usage = info.num_gprs, round_up(info.cs.smem_size, 128), round_up(info.slm_size, 16)
|
||||
elif isinstance(dev.iface, MOCKNVKIface): image, sections, relocs = memoryview(bytearray(lib) + b'\x00' * (4 - len(lib)%4)).cast("I"), [], [] # type: ignore
|
||||
elif isinstance(dev.iface, MOCKIface): image, sections, relocs = memoryview(bytearray(lib) + b'\x00' * (4 - len(lib)%4)).cast("I"), [], [] # type: ignore
|
||||
else: image, sections, relocs = elf_loader(self.lib, force_section_align=128)
|
||||
# NOTE: Ensure at least 4KB of space after the program to mitigate prefetch memory faults.
|
||||
self.lib_gpu = self.dev.allocator.alloc(round_up((prog_sz:=image.nbytes), 0x1000) + 0x1000, buf_spec:=BufferSpec(nolru=True))
|
||||
prog_addr = self.lib_gpu.va_addr
|
||||
if not NAK:
|
||||
# For MOCKGPU, the lib is PTX code, so some values are emulated.
|
||||
self.regs_usage, self.shmem_usage, self.lcmem_usage, cbuf0_size = 0, 0x400, 0x240, 0x160 if isinstance(dev.iface, MOCKNVKIface) else 0
|
||||
self.regs_usage, self.shmem_usage, self.lcmem_usage, cbuf0_size = 0, 0x400, 0x240, 0x160 if isinstance(dev.iface, MOCKIface) else 0
|
||||
for sh in sections: # pylint: disable=possibly-used-before-assignment
|
||||
if sh.name == f".nv.shared.{self.name}": self.shmem_usage = round_up(0x400 + sh.header.sh_size, 128)
|
||||
if sh.name == f".text.{self.name}": prog_addr, prog_sz = self.lib_gpu.va_addr+sh.header.sh_addr, sh.header.sh_size
|
||||
|
|
@ -474,7 +474,7 @@ class NVKIface:
|
|||
|
||||
def alloc(self, size:int, host=False, uncached=False, cpu_access=False, contiguous=False, map_flags=0, cpu_addr=None, **kwargs) -> HCQBuffer:
|
||||
# Uncached memory is "system". Use huge pages only for gpu memory.
|
||||
page_size = mmap.PAGESIZE if uncached or host else ((2 << 20) if size >= (8 << 20) else (mmap.PAGESIZE if isinstance(self, MOCKNVKIface) else
|
||||
page_size = mmap.PAGESIZE if uncached or host else ((2 << 20) if size >= (8 << 20) else (mmap.PAGESIZE if isinstance(self, MOCKIface) else
|
||||
4 << 10))
|
||||
size = round_up(size, page_size)
|
||||
va_addr = self._alloc_gpu_vaddr(size, alignment=page_size, force_low=cpu_access) if (alloced:=cpu_addr is None) else cpu_addr
|
||||
|
|
@ -578,14 +578,14 @@ class PCIIface(PCIIfaceBase):
|
|||
for _ in self.dev_impl.gsp.stat_q.read_resp(): pass
|
||||
if self.dev_impl.is_err_state: raise RuntimeError("Device fault detected")
|
||||
|
||||
class MOCKNVKIface(NVKIface): count = 1
|
||||
class MOCKIface(NVKIface): count = 1
|
||||
|
||||
class NVDevice(HCQCompiled[NVSignal]):
|
||||
def is_nvd(self) -> bool: return isinstance(self.iface, PCIIface)
|
||||
|
||||
def __init__(self, device:str=""):
|
||||
self.device_id = int(device.split(":")[1]) if ":" in device else 0
|
||||
self.iface = self._select_iface(NVKIface, PCIIface, MOCKNVKIface)
|
||||
self.iface = self._select_iface(NVKIface, PCIIface, MOCKIface)
|
||||
|
||||
device_params = nv_gpu.NV0080_ALLOC_PARAMETERS(deviceId=self.iface.gpu_instance, hClientShare=self.iface.root,
|
||||
vaMode=nv_gpu.NV_DEVICE_ALLOCATION_VAMODE_OPTIONAL_MULTIPLE_VASPACES)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue