mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-06-24 02:14:17 +00:00
ocelot: use c.DLL (#16540)
This commit is contained in:
parent
11fee53527
commit
4e2e2e9956
3 changed files with 11 additions and 23 deletions
|
|
@ -1,15 +1,14 @@
|
|||
from __future__ import annotations
|
||||
from typing import Any
|
||||
import ctypes, time
|
||||
from test.mockgpu.helpers import ptx_run
|
||||
from tinygrad.runtime.autogen import cuda as orig_cuda
|
||||
from test.mockgpu.helpers import _try_dlopen_gpuocelot
|
||||
from tinygrad.helpers import mv_address
|
||||
|
||||
for attr in dir(orig_cuda):
|
||||
if not attr.startswith('__'):
|
||||
globals()[attr] = getattr(orig_cuda, attr)
|
||||
|
||||
gpuocelot_lib = _try_dlopen_gpuocelot()
|
||||
|
||||
# Global state
|
||||
class CUDAState:
|
||||
|
|
@ -128,7 +127,7 @@ def cuModuleUnload(hmod) -> int:
|
|||
def cuLaunchKernel(f, gx: int, gy: int, gz: int, lx: int, ly: int, lz: int, sharedMemBytes: int,
|
||||
hStream: Any, kernelParams: Any, extra: Any) -> int:
|
||||
cargs = [ctypes.cast(getattr(extra, field[0]), ctypes.c_void_p) for field in extra._real_fields_]
|
||||
try: gpuocelot_lib.ptx_run(ctypes.cast(f.value, ctypes.c_char_p), len(cargs), (ctypes.c_void_p*len(cargs))(*cargs), lx, ly, lz, gx, gy, gz, 0)
|
||||
try: ptx_run(ctypes.cast(f.value, ctypes.c_char_p), len(cargs), (ctypes.c_void_p*len(cargs))(*cargs), lx, ly, lz, gx, gy, gz, 0)
|
||||
except Exception as e:
|
||||
print("Error in cuLaunchKernel:", e)
|
||||
return orig_cuda.CUDA_ERROR_LAUNCH_FAILED
|
||||
|
|
|
|||
|
|
@ -1,18 +1,11 @@
|
|||
import ctypes, ctypes.util
|
||||
import ctypes
|
||||
from tinygrad.runtime.support import c
|
||||
|
||||
def _try_dlopen_gpuocelot():
|
||||
GPUOCELOT_PATHS = [ctypes.util.find_library("gpuocelot")] if ctypes.util.find_library("gpuocelot") is not None else []
|
||||
GPUOCELOT_PATHS += ["libgpuocelot.so", "/usr/local/lib/libgpuocelot.so",
|
||||
"libgpuocelot.dylib", "/usr/local/lib/libgpuocelot.dylib", "/opt/homebrew/lib/libgpuocelot.dylib"]
|
||||
for path in GPUOCELOT_PATHS:
|
||||
try:
|
||||
gpuocelot_lib = ctypes.CDLL(path)
|
||||
gpuocelot_lib.ptx_run.argtypes = [ctypes.c_char_p, ctypes.c_int, ctypes.POINTER(ctypes.c_void_p), ctypes.c_int, ctypes.c_int,
|
||||
ctypes.c_int, ctypes.c_int, ctypes.c_int, ctypes.c_int, ctypes.c_int]
|
||||
except OSError: pass
|
||||
else: return gpuocelot_lib
|
||||
print("Could not find libgpuocelot.so")
|
||||
return None
|
||||
gpuocelot_lib = c.DLL("ocelot", "gpuocelot")
|
||||
@gpuocelot_lib.bind(None, ctypes.c_char_p, ctypes.c_int, ctypes.POINTER(ctypes.c_void_p), ctypes.c_int, ctypes.c_int, ctypes.c_int,
|
||||
ctypes.c_int, ctypes.c_int, ctypes.c_int, ctypes.c_int)
|
||||
def ptx_run(source:bytes, n_args:int, args:c.POINTER[ctypes.c_void_p], blck_x:int, blck_y:int, blck_z:int,
|
||||
grid_x:int, grid_y:int, grid_z:int, shared_mem_size:int): pass
|
||||
|
||||
class PythonRemu:
|
||||
"""Python RDNA3/RDNA4 emulator wrapper used by mockgpu."""
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@ import ctypes, time
|
|||
from tinygrad.runtime.autogen import nv_570 as nv_gpu
|
||||
from enum import Enum, auto
|
||||
from test.mockgpu.gpu import VirtGPU
|
||||
from test.mockgpu.helpers import _try_dlopen_gpuocelot
|
||||
from test.mockgpu.helpers import ptx_run
|
||||
from tinygrad.helpers import to_mv
|
||||
from tinygrad.runtime.support.c import init_c_struct_t
|
||||
|
||||
|
|
@ -16,8 +16,6 @@ def make_qmd_struct_type():
|
|||
return init_c_struct_t(0x40 * 4, tuple(fields))
|
||||
qmd_struct_t = make_qmd_struct_type()
|
||||
|
||||
gpuocelot_lib = _try_dlopen_gpuocelot()
|
||||
|
||||
class SchedResult(Enum): CONT = auto(); YIELD = auto() # noqa: E702
|
||||
|
||||
class GPFIFO:
|
||||
|
|
@ -97,9 +95,7 @@ class GPFIFO:
|
|||
cargs = [ctypes.cast(args[i], ctypes.c_void_p) for i in range(args_cnt)] + [ctypes.cast(vals[i], ctypes.c_void_p) for i in range(vals_cnt)]
|
||||
gx, gy, gz = qmd.cta_raster_width, qmd.cta_raster_height, qmd.cta_raster_depth
|
||||
lx, ly, lz = qmd.cta_thread_dimension0, qmd.cta_thread_dimension1, qmd.cta_thread_dimension2
|
||||
try:
|
||||
gpuocelot_lib.ptx_run(ctypes.cast(prg_addr, ctypes.c_char_p), args_cnt+vals_cnt,
|
||||
(ctypes.c_void_p*len(cargs))(*cargs), lx, ly, lz, gx, gy, gz, 0)
|
||||
try: ptx_run(ctypes.cast(prg_addr, ctypes.c_char_p), args_cnt+vals_cnt, (ctypes.c_void_p*len(cargs))(*cargs), lx, ly, lz, gx, gy, gz, 0)
|
||||
except Exception as e: print("failed to execute:", e)
|
||||
if qmd.release0_enable:
|
||||
rel0 = to_mv(qmd.release0_address_lower + (qmd.release0_address_upper << 32), 0x10).cast('Q')
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue