replace CompilerSet with list (#15530)

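* replace CompilerSet with a plain list of renderers (the `<DEVICE>_CC` and per-renderer env vars are now looked up by name in ContextVar._cache)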

* oops

* default to `[Renderer]` when a device passes an empty renderer list
This commit is contained in:
Christopher Milan 2026-03-30 20:07:52 -07:00 committed by GitHub
commit 6fb038d109
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
17 changed files with 78 additions and 90 deletions

View file

@ -265,50 +265,39 @@ class Compiler:
return lib
def disassemble(self, lib:bytes): pass
@dataclass(frozen=True)
class CompilerSet: cset:list[tuple[type[Renderer]|functools.partial, ContextVar|None]]; ctrl_var:ContextVar|None = None # noqa: E702
class Compiled:
profile_events:list[ProfileEvent] = [ProfileDeviceEvent("CPU")] # NOTE: CPU is the default device.
def __init__(self, device:str, allocator:Allocator, compilers:CompilerSet|None, runtime, graph=None):
def __init__(self, device:str, allocator:Allocator, renderers:list[type[Renderer]|functools.partial], runtime, graph=None):
from tinygrad.renderer import Renderer
self.device, self.allocator, self.runtime, self.graph = device, allocator, runtime, graph
self.comps_ctrl_var = compilers.ctrl_var if compilers is not None else None
self.comp_sets:dict[str, tuple[ContextVar|None, type[Renderer]|functools.partial]] = {}
self.cached_pair:dict[Any, Renderer] = {}
for ren, var in (compilers.cset if compilers is not None else [(Renderer, None)]):
self.comp_sets[var.key.split('_', 1)[-1] if var is not None else self._compiler_name(ren)] = (var, ren)
self.device, self.allocator, self.runtime, self.graph, self.renderers = device, allocator, runtime, graph, renderers or [Renderer]
self.cached_renderer:dict[Any, Renderer] = {}
@property
def renderer(self) -> Renderer: return self._select_compiler_pair()
def renderer(self) -> Renderer: return self._select_renderer()
@property
def compiler(self) -> Compiler:
if (ret:=self.renderer.compiler) is None: raise RuntimeError(f"no compiler for {self.device}")
return ret
def _compiler_name(self, r:type[Renderer]|functools.partial) -> str:
def _renderer_name(self, r:type[Renderer]|functools.partial) -> str:
return unwrap_class_type(r).__name__.upper().removesuffix("RENDERER").removeprefix(devname:=self.device.split(':')[0].upper()) or devname
def _select_compiler_pair(self) -> Renderer:
def _renderer_var(self, r:type[Renderer]|functools.partial) -> ContextVar|None:
return ContextVar._cache.get(f"{self.device}_{self._renderer_name(r)}", None)
def _select_renderer(self) -> Renderer:
# select forced compiler from global env var.
forced_comps = set([self.comp_sets[val][1]] if self.comps_ctrl_var is not None and (val:=self.comps_ctrl_var.value) else [])
forced_comps = set([r for r in self.renderers if self._renderer_name(r) == val] if
(ctrl:=ContextVar._cache.get(f"{self.device}_CC", None)) is not None and (val:=ctrl.value) else [])
# add forced compilers from individual env vars (only if global env var is not set, as it takes precedence).
if not forced_comps: forced_comps |= set(rc for en, rc in self.comp_sets.values() if en is not None and en.value == 1)
if not forced_comps: forced_comps |= set(r for r in self.renderers if (en:=self._renderer_var(r)) is not None and en.value == 1)
if len(forced_comps) > 1: raise RuntimeError(f"{self.device}: multiple compilers set in env {forced_comps}")
# select remaining compilers (all or forced only)
comps = list(rc for en, rc in self.comp_sets.values())
# remove disabled compilers
for en, rc in self.comp_sets.values():
if en is not None and en.value == 0 and en.key in os.environ and rc in comps: comps.remove(rc)
return select_first_inited(list(forced_comps) if len(forced_comps)>0 else comps, f"No compiler for {self.device} is available", self.cached_pair)
return select_first_inited(list(forced_comps) if len(forced_comps)>0 else self.renderers, f"No renderer for {self.device} is available",
self.cached_renderer)
def synchronize(self):
"""
@ -384,23 +373,23 @@ def enumerate_devices_str() -> Generator[str, None, None]:
compilers_results, any_works = [], False
try:
d = Device[device]
default_comp_pairs, default_compiler, cc_ctrl_var = d.comp_sets, d.compiler, d.comps_ctrl_var
default_renderers, default_renderer = d.renderers, d.renderer
try:
for k,(en,r) in default_comp_pairs.items():
d.comp_sets = {k:(None,r)} # env var set to None, so it doesn't interfere
d.comps_ctrl_var = None
for r in default_renderers:
d.renderers = [r]
try:
# d.renderer, d.compiler = r(), c()
with Context(CACHELEVEL=0): test = (Tensor([1,2,3], device=device) * 2).tolist()
with Context(CACHELEVEL=0, **({f"{device}_CC": d._renderer_name(r)} if (ctrl:=f"{device}_CC") in ContextVar._cache else {})):
test = (Tensor([1,2,3], device=device) * 2).tolist()
if test != [2,4,6]: raise ValueError(f"got {test} instead of [2, 4, 6]")
set_text = f'({cc_ctrl_var.key}={d._compiler_name(r)} to make default)' if cc_ctrl_var is not None else ''
default_text = '(default)' if type(default_compiler) is type(d.compiler) else set_text
compilers_results.append(f"{colored('+', 'green')} {d._compiler_name(r)} {default_text}")
set_text = f'({ctrl}={d._renderer_name(r)} to make default)' if (ctrl:=f"{device}_CC") in ContextVar._cache else ''
default_text = '(default)' if type(default_renderer) is type(d.renderer) else set_text
compilers_results.append(f"{colored('+', 'green')} {d._renderer_name(r)} {default_text}")
any_works = True
except Exception as e: compilers_results.append(f"{colored('-', 'yellow')} {d._compiler_name(r)}: {e}")
except Exception as e: compilers_results.append(f"{colored('-', 'yellow')} {d._renderer_name(r)}: {e}")
finally:
# put the defaults back!
d.comp_sets, d.comps_ctrl_var = default_comp_pairs, cc_ctrl_var
d.renderers = default_renderers
result = (colored('PASS', 'green') if any_works else f"{colored('FAIL', 'yellow')}") + ''.join([f'\n{" "*16} {x}' for x in compilers_results])
except Exception as e:
result = f"{colored('FAIL', 'red')} {e}"

View file

@ -6,9 +6,10 @@ from dataclasses import dataclass
from tinygrad.runtime.support.hcq import HCQCompiled, HCQAllocator, HCQBuffer, HWQueue, CLikeArgsState, HCQSignal, HCQProgram, FileIOInterface
from tinygrad.runtime.support.hcq import MMIOInterface, BumpAllocator, hcq_filter_visible_devices
from tinygrad.uop.ops import sint
from tinygrad.device import Compiled, BufferSpec, CompilerSet
from tinygrad.device import Compiled, BufferSpec
from tinygrad.renderer import Renderer
from tinygrad.helpers import getenv, round_up, data64_le, DEBUG, PROFILE, ProfileEvent, lo32, hi32, colored, prod, ContextVar
from tinygrad.helpers import VIZ, AMD_CC, AMD_LLVM, AMD_HIPCC, ceildiv, unwrap
from tinygrad.helpers import VIZ, ceildiv, unwrap
from tinygrad.renderer.cstyle import AMDHIPRenderer, AMDHIPCCRenderer
from tinygrad.renderer.llvmir import AMDLLVMRenderer
from tinygrad.runtime.autogen import kfd, hsa, sqtt, amdgpu_kd, amdgpu_drm
@ -967,11 +968,10 @@ class AMDDevice(HCQCompiled):
self.sdma_queues:dict = {}
self.has_sdma_queue = self.sdma_queue(0) is not None
compilers = CompilerSet([(functools.partial(AMDHIPRenderer, self.arch), None),
(functools.partial(AMDLLVMRenderer, self.arch), AMD_LLVM),
(functools.partial(AMDHIPCCRenderer, self.arch), AMD_HIPCC)], ctrl_var=AMD_CC)
renderers:list[type[Renderer]|functools.partial] = [functools.partial(AMDHIPRenderer, self.arch), functools.partial(AMDLLVMRenderer, self.arch),
functools.partial(AMDHIPCCRenderer, self.arch)]
super().__init__(device, AMDAllocator(self), compilers, functools.partial(AMDProgram, self), AMDSignal,
super().__init__(device, AMDAllocator(self), renderers, functools.partial(AMDProgram, self), AMDSignal,
functools.partial(AMDComputeAQLQueue if self.is_aql else AMDComputeQueue, self),
functools.partial(AMDCopyQueue, self, max_copy_size=self.max_copy_size) if self.has_sdma_queue else None,
kernargs_size=(8 << 10) if self.is_usb() else (16 << 20), sigalloc_size=0x100 if self.is_usb() else 0x1000,

View file

@ -5,7 +5,7 @@ from tinygrad.runtime.autogen import opencl as cl
from tinygrad.runtime.support import c
from tinygrad.helpers import to_char_p_p, from_mv, OSX, DEBUG, mv_address, suppress_finalizing
from tinygrad.renderer.cstyle import OpenCLRenderer, IntelRenderer
from tinygrad.device import BufferSpec, LRUAllocator, Compiled, Compiler, CompileError, CompilerSet
from tinygrad.device import BufferSpec, LRUAllocator, Compiled, Compiler, CompileError
from tinygrad.dtype import ImageDType
CC_CB = c.CFUNCTYPE[None, [c.POINTER[ctypes.c_char], c.POINTER[None], cl.size_t, c.POINTER[None]]]
@ -119,7 +119,7 @@ class CLDevice(Compiled):
renderer = IntelRenderer if "cl_intel_subgroup_matrix_multiply_accumulate" in self.device_exts else OpenCLRenderer
self.cl_compiler = CLCompiler(self, f"{hashlib.md5(self.device_name.encode() + self.driver_version.encode()).hexdigest()}")
super().__init__(device, CLAllocator(self), CompilerSet([(renderer, None)]), functools.partial(CLProgram, self))
super().__init__(device, CLAllocator(self), [renderer], functools.partial(CLProgram, self))
def synchronize(self):
check(cl.clFinish(self.queue))

View file

@ -1,8 +1,8 @@
from __future__ import annotations
import platform, sys, ctypes, functools, time, mmap, threading, queue
from tinygrad.helpers import to_mv, OSX, WIN, mv_address, suppress_finalizing, unwrap, data64_le
from tinygrad.helpers import CPU_CC, CPU_LVP, CPU_LLVM
from tinygrad.device import BufferSpec, CompilerSet
from tinygrad.device import BufferSpec
from tinygrad.renderer import Renderer
from tinygrad.runtime.support.hcq import HCQCompiled, HCQAllocator, HCQBuffer, HWQueue, HCQArgsState, HCQSignal, HCQProgram, MMIOInterface
from tinygrad.runtime.support.hcq import CLikeArgsState
from tinygrad.renderer.cstyle import ClangJITRenderer
@ -136,5 +136,5 @@ class CPUDevice(HCQCompiled):
def __init__(self, device:str=""):
self.tasks:queue.Queue = queue.Queue()
CPUWorker(self, self.tasks, thread_id=0).start()
compilers = CompilerSet([(ClangJITRenderer, None), (CPULLVMRenderer, CPU_LLVM), (LVPRenderer, CPU_LVP)], ctrl_var=CPU_CC)
super().__init__(device, CPUAllocator(self), compilers, functools.partial(CPUProgram, self), CPUSignal, CPUComputeQueue)
renderers:list[type[Renderer]|functools.partial] = [ClangJITRenderer, CPULLVMRenderer, LVPRenderer]
super().__init__(device, CPUAllocator(self), renderers, functools.partial(CPUProgram, self), CPUSignal, CPUComputeQueue)

View file

@ -1,7 +1,8 @@
from __future__ import annotations
import ctypes, functools
from tinygrad.helpers import DEBUG, getenv, mv_address, suppress_finalizing, CUDA_CC, CUDA_PTX, CUDA_NVCC
from tinygrad.device import Compiled, BufferSpec, LRUAllocator, CompilerSet
from tinygrad.helpers import DEBUG, getenv, mv_address, suppress_finalizing
from tinygrad.device import Compiled, BufferSpec, LRUAllocator
from tinygrad.renderer import Renderer
from tinygrad.renderer.cstyle import CUDARenderer
from tinygrad.renderer.ptx import PTXRenderer
from tinygrad.runtime.autogen import cuda
@ -118,10 +119,10 @@ class CUDADevice(Compiled):
CUDADevice.devices.append(self)
from tinygrad.runtime.graph.cuda import CUDAGraph
compilers = CompilerSet([(functools.partial(CUDARenderer, self.arch, device="CUDA"), None),
(functools.partial(PTXRenderer, self.arch, device="CUDA"), CUDA_PTX),
(functools.partial(CUDARenderer, self.arch, device="CUDA", use_nvcc=True), CUDA_NVCC)], ctrl_var=CUDA_CC)
super().__init__(device, CUDAAllocator(self), compilers, functools.partial(CUDAProgram, self), None if MOCKGPU else CUDAGraph)
renderers:list[type[Renderer]|functools.partial] = [functools.partial(CUDARenderer, self.arch, device="CUDA"),
functools.partial(PTXRenderer, self.arch, device="CUDA"),
functools.partial(CUDARenderer, self.arch, device="CUDA", use_nvcc=True)]
super().__init__(device, CUDAAllocator(self), renderers, functools.partial(CUDAProgram, self), None if MOCKGPU else CUDAGraph)
def synchronize(self):
check(cuda.cuCtxSetCurrent(self.context))

View file

@ -15,7 +15,7 @@ class DiskDevice(Compiled):
self.size: int|None = None
self.fd: int|None = None
self.count = 0
super().__init__(device, DiskAllocator(self), None, None)
super().__init__(device, DiskAllocator(self), [], None)
def _might_open(self, size:int):
assert self.size is None or size <= self.size, f"can't reopen Disk tensor with larger size, opened with {self.size}, tried to open with {size}"
if self.size is not None and hasattr(self, "mem"):

View file

@ -1,7 +1,7 @@
from __future__ import annotations
import ctypes, os, mmap, tempfile, pathlib, array, functools, threading, contextlib, sys, subprocess, struct
assert sys.platform != 'win32'
from tinygrad.device import BufferSpec, Compiled, Allocator, Compiler, CompilerSet
from tinygrad.device import BufferSpec, Compiled, Allocator, Compiler
from tinygrad.dtype import dtypes, DType, PtrDType
from tinygrad.uop.ops import Ops, UOp
from tinygrad.helpers import getenv, round_up, mv_address, to_mv, cpu_objdump, system, DEBUG, suppress_finalizing
@ -146,10 +146,10 @@ class DSPCompiler(Compiler):
class DSPDevice(Compiled):
def __init__(self, device:str=""):
if getenv("MOCKDSP"): super().__init__(device, DSPAllocator(self), CompilerSet([(MockDSPRenderer, None)]), MockDSPProgram)
if getenv("MOCKDSP"): super().__init__(device, DSPAllocator(self), [MockDSPRenderer], MockDSPProgram)
else:
self.ion_fd = os.open('/dev/ion', os.O_RDONLY)
super().__init__(device, DSPAllocator(self), CompilerSet([(DSPRenderer, None)]), functools.partial(DSPProgram, self))
super().__init__(device, DSPAllocator(self), [DSPRenderer], functools.partial(DSPProgram, self))
fastrpc_shell = memoryview(bytearray(pathlib.Path('/dsp/cdsp/fastrpc_shell_3').read_bytes()))
self.shell_buf = self.allocator.alloc(round_up(fastrpc_shell.nbytes, 0x1000), BufferSpec(nolru=True))
ctypes.memmove(self.shell_buf.va_addr, mv_address(fastrpc_shell), fastrpc_shell.nbytes)

View file

@ -1,6 +1,6 @@
import ctypes, functools
from tinygrad.helpers import mv_address, getenv, suppress_finalizing
from tinygrad.device import Compiled, LRUAllocator, BufferSpec, CompilerSet
from tinygrad.device import Compiled, LRUAllocator, BufferSpec
from tinygrad.runtime.autogen import hip
from tinygrad.renderer.cstyle import HIPRenderer
from tinygrad.runtime.support.c import init_c_var, init_c_struct_t
@ -15,8 +15,7 @@ class HIPDevice(Compiled):
self.arch = init_c_var(hip.hipDeviceProp_t, lambda x: check(hip.hipGetDeviceProperties(x, self.device_id))).gcnArchName.decode()
self.time_event_st, self.time_event_en = [init_c_var(hip.hipEvent_t, lambda x: hip.hipEventCreate(ctypes.byref(x), 0)) for _ in range(2)]
compilers = CompilerSet([(functools.partial(HIPRenderer, self.arch), None)])
super().__init__(device, HIPAllocator(self), compilers, functools.partial(HIPProgram, self))
super().__init__(device, HIPAllocator(self), [functools.partial(HIPRenderer, self.arch)], functools.partial(HIPProgram, self))
def synchronize(self):
check(hip.hipSetDevice(self.device_id))
check(hip.hipDeviceSynchronize())

View file

@ -1,7 +1,7 @@
import subprocess, pathlib, struct, ctypes, tempfile, functools, decimal, platform
from tinygrad.helpers import prod, to_mv, round_up, cache_dir, PROFILE, ProfileRangeEvent, cpu_profile, unwrap, suppress_finalizing
import tinygrad.runtime.support.objc as objc
from tinygrad.device import Compiled, Compiler, CompileError, LRUAllocator, ProfileDeviceEvent, CompilerSet
from tinygrad.device import Compiled, Compiler, CompileError, LRUAllocator, ProfileDeviceEvent
from tinygrad.renderer.cstyle import MetalRenderer
from tinygrad.runtime.autogen import metal
from tinygrad.runtime.support.c import DLL
@ -42,7 +42,7 @@ class MetalDevice(Compiled):
from tinygrad.runtime.graph.metal import MetalGraph
# NOTE: GitHub CI macOS runners use paravirtualized metal which is broken with graph.
# This can be reproduced locally with any virtualization software (like utm) that can create macOS VMs with apple's own virtualization framework.
super().__init__(device, MetalAllocator(self), CompilerSet([(MetalRenderer, None)]),
super().__init__(device, MetalAllocator(self), [MetalRenderer],
functools.partial(MetalProgram, self), MetalGraph if 'virtual' not in from_ns_str(self.sysdevice.name()).lower() else None)
def synchronize(self):

View file

@ -8,4 +8,4 @@ class NpyAllocator(Allocator['NpyDevice']):
def _copyout(self, dest:memoryview, src:np.ndarray): dest[:] = self._as_buffer(src)
class NpyDevice(Compiled):
def __init__(self, device:str): super().__init__(device, NpyAllocator(self), None, None)
def __init__(self, device:str): super().__init__(device, NpyAllocator(self), [], None)

View file

@ -1,9 +1,9 @@
import functools
from tinygrad.device import Compiled, Allocator, CompilerSet
from tinygrad.device import Compiled, Allocator
from tinygrad.engine.jit import MultiGraphRunner
from tinygrad.renderer.cstyle import Renderer, CStyleLanguage, AMDHIPRenderer, QCOMCLRenderer
from tinygrad.uop.ops import Ops
from tinygrad.helpers import cpu_profile, EMULATE, NULL_QCOMCL, NULL_IR3, NULL_NAK, NULL_ALLOW_COPYOUT
from tinygrad.helpers import cpu_profile, EMULATE, NULL_ALLOW_COPYOUT
from tinygrad.renderer.nir import IR3Renderer, NAKRenderer
class NullRenderer(CStyleLanguage):
@ -39,7 +39,7 @@ class NullDevice(Compiled):
case "AMD_CDNA4": renderer = functools.partial(AMDHIPRenderer, "gfx950")
case "": renderer = NullRenderer
case _: raise RuntimeError(f"can't EMULATE device: {EMULATE.value}")
compilers = CompilerSet([(renderer, None), (functools.partial(QCOMCLRenderer, 0x6030001), NULL_QCOMCL), # adreno 630
(functools.partial(IR3Renderer, 0x6030001), NULL_IR3), # adreno 630
(functools.partial(NAKRenderer, "sm_120", 48), NULL_NAK)]) # 5090
super().__init__(device, NullAllocator(self), compilers, functools.partial(NullProgram, device), NullGraph)
# adreno 630, 5090
renderers:list[type[Renderer]|functools.partial] = [renderer, functools.partial(QCOMCLRenderer, 0x6030001),
functools.partial(IR3Renderer, 0x6030001), functools.partial(NAKRenderer, "sm_120", 48)]
super().__init__(device, NullAllocator(self), renderers, functools.partial(NullProgram, device), NullGraph)

View file

@ -6,9 +6,9 @@ from dataclasses import dataclass
from tinygrad.runtime.support.hcq import HCQCompiled, HCQAllocator, HCQBuffer, HWQueue, CLikeArgsState, HCQProgram, HCQSignal, BumpAllocator
from tinygrad.runtime.support.hcq import MMIOInterface, FileIOInterface, MOCKGPU, hcq_filter_visible_devices, hcq_profile
from tinygrad.uop.ops import sint
from tinygrad.device import Compiled, BufferSpec, CompilerSet
from tinygrad.helpers import getenv, mv_address, round_up, data64, data64_le, prod, OSX, hi32, lo32, NV_CC, NV_PTX, NV_NAK, NV_NVCC, PROFILE
from tinygrad.helpers import ContextVar, VIZ, ProfileEvent
from tinygrad.device import Compiled, BufferSpec
from tinygrad.renderer import Renderer
from tinygrad.helpers import getenv, mv_address, round_up, data64, data64_le, prod, OSX, hi32, lo32, PROFILE, ContextVar, VIZ, ProfileEvent
from tinygrad.renderer.ptx import PTXRenderer
from tinygrad.renderer.cstyle import CUDARenderer
from tinygrad.runtime.autogen import nv_570, nv_580, mesa
@ -618,11 +618,11 @@ class NVDevice(HCQCompiled[NVSignal]):
self.arch: str = "sm_120" if self.sm_version==0xa04 else f"sm_{(self.sm_version>>8)&0xff}{(val>>4) if (val:=self.sm_version&0xff) > 0xf else val}"
self.sass_version = ((self.sm_version & 0xf00) >> 4) | (self.sm_version & 0xf)
compilers = CompilerSet(ctrl_var=NV_CC, cset=[(functools.partial(CUDARenderer, self.arch), None),
(functools.partial(PTXRenderer, self.arch, device="NV"), NV_PTX),
(functools.partial(NAKRenderer, self.arch, self.max_warps_per_sm), NV_NAK),
(functools.partial(CUDARenderer, self.arch, use_nvcc=True), NV_NVCC)])
super().__init__(device, NVAllocator(self), compilers, functools.partial(NVProgram, self), NVSignal, NVComputeQueue, NVCopyQueue)
renderers:list[type[Renderer]|functools.partial] = [
functools.partial(CUDARenderer, self.arch), functools.partial(PTXRenderer, self.arch, device="NV"),
functools.partial(NAKRenderer, self.arch, self.max_warps_per_sm), functools.partial(CUDARenderer, self.arch, use_nvcc=True)
]
super().__init__(device, NVAllocator(self), renderers, functools.partial(NVProgram, self), NVSignal, NVComputeQueue, NVCopyQueue)
self.pma_enabled = PMA.value > 0 and PROFILE >= 1
if self.pma_enabled: self._prof_init()

View file

@ -6,7 +6,7 @@ from typing import Any, TYPE_CHECKING
import pickle, base64, itertools, time, sys, functools
from tinygrad.dtype import DType, dtypes, ImageDType, PtrDType, truncate, storage_fmt_for_dtype, to_storage_scalar, from_storage_scalar
from tinygrad.helpers import all_same, getenv, flatten, get_single_element, EMULATE
from tinygrad.device import Compiled, Compiler, Allocator, CompilerSet
from tinygrad.device import Compiled, Compiler, Allocator
from tinygrad.codegen.opt import tc
from tinygrad.uop.ops import exec_alu, python_alu, Ops, UOp, GroupOp, bitcast
from tinygrad.renderer import Renderer
@ -231,4 +231,4 @@ class PythonAllocator(Allocator['PythonDevice']):
class PythonDevice(Compiled):
def __init__(self, device:str):
super().__init__(device, PythonAllocator(self), CompilerSet([(PythonRenderer, None)]), PythonProgram)
super().__init__(device, PythonAllocator(self), [PythonRenderer], PythonProgram)

View file

@ -2,14 +2,14 @@ from __future__ import annotations
import os, ctypes, functools, mmap, struct, array, math, sys, weakref, contextlib
assert sys.platform != 'win32'
from typing import Any, cast
from tinygrad.device import BufferSpec, CompilerSet, Device
from tinygrad.device import BufferSpec, Device
from tinygrad.runtime.support.hcq import HCQBuffer, HWQueue, HCQProgram, HCQCompiled, HCQAllocatorBase, HCQSignal, HCQArgsState, BumpAllocator
from tinygrad.runtime.support.hcq import FileIOInterface, MMIOInterface
from tinygrad.runtime.autogen import kgsl, mesa
from tinygrad.renderer.cstyle import QCOMCLRenderer
from tinygrad.renderer.nir import IR3Renderer
from tinygrad.helpers import getenv, mv_address, to_mv, round_up, data64_le, ceildiv, prod, cpu_profile, lo32, suppress_finalizing
from tinygrad.helpers import next_power2, flatten, QCOM_IR3, QCOM_CC, PROFILE
from tinygrad.helpers import next_power2, flatten, PROFILE
from tinygrad.dtype import ImageDType, dtypes
from tinygrad.runtime.support.system import System
if getenv("IOCTL"): import extra.qcom_gpu_driver.opencl_ioctl # noqa: F401 # pylint: disable=unused-import
@ -378,10 +378,8 @@ class QCOMDevice(HCQCompiled):
if PROFILE and self.gpu_id[:2] < (7, 3):
System.write_sysfs("/sys/class/kgsl/kgsl-3d0/idle_timer", value="4000000000", msg="Failed to disable suspend mode", expected="4294967276")
compilers = CompilerSet(ctrl_var=QCOM_CC, cset=[(functools.partial(QCOMCLRenderer, info.chip_id), None),
(functools.partial(IR3Renderer, info.chip_id), QCOM_IR3)])
super().__init__(device, QCOMAllocator(self), compilers, functools.partial(QCOMProgram, self), QCOMSignal,
functools.partial(QCOMComputeQueue, self), None)
super().__init__(device, QCOMAllocator(self), [functools.partial(QCOMCLRenderer, info.chip_id), functools.partial(IR3Renderer, info.chip_id)],
functools.partial(QCOMProgram, self), QCOMSignal, functools.partial(QCOMComputeQueue, self), None)
def _gpu_alloc(self, size:int, flags:int=0, uncached=False, fill_zeroes=False) -> HCQBuffer:
flags |= flag("KGSL_MEMALIGN", alignment_hint:=12) | kgsl.KGSL_MEMFLAGS_USE_CPU_MAP

View file

@ -10,7 +10,7 @@ TINYFS_TIMEOUT = getenv("TINYFS_TIMEOUT", 60)
class TinyFSDevice(Compiled):
def __init__(self, device:str):
self.op = device[len("tinyfs:"):].upper()
super().__init__(device, TinyFSAllocator(self), None, None, None)
super().__init__(device, TinyFSAllocator(self), [], None, None)
self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
self.sock.connect((TINYFS_ENDPOINT.rsplit(":", 1)[0], int(TINYFS_ENDPOINT.rsplit(":", 1)[1])))

View file

@ -1,5 +1,5 @@
import functools, struct
from tinygrad.device import Compiled, Allocator, BufferSpec, CompilerSet
from tinygrad.device import Compiled, Allocator, BufferSpec
from tinygrad.renderer.wgsl import WGSLRenderer
from tinygrad.helpers import round_up, suppress_finalizing
from tinygrad.runtime.autogen import webgpu
@ -217,7 +217,7 @@ class WebGpuDevice(Compiled):
self.device_res = _run(webgpu.wgpuAdapterRequestDeviceF, webgpu.WGPURequestDeviceCallbackInfo, webgpu.WGPURequestDeviceCallback,
webgpu.WGPURequestDeviceStatus, 1, 2, adapter_res, dev_desc)
super().__init__(device, WebGpuAllocator(self), CompilerSet([(WGSLRenderer, None)]),
super().__init__(device, WebGpuAllocator(self), [WGSLRenderer],
functools.partial(WebGPUProgram, (self.device_res, webgpu.WGPUFeatureName_TimestampQuery in supported)))
def synchronize(self):

View file

@ -5,10 +5,11 @@ try: import fcntl # windows misses that
except ImportError: fcntl = None #type:ignore[assignment]
from tinygrad.helpers import PROFILE, getenv, to_mv, from_mv, cpu_profile, ProfileRangeEvent, select_first_inited, unwrap, suppress_finalizing
from tinygrad.helpers import TracingKey
from tinygrad.device import BufferSpec, Compiled, LRUAllocator, ProfileDeviceEvent, ProfileProgramEvent, CompilerSet
from tinygrad.device import BufferSpec, Compiled, LRUAllocator, ProfileDeviceEvent, ProfileProgramEvent
from tinygrad.uop.ops import sym_infer, sint, UOp
from tinygrad.runtime.autogen import libc
from tinygrad.runtime.support.memory import BumpAllocator
from tinygrad.renderer import Renderer
class MMIOInterface:
def __init__(self, addr:int, nbytes:int, fmt='B'): self.mv, self.addr, self.nbytes, self.fmt = to_mv(addr, nbytes).cast(fmt), addr, nbytes, fmt
@ -361,7 +362,7 @@ class HCQCompiled(Compiled, Generic[SignalType]):
signal_pool: dict[str, list[HCQBuffer]] = collections.defaultdict(list) # per peer group
cpu_devices: list[HCQCompiled] = []
def __init__(self, device:str, allocator:HCQAllocatorBase, compilers:CompilerSet, runtime, signal_t:Type[SignalType],
def __init__(self, device:str, allocator:HCQAllocatorBase, compilers:list[type[Renderer]|functools.partial], runtime, signal_t:Type[SignalType],
comp_queue_t:Callable[..., HWQueue], copy_queue_t:Callable[..., HWQueue]|None=None, kernargs_size=(16 << 20), sigalloc_size=0x1000,
can_recover:bool=False):
self.device_id:int = int(device.split(":")[1]) if ":" in device else 0