mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-06-24 02:14:17 +00:00
why were these ever called buffer (#2483)
This commit is contained in:
parent
acbe6d1b53
commit
756b01f46f
11 changed files with 11 additions and 11 deletions
|
|
@@ -18,7 +18,7 @@ class _Device:
|
|||
@functools.lru_cache(maxsize=None) # this class is a singleton, pylint: disable=method-cache-max-size-none
|
||||
def __getitem__(self, x:str) -> Union[Interpreted, Compiled]:
|
||||
x = x.split(":")[0].upper()
|
||||
return [cls for cname, cls in inspect.getmembers(importlib.import_module(f'tinygrad.runtime.ops_{x.lower()}')) if (cname.lower() == x.lower() + "buffer") and x in self._buffers][0]
|
||||
return [cls for cname, cls in inspect.getmembers(importlib.import_module(f'tinygrad.runtime.ops_{x.lower()}')) if (cname.lower() == x.lower() + "device") and x in self._buffers][0]
|
||||
@functools.cached_property
|
||||
def DEFAULT(self) -> str:
|
||||
device_from_env: Optional[str] = functools.reduce(lambda val, ele: ele if getenv(ele) == 1 else val, self._buffers, None) # type: ignore
|
||||
|
|
|
|||
|
|
@@ -33,4 +33,4 @@ class ClangProgram:
|
|||
if wait: return time.perf_counter()-st
|
||||
|
||||
renderer = functools.partial(uops_to_cstyle, CStyleLanguage(buffer_suffix=" restrict", arg_int_prefix="const int"))
|
||||
ClangBuffer = Compiled(RawMallocBuffer, LinearizerOptions(supports_float4=False, has_local=False), renderer, compile_clang, ClangProgram)
|
||||
ClangDevice = Compiled(RawMallocBuffer, LinearizerOptions(supports_float4=False, has_local=False), renderer, compile_clang, ClangProgram)
|
||||
|
|
|
|||
|
|
@@ -46,4 +46,4 @@ numpy_fxn_for_op: Dict[Op, Callable] = {
|
|||
TernaryOps.WHERE: np.where,
|
||||
}
|
||||
|
||||
CPUBuffer = Interpreted(RawNumpyBuffer, numpy_fxn_for_op)
|
||||
CPUDevice = Interpreted(RawNumpyBuffer, numpy_fxn_for_op)
|
||||
|
|
|
|||
|
|
@@ -86,5 +86,5 @@ class CUDAProgram:
|
|||
end.synchronize()
|
||||
return start.time_till(end)*1e-3
|
||||
|
||||
CUDABuffer = Compiled(RawCUDABuffer, LinearizerOptions(supports_float4=False if getenv("PTX") else True, supports_float4_alu=False, global_max = [65535, 65535, 2147483647], local_max = [64, 1024, 1024]),
|
||||
CUDADevice = Compiled(RawCUDABuffer, LinearizerOptions(supports_float4=False if getenv("PTX") else True, supports_float4_alu=False, global_max = [65535, 65535, 2147483647], local_max = [64, 1024, 1024]),
|
||||
CUDARenderer, compile_cuda, CUDAProgram, cuda.Context.synchronize)
|
||||
|
|
|
|||
|
|
@@ -56,4 +56,4 @@ class RawDiskBuffer(RawBufferMapped):
|
|||
return instance
|
||||
|
||||
disk_fxn_for_op: Dict[Op, Callable] = { BufferOps.MEM: lambda x: x, UnaryOps.NOOP: lambda x: x, UnaryOps.CAST: RawDiskBuffer.cast, MovementOps.AS_STRIDED: RawDiskBuffer.as_strided }
|
||||
DiskBuffer = Interpreted(RawDiskBuffer, disk_fxn_for_op)
|
||||
DiskDevice = Interpreted(RawDiskBuffer, disk_fxn_for_op)
|
||||
|
|
|
|||
|
|
@@ -108,4 +108,4 @@ class CLProgram:
|
|||
return None
|
||||
return None
|
||||
|
||||
GPUBuffer = Compiled(CLBuffer, LinearizerOptions(), OpenCLRenderer, compile_gpu, CLProgram, CL.synchronize)
|
||||
GPUDevice = Compiled(CLBuffer, LinearizerOptions(), OpenCLRenderer, compile_gpu, CLProgram, CL.synchronize)
|
||||
|
|
|
|||
|
|
@@ -137,4 +137,4 @@ class HIPGraph:
|
|||
update_stats(f"<batched {len(self.jit_cache)}>", self.op_estimate, self.mem_estimate, var_vals, et, buf_count=len(input_rawbuffers), jit=jit, num_kernels=len(self.jit_cache))
|
||||
return et
|
||||
|
||||
HIPBuffer = Compiled(RawHIPBuffer, LinearizerOptions(device="HIP"), HIPRenderer, compile_hip, HIPProgram, hip.hipDeviceSynchronize, graph=HIPGraph)
|
||||
HIPDevice = Compiled(RawHIPBuffer, LinearizerOptions(device="HIP"), HIPRenderer, compile_hip, HIPProgram, hip.hipDeviceSynchronize, graph=HIPGraph)
|
||||
|
|
|
|||
|
|
@@ -65,4 +65,4 @@ class LLVMProgram:
|
|||
cfunc(*[x._buf if not isinstance(x, int) else x for x in bufs])
|
||||
if wait: return time.perf_counter()-st
|
||||
|
||||
LLVMBuffer = Compiled(RawMallocBuffer, LinearizerOptions(supports_float4=False, has_local=False, has_shared=False), uops_to_llvm_ir, compile_llvm, LLVMProgram)
|
||||
LLVMDevice = Compiled(RawMallocBuffer, LinearizerOptions(supports_float4=False, has_local=False, has_shared=False), uops_to_llvm_ir, compile_llvm, LLVMProgram)
|
||||
|
|
|
|||
|
|
@@ -152,4 +152,4 @@ class MetalGraph:
|
|||
update_stats(f"<batched {len(self.jit_cache)}>", self.op_estimate, self.mem_estimate, var_vals, et, buf_count=len(input_rawbuffers), jit=jit, num_kernels=len(self.jit_cache))
|
||||
return et
|
||||
|
||||
MetalBuffer = Compiled(RawMetalBuffer, LinearizerOptions(device="METAL"), MetalRenderer, compile_metal, MetalProgram, METAL.synchronize, graph=MetalGraph)
|
||||
MetalDevice = Compiled(RawMetalBuffer, LinearizerOptions(device="METAL"), MetalRenderer, compile_metal, MetalProgram, METAL.synchronize, graph=MetalGraph)
|
||||
|
|
|
|||
|
|
@@ -52,4 +52,4 @@ torch_fxn_for_op: Dict[Op, Callable] = {
|
|||
TernaryOps.WHERE: lambda x, y, z: torch.where(x != 0, y, z),
|
||||
}
|
||||
|
||||
TorchBuffer = Interpreted(RawTorchBuffer, torch_fxn_for_op)
|
||||
TorchDevice = Interpreted(RawTorchBuffer, torch_fxn_for_op)
|
||||
|
|
|
|||
|
|
@@ -42,4 +42,4 @@ class RawWebGPUBuffer(RawBufferCopyIn):
|
|||
def toCPU(self) -> np.ndarray: return np.frombuffer(wgpu_device.queue.read_buffer(self._buf, 0), dtype=np.dtype(self.dtype.np, metadata={"backing": self})) # type: ignore
|
||||
|
||||
renderer = functools.partial(uops_to_cstyle, WGSLLanguage())
|
||||
WebGpuBuffer = Compiled(RawWebGPUBuffer, LinearizerOptions(device="WEBGPU", supports_float4=False, local_max=[256, 256, 64], global_max=[65535, 65535, 65535]), renderer, lambda x: x, WebGPUProgram)
|
||||
WebGpuDevice = Compiled(RawWebGPUBuffer, LinearizerOptions(device="WEBGPU", supports_float4=False, local_max=[256, 256, 64], global_max=[65535, 65535, 65535]), renderer, lambda x: x, WebGPUProgram)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue