mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-06-24 02:14:17 +00:00
qcom: sync cpu cache when from_blob (#14518)
* um * fx * d * x * x * x * x * f * ren
This commit is contained in:
parent
ee9d6a1f36
commit
2f55005ad9
2 changed files with 28 additions and 4 deletions
11
test/external/external_fuzz_qcom_cpu_cache.py
vendored
Normal file
11
test/external/external_fuzz_qcom_cpu_cache.py
vendored
Normal file
|
|
@ -0,0 +1,11 @@
|
|||
#!/usr/bin/env python3
|
||||
from tinygrad.tensor import Tensor
|
||||
import numpy as np
|
||||
|
||||
while True:
|
||||
arr = np.ones(1000000, dtype=np.uint8)
|
||||
print(f"numpy: {(arr + 1)[:10]}")
|
||||
|
||||
ptr = arr.ctypes.data
|
||||
tensor = Tensor.from_blob(ptr, arr.shape, dtype='uint8', device='QCOM').realize() + 1
|
||||
print(f"from_blob: {tensor.numpy()[:10]}")
|
||||
|
|
@ -2,21 +2,33 @@ from __future__ import annotations
|
|||
import os, ctypes, functools, mmap, struct, array, math, sys, weakref, contextlib
|
||||
assert sys.platform != 'win32'
|
||||
from typing import Any
|
||||
from tinygrad.device import BufferSpec, CompilerSet, CompilerPair
|
||||
from tinygrad.device import BufferSpec, CompilerSet, CompilerPair, Device
|
||||
from tinygrad.runtime.support.hcq import HCQBuffer, HWQueue, HCQProgram, HCQCompiled, HCQAllocatorBase, HCQSignal, HCQArgsState, BumpAllocator
|
||||
from tinygrad.runtime.support.hcq import FileIOInterface, MMIOInterface
|
||||
from tinygrad.runtime.autogen import kgsl, mesa
|
||||
from tinygrad.runtime.ops_cl import CLCompiler, CLDevice
|
||||
from tinygrad.renderer.cstyle import QCOMRenderer
|
||||
from tinygrad.renderer.nir import IR3Renderer
|
||||
from tinygrad.helpers import getenv, mv_address, to_mv, round_up, data64_le, prod, fromimport, cpu_profile, lo32, PROFILE, suppress_finalizing
|
||||
from tinygrad.helpers import next_power2, flatten, QCOM_IR3, QCOM_CC
|
||||
from tinygrad.dtype import ImageDType
|
||||
from tinygrad.helpers import getenv, mv_address, to_mv, round_up, data64_le, ceildiv, prod, fromimport, cpu_profile, lo32, suppress_finalizing
|
||||
from tinygrad.helpers import next_power2, flatten, QCOM_IR3, QCOM_CC, PROFILE
|
||||
from tinygrad.dtype import ImageDType, dtypes
|
||||
from tinygrad.runtime.support.system import System
|
||||
if getenv("IOCTL"): import extra.qcom_gpu_driver.opencl_ioctl # noqa: F401 # pylint: disable=unused-import
|
||||
|
||||
BUFTYPE_BUF, BUFTYPE_TEX, BUFTYPE_IBO = 0, 1, 2
|
||||
|
||||
@functools.cache
|
||||
def dcache_flush():
|
||||
from tinygrad.uop.ops import UOp, Ops, KernelInfo
|
||||
from tinygrad.codegen import get_program
|
||||
buf, n = UOp(Ops.PARAM, dtypes.uint8.ptr(), arg=0), UOp(Ops.PARAM, dtypes.uint8.ptr(), arg=1)
|
||||
i = UOp.range(n.cast(dtypes.int), 0, dtype=dtypes.int)
|
||||
flush = UOp(Ops.CUSTOM, dtypes.void, (buf.cast(dtypes.ulong) + i.cast(dtypes.ulong) * UOp.const(dtypes.ulong, 64),),
|
||||
arg='__asm__ volatile("dc cvac, %0" :: "r"({0}) : "memory");')
|
||||
sink = UOp.sink(flush.end(i), UOp(Ops.CUSTOM, dtypes.void, (), arg='__asm__ volatile("dsb sy" ::: "memory");'), arg=KernelInfo(name="dcache_flush"))
|
||||
ps = get_program(UOp(Ops.PROGRAM, src=(sink, UOp(Ops.DEVICE, arg="CPU"), UOp(Ops.LINEAR, src=tuple(sink.toposort())))), Device["CPU"].renderer)
|
||||
return Device["CPU"].runtime(ps.function_name, ps.lib)
|
||||
|
||||
#Parse C-style defines: <regname>_<field_x>__SHIFT and <regname>_<field_y>__MASK from the adreno module into the following format:
|
||||
# qreg.<regname>(<field_x>=..., <field_y>=..., ..., <field_n>=...)
|
||||
def _qreg_exec(__reg, __val=0, **kwargs):
|
||||
|
|
@ -391,6 +403,7 @@ class QCOMDevice(HCQCompiled):
|
|||
|
||||
def _gpu_map(self, ptr:int, size:int, **kwargs) -> HCQBuffer:
|
||||
ptr_aligned, size_aligned = (ptr & ~0xfff), round_up(size + (ptr & 0xfff), 0x1000)
|
||||
dcache_flush().fxn(ctypes.c_uint64(ptr_line_aligned:=ptr & ~63), ctypes.c_uint64(ceildiv(ptr + size - ptr_line_aligned, 64)))
|
||||
try:
|
||||
mi = kgsl.IOCTL_KGSL_MAP_USER_MEM(self.fd, hostptr=ptr_aligned, len=size_aligned, memtype=kgsl.KGSL_USER_MEM_TYPE_ADDR)
|
||||
return HCQBuffer(mi.gpuaddr + (ptr - ptr_aligned), size=size, meta=(mi, False), view=MMIOInterface(ptr, size, fmt='B'), owner=self, **kwargs)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue