qcom: sync cpu cache when from_blob (#14518)

* um

* fx

* d

* x

* x

* x

* x

* f

* ren
This commit is contained in:
nimlgen 2026-02-03 21:51:03 +03:00 committed by GitHub
commit 2f55005ad9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 28 additions and 4 deletions

View file

@ -0,0 +1,11 @@
#!/usr/bin/env python3
from tinygrad.tensor import Tensor
import numpy as np
while True:
arr = np.ones(1000000, dtype=np.uint8)
print(f"numpy: {(arr + 1)[:10]}")
ptr = arr.ctypes.data
tensor = Tensor.from_blob(ptr, arr.shape, dtype='uint8', device='QCOM').realize() + 1
print(f"from_blob: {tensor.numpy()[:10]}")

View file

@ -2,21 +2,33 @@ from __future__ import annotations
import os, ctypes, functools, mmap, struct, array, math, sys, weakref, contextlib
assert sys.platform != 'win32'
from typing import Any
from tinygrad.device import BufferSpec, CompilerSet, CompilerPair
from tinygrad.device import BufferSpec, CompilerSet, CompilerPair, Device
from tinygrad.runtime.support.hcq import HCQBuffer, HWQueue, HCQProgram, HCQCompiled, HCQAllocatorBase, HCQSignal, HCQArgsState, BumpAllocator
from tinygrad.runtime.support.hcq import FileIOInterface, MMIOInterface
from tinygrad.runtime.autogen import kgsl, mesa
from tinygrad.runtime.ops_cl import CLCompiler, CLDevice
from tinygrad.renderer.cstyle import QCOMRenderer
from tinygrad.renderer.nir import IR3Renderer
from tinygrad.helpers import getenv, mv_address, to_mv, round_up, data64_le, prod, fromimport, cpu_profile, lo32, PROFILE, suppress_finalizing
from tinygrad.helpers import next_power2, flatten, QCOM_IR3, QCOM_CC
from tinygrad.dtype import ImageDType
from tinygrad.helpers import getenv, mv_address, to_mv, round_up, data64_le, ceildiv, prod, fromimport, cpu_profile, lo32, suppress_finalizing
from tinygrad.helpers import next_power2, flatten, QCOM_IR3, QCOM_CC, PROFILE
from tinygrad.dtype import ImageDType, dtypes
from tinygrad.runtime.support.system import System
if getenv("IOCTL"): import extra.qcom_gpu_driver.opencl_ioctl # noqa: F401 # pylint: disable=unused-import
BUFTYPE_BUF, BUFTYPE_TEX, BUFTYPE_IBO = 0, 1, 2
@functools.cache
def dcache_flush():
from tinygrad.uop.ops import UOp, Ops, KernelInfo
from tinygrad.codegen import get_program
buf, n = UOp(Ops.PARAM, dtypes.uint8.ptr(), arg=0), UOp(Ops.PARAM, dtypes.uint8.ptr(), arg=1)
i = UOp.range(n.cast(dtypes.int), 0, dtype=dtypes.int)
flush = UOp(Ops.CUSTOM, dtypes.void, (buf.cast(dtypes.ulong) + i.cast(dtypes.ulong) * UOp.const(dtypes.ulong, 64),),
arg='__asm__ volatile("dc cvac, %0" :: "r"({0}) : "memory");')
sink = UOp.sink(flush.end(i), UOp(Ops.CUSTOM, dtypes.void, (), arg='__asm__ volatile("dsb sy" ::: "memory");'), arg=KernelInfo(name="dcache_flush"))
ps = get_program(UOp(Ops.PROGRAM, src=(sink, UOp(Ops.DEVICE, arg="CPU"), UOp(Ops.LINEAR, src=tuple(sink.toposort())))), Device["CPU"].renderer)
return Device["CPU"].runtime(ps.function_name, ps.lib)
#Parse C-style defines: <regname>_<field_x>__SHIFT and <regname>_<field_y>__MASK from the adreno module into the following format:
# qreg.<regname>(<field_x>=..., <field_y>=..., ..., <field_n>=...)
def _qreg_exec(__reg, __val=0, **kwargs):
@ -391,6 +403,7 @@ class QCOMDevice(HCQCompiled):
def _gpu_map(self, ptr:int, size:int, **kwargs) -> HCQBuffer:
ptr_aligned, size_aligned = (ptr & ~0xfff), round_up(size + (ptr & 0xfff), 0x1000)
dcache_flush().fxn(ctypes.c_uint64(ptr_line_aligned:=ptr & ~63), ctypes.c_uint64(ceildiv(ptr + size - ptr_line_aligned, 64)))
try:
mi = kgsl.IOCTL_KGSL_MAP_USER_MEM(self.fd, hostptr=ptr_aligned, len=size_aligned, memtype=kgsl.KGSL_USER_MEM_TYPE_ADDR)
return HCQBuffer(mi.gpuaddr + (ptr - ptr_aligned), size=size, meta=(mi, False), view=MMIOInterface(ptr, size, fmt='B'), owner=self, **kwargs)