kfd: disable copy_from_fd while debugging (#4091)

* kfd: disable copy_from_fd while debugging

* increase timeout to a minute
This commit is contained in:
George Hotz 2024-04-05 18:02:58 -07:00 committed by GitHub
commit 8739d33fe9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -1,6 +1,6 @@
from __future__ import annotations
from typing import Tuple, Any
import os, fcntl, ctypes, functools, re, pathlib, mmap, struct, errno, io
import os, fcntl, ctypes, functools, re, pathlib, mmap, struct, errno
from tinygrad.device import Compiled, LRUAllocator, Compiler, CompilerOptions
from tinygrad.buffer import BufferOptions
from tinygrad.helpers import getenv, from_mv, init_c_struct_t, to_mv, round_up
@ -261,30 +261,23 @@ class KFDAllocator(LRUAllocator):
# self.device.synchronize()
# return to_mv(src.va_addr, src.size)
# Transfer between two KFDDevice instances: a copy from src.va_addr to dest.va_addr (size sz) is
# submitted on dest_dev, and the compute queues of both devices are made to wait on its completion signal.
def transfer(self, dest, src, sz:int, src_dev:KFDDevice, dest_dev:KFDDevice):
# presumably makes the src buffer addressable from dest_dev — confirm against _gpu_map
dest_dev._gpu_map(src)
# built here but submitted last: this compute queue first signals `sig`
q = HWComputeQueue().signal(sig := KFDDevice._get_signal())
# copy queue on dest_dev: wait for `sig`, run the copy, then signal `sigc`
HWCopyQueue().wait(sig).copy(dest.va_addr, src.va_addr, sz).signal(sigc := KFDDevice._get_signal()).submit(dest_dev)
# dest_dev's compute queue waits on `sigc`
HWComputeQueue().wait(sigc).submit(dest_dev)
# src_dev receives signal(sig) followed by wait(sigc)
q.wait(sigc).submit(src_dev)
# NOTE(review): this span is diff residue — a commented-out copy and a live copy of copy_from_fd are
# interleaved line by line, so it is not one runnable definition. The commit title says copy_from_fd is
# being disabled while debugging, so the `#`-prefixed lines are presumably the post-commit state — confirm
# against the real file. Indentation is lost here, so loop membership of each line must also be confirmed.
# What either copy does, read on its own:
#  - wraps fd in io.FileIO with closefd=False and seeks to offset minus (offset % PAGE_SIZE)
#  - steps i through range(0, size+minor_offset, self.b[0].size); each step reads local_size bytes into the
#    memory at self.b[1].va_addr, waits on signal_sdma when i != 0, reverses self.b, then calls _submit_sdma
#    with dest.va_addr+copied_in and self.b[0].va_addr+minor_offset (presumably destination then source — verify)
#  - minor_offset is reset to 0 after the first chunk; a _wait_signal on signal_sdma is the last statement
#def copy_from_fd(self, dest, fd, offset, size):
# fo = io.FileIO(fd, "a+b", closefd=False)
# fo.seek(offset - (minor_offset:=offset % PAGE_SIZE))
# copied_in, total_copy_size = 0, round_up(size+minor_offset, PAGE_SIZE)
# for i in range(0, size+minor_offset, self.b[0].size):
# local_size = min(self.b[0].size, total_copy_size-i)
# copy_size = min(local_size-minor_offset, size-copied_in)
# if copy_size == 0: break
def copy_from_fd(self, dest, fd, offset, size):
fo = io.FileIO(fd, "a+b", closefd=False)
fo.seek(offset - (minor_offset:=offset % PAGE_SIZE))
copied_in, total_copy_size = 0, round_up(size+minor_offset, PAGE_SIZE)
for i in range(0, size+minor_offset, self.b[0].size):
local_size = min(self.b[0].size, total_copy_size-i)
copy_size = min(local_size-minor_offset, size-copied_in)
if copy_size == 0: break
# fo.readinto(to_mv(self.b[1].va_addr, local_size))
# if i != 0: self.device._wait_signal(self.device.signal_sdma)
# self.b = self.b[::-1]
# self.device._submit_sdma(dest.va_addr+copied_in, self.b[0].va_addr+minor_offset, copy_size, completion_signal=self.device.signal_sdma)
fo.readinto(to_mv(self.b[1].va_addr, local_size))
if i != 0: self.device._wait_signal(self.device.signal_sdma)
self.b = self.b[::-1]
self.device._submit_sdma(dest.va_addr+copied_in, self.b[0].va_addr+minor_offset, copy_size, completion_signal=self.device.signal_sdma)
copied_in += copy_size
minor_offset = 0 # only on the first
self.device._wait_signal(self.device.signal_sdma)
# copied_in += copy_size
# minor_offset = 0 # only on the first
# self.device._wait_signal(self.device.signal_sdma)
def copyin(self, dest, src: memoryview):
for i in range(0, src.nbytes, self.b[0].size):
@ -301,6 +294,13 @@ class KFDAllocator(LRUAllocator):
self.device._wait_signal(self.device.signal_sdma)
ctypes.memmove(from_mv(dest[i:]), self.b[0].va_addr, lsize)
# Device-to-device transfer: submits a copy of size sz from src.va_addr to dest.va_addr on dest_dev's
# copy queue, gated by one signal and followed by a second signal that both compute queues wait on.
def transfer(self, dest, src, sz:int, src_dev:KFDDevice, dest_dev:KFDDevice):
# NOTE(review): looks like this maps src for access by dest_dev — verify in _gpu_map
dest_dev._gpu_map(src)
# compute queue that signals `sig`; it is only submitted (to src_dev) on the last line
q = HWComputeQueue().signal(sig := KFDDevice._get_signal())
# on dest_dev: wait(sig) -> copy(dest.va_addr, src.va_addr, sz) -> signal(sigc)
HWCopyQueue().wait(sig).copy(dest.va_addr, src.va_addr, sz).signal(sigc := KFDDevice._get_signal()).submit(dest_dev)
# a separate compute queue on dest_dev that waits on `sigc`
HWComputeQueue().wait(sigc).submit(dest_dev)
# append wait(sigc) to q and submit it to src_dev
q.wait(sigc).submit(src_dev)
# Hard-coded mmap flag values (named after the mmap(2) flags).
# NOTE(review): 0x10 matches Linux MAP_FIXED, but Linux asm-generic defines MAP_NORESERVE as 0x4000,
# not 0x400 — confirm the intended value for the target platform.
MAP_FIXED, MAP_NORESERVE = 0x10, 0x400
class KFDDevice(Compiled):
kfd:int = -1
@ -353,7 +353,7 @@ class KFDDevice(Compiled):
return ret
@classmethod
def _wait_signal(self, signal:hsa.amd_signal_t, timeout=10000):
def _wait_signal(self, signal:hsa.amd_signal_t, timeout=60000):
assert signal.event_id != 0, "can't wait on this signal"
evt_arr = (kfd.struct_kfd_event_data * 1)()
evt_arr[0].event_id = signal.event_id