test profiler timer sync across devs (#5751)

* test profiler timer sync across devs

* more correct

* typo
This commit is contained in:
nimlgen 2024-07-27 16:47:37 +03:00 committed by GitHub
commit ed1d784077
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -1,4 +1,4 @@
import unittest, ctypes, struct, contextlib, tempfile, pathlib, json, time, atexit
import unittest, ctypes, struct, contextlib, tempfile, pathlib, json, time, atexit, random
from tinygrad import Device, Tensor, dtypes
from tinygrad.helpers import CI, getenv, Context
from tinygrad.device import Buffer, BufferOptions, HCQCompiled
@ -370,8 +370,16 @@ class TestHCQ(unittest.TestCase):
assert buf2.as_buffer()[0] == i
@contextlib.contextmanager
def helper_collect_profile(*devs):
for dev in devs: dev._prof_setup()
def helper_collect_profile(*devs, random_setup_delay=False):
if random_setup_delay:
devs = list(devs)
for dev in devs: dev.synchronize()
random.shuffle(devs)
for dev in devs:
dev._prof_setup()
time.sleep(random.randint(1, 1000) / 1000)
else:
for dev in devs: dev._prof_setup()
profile_dict = {}
_, tmp = tempfile.mkstemp()
@ -475,5 +483,31 @@ class TestProfiler(unittest.TestCase):
copyin_node_2 = helper_profile_filter_node(profile, name=f"CPU -> {Device.DEFAULT}:1")[0]
helper_validate_node(copyin_node_2, profile=profile, pid_name=f"{Device.DEFAULT}:1", tid_name="DMA")
@unittest.skipIf(CI, "skip CI")
def test_profile_sync(self):
mv = memoryview(bytearray(struct.pack("ff", 0, 1)))
expected_diff = 100000 # sleep in us
devs = [Device[f"{Device.DEFAULT}:{i}"] for i in range(6)]
bufs = [Buffer(f"{Device.DEFAULT}:{i}", 2, dtypes.float, options=BufferOptions(nolru=True)).ensure_allocated() for i in range(6)]
# enqueue ops on different queues to check the timer sync
cpu_time = []
with helper_collect_profile(*devs, random_setup_delay=True) as profile:
for i in range(6):
x = time.perf_counter_ns()
time.sleep(expected_diff / 1e6)
bufs[i].copyin(mv)
cpu_time.append(((time.perf_counter_ns() - x) / 1000) - expected_diff)
nodes = [helper_profile_filter_node(profile, name=f"CPU -> {Device.canonicalize(f'{Device.DEFAULT}:{i}')}")[-1] for i in range(6)]
avg_diff = []
for i in range(1, 6):
diff = nodes[i]['ts'] - nodes[i-1]['ts'] - cpu_time[i]
avg_diff.append(diff - expected_diff)
assert expected_diff * 0.998 < diff < expected_diff * 1.002, "more that 0.2% diff"
print(f"total avg delay is {sum(avg_diff) / len(avg_diff)} us")
if __name__ == "__main__":
unittest.main()