move cpu_profile and shared ProfileEvents from device.py to helpers [pr] (#11126)

* move cpu_profile and shared ProfileEvents to helpers [pr]

* TestProfiler.test_cpu_profile

* update test_viz.py

* TestProfiler.test_profile_multiops ordering, it's different streams now
This commit is contained in:
qazal 2025-07-08 12:14:03 +03:00 committed by GitHub
commit 3dfc0ff887
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 56 additions and 53 deletions

View file

@ -1,6 +1,6 @@
import sys, pickle, decimal, json
from tinygrad.device import ProfileEvent, ProfileDeviceEvent, ProfileRangeEvent, ProfileGraphEvent
from tinygrad.helpers import tqdm, temp
from tinygrad.device import ProfileDeviceEvent, ProfileGraphEvent
from tinygrad.helpers import tqdm, temp, ProfileEvent, ProfileRangeEvent
# Per-device timestamp offsets, keyed by device name. prep_ts indexes the tuple with `is_copy`,
# so entry 0 is used when is_copy is falsy and entry 1 when it is truthy.
# NOTE(review): the meaning of the trailing int is not visible in this hunk -- confirm where the dict is filled.
devices:dict[str, tuple[decimal.Decimal, decimal.Decimal, int]] = {}
def prep_ts(device:str, ts:decimal.Decimal, is_copy):
  """Shift `ts` by the offset registered for `device` and truncate the sum to an int.

  device:  key into the module-level `devices` table (KeyError if it was never registered)
  ts:      raw timestamp; anything `decimal.Decimal` accepts
  is_copy: selects which offset of the device tuple is added (falsy -> index 0, truthy -> index 1)
  """
  base = decimal.Decimal(ts)
  shift = devices[device][is_copy]
  return int(base + shift)

View file

@ -1,7 +1,7 @@
import unittest, struct, contextlib, statistics, time
from tinygrad import Device, Tensor, dtypes, TinyJit
from tinygrad.helpers import CI, getenv, Context
from tinygrad.device import Buffer, BufferSpec, Compiled, ProfileRangeEvent, ProfileDeviceEvent, ProfileGraphEvent, cpu_profile
from tinygrad.helpers import CI, getenv, Context, ProfileRangeEvent, cpu_profile, cpu_events
from tinygrad.device import Buffer, BufferSpec, Compiled, ProfileDeviceEvent, ProfileGraphEvent
from tinygrad.runtime.support.hcq import HCQCompiled
from tinygrad.engine.realize import get_runner
@ -11,6 +11,7 @@ MOCKGPU = getenv("MOCKGPU")
def helper_collect_profile(*devs):
for dev in devs: dev.synchronize()
Compiled.profile_events = [x for x in Compiled.profile_events if isinstance(x, ProfileDeviceEvent) and x.device.startswith("METAL")]
cpu_events.clear()
profile_list = []
with Context(PROFILE=1):
@ -18,6 +19,7 @@ def helper_collect_profile(*devs):
for dev in devs: dev.synchronize()
for dev in devs: dev._at_profile_finalize()
for x in Compiled.profile_events: profile_list.append(x)
profile_list.extend(cpu_events)
def helper_profile_filter_device(profile, device:str):
assert any(getattr(x, "device", None) == device and isinstance(x, ProfileDeviceEvent) for x in profile), f"device {device} is not registred"
@ -73,13 +75,15 @@ class TestProfiler(unittest.TestCase):
evs = [x for x in profile if isinstance(x, ProfileRangeEvent)]
assert len(evs) == 3, "3 kernel runs are expected"
assert evs[0].is_copy, "kernel should be copy"
assert evs[1].name == runner_name, "kernel name is not correct"
assert not evs[1].is_copy, "kernel should not be copy"
assert evs[2].is_copy, "kernel should be copy"
# NOTE: order of events does not matter, the tool is responsible for sorting them
copy_events = [e for e in evs if e.is_copy]
self.assertEqual(len(copy_events), 2)
for i in range(1, 3):
assert evs[i].st > evs[i-1].en, "timestamp not aranged"
prg_events = [e for e in evs if not e.is_copy]
assert prg_events[0].name == runner_name, "kernel name is not correct"
#for i in range(1, 3):
# assert evs[i].st > evs[i-1].en, "timestamp not aranged"
def test_profile_multidev(self):
d1 = Device[f"{Device.DEFAULT}:1"]

View file

@ -1,11 +1,11 @@
import unittest, decimal, json
from dataclasses import dataclass
from tinygrad.uop.ops import UOp, UPat, Ops, PatternMatcher, TrackedPatternMatcher, TracingKey
from tinygrad.uop.ops import UOp, UPat, Ops, PatternMatcher, TrackedPatternMatcher
from tinygrad.uop.ops import graph_rewrite, track_rewrites, TRACK_MATCH_STATS
from tinygrad.uop.symbolic import sym
from tinygrad.dtype import dtypes
from tinygrad.helpers import PROFILE, colored, ansistrip, flatten
from tinygrad.helpers import PROFILE, colored, ansistrip, flatten, TracingKey, ProfileRangeEvent
from tinygrad.device import Buffer
@track_rewrites(name=True)
@ -230,7 +230,7 @@ class TestVizIntegration(TestViz):
self.assertEqual(lst[0]["name"], "Schedule 1 Kernel n1")
self.assertEqual(lst[1]["name"], prg.name)
from tinygrad.device import ProfileDeviceEvent, ProfileRangeEvent, ProfileGraphEvent, ProfileGraphEntry
from tinygrad.device import ProfileDeviceEvent, ProfileGraphEvent, ProfileGraphEntry
from tinygrad.viz.serve import get_profile
class TestVizProfiler(unittest.TestCase):

View file

@ -1,13 +1,12 @@
from __future__ import annotations
from dataclasses import dataclass, replace, field
from collections import defaultdict
from typing import Optional, Any, Generic, TypeVar, Iterator, Generator
from typing import Optional, Any, Generic, TypeVar, Iterator
import importlib, inspect, functools, pathlib, os, ctypes, ctypes.util, platform, contextlib, sys, re, atexit, pickle, decimal, time
from tinygrad.helpers import CI, OSX, LRU, getenv, diskcache_get, diskcache_put, DEBUG, GlobalCounters, flat_mv, from_mv, PROFILE, temp, mv_address, \
cpu_time_execution, colored, Context, round_up, DISABLE_COMPILER_CACHE, ALLOW_DEVICE_USAGE
cpu_time_execution, colored, Context, round_up, DISABLE_COMPILER_CACHE, ALLOW_DEVICE_USAGE, cpu_events, ProfileEvent
from tinygrad.dtype import DType, ImageDType, PtrDType, dtypes, _to_np_dtype
from tinygrad.renderer import Renderer
from tinygrad.uop.ops import TracingKey
# **************** Device ****************
@ -51,15 +50,10 @@ atexit.register(lambda: [Device[dn].finalize() for dn in Device._opened_devices]
# **************** Profile ****************
class ProfileEvent: pass
# Frozen (immutable) profiler record naming a device plus two Decimal values that both default to 0.
# NOTE(review): comp_tdiff / copy_tdiff look like per-device timestamp offsets for compute and copy work -- confirm
# against the code that constructs this event.
@dataclass(frozen=True)
class ProfileDeviceEvent(ProfileEvent):
device:str; comp_tdiff:decimal.Decimal=decimal.Decimal(0); copy_tdiff:decimal.Decimal=decimal.Decimal(0) # noqa: E702
@dataclass
class ProfileRangeEvent(ProfileEvent): device:str; name:str|TracingKey; st:decimal.Decimal; en:decimal.Decimal|None=None; is_copy:bool=False # noqa: E702
# Frozen (immutable) profiler record with a single timestamp `st`: device, name, an integer `ref`
# and an `arg` dict that defaults to a fresh empty dict per instance.
# NOTE(review): what `ref` refers to is not visible in this hunk -- confirm at the call sites.
@dataclass(frozen=True)
class ProfilePointEvent(ProfileEvent): device:str; name:str; st:decimal.Decimal; ref:int; arg:dict=field(default_factory=dict) # noqa: E702
@ -72,14 +66,6 @@ class ProfileGraphEntry: device:str; name:str; st_id:int; en_id:int; is_copy:boo
# Frozen (immutable) profiler record bundling a list of ProfileGraphEntry items (`ents`),
# a list of integer lists (`deps`) and a list of Decimal values (`sigs`).
# NOTE(review): presumably deps[i] lists the entries that ents[i] depends on and sigs holds the timestamps
# addressed by each entry's st_id/en_id -- confirm against the producer of this event.
@dataclass(frozen=True)
class ProfileGraphEvent(ProfileEvent): ents:list[ProfileGraphEntry]; deps:list[list[int]]; sigs:list[decimal.Decimal] # noqa: E702
@contextlib.contextmanager
def cpu_profile(name:str|TracingKey, device="CPU", is_copy=False, display=True) -> Generator[ProfileRangeEvent, None, None]:
res = ProfileRangeEvent(device, name, decimal.Decimal(time.perf_counter_ns()) / 1000, is_copy=is_copy)
try: yield res
finally:
res.en = decimal.Decimal(time.perf_counter_ns()) / 1000
if PROFILE and display: Compiled.profile_events.append(res)
# **************** Buffer + Allocators ****************
@ -396,7 +382,7 @@ if PROFILE:
for dev in devs: dev.synchronize()
for dev in devs: dev._at_profile_finalize()
with open(fn:=temp("profile.pkl", append_user=True), "wb") as f: pickle.dump(Compiled.profile_events+Buffer.profile_events, f)
with open(fn:=temp("profile.pkl", append_user=True), "wb") as f: pickle.dump(cpu_events+Compiled.profile_events+Buffer.profile_events, f)
if not getenv("SQTT", 0):
from tinygrad.uop.ops import launch_viz

View file

@ -1,9 +1,9 @@
from typing import Optional, cast, Generator
import time, pprint
from dataclasses import dataclass, replace, field
from tinygrad.helpers import all_same, colored, DEBUG, GlobalCounters, ansilen, BEAM, NOOPT, all_int, CAPTURING, Metadata, TRACEMETA
from tinygrad.helpers import all_same, colored, DEBUG, GlobalCounters, ansilen, BEAM, NOOPT, all_int, CAPTURING, Metadata, TRACEMETA, TracingKey
from tinygrad.helpers import DEVECTORIZE, time_to_str, VALIDATE_WITH_CPU, getenv
from tinygrad.uop.ops import Ops, PatternMatcher, UOp, UPat, Variable, sym_infer, graph_rewrite, print_uops, track_rewrites, TracingKey
from tinygrad.uop.ops import Ops, PatternMatcher, UOp, UPat, Variable, sym_infer, graph_rewrite, print_uops, track_rewrites
from tinygrad.device import Device, Buffer
from tinygrad.renderer import Renderer, ProgramSpec, Estimates
from tinygrad.engine.schedule import ScheduleItem

View file

@ -1,8 +1,8 @@
from __future__ import annotations
import os, functools, platform, time, re, contextlib, operator, hashlib, pickle, sqlite3, tempfile, pathlib, string, ctypes, sys, gzip, getpass
import urllib.request, subprocess, shutil, math, types, copyreg, inspect, importlib
import urllib.request, subprocess, shutil, math, types, copyreg, inspect, importlib, decimal
from dataclasses import dataclass
from typing import Union, ClassVar, Optional, Iterable, Any, TypeVar, Callable, Sequence, TypeGuard, Iterator, Generic
from typing import Union, ClassVar, Optional, Iterable, Any, TypeVar, Callable, Sequence, TypeGuard, Iterator, Generic, Generator
T = TypeVar("T")
U = TypeVar("U")
@ -181,6 +181,28 @@ class Profiling(contextlib.ContextDecorator):
colored(_format_fcn(fcn).ljust(50), "yellow"),
colored(f"<- {(scallers[0][1][2]/tottime)*100:3.0f}% {_format_fcn(scallers[0][0])}", "BLACK") if scallers else '')
# Immutable (frozen) descriptor for the name of a trace event. Only display_name is required;
# the remaining fields are optional metadata and default to empty / None.
# It can be used where a plain string name is accepted: ProfileRangeEvent.name and the `name`
# parameter of cpu_profile are both typed `str|TracingKey`.
@dataclass(frozen=True)
class TracingKey:
display_name:str # display name of this trace event
keys:tuple[str, ...]=() # optional keys to search for related traces
fmt:str|None=None # optional detailed formatting
cat:str|None=None # optional category to color this by
class ProfileEvent:
  """Common base class of all profiler events; it carries no data of its own."""

@dataclass
class ProfileRangeEvent(ProfileEvent):
  """Profiler event covering a time range on one device.

  The dataclass is intentionally not frozen: `en` starts as None and is filled in
  once the range has ended.
  """
  device:str                     # label of the device the range belongs to
  name:str|TracingKey            # plain string or a TracingKey
  st:decimal.Decimal             # start timestamp
  en:decimal.Decimal|None=None   # end timestamp, None until the range is closed
  is_copy:bool=False             # flag stored as given by the creator of the event

# module-level list of recorded events; cpu_profile appends finished ranges here
cpu_events:list[ProfileEvent] = []
# Context manager that times the enclosed block on the host and yields the ProfileRangeEvent it is filling in.
#   name:    label of the range, either a plain string or a TracingKey
#   device:  device label stored on the event (defaults to "CPU")
#   is_copy: stored on the event unchanged
#   display: when falsy, the finished event is not added to cpu_events
# Timestamps are time.perf_counter_ns() divided by 1000 (i.e. microseconds), kept as decimal.Decimal.
@contextlib.contextmanager
def cpu_profile(name:str|TracingKey, device="CPU", is_copy=False, display=True) -> Generator[ProfileRangeEvent, None, None]:
# the start time is stamped on entry; `en` keeps its default of None until the block exits
res = ProfileRangeEvent(device, name, decimal.Decimal(time.perf_counter_ns()) / 1000, is_copy=is_copy)
try: yield res
finally:
# the end time is stamped in `finally`, so it is set even when the body raises
res.en = decimal.Decimal(time.perf_counter_ns()) / 1000
# the event is recorded only when the PROFILE flag is truthy and the caller asked for it to be displayed
# NOTE(review): indentation is lost in this view, so whether this line sits inside `finally` cannot be confirmed here
if PROFILE and display: cpu_events.append(res)
# *** universal database cache ***
cache_dir: str = os.path.join(getenv("XDG_CACHE_HOME", os.path.expanduser("~/Library/Caches" if OSX else "~/.cache")), "tinygrad")

View file

@ -6,8 +6,8 @@ from dataclasses import dataclass
from tinygrad.runtime.support.hcq import HCQCompiled, HCQAllocator, HCQBuffer, HWQueue, CLikeArgsState, HCQSignal, HCQProgram, FileIOInterface
from tinygrad.runtime.support.hcq import MMIOInterface
from tinygrad.uop.ops import sint
from tinygrad.device import Compiled, ProfileEvent, BufferSpec, PROFILE
from tinygrad.helpers import getenv, to_mv, round_up, data64_le, all_same, flatten, DEBUG, AMD_LLVM
from tinygrad.device import Compiled, BufferSpec
from tinygrad.helpers import getenv, to_mv, round_up, data64_le, all_same, flatten, DEBUG, AMD_LLVM, PROFILE, ProfileEvent
from tinygrad.renderer.cstyle import AMDRenderer
from tinygrad.renderer.llvmir import AMDLLVMRenderer
from tinygrad.runtime.autogen import kfd, hsa, pci, sqtt

View file

@ -1,7 +1,7 @@
import os, pathlib, struct, ctypes, tempfile, functools, contextlib, decimal, platform
from typing import Any, Union, cast
from tinygrad.helpers import prod, to_mv, getenv, round_up, cache_dir, T, init_c_struct_t, PROFILE
from tinygrad.device import Compiled, Compiler, CompileError, LRUAllocator, cpu_profile, ProfileDeviceEvent, ProfileRangeEvent
from tinygrad.helpers import prod, to_mv, getenv, round_up, cache_dir, T, init_c_struct_t, PROFILE, ProfileRangeEvent, cpu_profile
from tinygrad.device import Compiled, Compiler, CompileError, LRUAllocator, ProfileDeviceEvent
from tinygrad.renderer.cstyle import MetalRenderer
class objc_id(ctypes.c_void_p): # This prevents ctypes from converting response to plain int, and dict.fromkeys() can use it to dedup

View file

@ -1,9 +1,9 @@
from __future__ import annotations
from typing import cast, Callable, Type, TypeVar, Generic, Any, ClassVar
import contextlib, decimal, statistics, time, ctypes, array, os, fcntl, struct, traceback
from tinygrad.helpers import PROFILE, getenv, to_mv, round_up
from tinygrad.helpers import PROFILE, getenv, to_mv, round_up, ProfileRangeEvent
from tinygrad.renderer import Renderer
from tinygrad.device import BufferSpec, Compiler, Compiled, LRUAllocator, ProfileRangeEvent, ProfileDeviceEvent, ProfileProgramEvent
from tinygrad.device import BufferSpec, Compiler, Compiled, LRUAllocator, ProfileDeviceEvent, ProfileProgramEvent
from tinygrad.uop.ops import sym_infer, sint, Variable, UOp
from tinygrad.runtime.autogen import libc

View file

@ -6,7 +6,7 @@ from tinygrad.uop import Ops, GroupOp
from tinygrad.uop.mathtraits import MathTrait
from tinygrad.dtype import ConstType, ImageDType, dtypes, DType, truncate
from tinygrad.helpers import ContextVar, all_int, prod, getenv, all_same, Context, partition, temp, unwrap, T, argfix, Metadata, flatten
from tinygrad.helpers import PICKLE_BUFFERS, PROFILE, dedup, cdiv, cmod, diskcache_put, to_function_name
from tinygrad.helpers import PICKLE_BUFFERS, PROFILE, dedup, cdiv, cmod, diskcache_put, to_function_name, cpu_profile, TracingKey
if TYPE_CHECKING:
from tinygrad.shape.shapetracker import ShapeTracker
from tinygrad.device import Buffer, MultiBuffer
@ -769,13 +769,6 @@ class TrackedGraphRewrite:
depth:int # depth if it's a subrewrite
bottom_up:bool
@dataclass(frozen=True)
class TracingKey:
display_name:str # display name of this trace event
keys:tuple[str, ...]=() # optional keys to search for related traces
fmt:str|None=None # optional detailed formatting
cat:str|None=None # optional category to color this by
tracked_keys:list[TracingKey] = []
tracked_ctxs:list[list[TrackedGraphRewrite]] = []
_name_cnt:dict[str, itertools.count] = {}
@ -794,8 +787,6 @@ def track_rewrites(name:Callable[..., str|TracingKey]|bool=True):
if TRACK_MATCH_STATS >= 2:
tracked_keys.append(key:=TracingKey(n:=f"{fn} n{next(_name_cnt.setdefault(fn, itertools.count(1)))}", (n,), cat=fn))
tracked_ctxs.append([])
# late import!
from tinygrad.device import cpu_profile
with cpu_profile(key, "TINY") as e:
ret = func(*args, **kwargs)
if TRACK_MATCH_STATS >= 2 and callable(name):

View file

@ -3,9 +3,9 @@ import multiprocessing, pickle, difflib, os, threading, json, time, sys, webbrow
from http.server import BaseHTTPRequestHandler
from urllib.parse import parse_qs, urlparse
from typing import Any, TypedDict, Generator
from tinygrad.helpers import colored, getenv, tqdm, unwrap, word_wrap, TRACEMETA
from tinygrad.uop.ops import TrackedGraphRewrite, TracingKey, UOp, Ops, printable, GroupOp, srender, sint
from tinygrad.device import ProfileEvent, ProfileDeviceEvent, ProfileRangeEvent, ProfileGraphEvent, ProfileGraphEntry, ProfilePointEvent
from tinygrad.helpers import colored, getenv, tqdm, unwrap, word_wrap, TRACEMETA, ProfileEvent, ProfileRangeEvent, TracingKey
from tinygrad.uop.ops import TrackedGraphRewrite, UOp, Ops, printable, GroupOp, srender, sint
from tinygrad.device import ProfileDeviceEvent, ProfileGraphEvent, ProfileGraphEntry, ProfilePointEvent
from tinygrad.dtype import dtypes
uops_colors = {Ops.LOAD: "#ffc0c0", Ops.STORE: "#87CEEB", Ops.CONST: "#e0e0e0", Ops.VCONST: "#e0e0e0", Ops.REDUCE: "#FF5B5B",