mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-06-24 02:14:17 +00:00
move cpu_profile and shared ProfileEvents from device.py to helpers [pr] (#11126)
* move cpu_profile and shared ProfileEvents to helpers [pr]
* TestProfiler.test_cpu_profile
* update test_viz.py
* TestProfiler.test_profile_multiops ordering, it's different streams now
This commit is contained in:
parent
397826f0b4
commit
3dfc0ff887
11 changed files with 56 additions and 53 deletions
|
|
@ -1,6 +1,6 @@
|
|||
import sys, pickle, decimal, json
|
||||
from tinygrad.device import ProfileEvent, ProfileDeviceEvent, ProfileRangeEvent, ProfileGraphEvent
|
||||
from tinygrad.helpers import tqdm, temp
|
||||
from tinygrad.device import ProfileDeviceEvent, ProfileGraphEvent
|
||||
from tinygrad.helpers import tqdm, temp, ProfileEvent, ProfileRangeEvent
|
||||
|
||||
devices:dict[str, tuple[decimal.Decimal, decimal.Decimal, int]] = {}
|
||||
def prep_ts(device:str, ts:decimal.Decimal, is_copy):
  """Return `ts` shifted by the per-device offset stored in `devices`, truncated to an int.

  `is_copy` is used directly as the index into the `devices[device]` tuple, so a
  falsy value picks entry 0 and a truthy value picks entry 1.
  NOTE(review): presumably entry 0 is the compute offset and entry 1 the copy offset — confirm.
  """
  base = decimal.Decimal(ts)
  return int(base + devices[device][is_copy])
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
import unittest, struct, contextlib, statistics, time
|
||||
from tinygrad import Device, Tensor, dtypes, TinyJit
|
||||
from tinygrad.helpers import CI, getenv, Context
|
||||
from tinygrad.device import Buffer, BufferSpec, Compiled, ProfileRangeEvent, ProfileDeviceEvent, ProfileGraphEvent, cpu_profile
|
||||
from tinygrad.helpers import CI, getenv, Context, ProfileRangeEvent, cpu_profile, cpu_events
|
||||
from tinygrad.device import Buffer, BufferSpec, Compiled, ProfileDeviceEvent, ProfileGraphEvent
|
||||
from tinygrad.runtime.support.hcq import HCQCompiled
|
||||
from tinygrad.engine.realize import get_runner
|
||||
|
||||
|
|
@ -11,6 +11,7 @@ MOCKGPU = getenv("MOCKGPU")
|
|||
def helper_collect_profile(*devs):
|
||||
for dev in devs: dev.synchronize()
|
||||
Compiled.profile_events = [x for x in Compiled.profile_events if isinstance(x, ProfileDeviceEvent) and x.device.startswith("METAL")]
|
||||
cpu_events.clear()
|
||||
|
||||
profile_list = []
|
||||
with Context(PROFILE=1):
|
||||
|
|
@ -18,6 +19,7 @@ def helper_collect_profile(*devs):
|
|||
for dev in devs: dev.synchronize()
|
||||
for dev in devs: dev._at_profile_finalize()
|
||||
for x in Compiled.profile_events: profile_list.append(x)
|
||||
profile_list.extend(cpu_events)
|
||||
|
||||
def helper_profile_filter_device(profile, device:str):
|
||||
assert any(getattr(x, "device", None) == device and isinstance(x, ProfileDeviceEvent) for x in profile), f"device {device} is not registred"
|
||||
|
|
@ -73,13 +75,15 @@ class TestProfiler(unittest.TestCase):
|
|||
evs = [x for x in profile if isinstance(x, ProfileRangeEvent)]
|
||||
|
||||
assert len(evs) == 3, "3 kernel runs are expected"
|
||||
assert evs[0].is_copy, "kernel should be copy"
|
||||
assert evs[1].name == runner_name, "kernel name is not correct"
|
||||
assert not evs[1].is_copy, "kernel should not be copy"
|
||||
assert evs[2].is_copy, "kernel should be copy"
|
||||
# NOTE: order of events does not matter, the tool is responsible for sorting them
|
||||
copy_events = [e for e in evs if e.is_copy]
|
||||
self.assertEqual(len(copy_events), 2)
|
||||
|
||||
for i in range(1, 3):
|
||||
assert evs[i].st > evs[i-1].en, "timestamp not aranged"
|
||||
prg_events = [e for e in evs if not e.is_copy]
|
||||
assert prg_events[0].name == runner_name, "kernel name is not correct"
|
||||
|
||||
#for i in range(1, 3):
|
||||
# assert evs[i].st > evs[i-1].en, "timestamp not aranged"
|
||||
|
||||
def test_profile_multidev(self):
|
||||
d1 = Device[f"{Device.DEFAULT}:1"]
|
||||
|
|
|
|||
|
|
@ -1,11 +1,11 @@
|
|||
import unittest, decimal, json
|
||||
from dataclasses import dataclass
|
||||
|
||||
from tinygrad.uop.ops import UOp, UPat, Ops, PatternMatcher, TrackedPatternMatcher, TracingKey
|
||||
from tinygrad.uop.ops import UOp, UPat, Ops, PatternMatcher, TrackedPatternMatcher
|
||||
from tinygrad.uop.ops import graph_rewrite, track_rewrites, TRACK_MATCH_STATS
|
||||
from tinygrad.uop.symbolic import sym
|
||||
from tinygrad.dtype import dtypes
|
||||
from tinygrad.helpers import PROFILE, colored, ansistrip, flatten
|
||||
from tinygrad.helpers import PROFILE, colored, ansistrip, flatten, TracingKey, ProfileRangeEvent
|
||||
from tinygrad.device import Buffer
|
||||
|
||||
@track_rewrites(name=True)
|
||||
|
|
@ -230,7 +230,7 @@ class TestVizIntegration(TestViz):
|
|||
self.assertEqual(lst[0]["name"], "Schedule 1 Kernel n1")
|
||||
self.assertEqual(lst[1]["name"], prg.name)
|
||||
|
||||
from tinygrad.device import ProfileDeviceEvent, ProfileRangeEvent, ProfileGraphEvent, ProfileGraphEntry
|
||||
from tinygrad.device import ProfileDeviceEvent, ProfileGraphEvent, ProfileGraphEntry
|
||||
from tinygrad.viz.serve import get_profile
|
||||
|
||||
class TestVizProfiler(unittest.TestCase):
|
||||
|
|
|
|||
|
|
@ -1,13 +1,12 @@
|
|||
from __future__ import annotations
|
||||
from dataclasses import dataclass, replace, field
|
||||
from collections import defaultdict
|
||||
from typing import Optional, Any, Generic, TypeVar, Iterator, Generator
|
||||
from typing import Optional, Any, Generic, TypeVar, Iterator
|
||||
import importlib, inspect, functools, pathlib, os, ctypes, ctypes.util, platform, contextlib, sys, re, atexit, pickle, decimal, time
|
||||
from tinygrad.helpers import CI, OSX, LRU, getenv, diskcache_get, diskcache_put, DEBUG, GlobalCounters, flat_mv, from_mv, PROFILE, temp, mv_address, \
|
||||
cpu_time_execution, colored, Context, round_up, DISABLE_COMPILER_CACHE, ALLOW_DEVICE_USAGE
|
||||
cpu_time_execution, colored, Context, round_up, DISABLE_COMPILER_CACHE, ALLOW_DEVICE_USAGE, cpu_events, ProfileEvent
|
||||
from tinygrad.dtype import DType, ImageDType, PtrDType, dtypes, _to_np_dtype
|
||||
from tinygrad.renderer import Renderer
|
||||
from tinygrad.uop.ops import TracingKey
|
||||
|
||||
# **************** Device ****************
|
||||
|
||||
|
|
@ -51,15 +50,10 @@ atexit.register(lambda: [Device[dn].finalize() for dn in Device._opened_devices]
|
|||
|
||||
# **************** Profile ****************
|
||||
|
||||
class ProfileEvent: pass
|
||||
|
||||
@dataclass(frozen=True)
class ProfileDeviceEvent(ProfileEvent):
  """Immutable profile event holding a device name and two Decimal offsets for it.

  Both offsets default to Decimal(0).
  """
  device:str
  comp_tdiff:decimal.Decimal=decimal.Decimal(0)  # NOTE(review): presumably the compute timeline offset — confirm
  copy_tdiff:decimal.Decimal=decimal.Decimal(0)  # NOTE(review): presumably the copy timeline offset — confirm
|
||||
|
||||
@dataclass
|
||||
class ProfileRangeEvent(ProfileEvent): device:str; name:str|TracingKey; st:decimal.Decimal; en:decimal.Decimal|None=None; is_copy:bool=False # noqa: E702
|
||||
|
||||
@dataclass(frozen=True)
class ProfilePointEvent(ProfileEvent):
  """Immutable profile event marking a single timestamp (`st`) on a device."""
  device:str
  name:str
  st:decimal.Decimal
  ref:int  # NOTE(review): meaning of `ref` is not visible here — confirm against the producer
  arg:dict=field(default_factory=dict)  # fresh dict per instance; a shared mutable default would leak between events
|
||||
|
||||
|
|
@ -72,14 +66,6 @@ class ProfileGraphEntry: device:str; name:str; st_id:int; en_id:int; is_copy:boo
|
|||
@dataclass(frozen=True)
class ProfileGraphEvent(ProfileEvent):
  """Immutable profile event describing a graph as entries, dependency lists and Decimal values.

  NOTE(review): presumably each entry's `st_id`/`en_id` index into `sigs`, and `deps[i]`
  lists the entries that entry `i` depends on — confirm against the producer.
  """
  ents:list[ProfileGraphEntry]
  deps:list[list[int]]
  sigs:list[decimal.Decimal]
|
||||
|
||||
@contextlib.contextmanager
|
||||
def cpu_profile(name:str|TracingKey, device="CPU", is_copy=False, display=True) -> Generator[ProfileRangeEvent, None, None]:
|
||||
res = ProfileRangeEvent(device, name, decimal.Decimal(time.perf_counter_ns()) / 1000, is_copy=is_copy)
|
||||
try: yield res
|
||||
finally:
|
||||
res.en = decimal.Decimal(time.perf_counter_ns()) / 1000
|
||||
if PROFILE and display: Compiled.profile_events.append(res)
|
||||
|
||||
# **************** Buffer + Allocators ****************
|
||||
|
||||
|
||||
|
|
@ -396,7 +382,7 @@ if PROFILE:
|
|||
for dev in devs: dev.synchronize()
|
||||
for dev in devs: dev._at_profile_finalize()
|
||||
|
||||
with open(fn:=temp("profile.pkl", append_user=True), "wb") as f: pickle.dump(Compiled.profile_events+Buffer.profile_events, f)
|
||||
with open(fn:=temp("profile.pkl", append_user=True), "wb") as f: pickle.dump(cpu_events+Compiled.profile_events+Buffer.profile_events, f)
|
||||
|
||||
if not getenv("SQTT", 0):
|
||||
from tinygrad.uop.ops import launch_viz
|
||||
|
|
|
|||
|
|
@ -1,9 +1,9 @@
|
|||
from typing import Optional, cast, Generator
|
||||
import time, pprint
|
||||
from dataclasses import dataclass, replace, field
|
||||
from tinygrad.helpers import all_same, colored, DEBUG, GlobalCounters, ansilen, BEAM, NOOPT, all_int, CAPTURING, Metadata, TRACEMETA
|
||||
from tinygrad.helpers import all_same, colored, DEBUG, GlobalCounters, ansilen, BEAM, NOOPT, all_int, CAPTURING, Metadata, TRACEMETA, TracingKey
|
||||
from tinygrad.helpers import DEVECTORIZE, time_to_str, VALIDATE_WITH_CPU, getenv
|
||||
from tinygrad.uop.ops import Ops, PatternMatcher, UOp, UPat, Variable, sym_infer, graph_rewrite, print_uops, track_rewrites, TracingKey
|
||||
from tinygrad.uop.ops import Ops, PatternMatcher, UOp, UPat, Variable, sym_infer, graph_rewrite, print_uops, track_rewrites
|
||||
from tinygrad.device import Device, Buffer
|
||||
from tinygrad.renderer import Renderer, ProgramSpec, Estimates
|
||||
from tinygrad.engine.schedule import ScheduleItem
|
||||
|
|
|
|||
|
|
@ -1,8 +1,8 @@
|
|||
from __future__ import annotations
|
||||
import os, functools, platform, time, re, contextlib, operator, hashlib, pickle, sqlite3, tempfile, pathlib, string, ctypes, sys, gzip, getpass
|
||||
import urllib.request, subprocess, shutil, math, types, copyreg, inspect, importlib
|
||||
import urllib.request, subprocess, shutil, math, types, copyreg, inspect, importlib, decimal
|
||||
from dataclasses import dataclass
|
||||
from typing import Union, ClassVar, Optional, Iterable, Any, TypeVar, Callable, Sequence, TypeGuard, Iterator, Generic
|
||||
from typing import Union, ClassVar, Optional, Iterable, Any, TypeVar, Callable, Sequence, TypeGuard, Iterator, Generic, Generator
|
||||
|
||||
T = TypeVar("T")
|
||||
U = TypeVar("U")
|
||||
|
|
@ -181,6 +181,28 @@ class Profiling(contextlib.ContextDecorator):
|
|||
colored(_format_fcn(fcn).ljust(50), "yellow"),
|
||||
colored(f"<- {(scallers[0][1][2]/tottime)*100:3.0f}% {_format_fcn(scallers[0][0])}", "BLACK") if scallers else '')
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class TracingKey:
|
||||
display_name:str # display name of this trace event
|
||||
keys:tuple[str, ...]=() # optional keys to search for related traces
|
||||
fmt:str|None=None # optional detailed formatting
|
||||
cat:str|None=None # optional category to color this by
|
||||
|
||||
class ProfileEvent:
  """Common base type for profiler events; it declares no fields or behavior of its own."""
|
||||
|
||||
@dataclass
class ProfileRangeEvent(ProfileEvent):
  """Profile event covering a time range from `st` to `en` on a device.

  Deliberately not frozen: cpu_profile creates the event with only `st` set and
  assigns `en` once the timed region has finished.
  """
  device:str
  name:str|TracingKey
  st:decimal.Decimal
  en:decimal.Decimal|None=None  # None until the range has been closed
  is_copy:bool=False
|
||||
|
||||
# module-level list; cpu_profile appends each finished range event here when PROFILE is set and display is true
cpu_events:list[ProfileEvent] = []
|
||||
@contextlib.contextmanager
def cpu_profile(name:str|TracingKey, device="CPU", is_copy=False, display=True) -> Generator[ProfileRangeEvent, None, None]:
  """Context manager that times the enclosed block and yields its ProfileRangeEvent.

  The start time is taken on entry and the end time on exit, both as
  time.perf_counter_ns() / 1000 (i.e. microseconds) held in a Decimal.

  Args:
    name: label stored on the event.
    device: device string stored on the event.
    is_copy: stored on the event as its `is_copy` flag.
    display: when false the event is still timed and yielded, but never appended to cpu_events.

  Yields:
    The ProfileRangeEvent for this block; its `en` is None until the block exits.
  """
  event = ProfileRangeEvent(device, name, decimal.Decimal(time.perf_counter_ns()) / 1000, is_copy=is_copy)
  try:
    yield event
  finally:
    # finally: the end time is set and the event recorded even if the block raised
    event.en = decimal.Decimal(time.perf_counter_ns()) / 1000
    if PROFILE and display: cpu_events.append(event)
|
||||
|
||||
# *** universal database cache ***
|
||||
|
||||
cache_dir: str = os.path.join(getenv("XDG_CACHE_HOME", os.path.expanduser("~/Library/Caches" if OSX else "~/.cache")), "tinygrad")
|
||||
|
|
|
|||
|
|
@ -6,8 +6,8 @@ from dataclasses import dataclass
|
|||
from tinygrad.runtime.support.hcq import HCQCompiled, HCQAllocator, HCQBuffer, HWQueue, CLikeArgsState, HCQSignal, HCQProgram, FileIOInterface
|
||||
from tinygrad.runtime.support.hcq import MMIOInterface
|
||||
from tinygrad.uop.ops import sint
|
||||
from tinygrad.device import Compiled, ProfileEvent, BufferSpec, PROFILE
|
||||
from tinygrad.helpers import getenv, to_mv, round_up, data64_le, all_same, flatten, DEBUG, AMD_LLVM
|
||||
from tinygrad.device import Compiled, BufferSpec
|
||||
from tinygrad.helpers import getenv, to_mv, round_up, data64_le, all_same, flatten, DEBUG, AMD_LLVM, PROFILE, ProfileEvent
|
||||
from tinygrad.renderer.cstyle import AMDRenderer
|
||||
from tinygrad.renderer.llvmir import AMDLLVMRenderer
|
||||
from tinygrad.runtime.autogen import kfd, hsa, pci, sqtt
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
import os, pathlib, struct, ctypes, tempfile, functools, contextlib, decimal, platform
|
||||
from typing import Any, Union, cast
|
||||
from tinygrad.helpers import prod, to_mv, getenv, round_up, cache_dir, T, init_c_struct_t, PROFILE
|
||||
from tinygrad.device import Compiled, Compiler, CompileError, LRUAllocator, cpu_profile, ProfileDeviceEvent, ProfileRangeEvent
|
||||
from tinygrad.helpers import prod, to_mv, getenv, round_up, cache_dir, T, init_c_struct_t, PROFILE, ProfileRangeEvent, cpu_profile
|
||||
from tinygrad.device import Compiled, Compiler, CompileError, LRUAllocator, ProfileDeviceEvent
|
||||
from tinygrad.renderer.cstyle import MetalRenderer
|
||||
|
||||
class objc_id(ctypes.c_void_p): # This prevents ctypes from converting response to plain int, and dict.fromkeys() can use it to dedup
|
||||
|
|
|
|||
|
|
@ -1,9 +1,9 @@
|
|||
from __future__ import annotations
|
||||
from typing import cast, Callable, Type, TypeVar, Generic, Any, ClassVar
|
||||
import contextlib, decimal, statistics, time, ctypes, array, os, fcntl, struct, traceback
|
||||
from tinygrad.helpers import PROFILE, getenv, to_mv, round_up
|
||||
from tinygrad.helpers import PROFILE, getenv, to_mv, round_up, ProfileRangeEvent
|
||||
from tinygrad.renderer import Renderer
|
||||
from tinygrad.device import BufferSpec, Compiler, Compiled, LRUAllocator, ProfileRangeEvent, ProfileDeviceEvent, ProfileProgramEvent
|
||||
from tinygrad.device import BufferSpec, Compiler, Compiled, LRUAllocator, ProfileDeviceEvent, ProfileProgramEvent
|
||||
from tinygrad.uop.ops import sym_infer, sint, Variable, UOp
|
||||
from tinygrad.runtime.autogen import libc
|
||||
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@ from tinygrad.uop import Ops, GroupOp
|
|||
from tinygrad.uop.mathtraits import MathTrait
|
||||
from tinygrad.dtype import ConstType, ImageDType, dtypes, DType, truncate
|
||||
from tinygrad.helpers import ContextVar, all_int, prod, getenv, all_same, Context, partition, temp, unwrap, T, argfix, Metadata, flatten
|
||||
from tinygrad.helpers import PICKLE_BUFFERS, PROFILE, dedup, cdiv, cmod, diskcache_put, to_function_name
|
||||
from tinygrad.helpers import PICKLE_BUFFERS, PROFILE, dedup, cdiv, cmod, diskcache_put, to_function_name, cpu_profile, TracingKey
|
||||
if TYPE_CHECKING:
|
||||
from tinygrad.shape.shapetracker import ShapeTracker
|
||||
from tinygrad.device import Buffer, MultiBuffer
|
||||
|
|
@ -769,13 +769,6 @@ class TrackedGraphRewrite:
|
|||
depth:int # depth if it's a subrewrite
|
||||
bottom_up:bool
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class TracingKey:
|
||||
display_name:str # display name of this trace event
|
||||
keys:tuple[str, ...]=() # optional keys to search for related traces
|
||||
fmt:str|None=None # optional detailed formatting
|
||||
cat:str|None=None # optional category to color this by
|
||||
|
||||
tracked_keys:list[TracingKey] = []
|
||||
tracked_ctxs:list[list[TrackedGraphRewrite]] = []
|
||||
_name_cnt:dict[str, itertools.count] = {}
|
||||
|
|
@ -794,8 +787,6 @@ def track_rewrites(name:Callable[..., str|TracingKey]|bool=True):
|
|||
if TRACK_MATCH_STATS >= 2:
|
||||
tracked_keys.append(key:=TracingKey(n:=f"{fn} n{next(_name_cnt.setdefault(fn, itertools.count(1)))}", (n,), cat=fn))
|
||||
tracked_ctxs.append([])
|
||||
# late import!
|
||||
from tinygrad.device import cpu_profile
|
||||
with cpu_profile(key, "TINY") as e:
|
||||
ret = func(*args, **kwargs)
|
||||
if TRACK_MATCH_STATS >= 2 and callable(name):
|
||||
|
|
|
|||
|
|
@ -3,9 +3,9 @@ import multiprocessing, pickle, difflib, os, threading, json, time, sys, webbrow
|
|||
from http.server import BaseHTTPRequestHandler
|
||||
from urllib.parse import parse_qs, urlparse
|
||||
from typing import Any, TypedDict, Generator
|
||||
from tinygrad.helpers import colored, getenv, tqdm, unwrap, word_wrap, TRACEMETA
|
||||
from tinygrad.uop.ops import TrackedGraphRewrite, TracingKey, UOp, Ops, printable, GroupOp, srender, sint
|
||||
from tinygrad.device import ProfileEvent, ProfileDeviceEvent, ProfileRangeEvent, ProfileGraphEvent, ProfileGraphEntry, ProfilePointEvent
|
||||
from tinygrad.helpers import colored, getenv, tqdm, unwrap, word_wrap, TRACEMETA, ProfileEvent, ProfileRangeEvent, TracingKey
|
||||
from tinygrad.uop.ops import TrackedGraphRewrite, UOp, Ops, printable, GroupOp, srender, sint
|
||||
from tinygrad.device import ProfileDeviceEvent, ProfileGraphEvent, ProfileGraphEntry, ProfilePointEvent
|
||||
from tinygrad.dtype import dtypes
|
||||
|
||||
uops_colors = {Ops.LOAD: "#ffc0c0", Ops.STORE: "#87CEEB", Ops.CONST: "#e0e0e0", Ops.VCONST: "#e0e0e0", Ops.REDUCE: "#FF5B5B",
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue