mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-06-24 02:14:17 +00:00
am: autogen asic_regs (#16004)
This commit is contained in:
parent
26406bed83
commit
1c8cb0769a
7 changed files with 10213 additions and 41 deletions
2
.github/workflows/autogen.yml
vendored
2
.github/workflows/autogen.yml
vendored
|
|
@ -48,7 +48,7 @@ jobs:
|
|||
python3 -c "from tinygrad.runtime.autogen import opencl"
|
||||
python3 -c "from tinygrad.runtime.autogen import cuda, nvrtc, nvjitlink, nv_570, nv_580, nv"
|
||||
python3 -c "from tinygrad.runtime.autogen import comgr_3, hsa, hip, amd_gpu, sqtt, rocprof, amdgpu_kd, amdgpu_drm"
|
||||
python3 -c "from tinygrad.runtime.autogen.am import am, pm4_soc15, pm4_nv, sdma_4_0_0, sdma_5_0_0, sdma_6_0_0, smu_v13_0_0, smu_v13_0_6, smu_v13_0_12, smu_v14_0_2, fw, navi_offsets, vega_offsets"
|
||||
python3 -c "from tinygrad.runtime.autogen.am import am, pm4_soc15, pm4_nv, sdma_4_0_0, sdma_5_0_0, sdma_6_0_0, smu_v13_0_0, smu_v13_0_6, smu_v13_0_12, smu_v14_0_2, fw, navi_offsets, vega_offsets, regs"
|
||||
python3 -c "from tinygrad.runtime.autogen import libc, kfd, io_uring, ib, pci, vfio"
|
||||
python3 -c "from tinygrad.runtime.autogen import llvm"
|
||||
python3 -c "from tinygrad.runtime.autogen import webgpu"
|
||||
|
|
|
|||
|
|
@ -133,8 +133,7 @@ class MockPSP(MockIPBlock):
|
|||
|
||||
class MockSMU(MockIPBlock):
|
||||
def __init__(self, gpu, mmio):
|
||||
try: regs = import_asic_regs('mp', (11, 0), cls=functools.partial(AMDReg, bases={0: IP_BASES[am.MP1_HWIP]}))
|
||||
except Exception: regs = {}
|
||||
regs = import_asic_regs('mp', (11, 0, 0), cls=functools.partial(AMDReg, bases={0: IP_BASES[am.MP1_HWIP]}))
|
||||
super().__init__(gpu, mmio, regs)
|
||||
self._msg_pending = False
|
||||
def r(n): return self.reg(f"mmMP1_SMN_C2PMSG_{n}")
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
import pathlib, hashlib
|
||||
import pathlib, hashlib, re, itertools
|
||||
from tinygrad.runtime.autogen import load, root
|
||||
|
||||
am_src="https://github.com/ROCm/ROCK-Kernel-Driver/archive/33970e1351f5e511029602454979f3de7e22260f.tar.gz"
|
||||
|
|
@ -6,6 +6,26 @@ AMD, AMDINC = "{}/drivers/gpu/drm/amd", "{}/drivers/gpu/drm/amd/include"
|
|||
inc, kern_rules = ["-include", "stdint.h"], [(r'le32_to_cpu', ''),]
|
||||
fw_src="https://gitlab.com/kernel-firmware/linux-firmware/-/archive/1e2c15348485939baf1b6d1f5a7a3b799d80703d/1e2c15348485939baf1b6d1f5a7a3b799d80703d.tar.gz"
|
||||
|
||||
# Which asic_reg headers get a generated register table: header-name prefix -> list of version tuples.
# The "regs" case of __getattr__ walks this dict (versions in sorted order) and builds one header path stem per
# (prefix, version) pair as "<prefix>_<version parts joined by '_'>"; the directory is the prefix, except that
# "osssys" headers are looked up under "oss".
reg_files = {
|
||||
"gc": [(9,4,3), (11,0,0), (11,0,3), (11,5,0), (12,0,0)],
|
||||
"mmhub": [(1,8,0), (3,0,0), (3,0,1), (3,0,2), (3,3,0), (4,1,0)],
|
||||
"nbio": [(4,3,0), (7,2,0), (7,7,0), (7,9,0), (7,11,0)], "nbif": [(6,3,1)],
|
||||
"mp": [(11,0,0), (13,0,0), (14,0,2)], "hdp": [(4,4,2), (6,0,0), (7,0,0)],
|
||||
"osssys": [(4,4,2), (6,0,0), (6,1,0), (7,0,0)], "sdma": [(4,4,2)]
|
||||
}
|
||||
|
||||
# Per-prefix allow-list of registers to keep in the generated tables. Each entry is a regex fragment; genreg
# prepends "(mm|reg)" and applies re.match, so a fragment is anchored right after the mm/reg prefix of the
# register name and matches any name that starts with it.
# Keys are looked up after genreg maps "osssys" -> "oss", which is why this dict has "oss" where reg_files has "osssys".
# "nbif" reuses the very same list object as "nbio": the walrus below binds it to the module-level name `nbio`.
reg_patterns = {
|
||||
"gc": ["GCVM", "GCMC_VM", "CP_(HQD|MQD|MEC|ME_CNTL|PERFMON|RB_WPTR_POLL_CNTL|INT_CNTL|STAT|PFP_PRGRM|ME_PRGRM|COHER_START)", "COMPUTE_",
|
||||
"(SQ|GL2C|TCC)_PERFCOUNTER", "SQ_THREAD_TRACE", "SPI_(CONFIG_CNTL|COMPUTE_QUEUE_RESET)", "GRBM", "SH_MEM", "RLC", "TCP", "GB_ADDR_CONFIG",
|
||||
"SDMA[01]_(WATCHDOG_CNTL|UTCL1_(CNTL|PAGE)|MCU_CNTL|F32_CNTL|CNTL|QUEUE0_|RLC_CGCG_CTRL)", "SCRATCH_REG[67]"],
|
||||
"mmhub": ["MMVM", "MMMC_VM", "MM_ATC_L2_MISC_CG"],
|
||||
"nbio": (nbio:=["BIF_BX_PF[01]_GPU_HDP_FLUSH", "BIF_BX_PF0_RSMU", "BIF_BX0_(REMAP_HDP_MEM_FLUSH_CNTL|BIF_DOORBELL_INT_CNTL|PCIE_INDEX2|PCIE_DATA2)",
|
||||
"BIFC_(DOORBELL_ACCESS_EN_PF|GFX_INT_MONITOR_MASK)", "XCC_DOORBELL_FENCE", "DOORBELL0_CTRL_ENTRY", "GDC_S2A0_S2A_DOORBELL_ENTRY",
|
||||
"S2A_DOORBELL_ENTRY", "RCC_DEV0_EPF0_RCC_DOORBELL_APER_EN", "RCC_DEV0_EPF2_STRAP2"]),
|
||||
"nbif": nbio,
|
||||
"mp": ["MP([01]|ASP)_SMN_C2PMSG"], "hdp": ["HDP_MEM_POWER_CTRL"], "oss": ["IH_"], "sdma": ["SDMA_GFX", "SDMA_CNTL"]
|
||||
}
|
||||
|
||||
def __getattr__(nm):
|
||||
match nm:
|
||||
case "am": return load("am/am", [root/f"extra/amdpci/headers/{s}.h" for s in ["v11_structs", "v12_structs", "amdgpu_vm",
|
||||
|
|
@ -36,4 +56,27 @@ def __getattr__(nm):
|
|||
[f"{{}}/amdgpu/gc_*_{x}.bin" for x in ["pfp", "me", "mec", "imu", "rlc"]], srcs=fw_src, gen=genfw)
|
||||
case "navi_offsets": return load("am/navi_offsets", [f"{AMD}/include/sienna_cichlid_ip_offset.h"], srcs=am_src)
|
||||
case "vega_offsets": return load("am/vega_offsets", [f"{AMD}/include/vega20_ip_offset.h"], srcs=am_src)
|
||||
case "regs":
|
||||
def genreg(_, files, **kwargs):
  """Render the source text of the generated register-table module.

  For every header stem in `files`, reads `<stem>_offset.h` and `<stem>_sh_mask.h` and emits one dict literal named
  after the stem's last path component, mapping register name -> (offset, base_idx, fields). `fields` maps the
  lower-cased field name to (lowest set bit, highest set bit) of that field's `*_MASK` define.

  Args:
    _: unused positional argument.
    files: header path stems, without the `_offset.h` / `_sh_mask.h` suffix.
    **kwargs: accepted and ignored.

  Returns:
    The module source as one newline-joined string; its first line is `__all__`, listing every table name.

  Raises:
    KeyError: a stem's prefix has no entry in `reg_patterns`.
    OSError: one of the two headers of a stem cannot be read.
  """
  def split_name(name):
    # split at the first upper-case character: "regGRBM_CNTL" -> ("reg", "GRBM_CNTL")
    pos = next((i for i, c in enumerate(name) if c.isupper()), len(name))
    return name[:pos], name[pos:]

  names = [file.split('/')[-1] for file in files]
  out = ["__all__ = " + repr(names)]
  for file, nm in zip(files, names):
    # the mp_11_0_0 table keeps its name but is read from the mp_11_0 headers
    file = file.replace("mp_11_0_0", "mp_11_0")
    x = nm.split("_", 1)[0]
    prefix = {"osssys": "oss"}.get(x, x)
    # compiled once per header pair instead of once per #define
    matchers = [re.compile("(mm|reg)" + p) for p in reg_patterns[prefix]]

    # handle CDNA's different register names
    def normalize(reg):
      if prefix not in ("gc", "mmhub"): return reg
      head, tail = split_name(reg)
      return head + prefix.upper()[:2] + tail if tail.startswith(("VM_", "MC_VM_")) else reg

    def extract(lines, pat):
      return ((normalize(m.group(1)), int(m.group(2), 0)) for l in lines if (m := re.match(pat, l)))

    offset = pathlib.Path(f"{file}_offset.h").read_text().splitlines()
    sh_mask = pathlib.Path(f"{file}_sh_mask.h").read_text().splitlines()

    # offsets and *_BASE_IDX entries of the registers selected by reg_patterns
    defs = {k: v for k, v in extract(offset, r'#define\s+((?:mm|reg)\S+)\s+(0x[\da-fA-F]+|\d+)') if any(m.match(k) for m in matchers)}

    # Accumulate per register instead of itertools.groupby: groupby only merges *consecutive* lines, so a register
    # whose field masks are split across the header would keep just its last run of fields.
    fields = {}
    for name, mask in extract(sh_mask, r'#define\s+(\S+)_MASK\s+(0x[\da-fA-F]+|\d+)'):
      reg, sep, rest = name.partition('__')
      if not sep: continue  # a *_MASK define that is not REG__FIELD carries no field information
      fields.setdefault(reg, {})[rest.split('__')[0].lower()] = ((mask & -mask).bit_length() - 1, mask.bit_length() - 1)

    # registers without a matching *_BASE_IDX define are dropped
    regs = {reg: (off, defs[f"{reg}_BASE_IDX"], fields.get(split_name(reg)[1], {})) for reg, off in defs.items() if f"{reg}_BASE_IDX" in defs}
    print(f"defined {len(regs)} registers for {nm}")
    out.extend([f"{nm} = {{"] + [f" {k!r}: {v!r}," for k, v in regs.items()] + ["}"])
  return "\n".join(out)
|
||||
return load("am/regs", [AMDINC + "/asic_reg/" + {"osssys":"oss"}.get(pre, pre) + f"/{pre}_{'_'.join(map(str, ver))}"
|
||||
for pre in reg_files for ver in sorted(reg_files[pre])], srcs=am_src, gen=genreg)
|
||||
case _: raise AttributeError(f"no such autogen: {nm}")
|
||||
|
|
|
|||
10160
tinygrad/runtime/autogen/am/regs.py
Normal file
10160
tinygrad/runtime/autogen/am/regs.py
Normal file
File diff suppressed because it is too large
Load diff
|
|
@ -131,7 +131,7 @@ class AMDComputeQueue(HWQueue):
|
|||
return self
|
||||
|
||||
def memory_barrier(self):
|
||||
pf = '' if self.nbio.version[0] == 2 else '0' if self.nbio.version[:2] != (7, 11) else '1'
|
||||
pf = '0' if self.nbio.version[:2] != (7, 11) else '1'
|
||||
self.wait_reg_mem(reg=getattr(self.nbio, f'regBIF_BX_PF{pf}_GPU_HDP_FLUSH_REQ').addr[0],
|
||||
reg_done=getattr(self.nbio, f'regBIF_BX_PF{pf}_GPU_HDP_FLUSH_DONE').addr[0], value=0xffffffff)
|
||||
return self.acquire_mem()
|
||||
|
|
|
|||
|
|
@ -335,4 +335,4 @@ class AMDev:
|
|||
|
||||
for prefix, hwip in mods:
|
||||
self.__dict__.update(import_asic_regs(prefix, self.ip_ver[hwip], cls=functools.partial(AMRegister, adev=self, bases=self.regs_offset[hwip])))
|
||||
self.__dict__.update(import_asic_regs('mp', (11, 0), cls=functools.partial(AMRegister, adev=self, bases=self.regs_offset[am.MP1_HWIP])))
|
||||
self.__dict__.update(import_asic_regs('mp', (11, 0, 0), cls=functools.partial(AMRegister, adev=self, bases=self.regs_offset[am.MP1_HWIP])))
|
||||
|
|
|
|||
|
|
@ -1,5 +1,4 @@
|
|||
import functools, re, urllib, tinygrad.runtime.autogen
|
||||
from collections import defaultdict
|
||||
import functools, re, tinygrad.runtime.autogen.am
|
||||
from dataclasses import dataclass
|
||||
from tinygrad.helpers import getbits, fetch
|
||||
|
||||
|
|
@ -20,7 +19,6 @@ class AMDReg:
|
|||
@dataclass
|
||||
class AMDIP:
|
||||
name:str; version:tuple[int, ...]; bases:dict[int, tuple[int, ...]] # noqa: E702
|
||||
def __post_init__(self): self.version = fixup_ip_version(self.name, self.version)[0]
|
||||
|
||||
@functools.cached_property
|
||||
def regs(self): return import_asic_regs(self.name, self.version, cls=functools.partial(AMDReg, bases=self.bases))
|
||||
|
|
@ -37,7 +35,7 @@ def fixup_ip_version(ip:str, version:tuple[int, ...]) -> list[tuple[int, ...]]:
|
|||
if version[:len(ver)] == ver: return ovrd_ver
|
||||
return version
|
||||
|
||||
if ip in ['nbio', 'nbif']: version = _apply_ovrd({(3,3): (2,3,0), (7,3): (7,2,0)})
|
||||
if ip in ['nbio', 'nbif']: version = _apply_ovrd({(7,3): (7,2,0)})
|
||||
elif ip in ['mp', 'smu']: version = _apply_ovrd({(14,0,3): (14,0,2)})
|
||||
elif ip in ['gc']: version = _apply_ovrd({(9,5,0): (9,4,3)})
|
||||
elif ip in ['sdma']: version = _apply_ovrd({(4,4,4): (4,4,2)})
|
||||
|
|
@ -76,35 +74,7 @@ def import_pmc(ip) -> dict[str, tuple[str, int]]:
|
|||
return res
|
||||
|
||||
def import_asic_regs(prefix:str, version:tuple[int, ...], cls=AMDReg) -> dict[str, AMDReg]:
|
||||
def _split_name(name): return name[:(pos:=next((i for i,c in enumerate(name) if c.isupper()), len(name)))], name[pos:]
|
||||
def _extract_regs(txt):
|
||||
x = {}
|
||||
for k,v in {m.group(1): int(m.group(2), 0) for line in txt.splitlines() if (m:=re.match(r'#define\s+(\S+)\s+(0x[\da-fA-F]+|\d+)', line))}.items():
|
||||
if k.startswith('VM_') or k.startswith('MC_'): x[prefix.upper()[:2]+k] = v
|
||||
elif k.startswith('regVM_') or k.startswith('regMC_'): x["reg"+prefix.upper()[:2]+k[3:]] = v
|
||||
else: x[k] = v
|
||||
return x
|
||||
def _download_file(ver, suff) -> str:
|
||||
dir_prefix = {"osssys": "oss"}.get(prefix, prefix)
|
||||
fetch_name = f"{prefix}_{'_'.join(map(str, ver))}_{suff}.h"
|
||||
return header_download(f"include/asic_reg/{dir_prefix}/{fetch_name}", name=fetch_name, subdir="asic_regs")
|
||||
|
||||
for ver in fixup_ip_version(prefix, version):
|
||||
try: offs, sh_masks = _extract_regs(_download_file(ver, "offset")), _extract_regs(_download_file(ver, "sh_mask"))
|
||||
except urllib.error.HTTPError as e:
|
||||
if e.code == 404: continue
|
||||
raise
|
||||
|
||||
offsets = {k:v for k,v in offs.items() if _split_name(k)[0] in {'reg', 'mm'} and not k.endswith('_BASE_IDX')}
|
||||
bases = {k[:-len('_BASE_IDX')]:v for k,v in offs.items() if _split_name(k)[0] in {'reg', 'mm'} and k.endswith('_BASE_IDX')}
|
||||
|
||||
fields: defaultdict[str, dict[str, tuple[int, int]]] = defaultdict(dict)
|
||||
for field_name, field_mask in sh_masks.items():
|
||||
if not ('__' in field_name and field_name.endswith('_MASK')): continue
|
||||
reg_name, reg_field_name = field_name[:-len('_MASK')].split('__')
|
||||
if reg_name.startswith('MC_') or reg_name.startswith('VM_'): reg_name = f"{prefix.upper()[:2]}{reg_name}"
|
||||
fields[reg_name][reg_field_name.lower()] = ((field_mask & -field_mask).bit_length()-1, field_mask.bit_length()-1)
|
||||
|
||||
# NOTE: Some registers like regGFX_IMU_FUSESTRAP in gc_11_0_0 are missing base idx, just skip them
|
||||
return {reg:cls(name=reg, offset=off, segment=bases[reg], fields=fields[_split_name(reg)[1]]) for reg,off in offsets.items() if reg in bases}
|
||||
from tinygrad.runtime.autogen.am import regs
|
||||
if (mods:=[m for m in regs.__all__ if m.startswith(prefix) and (v:=tuple(map(int, m.split('_')[1:])))[0] == version[0] and v <= version]):
|
||||
return {reg:cls(name=reg, offset=off, segment=seg, fields=fields) for reg,(off,seg,fields) in getattr(regs, mods[-1]).items()}
|
||||
raise ImportError(f"Failed to load ASIC registers for {prefix.upper()} {'.'.join(map(str, version))}")
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue