assembly/amd: rename to better names (#14384)

* assembly/amd: rename to better names

* might help fuzzing segfault

* emu2 -> emu
This commit is contained in:
George Hotz 2026-01-28 10:00:54 +08:00 committed by GitHub
commit 88bc5ee212
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
18 changed files with 28 additions and 29 deletions

View file

@ -679,7 +679,7 @@ jobs:
python-version: '3.13'
- name: Verify AMD autogen is up to date
run: |
python -m extra.assembly.amd.amdxml
python -m extra.assembly.amd.generate
git diff --exit-code extra/assembly/amd/autogen/
- name: Install LLVM 21
run: |

View file

@ -48,13 +48,13 @@ from tinygrad.runtime.autogen import hsa
from tinygrad.helpers import Context, DEBUG, colored
from tinygrad.engine.realize import get_runner
from extra.assembly.amd.decode import decode_inst
from extra.assembly.amd import decode_inst
from extra.assembly.amd.autogen.rdna3.str_pcode import PCODE
from extra.assembly.amd.autogen.rdna3.ins import (SOP1, SOP2, SOPC, SOPK, SOPP, SMEM, VOP1, VOP1_SDST, VOP2, VOP3, VOP3_SDST, VOP3SD, VOP3P, VOPC,
DS, FLAT, GLOBAL, SCRATCH, VOPD, SOPPOp, SMEMOp, VOP1Op, VOP2Op, VOP3Op, VOPDOp)
from extra.assembly.amd.dsl import VCC_LO, EXEC_LO, SCC
from extra.assembly.amd.autogen.common import OpType
from extra.assembly.amd.expr_parser import parse_block, _FUNCS
from extra.assembly.amd.pcode import parse_block, _FUNCS
MASK32 = 0xFFFFFFFF
@ -1035,7 +1035,7 @@ def _get_runner(inst_bytes: bytes):
if cls in _INST_HANDLERS:
handler = _INST_HANDLERS[cls]
break
if handler is None: raise RuntimeError(f"[emu2] unimplemented instruction type: {type(inst).__name__} {_op_name(inst)}")
if handler is None: raise RuntimeError(f"[emu] unimplemented instruction type: {type(inst).__name__} {_op_name(inst)}")
ctx = _Ctx(inst_size)
sink = handler(inst, ctx)
@ -1061,14 +1061,13 @@ def decode_program(data: bytes) -> dict[int, tuple[str, Callable, list[int], Any
if DEBUG >= 3:
try: inst_str = repr(inst)
except Exception: inst_str = f"<{type(inst).__name__} at PC={i}>"
msg = f"[emu2] PC={i}: {inst_str}"
msg = f"[emu] PC={i}: {inst_str}"
print(colored(msg, 'green') if is_new else msg)
if DEBUG >= 4: print(f"{colored(runner.p.src, 'BLACK')}")
result[i] = (runner.p.function_name, runner._prg.fxn, runner.p.globals, runner)
except Exception as e:
try: inst_str = repr(inst)
except Exception: inst_str = f"<{type(inst).__name__}>"
raise RuntimeError(f"[emu2] Failed to compile PC={i} {inst_str}: {type(e).__name__}: {e}") from e
raise RuntimeError(f"[emu] Failed to compile PC={i} {inst_str}: {type(e).__name__}: {e}") from e
i += inst.size()
return result
@ -1160,11 +1159,11 @@ def run_asm(lib: int, lib_sz: int, gx: int, gy: int, gz: int, lx: int, ly: int,
for inst_count in range(1_000_000):
if (pc := st.pc) == 0xFFFFFFFFFFFFFFFF or pc not in program: break
name, fxn, globals_list, _ = program[pc]
assert fxn is not None, f"[emu2] No fxn for {name} at PC={pc}"
assert fxn is not None, f"[emu] No fxn for {name} at PC={pc}"
assert 4 not in globals_list or scratch_buf, f"SCRATCH instruction {name} but scratch_size=0"
if DEBUG >= 5:
if DEBUG >= 6:
inst = decode_inst(bytes((ctypes.c_char * 12).from_address(pc).raw))
print(f"[emu2] exec PC={pc:X}: {inst!r}")
print(f"[emu] exec PC={pc:X}: {inst!r}")
fxn(*[c_bufs[g] for g in globals_list])
else: raise RuntimeError("exceeded 1M instructions, likely infinite loop")
return 0

View file

@ -6,7 +6,7 @@ from tinygrad.runtime.support.elf import elf_loader
from extra.assembly.amd.sqtt import decode, print_packets, INST, VALUINST, IMMEDIATE, WAVESTART, WAVEEND, InstOp, PacketType, IMMEDIATE_MASK
from extra.assembly.amd.dsl import Inst
from extra.assembly.amd.decode import decode_inst
from extra.assembly.amd import decode_inst
from extra.assembly.amd.autogen.rdna3.ins import SOPP, s_endpgm
from extra.assembly.amd.autogen.rdna3.enum import SOPPOp

View file

@ -6,8 +6,8 @@ from pathlib import Path
# Set AMD=1 before importing tinygrad
os.environ["AMD"] = "1"
from extra.assembly.amd.emu2 import run_asm as python_run_asm, decode_program
from extra.assembly.amd.decode import decode_inst
from extra.assembly.amd.emu import run_asm as python_run_asm, decode_program
from extra.assembly.amd import decode_inst
from extra.assembly.amd.autogen.rdna3.ins import SOPP, SOPPOp
REMU_PATH = Path(__file__).parents[3] / "remu/target/release/libremu.so"
@ -68,7 +68,7 @@ def benchmark_emulator(name: str, run_fn, kernel: bytes, global_size, local_size
def profile_instructions(kernel: bytes):
"""Profile individual instruction compile times."""
from extra.assembly.amd.emu2 import _get_runner, _canonical_runner_cache
from extra.assembly.amd.emu import _get_runner, _canonical_runner_cache
from tinygrad.helpers import Context
_get_runner.cache_clear()
_canonical_runner_cache.clear()
@ -98,7 +98,7 @@ def profile_instructions(kernel: bytes):
def benchmark_python_split(kernel: bytes, global_size, local_size, args_ptr, rsrc2: int, iterations: int = 5):
"""Benchmark Python emulator with compile and execution times."""
from extra.assembly.amd.emu2 import _get_runner, _canonical_runner_cache
from extra.assembly.amd.emu import _get_runner, _canonical_runner_cache
from tinygrad.helpers import Context
_get_runner.cache_clear()
_canonical_runner_cache.clear()

View file

@ -6,7 +6,7 @@ Set USE_HW=1 to run on both emulator and hardware, comparing results.
import ctypes, math, os, struct
from extra.assembly.amd.autogen.rdna3.ins import *
from extra.assembly.amd.emu2 import run_asm
from extra.assembly.amd.emu import run_asm
from extra.assembly.amd.dsl import NULL, SCC, VCC_LO, VCC_HI, EXEC_LO, EXEC_HI, M0
def _i32(f: float) -> int: return struct.unpack('<I', struct.pack('<f', f))[0]

View file

@ -2,8 +2,8 @@
import unittest, ctypes
from dataclasses import dataclass
from extra.assembly.amd.emu2 import WaveState, decode_program, WAVE_SIZE, VCC_LO, EXEC_LO, SCC
from extra.assembly.amd.decode import decode_inst
from extra.assembly.amd.emu import WaveState, decode_program, WAVE_SIZE, VCC_LO, EXEC_LO, SCC
from extra.assembly.amd import decode_inst
from extra.assembly.amd.test.helpers import KernelInfo
from extra.assembly.amd.test.bench_emu import REMU_PATH

View file

@ -4,8 +4,8 @@ from collections import defaultdict
from tinygrad.helpers import DEBUG
from tinygrad.dtype import dtypes
from tinygrad.uop.ops import UOp, Ops
from extra.assembly.amd.emu2 import parse_pcode
from extra.assembly.amd.expr_parser import parse_expr
from extra.assembly.amd.emu import parse_pcode
from extra.assembly.amd.pcode import parse_expr
from extra.assembly.amd.autogen.rdna3.str_pcode import PCODE
from extra.assembly.amd.autogen.rdna3.enum import VOP1Op, VOP2Op, VOP3Op, SOP1Op, SOP2Op, DSOp

View file

@ -7,7 +7,7 @@ import unittest
from extra.assembly.amd.autogen.rdna3.ins import *
from extra.assembly.amd.dsl import VCC_HI, EXEC_LO, NULL
OFF = NULL # OFF is alias for NULL
from extra.assembly.amd.decode import detect_format
from extra.assembly.amd import detect_format
class TestDS(unittest.TestCase):

View file

@ -11,7 +11,7 @@ Only compute-relevant instruction formats are tested. Graphics-only formats not
import unittest, re, subprocess, functools
from tinygrad.helpers import fetch
from extra.assembly.amd.disasm import disasm
from extra.assembly.amd.decode import decode_inst, detect_format
from extra.assembly.amd import decode_inst, detect_format
from extra.assembly.amd.test.helpers import get_llvm_mc, get_target, get_mattr
LLVM_BASE = "https://raw.githubusercontent.com/llvm/llvm-project/llvmorg-21.1.0/llvm/test/MC/AMDGPU"

View file

@ -1,7 +1,7 @@
#!/usr/bin/env python3
"""Test PDF pseudocode extraction from amdxml.py."""
"""Test PDF pseudocode extraction from generate.py."""
import unittest
from extra.assembly.amd.amdxml import extract_pdf_text, extract_pcode, parse_xml, ARCHS, FIXES
from extra.assembly.amd.generate import extract_pdf_text, extract_pcode, parse_xml, ARCHS, FIXES
EXPECTED_PAGES = {"rdna3": 655, "rdna4": 711, "cdna": 610}

View file

@ -2,7 +2,7 @@
"""Roundtrip tests: generate tinygrad kernels, decode instructions, re-encode, verify match."""
import unittest, io, sys, re, subprocess, os
from extra.assembly.amd.dsl import Inst
from extra.assembly.amd.decode import decode_inst, detect_format
from extra.assembly.amd import decode_inst, detect_format
from extra.assembly.amd.test.helpers import get_llvm_mc, get_llvm_objdump, get_target, get_mattr
def disassemble_lib(lib: bytes, compiler) -> list[tuple[str, bytes]]:

View file

@ -5,7 +5,7 @@ from pathlib import Path
from tinygrad.helpers import DEBUG
from tinygrad.runtime.autogen import rocprof
from tinygrad.runtime.support.elf import elf_loader
from extra.assembly.amd.decode import decode_inst
from extra.assembly.amd import decode_inst
from extra.assembly.amd.autogen.rdna3.ins import SOPP
from extra.assembly.amd.autogen.rdna3.enum import SOPPOp
from extra.assembly.amd.sqtt import (decode, LAYOUT_HEADER, WAVESTART, WAVESTART_L4, WAVEEND, INST, INST_L4, VALUINST, IMMEDIATE, IMMEDIATE_MASK,

View file

@ -55,7 +55,7 @@ if __name__ == "__main__":
with Context(CORRECT_DIVMOD_FOLDING=1):
simplified_expr = expr.simplify()
solver = z3.Solver()
solver = z3.Solver(ctx=z3.Context())
solver.set(timeout=5000) # some expressions take very long to verify, but it's very unlikely they actually return sat
z3_expr, z3_simplified_expr, v1, v2, v3 = uops_to_z3(solver, expr, simplified_expr, u1, u2, u3)
check = solver.check(z3_simplified_expr != z3_expr)

View file

@ -22,7 +22,7 @@ class PythonRemu:
scratch_size: int = 0 # private_segment_fixed_size from kernel descriptor
def run_asm(self, lib: int, lib_sz: int, gx: int, gy: int, gz: int, lx: int, ly: int, lz: int, args_ptr: int) -> int:
from extra.assembly.amd.emu2 import run_asm
from extra.assembly.amd.emu import run_asm
return run_asm(lib, lib_sz, gx, gy, gz, lx, ly, lz, args_ptr, self.rsrc2, self.scratch_size)
def _try_dlopen_remu():

View file

@ -434,7 +434,7 @@ def amd_readelf(lib:bytes) -> list[dict]:
def amd_decode(target:int, lib:bytes) -> dict[int, Any]: # Any is the Inst class from extra.assembly.amd.dsl
from tinygrad.runtime.support.elf import elf_loader
from extra.assembly.amd.decode import detect_format
from extra.assembly.amd import detect_format
from extra.assembly.amd.dsl import Inst
image, sections, _ = elf_loader(lib)
text = next((sh for sh in sections if sh.name == ".text"), None)