mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-06-24 02:14:17 +00:00
assembly/amd: rename to better names (#14384)
* assembly/amd: rename to better names
* might help fuzzing segfault
* emu2 -> emu
This commit is contained in:
parent
065b95cfb0
commit
88bc5ee212
18 changed files with 28 additions and 29 deletions
2
.github/workflows/test.yml
vendored
2
.github/workflows/test.yml
vendored
|
|
@ -679,7 +679,7 @@ jobs:
|
|||
python-version: '3.13'
|
||||
- name: Verify AMD autogen is up to date
|
||||
run: |
|
||||
python -m extra.assembly.amd.amdxml
|
||||
python -m extra.assembly.amd.generate
|
||||
git diff --exit-code extra/assembly/amd/autogen/
|
||||
- name: Install LLVM 21
|
||||
run: |
|
||||
|
|
|
|||
|
|
@ -48,13 +48,13 @@ from tinygrad.runtime.autogen import hsa
|
|||
from tinygrad.helpers import Context, DEBUG, colored
|
||||
from tinygrad.engine.realize import get_runner
|
||||
|
||||
from extra.assembly.amd.decode import decode_inst
|
||||
from extra.assembly.amd import decode_inst
|
||||
from extra.assembly.amd.autogen.rdna3.str_pcode import PCODE
|
||||
from extra.assembly.amd.autogen.rdna3.ins import (SOP1, SOP2, SOPC, SOPK, SOPP, SMEM, VOP1, VOP1_SDST, VOP2, VOP3, VOP3_SDST, VOP3SD, VOP3P, VOPC,
|
||||
DS, FLAT, GLOBAL, SCRATCH, VOPD, SOPPOp, SMEMOp, VOP1Op, VOP2Op, VOP3Op, VOPDOp)
|
||||
from extra.assembly.amd.dsl import VCC_LO, EXEC_LO, SCC
|
||||
from extra.assembly.amd.autogen.common import OpType
|
||||
from extra.assembly.amd.expr_parser import parse_block, _FUNCS
|
||||
from extra.assembly.amd.pcode import parse_block, _FUNCS
|
||||
|
||||
MASK32 = 0xFFFFFFFF
|
||||
|
||||
|
|
@ -1035,7 +1035,7 @@ def _get_runner(inst_bytes: bytes):
|
|||
if cls in _INST_HANDLERS:
|
||||
handler = _INST_HANDLERS[cls]
|
||||
break
|
||||
if handler is None: raise RuntimeError(f"[emu2] unimplemented instruction type: {type(inst).__name__} {_op_name(inst)}")
|
||||
if handler is None: raise RuntimeError(f"[emu] unimplemented instruction type: {type(inst).__name__} {_op_name(inst)}")
|
||||
|
||||
ctx = _Ctx(inst_size)
|
||||
sink = handler(inst, ctx)
|
||||
|
|
@ -1061,14 +1061,13 @@ def decode_program(data: bytes) -> dict[int, tuple[str, Callable, list[int], Any
|
|||
if DEBUG >= 3:
|
||||
try: inst_str = repr(inst)
|
||||
except Exception: inst_str = f"<{type(inst).__name__} at PC={i}>"
|
||||
msg = f"[emu2] PC={i}: {inst_str}"
|
||||
msg = f"[emu] PC={i}: {inst_str}"
|
||||
print(colored(msg, 'green') if is_new else msg)
|
||||
if DEBUG >= 4: print(f"{colored(runner.p.src, 'BLACK')}")
|
||||
result[i] = (runner.p.function_name, runner._prg.fxn, runner.p.globals, runner)
|
||||
except Exception as e:
|
||||
try: inst_str = repr(inst)
|
||||
except Exception: inst_str = f"<{type(inst).__name__}>"
|
||||
raise RuntimeError(f"[emu2] Failed to compile PC={i} {inst_str}: {type(e).__name__}: {e}") from e
|
||||
raise RuntimeError(f"[emu] Failed to compile PC={i} {inst_str}: {type(e).__name__}: {e}") from e
|
||||
i += inst.size()
|
||||
return result
|
||||
|
||||
|
|
@ -1160,11 +1159,11 @@ def run_asm(lib: int, lib_sz: int, gx: int, gy: int, gz: int, lx: int, ly: int,
|
|||
for inst_count in range(1_000_000):
|
||||
if (pc := st.pc) == 0xFFFFFFFFFFFFFFFF or pc not in program: break
|
||||
name, fxn, globals_list, _ = program[pc]
|
||||
assert fxn is not None, f"[emu2] No fxn for {name} at PC={pc}"
|
||||
assert fxn is not None, f"[emu] No fxn for {name} at PC={pc}"
|
||||
assert 4 not in globals_list or scratch_buf, f"SCRATCH instruction {name} but scratch_size=0"
|
||||
if DEBUG >= 5:
|
||||
if DEBUG >= 6:
|
||||
inst = decode_inst(bytes((ctypes.c_char * 12).from_address(pc).raw))
|
||||
print(f"[emu2] exec PC={pc:X}: {inst!r}")
|
||||
print(f"[emu] exec PC={pc:X}: {inst!r}")
|
||||
fxn(*[c_bufs[g] for g in globals_list])
|
||||
else: raise RuntimeError("exceeded 1M instructions, likely infinite loop")
|
||||
return 0
|
||||
|
|
@ -6,7 +6,7 @@ from tinygrad.runtime.support.elf import elf_loader
|
|||
|
||||
from extra.assembly.amd.sqtt import decode, print_packets, INST, VALUINST, IMMEDIATE, WAVESTART, WAVEEND, InstOp, PacketType, IMMEDIATE_MASK
|
||||
from extra.assembly.amd.dsl import Inst
|
||||
from extra.assembly.amd.decode import decode_inst
|
||||
from extra.assembly.amd import decode_inst
|
||||
from extra.assembly.amd.autogen.rdna3.ins import SOPP, s_endpgm
|
||||
from extra.assembly.amd.autogen.rdna3.enum import SOPPOp
|
||||
|
||||
|
|
|
|||
|
|
@ -6,8 +6,8 @@ from pathlib import Path
|
|||
# Set AMD=1 before importing tinygrad
|
||||
os.environ["AMD"] = "1"
|
||||
|
||||
from extra.assembly.amd.emu2 import run_asm as python_run_asm, decode_program
|
||||
from extra.assembly.amd.decode import decode_inst
|
||||
from extra.assembly.amd.emu import run_asm as python_run_asm, decode_program
|
||||
from extra.assembly.amd import decode_inst
|
||||
from extra.assembly.amd.autogen.rdna3.ins import SOPP, SOPPOp
|
||||
|
||||
REMU_PATH = Path(__file__).parents[3] / "remu/target/release/libremu.so"
|
||||
|
|
@ -68,7 +68,7 @@ def benchmark_emulator(name: str, run_fn, kernel: bytes, global_size, local_size
|
|||
|
||||
def profile_instructions(kernel: bytes):
|
||||
"""Profile individual instruction compile times."""
|
||||
from extra.assembly.amd.emu2 import _get_runner, _canonical_runner_cache
|
||||
from extra.assembly.amd.emu import _get_runner, _canonical_runner_cache
|
||||
from tinygrad.helpers import Context
|
||||
_get_runner.cache_clear()
|
||||
_canonical_runner_cache.clear()
|
||||
|
|
@ -98,7 +98,7 @@ def profile_instructions(kernel: bytes):
|
|||
|
||||
def benchmark_python_split(kernel: bytes, global_size, local_size, args_ptr, rsrc2: int, iterations: int = 5):
|
||||
"""Benchmark Python emulator with compile and execution times."""
|
||||
from extra.assembly.amd.emu2 import _get_runner, _canonical_runner_cache
|
||||
from extra.assembly.amd.emu import _get_runner, _canonical_runner_cache
|
||||
from tinygrad.helpers import Context
|
||||
_get_runner.cache_clear()
|
||||
_canonical_runner_cache.clear()
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@ Set USE_HW=1 to run on both emulator and hardware, comparing results.
|
|||
import ctypes, math, os, struct
|
||||
from extra.assembly.amd.autogen.rdna3.ins import *
|
||||
|
||||
from extra.assembly.amd.emu2 import run_asm
|
||||
from extra.assembly.amd.emu import run_asm
|
||||
from extra.assembly.amd.dsl import NULL, SCC, VCC_LO, VCC_HI, EXEC_LO, EXEC_HI, M0
|
||||
|
||||
def _i32(f: float) -> int: return struct.unpack('<I', struct.pack('<f', f))[0]
|
||||
|
|
|
|||
|
|
@ -2,8 +2,8 @@
|
|||
import unittest, ctypes
|
||||
from dataclasses import dataclass
|
||||
|
||||
from extra.assembly.amd.emu2 import WaveState, decode_program, WAVE_SIZE, VCC_LO, EXEC_LO, SCC
|
||||
from extra.assembly.amd.decode import decode_inst
|
||||
from extra.assembly.amd.emu import WaveState, decode_program, WAVE_SIZE, VCC_LO, EXEC_LO, SCC
|
||||
from extra.assembly.amd import decode_inst
|
||||
from extra.assembly.amd.test.helpers import KernelInfo
|
||||
from extra.assembly.amd.test.bench_emu import REMU_PATH
|
||||
|
||||
|
|
|
|||
|
|
@ -4,8 +4,8 @@ from collections import defaultdict
|
|||
from tinygrad.helpers import DEBUG
|
||||
from tinygrad.dtype import dtypes
|
||||
from tinygrad.uop.ops import UOp, Ops
|
||||
from extra.assembly.amd.emu2 import parse_pcode
|
||||
from extra.assembly.amd.expr_parser import parse_expr
|
||||
from extra.assembly.amd.emu import parse_pcode
|
||||
from extra.assembly.amd.pcode import parse_expr
|
||||
from extra.assembly.amd.autogen.rdna3.str_pcode import PCODE
|
||||
from extra.assembly.amd.autogen.rdna3.enum import VOP1Op, VOP2Op, VOP3Op, SOP1Op, SOP2Op, DSOp
|
||||
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ import unittest
|
|||
from extra.assembly.amd.autogen.rdna3.ins import *
|
||||
from extra.assembly.amd.dsl import VCC_HI, EXEC_LO, NULL
|
||||
OFF = NULL # OFF is alias for NULL
|
||||
from extra.assembly.amd.decode import detect_format
|
||||
from extra.assembly.amd import detect_format
|
||||
|
||||
|
||||
class TestDS(unittest.TestCase):
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ Only compute-relevant instruction formats are tested. Graphics-only formats not
|
|||
import unittest, re, subprocess, functools
|
||||
from tinygrad.helpers import fetch
|
||||
from extra.assembly.amd.disasm import disasm
|
||||
from extra.assembly.amd.decode import decode_inst, detect_format
|
||||
from extra.assembly.amd import decode_inst, detect_format
|
||||
from extra.assembly.amd.test.helpers import get_llvm_mc, get_target, get_mattr
|
||||
|
||||
LLVM_BASE = "https://raw.githubusercontent.com/llvm/llvm-project/llvmorg-21.1.0/llvm/test/MC/AMDGPU"
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
#!/usr/bin/env python3
|
||||
"""Test PDF pseudocode extraction from amdxml.py."""
|
||||
"""Test PDF pseudocode extraction from generate.py."""
|
||||
import unittest
|
||||
from extra.assembly.amd.amdxml import extract_pdf_text, extract_pcode, parse_xml, ARCHS, FIXES
|
||||
from extra.assembly.amd.generate import extract_pdf_text, extract_pcode, parse_xml, ARCHS, FIXES
|
||||
|
||||
EXPECTED_PAGES = {"rdna3": 655, "rdna4": 711, "cdna": 610}
|
||||
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
"""Roundtrip tests: generate tinygrad kernels, decode instructions, re-encode, verify match."""
|
||||
import unittest, io, sys, re, subprocess, os
|
||||
from extra.assembly.amd.dsl import Inst
|
||||
from extra.assembly.amd.decode import decode_inst, detect_format
|
||||
from extra.assembly.amd import decode_inst, detect_format
|
||||
from extra.assembly.amd.test.helpers import get_llvm_mc, get_llvm_objdump, get_target, get_mattr
|
||||
|
||||
def disassemble_lib(lib: bytes, compiler) -> list[tuple[str, bytes]]:
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ from pathlib import Path
|
|||
from tinygrad.helpers import DEBUG
|
||||
from tinygrad.runtime.autogen import rocprof
|
||||
from tinygrad.runtime.support.elf import elf_loader
|
||||
from extra.assembly.amd.decode import decode_inst
|
||||
from extra.assembly.amd import decode_inst
|
||||
from extra.assembly.amd.autogen.rdna3.ins import SOPP
|
||||
from extra.assembly.amd.autogen.rdna3.enum import SOPPOp
|
||||
from extra.assembly.amd.sqtt import (decode, LAYOUT_HEADER, WAVESTART, WAVESTART_L4, WAVEEND, INST, INST_L4, VALUINST, IMMEDIATE, IMMEDIATE_MASK,
|
||||
|
|
|
|||
2
test/external/fuzz_symbolic.py
vendored
2
test/external/fuzz_symbolic.py
vendored
|
|
@ -55,7 +55,7 @@ if __name__ == "__main__":
|
|||
with Context(CORRECT_DIVMOD_FOLDING=1):
|
||||
simplified_expr = expr.simplify()
|
||||
|
||||
solver = z3.Solver()
|
||||
solver = z3.Solver(ctx=z3.Context())
|
||||
solver.set(timeout=5000) # some expressions take very long verify, but its very unlikely they actually return sat
|
||||
z3_expr, z3_simplified_expr, v1, v2, v3 = uops_to_z3(solver, expr, simplified_expr, u1, u2, u3)
|
||||
check = solver.check(z3_simplified_expr != z3_expr)
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@ class PythonRemu:
|
|||
scratch_size: int = 0 # private_segment_fixed_size from kernel descriptor
|
||||
|
||||
def run_asm(self, lib: int, lib_sz: int, gx: int, gy: int, gz: int, lx: int, ly: int, lz: int, args_ptr: int) -> int:
|
||||
from extra.assembly.amd.emu2 import run_asm
|
||||
from extra.assembly.amd.emu import run_asm
|
||||
return run_asm(lib, lib_sz, gx, gy, gz, lx, ly, lz, args_ptr, self.rsrc2, self.scratch_size)
|
||||
|
||||
def _try_dlopen_remu():
|
||||
|
|
|
|||
|
|
@ -434,7 +434,7 @@ def amd_readelf(lib:bytes) -> list[dict]:
|
|||
|
||||
def amd_decode(target:int, lib:bytes) -> dict[int, Any]: # Any is the Inst class from extra.assembly.amd.dsl
|
||||
from tinygrad.runtime.support.elf import elf_loader
|
||||
from extra.assembly.amd.decode import detect_format
|
||||
from extra.assembly.amd import detect_format
|
||||
from extra.assembly.amd.dsl import Inst
|
||||
image, sections, _ = elf_loader(lib)
|
||||
text = next((sh for sh in sections if sh.name == ".text"), None)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue