fix CUDA=1 disassembly (VIZ=1) by stripping null terminator (#14046)

* fix ptxas disassembly bug

* single '

* move fix to get_bytes

* move rstrip

---------

Co-authored-by: qazal <77887910+Qazalin@users.noreply.github.com>
This commit is contained in:
anu 2026-01-09 01:19:59 -05:00 committed by GitHub
commit c70c112254
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -35,7 +35,7 @@ def pretty_ptx(s):
def cuda_disassemble(lib:bytes, arch:str):
  """Print the SASS disassembly of a compiled CUDA program (best effort).

  The program bytes are written to a file in the system temp directory named
  ``tinycuda_<md5 of lib>``, ``ptxas`` is run on that file in place, and the
  output of ``nvdisasm`` on the same file is printed.

  Args:
    lib: the program bytes to disassemble. Trailing NUL bytes are stripped
      before the file is written.
    arch: target architecture, passed to ptxas as ``-arch=<arch>``.

  Returns:
    None. Any exception is caught and reported with a printed hint instead of
    being raised.
  """
  try:
    # the file name hashes the bytes exactly as received, so it stays stable for a given input
    fn = (pathlib.Path(tempfile.gettempdir()) / f"tinycuda_{hashlib.md5(lib).hexdigest()}").as_posix()
    # write the file once, without trailing NUL bytes
    # NOTE(review): presumably PTX text arrives with a null terminator that ptxas rejects -- confirm against caller
    with open(fn, "wb") as f: f.write(lib.rstrip(b'\x00'))
    # check=False and a silenced stderr: a ptxas failure is ignored and nvdisasm still runs on fn
    subprocess.run(["ptxas", f"-arch={arch}", "-o", fn, fn], check=False, stderr=subprocess.DEVNULL) # optional ptx -> sass step for CUDA=1
    print(system(f'nvdisasm {fn}'))
  except Exception as e: print("Failed to generate SASS", str(e), "Make sure your PATH contains ptxas/nvdisasm binary of compatible version.")