mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-06-24 02:14:17 +00:00
In-tree autogen: all C libraries (#13220)
* checkout files from autogen branch
* ioctl with payload
* fix am generations
* properly fix generations
This reverts commit b2a54f4f41.
* revert discovery.h
* support pragma pack(1)
* typo
* better getter
* typo
* NVCEC0_QMDV05_00_RELEASE[01]_ENABLE
* align support
* anon handling fix
---------
Co-authored-by: George Hotz <72895+geohot@users.noreply.github.com>
This commit is contained in:
parent
777cbec5b3
commit
09f3aae169
67 changed files with 108566 additions and 205710 deletions
132
.github/workflows/autogen.yml
vendored
132
.github/workflows/autogen.yml
vendored
|
|
@ -19,7 +19,7 @@ on:
|
|||
|
||||
jobs:
|
||||
autogen:
|
||||
name: Autogen
|
||||
name: In-tree Autogen
|
||||
runs-on: ubuntu-24.04
|
||||
timeout-minutes: 15
|
||||
steps:
|
||||
|
|
@ -31,66 +31,107 @@ jobs:
|
|||
opencl: 'true'
|
||||
amd: 'true'
|
||||
cuda: 'true'
|
||||
webgpu: 'true'
|
||||
llvm: 'true'
|
||||
pydeps: 'pyyaml mako'
|
||||
webgpu: 'true'
|
||||
mesa: 'true'
|
||||
pydeps: 'clang>=20 pyyaml mako'
|
||||
- name: Install autogen support packages
|
||||
run: sudo apt-get install -y --no-install-recommends llvm-14-dev libclang-14-dev llvm-20-dev
|
||||
run: sudo apt-get install -y --no-install-recommends libclang-20-dev llvm-20-dev hip-dev libusb-1.0-0-dev
|
||||
- name: Verify OpenCL autogen
|
||||
run: |
|
||||
cp tinygrad/runtime/autogen/opencl.py /tmp/opencl.py.bak
|
||||
./autogen_stubs.sh opencl
|
||||
mv tinygrad/runtime/autogen/opencl.py /tmp/opencl.py.bak
|
||||
python3 -c "from tinygrad.runtime.autogen import opencl"
|
||||
diff /tmp/opencl.py.bak tinygrad/runtime/autogen/opencl.py
|
||||
- name: Verify CUDA autogen
|
||||
run: |
|
||||
cp tinygrad/runtime/autogen/cuda.py /tmp/cuda.py.bak
|
||||
cp tinygrad/runtime/autogen/nv_gpu.py /tmp/nv_gpu.py.bak
|
||||
./autogen_stubs.sh cuda
|
||||
./autogen_stubs.sh nv
|
||||
mv tinygrad/runtime/autogen/cuda.py /tmp/cuda.py.bak
|
||||
mv tinygrad/runtime/autogen/nvrtc.py /tmp/nvrtc.py.bak
|
||||
mv tinygrad/runtime/autogen/nvjitlink.py /tmp/nvjitlink.py.bak
|
||||
mv tinygrad/runtime/autogen/nv_gpu.py /tmp/nv_gpu.py.bak
|
||||
mv tinygrad/runtime/autogen/nv.py /tmp/nv.py.bak
|
||||
python3 -c "from tinygrad.runtime.autogen import cuda, nvrtc, nvjitlink, nv_gpu, nv"
|
||||
diff /tmp/cuda.py.bak tinygrad/runtime/autogen/cuda.py
|
||||
diff /tmp/nvrtc.py.bak tinygrad/runtime/autogen/nvrtc.py
|
||||
diff /tmp/nvjitlink.py.bak tinygrad/runtime/autogen/nvjitlink.py
|
||||
diff /tmp/nv_gpu.py.bak tinygrad/runtime/autogen/nv_gpu.py
|
||||
diff /tmp/nv.py.bak tinygrad/runtime/autogen/nv.py
|
||||
- name: Verify AMD autogen
|
||||
run: |
|
||||
cp tinygrad/runtime/autogen/hsa.py /tmp/hsa.py.bak
|
||||
cp tinygrad/runtime/autogen/kfd.py /tmp/kfd.py.bak
|
||||
cp tinygrad/runtime/autogen/comgr.py /tmp/comgr.py.bak
|
||||
cp tinygrad/runtime/autogen/amd_gpu.py /tmp/amd_gpu.py.bak
|
||||
cp tinygrad/runtime/autogen/sqtt.py /tmp/sqtt.py.bak
|
||||
./autogen_stubs.sh hsa
|
||||
./autogen_stubs.sh kfd
|
||||
./autogen_stubs.sh comgr
|
||||
./autogen_stubs.sh amd
|
||||
./autogen_stubs.sh sqtt
|
||||
diff /tmp/hsa.py.bak tinygrad/runtime/autogen/hsa.py
|
||||
diff /tmp/kfd.py.bak tinygrad/runtime/autogen/kfd.py
|
||||
mv tinygrad/runtime/autogen/comgr.py /tmp/comgr.py.bak
|
||||
mv tinygrad/runtime/autogen/hsa.py /tmp/hsa.py.bak
|
||||
mv tinygrad/runtime/autogen/hip.py /tmp/hip.py.bak
|
||||
mv tinygrad/runtime/autogen/amd_gpu.py /tmp/amd_gpu.py.bak
|
||||
mv tinygrad/runtime/autogen/sqtt.py /tmp/sqtt.py.bak
|
||||
mv tinygrad/runtime/autogen/rocprof.py /tmp/rocprof.py.bak
|
||||
mv tinygrad/runtime/autogen/am/am.py /tmp/am_am.py.bak
|
||||
mv tinygrad/runtime/autogen/am/pm4_soc15.py /tmp/am_pm4_soc15.py.bak
|
||||
mv tinygrad/runtime/autogen/am/pm4_nv.py /tmp/am_pm4_nv.py.bak
|
||||
mv tinygrad/runtime/autogen/am/sdma_4_0_0.py /tmp/am_sdma_4_0_0.py.bak
|
||||
mv tinygrad/runtime/autogen/am/sdma_5_0_0.py /tmp/am_sdma_5_0_0.py.bak
|
||||
mv tinygrad/runtime/autogen/am/sdma_6_0_0.py /tmp/am_sdma_6_0_0.py.bak
|
||||
mv tinygrad/runtime/autogen/am/smu_v13_0_0.py /tmp/am_smu_v13_0_0.py.bak
|
||||
mv tinygrad/runtime/autogen/am/smu_v14_0_2.py /tmp/am_smu_v14_0_2.py.bak
|
||||
python3 -c "from tinygrad.runtime.autogen import comgr, hsa, hip, amd_gpu, sqtt, rocprof; from tinygrad.runtime.autogen.am import am, pm4_soc15, pm4_nv, sdma_4_0_0, sdma_5_0_0, sdma_6_0_0, smu_v13_0_0, smu_v14_0_2"
|
||||
diff /tmp/comgr.py.bak tinygrad/runtime/autogen/comgr.py
|
||||
diff /tmp/hsa.py.bak tinygrad/runtime/autogen/hsa.py
|
||||
diff /tmp/hip.py.bak tinygrad/runtime/autogen/hip.py
|
||||
diff /tmp/amd_gpu.py.bak tinygrad/runtime/autogen/amd_gpu.py
|
||||
diff /tmp/sqtt.py.bak tinygrad/runtime/autogen/sqtt.py
|
||||
diff /tmp/rocprof.py.bak tinygrad/runtime/autogen/rocprof.py
|
||||
diff /tmp/am_am.py.bak tinygrad/runtime/autogen/am/am.py
|
||||
diff /tmp/am_pm4_soc15.py.bak tinygrad/runtime/autogen/am/pm4_soc15.py
|
||||
diff /tmp/am_pm4_nv.py.bak tinygrad/runtime/autogen/am/pm4_nv.py
|
||||
diff /tmp/am_sdma_4_0_0.py.bak tinygrad/runtime/autogen/am/sdma_4_0_0.py
|
||||
diff /tmp/am_sdma_5_0_0.py.bak tinygrad/runtime/autogen/am/sdma_5_0_0.py
|
||||
diff /tmp/am_sdma_6_0_0.py.bak tinygrad/runtime/autogen/am/sdma_6_0_0.py
|
||||
diff /tmp/am_smu_v13_0_0.py.bak tinygrad/runtime/autogen/am/smu_v13_0_0.py
|
||||
diff /tmp/am_smu_v14_0_2.py.bak tinygrad/runtime/autogen/am/smu_v14_0_2.py
|
||||
- name: Verify Linux autogen
|
||||
run: |
|
||||
cp tinygrad/runtime/autogen/io_uring.py /tmp/io_uring.py.bak
|
||||
cp tinygrad/runtime/autogen/ib.py /tmp/ib.py.bak
|
||||
./autogen_stubs.sh io_uring
|
||||
./autogen_stubs.sh ib
|
||||
mv tinygrad/runtime/autogen/libc.py /tmp/libc.py.bak
|
||||
mv tinygrad/runtime/autogen/kfd.py /tmp/kfd.py.bak
|
||||
mv tinygrad/runtime/autogen/io_uring.py /tmp/io_uring.py.bak
|
||||
mv tinygrad/runtime/autogen/ib.py /tmp/ib.py.bak
|
||||
mv tinygrad/runtime/autogen/pci.py /tmp/pci.py.bak
|
||||
mv tinygrad/runtime/autogen/vfio.py /tmp/vfio.py.bak
|
||||
python3 -c "from tinygrad.runtime.autogen import libc, kfd, io_uring, ib, pci, vfio"
|
||||
diff /tmp/libc.py.bak tinygrad/runtime/autogen/libc.py
|
||||
diff /tmp/kfd.py.bak tinygrad/runtime/autogen/kfd.py
|
||||
diff /tmp/io_uring.py.bak tinygrad/runtime/autogen/io_uring.py
|
||||
diff /tmp/ib.py.bak tinygrad/runtime/autogen/ib.py
|
||||
- name: Verify WebGPU autogen
|
||||
run: |
|
||||
cp tinygrad/runtime/autogen/webgpu.py /tmp/webgpu.py.bak
|
||||
./autogen_stubs.sh webgpu
|
||||
diff /tmp/webgpu.py.bak tinygrad/runtime/autogen/webgpu.py
|
||||
diff /tmp/pci.py.bak tinygrad/runtime/autogen/pci.py
|
||||
diff /tmp/vfio.py.bak tinygrad/runtime/autogen/vfio.py
|
||||
- name: Verify LLVM autogen
|
||||
run: |
|
||||
cp tinygrad/runtime/autogen/llvm.py /tmp/llvm.py.bak
|
||||
./autogen_stubs.sh llvm
|
||||
mv tinygrad/runtime/autogen/llvm.py /tmp/llvm.py.bak
|
||||
python3 -c "from tinygrad.runtime.autogen import llvm"
|
||||
diff /tmp/llvm.py.bak tinygrad/runtime/autogen/llvm.py
|
||||
- name: Verify WebGPU autogen
|
||||
run: |
|
||||
mv tinygrad/runtime/autogen/webgpu.py /tmp/webgpu.py.bak
|
||||
python3 -c "from tinygrad.runtime.autogen import webgpu"
|
||||
diff /tmp/webgpu.py.bak tinygrad/runtime/autogen/webgpu.py
|
||||
- name: Verify Qualcomm autogen
|
||||
run: |
|
||||
mv tinygrad/runtime/autogen/kgsl.py /tmp/kgsl.py.bak
|
||||
mv tinygrad/runtime/autogen/adreno.py /tmp/adreno.py.bak
|
||||
mv tinygrad/runtime/autogen/qcom_dsp.py /tmp/qcom_dsp.py.bak
|
||||
python3 -c "from tinygrad.runtime.autogen import kgsl, adreno, qcom_dsp"
|
||||
diff /tmp/kgsl.py.bak tinygrad/runtime/autogen/kgsl.py
|
||||
diff /tmp/adreno.py.bak tinygrad/runtime/autogen/adreno.py
|
||||
diff /tmp/qcom_dsp.py.bak tinygrad/runtime/autogen/qcom_dsp.py
|
||||
- name: Verify libusb autogen
|
||||
run: |
|
||||
mv tinygrad/runtime/autogen/libusb.py /tmp/libusb.py.bak
|
||||
python3 -c "from tinygrad.runtime.autogen import libusb"
|
||||
diff /tmp/libusb.py.bak tinygrad/runtime/autogen/libusb.py
|
||||
- name: Verify mesa autogen
|
||||
run: |
|
||||
cp tinygrad/runtime/autogen/mesa.py /tmp/mesa.py.bak
|
||||
./autogen_stubs.sh mesa
|
||||
mv tinygrad/runtime/autogen/mesa.py /tmp/mesa.py.bak
|
||||
python3 -c "from tinygrad.runtime.autogen import mesa"
|
||||
diff /tmp/mesa.py.bak tinygrad/runtime/autogen/mesa.py
|
||||
autogen-ng:
|
||||
name: In-tree Autogen
|
||||
autogen-comgr-3:
|
||||
name: In-tree Autogen (comgr 3)
|
||||
runs-on: ubuntu-24.04
|
||||
timeout-minutes: 15
|
||||
steps:
|
||||
|
|
@ -101,9 +142,16 @@ jobs:
|
|||
with:
|
||||
pydeps: 'clang>=20'
|
||||
- name: Install autogen support packages
|
||||
run: sudo apt-get install -y --no-install-recommends libclang-20-dev
|
||||
- name: Verify Linux autogen
|
||||
run: |
|
||||
mv tinygrad/runtime/autogen/libc.py /tmp/libc.py.bak
|
||||
python3 -c "from tinygrad.runtime.autogen import libc"
|
||||
diff /tmp/libc.py.bak tinygrad/runtime/autogen/libc.py
|
||||
wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | gpg --dearmor | sudo tee /etc/apt/keyrings/rocm.gpg > /dev/null
|
||||
sudo tee /etc/apt/sources.list.d/rocm.list <<EOF
|
||||
deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/6.4 $(lsb_release -cs) main
|
||||
EOF
|
||||
echo -e 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' | sudo tee /etc/apt/preferences.d/rocm-pin-600
|
||||
sudo apt -qq update || true
|
||||
sudo apt-get install -y --no-install-recommends libclang-20-dev comgr
|
||||
- name: Verify comgr (3) autogen
|
||||
run: |
|
||||
mv tinygrad/runtime/autogen/comgr_3.py /tmp/comgr_3.py.bak
|
||||
python3 -c "from tinygrad.runtime.autogen import comgr"
|
||||
diff /tmp/comgr_3.py.bak tinygrad/runtime/autogen/comgr_3.py
|
||||
|
|
|
|||
2
.github/workflows/test.yml
vendored
2
.github/workflows/test.yml
vendored
|
|
@ -230,7 +230,7 @@ jobs:
|
|||
python-version: '3.11'
|
||||
deps: linting
|
||||
- name: Lint bad-indentation and trailing-whitespace with pylint
|
||||
run: python -m pylint --disable=all -e W0311 -e C0303 --jobs=0 --indent-string=' ' --recursive=y . --ignore-paths='tinygrad/runtime/autogen'
|
||||
run: python -m pylint --disable=all -e W0311 -e C0303 --jobs=0 --indent-string=' ' --recursive=y .
|
||||
- name: Lint with ruff
|
||||
run: |
|
||||
pip3 install --upgrade --force-reinstall ruff==0.11.0
|
||||
|
|
|
|||
550
autogen_stubs.sh
550
autogen_stubs.sh
|
|
@ -1,550 +0,0 @@
|
|||
#!/bin/bash -e
|
||||
|
||||
# setup instructions for clang2py
|
||||
if [[ ! $(clang2py -V) ]]; then
|
||||
pushd .
|
||||
cd /tmp
|
||||
sudo apt-get install -y --no-install-recommends clang
|
||||
pip install --upgrade pip setuptools
|
||||
pip install clang==14.0.6
|
||||
git clone https://github.com/nimlgen/ctypeslib.git
|
||||
cd ctypeslib
|
||||
pip install .
|
||||
clang2py -V
|
||||
popd
|
||||
fi
|
||||
|
||||
BASE=tinygrad/runtime/autogen/
|
||||
|
||||
fixup() {
|
||||
sed -i '1s/^/# mypy: ignore-errors\n/' $1
|
||||
sed -i 's/ *$//' $1
|
||||
grep FIXME_STUB $1 || true
|
||||
}
|
||||
|
||||
patch_dlopen() {
|
||||
path=$1; shift
|
||||
name=$1; shift
|
||||
cat <<EOF | sed -i "/import ctypes.*/r /dev/stdin" $path
|
||||
PATHS_TO_TRY = [
|
||||
$(for p in "$@"; do echo " $p,"; done)
|
||||
]
|
||||
def _try_dlopen_$name():
|
||||
library = ctypes.util.find_library("$name")
|
||||
if library:
|
||||
try: return ctypes.CDLL(library)
|
||||
except OSError: pass
|
||||
for candidate in PATHS_TO_TRY:
|
||||
try: return ctypes.CDLL(candidate)
|
||||
except OSError: pass
|
||||
return None
|
||||
EOF
|
||||
}
|
||||
|
||||
generate_opencl() {
|
||||
clang2py /usr/include/CL/cl.h -o $BASE/opencl.py -l /usr/lib/x86_64-linux-gnu/libOpenCL.so.1 -k cdefstum
|
||||
fixup $BASE/opencl.py
|
||||
# hot patches
|
||||
sed -i "s\import ctypes\import ctypes, ctypes.util\g" $BASE/opencl.py
|
||||
sed -i "s\ctypes.CDLL('/usr/lib/x86_64-linux-gnu/libOpenCL.so.1')\ctypes.CDLL(ctypes.util.find_library('OpenCL'))\g" $BASE/opencl.py
|
||||
python3 -c "import tinygrad.runtime.autogen.opencl"
|
||||
}
|
||||
|
||||
generate_hip() {
|
||||
clang2py /opt/rocm/include/hip/hip_ext.h /opt/rocm/include/hip/hiprtc.h \
|
||||
/opt/rocm/include/hip/hip_runtime_api.h /opt/rocm/include/hip/driver_types.h \
|
||||
--clang-args="-D__HIP_PLATFORM_AMD__ -I/opt/rocm/include -x c++" -o $BASE/hip.py -l /opt/rocm/lib/libamdhip64.so
|
||||
echo "hipDeviceProp_t = hipDeviceProp_tR0600" >> $BASE/hip.py
|
||||
echo "hipGetDeviceProperties = hipGetDevicePropertiesR0600" >> $BASE/hip.py
|
||||
fixup $BASE/hip.py
|
||||
# we can trust HIP is always at /opt/rocm/lib
|
||||
#sed -i "s\import ctypes\import ctypes, ctypes.util\g" $BASE/hip.py
|
||||
#sed -i "s\ctypes.CDLL('/opt/rocm/lib/libhiprtc.so')\ctypes.CDLL(ctypes.util.find_library('hiprtc'))\g" $BASE/hip.py
|
||||
#sed -i "s\ctypes.CDLL('/opt/rocm/lib/libamdhip64.so')\ctypes.CDLL(ctypes.util.find_library('amdhip64'))\g" $BASE/hip.py
|
||||
sed -i "s\import ctypes\import ctypes, os\g" $BASE/hip.py
|
||||
sed -i "s\'/opt/rocm/\os.getenv('ROCM_PATH', '/opt/rocm/')+'/\g" $BASE/hip.py
|
||||
python3 -c "import tinygrad.runtime.autogen.hip"
|
||||
}
|
||||
|
||||
generate_comgr() {
|
||||
clang2py /opt/rocm/include/amd_comgr/amd_comgr.h \
|
||||
--clang-args="-D__HIP_PLATFORM_AMD__ -I/opt/rocm/include -x c++" -o $BASE/comgr.py -l /opt/rocm/lib/libamd_comgr.so
|
||||
fixup $BASE/comgr.py
|
||||
sed -i "s\import ctypes\import ctypes, ctypes.util, os\g" $BASE/comgr.py
|
||||
patch_dlopen $BASE/comgr.py amd_comgr "'/opt/rocm/lib/libamd_comgr.so'" "os.getenv('ROCM_PATH', '')+'/lib/libamd_comgr.so'" "'/usr/local/lib/libamd_comgr.dylib'" "'/opt/homebrew/lib/libamd_comgr.dylib'"
|
||||
sed -i "s\ctypes.CDLL('/opt/rocm/lib/libamd_comgr.so')\_try_dlopen_amd_comgr()\g" $BASE/comgr.py
|
||||
python3 -c "import tinygrad.runtime.autogen.comgr"
|
||||
}
|
||||
|
||||
generate_kfd() {
|
||||
clang2py /usr/include/linux/kfd_ioctl.h -o $BASE/kfd.py -k cdefstum
|
||||
|
||||
fixup $BASE/kfd.py
|
||||
sed -i "s/import ctypes/import ctypes, os/g" $BASE/kfd.py
|
||||
sed -i "s/import fcntl, functools/import functools/g" $BASE/kfd.py
|
||||
sed -i "/import functools/a from tinygrad.runtime.support.hcq import FileIOInterface" $BASE/kfd.py
|
||||
sed -i "s/def _do_ioctl(__idir, __base, __nr, __user_struct, __fd, \*\*kwargs):/def _do_ioctl(__idir, __base, __nr, __user_struct, __fd:FileIOInterface, \*\*kwargs):/g" $BASE/kfd.py
|
||||
sed -i "s/fcntl.ioctl(__fd, (__idir<<30)/__fd.ioctl((__idir<<30)/g" $BASE/kfd.py
|
||||
sed -i "s/!!/not not /g" $BASE/kfd.py
|
||||
python3 -c "import tinygrad.runtime.autogen.kfd"
|
||||
}
|
||||
|
||||
generate_cuda() {
|
||||
clang2py /usr/include/cuda.h --clang-args="-D__CUDA_API_VERSION_INTERNAL" -o $BASE/cuda.py -l /usr/lib/x86_64-linux-gnu/libcuda.so
|
||||
sed -i "s\import ctypes\import ctypes, ctypes.util\g" $BASE/cuda.py
|
||||
sed -i "s\ctypes.CDLL('/usr/lib/x86_64-linux-gnu/libcuda.so')\ctypes.CDLL(ctypes.util.find_library('cuda'))\g" $BASE/cuda.py
|
||||
fixup $BASE/cuda.py
|
||||
python3 -c "import tinygrad.runtime.autogen.cuda"
|
||||
}
|
||||
|
||||
generate_nvrtc() {
|
||||
clang2py /usr/local/cuda/include/nvrtc.h /usr/local/cuda/include/nvJitLink.h -o $BASE/nvrtc.py -l /usr/local/cuda/lib64/libnvrtc.so -l /usr/local/cuda/lib64/libnvJitLink.so
|
||||
sed -i "s\import ctypes\import ctypes, ctypes.util\g" $BASE/nvrtc.py
|
||||
sed -i "s\ctypes.CDLL('/usr/local/cuda/lib64/libnvrtc.so')\ctypes.CDLL(ctypes.util.find_library('nvrtc'))\g" $BASE/nvrtc.py
|
||||
sed -i "s\ctypes.CDLL('/usr/local/cuda/lib64/libnvJitLink.so')\ctypes.CDLL(ctypes.util.find_library('nvJitLink'))\g" $BASE/nvrtc.py
|
||||
fixup $BASE/nvrtc.py
|
||||
python3 -c "import tinygrad.runtime.autogen.nvrtc"
|
||||
}
|
||||
|
||||
generate_nv() {
|
||||
NVKERN_COMMIT_HASH=81fe4fb417c8ac3b9bdcc1d56827d116743892a5
|
||||
NVKERN_SRC=/tmp/open-gpu-kernel-modules-$NVKERN_COMMIT_HASH
|
||||
if [ ! -d "$NVKERN_SRC" ]; then
|
||||
git clone https://github.com/NVIDIA/open-gpu-kernel-modules $NVKERN_SRC
|
||||
pushd .
|
||||
cd $NVKERN_SRC
|
||||
git reset --hard $NVKERN_COMMIT_HASH
|
||||
popd
|
||||
fi
|
||||
|
||||
clang2py -k cdefstum \
|
||||
extra/nv_gpu_driver/clc6c0qmd.h \
|
||||
extra/nv_gpu_driver/clcec0qmd.h \
|
||||
$NVKERN_SRC/src/common/sdk/nvidia/inc/class/cl0000.h \
|
||||
$NVKERN_SRC/src/common/sdk/nvidia/inc/class/cl0080.h \
|
||||
$NVKERN_SRC/src/common/sdk/nvidia/inc/class/cl2080.h \
|
||||
$NVKERN_SRC/src/common/sdk/nvidia/inc/class/cl2080_notification.h \
|
||||
$NVKERN_SRC/src/common/sdk/nvidia/inc/class/clc56f.h \
|
||||
$NVKERN_SRC/src/common/sdk/nvidia/inc/class/clc86f.h \
|
||||
$NVKERN_SRC/src/common/sdk/nvidia/inc/class/clc96f.h \
|
||||
$NVKERN_SRC/src/common/sdk/nvidia/inc/class/clc761.h \
|
||||
$NVKERN_SRC/src/common/sdk/nvidia/inc/class/cl83de.h \
|
||||
$NVKERN_SRC/src/nvidia/generated/g_allclasses.h \
|
||||
$NVKERN_SRC/src/common/sdk/nvidia/inc/class/clc6c0.h \
|
||||
$NVKERN_SRC/src/common/sdk/nvidia/inc/class/clcdc0.h \
|
||||
$NVKERN_SRC/kernel-open/nvidia-uvm/clc6b5.h \
|
||||
$NVKERN_SRC/kernel-open/nvidia-uvm/clc9b5.h \
|
||||
$NVKERN_SRC/kernel-open/nvidia-uvm/uvm_ioctl.h \
|
||||
$NVKERN_SRC/kernel-open/nvidia-uvm/uvm_linux_ioctl.h \
|
||||
$NVKERN_SRC/kernel-open/nvidia-uvm/hwref/ampere/ga100/dev_fault.h \
|
||||
$NVKERN_SRC/src/nvidia/arch/nvalloc/unix/include/nv_escape.h \
|
||||
$NVKERN_SRC/src/nvidia/arch/nvalloc/unix/include/nv-ioctl.h \
|
||||
$NVKERN_SRC/src/nvidia/arch/nvalloc/unix/include/nv-ioctl-numbers.h \
|
||||
$NVKERN_SRC/src/nvidia/arch/nvalloc/unix/include/nv-ioctl-numa.h \
|
||||
$NVKERN_SRC/src/nvidia/arch/nvalloc/unix/include/nv-unix-nvos-params-wrappers.h \
|
||||
$NVKERN_SRC/src/common/sdk/nvidia/inc/alloc/alloc_channel.h \
|
||||
$NVKERN_SRC/src/common/sdk/nvidia/inc/nvos.h \
|
||||
$NVKERN_SRC/src/common/sdk/nvidia/inc/ctrl/ctrl0000/*.h \
|
||||
$NVKERN_SRC/src/common/sdk/nvidia/inc/ctrl/ctrl0080/*.h \
|
||||
$NVKERN_SRC/src/common/sdk/nvidia/inc/ctrl/ctrl2080/*.h \
|
||||
$NVKERN_SRC/src/common/sdk/nvidia/inc/ctrl/ctrl83de/*.h \
|
||||
$NVKERN_SRC/src/common/sdk/nvidia/inc/ctrl/ctrlc36f.h \
|
||||
$NVKERN_SRC/src/common/sdk/nvidia/inc/ctrl/ctrlcb33.h \
|
||||
$NVKERN_SRC/src/common/sdk/nvidia/inc/ctrl/ctrla06c.h \
|
||||
$NVKERN_SRC/src/common/sdk/nvidia/inc/ctrl/ctrl90f1.h \
|
||||
--clang-args="-include $NVKERN_SRC/src/common/sdk/nvidia/inc/nvtypes.h -I$NVKERN_SRC/src/common/inc -I$NVKERN_SRC/kernel-open/nvidia-uvm -I$NVKERN_SRC/kernel-open/common/inc -I$NVKERN_SRC/src/common/sdk/nvidia/inc -I$NVKERN_SRC/src/nvidia/arch/nvalloc/unix/include -I$NVKERN_SRC/src/common/sdk/nvidia/inc/ctrl" \
|
||||
-o $BASE/nv_gpu.py
|
||||
fixup $BASE/nv_gpu.py
|
||||
sed -i "s\(0000000001)\1\g" $BASE/nv_gpu.py
|
||||
sed -i "s\import ctypes\import ctypes, os\g" $BASE/nv_gpu.py
|
||||
sed -i 's/#\?\s\([A-Za-z0-9_]\+\) = MW ( \([0-9]\+\) : \([0-9]\+\) )/\1 = (\2 , \3)/' $BASE/nv_gpu.py # NVC6C0_QMDV03_00 processing
|
||||
sed -i 's/#\sdef NVC6C0_QMD\([A-Za-z0-9_()]\+\):/def NVC6C0_QMD\1:/' $BASE/nv_gpu.py
|
||||
sed -i 's/#\sdef NVCEC0_QMD\([A-Za-z0-9_()]\+\):/def NVCEC0_QMD\1:/' $BASE/nv_gpu.py
|
||||
sed -E -i -n '/^def (NVCEC0_QMDV05_00_RELEASE)(_ENABLE)\(i\):/{p;s//\1'"0"'\2=\1\2(0)\n\1'"1"'\2=\1\2(1)/;H;b};p;${x;s/^\n//;p}' "$BASE/nv_gpu.py"
|
||||
sed -i 's/#\s*return MW(\([0-9i()*+]\+\):\([0-9i()*+]\+\))/ return (\1 , \2)/' $BASE/nv_gpu.py
|
||||
sed -i 's/#\?\s*\(.*\)\s*=\s*\(NV\)\?BIT\(32\)\?\s*(\s*\([0-9]\+\)\s*)/\1 = (1 << \4)/' $BASE/nv_gpu.py # name = BIT(x) -> name = (1 << x)
|
||||
sed -i "s/UVM_\([A-Za-z0-9_]\+\) = \['i', '(', '\([0-9]\+\)', ')'\]/UVM_\1 = \2/" $BASE/nv_gpu.py # UVM_name = ['i', '(', '<num>', ')'] -> UVM_name = <num>
|
||||
|
||||
# Parse status codes
|
||||
sed -n '1i\
|
||||
nv_status_codes = {}
|
||||
/^NV_STATUS_CODE/ { s/^NV_STATUS_CODE(\([^,]*\), *\([^,]*\), *"\([^"]*\)") *.*$/\1 = \2\nnv_status_codes[\1] = "\3"/; p }' $NVKERN_SRC/src/common/sdk/nvidia/inc/nvstatuscodes.h >> $BASE/nv_gpu.py
|
||||
python3 -c "import tinygrad.runtime.autogen.nv_gpu"
|
||||
|
||||
clang2py -k cdefstum \
|
||||
$NVKERN_SRC/src/nvidia/inc/kernel/gpu/fsp/kern_fsp_cot_payload.h \
|
||||
$NVKERN_SRC/src/nvidia/arch/nvalloc/common/inc/gsp/gspifpub.h \
|
||||
$NVKERN_SRC/src/nvidia/arch/nvalloc/common/inc/gsp/gsp_fw_wpr_meta.h \
|
||||
$NVKERN_SRC/src/nvidia/arch/nvalloc/common/inc/gsp/gsp_fw_sr_meta.h \
|
||||
$NVKERN_SRC/src/nvidia/inc/kernel/gpu/gsp/gsp_init_args.h \
|
||||
$NVKERN_SRC/src/nvidia/inc/kernel/gpu/gsp/gsp_init_args.h \
|
||||
$NVKERN_SRC/src/common/uproc/os/common/include/libos_init_args.h \
|
||||
$NVKERN_SRC/src/nvidia/arch/nvalloc/common/inc/rmRiscvUcode.h \
|
||||
$NVKERN_SRC/src/common/shared/msgq/inc/msgq/msgq_priv.h \
|
||||
$NVKERN_SRC/src/nvidia/inc/kernel/vgpu/rpc_headers.h \
|
||||
$NVKERN_SRC/src/nvidia/inc/kernel/vgpu/rpc_global_enums.h \
|
||||
$NVKERN_SRC/src/nvidia/generated/g_rpc-structures.h \
|
||||
$NVKERN_SRC/src/nvidia/arch/nvalloc/common/inc/fsp/fsp_nvdm_format.h \
|
||||
extra/nv_gpu_driver/g_rpc-message-header.h \
|
||||
extra/nv_gpu_driver/gsp_static_config.h \
|
||||
extra/nv_gpu_driver/vbios.h \
|
||||
extra/nv_gpu_driver/pci_exp_table.h \
|
||||
--clang-args="-DRPC_MESSAGE_STRUCTURES -DRPC_STRUCTURES -include $NVKERN_SRC/src/common/sdk/nvidia/inc/nvtypes.h -I$NVKERN_SRC/src/nvidia/generated -I$NVKERN_SRC/src/common/inc -I$NVKERN_SRC/src/nvidia/inc -I$NVKERN_SRC/src/nvidia/interface/ -I$NVKERN_SRC/src/nvidia/inc/kernel -I$NVKERN_SRC/src/nvidia/inc/libraries -I$NVKERN_SRC/src/nvidia/arch/nvalloc/common/inc -I$NVKERN_SRC/kernel-open/nvidia-uvm -I$NVKERN_SRC/kernel-open/common/inc -I$NVKERN_SRC/src/common/sdk/nvidia/inc -I$NVKERN_SRC/src/nvidia/arch/nvalloc/unix/include -I$NVKERN_SRC/src/common/sdk/nvidia/inc/ctrl" \
|
||||
-o $BASE/nv/nv.py
|
||||
|
||||
fixup $BASE/nv/nv.py
|
||||
python3 -c "import tinygrad.runtime.autogen.nv.nv"
|
||||
}
|
||||
|
||||
generate_amd() {
|
||||
# clang2py broken when pass -x c++ to prev headers
|
||||
clang2py -k cdefstum \
|
||||
extra/hip_gpu_driver/sdma_registers.h \
|
||||
extra/hip_gpu_driver/nvd.h \
|
||||
extra/hip_gpu_driver/gc_11_0_0_offset.h \
|
||||
extra/hip_gpu_driver/sienna_cichlid_ip_offset.h \
|
||||
--clang-args="-I/opt/rocm/include -x c++" \
|
||||
-o $BASE/amd_gpu.py
|
||||
|
||||
fixup $BASE/amd_gpu.py
|
||||
sed -i "s\import ctypes\import ctypes, os\g" $BASE/amd_gpu.py
|
||||
python3 -c "import tinygrad.runtime.autogen.amd_gpu"
|
||||
}
|
||||
|
||||
generate_hsa() {
|
||||
clang2py \
|
||||
/opt/rocm/include/hsa/hsa.h \
|
||||
/opt/rocm/include/hsa/hsa_ext_amd.h \
|
||||
/opt/rocm/include/hsa/amd_hsa_signal.h \
|
||||
/opt/rocm/include/hsa/amd_hsa_queue.h \
|
||||
/opt/rocm/include/hsa/amd_hsa_kernel_code.h \
|
||||
/opt/rocm/include/hsa/hsa_ext_finalize.h /opt/rocm/include/hsa/hsa_ext_image.h \
|
||||
/opt/rocm/include/hsa/hsa_ven_amd_aqlprofile.h \
|
||||
--clang-args="-I/opt/rocm/include" \
|
||||
-o $BASE/hsa.py -l /opt/rocm/lib/libhsa-runtime64.so
|
||||
|
||||
fixup $BASE/hsa.py
|
||||
sed -i "s\import ctypes\import ctypes, ctypes.util, os\g" $BASE/hsa.py
|
||||
sed -i "s\ctypes.CDLL('/opt/rocm/lib/libhsa-runtime64.so')\ctypes.CDLL(os.getenv('ROCM_PATH')+'/lib/libhsa-runtime64.so' if os.getenv('ROCM_PATH') else ctypes.util.find_library('hsa-runtime64'))\g" $BASE/hsa.py
|
||||
python3 -c "import tinygrad.runtime.autogen.hsa"
|
||||
}
|
||||
|
||||
generate_io_uring() {
|
||||
clang2py -k cdefstum \
|
||||
/usr/include/liburing.h \
|
||||
/usr/include/linux/io_uring.h \
|
||||
-o $BASE/io_uring.py
|
||||
|
||||
sed -r '/^#define __NR_io_uring/ s/^#define __(NR_io_uring[^ ]+) (.*)$/\1 = \2/; t; d' /usr/include/asm-generic/unistd.h >> $BASE/io_uring.py # io_uring syscalls numbers
|
||||
fixup $BASE/io_uring.py
|
||||
}
|
||||
|
||||
generate_ib() {
|
||||
clang2py -k cdefstum \
|
||||
/usr/include/infiniband/verbs.h \
|
||||
/usr/include/infiniband/verbs_api.h \
|
||||
/usr/include/infiniband/ib_user_ioctl_verbs.h \
|
||||
/usr/include/rdma/ib_user_verbs.h \
|
||||
-o $BASE/ib.py
|
||||
|
||||
sed -i "s\import ctypes\import ctypes, ctypes.util\g" "$BASE/ib.py"
|
||||
sed -i "s\FIXME_STUB\libibverbs\g" "$BASE/ib.py"
|
||||
sed -i "s\FunctionFactoryStub()\ctypes.CDLL(ctypes.util.find_library('ibverbs'), use_errno=True)\g" "$BASE/ib.py"
|
||||
|
||||
fixup $BASE/ib.py
|
||||
}
|
||||
|
||||
generate_llvm() {
|
||||
INC="$(llvm-config-14 --includedir)"
|
||||
clang2py -k cdefstum \
|
||||
$(find "$INC/llvm-c/" -type f -name '*.h' | sort) \
|
||||
"$INC/llvm/Config/Targets.def" \
|
||||
"$INC/llvm/Config/AsmPrinters.def" \
|
||||
"$INC/llvm/Config/AsmParsers.def" \
|
||||
"$INC/llvm/Config/Disassemblers.def" \
|
||||
--clang-args="$(llvm-config-14 --cflags)" \
|
||||
-o "$BASE/llvm.py"
|
||||
|
||||
sed -i "s\import ctypes\import ctypes, tinygrad.runtime.support.llvm as llvm_support\g" "$BASE/llvm.py"
|
||||
sed -i "s\FIXME_STUB\llvm\g" "$BASE/llvm.py"
|
||||
sed -i "s\FunctionFactoryStub()\ctypes.CDLL(llvm_support.LLVM_PATH)\g" "$BASE/llvm.py"
|
||||
|
||||
fixup "$BASE/llvm.py"
|
||||
}
|
||||
|
||||
generate_kgsl() {
|
||||
clang2py extra/qcom_gpu_driver/msm_kgsl.h -o $BASE/kgsl.py -k cdefstum
|
||||
fixup $BASE/kgsl.py
|
||||
sed -i "s\import ctypes\import ctypes, os\g" $BASE/kgsl.py
|
||||
sed -nE 's/#define ([A-Za-z0-9_]+)_SHIFT\s*[^\S\r\n]*[0-9]*$/def \1(val): return (val << \1_SHIFT) \& \1_MASK/p' extra/qcom_gpu_driver/msm_kgsl.h >> $BASE/kgsl.py
|
||||
sed -i "s\fcntl.ioctl(__fd, (__idir<<30)\__fd.ioctl((__idir<<30)\g" $BASE/kgsl.py
|
||||
python3 -c "import tinygrad.runtime.autogen.kgsl"
|
||||
}
|
||||
|
||||
generate_adreno() {
|
||||
clang2py extra/qcom_gpu_driver/a6xx.xml.h -o $BASE/adreno.py -k cestum
|
||||
sed -nE 's/#define ([A-Za-z0-9_]+)__SHIFT\s*[^\S\r\n]*[0-9]*$/def \1(val): return (val << \1__SHIFT) \& \1__MASK/p' extra/qcom_gpu_driver/a6xx.xml.h >> $BASE/adreno.py
|
||||
fixup $BASE/adreno.py
|
||||
sed -i "s\import ctypes\import ctypes, os\g" $BASE/adreno.py
|
||||
python3 -c "import tinygrad.runtime.autogen.adreno"
|
||||
}
|
||||
|
||||
generate_qcom() {
|
||||
clang2py -k cdefstum \
|
||||
extra/dsp/include/ion.h \
|
||||
extra/dsp/include/msm_ion.h \
|
||||
extra/dsp/include/adsprpc_shared.h \
|
||||
extra/dsp/include/remote_default.h \
|
||||
extra/dsp/include/apps_std.h \
|
||||
-o $BASE/qcom_dsp.py
|
||||
|
||||
fixup $BASE/qcom_dsp.py
|
||||
python3 -c "import tinygrad.runtime.autogen.qcom_dsp"
|
||||
}
|
||||
|
||||
generate_pci() {
|
||||
clang2py -k cdefstum \
|
||||
/usr/include/linux/pci_regs.h \
|
||||
-o $BASE/pci.py
|
||||
fixup $BASE/pci.py
|
||||
}
|
||||
|
||||
generate_vfio() {
|
||||
clang2py -k cdefstum \
|
||||
/usr/include/linux/vfio.h \
|
||||
-o $BASE/vfio.py
|
||||
fixup $BASE/vfio.py
|
||||
sed -i "s\import ctypes\import ctypes, os\g" $BASE/vfio.py
|
||||
sed -i "s\import fcntl, functools\import functools" $BASE/vfio.py
|
||||
sed -i "s\import ctypes,os\a from tinygrad.runtime.support import FileIOInterface\g" $BASE/vfio.py
|
||||
sed -i "s\fcntl.ioctl(__fd, (__idir<<30)\return __fd.ioctl((__idir<<30)\g" $BASE/vfio.py
|
||||
}
|
||||
|
||||
generate_am() {
|
||||
AMKERN_COMMIT_HASH=ceb12c04e2b5b53ec0779362831f5ee40c4921e4
|
||||
AMKERN_SRC=/tmp/ROCK-Kernel-Driver-$AMKERN_COMMIT_HASH
|
||||
if [ ! -d "$AMKERN_SRC" ]; then
|
||||
git clone https://github.com/ROCm/ROCK-Kernel-Driver $AMKERN_SRC --depth 1
|
||||
fi
|
||||
AMKERN_AMD=$AMKERN_SRC/drivers/gpu/drm/amd/
|
||||
AMKERN_INC=$AMKERN_AMD/include/
|
||||
|
||||
clang2py -k cdefstum \
|
||||
extra/amdpci/headers/v11_structs.h \
|
||||
extra/amdpci/headers/v12_structs.h \
|
||||
extra/amdpci/headers/amdgpu_vm.h \
|
||||
extra/amdpci/headers/discovery.h \
|
||||
extra/amdpci/headers/amdgpu_ucode.h \
|
||||
extra/amdpci/headers/psp_gfx_if.h \
|
||||
extra/amdpci/headers/amdgpu_psp.h \
|
||||
extra/amdpci/headers/amdgpu_irq.h \
|
||||
extra/amdpci/headers/amdgpu_doorbell.h \
|
||||
$AMKERN_INC/soc15_ih_clientid.h \
|
||||
--clang-args="-include stdint.h" \
|
||||
-o $BASE/am/am.py
|
||||
fixup $BASE/am/am.py
|
||||
sed -i "s\(int64_t)\ \g" $BASE/am/am.py
|
||||
sed -i "s\AMDGPU_PTE_MTYPE_VG10(2)\AMDGPU_PTE_MTYPE_VG10(0, 2)\g" $BASE/am/am.py # incorrect parsing (TODO: remove when clang2py is gone).
|
||||
|
||||
clang2py -k cdefstum \
|
||||
$AMKERN_AMD/amdkfd/kfd_pm4_headers_ai.h \
|
||||
$AMKERN_AMD/amdgpu/soc15d.h \
|
||||
-o $BASE/am/pm4_soc15.py
|
||||
fixup $BASE/am/pm4_soc15.py
|
||||
|
||||
clang2py -k cdefstum \
|
||||
$AMKERN_AMD/amdkfd/kfd_pm4_headers_ai.h \
|
||||
$AMKERN_AMD/amdgpu/nvd.h \
|
||||
-o $BASE/am/pm4_nv.py
|
||||
fixup $BASE/am/pm4_nv.py
|
||||
|
||||
clang2py -k cdefstum \
|
||||
extra/hip_gpu_driver/sdma_registers.h \
|
||||
$AMKERN_AMD/amdgpu/vega10_sdma_pkt_open.h \
|
||||
--clang-args="-I/opt/rocm/include -x c++" \
|
||||
-o $BASE/am/sdma_4_0_0.py
|
||||
fixup $BASE/am/sdma_4_0_0.py
|
||||
|
||||
clang2py -k cdefstum \
|
||||
extra/hip_gpu_driver/sdma_registers.h \
|
||||
$AMKERN_AMD/amdgpu/navi10_sdma_pkt_open.h \
|
||||
--clang-args="-I/opt/rocm/include -x c++" \
|
||||
-o $BASE/am/sdma_5_0_0.py
|
||||
fixup $BASE/am/sdma_5_0_0.py
|
||||
|
||||
clang2py -k cdefstum \
|
||||
extra/hip_gpu_driver/sdma_registers.h \
|
||||
$AMKERN_AMD/amdgpu/sdma_v6_0_0_pkt_open.h \
|
||||
--clang-args="-I/opt/rocm/include -x c++" \
|
||||
-o $BASE/am/sdma_6_0_0.py
|
||||
fixup $BASE/am/sdma_6_0_0.py
|
||||
|
||||
clang2py -k cdefstum \
|
||||
$AMKERN_AMD/pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h \
|
||||
$AMKERN_AMD/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h \
|
||||
extra/amdpci/headers/amdgpu_smu.h \
|
||||
-o $BASE/am/smu_v13_0_0.py
|
||||
fixup $BASE/am/smu_v13_0_0.py
|
||||
|
||||
clang2py -k cdefstum \
|
||||
$AMKERN_AMD/pm/swsmu/inc/pmfw_if/smu_v14_0_0_pmfw.h \
|
||||
$AMKERN_AMD/pm/swsmu/inc/pmfw_if/smu_v14_0_2_ppsmc.h \
|
||||
$AMKERN_AMD/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0.h \
|
||||
extra/amdpci/headers/amdgpu_smu.h \
|
||||
--clang-args="-include stdint.h" \
|
||||
-o $BASE/am/smu_v14_0_2.py
|
||||
fixup $BASE/am/smu_v14_0_2.py
|
||||
}
|
||||
|
||||
generate_sqtt() {
|
||||
clang2py -k cdefstum \
|
||||
extra/sqtt/sqtt.h \
|
||||
-o $BASE/sqtt.py
|
||||
fixup $BASE/sqtt.py
|
||||
sed -i "s\import ctypes\import ctypes, os\g" $BASE/sqtt.py
|
||||
python3 -c "import tinygrad.runtime.autogen.sqtt"
|
||||
|
||||
ROCPROF_COMMIT_HASH=dd0485100971522cc4cd8ae136bdda431061a04d
|
||||
ROCPROF_SRC=/tmp/rocprof-trace-decoder-$ROCPROF_COMMIT_HASH
|
||||
if [ ! -d "$ROCPROF_SRC" ]; then
|
||||
git clone https://github.com/ROCm/rocprof-trace-decoder $ROCPROF_SRC
|
||||
pushd .
|
||||
cd $ROCPROF_SRC
|
||||
git reset --hard $ROCPROF_COMMIT_HASH
|
||||
popd
|
||||
fi
|
||||
|
||||
clang2py -k cdefstum \
|
||||
$ROCPROF_SRC/include/rocprof_trace_decoder.h \
|
||||
$ROCPROF_SRC/include/trace_decoder_instrument.h \
|
||||
$ROCPROF_SRC/include/trace_decoder_types.h \
|
||||
-o $BASE/rocprof.py
|
||||
fixup $BASE/rocprof.py
|
||||
sed -i '1s/^/# pylint: skip-file\n/' $BASE/rocprof.py
|
||||
sed -i "s/import ctypes/import ctypes, ctypes.util/g" $BASE/rocprof.py
|
||||
patch_dlopen $BASE/rocprof.py rocprof-trace-decoder "'/usr/local/lib/librocprof-trace-decoder.so'" "'/usr/local/lib/librocprof-trace-decoder.dylib'"
|
||||
sed -i "s/def _try_dlopen_rocprof-trace-decoder():/def _try_dlopen_rocprof_trace_decoder():/g" $BASE/rocprof.py
|
||||
sed -i "s|FunctionFactoryStub()|_try_dlopen_rocprof_trace_decoder()|g" $BASE/rocprof.py
|
||||
}
|
||||
|
||||
# Generate $BASE/webgpu.py from extra/webgpu/webgpu.h and point the generated
# loader at tinygrad.runtime.support.webgpu.WEBGPU_PATH.
generate_webgpu() {
  clang2py extra/webgpu/webgpu.h -o $BASE/webgpu.py
  fixup $BASE/webgpu.py
  # one sed pass; the expressions run per line in the same order as the former three separate passes
  sed -i \
    -e "s/FIXME_STUB/webgpu/g" \
    -e "s/FunctionFactoryStub()/ctypes.CDLL(webgpu_support.WEBGPU_PATH)/g" \
    -e "s/import ctypes/import ctypes, tinygrad.runtime.support.webgpu as webgpu_support/g" \
    "$BASE/webgpu.py"
  python3 -c "import tinygrad.runtime.autogen.webgpu"
}
|
||||
|
||||
# Generate $BASE/libusb.py from the system libusb-1.0 header. The generated
# module loads the library from $LIBUSB_PATH or ctypes.util.find_library and
# leaves the handle as None when neither yields a path.
generate_libusb() {
  clang2py -k cdefstum \
    /usr/include/libusb-1.0/libusb.h \
    -o "$BASE/libusb.py"

  fixup "$BASE/libusb.py"
  # '|' as the s-command delimiter: POSIX leaves a backslash delimiter undefined
  sed -i "s|import ctypes|import ctypes, ctypes.util, os|g" "$BASE/libusb.py"
  sed -i "s/FIXME_STUB/libusb/g" "$BASE/libusb.py"
  sed -i "s/libusb_le16_to_cpu = libusb_cpu_to_le16//g" "$BASE/libusb.py"
  sed -i "s/FunctionFactoryStub()/None if (lib_path:=os.getenv('LIBUSB_PATH', ctypes.util.find_library('usb-1.0'))) is None else ctypes.CDLL(lib_path)/g" "$BASE/libusb.py"
  python3 -c "import tinygrad.runtime.autogen.libusb"
}
|
||||
|
||||
# Generate $BASE/mesa.py (NIR, NAK and gallivm declarations) from a pinned Mesa
# source tree and a prebuilt libtinymesa_cpu.so, both cached under /tmp.
generate_mesa() {
  MESA_TAG="mesa-25.2.4"
  MESA_SRC=/tmp/mesa-$MESA_TAG
  TINYMESA_TAG=tinymesa-32dc66c
  TINYMESA_DIR=/tmp/tinymesa-$MESA_TAG-$TINYMESA_TAG/
  TINYMESA_SO=$TINYMESA_DIR/libtinymesa_cpu.so
  if [ ! -d "$MESA_SRC" ]; then
    git clone --depth 1 --branch $MESA_TAG https://gitlab.freedesktop.org/mesa/mesa.git $MESA_SRC
    pushd .
    cd $MESA_SRC
    # pin to the tag that was cloned; this used to name $MESA_COMMIT_HASH, which is never set in this function
    git reset --hard $MESA_TAG
    # clang 14 doesn't support packed enums
    sed -i "s/enum \w\+ \(\w\+\);$/uint8_t \1;/" $MESA_SRC/src/nouveau/headers/nv_device_info.h
    sed -i "s/enum \w\+ \(\w\+\);$/uint8_t \1;/" $MESA_SRC/src/nouveau/compiler/nak.h
    sed -i "s/nir_instr_type \(\w\+\);/uint8_t \1;/" $MESA_SRC/src/compiler/nir/nir.h
    # produce the generated headers that the headers parsed below include (each generator runs once)
    mkdir -p gen/util/format
    python3 src/util/format/u_format_table.py src/util/format/u_format.yaml --enums > gen/util/format/u_format_gen.h
    python3 src/compiler/nir/nir_opcodes_h.py > gen/nir_opcodes.h
    python3 src/compiler/nir/nir_intrinsics_h.py --outdir gen
    python3 src/compiler/nir/nir_intrinsics_indices_h.py --outdir gen
    python3 src/compiler/nir/nir_builder_opcodes_h.py > gen/nir_builder_opcodes.h
    python3 src/compiler/builtin_types_h.py gen/builtin_types.h
    popd
  fi

  if [ ! -d "$TINYMESA_DIR" ]; then
    mkdir -p "$TINYMESA_DIR"
    # -f: fail on an HTTP error instead of saving the error page as the .so;
    # remove the directory on failure so the next run retries the download
    if ! curl -fL https://github.com/sirhcm/tinymesa/releases/download/$TINYMESA_TAG/libtinymesa_cpu-$MESA_TAG-linux-amd64.so -o "$TINYMESA_SO"; then
      rm -rf "$TINYMESA_DIR"
      return 1
    fi
  fi

  clang2py -k cdefstu \
    $MESA_SRC/src/compiler/nir/nir.h \
    $MESA_SRC/src/compiler/nir/nir_builder.h \
    $MESA_SRC/src/compiler/nir/nir_shader_compiler_options.h \
    $MESA_SRC/src/compiler/nir/nir_serialize.h \
    $MESA_SRC/gen/nir_intrinsics.h \
    $MESA_SRC/src/nouveau/headers/nv_device_info.h \
    $MESA_SRC/src/nouveau/compiler/nak.h \
    $MESA_SRC/src/gallium/auxiliary/gallivm/lp_bld.h \
    $MESA_SRC/src/gallium/auxiliary/gallivm/lp_bld_passmgr.h \
    $MESA_SRC/src/gallium/auxiliary/gallivm/lp_bld_misc.h \
    $MESA_SRC/src/gallium/auxiliary/gallivm/lp_bld_type.h \
    $MESA_SRC/src/gallium/auxiliary/gallivm/lp_bld_init.h \
    $MESA_SRC/src/gallium/auxiliary/gallivm/lp_bld_nir.h \
    $MESA_SRC/src/gallium/auxiliary/gallivm/lp_bld_struct.h \
    $MESA_SRC/src/gallium/auxiliary/gallivm/lp_bld_jit_types.h \
    $MESA_SRC/src/gallium/auxiliary/gallivm/lp_bld_flow.h \
    $MESA_SRC/src/gallium/auxiliary/gallivm/lp_bld_const.h \
    $MESA_SRC/src/compiler/glsl_types.h \
    $MESA_SRC/src/util/blob.h \
    $MESA_SRC/src/util/ralloc.h \
    --clang-args="-DHAVE_ENDIAN_H -DHAVE_STRUCT_TIMESPEC -DHAVE_PTHREAD -I$MESA_SRC/src -I$MESA_SRC/include -I$MESA_SRC/gen -I$MESA_SRC/src/compiler/nir -I$MESA_SRC/src/gallium/auxiliary -I$MESA_SRC/src/gallium/include -I$(llvm-config-20 --includedir)" \
    -l $TINYMESA_SO \
    -o $BASE/mesa.py

  LVP_NIR_OPTIONS=$(./extra/mesa/lvp_nir_options.sh $MESA_SRC)

  fixup $BASE/mesa.py
  patch_dlopen $BASE/mesa.py tinymesa_cpu "(BASE:=os.getenv('MESA_PATH', f\"/usr{'/local/' if helpers.OSX else '/'}lib\"))+'/libtinymesa_cpu'+(EXT:='.dylib' if helpers.OSX else '.so')" "f'{BASE}/libtinymesa{EXT}'" "'/opt/homebrew/lib/libtinymesa_cpu.dylib'" "'/opt/homebrew/lib/libtinymesa.dylib'"
  echo "lvp_nir_options = gzip.decompress(base64.b64decode('$LVP_NIR_OPTIONS'))" >> $BASE/mesa.py
  # wrap every in_dll lookup in try/except so a symbol missing from the loaded library does not break the import
  sed -i "/in_dll/s/.*/try: &\nexcept (AttributeError, ValueError): pass/" $BASE/mesa.py
  sed -i "s/import ctypes/import ctypes, ctypes.util, os, gzip, base64, subprocess, tinygrad.helpers as helpers/" $BASE/mesa.py
  sed -i "s/ctypes.CDLL('.\+')/(dll := _try_dlopen_tinymesa_cpu())/" $BASE/mesa.py
  echo "def __getattr__(nm): raise AttributeError('LLVMpipe requires tinymesa_cpu' if 'tinymesa_cpu' not in dll._name else f'attribute {nm} not found') if dll else FileNotFoundError(f'libtinymesa not found (MESA_PATH={BASE}). See https://github.com/sirhcm/tinymesa ($TINYMESA_TAG, $MESA_TAG)')" >> $BASE/mesa.py
  sed -i "s/ctypes.glsl_base_type/glsl_base_type/" $BASE/mesa.py
  # bitfield bug in clang2py
  sed -i "s/('fp_fast_math', ctypes.c_bool, 9)/('fp_fast_math', ctypes.c_uint32, 9)/" $BASE/mesa.py
  sed -i "s/('\(\w\+\)', pipe_shader_type, 8)/('\1', ctypes.c_ubyte)/" $BASE/mesa.py
  sed -i "s/\([0-9]\+\)()/\1/" $BASE/mesa.py
  sed -i '/struct_nir_builder._pack_ = 1 # source:False/d' "$BASE/mesa.py"
  python3 -c "import tinygrad.runtime.autogen.mesa"
}
|
||||
|
||||
# Entry point: run the generator named by the first argument. "all" runs the
# fixed subset listed below. Anything else prints usage on stderr and exits
# non-zero, so a mistyped target cannot pass silently in CI.
case "$1" in
  opencl) generate_opencl ;;
  hip) generate_hip ;;
  comgr) generate_comgr ;;
  cuda) generate_cuda ;;
  nvrtc) generate_nvrtc ;;
  hsa) generate_hsa ;;
  kfd) generate_kfd ;;
  nv) generate_nv ;;
  amd) generate_amd ;;
  am) generate_am ;;
  sqtt) generate_sqtt ;;
  qcom) generate_qcom ;;
  io_uring) generate_io_uring ;;
  ib) generate_ib ;;
  llvm) generate_llvm ;;
  kgsl) generate_kgsl ;;
  adreno) generate_adreno ;;
  pci) generate_pci ;;
  vfio) generate_vfio ;;
  webgpu) generate_webgpu ;;
  libusb) generate_libusb ;;
  mesa) generate_mesa ;;
  all) generate_opencl; generate_hip; generate_comgr; generate_cuda; generate_nvrtc; generate_hsa; generate_kfd; generate_nv; generate_amd; generate_io_uring; generate_am; generate_webgpu; generate_mesa ;;
  *) echo "usage: $0 <type>" >&2; exit 1 ;;
esac
|
||||
|
|
@ -19,5 +19,6 @@ trap 'rm -f "$TMP"' EXIT
|
|||
EOF
|
||||
sed -n '/struct nir_shader_compiler_options/,/^}/{p;/^}/q}' $1/src/gallium/drivers/llvmpipe/lp_screen.c
|
||||
echo "int main(void) { write(1, &gallivm_nir_options, sizeof(gallivm_nir_options)); }"
|
||||
) | cc -x c -o $TMP - -I$1/src/compiler/nir -I$1/src -I$1/include && $TMP | gzip | base64 -w0
|
||||
) | cc -x c -o $TMP - -I$1/src/compiler/nir -I$1/src -I$1/include || exit 1
|
||||
|
||||
printf 'lvp_nir_options = gzip.decompress(base64.b64decode("%s"))' $("$TMP" | gzip | base64 -w0)
|
||||
|
|
|
|||
53
extra/nvJitLink.h
Normal file
53
extra/nvJitLink.h
Normal file
|
|
@ -0,0 +1,53 @@
|
|||
/*
|
||||
* NVIDIA_COPYRIGHT_BEGIN
|
||||
*
|
||||
* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* NVIDIA CORPORATION and its licensors retain all intellectual property
|
||||
* and proprietary rights in and to this software, related documentation
|
||||
* and any modifications thereto. Any use, reproduction, disclosure or
|
||||
* distribution of this software and related documentation without an express
|
||||
* license agreement from NVIDIA CORPORATION is strictly prohibited.
|
||||
*
|
||||
* NVIDIA_COPYRIGHT_END
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
/* Status code returned by every nvJitLink* function declared in this header.
 * NVJITLINK_SUCCESS is 0; each following enumerator is one greater than the previous. */
typedef enum {
|
||||
NVJITLINK_SUCCESS = 0,
|
||||
NVJITLINK_ERROR_UNRECOGNIZED_OPTION,
|
||||
NVJITLINK_ERROR_MISSING_ARCH,
|
||||
NVJITLINK_ERROR_INVALID_INPUT,
|
||||
NVJITLINK_ERROR_PTX_COMPILE,
|
||||
NVJITLINK_ERROR_NVVM_COMPILE,
|
||||
NVJITLINK_ERROR_INTERNAL
|
||||
} nvJitLinkResult;
|
||||
|
||||
/* Kind of input passed as `inputType` to nvJitLinkAddData / nvJitLinkAddFile.
 * NONE is 0 and CUBIN is 1; the remaining enumerators count up from there (PTX = 2 ... LIBRARY = 6). */
typedef enum {
|
||||
NVJITLINK_INPUT_NONE = 0,
|
||||
NVJITLINK_INPUT_CUBIN = 1,
|
||||
NVJITLINK_INPUT_PTX,
|
||||
NVJITLINK_INPUT_LTOIR,
|
||||
NVJITLINK_INPUT_FATBIN,
|
||||
NVJITLINK_INPUT_OBJECT,
|
||||
NVJITLINK_INPUT_LIBRARY
|
||||
} nvJitLinkInputType;
|
||||
|
||||
/* Opaque handle: `struct nvJitLink` is never defined in this header, so it can only be used through a pointer. */
typedef struct nvJitLink* nvJitLinkHandle;
|
||||
|
||||
/* Linker API. Every function reports its outcome as an nvJitLinkResult.
 * Create and Destroy take a pointer to the handle; all other calls take the handle by value. */
nvJitLinkResult nvJitLinkCreate(nvJitLinkHandle *handle, uint32_t numOptions, const char **options);
|
||||
nvJitLinkResult nvJitLinkDestroy(nvJitLinkHandle *handle);
|
||||
/* Inputs are added either from memory (data/size plus a name) or by file name. */
nvJitLinkResult nvJitLinkAddData(nvJitLinkHandle handle, nvJitLinkInputType inputType, const void *data, size_t size, const char *name);
|
||||
nvJitLinkResult nvJitLinkAddFile(nvJitLinkHandle handle, nvJitLinkInputType inputType, const char *fileName);
|
||||
nvJitLinkResult nvJitLinkComplete(nvJitLinkHandle handle);
|
||||
/* Outputs and logs come in pairs: a *Size call writing a size_t, and a getter taking a caller-supplied buffer.
 * NOTE(review): presumably the buffer must hold at least the reported size; confirm against NVIDIA's nvJitLink documentation. */
nvJitLinkResult nvJitLinkGetLinkedCubinSize(nvJitLinkHandle handle, size_t *size);
|
||||
nvJitLinkResult nvJitLinkGetLinkedCubin(nvJitLinkHandle handle, void *cubin);
|
||||
nvJitLinkResult nvJitLinkGetLinkedPtxSize(nvJitLinkHandle handle, size_t *size);
|
||||
nvJitLinkResult nvJitLinkGetLinkedPtx(nvJitLinkHandle handle, char *ptx);
|
||||
nvJitLinkResult nvJitLinkGetErrorLogSize(nvJitLinkHandle handle, size_t *size);
|
||||
nvJitLinkResult nvJitLinkGetErrorLog(nvJitLinkHandle handle, char *log);
|
||||
nvJitLinkResult nvJitLinkGetInfoLogSize(nvJitLinkHandle handle, size_t *size);
|
||||
nvJitLinkResult nvJitLinkGetInfoLog(nvJitLinkHandle handle, char *log);
|
||||
/* Writes the version through the two out-pointers; takes no handle. */
nvJitLinkResult nvJitLinkVersion(unsigned int *major, unsigned int *minor);
|
||||
|
|
@ -65,6 +65,8 @@
|
|||
#define NVCEC0_QMDV05_00_GRID_HEIGHT_RESUME MW(271:256)
|
||||
#define NVCEC0_QMDV05_00_GRID_DEPTH_RESUME MW(287:272)
|
||||
#define NVCEC0_QMDV05_00_RELEASE_ENABLE(i) MW((288+(i)*16):(288+(i)*16))
|
||||
#define NVCEC0_QMDV05_00_RELEASE0_ENABLE NVCEC0_QMDV05_00_RELEASE_ENABLE(0)
|
||||
#define NVCEC0_QMDV05_00_RELEASE1_ENABLE NVCEC0_QMDV05_00_RELEASE_ENABLE(1)
|
||||
#define NVCEC0_QMDV05_00_RELEASE_ENABLE_FALSE 0x00000000
|
||||
#define NVCEC0_QMDV05_00_RELEASE_ENABLE_TRUE 0x00000001
|
||||
#define NVCEC0_QMDV05_00_RELEASE_STRUCTURE_SIZE(i) MW((290+(i)*16):(289+(i)*16))
|
||||
|
|
|
|||
|
|
@ -58,7 +58,7 @@ def install_hook(c_function, python_function):
|
|||
return orig_func
|
||||
|
||||
# *** ioctl lib end ***
|
||||
import tinygrad.runtime.autogen.nv_gpu as nv_gpu
|
||||
from tinygrad.runtime.autogen import nv_gpu
|
||||
nvescs = {getattr(nv_gpu, x):x for x in dir(nv_gpu) if x.startswith("NV_ESC")}
|
||||
nvcmds = {getattr(nv_gpu, x):(x, getattr(nv_gpu, "struct_"+x+"_PARAMS", getattr(nv_gpu, "struct_"+x.replace("_CMD_", "_")+"_PARAMS", None))) for x in dir(nv_gpu) if \
|
||||
x.startswith("NV") and x[6:].startswith("_CTRL_") and isinstance(getattr(nv_gpu, x), int)}
|
||||
|
|
@ -272,4 +272,4 @@ def compare_launch_state(states, good_states):
|
|||
|
||||
return True, "PASS"
|
||||
|
||||
# IOCTL=1 CUDA=1 CUDA_PTX=1 python3 test/test_ops.py TestOps.test_tiny_add
|
||||
# IOCTL=1 CUDA=1 CUDA_PTX=1 python3 test/test_ops.py TestOps.test_tiny_add
|
||||
|
|
|
|||
|
|
@ -185,9 +185,7 @@ class RGP:
|
|||
magic_number=sqtt.SQTT_FILE_MAGIC_NUMBER,
|
||||
version_major=sqtt.SQTT_FILE_VERSION_MAJOR,
|
||||
version_minor=sqtt.SQTT_FILE_VERSION_MINOR,
|
||||
flags=sqtt.struct_sqtt_file_header_flags(
|
||||
_0=sqtt.union_sqtt_file_header_flags_0(value=1),
|
||||
),
|
||||
flags=sqtt.struct_sqtt_file_header_flags(value=1,),
|
||||
chunk_offset=ctypes.sizeof(sqtt.struct_sqtt_file_header),
|
||||
)
|
||||
chunks = [
|
||||
|
|
@ -265,7 +263,7 @@ class RGP:
|
|||
profiling_mode=sqtt.SQTT_PROFILING_MODE_PRESENT,
|
||||
instruction_trace_mode=sqtt.SQTT_INSTRUCTION_TRACE_FULL_FRAME if sqtt_itrace_enabled else sqtt.SQTT_INSTRUCTION_TRACE_DISABLED,
|
||||
instruction_trace_data=sqtt.union_sqtt_instruction_trace_data(
|
||||
shader_engine_filter=sqtt.struct_sqtt_instruction_trace_data_shader_engine_filter(mask=sqtt_itrace_se_mask),
|
||||
shader_engine_filter=sqtt.union_sqtt_instruction_trace_data_shader_engine_filter(mask=sqtt_itrace_se_mask),
|
||||
),
|
||||
)),
|
||||
*flatten([(
|
||||
|
|
@ -276,13 +274,11 @@ class RGP:
|
|||
),
|
||||
shader_engine_index=sqtt_event.se,
|
||||
sqtt_version={11: sqtt.SQTT_VERSION_3_2, 12: sqtt.SQTT_VERSION_3_3}.get(gfx_ver),
|
||||
_0=sqtt.union_sqtt_file_chunk_sqtt_desc_0(
|
||||
v1=sqtt.struct_sqtt_file_chunk_sqtt_desc_0_v1(
|
||||
instrumentation_spec_version=1,
|
||||
instrumentation_api_version=0,
|
||||
compute_unit_index=0,
|
||||
)
|
||||
),
|
||||
v1=sqtt.struct_sqtt_file_chunk_sqtt_desc_0_v1(
|
||||
instrumentation_spec_version=1,
|
||||
instrumentation_api_version=0,
|
||||
compute_unit_index=0,
|
||||
)
|
||||
)),
|
||||
RGPChunk(sqtt.struct_sqtt_file_chunk_sqtt_data(
|
||||
header=sqtt.struct_sqtt_file_chunk_header(
|
||||
|
|
|
|||
|
|
@ -30,7 +30,6 @@ packages = [
|
|||
'tinygrad.runtime',
|
||||
'tinygrad.runtime.autogen',
|
||||
'tinygrad.runtime.autogen.am',
|
||||
'tinygrad.runtime.autogen.nv',
|
||||
'tinygrad.runtime.graph',
|
||||
'tinygrad.runtime.support',
|
||||
'tinygrad.runtime.support.am',
|
||||
|
|
@ -162,7 +161,6 @@ exclude = [
|
|||
".git/",
|
||||
"docs/",
|
||||
"extra/",
|
||||
"tinygrad/runtime/autogen",
|
||||
"test/external/mlperf_resnet",
|
||||
"test/external/mlperf_unet3d",
|
||||
]
|
||||
|
|
@ -228,6 +226,7 @@ select = [
|
|||
"F541",
|
||||
"F841",
|
||||
]
|
||||
"tinygrad/runtime/autogen/**/*.py" = ["E501", "F401", "E722", "E731", "F821", "A006"]
|
||||
|
||||
[tool.ruff.format]
|
||||
exclude = ["*"]
|
||||
|
|
|
|||
|
|
@ -33,7 +33,7 @@ remu = _try_dlopen_remu()
|
|||
def create_sdma_packets():
|
||||
# TODO: clean up this, if we want to keep it
|
||||
structs = {}
|
||||
for name,pkt in [(name,s) for name,s in amd_gpu.__dict__.items() if name.startswith("struct_SDMA_PKT_") and name.endswith("_TAG")]:
|
||||
for name,pkt in [(name,s) for name,s in amd_gpu.__dict__.items() if name.startswith("rocr_AMD_SDMA_PKT_") and name.endswith("_TAG")]:
|
||||
names = set()
|
||||
fields = []
|
||||
for pkt_fields in pkt._fields_:
|
||||
|
|
@ -47,7 +47,7 @@ def create_sdma_packets():
|
|||
# merge together 64-bit fields, otherwise just append them
|
||||
if fname.endswith("_63_32") and fields[-1][0].endswith("_31_0"): fields[-1] = tuple([fname[:-6], ctypes.c_ulong, 64])
|
||||
else: fields.append(tuple([fname, *union_fields[1:]]))
|
||||
new_name = name[16:-4].lower()
|
||||
new_name = name[18:-4].lower()
|
||||
structs[new_name] = init_c_struct_t(tuple(fields))
|
||||
assert ctypes.sizeof(structs[new_name]) == ctypes.sizeof(pkt), f"{ctypes.sizeof(structs[new_name])} != {ctypes.sizeof(pkt)}"
|
||||
return type("SDMA_PKTS", (object, ), structs)
|
||||
|
|
|
|||
|
|
@ -164,7 +164,7 @@ def cuStreamWaitEvent(stream: Any, event, flags: int) -> int: return orig_cuda.C
|
|||
def cuCtxSynchronize() -> int: return orig_cuda.CUDA_SUCCESS
|
||||
|
||||
def cuGetErrorString(error: int, pStr) -> int:
|
||||
error_str = orig_cuda.cudaError_enum__enumvalues.get(error, "Unknown CUDA error").encode()
|
||||
error_str = orig_cuda.enum_cudaError_enum.get(error, "Unknown CUDA error").encode()
|
||||
buf = ctypes.create_string_buffer(error_str)
|
||||
# Set the pointer to point to our error string buffer
|
||||
pStr._obj.value = ctypes.cast(buf, ctypes.POINTER(ctypes.c_char))
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
import ctypes, mmap, collections, functools, os
|
||||
import tinygrad.runtime.autogen.nv_gpu as nv_gpu
|
||||
from tinygrad.runtime.autogen import nv_gpu
|
||||
from typing import Any
|
||||
from tinygrad.helpers import to_mv
|
||||
from test.mockgpu.driver import VirtDriver, VirtFileDesc, VirtFile
|
||||
|
|
@ -254,4 +254,4 @@ class NVDriver(VirtDriver):
|
|||
for gpu in self.gpus.values():
|
||||
for q in gpu.queues:
|
||||
if q.ctrl.GPGet != q.ctrl.GPPut:
|
||||
any_progress |= q.execute()
|
||||
any_progress |= q.execute()
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
import ctypes, time
|
||||
import tinygrad.runtime.autogen.nv_gpu as nv_gpu
|
||||
from tinygrad.runtime.autogen import nv_gpu
|
||||
from enum import Enum, auto
|
||||
from test.mockgpu.gpu import VirtGPU
|
||||
from test.mockgpu.helpers import _try_dlopen_gpuocelot
|
||||
|
|
|
|||
|
|
@ -416,6 +416,7 @@ def to_mv(ptr:int, sz:int) -> memoryview: return memoryview((ctypes.c_uint8 * sz
|
|||
def mv_address(mv):
  """Return, as an int, the address of the first byte of the writable buffer `mv`."""
  first_byte = ctypes.c_char.from_buffer(mv)
  return ctypes.addressof(first_byte)
|
||||
def to_char_p_p(options: list[bytes], to_type=ctypes.c_char):
  """Build a ctypes array of `POINTER(to_type)` with one entry per item of `options`.

  Each item is copied into its own NUL-terminated string buffer and the entry points at that copy.
  """
  ptr_type = ctypes.POINTER(to_type)
  pointers = [ctypes.cast(ctypes.create_string_buffer(opt), ptr_type) for opt in options]
  return (ptr_type * len(options))(*pointers)
|
||||
def charptr(s:str|bytes): return ctypes.cast(ctypes.c_char_p(s if isinstance(s, bytes) else s.encode()), ctypes.POINTER(ctypes.c_char))
|
||||
@functools.cache
|
||||
def init_c_struct_t(fields: tuple[tuple[str, type[ctypes._SimpleCData]], ...]):
|
||||
class CStruct(ctypes.Structure):
|
||||
|
|
|
|||
|
|
@ -1,10 +1,10 @@
|
|||
from typing import Callable, cast, Any
|
||||
from tinygrad.dtype import AddrSpace, DType, PtrDType, dtypes
|
||||
from tinygrad.helpers import DEBUG, OSX, unwrap
|
||||
from tinygrad.helpers import DEBUG, OSX, unwrap, charptr
|
||||
from tinygrad.renderer import Renderer
|
||||
from tinygrad.renderer.cstyle import CUDARenderer
|
||||
from tinygrad.uop.ops import GroupOp, Ops, UOp, PatternMatcher, UPat, range_str
|
||||
import tinygrad.runtime.autogen.mesa as mesa
|
||||
from tinygrad.runtime.autogen import mesa
|
||||
import base64, ctypes, ctypes.util, struct, functools, inspect
|
||||
|
||||
def g(s:str): return getattr(mesa, s)
|
||||
|
|
@ -51,7 +51,7 @@ def nir_instr(nc=1, bs=lambda: None, intrins=None, srcs=None, has_def=True, df=N
|
|||
instr = f(*args, **kwargs)
|
||||
if has_def: mesa.nir_def_init(instr.contents.instr, getattr(instr.contents, "def"), go(nc), go(bs))
|
||||
for k, v in go(intrins or {}).items():
|
||||
idx = mesa.nir_intrinsic_infos[instr.contents.intrinsic].index_map[g(f"NIR_INTRINSIC_{k}")]
|
||||
idx = mesa.nir_intrinsic_infos[instr.contents.intrinsic.value].index_map[g(f"NIR_INTRINSIC_{k}")]
|
||||
assert idx > 0
|
||||
instr.contents.const_index[idx - 1] = go(v)
|
||||
for i, src in enumerate(go(srcs or [])): ctypes.cast(instr.contents.src, ctypes.POINTER(mesa.nir_src))[i] = go(src)
|
||||
|
|
@ -177,7 +177,7 @@ class NIRRenderer(Renderer):
|
|||
elif u.op is Ops.AFTER:
|
||||
self.r[u] = self.r[u.src[0]]
|
||||
elif u.op == Ops.SINK:
|
||||
if u.arg is not None: self.b.shader.contents.info.name = mesa.char_pointer_cast(u.arg.function_name)
|
||||
if u.arg is not None: self.b.shader.contents.info.name = charptr(u.arg.function_name.encode())
|
||||
elif u.op == Ops.DEFINE_LOCAL:
|
||||
self.r[u] = nimm(self.b, self.b.shader.contents.info.shared_size, dtypes.long)
|
||||
self.b.shader.contents.info.shared_size += u.dtype.nbytes()
|
||||
|
|
|
|||
|
|
@ -1,11 +1,22 @@
|
|||
import importlib, pathlib
|
||||
from tinygrad.helpers import system
|
||||
import glob, importlib, pathlib, subprocess, tarfile
|
||||
from tinygrad.helpers import fetch, flatten, system
|
||||
|
||||
root = (here:=pathlib.Path(__file__).parent).parents[2]
|
||||
nv_src = "https://github.com/NVIDIA/open-gpu-kernel-modules/archive/81fe4fb417c8ac3b9bdcc1d56827d116743892a5.tar.gz"
|
||||
|
||||
def load(name, dll, files, **kwargs):
  """Import the autogen module `name`, generating `<name>.py` first when that file does not exist.

  `files` and the `args` keyword may be callables; they are only evaluated when generation is needed.
  Keywords consumed here (everything else is forwarded to `tinygrad.runtime.support.autogen.gen`):
    path: dotted package the module lives in (defaults to this package).
    tarball: URL that is fetched and extracted under /tmp. `{}` in `files`, `args` and the keys of
      `anon_names` is then replaced by `/tmp/<first member of the archive>`.
    preprocess: called with that directory before generation (only looked at when `tarball` is given).
    epilog: if callable, called with that directory when `tarball` is given, otherwise without arguments.
  """
  if not (f:=(root/(path:=kwargs.pop("path", __name__)).replace('.','/')/f"{name}.py")).exists():
    # resolve lazy `files` and `args` in one step
    files, kwargs['args'] = files() if callable(files) else files, args() if callable(args:=kwargs.get('args', [])) else args
    if (tarball:=kwargs.pop('tarball', None)):
      # dangerous for arbitrary urls! extractall trusts the member paths in the archive, so only pinned sources belong here
      with tarfile.open(fetch(tarball, gunzip=tarball.endswith("gz"))) as tf:
        tf.extractall("/tmp")
        base = f"/tmp/{tf.getnames()[0]}"
      files, kwargs['args'] = [str(f).format(base) for f in files], [a.format(base) for a in kwargs.get('args', [])]
      kwargs['anon_names'] = {k.format(base):v for k,v in kwargs.get('anon_names', {}).items()}
      if (preprocess:=kwargs.pop('preprocess', None)): preprocess(base)
    # expand glob patterns (string entries containing '*'); other entries are kept as they are
    files = flatten(sorted(glob.glob(p, recursive=True)) if isinstance(p, str) and '*' in p else [p] for p in files)
    kwargs['epilog'] = (epi(base) if tarball else epi()) if callable(epi:=kwargs.get('epilog', [])) else epi
    f.write_text(importlib.import_module("tinygrad.runtime.support.autogen").gen(dll, files, **kwargs))
  return importlib.import_module(f"{path}.{name.replace('/', '.')}")
|
||||
|
||||
|
|
@ -14,4 +25,100 @@ def __getattr__(nm):
|
|||
case "libc": return load("libc", ["find_library('c')"], lambda: (
|
||||
[i for i in system("dpkg -L libc6-dev").split() if 'sys/mman.h' in i or 'sys/syscall.h' in i] +
|
||||
["/usr/include/string.h", "/usr/include/elf.h", "/usr/include/unistd.h", "/usr/include/asm-generic/mman-common.h"]), use_errno=True)
|
||||
case "opencl": return load("opencl", ["find_library('OpenCL')"], ["/usr/include/CL/cl.h"])
|
||||
case "cuda": return load("cuda", ["find_library('cuda')"], ["/usr/include/cuda.h"], args=["-D__CUDA_API_VERSION_INTERNAL"], parse_macros=False)
|
||||
case "nvrtc": return load("nvrtc", ["find_library('nvrtc')"], ["/usr/include/nvrtc.h"])
|
||||
case "nvjitlink": load("nvjitlink", ["find_library('nvJitLink')"], [root/"extra/nvJitLink.h"])
|
||||
case "kfd": return load("kfd", [], ["/usr/include/linux/kfd_ioctl.h"])
|
||||
case "nv_gpu":
|
||||
return load("nv_gpu", [], [
|
||||
*[root/"extra/nv_gpu_driver"/s for s in ["clc6c0qmd.h","clcec0qmd.h"]], "{}/kernel-open/common/inc/nvmisc.h",
|
||||
*[f"{{}}/src/common/sdk/nvidia/inc/class/cl{s}.h" for s in ["0000", "0080", "2080", "2080_notification", "c56f", "c86f", "c96f", "c761",
|
||||
"83de", "c6c0", "cdc0"]],
|
||||
*[f"{{}}/kernel-open/nvidia-uvm/{s}.h" for s in ["clc6b5", "clc9b5", "uvm_ioctl", "uvm_linux_ioctl", "hwref/ampere/ga100/dev_fault"]],
|
||||
*[f"{{}}/src/nvidia/arch/nvalloc/unix/include/nv{s}.h" for s in ["_escape", "-ioctl", "-ioctl-numbers",
|
||||
"-ioctl-numa", "-unix-nvos-params-wrappers"]],
|
||||
*[f"{{}}/src/common/sdk/nvidia/inc/{s}.h" for s in ["alloc/alloc_channel", "nvos", "ctrl/ctrlc36f", "ctrl/ctrlcb33",
|
||||
"ctrl/ctrla06c", "ctrl/ctrl90f1"]],
|
||||
*[f"{{}}/src/common/sdk/nvidia/inc/ctrl/ctrl{s}/*.h" for s in ["0000", "0080", "2080", "83de"]],
|
||||
"{}/kernel-open/common/inc/nvstatus.h", "{}/src/nvidia/generated/g_allclasses.h"
|
||||
], args=[
|
||||
"-include", "{}/src/common/sdk/nvidia/inc/nvtypes.h", "-I{}/src/common/inc", "-I{}/kernel-open/nvidia-uvm", "-I{}/kernel-open/common/inc",
|
||||
"-I{}/src/common/sdk/nvidia/inc", "-I{}/src/nvidia/arch/nvalloc/unix/include", "-I{}/src/common/sdk/nvidia/inc/ctrl"
|
||||
], rules=[(r'MW\(([^:]+):(.+)\)',r'(\1, \2)')], tarball=nv_src, anon_names={"{}/kernel-open/common/inc/nvstatus.h:37":"nv_status_codes"})
|
||||
case "nv": return load("nv", [], [
|
||||
*[f"{{}}/src/nvidia/inc/kernel/gpu/{s}.h" for s in ["fsp/kern_fsp_cot_payload", "gsp/gsp_init_args"]],
|
||||
*[f"{{}}/src/nvidia/arch/nvalloc/common/inc/{s}.h" for s in ["gsp/gspifpub", "gsp/gsp_fw_wpr_meta", "gsp/gsp_fw_sr_meta", "rmRiscvUcode",
|
||||
"fsp/fsp_nvdm_format"]],
|
||||
*[f"{{}}/src/nvidia/inc/kernel/vgpu/{s}.h" for s in ["rpc_headers", "rpc_global_enums"]],
|
||||
"{}/src/common/uproc/os/common/include/libos_init_args.h", "{}/src/common/shared/msgq/inc/msgq/msgq_priv.h",
|
||||
"{}/src/nvidia/generated/g_rpc-structures.h", root/"extra/nv_gpu_driver/g_rpc-message-header.h", root/"extra/nv_gpu_driver/gsp_static_config.h",
|
||||
root/"extra/nv_gpu_driver/vbios.h", root/"extra/nv_gpu_driver/pci_exp_table.h"
|
||||
], args=[
|
||||
"-DRPC_MESSAGE_STRUCTURES", "-DRPC_STRUCTURES", "-include", "{}/src/common/sdk/nvidia/inc/nvtypes.h", "-I{}/src/nvidia/generated",
|
||||
"-I{}/src/common/inc", "-I{}/src/nvidia/inc", "-I{}/src/nvidia/interface/", "-I{}/src/nvidia/inc/kernel", "-I{}/src/nvidia/inc/libraries",
|
||||
"-I{}/src/nvidia/arch/nvalloc/common/inc", "-I{}/kernel-open/nvidia-uvm", "-I{}/kernel-open/common/inc", "-I{}/src/common/sdk/nvidia/inc",
|
||||
"-I{}/src/nvidia/arch/nvalloc/unix/include", "-I{}/src/common/sdk/nvidia/inc/ctrl"
|
||||
], tarball=nv_src, anon_names={
|
||||
"{}/src/nvidia/inc/kernel/vgpu/rpc_global_enums.h:8": "rpc_fns",
|
||||
"{}/src/nvidia/inc/kernel/vgpu/rpc_global_enums.h:244": "rpc_events"
|
||||
})
|
||||
# this defines all syscall numbers. should probably unify linux autogen?
|
||||
case "io_uring": return load("io_uring", [], ["/usr/include/liburing.h", "/usr/include/linux/io_uring.h", "/usr/include/asm-generic/unistd.h"],
|
||||
rules=[('__NR', 'NR')])
|
||||
case "ib": return load("ib", ["ibverbs"], ["/usr/include/infiniband/verbs.h", "/usr/include/infiniband/verbs_api.h",
|
||||
"/usr/include/infiniband/ib_user_ioctl_verbs.h","/usr/include/rdma/ib_user_verbs.h"], use_errno=True)
|
||||
case "llvm": return load("llvm", ["LLVM_PATH"], lambda: [system("llvm-config-20 --includedir")+"/llvm-c/**/*.h"],
|
||||
args=lambda: system("llvm-config-20 --cflags").split(), recsym=True,
|
||||
prolog=["from tinygrad.runtime.support.llvm import LLVM_PATH"])
|
||||
case "pci": return load("pci", [], ["/usr/include/linux/pci_regs.h"])
|
||||
case "vfio": return load("vfio", [], ["/usr/include/linux/vfio.h"])
|
||||
# could add rule: WGPU_COMMA -> ','
|
||||
case "webgpu":
|
||||
return load("webgpu", ["WEBGPU_PATH"], [root/"extra/webgpu/webgpu.h"], prolog=["from tinygrad.runtime.support.webgpu import WEBGPU_PATH"])
|
||||
case "libusb": return load("libusb", ["os.getenv('LIBUSB_PATH', find_library('usb-1.0'))"], ["/usr/include/libusb-1.0/libusb.h"])
|
||||
case "hip": return load("hip", ["os.getenv('ROCM_PATH', '/opt/rocm')+'/lib/libamdhip64.so'"], ["/opt/rocm/include/hip/hip_ext.h",
|
||||
"/opt/rocm/include/hip/hiprtc.h", "/opt/rocm/include/hip/hip_runtime_api.h", "/opt/rocm/include/hip/driver_types.h"],
|
||||
args=["-D__HIP_PLATFORM_AMD__", "-I/opt/rocm/include", "-x", "c++"])
|
||||
case "comgr" | "comgr_3":
|
||||
try: use_3 = nm == "comgr_3" or int(system("dpkg-query -f '${version}' -W comgr")[1]) >= 3
|
||||
except FileNotFoundError: use_3 = nm == "comgr_3"
|
||||
return load("comgr_3" if use_3 else "comgr", [
|
||||
"os.getenv('ROCM_PATH', '/opt/rocm')+'/lib/libamd_comgr.so'", "'/usr/local/lib/libamd_comgr.dylib'", "'/opt/homebrew/lib/libamd_comgr.dylib'"
|
||||
], ["/opt/rocm/include/amd_comgr/amd_comgr.h"], args=["-D__HIP_PLATFORM_AMD__", "-I/opt/rocm/include", "-x", "c++"])
|
||||
case "hsa": return load("hsa", ["os.getenv('ROCM_PATH', '/opt/rocm')+'/lib/libhsa-runtime64.so'", "find_library('hsa-runtime64')"], [
|
||||
f"/opt/rocm/include/hsa/{s}.h" for s in ["hsa", "hsa_ext_amd", "amd_hsa_signal", "amd_hsa_queue", "amd_hsa_kernel_code", "hsa_ext_finalize",
|
||||
"hsa_ext_image", "hsa_ven_amd_aqlprofile"] ], args=["-I/opt/rocm/include"])
|
||||
case "amd_gpu": return load("amd_gpu", [], [root/f"extra/hip_gpu_driver/{s}.h" for s in ["sdma_registers", "nvd", "gc_11_0_0_offset",
|
||||
"sienna_cichlid_ip_offset"]],
|
||||
args=["-I/opt/rocm/include", "-x", "c++"])
|
||||
case "kgsl": return load("kgsl", [], [root/"extra/qcom_gpu_driver/msm_kgsl.h"], args=["-D__user="])
|
||||
case "adreno": return load("adreno", [], [root/"extra/qcom_gpu_driver/a6xx.xml.h"])
|
||||
case "qcom_dsp":
|
||||
return load("qcom_dsp", [], [root/f"extra/dsp/include/{s}.h" for s in ["ion", "msm_ion", "adsprpc_shared", "remote_default", "apps_std"]])
|
||||
case "sqtt": return load("sqtt", [], [root/"extra/sqtt/sqtt.h"])
|
||||
case "rocprof":
|
||||
return load("rocprof", ["find_library('rocprof-trace-decoder')", p:="'/usr/local/lib/rocprof-trace-decoder.so'", p.replace('so','dylib')],
|
||||
[f"{{}}/include/{s}.h" for s in ["rocprof_trace_decoder", "trace_decoder_instrument", "trace_decoder_types"]],
|
||||
tarball="https://github.com/ROCm/rocprof-trace-decoder/archive/dd0485100971522cc4cd8ae136bdda431061a04d.tar.gz")
|
||||
case "mesa": return load("mesa", ["find_library('tinymesa_cpu')",
|
||||
"(BASE:=os.getenv('MESA_PATH', f\"/usr{'/local/' if OSX else '/'}lib\"))+'/libtinymesa_cpu'+(EXT:='.dylib' if OSX else '.so')",
|
||||
"f'{BASE}/libtinymesa{EXT}'", "'/opt/homebrew/lib/libtinymesa_cpu.dylib'", "'/opt/homebrew/lib/libtinymesa.dylib'"], [
|
||||
*[f"{{}}/src/compiler/nir/{s}.h" for s in ["nir", "nir_builder", "nir_shader_compiler_options", "nir_serialize"]], "{}/gen/nir_intrinsics.h",
|
||||
*[f"{{}}/src/nouveau/{s}.h" for s in ["headers/nv_device_info", "compiler/nak"]],
|
||||
*[f"{{}}/src/gallium/auxiliary/gallivm/lp_bld{s}.h" for s in ["", "_passmgr", "_misc", "_type", "_init", "_nir", "_struct", "_jit_types",
|
||||
"_flow", "_const"]],
|
||||
"{}/src/compiler/glsl_types.h", "{}/src/util/blob.h", "{}/src/util/ralloc.h"], args=lambda:[
|
||||
"-DHAVE_ENDIAN_H", "-DHAVE_STRUCT_TIMESPEC", "-DHAVE_PTHREAD", "-DHAVE_FUNC_ATTRIBUTE_PACKED", "-I{}/src", "-I{}/include", "-I{}/gen",
|
||||
"-I{}/src/compiler/nir", "-I{}/src/gallium/auxiliary", "-I{}/src/gallium/include", f"-I{system('llvm-config-20 --includedir')}"],
|
||||
preprocess=lambda path: subprocess.run("""mkdir -p gen/util/format
|
||||
python3 src/util/format/u_format_table.py src/util/format/u_format.yaml --enums > gen/util/format/u_format_gen.h
|
||||
python3 src/compiler/nir/nir_opcodes_h.py > gen/nir_opcodes.h
|
||||
python3 src/compiler/nir/nir_intrinsics_h.py --outdir gen
|
||||
python3 src/compiler/nir/nir_intrinsics_indices_h.py --outdir gen
|
||||
python3 src/compiler/nir/nir_builder_opcodes_h.py > gen/nir_builder_opcodes.h
|
||||
python3 src/compiler/nir/nir_intrinsics_h.py --outdir gen
|
||||
python3 src/compiler/builtin_types_h.py gen/builtin_types.h""", cwd=path, shell=True, check=True),
|
||||
tarball="https://gitlab.freedesktop.org/mesa/mesa/-/archive/mesa-25.2.4/mesa-25.2.4.tar.gz",
|
||||
prolog=["import gzip, base64", "from tinygrad.helpers import OSX"], epilog=lambda path: [system(f"{root}/extra/mesa/lvp_nir_options.sh {path}")])
|
||||
case _: raise AttributeError(f"no such autogen: {nm}")
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
23
tinygrad/runtime/autogen/am/__init__.py
Normal file
23
tinygrad/runtime/autogen/am/__init__.py
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
from tinygrad.runtime.autogen import load, root
|
||||
|
||||
am_src="https://github.com/ROCm/ROCK-Kernel-Driver/archive/ceb12c04e2b5b53ec0779362831f5ee40c4921e4.tar.gz"
|
||||
AMD="{}/drivers/gpu/drm/amd"
|
||||
inc = ["-include", "stdint.h"]
|
||||
|
||||
def __getattr__(nm):
|
||||
match nm:
|
||||
case "am": return load("am/am", [], [root/f"extra/amdpci/headers/{s}.h" for s in ["v11_structs", "v12_structs", "amdgpu_vm", "discovery",
|
||||
"amdgpu_ucode", "psp_gfx_if", "amdgpu_psp", "amdgpu_irq", "amdgpu_doorbell"]]+[f"{AMD}/include/soc15_ih_clientid.h"], args=inc, tarball=am_src)
|
||||
case "pm4_soc15": return load("am/pm4_soc15", [], [f"{AMD}/amdkfd/kfd_pm4_headers_ai.h", f"{AMD}/amdgpu/soc15d.h"], tarball=am_src)
|
||||
case "pm4_nv": return load("am/pm4_nv", [], [f"{AMD}/amdkfd/kfd_pm4_headers_ai.h", f"{AMD}/amdgpu/nvd.h"], tarball=am_src)
|
||||
case "sdma_4_0_0": return load("am/sdma_4_0_0", [], [root/"extra/hip_gpu_driver/sdma_registers.h", f"{AMD}/amdgpu/vega10_sdma_pkt_open.h"],
|
||||
args=["-I/opt/rocm/include", "-x", "c++"], tarball=am_src),
|
||||
case "sdma_5_0_0": return load("am/sdma_5_0_0", [], [root/"extra/hip_gpu_driver/sdma_registers.h", f"{AMD}/amdgpu/navi10_sdma_pkt_open.h"],
|
||||
args=["-I/opt/rocm/include", "-x", "c++"], tarball=am_src),
|
||||
case "sdma_6_0_0": return load("am/sdma_6_0_0", [], [root/"extra/hip_gpu_driver/sdma_registers.h", f"{AMD}//amdgpu/sdma_v6_0_0_pkt_open.h"],
|
||||
args=["-I/opt/rocm/include", "-x", "c++"], tarball=am_src),
|
||||
case "smu_v13_0_0": return load("am/smu_v13_0_0",[],[f"{AMD}/pm/swsmu/inc/pmfw_if/{s}.h" for s in ["smu_v13_0_0_ppsmc","smu13_driver_if_v13_0_0"]]
|
||||
+[root/"extra/amdpci/headers/amdgpu_smu.h"], tarball=am_src),
|
||||
case "smu_v14_0_2": return load("am/smu_v14_0_2", [], [f"{AMD}/pm/swsmu/inc/pmfw_if/{s}.h" for s in ["smu_v14_0_0_pmfw", "smu_v14_0_2_ppsmc",
|
||||
"smu14_driver_if_v14_0"]]+[root/"extra/amdpci/headers/amdgpu_smu.h"], args=inc, tarball=am_src)
|
||||
case _: raise AttributeError(f"no such autogen: {nm}")
|
||||
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
4837
tinygrad/runtime/autogen/nv.py
Normal file
4837
tinygrad/runtime/autogen/nv.py
Normal file
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
89
tinygrad/runtime/autogen/nvjitlink.py
Normal file
89
tinygrad/runtime/autogen/nvjitlink.py
Normal file
|
|
@ -0,0 +1,89 @@
|
|||
# mypy: ignore-errors
|
||||
import ctypes
|
||||
from tinygrad.helpers import unwrap
|
||||
from tinygrad.runtime.support.c import Struct, CEnum, _IO, _IOW, _IOR, _IOWR
|
||||
from ctypes.util import find_library
|
||||
def dll():
  """Open the nvJitLink shared library and return its `ctypes.CDLL` handle, or None if that fails.

  The library is located with `find_library('nvJitLink')`. Loading is best-effort: any ordinary failure
  (library not found, loader error) yields None instead of propagating.
  NOTE(review): `unwrap` presumably raises when `find_library` returns None -- confirm in tinygrad.helpers.
  The module rebinds the name `dll` to this function's result right after the definition.
  """
  # catch Exception rather than using a bare except, so KeyboardInterrupt/SystemExit still propagate
  try: return ctypes.CDLL(unwrap(find_library('nvJitLink')))
  except Exception: return None
|
||||
dll = dll()
|
||||
|
||||
nvJitLinkResult = CEnum(ctypes.c_uint32)
|
||||
NVJITLINK_SUCCESS = nvJitLinkResult.define('NVJITLINK_SUCCESS', 0)
|
||||
NVJITLINK_ERROR_UNRECOGNIZED_OPTION = nvJitLinkResult.define('NVJITLINK_ERROR_UNRECOGNIZED_OPTION', 1)
|
||||
NVJITLINK_ERROR_MISSING_ARCH = nvJitLinkResult.define('NVJITLINK_ERROR_MISSING_ARCH', 2)
|
||||
NVJITLINK_ERROR_INVALID_INPUT = nvJitLinkResult.define('NVJITLINK_ERROR_INVALID_INPUT', 3)
|
||||
NVJITLINK_ERROR_PTX_COMPILE = nvJitLinkResult.define('NVJITLINK_ERROR_PTX_COMPILE', 4)
|
||||
NVJITLINK_ERROR_NVVM_COMPILE = nvJitLinkResult.define('NVJITLINK_ERROR_NVVM_COMPILE', 5)
|
||||
NVJITLINK_ERROR_INTERNAL = nvJitLinkResult.define('NVJITLINK_ERROR_INTERNAL', 6)
|
||||
|
||||
nvJitLinkInputType = CEnum(ctypes.c_uint32)
|
||||
NVJITLINK_INPUT_NONE = nvJitLinkInputType.define('NVJITLINK_INPUT_NONE', 0)
|
||||
NVJITLINK_INPUT_CUBIN = nvJitLinkInputType.define('NVJITLINK_INPUT_CUBIN', 1)
|
||||
NVJITLINK_INPUT_PTX = nvJitLinkInputType.define('NVJITLINK_INPUT_PTX', 2)
|
||||
NVJITLINK_INPUT_LTOIR = nvJitLinkInputType.define('NVJITLINK_INPUT_LTOIR', 3)
|
||||
NVJITLINK_INPUT_FATBIN = nvJitLinkInputType.define('NVJITLINK_INPUT_FATBIN', 4)
|
||||
NVJITLINK_INPUT_OBJECT = nvJitLinkInputType.define('NVJITLINK_INPUT_OBJECT', 5)
|
||||
NVJITLINK_INPUT_LIBRARY = nvJitLinkInputType.define('NVJITLINK_INPUT_LIBRARY', 6)
|
||||
|
||||
class struct_nvJitLink(Struct): pass
|
||||
nvJitLinkHandle = ctypes.POINTER(struct_nvJitLink)
|
||||
uint32_t = ctypes.c_uint32
|
||||
# nvJitLinkResult nvJitLinkCreate(nvJitLinkHandle *handle, uint32_t numOptions, const char **options)
|
||||
try: (nvJitLinkCreate:=dll.nvJitLinkCreate).restype, nvJitLinkCreate.argtypes = nvJitLinkResult, [ctypes.POINTER(nvJitLinkHandle), uint32_t, ctypes.POINTER(ctypes.POINTER(ctypes.c_char))]
|
||||
except AttributeError: pass
|
||||
|
||||
# nvJitLinkResult nvJitLinkDestroy(nvJitLinkHandle *handle)
|
||||
try: (nvJitLinkDestroy:=dll.nvJitLinkDestroy).restype, nvJitLinkDestroy.argtypes = nvJitLinkResult, [ctypes.POINTER(nvJitLinkHandle)]
|
||||
except AttributeError: pass
|
||||
|
||||
size_t = ctypes.c_uint64
|
||||
# nvJitLinkResult nvJitLinkAddData(nvJitLinkHandle handle, nvJitLinkInputType inputType, const void *data, size_t size, const char *name)
|
||||
try: (nvJitLinkAddData:=dll.nvJitLinkAddData).restype, nvJitLinkAddData.argtypes = nvJitLinkResult, [nvJitLinkHandle, nvJitLinkInputType, ctypes.c_void_p, size_t, ctypes.POINTER(ctypes.c_char)]
|
||||
except AttributeError: pass
|
||||
|
||||
# nvJitLinkResult nvJitLinkAddFile(nvJitLinkHandle handle, nvJitLinkInputType inputType, const char *fileName)
|
||||
try: (nvJitLinkAddFile:=dll.nvJitLinkAddFile).restype, nvJitLinkAddFile.argtypes = nvJitLinkResult, [nvJitLinkHandle, nvJitLinkInputType, ctypes.POINTER(ctypes.c_char)]
|
||||
except AttributeError: pass
|
||||
|
||||
# nvJitLinkResult nvJitLinkComplete(nvJitLinkHandle handle)
|
||||
try: (nvJitLinkComplete:=dll.nvJitLinkComplete).restype, nvJitLinkComplete.argtypes = nvJitLinkResult, [nvJitLinkHandle]
|
||||
except AttributeError: pass
|
||||
|
||||
# nvJitLinkResult nvJitLinkGetLinkedCubinSize(nvJitLinkHandle handle, size_t *size)
|
||||
try: (nvJitLinkGetLinkedCubinSize:=dll.nvJitLinkGetLinkedCubinSize).restype, nvJitLinkGetLinkedCubinSize.argtypes = nvJitLinkResult, [nvJitLinkHandle, ctypes.POINTER(size_t)]
|
||||
except AttributeError: pass
|
||||
|
||||
# nvJitLinkResult nvJitLinkGetLinkedCubin(nvJitLinkHandle handle, void *cubin)
|
||||
try: (nvJitLinkGetLinkedCubin:=dll.nvJitLinkGetLinkedCubin).restype, nvJitLinkGetLinkedCubin.argtypes = nvJitLinkResult, [nvJitLinkHandle, ctypes.c_void_p]
|
||||
except AttributeError: pass
|
||||
|
||||
# nvJitLinkResult nvJitLinkGetLinkedPtxSize(nvJitLinkHandle handle, size_t *size)
|
||||
try: (nvJitLinkGetLinkedPtxSize:=dll.nvJitLinkGetLinkedPtxSize).restype, nvJitLinkGetLinkedPtxSize.argtypes = nvJitLinkResult, [nvJitLinkHandle, ctypes.POINTER(size_t)]
|
||||
except AttributeError: pass
|
||||
|
||||
# nvJitLinkResult nvJitLinkGetLinkedPtx(nvJitLinkHandle handle, char *ptx)
|
||||
try: (nvJitLinkGetLinkedPtx:=dll.nvJitLinkGetLinkedPtx).restype, nvJitLinkGetLinkedPtx.argtypes = nvJitLinkResult, [nvJitLinkHandle, ctypes.POINTER(ctypes.c_char)]
|
||||
except AttributeError: pass
|
||||
|
||||
# nvJitLinkResult nvJitLinkGetErrorLogSize(nvJitLinkHandle handle, size_t *size)
|
||||
try: (nvJitLinkGetErrorLogSize:=dll.nvJitLinkGetErrorLogSize).restype, nvJitLinkGetErrorLogSize.argtypes = nvJitLinkResult, [nvJitLinkHandle, ctypes.POINTER(size_t)]
|
||||
except AttributeError: pass
|
||||
|
||||
# nvJitLinkResult nvJitLinkGetErrorLog(nvJitLinkHandle handle, char *log)
|
||||
try: (nvJitLinkGetErrorLog:=dll.nvJitLinkGetErrorLog).restype, nvJitLinkGetErrorLog.argtypes = nvJitLinkResult, [nvJitLinkHandle, ctypes.POINTER(ctypes.c_char)]
|
||||
except AttributeError: pass
|
||||
|
||||
# nvJitLinkResult nvJitLinkGetInfoLogSize(nvJitLinkHandle handle, size_t *size)
|
||||
try: (nvJitLinkGetInfoLogSize:=dll.nvJitLinkGetInfoLogSize).restype, nvJitLinkGetInfoLogSize.argtypes = nvJitLinkResult, [nvJitLinkHandle, ctypes.POINTER(size_t)]
|
||||
except AttributeError: pass
|
||||
|
||||
# nvJitLinkResult nvJitLinkGetInfoLog(nvJitLinkHandle handle, char *log)
|
||||
try: (nvJitLinkGetInfoLog:=dll.nvJitLinkGetInfoLog).restype, nvJitLinkGetInfoLog.argtypes = nvJitLinkResult, [nvJitLinkHandle, ctypes.POINTER(ctypes.c_char)]
|
||||
except AttributeError: pass
|
||||
|
||||
# nvJitLinkResult nvJitLinkVersion(unsigned int *major, unsigned int *minor)
|
||||
try: (nvJitLinkVersion:=dll.nvJitLinkVersion).restype, nvJitLinkVersion.argtypes = nvJitLinkResult, [ctypes.POINTER(ctypes.c_uint32), ctypes.POINTER(ctypes.c_uint32)]
|
||||
except AttributeError: pass
|
||||
|
||||
|
|
@ -1,581 +1,113 @@
|
|||
# mypy: ignore-errors
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# TARGET arch is: []
|
||||
# WORD_SIZE is: 8
|
||||
# POINTER_SIZE is: 8
|
||||
# LONGDOUBLE_SIZE is: 16
|
||||
#
|
||||
import ctypes, ctypes.util
|
||||
import ctypes
|
||||
from tinygrad.helpers import unwrap
|
||||
from tinygrad.runtime.support.c import Struct, CEnum, _IO, _IOW, _IOR, _IOWR
|
||||
from ctypes.util import find_library
|
||||
def dll():
  """Open the nvrtc shared library and return its `ctypes.CDLL` handle, or None if that fails.

  The library is located with `find_library('nvrtc')`. Loading is best-effort: any ordinary failure
  (library not found, loader error) yields None instead of propagating.
  NOTE(review): `unwrap` presumably raises when `find_library` returns None -- confirm in tinygrad.helpers.
  The module rebinds the name `dll` to this function's result right after the definition.
  """
  # catch Exception rather than using a bare except, so KeyboardInterrupt/SystemExit still propagate
  try: return ctypes.CDLL(unwrap(find_library('nvrtc')))
  except Exception: return None
|
||||
dll = dll()
|
||||
|
||||
nvrtcResult = CEnum(ctypes.c_uint32)
|
||||
NVRTC_SUCCESS = nvrtcResult.define('NVRTC_SUCCESS', 0)
|
||||
NVRTC_ERROR_OUT_OF_MEMORY = nvrtcResult.define('NVRTC_ERROR_OUT_OF_MEMORY', 1)
|
||||
NVRTC_ERROR_PROGRAM_CREATION_FAILURE = nvrtcResult.define('NVRTC_ERROR_PROGRAM_CREATION_FAILURE', 2)
|
||||
NVRTC_ERROR_INVALID_INPUT = nvrtcResult.define('NVRTC_ERROR_INVALID_INPUT', 3)
|
||||
NVRTC_ERROR_INVALID_PROGRAM = nvrtcResult.define('NVRTC_ERROR_INVALID_PROGRAM', 4)
|
||||
NVRTC_ERROR_INVALID_OPTION = nvrtcResult.define('NVRTC_ERROR_INVALID_OPTION', 5)
|
||||
NVRTC_ERROR_COMPILATION = nvrtcResult.define('NVRTC_ERROR_COMPILATION', 6)
|
||||
NVRTC_ERROR_BUILTIN_OPERATION_FAILURE = nvrtcResult.define('NVRTC_ERROR_BUILTIN_OPERATION_FAILURE', 7)
|
||||
NVRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION = nvrtcResult.define('NVRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION', 8)
|
||||
NVRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION = nvrtcResult.define('NVRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION', 9)
|
||||
NVRTC_ERROR_NAME_EXPRESSION_NOT_VALID = nvrtcResult.define('NVRTC_ERROR_NAME_EXPRESSION_NOT_VALID', 10)
|
||||
NVRTC_ERROR_INTERNAL_ERROR = nvrtcResult.define('NVRTC_ERROR_INTERNAL_ERROR', 11)
|
||||
|
||||
_libraries = {}
|
||||
_libraries['libnvrtc.so'] = ctypes.CDLL(ctypes.util.find_library('nvrtc'))
|
||||
def string_cast(char_pointer, encoding='utf-8', errors='strict'):
    """Read the NUL-terminated C string behind `char_pointer`.

    The pointer is cast to `ctypes.c_char_p` and its value is read. A NULL pointer gives None.
    When `encoding` is None the raw bytes are returned; otherwise they are decoded with
    `encoding` and the given `errors` policy.
    """
    raw = ctypes.cast(char_pointer, ctypes.c_char_p).value
    if raw is None or encoding is None:
        # nothing to decode: NULL pointer, or the caller asked for bytes
        return raw
    return raw.decode(encoding, errors=errors)
|
||||
# const char *nvrtcGetErrorString(nvrtcResult result)
|
||||
try: (nvrtcGetErrorString:=dll.nvrtcGetErrorString).restype, nvrtcGetErrorString.argtypes = ctypes.POINTER(ctypes.c_char), [nvrtcResult]
|
||||
except AttributeError: pass
|
||||
|
||||
# nvrtcResult nvrtcVersion(int *major, int *minor)
|
||||
try: (nvrtcVersion:=dll.nvrtcVersion).restype, nvrtcVersion.argtypes = nvrtcResult, [ctypes.POINTER(ctypes.c_int32), ctypes.POINTER(ctypes.c_int32)]
|
||||
except AttributeError: pass
|
||||
|
||||
def char_pointer_cast(string, encoding='utf-8'):
    """Turn a Python string (or bytes) into a `ctypes.POINTER(ctypes.c_char)`.

    With an `encoding`, `string.encode(encoding)` is attempted first; objects without an
    `encode` attribute (e.g. bytes) are passed through untouched. The payload is wrapped in a
    `ctypes.c_char_p` and that is cast to a `char *` pointer.
    """
    payload = string
    if encoding is not None:
        try:
            payload = string.encode(encoding)
        except AttributeError:
            # bytes objects have no .encode in Python 3: use them as they are
            pass
    return ctypes.cast(ctypes.c_char_p(payload), ctypes.POINTER(ctypes.c_char))
|
||||
# nvrtcResult nvrtcGetNumSupportedArchs(int *numArchs)
|
||||
try: (nvrtcGetNumSupportedArchs:=dll.nvrtcGetNumSupportedArchs).restype, nvrtcGetNumSupportedArchs.argtypes = nvrtcResult, [ctypes.POINTER(ctypes.c_int32)]
|
||||
except AttributeError: pass
|
||||
|
||||
# nvrtcResult nvrtcGetSupportedArchs(int *supportedArchs)
|
||||
try: (nvrtcGetSupportedArchs:=dll.nvrtcGetSupportedArchs).restype, nvrtcGetSupportedArchs.argtypes = nvrtcResult, [ctypes.POINTER(ctypes.c_int32)]
|
||||
except AttributeError: pass
|
||||
|
||||
|
||||
class AsDictMixin:
    """Mixin for ctypes aggregates that converts an instance's fields into a plain dict."""
    import sys
    # NOTE(review): presumably selects ctypes' MS-compatible struct layout on Python 3.14+ -- confirm against the ctypes docs
    if sys.version_info >= (3, 14): _layout_ = 'ms'

    @classmethod
    def as_dict(cls, self):
        """Return `self`'s fields as a dict keyed by field name.

        `self` is passed explicitly because this is a classmethod. Anything that is not an
        `AsDictMixin` instance is returned unchanged, and a class without `_fields_` gives an
        empty dict. Fields whose name starts with 'PADDING_' are skipped. Arrays become lists,
        nested `AsDictMixin` values are converted recursively, pointers are dereferenced and a
        NULL pointer is reported as None.
        """
        result = {}
        if not isinstance(self, AsDictMixin):
            # not a structure, assume it's already a python object
            return self
        if not hasattr(cls, "_fields_"):
            return result
        for field_tuple in cls._fields_:
            field = field_tuple[0]
            if field.startswith('PADDING_'):
                continue
            value = getattr(self, field)
            type_ = type(value)
            if hasattr(value, "_length_") and hasattr(value, "_type_"):
                # array
                if not hasattr(type_, "as_dict"):
                    value = list(value)
                else:
                    type_ = type_._type_
                    value = [type_.as_dict(v) for v in value]
            # Probe the *type* for `contents`: reading it on a NULL pointer instance raises
            # ValueError, which hasattr() does not swallow, so probing the value would blow up
            # before the nullptr handling below is ever reached.
            elif hasattr(type_, "contents") and hasattr(value, "_type_"):
                # pointer
                try:
                    if not hasattr(type_, "as_dict"):
                        value = value.contents
                    else:
                        type_ = type_._type_
                        value = type_.as_dict(value.contents)
                except ValueError:
                    # nullptr
                    value = None
            elif isinstance(value, AsDictMixin):
                # other structure
                value = type_.as_dict(value)
            result[field] = value
        return result
|
||||
|
||||
|
||||
class Structure(ctypes.Structure, AsDictMixin):
    """`ctypes.Structure` base whose positional constructor skips padding fields and which can bind callbacks."""

    def __init__(self, *args, **kwds):
        # Positional values are paired with non-padding field names only, so they never fill PADDING* fields;
        # keyword arguments are applied afterwards.
        named = dict(zip(self.__class__._field_names_(), args))
        named.update(kwds)
        super().__init__(**named)

    @classmethod
    def _field_names_(cls):
        """Return the names of the fields that do not start with 'PADDING' (empty tuple without `_fields_`)."""
        if not hasattr(cls, '_fields_'):
            return ()
        return (entry[0] for entry in cls._fields_ if not entry[0].startswith('PADDING'))

    @classmethod
    def get_type(cls, field):
        """Return the declared type of the field named `field`, or None when there is no such field."""
        return next((entry[1] for entry in cls._fields_ if entry[0] == field), None)

    @classmethod
    def bind(cls, bound_fields):
        """Build an instance from the `bound_fields` mapping of field name -> value or callable.

        Field types exposing `restype` are treated as callback types: a supplied callable is
        wrapped, a supplied None gives an empty callback, and a missing one gets a stub that
        returns a default value. Other fields take the supplied value or `type_()`.
        Entries are removed from `bound_fields` as they are consumed (the caller's dict is
        mutated); leftovers raise ValueError.
        """
        def _forward(callback):
            # the extra function scope pins `callback` for this field
            return lambda *args: callback(*args)

        def _constant(default_):
            return lambda *args: default_

        fields = {}
        for name, type_ in cls._fields_:
            if not hasattr(type_, "restype"):
                # not a callback function, use default initialization
                if name in bound_fields:
                    fields[name] = bound_fields[name]
                    del bound_fields[name]
                else:
                    fields[name] = type_()
                continue
            if name in bound_fields:
                if bound_fields[name] is None:
                    fields[name] = type_()
                else:
                    fields[name] = type_(_forward(bound_fields[name]))
                del bound_fields[name]
                continue
            # default callback implementation (does nothing)
            try:
                default_ = type_(0).restype().value
            except TypeError:
                default_ = None
            fields[name] = type_(_constant(default_))
        if len(bound_fields) != 0:
            raise ValueError(
                "Cannot bind the following unknown callback(s) {}.{}".format(
                    cls.__name__, bound_fields.keys()
                ))
        return cls(**fields)
|
||||
|
||||
|
||||
class Union(ctypes.Union, AsDictMixin):
    """`ctypes.Union` base that also provides `AsDictMixin.as_dict`."""
|
||||
|
||||
|
||||
|
||||
_libraries['libnvJitLink.so'] = ctypes.CDLL(ctypes.util.find_library('nvJitLink'))
|
||||
c_int128 = ctypes.c_ubyte*16
|
||||
c_uint128 = c_int128
|
||||
void = None
|
||||
if ctypes.sizeof(ctypes.c_longdouble) == 16:
|
||||
c_long_double_t = ctypes.c_longdouble
|
||||
else:
|
||||
c_long_double_t = ctypes.c_ubyte*16
|
||||
|
||||
|
||||
|
||||
|
||||
# values for enumeration 'c__EA_nvrtcResult'
|
||||
c__EA_nvrtcResult__enumvalues = {
|
||||
0: 'NVRTC_SUCCESS',
|
||||
1: 'NVRTC_ERROR_OUT_OF_MEMORY',
|
||||
2: 'NVRTC_ERROR_PROGRAM_CREATION_FAILURE',
|
||||
3: 'NVRTC_ERROR_INVALID_INPUT',
|
||||
4: 'NVRTC_ERROR_INVALID_PROGRAM',
|
||||
5: 'NVRTC_ERROR_INVALID_OPTION',
|
||||
6: 'NVRTC_ERROR_COMPILATION',
|
||||
7: 'NVRTC_ERROR_BUILTIN_OPERATION_FAILURE',
|
||||
8: 'NVRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION',
|
||||
9: 'NVRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION',
|
||||
10: 'NVRTC_ERROR_NAME_EXPRESSION_NOT_VALID',
|
||||
11: 'NVRTC_ERROR_INTERNAL_ERROR',
|
||||
12: 'NVRTC_ERROR_TIME_FILE_WRITE_FAILED',
|
||||
}
|
||||
NVRTC_SUCCESS = 0
|
||||
NVRTC_ERROR_OUT_OF_MEMORY = 1
|
||||
NVRTC_ERROR_PROGRAM_CREATION_FAILURE = 2
|
||||
NVRTC_ERROR_INVALID_INPUT = 3
|
||||
NVRTC_ERROR_INVALID_PROGRAM = 4
|
||||
NVRTC_ERROR_INVALID_OPTION = 5
|
||||
NVRTC_ERROR_COMPILATION = 6
|
||||
NVRTC_ERROR_BUILTIN_OPERATION_FAILURE = 7
|
||||
NVRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION = 8
|
||||
NVRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION = 9
|
||||
NVRTC_ERROR_NAME_EXPRESSION_NOT_VALID = 10
|
||||
NVRTC_ERROR_INTERNAL_ERROR = 11
|
||||
NVRTC_ERROR_TIME_FILE_WRITE_FAILED = 12
|
||||
c__EA_nvrtcResult = ctypes.c_uint32 # enum
|
||||
nvrtcResult = c__EA_nvrtcResult
|
||||
nvrtcResult__enumvalues = c__EA_nvrtcResult__enumvalues
|
||||
try:
|
||||
nvrtcGetErrorString = _libraries['libnvrtc.so'].nvrtcGetErrorString
|
||||
nvrtcGetErrorString.restype = ctypes.POINTER(ctypes.c_char)
|
||||
nvrtcGetErrorString.argtypes = [nvrtcResult]
|
||||
except AttributeError:
|
||||
pass
|
||||
try:
|
||||
nvrtcVersion = _libraries['libnvrtc.so'].nvrtcVersion
|
||||
nvrtcVersion.restype = nvrtcResult
|
||||
nvrtcVersion.argtypes = [ctypes.POINTER(ctypes.c_int32), ctypes.POINTER(ctypes.c_int32)]
|
||||
except AttributeError:
|
||||
pass
|
||||
try:
|
||||
nvrtcGetNumSupportedArchs = _libraries['libnvrtc.so'].nvrtcGetNumSupportedArchs
|
||||
nvrtcGetNumSupportedArchs.restype = nvrtcResult
|
||||
nvrtcGetNumSupportedArchs.argtypes = [ctypes.POINTER(ctypes.c_int32)]
|
||||
except AttributeError:
|
||||
pass
|
||||
try:
|
||||
nvrtcGetSupportedArchs = _libraries['libnvrtc.so'].nvrtcGetSupportedArchs
|
||||
nvrtcGetSupportedArchs.restype = nvrtcResult
|
||||
nvrtcGetSupportedArchs.argtypes = [ctypes.POINTER(ctypes.c_int32)]
|
||||
except AttributeError:
|
||||
pass
|
||||
class struct__nvrtcProgram(Structure):
|
||||
pass
|
||||
|
||||
class struct__nvrtcProgram(Struct): pass
|
||||
nvrtcProgram = ctypes.POINTER(struct__nvrtcProgram)
|
||||
try:
|
||||
nvrtcCreateProgram = _libraries['libnvrtc.so'].nvrtcCreateProgram
|
||||
nvrtcCreateProgram.restype = nvrtcResult
|
||||
nvrtcCreateProgram.argtypes = [ctypes.POINTER(ctypes.POINTER(struct__nvrtcProgram)), ctypes.POINTER(ctypes.c_char), ctypes.POINTER(ctypes.c_char), ctypes.c_int32, ctypes.POINTER(ctypes.POINTER(ctypes.c_char)), ctypes.POINTER(ctypes.POINTER(ctypes.c_char))]
|
||||
except AttributeError:
|
||||
pass
|
||||
try:
|
||||
nvrtcDestroyProgram = _libraries['libnvrtc.so'].nvrtcDestroyProgram
|
||||
nvrtcDestroyProgram.restype = nvrtcResult
|
||||
nvrtcDestroyProgram.argtypes = [ctypes.POINTER(ctypes.POINTER(struct__nvrtcProgram))]
|
||||
except AttributeError:
|
||||
pass
|
||||
try:
|
||||
nvrtcCompileProgram = _libraries['libnvrtc.so'].nvrtcCompileProgram
|
||||
nvrtcCompileProgram.restype = nvrtcResult
|
||||
nvrtcCompileProgram.argtypes = [nvrtcProgram, ctypes.c_int32, ctypes.POINTER(ctypes.POINTER(ctypes.c_char))]
|
||||
except AttributeError:
|
||||
pass
|
||||
try:
|
||||
nvrtcGetPTXSize = _libraries['libnvrtc.so'].nvrtcGetPTXSize
|
||||
nvrtcGetPTXSize.restype = nvrtcResult
|
||||
nvrtcGetPTXSize.argtypes = [nvrtcProgram, ctypes.POINTER(ctypes.c_uint64)]
|
||||
except AttributeError:
|
||||
pass
|
||||
try:
|
||||
nvrtcGetPTX = _libraries['libnvrtc.so'].nvrtcGetPTX
|
||||
nvrtcGetPTX.restype = nvrtcResult
|
||||
nvrtcGetPTX.argtypes = [nvrtcProgram, ctypes.POINTER(ctypes.c_char)]
|
||||
except AttributeError:
|
||||
pass
|
||||
try:
|
||||
nvrtcGetCUBINSize = _libraries['libnvrtc.so'].nvrtcGetCUBINSize
|
||||
nvrtcGetCUBINSize.restype = nvrtcResult
|
||||
nvrtcGetCUBINSize.argtypes = [nvrtcProgram, ctypes.POINTER(ctypes.c_uint64)]
|
||||
except AttributeError:
|
||||
pass
|
||||
try:
|
||||
nvrtcGetCUBIN = _libraries['libnvrtc.so'].nvrtcGetCUBIN
|
||||
nvrtcGetCUBIN.restype = nvrtcResult
|
||||
nvrtcGetCUBIN.argtypes = [nvrtcProgram, ctypes.POINTER(ctypes.c_char)]
|
||||
except AttributeError:
|
||||
pass
|
||||
try:
|
||||
nvrtcGetNVVMSize = _libraries['libnvrtc.so'].nvrtcGetNVVMSize
|
||||
nvrtcGetNVVMSize.restype = nvrtcResult
|
||||
nvrtcGetNVVMSize.argtypes = [nvrtcProgram, ctypes.POINTER(ctypes.c_uint64)]
|
||||
except AttributeError:
|
||||
pass
|
||||
try:
|
||||
nvrtcGetNVVM = _libraries['libnvrtc.so'].nvrtcGetNVVM
|
||||
nvrtcGetNVVM.restype = nvrtcResult
|
||||
nvrtcGetNVVM.argtypes = [nvrtcProgram, ctypes.POINTER(ctypes.c_char)]
|
||||
except AttributeError:
|
||||
pass
|
||||
try:
|
||||
nvrtcGetLTOIRSize = _libraries['libnvrtc.so'].nvrtcGetLTOIRSize
|
||||
nvrtcGetLTOIRSize.restype = nvrtcResult
|
||||
nvrtcGetLTOIRSize.argtypes = [nvrtcProgram, ctypes.POINTER(ctypes.c_uint64)]
|
||||
except AttributeError:
|
||||
pass
|
||||
try:
|
||||
nvrtcGetLTOIR = _libraries['libnvrtc.so'].nvrtcGetLTOIR
|
||||
nvrtcGetLTOIR.restype = nvrtcResult
|
||||
nvrtcGetLTOIR.argtypes = [nvrtcProgram, ctypes.POINTER(ctypes.c_char)]
|
||||
except AttributeError:
|
||||
pass
|
||||
try:
|
||||
nvrtcGetOptiXIRSize = _libraries['libnvrtc.so'].nvrtcGetOptiXIRSize
|
||||
nvrtcGetOptiXIRSize.restype = nvrtcResult
|
||||
nvrtcGetOptiXIRSize.argtypes = [nvrtcProgram, ctypes.POINTER(ctypes.c_uint64)]
|
||||
except AttributeError:
|
||||
pass
|
||||
try:
|
||||
nvrtcGetOptiXIR = _libraries['libnvrtc.so'].nvrtcGetOptiXIR
|
||||
nvrtcGetOptiXIR.restype = nvrtcResult
|
||||
nvrtcGetOptiXIR.argtypes = [nvrtcProgram, ctypes.POINTER(ctypes.c_char)]
|
||||
except AttributeError:
|
||||
pass
|
||||
try:
|
||||
nvrtcGetProgramLogSize = _libraries['libnvrtc.so'].nvrtcGetProgramLogSize
|
||||
nvrtcGetProgramLogSize.restype = nvrtcResult
|
||||
nvrtcGetProgramLogSize.argtypes = [nvrtcProgram, ctypes.POINTER(ctypes.c_uint64)]
|
||||
except AttributeError:
|
||||
pass
|
||||
try:
|
||||
nvrtcGetProgramLog = _libraries['libnvrtc.so'].nvrtcGetProgramLog
|
||||
nvrtcGetProgramLog.restype = nvrtcResult
|
||||
nvrtcGetProgramLog.argtypes = [nvrtcProgram, ctypes.POINTER(ctypes.c_char)]
|
||||
except AttributeError:
|
||||
pass
|
||||
try:
|
||||
nvrtcAddNameExpression = _libraries['libnvrtc.so'].nvrtcAddNameExpression
|
||||
nvrtcAddNameExpression.restype = nvrtcResult
|
||||
nvrtcAddNameExpression.argtypes = [nvrtcProgram, ctypes.POINTER(ctypes.c_char)]
|
||||
except AttributeError:
|
||||
pass
|
||||
try:
|
||||
nvrtcGetLoweredName = _libraries['libnvrtc.so'].nvrtcGetLoweredName
|
||||
nvrtcGetLoweredName.restype = nvrtcResult
|
||||
nvrtcGetLoweredName.argtypes = [nvrtcProgram, ctypes.POINTER(ctypes.c_char), ctypes.POINTER(ctypes.POINTER(ctypes.c_char))]
|
||||
except AttributeError:
|
||||
pass
|
||||
# nvrtcResult nvrtcCreateProgram(nvrtcProgram *prog, const char *src, const char *name, int numHeaders, const char *const *headers, const char *const *includeNames)
|
||||
try: (nvrtcCreateProgram:=dll.nvrtcCreateProgram).restype, nvrtcCreateProgram.argtypes = nvrtcResult, [ctypes.POINTER(nvrtcProgram), ctypes.POINTER(ctypes.c_char), ctypes.POINTER(ctypes.c_char), ctypes.c_int32, ctypes.POINTER(ctypes.POINTER(ctypes.c_char)), ctypes.POINTER(ctypes.POINTER(ctypes.c_char))]
|
||||
except AttributeError: pass
|
||||
|
||||
# values for enumeration 'c__EA_nvJitLinkResult'
|
||||
c__EA_nvJitLinkResult__enumvalues = {
|
||||
0: 'NVJITLINK_SUCCESS',
|
||||
1: 'NVJITLINK_ERROR_UNRECOGNIZED_OPTION',
|
||||
2: 'NVJITLINK_ERROR_MISSING_ARCH',
|
||||
3: 'NVJITLINK_ERROR_INVALID_INPUT',
|
||||
4: 'NVJITLINK_ERROR_PTX_COMPILE',
|
||||
5: 'NVJITLINK_ERROR_NVVM_COMPILE',
|
||||
6: 'NVJITLINK_ERROR_INTERNAL',
|
||||
7: 'NVJITLINK_ERROR_THREADPOOL',
|
||||
8: 'NVJITLINK_ERROR_UNRECOGNIZED_INPUT',
|
||||
}
|
||||
NVJITLINK_SUCCESS = 0
|
||||
NVJITLINK_ERROR_UNRECOGNIZED_OPTION = 1
|
||||
NVJITLINK_ERROR_MISSING_ARCH = 2
|
||||
NVJITLINK_ERROR_INVALID_INPUT = 3
|
||||
NVJITLINK_ERROR_PTX_COMPILE = 4
|
||||
NVJITLINK_ERROR_NVVM_COMPILE = 5
|
||||
NVJITLINK_ERROR_INTERNAL = 6
|
||||
NVJITLINK_ERROR_THREADPOOL = 7
|
||||
NVJITLINK_ERROR_UNRECOGNIZED_INPUT = 8
|
||||
c__EA_nvJitLinkResult = ctypes.c_uint32 # enum
|
||||
nvJitLinkResult = c__EA_nvJitLinkResult
|
||||
nvJitLinkResult__enumvalues = c__EA_nvJitLinkResult__enumvalues
|
||||
# nvrtcResult nvrtcDestroyProgram(nvrtcProgram *prog)
|
||||
try: (nvrtcDestroyProgram:=dll.nvrtcDestroyProgram).restype, nvrtcDestroyProgram.argtypes = nvrtcResult, [ctypes.POINTER(nvrtcProgram)]
|
||||
except AttributeError: pass
|
||||
|
||||
# values for enumeration 'c__EA_nvJitLinkInputType'
|
||||
c__EA_nvJitLinkInputType__enumvalues = {
|
||||
0: 'NVJITLINK_INPUT_NONE',
|
||||
1: 'NVJITLINK_INPUT_CUBIN',
|
||||
2: 'NVJITLINK_INPUT_PTX',
|
||||
3: 'NVJITLINK_INPUT_LTOIR',
|
||||
4: 'NVJITLINK_INPUT_FATBIN',
|
||||
5: 'NVJITLINK_INPUT_OBJECT',
|
||||
6: 'NVJITLINK_INPUT_LIBRARY',
|
||||
10: 'NVJITLINK_INPUT_ANY',
|
||||
}
|
||||
NVJITLINK_INPUT_NONE = 0
|
||||
NVJITLINK_INPUT_CUBIN = 1
|
||||
NVJITLINK_INPUT_PTX = 2
|
||||
NVJITLINK_INPUT_LTOIR = 3
|
||||
NVJITLINK_INPUT_FATBIN = 4
|
||||
NVJITLINK_INPUT_OBJECT = 5
|
||||
NVJITLINK_INPUT_LIBRARY = 6
|
||||
NVJITLINK_INPUT_ANY = 10
|
||||
c__EA_nvJitLinkInputType = ctypes.c_uint32 # enum
|
||||
nvJitLinkInputType = c__EA_nvJitLinkInputType
|
||||
nvJitLinkInputType__enumvalues = c__EA_nvJitLinkInputType__enumvalues
|
||||
class struct_nvJitLink(Structure):
|
||||
pass
|
||||
# nvrtcResult nvrtcCompileProgram(nvrtcProgram prog, int numOptions, const char *const *options)
|
||||
try: (nvrtcCompileProgram:=dll.nvrtcCompileProgram).restype, nvrtcCompileProgram.argtypes = nvrtcResult, [nvrtcProgram, ctypes.c_int32, ctypes.POINTER(ctypes.POINTER(ctypes.c_char))]
|
||||
except AttributeError: pass
|
||||
|
||||
nvJitLinkHandle = ctypes.POINTER(struct_nvJitLink)
|
||||
uint32_t = ctypes.c_uint32
|
||||
try:
|
||||
__nvJitLinkCreate_12_4 = _libraries['libnvJitLink.so'].__nvJitLinkCreate_12_4
|
||||
__nvJitLinkCreate_12_4.restype = nvJitLinkResult
|
||||
__nvJitLinkCreate_12_4.argtypes = [ctypes.POINTER(ctypes.POINTER(struct_nvJitLink)), uint32_t, ctypes.POINTER(ctypes.POINTER(ctypes.c_char))]
|
||||
except AttributeError:
|
||||
pass
|
||||
try:
|
||||
nvJitLinkCreate = _libraries['libnvJitLink.so'].nvJitLinkCreate
|
||||
nvJitLinkCreate.restype = nvJitLinkResult
|
||||
nvJitLinkCreate.argtypes = [ctypes.POINTER(ctypes.POINTER(struct_nvJitLink)), uint32_t, ctypes.POINTER(ctypes.POINTER(ctypes.c_char))]
|
||||
except AttributeError:
|
||||
pass
|
||||
try:
|
||||
__nvJitLinkDestroy_12_4 = _libraries['libnvJitLink.so'].__nvJitLinkDestroy_12_4
|
||||
__nvJitLinkDestroy_12_4.restype = nvJitLinkResult
|
||||
__nvJitLinkDestroy_12_4.argtypes = [ctypes.POINTER(ctypes.POINTER(struct_nvJitLink))]
|
||||
except AttributeError:
|
||||
pass
|
||||
try:
|
||||
nvJitLinkDestroy = _libraries['libnvJitLink.so'].nvJitLinkDestroy
|
||||
nvJitLinkDestroy.restype = nvJitLinkResult
|
||||
nvJitLinkDestroy.argtypes = [ctypes.POINTER(ctypes.POINTER(struct_nvJitLink))]
|
||||
except AttributeError:
|
||||
pass
|
||||
size_t = ctypes.c_uint64
|
||||
try:
|
||||
__nvJitLinkAddData_12_4 = _libraries['libnvJitLink.so'].__nvJitLinkAddData_12_4
|
||||
__nvJitLinkAddData_12_4.restype = nvJitLinkResult
|
||||
__nvJitLinkAddData_12_4.argtypes = [nvJitLinkHandle, nvJitLinkInputType, ctypes.POINTER(None), size_t, ctypes.POINTER(ctypes.c_char)]
|
||||
except AttributeError:
|
||||
pass
|
||||
try:
|
||||
nvJitLinkAddData = _libraries['libnvJitLink.so'].nvJitLinkAddData
|
||||
nvJitLinkAddData.restype = nvJitLinkResult
|
||||
nvJitLinkAddData.argtypes = [nvJitLinkHandle, nvJitLinkInputType, ctypes.POINTER(None), size_t, ctypes.POINTER(ctypes.c_char)]
|
||||
except AttributeError:
|
||||
pass
|
||||
try:
|
||||
__nvJitLinkAddFile_12_4 = _libraries['libnvJitLink.so'].__nvJitLinkAddFile_12_4
|
||||
__nvJitLinkAddFile_12_4.restype = nvJitLinkResult
|
||||
__nvJitLinkAddFile_12_4.argtypes = [nvJitLinkHandle, nvJitLinkInputType, ctypes.POINTER(ctypes.c_char)]
|
||||
except AttributeError:
|
||||
pass
|
||||
try:
|
||||
nvJitLinkAddFile = _libraries['libnvJitLink.so'].nvJitLinkAddFile
|
||||
nvJitLinkAddFile.restype = nvJitLinkResult
|
||||
nvJitLinkAddFile.argtypes = [nvJitLinkHandle, nvJitLinkInputType, ctypes.POINTER(ctypes.c_char)]
|
||||
except AttributeError:
|
||||
pass
|
||||
try:
|
||||
__nvJitLinkComplete_12_4 = _libraries['libnvJitLink.so'].__nvJitLinkComplete_12_4
|
||||
__nvJitLinkComplete_12_4.restype = nvJitLinkResult
|
||||
__nvJitLinkComplete_12_4.argtypes = [nvJitLinkHandle]
|
||||
except AttributeError:
|
||||
pass
|
||||
try:
|
||||
nvJitLinkComplete = _libraries['libnvJitLink.so'].nvJitLinkComplete
|
||||
nvJitLinkComplete.restype = nvJitLinkResult
|
||||
nvJitLinkComplete.argtypes = [nvJitLinkHandle]
|
||||
except AttributeError:
|
||||
pass
|
||||
try:
|
||||
__nvJitLinkGetLinkedCubinSize_12_4 = _libraries['libnvJitLink.so'].__nvJitLinkGetLinkedCubinSize_12_4
|
||||
__nvJitLinkGetLinkedCubinSize_12_4.restype = nvJitLinkResult
|
||||
__nvJitLinkGetLinkedCubinSize_12_4.argtypes = [nvJitLinkHandle, ctypes.POINTER(ctypes.c_uint64)]
|
||||
except AttributeError:
|
||||
pass
|
||||
try:
|
||||
nvJitLinkGetLinkedCubinSize = _libraries['libnvJitLink.so'].nvJitLinkGetLinkedCubinSize
|
||||
nvJitLinkGetLinkedCubinSize.restype = nvJitLinkResult
|
||||
nvJitLinkGetLinkedCubinSize.argtypes = [nvJitLinkHandle, ctypes.POINTER(ctypes.c_uint64)]
|
||||
except AttributeError:
|
||||
pass
|
||||
try:
|
||||
__nvJitLinkGetLinkedCubin_12_4 = _libraries['libnvJitLink.so'].__nvJitLinkGetLinkedCubin_12_4
|
||||
__nvJitLinkGetLinkedCubin_12_4.restype = nvJitLinkResult
|
||||
__nvJitLinkGetLinkedCubin_12_4.argtypes = [nvJitLinkHandle, ctypes.POINTER(None)]
|
||||
except AttributeError:
|
||||
pass
|
||||
try:
|
||||
nvJitLinkGetLinkedCubin = _libraries['libnvJitLink.so'].nvJitLinkGetLinkedCubin
|
||||
nvJitLinkGetLinkedCubin.restype = nvJitLinkResult
|
||||
nvJitLinkGetLinkedCubin.argtypes = [nvJitLinkHandle, ctypes.POINTER(None)]
|
||||
except AttributeError:
|
||||
pass
|
||||
try:
|
||||
__nvJitLinkGetLinkedPtxSize_12_4 = _libraries['libnvJitLink.so'].__nvJitLinkGetLinkedPtxSize_12_4
|
||||
__nvJitLinkGetLinkedPtxSize_12_4.restype = nvJitLinkResult
|
||||
__nvJitLinkGetLinkedPtxSize_12_4.argtypes = [nvJitLinkHandle, ctypes.POINTER(ctypes.c_uint64)]
|
||||
except AttributeError:
|
||||
pass
|
||||
try:
|
||||
nvJitLinkGetLinkedPtxSize = _libraries['libnvJitLink.so'].nvJitLinkGetLinkedPtxSize
|
||||
nvJitLinkGetLinkedPtxSize.restype = nvJitLinkResult
|
||||
nvJitLinkGetLinkedPtxSize.argtypes = [nvJitLinkHandle, ctypes.POINTER(ctypes.c_uint64)]
|
||||
except AttributeError:
|
||||
pass
|
||||
try:
|
||||
__nvJitLinkGetLinkedPtx_12_4 = _libraries['libnvJitLink.so'].__nvJitLinkGetLinkedPtx_12_4
|
||||
__nvJitLinkGetLinkedPtx_12_4.restype = nvJitLinkResult
|
||||
__nvJitLinkGetLinkedPtx_12_4.argtypes = [nvJitLinkHandle, ctypes.POINTER(ctypes.c_char)]
|
||||
except AttributeError:
|
||||
pass
|
||||
try:
|
||||
nvJitLinkGetLinkedPtx = _libraries['libnvJitLink.so'].nvJitLinkGetLinkedPtx
|
||||
nvJitLinkGetLinkedPtx.restype = nvJitLinkResult
|
||||
nvJitLinkGetLinkedPtx.argtypes = [nvJitLinkHandle, ctypes.POINTER(ctypes.c_char)]
|
||||
except AttributeError:
|
||||
pass
|
||||
try:
|
||||
__nvJitLinkGetErrorLogSize_12_4 = _libraries['libnvJitLink.so'].__nvJitLinkGetErrorLogSize_12_4
|
||||
__nvJitLinkGetErrorLogSize_12_4.restype = nvJitLinkResult
|
||||
__nvJitLinkGetErrorLogSize_12_4.argtypes = [nvJitLinkHandle, ctypes.POINTER(ctypes.c_uint64)]
|
||||
except AttributeError:
|
||||
pass
|
||||
try:
|
||||
nvJitLinkGetErrorLogSize = _libraries['libnvJitLink.so'].nvJitLinkGetErrorLogSize
|
||||
nvJitLinkGetErrorLogSize.restype = nvJitLinkResult
|
||||
nvJitLinkGetErrorLogSize.argtypes = [nvJitLinkHandle, ctypes.POINTER(ctypes.c_uint64)]
|
||||
except AttributeError:
|
||||
pass
|
||||
try:
|
||||
__nvJitLinkGetErrorLog_12_4 = _libraries['libnvJitLink.so'].__nvJitLinkGetErrorLog_12_4
|
||||
__nvJitLinkGetErrorLog_12_4.restype = nvJitLinkResult
|
||||
__nvJitLinkGetErrorLog_12_4.argtypes = [nvJitLinkHandle, ctypes.POINTER(ctypes.c_char)]
|
||||
except AttributeError:
|
||||
pass
|
||||
try:
|
||||
nvJitLinkGetErrorLog = _libraries['libnvJitLink.so'].nvJitLinkGetErrorLog
|
||||
nvJitLinkGetErrorLog.restype = nvJitLinkResult
|
||||
nvJitLinkGetErrorLog.argtypes = [nvJitLinkHandle, ctypes.POINTER(ctypes.c_char)]
|
||||
except AttributeError:
|
||||
pass
|
||||
try:
|
||||
__nvJitLinkGetInfoLogSize_12_4 = _libraries['libnvJitLink.so'].__nvJitLinkGetInfoLogSize_12_4
|
||||
__nvJitLinkGetInfoLogSize_12_4.restype = nvJitLinkResult
|
||||
__nvJitLinkGetInfoLogSize_12_4.argtypes = [nvJitLinkHandle, ctypes.POINTER(ctypes.c_uint64)]
|
||||
except AttributeError:
|
||||
pass
|
||||
try:
|
||||
nvJitLinkGetInfoLogSize = _libraries['libnvJitLink.so'].nvJitLinkGetInfoLogSize
|
||||
nvJitLinkGetInfoLogSize.restype = nvJitLinkResult
|
||||
nvJitLinkGetInfoLogSize.argtypes = [nvJitLinkHandle, ctypes.POINTER(ctypes.c_uint64)]
|
||||
except AttributeError:
|
||||
pass
|
||||
try:
|
||||
__nvJitLinkGetInfoLog_12_4 = _libraries['libnvJitLink.so'].__nvJitLinkGetInfoLog_12_4
|
||||
__nvJitLinkGetInfoLog_12_4.restype = nvJitLinkResult
|
||||
__nvJitLinkGetInfoLog_12_4.argtypes = [nvJitLinkHandle, ctypes.POINTER(ctypes.c_char)]
|
||||
except AttributeError:
|
||||
pass
|
||||
try:
|
||||
nvJitLinkGetInfoLog = _libraries['libnvJitLink.so'].nvJitLinkGetInfoLog
|
||||
nvJitLinkGetInfoLog.restype = nvJitLinkResult
|
||||
nvJitLinkGetInfoLog.argtypes = [nvJitLinkHandle, ctypes.POINTER(ctypes.c_char)]
|
||||
except AttributeError:
|
||||
pass
|
||||
try:
|
||||
nvJitLinkVersion = _libraries['libnvJitLink.so'].nvJitLinkVersion
|
||||
nvJitLinkVersion.restype = nvJitLinkResult
|
||||
nvJitLinkVersion.argtypes = [ctypes.POINTER(ctypes.c_uint32), ctypes.POINTER(ctypes.c_uint32)]
|
||||
except AttributeError:
|
||||
pass
|
||||
__all__ = \
|
||||
['NVJITLINK_ERROR_INTERNAL', 'NVJITLINK_ERROR_INVALID_INPUT',
|
||||
'NVJITLINK_ERROR_MISSING_ARCH', 'NVJITLINK_ERROR_NVVM_COMPILE',
|
||||
'NVJITLINK_ERROR_PTX_COMPILE', 'NVJITLINK_ERROR_THREADPOOL',
|
||||
'NVJITLINK_ERROR_UNRECOGNIZED_INPUT',
|
||||
'NVJITLINK_ERROR_UNRECOGNIZED_OPTION', 'NVJITLINK_INPUT_ANY',
|
||||
'NVJITLINK_INPUT_CUBIN', 'NVJITLINK_INPUT_FATBIN',
|
||||
'NVJITLINK_INPUT_LIBRARY', 'NVJITLINK_INPUT_LTOIR',
|
||||
'NVJITLINK_INPUT_NONE', 'NVJITLINK_INPUT_OBJECT',
|
||||
'NVJITLINK_INPUT_PTX', 'NVJITLINK_SUCCESS',
|
||||
'NVRTC_ERROR_BUILTIN_OPERATION_FAILURE',
|
||||
'NVRTC_ERROR_COMPILATION', 'NVRTC_ERROR_INTERNAL_ERROR',
|
||||
'NVRTC_ERROR_INVALID_INPUT', 'NVRTC_ERROR_INVALID_OPTION',
|
||||
'NVRTC_ERROR_INVALID_PROGRAM',
|
||||
'NVRTC_ERROR_NAME_EXPRESSION_NOT_VALID',
|
||||
'NVRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION',
|
||||
'NVRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION',
|
||||
'NVRTC_ERROR_OUT_OF_MEMORY',
|
||||
'NVRTC_ERROR_PROGRAM_CREATION_FAILURE',
|
||||
'NVRTC_ERROR_TIME_FILE_WRITE_FAILED', 'NVRTC_SUCCESS',
|
||||
'__nvJitLinkAddData_12_4', '__nvJitLinkAddFile_12_4',
|
||||
'__nvJitLinkComplete_12_4', '__nvJitLinkCreate_12_4',
|
||||
'__nvJitLinkDestroy_12_4', '__nvJitLinkGetErrorLogSize_12_4',
|
||||
'__nvJitLinkGetErrorLog_12_4', '__nvJitLinkGetInfoLogSize_12_4',
|
||||
'__nvJitLinkGetInfoLog_12_4',
|
||||
'__nvJitLinkGetLinkedCubinSize_12_4',
|
||||
'__nvJitLinkGetLinkedCubin_12_4',
|
||||
'__nvJitLinkGetLinkedPtxSize_12_4',
|
||||
'__nvJitLinkGetLinkedPtx_12_4', 'c__EA_nvJitLinkInputType',
|
||||
'c__EA_nvJitLinkResult', 'c__EA_nvrtcResult', 'nvJitLinkAddData',
|
||||
'nvJitLinkAddFile', 'nvJitLinkComplete', 'nvJitLinkCreate',
|
||||
'nvJitLinkDestroy', 'nvJitLinkGetErrorLog',
|
||||
'nvJitLinkGetErrorLogSize', 'nvJitLinkGetInfoLog',
|
||||
'nvJitLinkGetInfoLogSize', 'nvJitLinkGetLinkedCubin',
|
||||
'nvJitLinkGetLinkedCubinSize', 'nvJitLinkGetLinkedPtx',
|
||||
'nvJitLinkGetLinkedPtxSize', 'nvJitLinkHandle',
|
||||
'nvJitLinkInputType', 'nvJitLinkInputType__enumvalues',
|
||||
'nvJitLinkResult', 'nvJitLinkResult__enumvalues',
|
||||
'nvJitLinkVersion', 'nvrtcAddNameExpression',
|
||||
'nvrtcCompileProgram', 'nvrtcCreateProgram',
|
||||
'nvrtcDestroyProgram', 'nvrtcGetCUBIN', 'nvrtcGetCUBINSize',
|
||||
'nvrtcGetErrorString', 'nvrtcGetLTOIR', 'nvrtcGetLTOIRSize',
|
||||
'nvrtcGetLoweredName', 'nvrtcGetNVVM', 'nvrtcGetNVVMSize',
|
||||
'nvrtcGetNumSupportedArchs', 'nvrtcGetOptiXIR',
|
||||
'nvrtcGetOptiXIRSize', 'nvrtcGetPTX', 'nvrtcGetPTXSize',
|
||||
'nvrtcGetProgramLog', 'nvrtcGetProgramLogSize',
|
||||
'nvrtcGetSupportedArchs', 'nvrtcProgram', 'nvrtcResult',
|
||||
'nvrtcResult__enumvalues', 'nvrtcVersion', 'size_t',
|
||||
'struct__nvrtcProgram', 'struct_nvJitLink', 'uint32_t']
|
||||
# nvrtcResult nvrtcGetPTXSize(nvrtcProgram prog, size_t *ptxSizeRet)
|
||||
try: (nvrtcGetPTXSize:=dll.nvrtcGetPTXSize).restype, nvrtcGetPTXSize.argtypes = nvrtcResult, [nvrtcProgram, ctypes.POINTER(size_t)]
|
||||
except AttributeError: pass
|
||||
|
||||
# nvrtcResult nvrtcGetPTX(nvrtcProgram prog, char *ptx)
|
||||
try: (nvrtcGetPTX:=dll.nvrtcGetPTX).restype, nvrtcGetPTX.argtypes = nvrtcResult, [nvrtcProgram, ctypes.POINTER(ctypes.c_char)]
|
||||
except AttributeError: pass
|
||||
|
||||
# nvrtcResult nvrtcGetCUBINSize(nvrtcProgram prog, size_t *cubinSizeRet)
|
||||
try: (nvrtcGetCUBINSize:=dll.nvrtcGetCUBINSize).restype, nvrtcGetCUBINSize.argtypes = nvrtcResult, [nvrtcProgram, ctypes.POINTER(size_t)]
|
||||
except AttributeError: pass
|
||||
|
||||
# nvrtcResult nvrtcGetCUBIN(nvrtcProgram prog, char *cubin)
|
||||
try: (nvrtcGetCUBIN:=dll.nvrtcGetCUBIN).restype, nvrtcGetCUBIN.argtypes = nvrtcResult, [nvrtcProgram, ctypes.POINTER(ctypes.c_char)]
|
||||
except AttributeError: pass
|
||||
|
||||
# __attribute__((deprecated("This function will be removed in a future release. Please use nvrtcGetLTOIRSize instead"))) nvrtcResult nvrtcGetNVVMSize(nvrtcProgram prog, size_t *nvvmSizeRet)
|
||||
try: (nvrtcGetNVVMSize:=dll.nvrtcGetNVVMSize).restype, nvrtcGetNVVMSize.argtypes = nvrtcResult, [nvrtcProgram, ctypes.POINTER(size_t)]
|
||||
except AttributeError: pass
|
||||
|
||||
# __attribute__((deprecated("This function will be removed in a future release. Please use nvrtcGetLTOIR instead"))) nvrtcResult nvrtcGetNVVM(nvrtcProgram prog, char *nvvm)
|
||||
try: (nvrtcGetNVVM:=dll.nvrtcGetNVVM).restype, nvrtcGetNVVM.argtypes = nvrtcResult, [nvrtcProgram, ctypes.POINTER(ctypes.c_char)]
|
||||
except AttributeError: pass
|
||||
|
||||
# nvrtcResult nvrtcGetLTOIRSize(nvrtcProgram prog, size_t *LTOIRSizeRet)
|
||||
try: (nvrtcGetLTOIRSize:=dll.nvrtcGetLTOIRSize).restype, nvrtcGetLTOIRSize.argtypes = nvrtcResult, [nvrtcProgram, ctypes.POINTER(size_t)]
|
||||
except AttributeError: pass
|
||||
|
||||
# nvrtcResult nvrtcGetLTOIR(nvrtcProgram prog, char *LTOIR)
|
||||
try: (nvrtcGetLTOIR:=dll.nvrtcGetLTOIR).restype, nvrtcGetLTOIR.argtypes = nvrtcResult, [nvrtcProgram, ctypes.POINTER(ctypes.c_char)]
|
||||
except AttributeError: pass
|
||||
|
||||
# nvrtcResult nvrtcGetOptiXIRSize(nvrtcProgram prog, size_t *optixirSizeRet)
|
||||
try: (nvrtcGetOptiXIRSize:=dll.nvrtcGetOptiXIRSize).restype, nvrtcGetOptiXIRSize.argtypes = nvrtcResult, [nvrtcProgram, ctypes.POINTER(size_t)]
|
||||
except AttributeError: pass
|
||||
|
||||
# nvrtcResult nvrtcGetOptiXIR(nvrtcProgram prog, char *optixir)
|
||||
try: (nvrtcGetOptiXIR:=dll.nvrtcGetOptiXIR).restype, nvrtcGetOptiXIR.argtypes = nvrtcResult, [nvrtcProgram, ctypes.POINTER(ctypes.c_char)]
|
||||
except AttributeError: pass
|
||||
|
||||
# nvrtcResult nvrtcGetProgramLogSize(nvrtcProgram prog, size_t *logSizeRet)
|
||||
try: (nvrtcGetProgramLogSize:=dll.nvrtcGetProgramLogSize).restype, nvrtcGetProgramLogSize.argtypes = nvrtcResult, [nvrtcProgram, ctypes.POINTER(size_t)]
|
||||
except AttributeError: pass
|
||||
|
||||
# nvrtcResult nvrtcGetProgramLog(nvrtcProgram prog, char *log)
|
||||
try: (nvrtcGetProgramLog:=dll.nvrtcGetProgramLog).restype, nvrtcGetProgramLog.argtypes = nvrtcResult, [nvrtcProgram, ctypes.POINTER(ctypes.c_char)]
|
||||
except AttributeError: pass
|
||||
|
||||
# nvrtcResult nvrtcAddNameExpression(nvrtcProgram prog, const char *const name_expression)
|
||||
try: (nvrtcAddNameExpression:=dll.nvrtcAddNameExpression).restype, nvrtcAddNameExpression.argtypes = nvrtcResult, [nvrtcProgram, ctypes.POINTER(ctypes.c_char)]
|
||||
except AttributeError: pass
|
||||
|
||||
# nvrtcResult nvrtcGetLoweredName(nvrtcProgram prog, const char *const name_expression, const char **lowered_name)
|
||||
try: (nvrtcGetLoweredName:=dll.nvrtcGetLoweredName).restype, nvrtcGetLoweredName.argtypes = nvrtcResult, [nvrtcProgram, ctypes.POINTER(ctypes.c_char), ctypes.POINTER(ctypes.POINTER(ctypes.c_char))]
|
||||
except AttributeError: pass
|
||||
|
||||
__DEPRECATED__ = lambda msg: __attribute__((deprecated(msg)))
|
||||
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
|
|
@ -1,669 +1,260 @@
|
|||
# pylint: skip-file
|
||||
# mypy: ignore-errors
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# TARGET arch is: []
|
||||
# WORD_SIZE is: 8
|
||||
# POINTER_SIZE is: 8
|
||||
# LONGDOUBLE_SIZE is: 16
|
||||
#
|
||||
import ctypes, ctypes.util
|
||||
PATHS_TO_TRY = [
|
||||
'/usr/local/lib/librocprof-trace-decoder.so',
|
||||
'/usr/local/lib/librocprof-trace-decoder.dylib',
|
||||
]
|
||||
def _try_dlopen_rocprof_trace_decoder():
|
||||
library = ctypes.util.find_library("rocprof-trace-decoder")
|
||||
if library:
|
||||
try: return ctypes.CDLL(library)
|
||||
except OSError: pass
|
||||
for candidate in PATHS_TO_TRY:
|
||||
try: return ctypes.CDLL(candidate)
|
||||
except OSError: pass
|
||||
import ctypes
|
||||
from tinygrad.helpers import unwrap
|
||||
from tinygrad.runtime.support.c import Struct, CEnum, _IO, _IOW, _IOR, _IOWR
|
||||
from ctypes.util import find_library
|
||||
def dll():
|
||||
try: return ctypes.CDLL(unwrap(find_library('rocprof-trace-decoder')))
|
||||
except: pass
|
||||
try: return ctypes.CDLL(unwrap('/usr/local/lib/rocprof-trace-decoder.so'))
|
||||
except: pass
|
||||
try: return ctypes.CDLL(unwrap('/usr/local/lib/rocprof-trace-decoder.dylib'))
|
||||
except: pass
|
||||
return None
|
||||
dll = dll()
|
||||
|
||||
rocprofiler_thread_trace_decoder_status_t = CEnum(ctypes.c_uint32)
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_STATUS_SUCCESS = rocprofiler_thread_trace_decoder_status_t.define('ROCPROFILER_THREAD_TRACE_DECODER_STATUS_SUCCESS', 0)
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_STATUS_ERROR = rocprofiler_thread_trace_decoder_status_t.define('ROCPROFILER_THREAD_TRACE_DECODER_STATUS_ERROR', 1)
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_STATUS_ERROR_OUT_OF_RESOURCES = rocprofiler_thread_trace_decoder_status_t.define('ROCPROFILER_THREAD_TRACE_DECODER_STATUS_ERROR_OUT_OF_RESOURCES', 2)
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_STATUS_ERROR_INVALID_ARGUMENT = rocprofiler_thread_trace_decoder_status_t.define('ROCPROFILER_THREAD_TRACE_DECODER_STATUS_ERROR_INVALID_ARGUMENT', 3)
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_STATUS_ERROR_INVALID_SHADER_DATA = rocprofiler_thread_trace_decoder_status_t.define('ROCPROFILER_THREAD_TRACE_DECODER_STATUS_ERROR_INVALID_SHADER_DATA', 4)
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_STATUS_LAST = rocprofiler_thread_trace_decoder_status_t.define('ROCPROFILER_THREAD_TRACE_DECODER_STATUS_LAST', 5)
|
||||
|
||||
class AsDictMixin:
|
||||
@classmethod
|
||||
def as_dict(cls, self):
|
||||
result = {}
|
||||
if not isinstance(self, AsDictMixin):
|
||||
# not a structure, assume it's already a python object
|
||||
return self
|
||||
if not hasattr(cls, "_fields_"):
|
||||
return result
|
||||
# sys.version_info >= (3, 5)
|
||||
# for (field, *_) in cls._fields_: # noqa
|
||||
for field_tuple in cls._fields_: # noqa
|
||||
field = field_tuple[0]
|
||||
if field.startswith('PADDING_'):
|
||||
continue
|
||||
value = getattr(self, field)
|
||||
type_ = type(value)
|
||||
if hasattr(value, "_length_") and hasattr(value, "_type_"):
|
||||
# array
|
||||
if not hasattr(type_, "as_dict"):
|
||||
value = [v for v in value]
|
||||
else:
|
||||
type_ = type_._type_
|
||||
value = [type_.as_dict(v) for v in value]
|
||||
elif hasattr(value, "contents") and hasattr(value, "_type_"):
|
||||
# pointer
|
||||
try:
|
||||
if not hasattr(type_, "as_dict"):
|
||||
value = value.contents
|
||||
else:
|
||||
type_ = type_._type_
|
||||
value = type_.as_dict(value.contents)
|
||||
except ValueError:
|
||||
# nullptr
|
||||
value = None
|
||||
elif isinstance(value, AsDictMixin):
|
||||
# other structure
|
||||
value = type_.as_dict(value)
|
||||
result[field] = value
|
||||
return result
|
||||
enum_rocprofiler_thread_trace_decoder_record_type_t = CEnum(ctypes.c_uint32)
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_RECORD_GFXIP = enum_rocprofiler_thread_trace_decoder_record_type_t.define('ROCPROFILER_THREAD_TRACE_DECODER_RECORD_GFXIP', 0)
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_RECORD_OCCUPANCY = enum_rocprofiler_thread_trace_decoder_record_type_t.define('ROCPROFILER_THREAD_TRACE_DECODER_RECORD_OCCUPANCY', 1)
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_RECORD_PERFEVENT = enum_rocprofiler_thread_trace_decoder_record_type_t.define('ROCPROFILER_THREAD_TRACE_DECODER_RECORD_PERFEVENT', 2)
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_RECORD_WAVE = enum_rocprofiler_thread_trace_decoder_record_type_t.define('ROCPROFILER_THREAD_TRACE_DECODER_RECORD_WAVE', 3)
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_RECORD_INFO = enum_rocprofiler_thread_trace_decoder_record_type_t.define('ROCPROFILER_THREAD_TRACE_DECODER_RECORD_INFO', 4)
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_RECORD_DEBUG = enum_rocprofiler_thread_trace_decoder_record_type_t.define('ROCPROFILER_THREAD_TRACE_DECODER_RECORD_DEBUG', 5)
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_RECORD_SHADERDATA = enum_rocprofiler_thread_trace_decoder_record_type_t.define('ROCPROFILER_THREAD_TRACE_DECODER_RECORD_SHADERDATA', 6)
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_RECORD_REALTIME = enum_rocprofiler_thread_trace_decoder_record_type_t.define('ROCPROFILER_THREAD_TRACE_DECODER_RECORD_REALTIME', 7)
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_RECORD_RT_FREQUENCY = enum_rocprofiler_thread_trace_decoder_record_type_t.define('ROCPROFILER_THREAD_TRACE_DECODER_RECORD_RT_FREQUENCY', 8)
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_RECORD_LAST = enum_rocprofiler_thread_trace_decoder_record_type_t.define('ROCPROFILER_THREAD_TRACE_DECODER_RECORD_LAST', 9)
|
||||
|
||||
|
||||
class Structure(ctypes.Structure, AsDictMixin):
|
||||
|
||||
def __init__(self, *args, **kwds):
|
||||
# We don't want to use positional arguments fill PADDING_* fields
|
||||
|
||||
args = dict(zip(self.__class__._field_names_(), args))
|
||||
args.update(kwds)
|
||||
super(Structure, self).__init__(**args)
|
||||
|
||||
@classmethod
|
||||
def _field_names_(cls):
|
||||
if hasattr(cls, '_fields_'):
|
||||
return (f[0] for f in cls._fields_ if not f[0].startswith('PADDING'))
|
||||
else:
|
||||
return ()
|
||||
|
||||
@classmethod
|
||||
def get_type(cls, field):
|
||||
for f in cls._fields_:
|
||||
if f[0] == field:
|
||||
return f[1]
|
||||
return None
|
||||
|
||||
@classmethod
|
||||
def bind(cls, bound_fields):
|
||||
fields = {}
|
||||
for name, type_ in cls._fields_:
|
||||
if hasattr(type_, "restype"):
|
||||
if name in bound_fields:
|
||||
if bound_fields[name] is None:
|
||||
fields[name] = type_()
|
||||
else:
|
||||
# use a closure to capture the callback from the loop scope
|
||||
fields[name] = (
|
||||
type_((lambda callback: lambda *args: callback(*args))(
|
||||
bound_fields[name]))
|
||||
)
|
||||
del bound_fields[name]
|
||||
else:
|
||||
# default callback implementation (does nothing)
|
||||
try:
|
||||
default_ = type_(0).restype().value
|
||||
except TypeError:
|
||||
default_ = None
|
||||
fields[name] = type_((
|
||||
lambda default_: lambda *args: default_)(default_))
|
||||
else:
|
||||
# not a callback function, use default initialization
|
||||
if name in bound_fields:
|
||||
fields[name] = bound_fields[name]
|
||||
del bound_fields[name]
|
||||
else:
|
||||
fields[name] = type_()
|
||||
if len(bound_fields) != 0:
|
||||
raise ValueError(
|
||||
"Cannot bind the following unknown callback(s) {}.{}".format(
|
||||
cls.__name__, bound_fields.keys()
|
||||
))
|
||||
return cls(**fields)
|
||||
|
||||
|
||||
class Union(ctypes.Union, AsDictMixin):
|
||||
pass
|
||||
|
||||
|
||||
|
||||
c_int128 = ctypes.c_ubyte*16
|
||||
c_uint128 = c_int128
|
||||
void = None
|
||||
if ctypes.sizeof(ctypes.c_longdouble) == 16:
|
||||
c_long_double_t = ctypes.c_longdouble
|
||||
else:
|
||||
c_long_double_t = ctypes.c_ubyte*16
|
||||
|
||||
def string_cast(char_pointer, encoding='utf-8', errors='strict'):
|
||||
value = ctypes.cast(char_pointer, ctypes.c_char_p).value
|
||||
if value is not None and encoding is not None:
|
||||
value = value.decode(encoding, errors=errors)
|
||||
return value
|
||||
|
||||
|
||||
def char_pointer_cast(string, encoding='utf-8'):
|
||||
if encoding is not None:
|
||||
try:
|
||||
string = string.encode(encoding)
|
||||
except AttributeError:
|
||||
# In Python3, bytes has no encode attribute
|
||||
pass
|
||||
string = ctypes.c_char_p(string)
|
||||
return ctypes.cast(string, ctypes.POINTER(ctypes.c_char))
|
||||
|
||||
|
||||
|
||||
class FunctionFactoryStub:
|
||||
def __getattr__(self, _):
|
||||
return ctypes.CFUNCTYPE(lambda y:y)
|
||||
|
||||
# libraries['FIXME_STUB'] explanation
|
||||
# As you did not list (-l libraryname.so) a library that exports this function
|
||||
# This is a non-working stub instead.
|
||||
# You can either re-run clan2py with -l /path/to/library.so
|
||||
# Or manually fix this by comment the ctypes.CDLL loading
|
||||
_libraries = {}
|
||||
_libraries['FIXME_STUB'] = _try_dlopen_rocprof_trace_decoder() # ctypes.CDLL('FIXME_STUB')
|
||||
|
||||
|
||||
|
||||
# values for enumeration 'rocprofiler_thread_trace_decoder_info_t'
|
||||
rocprofiler_thread_trace_decoder_info_t__enumvalues = {
|
||||
0: 'ROCPROFILER_THREAD_TRACE_DECODER_INFO_NONE',
|
||||
1: 'ROCPROFILER_THREAD_TRACE_DECODER_INFO_DATA_LOST',
|
||||
2: 'ROCPROFILER_THREAD_TRACE_DECODER_INFO_STITCH_INCOMPLETE',
|
||||
3: 'ROCPROFILER_THREAD_TRACE_DECODER_INFO_WAVE_INCOMPLETE',
|
||||
4: 'ROCPROFILER_THREAD_TRACE_DECODER_INFO_LAST',
|
||||
}
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_INFO_NONE = 0
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_INFO_DATA_LOST = 1
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_INFO_STITCH_INCOMPLETE = 2
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_INFO_WAVE_INCOMPLETE = 3
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_INFO_LAST = 4
|
||||
rocprofiler_thread_trace_decoder_info_t = ctypes.c_uint32 # enum
|
||||
class struct_rocprofiler_thread_trace_decoder_pc_t(Structure):
|
||||
pass
|
||||
|
||||
struct_rocprofiler_thread_trace_decoder_pc_t._pack_ = 1 # source:False
|
||||
struct_rocprofiler_thread_trace_decoder_pc_t._fields_ = [
|
||||
('address', ctypes.c_uint64),
|
||||
('code_object_id', ctypes.c_uint64),
|
||||
]
|
||||
|
||||
rocprofiler_thread_trace_decoder_pc_t = struct_rocprofiler_thread_trace_decoder_pc_t
|
||||
class struct_rocprofiler_thread_trace_decoder_perfevent_t(Structure):
|
||||
pass
|
||||
|
||||
struct_rocprofiler_thread_trace_decoder_perfevent_t._pack_ = 1 # source:False
|
||||
struct_rocprofiler_thread_trace_decoder_perfevent_t._fields_ = [
|
||||
('time', ctypes.c_int64),
|
||||
('events0', ctypes.c_uint16),
|
||||
('events1', ctypes.c_uint16),
|
||||
('events2', ctypes.c_uint16),
|
||||
('events3', ctypes.c_uint16),
|
||||
('CU', ctypes.c_ubyte),
|
||||
('bank', ctypes.c_ubyte),
|
||||
('PADDING_0', ctypes.c_ubyte * 6),
|
||||
]
|
||||
|
||||
rocprofiler_thread_trace_decoder_perfevent_t = struct_rocprofiler_thread_trace_decoder_perfevent_t
|
||||
class struct_rocprofiler_thread_trace_decoder_occupancy_t(Structure):
|
||||
pass
|
||||
|
||||
struct_rocprofiler_thread_trace_decoder_occupancy_t._pack_ = 1 # source:False
|
||||
struct_rocprofiler_thread_trace_decoder_occupancy_t._fields_ = [
|
||||
('pc', rocprofiler_thread_trace_decoder_pc_t),
|
||||
('time', ctypes.c_uint64),
|
||||
('reserved', ctypes.c_ubyte),
|
||||
('cu', ctypes.c_ubyte),
|
||||
('simd', ctypes.c_ubyte),
|
||||
('wave_id', ctypes.c_ubyte),
|
||||
('start', ctypes.c_uint32, 1),
|
||||
('_rsvd', ctypes.c_uint32, 31),
|
||||
]
|
||||
|
||||
rocprofiler_thread_trace_decoder_occupancy_t = struct_rocprofiler_thread_trace_decoder_occupancy_t
|
||||
|
||||
# values for enumeration 'rocprofiler_thread_trace_decoder_wstate_type_t'
|
||||
rocprofiler_thread_trace_decoder_wstate_type_t__enumvalues = {
|
||||
0: 'ROCPROFILER_THREAD_TRACE_DECODER_WSTATE_EMPTY',
|
||||
1: 'ROCPROFILER_THREAD_TRACE_DECODER_WSTATE_IDLE',
|
||||
2: 'ROCPROFILER_THREAD_TRACE_DECODER_WSTATE_EXEC',
|
||||
3: 'ROCPROFILER_THREAD_TRACE_DECODER_WSTATE_WAIT',
|
||||
4: 'ROCPROFILER_THREAD_TRACE_DECODER_WSTATE_STALL',
|
||||
5: 'ROCPROFILER_THREAD_TRACE_DECODER_WSTATE_LAST',
|
||||
}
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_WSTATE_EMPTY = 0
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_WSTATE_IDLE = 1
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_WSTATE_EXEC = 2
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_WSTATE_WAIT = 3
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_WSTATE_STALL = 4
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_WSTATE_LAST = 5
|
||||
rocprofiler_thread_trace_decoder_wstate_type_t = ctypes.c_uint32 # enum
|
||||
class struct_rocprofiler_thread_trace_decoder_wave_state_t(Structure):
|
||||
pass
|
||||
|
||||
struct_rocprofiler_thread_trace_decoder_wave_state_t._pack_ = 1 # source:False
|
||||
struct_rocprofiler_thread_trace_decoder_wave_state_t._fields_ = [
|
||||
('type', ctypes.c_int32),
|
||||
('duration', ctypes.c_int32),
|
||||
]
|
||||
|
||||
rocprofiler_thread_trace_decoder_wave_state_t = struct_rocprofiler_thread_trace_decoder_wave_state_t
|
||||
|
||||
# values for enumeration 'rocprofiler_thread_trace_decoder_inst_category_t'
|
||||
rocprofiler_thread_trace_decoder_inst_category_t__enumvalues = {
|
||||
0: 'ROCPROFILER_THREAD_TRACE_DECODER_INST_NONE',
|
||||
1: 'ROCPROFILER_THREAD_TRACE_DECODER_INST_SMEM',
|
||||
2: 'ROCPROFILER_THREAD_TRACE_DECODER_INST_SALU',
|
||||
3: 'ROCPROFILER_THREAD_TRACE_DECODER_INST_VMEM',
|
||||
4: 'ROCPROFILER_THREAD_TRACE_DECODER_INST_FLAT',
|
||||
5: 'ROCPROFILER_THREAD_TRACE_DECODER_INST_LDS',
|
||||
6: 'ROCPROFILER_THREAD_TRACE_DECODER_INST_VALU',
|
||||
7: 'ROCPROFILER_THREAD_TRACE_DECODER_INST_JUMP',
|
||||
8: 'ROCPROFILER_THREAD_TRACE_DECODER_INST_NEXT',
|
||||
9: 'ROCPROFILER_THREAD_TRACE_DECODER_INST_IMMED',
|
||||
10: 'ROCPROFILER_THREAD_TRACE_DECODER_INST_CONTEXT',
|
||||
11: 'ROCPROFILER_THREAD_TRACE_DECODER_INST_MESSAGE',
|
||||
12: 'ROCPROFILER_THREAD_TRACE_DECODER_INST_BVH',
|
||||
13: 'ROCPROFILER_THREAD_TRACE_DECODER_INST_LAST',
|
||||
}
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_INST_NONE = 0
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_INST_SMEM = 1
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_INST_SALU = 2
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_INST_VMEM = 3
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_INST_FLAT = 4
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_INST_LDS = 5
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_INST_VALU = 6
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_INST_JUMP = 7
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_INST_NEXT = 8
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_INST_IMMED = 9
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_INST_CONTEXT = 10
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_INST_MESSAGE = 11
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_INST_BVH = 12
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_INST_LAST = 13
|
||||
rocprofiler_thread_trace_decoder_inst_category_t = ctypes.c_uint32 # enum
|
||||
class struct_rocprofiler_thread_trace_decoder_inst_t(Structure):
|
||||
pass
|
||||
|
||||
struct_rocprofiler_thread_trace_decoder_inst_t._pack_ = 1 # source:False
|
||||
struct_rocprofiler_thread_trace_decoder_inst_t._fields_ = [
|
||||
('category', ctypes.c_uint32, 8),
|
||||
('stall', ctypes.c_uint32, 24),
|
||||
('duration', ctypes.c_int32),
|
||||
('time', ctypes.c_int64),
|
||||
('pc', rocprofiler_thread_trace_decoder_pc_t),
|
||||
]
|
||||
|
||||
rocprofiler_thread_trace_decoder_inst_t = struct_rocprofiler_thread_trace_decoder_inst_t
|
||||
class struct_rocprofiler_thread_trace_decoder_wave_t(Structure):
|
||||
pass
|
||||
|
||||
struct_rocprofiler_thread_trace_decoder_wave_t._pack_ = 1 # source:False
|
||||
struct_rocprofiler_thread_trace_decoder_wave_t._fields_ = [
|
||||
('cu', ctypes.c_ubyte),
|
||||
('simd', ctypes.c_ubyte),
|
||||
('wave_id', ctypes.c_ubyte),
|
||||
('contexts', ctypes.c_ubyte),
|
||||
('_rsvd1', ctypes.c_uint32),
|
||||
('_rsvd2', ctypes.c_uint32),
|
||||
('_rsvd3', ctypes.c_uint32),
|
||||
('begin_time', ctypes.c_int64),
|
||||
('end_time', ctypes.c_int64),
|
||||
('timeline_size', ctypes.c_uint64),
|
||||
('instructions_size', ctypes.c_uint64),
|
||||
('timeline_array', ctypes.POINTER(struct_rocprofiler_thread_trace_decoder_wave_state_t)),
|
||||
('instructions_array', ctypes.POINTER(struct_rocprofiler_thread_trace_decoder_inst_t)),
|
||||
]
|
||||
|
||||
rocprofiler_thread_trace_decoder_wave_t = struct_rocprofiler_thread_trace_decoder_wave_t
|
||||
class struct_rocprofiler_thread_trace_decoder_realtime_t(Structure):
|
||||
pass
|
||||
|
||||
struct_rocprofiler_thread_trace_decoder_realtime_t._pack_ = 1 # source:False
|
||||
struct_rocprofiler_thread_trace_decoder_realtime_t._fields_ = [
|
||||
('shader_clock', ctypes.c_int64),
|
||||
('realtime_clock', ctypes.c_uint64),
|
||||
('reserved', ctypes.c_uint64),
|
||||
]
|
||||
|
||||
rocprofiler_thread_trace_decoder_realtime_t = struct_rocprofiler_thread_trace_decoder_realtime_t
|
||||
|
||||
# values for enumeration 'rocprofiler_thread_trace_decoder_shaderdata_flags_t'
|
||||
rocprofiler_thread_trace_decoder_shaderdata_flags_t__enumvalues = {
|
||||
0: 'ROCPROFILER_THREAD_TRACE_DECODER_SHADERDATA_FLAGS_IMM',
|
||||
1: 'ROCPROFILER_THREAD_TRACE_DECODER_SHADERDATA_FLAGS_PRIV',
|
||||
}
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_SHADERDATA_FLAGS_IMM = 0
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_SHADERDATA_FLAGS_PRIV = 1
|
||||
rocprofiler_thread_trace_decoder_shaderdata_flags_t = ctypes.c_uint32 # enum
|
||||
class struct_rocprofiler_thread_trace_decoder_shaderdata_t(Structure):
|
||||
pass
|
||||
|
||||
struct_rocprofiler_thread_trace_decoder_shaderdata_t._pack_ = 1 # source:False
|
||||
struct_rocprofiler_thread_trace_decoder_shaderdata_t._fields_ = [
|
||||
('time', ctypes.c_int64),
|
||||
('value', ctypes.c_uint64),
|
||||
('cu', ctypes.c_ubyte),
|
||||
('simd', ctypes.c_ubyte),
|
||||
('wave_id', ctypes.c_ubyte),
|
||||
('flags', ctypes.c_ubyte),
|
||||
('reserved', ctypes.c_uint32),
|
||||
]
|
||||
|
||||
rocprofiler_thread_trace_decoder_shaderdata_t = struct_rocprofiler_thread_trace_decoder_shaderdata_t
|
||||
|
||||
# values for enumeration 'rocprofiler_thread_trace_decoder_record_type_t'
|
||||
rocprofiler_thread_trace_decoder_record_type_t__enumvalues = {
|
||||
0: 'ROCPROFILER_THREAD_TRACE_DECODER_RECORD_GFXIP',
|
||||
1: 'ROCPROFILER_THREAD_TRACE_DECODER_RECORD_OCCUPANCY',
|
||||
2: 'ROCPROFILER_THREAD_TRACE_DECODER_RECORD_PERFEVENT',
|
||||
3: 'ROCPROFILER_THREAD_TRACE_DECODER_RECORD_WAVE',
|
||||
4: 'ROCPROFILER_THREAD_TRACE_DECODER_RECORD_INFO',
|
||||
5: 'ROCPROFILER_THREAD_TRACE_DECODER_RECORD_DEBUG',
|
||||
6: 'ROCPROFILER_THREAD_TRACE_DECODER_RECORD_SHADERDATA',
|
||||
7: 'ROCPROFILER_THREAD_TRACE_DECODER_RECORD_REALTIME',
|
||||
8: 'ROCPROFILER_THREAD_TRACE_DECODER_RECORD_RT_FREQUENCY',
|
||||
9: 'ROCPROFILER_THREAD_TRACE_DECODER_RECORD_LAST',
|
||||
}
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_RECORD_GFXIP = 0
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_RECORD_OCCUPANCY = 1
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_RECORD_PERFEVENT = 2
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_RECORD_WAVE = 3
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_RECORD_INFO = 4
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_RECORD_DEBUG = 5
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_RECORD_SHADERDATA = 6
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_RECORD_REALTIME = 7
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_RECORD_RT_FREQUENCY = 8
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_RECORD_LAST = 9
|
||||
rocprofiler_thread_trace_decoder_record_type_t = ctypes.c_uint32 # enum
|
||||
|
||||
# values for enumeration 'c__EA_rocprofiler_thread_trace_decoder_status_t'
|
||||
c__EA_rocprofiler_thread_trace_decoder_status_t__enumvalues = {
|
||||
0: 'ROCPROFILER_THREAD_TRACE_DECODER_STATUS_SUCCESS',
|
||||
1: 'ROCPROFILER_THREAD_TRACE_DECODER_STATUS_ERROR',
|
||||
2: 'ROCPROFILER_THREAD_TRACE_DECODER_STATUS_ERROR_OUT_OF_RESOURCES',
|
||||
3: 'ROCPROFILER_THREAD_TRACE_DECODER_STATUS_ERROR_INVALID_ARGUMENT',
|
||||
4: 'ROCPROFILER_THREAD_TRACE_DECODER_STATUS_ERROR_INVALID_SHADER_DATA',
|
||||
5: 'ROCPROFILER_THREAD_TRACE_DECODER_STATUS_LAST',
|
||||
}
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_STATUS_SUCCESS = 0
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_STATUS_ERROR = 1
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_STATUS_ERROR_OUT_OF_RESOURCES = 2
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_STATUS_ERROR_INVALID_ARGUMENT = 3
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_STATUS_ERROR_INVALID_SHADER_DATA = 4
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_STATUS_LAST = 5
|
||||
c__EA_rocprofiler_thread_trace_decoder_status_t = ctypes.c_uint32 # enum
|
||||
rocprofiler_thread_trace_decoder_status_t = c__EA_rocprofiler_thread_trace_decoder_status_t
|
||||
rocprofiler_thread_trace_decoder_status_t__enumvalues = c__EA_rocprofiler_thread_trace_decoder_status_t__enumvalues
|
||||
rocprof_trace_decoder_trace_callback_t = ctypes.CFUNCTYPE(c__EA_rocprofiler_thread_trace_decoder_status_t, rocprofiler_thread_trace_decoder_record_type_t, ctypes.POINTER(None), ctypes.c_uint64, ctypes.POINTER(None))
|
||||
rocprof_trace_decoder_isa_callback_t = ctypes.CFUNCTYPE(c__EA_rocprofiler_thread_trace_decoder_status_t, ctypes.POINTER(ctypes.c_char), ctypes.POINTER(ctypes.c_uint64), ctypes.POINTER(ctypes.c_uint64), struct_rocprofiler_thread_trace_decoder_pc_t, ctypes.POINTER(None))
|
||||
rocprof_trace_decoder_se_data_callback_t = ctypes.CFUNCTYPE(ctypes.c_uint64, ctypes.POINTER(ctypes.POINTER(ctypes.c_ubyte)), ctypes.POINTER(ctypes.c_uint64), ctypes.POINTER(None))
|
||||
try:
|
||||
rocprof_trace_decoder_parse_data = _libraries['FIXME_STUB'].rocprof_trace_decoder_parse_data
|
||||
rocprof_trace_decoder_parse_data.restype = rocprofiler_thread_trace_decoder_status_t
|
||||
rocprof_trace_decoder_parse_data.argtypes = [rocprof_trace_decoder_se_data_callback_t, rocprof_trace_decoder_trace_callback_t, rocprof_trace_decoder_isa_callback_t, ctypes.POINTER(None)]
|
||||
except AttributeError:
|
||||
pass
|
||||
try:
|
||||
rocprof_trace_decoder_get_info_string = _libraries['FIXME_STUB'].rocprof_trace_decoder_get_info_string
|
||||
rocprof_trace_decoder_get_info_string.restype = ctypes.POINTER(ctypes.c_char)
|
||||
rocprof_trace_decoder_get_info_string.argtypes = [rocprofiler_thread_trace_decoder_info_t]
|
||||
except AttributeError:
|
||||
pass
|
||||
try:
|
||||
rocprof_trace_decoder_get_status_string = _libraries['FIXME_STUB'].rocprof_trace_decoder_get_status_string
|
||||
rocprof_trace_decoder_get_status_string.restype = ctypes.POINTER(ctypes.c_char)
|
||||
rocprof_trace_decoder_get_status_string.argtypes = [rocprofiler_thread_trace_decoder_status_t]
|
||||
except AttributeError:
|
||||
pass
|
||||
rocprofiler_thread_trace_decoder_debug_callback_t = ctypes.CFUNCTYPE(None, ctypes.c_int64, ctypes.POINTER(ctypes.c_char), ctypes.POINTER(ctypes.c_char), ctypes.POINTER(None))
|
||||
rocprof_trace_decoder_trace_callback_t = ctypes.CFUNCTYPE(rocprofiler_thread_trace_decoder_status_t, enum_rocprofiler_thread_trace_decoder_record_type_t, ctypes.c_void_p, ctypes.c_uint64, ctypes.c_void_p)
|
||||
class struct_rocprofiler_thread_trace_decoder_pc_t(Struct): pass
|
||||
uint64_t = ctypes.c_uint64
|
||||
try:
|
||||
rocprof_trace_decoder_dump_data = _libraries['FIXME_STUB'].rocprof_trace_decoder_dump_data
|
||||
rocprof_trace_decoder_dump_data.restype = rocprofiler_thread_trace_decoder_status_t
|
||||
rocprof_trace_decoder_dump_data.argtypes = [ctypes.POINTER(ctypes.c_char), uint64_t, rocprofiler_thread_trace_decoder_debug_callback_t, ctypes.POINTER(None)]
|
||||
except AttributeError:
|
||||
pass
|
||||
class union_rocprof_trace_decoder_gfx9_header_t(Union):
|
||||
pass
|
||||
|
||||
class struct_rocprof_trace_decoder_gfx9_header_t_0(Structure):
|
||||
pass
|
||||
|
||||
struct_rocprof_trace_decoder_gfx9_header_t_0._pack_ = 1 # source:False
|
||||
struct_rocprof_trace_decoder_gfx9_header_t_0._fields_ = [
|
||||
('legacy_version', ctypes.c_uint64, 13),
|
||||
('gfx9_version2', ctypes.c_uint64, 3),
|
||||
('DSIMDM', ctypes.c_uint64, 4),
|
||||
('DCU', ctypes.c_uint64, 5),
|
||||
('reserved1', ctypes.c_uint64, 1),
|
||||
('SEID', ctypes.c_uint64, 6),
|
||||
('reserved2', ctypes.c_uint64, 32),
|
||||
struct_rocprofiler_thread_trace_decoder_pc_t._fields_ = [
|
||||
('address', uint64_t),
|
||||
('code_object_id', uint64_t),
|
||||
]
|
||||
rocprof_trace_decoder_isa_callback_t = ctypes.CFUNCTYPE(rocprofiler_thread_trace_decoder_status_t, ctypes.POINTER(ctypes.c_char), ctypes.POINTER(ctypes.c_uint64), ctypes.POINTER(ctypes.c_uint64), struct_rocprofiler_thread_trace_decoder_pc_t, ctypes.c_void_p)
|
||||
rocprof_trace_decoder_se_data_callback_t = ctypes.CFUNCTYPE(ctypes.c_uint64, ctypes.POINTER(ctypes.POINTER(ctypes.c_ubyte)), ctypes.POINTER(ctypes.c_uint64), ctypes.c_void_p)
|
||||
# rocprofiler_thread_trace_decoder_status_t rocprof_trace_decoder_parse_data(rocprof_trace_decoder_se_data_callback_t se_data_callback, rocprof_trace_decoder_trace_callback_t trace_callback, rocprof_trace_decoder_isa_callback_t isa_callback, void *userdata)
|
||||
try: (rocprof_trace_decoder_parse_data:=dll.rocprof_trace_decoder_parse_data).restype, rocprof_trace_decoder_parse_data.argtypes = rocprofiler_thread_trace_decoder_status_t, [rocprof_trace_decoder_se_data_callback_t, rocprof_trace_decoder_trace_callback_t, rocprof_trace_decoder_isa_callback_t, ctypes.c_void_p]
|
||||
except AttributeError: pass
|
||||
|
||||
union_rocprof_trace_decoder_gfx9_header_t._pack_ = 1 # source:False
|
||||
union_rocprof_trace_decoder_gfx9_header_t._anonymous_ = ('_0',)
|
||||
enum_rocprofiler_thread_trace_decoder_info_t = CEnum(ctypes.c_uint32)
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_INFO_NONE = enum_rocprofiler_thread_trace_decoder_info_t.define('ROCPROFILER_THREAD_TRACE_DECODER_INFO_NONE', 0)
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_INFO_DATA_LOST = enum_rocprofiler_thread_trace_decoder_info_t.define('ROCPROFILER_THREAD_TRACE_DECODER_INFO_DATA_LOST', 1)
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_INFO_STITCH_INCOMPLETE = enum_rocprofiler_thread_trace_decoder_info_t.define('ROCPROFILER_THREAD_TRACE_DECODER_INFO_STITCH_INCOMPLETE', 2)
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_INFO_WAVE_INCOMPLETE = enum_rocprofiler_thread_trace_decoder_info_t.define('ROCPROFILER_THREAD_TRACE_DECODER_INFO_WAVE_INCOMPLETE', 3)
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_INFO_LAST = enum_rocprofiler_thread_trace_decoder_info_t.define('ROCPROFILER_THREAD_TRACE_DECODER_INFO_LAST', 4)
|
||||
|
||||
rocprofiler_thread_trace_decoder_info_t = enum_rocprofiler_thread_trace_decoder_info_t
|
||||
# const char *rocprof_trace_decoder_get_info_string(rocprofiler_thread_trace_decoder_info_t info)
|
||||
try: (rocprof_trace_decoder_get_info_string:=dll.rocprof_trace_decoder_get_info_string).restype, rocprof_trace_decoder_get_info_string.argtypes = ctypes.POINTER(ctypes.c_char), [rocprofiler_thread_trace_decoder_info_t]
|
||||
except AttributeError: pass
|
||||
|
||||
# const char *rocprof_trace_decoder_get_status_string(rocprofiler_thread_trace_decoder_status_t status)
|
||||
try: (rocprof_trace_decoder_get_status_string:=dll.rocprof_trace_decoder_get_status_string).restype, rocprof_trace_decoder_get_status_string.argtypes = ctypes.POINTER(ctypes.c_char), [rocprofiler_thread_trace_decoder_status_t]
|
||||
except AttributeError: pass
|
||||
|
||||
rocprofiler_thread_trace_decoder_debug_callback_t = ctypes.CFUNCTYPE(None, ctypes.c_int64, ctypes.POINTER(ctypes.c_char), ctypes.POINTER(ctypes.c_char), ctypes.c_void_p)
|
||||
# rocprofiler_thread_trace_decoder_status_t rocprof_trace_decoder_dump_data(const char *data, uint64_t data_size, rocprofiler_thread_trace_decoder_debug_callback_t cb, void *userdata)
|
||||
try: (rocprof_trace_decoder_dump_data:=dll.rocprof_trace_decoder_dump_data).restype, rocprof_trace_decoder_dump_data.argtypes = rocprofiler_thread_trace_decoder_status_t, [ctypes.POINTER(ctypes.c_char), uint64_t, rocprofiler_thread_trace_decoder_debug_callback_t, ctypes.c_void_p]
|
||||
except AttributeError: pass
|
||||
|
||||
class union_rocprof_trace_decoder_gfx9_header_t(ctypes.Union): pass
|
||||
class union_rocprof_trace_decoder_gfx9_header_t_0(Struct): pass
|
||||
union_rocprof_trace_decoder_gfx9_header_t_0._fields_ = [
|
||||
('legacy_version', uint64_t,13),
|
||||
('gfx9_version2', uint64_t,3),
|
||||
('DSIMDM', uint64_t,4),
|
||||
('DCU', uint64_t,5),
|
||||
('reserved1', uint64_t,1),
|
||||
('SEID', uint64_t,6),
|
||||
('reserved2', uint64_t,32),
|
||||
]
|
||||
union_rocprof_trace_decoder_gfx9_header_t._anonymous_ = ['_0']
|
||||
union_rocprof_trace_decoder_gfx9_header_t._fields_ = [
|
||||
('_0', struct_rocprof_trace_decoder_gfx9_header_t_0),
|
||||
('raw', ctypes.c_uint64),
|
||||
('_0', union_rocprof_trace_decoder_gfx9_header_t_0),
|
||||
('raw', uint64_t),
|
||||
]
|
||||
|
||||
rocprof_trace_decoder_gfx9_header_t = union_rocprof_trace_decoder_gfx9_header_t
|
||||
class union_rocprof_trace_decoder_instrument_enable_t(Union):
|
||||
pass
|
||||
|
||||
class struct_rocprof_trace_decoder_instrument_enable_t_0(Structure):
|
||||
pass
|
||||
|
||||
struct_rocprof_trace_decoder_instrument_enable_t_0._pack_ = 1 # source:False
|
||||
struct_rocprof_trace_decoder_instrument_enable_t_0._fields_ = [
|
||||
('char1', ctypes.c_uint32, 8),
|
||||
('char2', ctypes.c_uint32, 8),
|
||||
('char3', ctypes.c_uint32, 8),
|
||||
('char4', ctypes.c_uint32, 8),
|
||||
class union_rocprof_trace_decoder_instrument_enable_t(ctypes.Union): pass
|
||||
class union_rocprof_trace_decoder_instrument_enable_t_0(Struct): pass
|
||||
union_rocprof_trace_decoder_instrument_enable_t_0._fields_ = [
|
||||
('char1', ctypes.c_uint32,8),
|
||||
('char2', ctypes.c_uint32,8),
|
||||
('char3', ctypes.c_uint32,8),
|
||||
('char4', ctypes.c_uint32,8),
|
||||
]
|
||||
|
||||
union_rocprof_trace_decoder_instrument_enable_t._pack_ = 1 # source:False
|
||||
union_rocprof_trace_decoder_instrument_enable_t._anonymous_ = ('_0',)
|
||||
union_rocprof_trace_decoder_instrument_enable_t._anonymous_ = ['_0']
|
||||
union_rocprof_trace_decoder_instrument_enable_t._fields_ = [
|
||||
('_0', struct_rocprof_trace_decoder_instrument_enable_t_0),
|
||||
('u32All', ctypes.c_uint32),
|
||||
('_0', union_rocprof_trace_decoder_instrument_enable_t_0),
|
||||
('u32All', ctypes.c_uint32),
|
||||
]
|
||||
|
||||
rocprof_trace_decoder_instrument_enable_t = union_rocprof_trace_decoder_instrument_enable_t
|
||||
class union_rocprof_trace_decoder_packet_header_t(Union):
|
||||
pass
|
||||
|
||||
class struct_rocprof_trace_decoder_packet_header_t_0(Structure):
|
||||
pass
|
||||
|
||||
struct_rocprof_trace_decoder_packet_header_t_0._pack_ = 1 # source:False
|
||||
struct_rocprof_trace_decoder_packet_header_t_0._fields_ = [
|
||||
('opcode', ctypes.c_uint32, 8),
|
||||
('type', ctypes.c_uint32, 4),
|
||||
('data20', ctypes.c_uint32, 20),
|
||||
class union_rocprof_trace_decoder_packet_header_t(ctypes.Union): pass
|
||||
class union_rocprof_trace_decoder_packet_header_t_0(Struct): pass
|
||||
union_rocprof_trace_decoder_packet_header_t_0._fields_ = [
|
||||
('opcode', ctypes.c_uint32,8),
|
||||
('type', ctypes.c_uint32,4),
|
||||
('data20', ctypes.c_uint32,20),
|
||||
]
|
||||
|
||||
union_rocprof_trace_decoder_packet_header_t._pack_ = 1 # source:False
|
||||
union_rocprof_trace_decoder_packet_header_t._anonymous_ = ('_0',)
|
||||
union_rocprof_trace_decoder_packet_header_t._anonymous_ = ['_0']
|
||||
union_rocprof_trace_decoder_packet_header_t._fields_ = [
|
||||
('_0', struct_rocprof_trace_decoder_packet_header_t_0),
|
||||
('u32All', ctypes.c_uint32),
|
||||
('_0', union_rocprof_trace_decoder_packet_header_t_0),
|
||||
('u32All', ctypes.c_uint32),
|
||||
]
|
||||
|
||||
rocprof_trace_decoder_packet_header_t = union_rocprof_trace_decoder_packet_header_t
|
||||
enum_rocprof_trace_decoder_packet_opcode_t = CEnum(ctypes.c_uint32)
|
||||
ROCPROF_TRACE_DECODER_PACKET_OPCODE_CODEOBJ = enum_rocprof_trace_decoder_packet_opcode_t.define('ROCPROF_TRACE_DECODER_PACKET_OPCODE_CODEOBJ', 4)
|
||||
ROCPROF_TRACE_DECODER_PACKET_OPCODE_RT_TIMESTAMP = enum_rocprof_trace_decoder_packet_opcode_t.define('ROCPROF_TRACE_DECODER_PACKET_OPCODE_RT_TIMESTAMP', 5)
|
||||
ROCPROF_TRACE_DECODER_PACKET_OPCODE_AGENT_INFO = enum_rocprof_trace_decoder_packet_opcode_t.define('ROCPROF_TRACE_DECODER_PACKET_OPCODE_AGENT_INFO', 6)
|
||||
|
||||
# values for enumeration 'rocprof_trace_decoder_packet_opcode_t'
|
||||
rocprof_trace_decoder_packet_opcode_t__enumvalues = {
|
||||
4: 'ROCPROF_TRACE_DECODER_PACKET_OPCODE_CODEOBJ',
|
||||
5: 'ROCPROF_TRACE_DECODER_PACKET_OPCODE_RT_TIMESTAMP',
|
||||
6: 'ROCPROF_TRACE_DECODER_PACKET_OPCODE_AGENT_INFO',
|
||||
}
|
||||
ROCPROF_TRACE_DECODER_PACKET_OPCODE_CODEOBJ = 4
|
||||
ROCPROF_TRACE_DECODER_PACKET_OPCODE_RT_TIMESTAMP = 5
|
||||
ROCPROF_TRACE_DECODER_PACKET_OPCODE_AGENT_INFO = 6
|
||||
rocprof_trace_decoder_packet_opcode_t = ctypes.c_uint32 # enum
|
||||
rocprof_trace_decoder_packet_opcode_t = enum_rocprof_trace_decoder_packet_opcode_t
|
||||
enum_rocprof_trace_decoder_agent_info_type_t = CEnum(ctypes.c_uint32)
|
||||
ROCPROF_TRACE_DECODER_AGENT_INFO_TYPE_RT_FREQUENCY_KHZ = enum_rocprof_trace_decoder_agent_info_type_t.define('ROCPROF_TRACE_DECODER_AGENT_INFO_TYPE_RT_FREQUENCY_KHZ', 0)
|
||||
ROCPROF_TRACE_DECODER_AGENT_INFO_TYPE_COUNTER_INTERVAL = enum_rocprof_trace_decoder_agent_info_type_t.define('ROCPROF_TRACE_DECODER_AGENT_INFO_TYPE_COUNTER_INTERVAL', 1)
|
||||
ROCPROF_TRACE_DECODER_AGENT_INFO_TYPE_LAST = enum_rocprof_trace_decoder_agent_info_type_t.define('ROCPROF_TRACE_DECODER_AGENT_INFO_TYPE_LAST', 2)
|
||||
|
||||
# values for enumeration 'rocprof_trace_decoder_agent_info_type_t'
|
||||
rocprof_trace_decoder_agent_info_type_t__enumvalues = {
|
||||
0: 'ROCPROF_TRACE_DECODER_AGENT_INFO_TYPE_RT_FREQUENCY_KHZ',
|
||||
1: 'ROCPROF_TRACE_DECODER_AGENT_INFO_TYPE_COUNTER_INTERVAL',
|
||||
2: 'ROCPROF_TRACE_DECODER_AGENT_INFO_TYPE_LAST',
|
||||
}
|
||||
ROCPROF_TRACE_DECODER_AGENT_INFO_TYPE_RT_FREQUENCY_KHZ = 0
|
||||
ROCPROF_TRACE_DECODER_AGENT_INFO_TYPE_COUNTER_INTERVAL = 1
|
||||
ROCPROF_TRACE_DECODER_AGENT_INFO_TYPE_LAST = 2
|
||||
rocprof_trace_decoder_agent_info_type_t = ctypes.c_uint32 # enum
|
||||
class union_rocprof_trace_decoder_codeobj_marker_tail_t(Union):
|
||||
pass
|
||||
|
||||
class struct_rocprof_trace_decoder_codeobj_marker_tail_t_0(Structure):
|
||||
pass
|
||||
|
||||
struct_rocprof_trace_decoder_codeobj_marker_tail_t_0._pack_ = 1 # source:False
|
||||
struct_rocprof_trace_decoder_codeobj_marker_tail_t_0._fields_ = [
|
||||
('isUnload', ctypes.c_uint32, 1),
|
||||
('bFromStart', ctypes.c_uint32, 1),
|
||||
('legacy_id', ctypes.c_uint32, 30),
|
||||
rocprof_trace_decoder_agent_info_type_t = enum_rocprof_trace_decoder_agent_info_type_t
|
||||
class union_rocprof_trace_decoder_codeobj_marker_tail_t(ctypes.Union): pass
|
||||
class union_rocprof_trace_decoder_codeobj_marker_tail_t_0(Struct): pass
|
||||
uint32_t = ctypes.c_uint32
|
||||
union_rocprof_trace_decoder_codeobj_marker_tail_t_0._fields_ = [
|
||||
('isUnload', uint32_t,1),
|
||||
('bFromStart', uint32_t,1),
|
||||
('legacy_id', uint32_t,30),
|
||||
]
|
||||
|
||||
union_rocprof_trace_decoder_codeobj_marker_tail_t._pack_ = 1 # source:False
|
||||
union_rocprof_trace_decoder_codeobj_marker_tail_t._anonymous_ = ('_0',)
|
||||
union_rocprof_trace_decoder_codeobj_marker_tail_t._anonymous_ = ['_0']
|
||||
union_rocprof_trace_decoder_codeobj_marker_tail_t._fields_ = [
|
||||
('_0', struct_rocprof_trace_decoder_codeobj_marker_tail_t_0),
|
||||
('raw', ctypes.c_uint32),
|
||||
('_0', union_rocprof_trace_decoder_codeobj_marker_tail_t_0),
|
||||
('raw', uint32_t),
|
||||
]
|
||||
|
||||
rocprof_trace_decoder_codeobj_marker_tail_t = union_rocprof_trace_decoder_codeobj_marker_tail_t
|
||||
enum_rocprof_trace_decoder_codeobj_marker_type_t = CEnum(ctypes.c_uint32)
|
||||
ROCPROF_TRACE_DECODER_CODEOBJ_MARKER_TYPE_TAIL = enum_rocprof_trace_decoder_codeobj_marker_type_t.define('ROCPROF_TRACE_DECODER_CODEOBJ_MARKER_TYPE_TAIL', 0)
|
||||
ROCPROF_TRACE_DECODER_CODEOBJ_MARKER_TYPE_SIZE_LO = enum_rocprof_trace_decoder_codeobj_marker_type_t.define('ROCPROF_TRACE_DECODER_CODEOBJ_MARKER_TYPE_SIZE_LO', 1)
|
||||
ROCPROF_TRACE_DECODER_CODEOBJ_MARKER_TYPE_ADDR_LO = enum_rocprof_trace_decoder_codeobj_marker_type_t.define('ROCPROF_TRACE_DECODER_CODEOBJ_MARKER_TYPE_ADDR_LO', 2)
|
||||
ROCPROF_TRACE_DECODER_CODEOBJ_MARKER_TYPE_ADDR_HI = enum_rocprof_trace_decoder_codeobj_marker_type_t.define('ROCPROF_TRACE_DECODER_CODEOBJ_MARKER_TYPE_ADDR_HI', 3)
|
||||
ROCPROF_TRACE_DECODER_CODEOBJ_MARKER_TYPE_SIZE_HI = enum_rocprof_trace_decoder_codeobj_marker_type_t.define('ROCPROF_TRACE_DECODER_CODEOBJ_MARKER_TYPE_SIZE_HI', 4)
|
||||
ROCPROF_TRACE_DECODER_CODEOBJ_MARKER_TYPE_ID_LO = enum_rocprof_trace_decoder_codeobj_marker_type_t.define('ROCPROF_TRACE_DECODER_CODEOBJ_MARKER_TYPE_ID_LO', 5)
|
||||
ROCPROF_TRACE_DECODER_CODEOBJ_MARKER_TYPE_ID_HI = enum_rocprof_trace_decoder_codeobj_marker_type_t.define('ROCPROF_TRACE_DECODER_CODEOBJ_MARKER_TYPE_ID_HI', 6)
|
||||
ROCPROF_TRACE_DECODER_CODEOBJ_MARKER_TYPE_LAST = enum_rocprof_trace_decoder_codeobj_marker_type_t.define('ROCPROF_TRACE_DECODER_CODEOBJ_MARKER_TYPE_LAST', 7)
|
||||
|
||||
# values for enumeration 'rocprof_trace_decoder_codeobj_marker_type_t'
|
||||
rocprof_trace_decoder_codeobj_marker_type_t__enumvalues = {
|
||||
0: 'ROCPROF_TRACE_DECODER_CODEOBJ_MARKER_TYPE_TAIL',
|
||||
1: 'ROCPROF_TRACE_DECODER_CODEOBJ_MARKER_TYPE_SIZE_LO',
|
||||
2: 'ROCPROF_TRACE_DECODER_CODEOBJ_MARKER_TYPE_ADDR_LO',
|
||||
3: 'ROCPROF_TRACE_DECODER_CODEOBJ_MARKER_TYPE_ADDR_HI',
|
||||
4: 'ROCPROF_TRACE_DECODER_CODEOBJ_MARKER_TYPE_SIZE_HI',
|
||||
5: 'ROCPROF_TRACE_DECODER_CODEOBJ_MARKER_TYPE_ID_LO',
|
||||
6: 'ROCPROF_TRACE_DECODER_CODEOBJ_MARKER_TYPE_ID_HI',
|
||||
7: 'ROCPROF_TRACE_DECODER_CODEOBJ_MARKER_TYPE_LAST',
|
||||
}
|
||||
ROCPROF_TRACE_DECODER_CODEOBJ_MARKER_TYPE_TAIL = 0
|
||||
ROCPROF_TRACE_DECODER_CODEOBJ_MARKER_TYPE_SIZE_LO = 1
|
||||
ROCPROF_TRACE_DECODER_CODEOBJ_MARKER_TYPE_ADDR_LO = 2
|
||||
ROCPROF_TRACE_DECODER_CODEOBJ_MARKER_TYPE_ADDR_HI = 3
|
||||
ROCPROF_TRACE_DECODER_CODEOBJ_MARKER_TYPE_SIZE_HI = 4
|
||||
ROCPROF_TRACE_DECODER_CODEOBJ_MARKER_TYPE_ID_LO = 5
|
||||
ROCPROF_TRACE_DECODER_CODEOBJ_MARKER_TYPE_ID_HI = 6
|
||||
ROCPROF_TRACE_DECODER_CODEOBJ_MARKER_TYPE_LAST = 7
|
||||
rocprof_trace_decoder_codeobj_marker_type_t = ctypes.c_uint32 # enum
|
||||
__all__ = \
|
||||
['ROCPROFILER_THREAD_TRACE_DECODER_INFO_DATA_LOST',
|
||||
'ROCPROFILER_THREAD_TRACE_DECODER_INFO_LAST',
|
||||
'ROCPROFILER_THREAD_TRACE_DECODER_INFO_NONE',
|
||||
'ROCPROFILER_THREAD_TRACE_DECODER_INFO_STITCH_INCOMPLETE',
|
||||
'ROCPROFILER_THREAD_TRACE_DECODER_INFO_WAVE_INCOMPLETE',
|
||||
'ROCPROFILER_THREAD_TRACE_DECODER_INST_BVH',
|
||||
'ROCPROFILER_THREAD_TRACE_DECODER_INST_CONTEXT',
|
||||
'ROCPROFILER_THREAD_TRACE_DECODER_INST_FLAT',
|
||||
'ROCPROFILER_THREAD_TRACE_DECODER_INST_IMMED',
|
||||
'ROCPROFILER_THREAD_TRACE_DECODER_INST_JUMP',
|
||||
'ROCPROFILER_THREAD_TRACE_DECODER_INST_LAST',
|
||||
'ROCPROFILER_THREAD_TRACE_DECODER_INST_LDS',
|
||||
'ROCPROFILER_THREAD_TRACE_DECODER_INST_MESSAGE',
|
||||
'ROCPROFILER_THREAD_TRACE_DECODER_INST_NEXT',
|
||||
'ROCPROFILER_THREAD_TRACE_DECODER_INST_NONE',
|
||||
'ROCPROFILER_THREAD_TRACE_DECODER_INST_SALU',
|
||||
'ROCPROFILER_THREAD_TRACE_DECODER_INST_SMEM',
|
||||
'ROCPROFILER_THREAD_TRACE_DECODER_INST_VALU',
|
||||
'ROCPROFILER_THREAD_TRACE_DECODER_INST_VMEM',
|
||||
'ROCPROFILER_THREAD_TRACE_DECODER_RECORD_DEBUG',
|
||||
'ROCPROFILER_THREAD_TRACE_DECODER_RECORD_GFXIP',
|
||||
'ROCPROFILER_THREAD_TRACE_DECODER_RECORD_INFO',
|
||||
'ROCPROFILER_THREAD_TRACE_DECODER_RECORD_LAST',
|
||||
'ROCPROFILER_THREAD_TRACE_DECODER_RECORD_OCCUPANCY',
|
||||
'ROCPROFILER_THREAD_TRACE_DECODER_RECORD_PERFEVENT',
|
||||
'ROCPROFILER_THREAD_TRACE_DECODER_RECORD_REALTIME',
|
||||
'ROCPROFILER_THREAD_TRACE_DECODER_RECORD_RT_FREQUENCY',
|
||||
'ROCPROFILER_THREAD_TRACE_DECODER_RECORD_SHADERDATA',
|
||||
'ROCPROFILER_THREAD_TRACE_DECODER_RECORD_WAVE',
|
||||
'ROCPROFILER_THREAD_TRACE_DECODER_SHADERDATA_FLAGS_IMM',
|
||||
'ROCPROFILER_THREAD_TRACE_DECODER_SHADERDATA_FLAGS_PRIV',
|
||||
'ROCPROFILER_THREAD_TRACE_DECODER_STATUS_ERROR',
|
||||
'ROCPROFILER_THREAD_TRACE_DECODER_STATUS_ERROR_INVALID_ARGUMENT',
|
||||
'ROCPROFILER_THREAD_TRACE_DECODER_STATUS_ERROR_INVALID_SHADER_DATA',
|
||||
'ROCPROFILER_THREAD_TRACE_DECODER_STATUS_ERROR_OUT_OF_RESOURCES',
|
||||
'ROCPROFILER_THREAD_TRACE_DECODER_STATUS_LAST',
|
||||
'ROCPROFILER_THREAD_TRACE_DECODER_STATUS_SUCCESS',
|
||||
'ROCPROFILER_THREAD_TRACE_DECODER_WSTATE_EMPTY',
|
||||
'ROCPROFILER_THREAD_TRACE_DECODER_WSTATE_EXEC',
|
||||
'ROCPROFILER_THREAD_TRACE_DECODER_WSTATE_IDLE',
|
||||
'ROCPROFILER_THREAD_TRACE_DECODER_WSTATE_LAST',
|
||||
'ROCPROFILER_THREAD_TRACE_DECODER_WSTATE_STALL',
|
||||
'ROCPROFILER_THREAD_TRACE_DECODER_WSTATE_WAIT',
|
||||
'ROCPROF_TRACE_DECODER_AGENT_INFO_TYPE_COUNTER_INTERVAL',
|
||||
'ROCPROF_TRACE_DECODER_AGENT_INFO_TYPE_LAST',
|
||||
'ROCPROF_TRACE_DECODER_AGENT_INFO_TYPE_RT_FREQUENCY_KHZ',
|
||||
'ROCPROF_TRACE_DECODER_CODEOBJ_MARKER_TYPE_ADDR_HI',
|
||||
'ROCPROF_TRACE_DECODER_CODEOBJ_MARKER_TYPE_ADDR_LO',
|
||||
'ROCPROF_TRACE_DECODER_CODEOBJ_MARKER_TYPE_ID_HI',
|
||||
'ROCPROF_TRACE_DECODER_CODEOBJ_MARKER_TYPE_ID_LO',
|
||||
'ROCPROF_TRACE_DECODER_CODEOBJ_MARKER_TYPE_LAST',
|
||||
'ROCPROF_TRACE_DECODER_CODEOBJ_MARKER_TYPE_SIZE_HI',
|
||||
'ROCPROF_TRACE_DECODER_CODEOBJ_MARKER_TYPE_SIZE_LO',
|
||||
'ROCPROF_TRACE_DECODER_CODEOBJ_MARKER_TYPE_TAIL',
|
||||
'ROCPROF_TRACE_DECODER_PACKET_OPCODE_AGENT_INFO',
|
||||
'ROCPROF_TRACE_DECODER_PACKET_OPCODE_CODEOBJ',
|
||||
'ROCPROF_TRACE_DECODER_PACKET_OPCODE_RT_TIMESTAMP',
|
||||
'c__EA_rocprofiler_thread_trace_decoder_status_t',
|
||||
'rocprof_trace_decoder_agent_info_type_t',
|
||||
'rocprof_trace_decoder_codeobj_marker_tail_t',
|
||||
'rocprof_trace_decoder_codeobj_marker_type_t',
|
||||
'rocprof_trace_decoder_dump_data',
|
||||
'rocprof_trace_decoder_get_info_string',
|
||||
'rocprof_trace_decoder_get_status_string',
|
||||
'rocprof_trace_decoder_gfx9_header_t',
|
||||
'rocprof_trace_decoder_instrument_enable_t',
|
||||
'rocprof_trace_decoder_isa_callback_t',
|
||||
'rocprof_trace_decoder_packet_header_t',
|
||||
'rocprof_trace_decoder_packet_opcode_t',
|
||||
'rocprof_trace_decoder_parse_data',
|
||||
'rocprof_trace_decoder_se_data_callback_t',
|
||||
'rocprof_trace_decoder_trace_callback_t',
|
||||
'rocprofiler_thread_trace_decoder_debug_callback_t',
|
||||
'rocprofiler_thread_trace_decoder_info_t',
|
||||
'rocprofiler_thread_trace_decoder_inst_category_t',
|
||||
'rocprofiler_thread_trace_decoder_inst_t',
|
||||
'rocprofiler_thread_trace_decoder_occupancy_t',
|
||||
'rocprofiler_thread_trace_decoder_pc_t',
|
||||
'rocprofiler_thread_trace_decoder_perfevent_t',
|
||||
'rocprofiler_thread_trace_decoder_realtime_t',
|
||||
'rocprofiler_thread_trace_decoder_record_type_t',
|
||||
'rocprofiler_thread_trace_decoder_shaderdata_flags_t',
|
||||
'rocprofiler_thread_trace_decoder_shaderdata_t',
|
||||
'rocprofiler_thread_trace_decoder_status_t',
|
||||
'rocprofiler_thread_trace_decoder_status_t__enumvalues',
|
||||
'rocprofiler_thread_trace_decoder_wave_state_t',
|
||||
'rocprofiler_thread_trace_decoder_wave_t',
|
||||
'rocprofiler_thread_trace_decoder_wstate_type_t',
|
||||
'struct_rocprof_trace_decoder_codeobj_marker_tail_t_0',
|
||||
'struct_rocprof_trace_decoder_gfx9_header_t_0',
|
||||
'struct_rocprof_trace_decoder_instrument_enable_t_0',
|
||||
'struct_rocprof_trace_decoder_packet_header_t_0',
|
||||
'struct_rocprofiler_thread_trace_decoder_inst_t',
|
||||
'struct_rocprofiler_thread_trace_decoder_occupancy_t',
|
||||
'struct_rocprofiler_thread_trace_decoder_pc_t',
|
||||
'struct_rocprofiler_thread_trace_decoder_perfevent_t',
|
||||
'struct_rocprofiler_thread_trace_decoder_realtime_t',
|
||||
'struct_rocprofiler_thread_trace_decoder_shaderdata_t',
|
||||
'struct_rocprofiler_thread_trace_decoder_wave_state_t',
|
||||
'struct_rocprofiler_thread_trace_decoder_wave_t', 'uint64_t',
|
||||
'union_rocprof_trace_decoder_codeobj_marker_tail_t',
|
||||
'union_rocprof_trace_decoder_gfx9_header_t',
|
||||
'union_rocprof_trace_decoder_instrument_enable_t',
|
||||
'union_rocprof_trace_decoder_packet_header_t']
|
||||
rocprof_trace_decoder_codeobj_marker_type_t = enum_rocprof_trace_decoder_codeobj_marker_type_t
|
||||
rocprofiler_thread_trace_decoder_pc_t = struct_rocprofiler_thread_trace_decoder_pc_t
|
||||
class struct_rocprofiler_thread_trace_decoder_perfevent_t(Struct): pass
|
||||
int64_t = ctypes.c_int64
|
||||
uint16_t = ctypes.c_uint16
|
||||
uint8_t = ctypes.c_ubyte
|
||||
struct_rocprofiler_thread_trace_decoder_perfevent_t._fields_ = [
|
||||
('time', int64_t),
|
||||
('events0', uint16_t),
|
||||
('events1', uint16_t),
|
||||
('events2', uint16_t),
|
||||
('events3', uint16_t),
|
||||
('CU', uint8_t),
|
||||
('bank', uint8_t),
|
||||
]
|
||||
rocprofiler_thread_trace_decoder_perfevent_t = struct_rocprofiler_thread_trace_decoder_perfevent_t
|
||||
class struct_rocprofiler_thread_trace_decoder_occupancy_t(Struct): pass
|
||||
struct_rocprofiler_thread_trace_decoder_occupancy_t._fields_ = [
|
||||
('pc', rocprofiler_thread_trace_decoder_pc_t),
|
||||
('time', uint64_t),
|
||||
('reserved', uint8_t),
|
||||
('cu', uint8_t),
|
||||
('simd', uint8_t),
|
||||
('wave_id', uint8_t),
|
||||
('start', uint32_t,1),
|
||||
('_rsvd', uint32_t,31),
|
||||
]
|
||||
rocprofiler_thread_trace_decoder_occupancy_t = struct_rocprofiler_thread_trace_decoder_occupancy_t
|
||||
enum_rocprofiler_thread_trace_decoder_wstate_type_t = CEnum(ctypes.c_uint32)
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_WSTATE_EMPTY = enum_rocprofiler_thread_trace_decoder_wstate_type_t.define('ROCPROFILER_THREAD_TRACE_DECODER_WSTATE_EMPTY', 0)
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_WSTATE_IDLE = enum_rocprofiler_thread_trace_decoder_wstate_type_t.define('ROCPROFILER_THREAD_TRACE_DECODER_WSTATE_IDLE', 1)
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_WSTATE_EXEC = enum_rocprofiler_thread_trace_decoder_wstate_type_t.define('ROCPROFILER_THREAD_TRACE_DECODER_WSTATE_EXEC', 2)
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_WSTATE_WAIT = enum_rocprofiler_thread_trace_decoder_wstate_type_t.define('ROCPROFILER_THREAD_TRACE_DECODER_WSTATE_WAIT', 3)
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_WSTATE_STALL = enum_rocprofiler_thread_trace_decoder_wstate_type_t.define('ROCPROFILER_THREAD_TRACE_DECODER_WSTATE_STALL', 4)
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_WSTATE_LAST = enum_rocprofiler_thread_trace_decoder_wstate_type_t.define('ROCPROFILER_THREAD_TRACE_DECODER_WSTATE_LAST', 5)
|
||||
|
||||
rocprofiler_thread_trace_decoder_wstate_type_t = enum_rocprofiler_thread_trace_decoder_wstate_type_t
|
||||
class struct_rocprofiler_thread_trace_decoder_wave_state_t(Struct): pass
|
||||
int32_t = ctypes.c_int32
|
||||
struct_rocprofiler_thread_trace_decoder_wave_state_t._fields_ = [
|
||||
('type', int32_t),
|
||||
('duration', int32_t),
|
||||
]
|
||||
rocprofiler_thread_trace_decoder_wave_state_t = struct_rocprofiler_thread_trace_decoder_wave_state_t
|
||||
enum_rocprofiler_thread_trace_decoder_inst_category_t = CEnum(ctypes.c_uint32)
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_INST_NONE = enum_rocprofiler_thread_trace_decoder_inst_category_t.define('ROCPROFILER_THREAD_TRACE_DECODER_INST_NONE', 0)
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_INST_SMEM = enum_rocprofiler_thread_trace_decoder_inst_category_t.define('ROCPROFILER_THREAD_TRACE_DECODER_INST_SMEM', 1)
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_INST_SALU = enum_rocprofiler_thread_trace_decoder_inst_category_t.define('ROCPROFILER_THREAD_TRACE_DECODER_INST_SALU', 2)
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_INST_VMEM = enum_rocprofiler_thread_trace_decoder_inst_category_t.define('ROCPROFILER_THREAD_TRACE_DECODER_INST_VMEM', 3)
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_INST_FLAT = enum_rocprofiler_thread_trace_decoder_inst_category_t.define('ROCPROFILER_THREAD_TRACE_DECODER_INST_FLAT', 4)
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_INST_LDS = enum_rocprofiler_thread_trace_decoder_inst_category_t.define('ROCPROFILER_THREAD_TRACE_DECODER_INST_LDS', 5)
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_INST_VALU = enum_rocprofiler_thread_trace_decoder_inst_category_t.define('ROCPROFILER_THREAD_TRACE_DECODER_INST_VALU', 6)
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_INST_JUMP = enum_rocprofiler_thread_trace_decoder_inst_category_t.define('ROCPROFILER_THREAD_TRACE_DECODER_INST_JUMP', 7)
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_INST_NEXT = enum_rocprofiler_thread_trace_decoder_inst_category_t.define('ROCPROFILER_THREAD_TRACE_DECODER_INST_NEXT', 8)
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_INST_IMMED = enum_rocprofiler_thread_trace_decoder_inst_category_t.define('ROCPROFILER_THREAD_TRACE_DECODER_INST_IMMED', 9)
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_INST_CONTEXT = enum_rocprofiler_thread_trace_decoder_inst_category_t.define('ROCPROFILER_THREAD_TRACE_DECODER_INST_CONTEXT', 10)
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_INST_MESSAGE = enum_rocprofiler_thread_trace_decoder_inst_category_t.define('ROCPROFILER_THREAD_TRACE_DECODER_INST_MESSAGE', 11)
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_INST_BVH = enum_rocprofiler_thread_trace_decoder_inst_category_t.define('ROCPROFILER_THREAD_TRACE_DECODER_INST_BVH', 12)
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_INST_LAST = enum_rocprofiler_thread_trace_decoder_inst_category_t.define('ROCPROFILER_THREAD_TRACE_DECODER_INST_LAST', 13)
|
||||
|
||||
rocprofiler_thread_trace_decoder_inst_category_t = enum_rocprofiler_thread_trace_decoder_inst_category_t
|
||||
class struct_rocprofiler_thread_trace_decoder_inst_t(Struct): pass
|
||||
struct_rocprofiler_thread_trace_decoder_inst_t._fields_ = [
|
||||
('category', uint32_t,8),
|
||||
('stall', uint32_t,24),
|
||||
('duration', int32_t),
|
||||
('time', int64_t),
|
||||
('pc', rocprofiler_thread_trace_decoder_pc_t),
|
||||
]
|
||||
rocprofiler_thread_trace_decoder_inst_t = struct_rocprofiler_thread_trace_decoder_inst_t
|
||||
class struct_rocprofiler_thread_trace_decoder_wave_t(Struct): pass
|
||||
struct_rocprofiler_thread_trace_decoder_wave_t._fields_ = [
|
||||
('cu', uint8_t),
|
||||
('simd', uint8_t),
|
||||
('wave_id', uint8_t),
|
||||
('contexts', uint8_t),
|
||||
('_rsvd1', uint32_t),
|
||||
('_rsvd2', uint32_t),
|
||||
('_rsvd3', uint32_t),
|
||||
('begin_time', int64_t),
|
||||
('end_time', int64_t),
|
||||
('timeline_size', uint64_t),
|
||||
('instructions_size', uint64_t),
|
||||
('timeline_array', ctypes.POINTER(rocprofiler_thread_trace_decoder_wave_state_t)),
|
||||
('instructions_array', ctypes.POINTER(rocprofiler_thread_trace_decoder_inst_t)),
|
||||
]
|
||||
rocprofiler_thread_trace_decoder_wave_t = struct_rocprofiler_thread_trace_decoder_wave_t
|
||||
class struct_rocprofiler_thread_trace_decoder_realtime_t(Struct): pass
|
||||
struct_rocprofiler_thread_trace_decoder_realtime_t._fields_ = [
|
||||
('shader_clock', int64_t),
|
||||
('realtime_clock', uint64_t),
|
||||
('reserved', uint64_t),
|
||||
]
|
||||
rocprofiler_thread_trace_decoder_realtime_t = struct_rocprofiler_thread_trace_decoder_realtime_t
|
||||
enum_rocprofiler_thread_trace_decoder_shaderdata_flags_t = CEnum(ctypes.c_uint32)
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_SHADERDATA_FLAGS_IMM = enum_rocprofiler_thread_trace_decoder_shaderdata_flags_t.define('ROCPROFILER_THREAD_TRACE_DECODER_SHADERDATA_FLAGS_IMM', 0)
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_SHADERDATA_FLAGS_PRIV = enum_rocprofiler_thread_trace_decoder_shaderdata_flags_t.define('ROCPROFILER_THREAD_TRACE_DECODER_SHADERDATA_FLAGS_PRIV', 1)
|
||||
|
||||
rocprofiler_thread_trace_decoder_shaderdata_flags_t = enum_rocprofiler_thread_trace_decoder_shaderdata_flags_t
|
||||
class struct_rocprofiler_thread_trace_decoder_shaderdata_t(Struct): pass
|
||||
struct_rocprofiler_thread_trace_decoder_shaderdata_t._fields_ = [
|
||||
('time', int64_t),
|
||||
('value', uint64_t),
|
||||
('cu', uint8_t),
|
||||
('simd', uint8_t),
|
||||
('wave_id', uint8_t),
|
||||
('flags', uint8_t),
|
||||
('reserved', uint32_t),
|
||||
]
|
||||
rocprofiler_thread_trace_decoder_shaderdata_t = struct_rocprofiler_thread_trace_decoder_shaderdata_t
|
||||
rocprofiler_thread_trace_decoder_record_type_t = enum_rocprofiler_thread_trace_decoder_record_type_t
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
|
|
@ -188,14 +188,9 @@ class AMDComputeQueue(HWQueue):
|
|||
### SQTT ###
|
||||
|
||||
def sqtt_setup_exec(self, prg, global_size):
|
||||
self.sqtt_userdata(sqtt.struct_rgp_sqtt_marker_pipeline_bind(
|
||||
_0=sqtt.union_rgp_sqtt_marker_pipeline_bind_0(_0=sqtt.struct_rgp_sqtt_marker_pipeline_bind_0_0(
|
||||
identifier=sqtt.RGP_SQTT_MARKER_IDENTIFIER_BIND_PIPELINE, bind_point=(__BIND_POINT_COMPUTE:=1))),
|
||||
_1=sqtt.union_rgp_sqtt_marker_pipeline_bind_1(api_pso_hash=data64_le(prg.libhash[0]))))
|
||||
|
||||
self.sqtt_userdata(sqtt.struct_rgp_sqtt_marker_event(
|
||||
_0=sqtt.union_rgp_sqtt_marker_event_0(_0=sqtt.struct_rgp_sqtt_marker_event_0_0(has_thread_dims=1)),
|
||||
_2=sqtt.union_rgp_sqtt_marker_event_2(cmd_id=next(prg.dev.sqtt_next_cmd_id))), *global_size)
|
||||
self.sqtt_userdata(sqtt.struct_rgp_sqtt_marker_pipeline_bind(identifier=sqtt.RGP_SQTT_MARKER_IDENTIFIER_BIND_PIPELINE,
|
||||
bind_point=(__BIND_POINT_COMPUTE:=1), api_pso_hash=data64_le(prg.libhash[0])))
|
||||
self.sqtt_userdata(sqtt.struct_rgp_sqtt_marker_event(has_thread_dims=1, cmd_id=next(prg.dev.sqtt_next_cmd_id)), *global_size)
|
||||
|
||||
se_cap = max(prod([x if isinstance(x, int) else 1 for x in global_size]) // 4, 1) // 32
|
||||
for xcc in range(self.dev.xccs):
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ if MOCKGPU:=getenv("MOCKGPU"): from test.mockgpu.cuda import cuda # type: ignore
|
|||
|
||||
def check(status):
|
||||
if status != 0:
|
||||
error = ctypes.string_at(init_c_var(ctypes.POINTER(ctypes.c_char)(), lambda x: cuda.cuGetErrorString(status, ctypes.byref(x)))).decode()
|
||||
error = ctypes.string_at(init_c_var(ctypes.c_char_p(), lambda x: cuda.cuGetErrorString(status, x))).decode()
|
||||
raise RuntimeError(f"CUDA Error {status}, {error}")
|
||||
|
||||
def encode_args(args, vals) -> tuple[ctypes.Structure, ctypes.Array]:
|
||||
|
|
@ -42,7 +42,7 @@ class CUDAProgram:
|
|||
status = cuda.cuModuleLoadData(ctypes.byref(self.module), lib)
|
||||
if status != 0:
|
||||
del self.module
|
||||
raise RuntimeError(f"module load failed with status code {status}: {cuda.cudaError_enum__enumvalues[status]}")
|
||||
raise RuntimeError(f"module load failed with status code {status}: {cuda.CUresult.get(status)}")
|
||||
check(cuda.cuModuleGetFunction(ctypes.byref(prg := cuda.CUfunction()), self.module, name.encode("utf-8")))
|
||||
self.prg = prg
|
||||
if self.smem > 0: check(cuda.cuFuncSetAttribute(self.prg, cuda.CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES, self.smem))
|
||||
|
|
|
|||
|
|
@ -36,7 +36,7 @@ def uvm_ioctl(cmd, sttyp, fd:FileIOInterface, **kwargs):
|
|||
|
||||
def make_uvm_type():
|
||||
return type("NVUVM", (object,), {name.replace("UVM_", "").lower(): functools.partial(uvm_ioctl, dt, getattr(nv_gpu, name+"_PARAMS"))
|
||||
for name,dt in nv_gpu.__dict__.items() if name.startswith("UVM_") and nv_gpu.__dict__.get(name+"_PARAMS")})
|
||||
for name,dt in nv_gpu.__dict__.items() if name.startswith("UVM_") and nv_gpu.__dict__.get(name+"_PARAMS")})
|
||||
uvm = make_uvm_type()
|
||||
|
||||
class QMD:
|
||||
|
|
@ -436,7 +436,7 @@ class NVKIface:
|
|||
|
||||
def _gpu_uvm_map(self, va_base, size, mem_handle, create_range=True, has_cpu_mapping=False) -> HCQBuffer:
|
||||
if create_range: uvm.create_external_range(self.fd_uvm, base=va_base, length=size)
|
||||
attrs = (nv_gpu.struct_c__SA_UvmGpuMappingAttributes*256)(nv_gpu.struct_c__SA_UvmGpuMappingAttributes(gpuUuid=self.gpu_uuid, gpuMappingType=1))
|
||||
attrs = (nv_gpu.UvmGpuMappingAttributes*256)(nv_gpu.UvmGpuMappingAttributes(gpuUuid=self.gpu_uuid, gpuMappingType=1))
|
||||
|
||||
# NOTE: va_addr is set to make rawbufs compatible with HCQBuffer protocol.
|
||||
return HCQBuffer(va_base, size, meta=uvm.map_external_allocation(self.fd_uvm, base=va_base, length=size, rmCtrlFd=self.fd_ctl.fd,
|
||||
|
|
|
|||
|
|
@ -321,13 +321,15 @@ class QCOMAllocator(HCQAllocatorBase):
|
|||
self.dev.synchronize()
|
||||
self.dev._gpu_free(opaque)
|
||||
|
||||
def flag(nm, val): return (val << getattr(kgsl, f"{nm}_SHIFT")) & getattr(kgsl, f"{nm}_MASK")
|
||||
|
||||
class QCOMDevice(HCQCompiled):
|
||||
def __init__(self, device:str=""):
|
||||
self.fd = FileIOInterface('/dev/kgsl-3d0', os.O_RDWR)
|
||||
self.dummy_addr = cast(int, self._gpu_alloc(0x1000).va_addr)
|
||||
|
||||
flags = kgsl.KGSL_CONTEXT_PREAMBLE | kgsl.KGSL_CONTEXT_PWR_CONSTRAINT | kgsl.KGSL_CONTEXT_NO_FAULT_TOLERANCE | kgsl.KGSL_CONTEXT_NO_GMEM_ALLOC \
|
||||
| kgsl.KGSL_CONTEXT_PRIORITY(getenv("QCOM_PRIORITY", 8)) | kgsl.KGSL_CONTEXT_PREEMPT_STYLE(kgsl.KGSL_CONTEXT_PREEMPT_STYLE_FINEGRAIN)
|
||||
| flag("KGSL_CONTEXT_PRIORITY", getenv("QCOM_PRIORITY", 8)) | flag("KGSL_CONTEXT_PREEMPT_STYLE", kgsl.KGSL_CONTEXT_PREEMPT_STYLE_FINEGRAIN)
|
||||
self.ctx = kgsl.IOCTL_KGSL_DRAWCTXT_CREATE(self.fd, flags=flags).drawctxt_id
|
||||
|
||||
self.cmd_buf = self._gpu_alloc(16 << 20)
|
||||
|
|
@ -357,8 +359,8 @@ class QCOMDevice(HCQCompiled):
|
|||
functools.partial(QCOMComputeQueue, self), None)
|
||||
|
||||
def _gpu_alloc(self, size:int, flags:int=0, uncached=False, fill_zeroes=False) -> HCQBuffer:
|
||||
flags |= kgsl.KGSL_MEMALIGN(alignment_hint:=12) | kgsl.KGSL_MEMFLAGS_USE_CPU_MAP
|
||||
if uncached: flags |= kgsl.KGSL_CACHEMODE(kgsl.KGSL_CACHEMODE_UNCACHED)
|
||||
flags |= flag("KGSL_MEMALIGN", alignment_hint:=12) | kgsl.KGSL_MEMFLAGS_USE_CPU_MAP
|
||||
if uncached: flags |= flag("KGSL_CACHEMODE", kgsl.KGSL_CACHEMODE_UNCACHED)
|
||||
|
||||
alloc = kgsl.IOCTL_KGSL_GPUOBJ_ALLOC(self.fd, size=(bosz:=round_up(size, 1<<alignment_hint)), flags=flags, mmapsize=bosz)
|
||||
va_addr = self.fd.mmap(0, bosz, mmap.PROT_READ | mmap.PROT_WRITE, mmap.MAP_SHARED, alloc.id * 0x1000)
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@ import os
|
|||
WGPUDevPtr: TypeAlias = webgpu.WGPUDevice # type: ignore
|
||||
WGPUBufPtr: TypeAlias = webgpu.WGPUBuffer # type: ignore
|
||||
|
||||
backend_types = {v: k for k, v in webgpu.WGPUBackendType__enumvalues.items() }
|
||||
backend_types = {v: k for k, v in webgpu.enum_WGPUBackendType.items() }
|
||||
|
||||
instance = webgpu.wgpuCreateInstance(webgpu.WGPUInstanceDescriptor(features = webgpu.WGPUInstanceFeatures(timedWaitAnyEnable = True)))
|
||||
|
||||
|
|
@ -38,7 +38,7 @@ def _run(async_fun, cb_info_type, cb_type, status_enum, res_idx:int|None, msg_id
|
|||
cb_info = cb_info_type(nextInChain=None, mode=webgpu.WGPUCallbackMode_WaitAnyOnly, callback=cb_type(cb))
|
||||
_wait(async_fun(*params, cb_info))
|
||||
|
||||
if result[0] != 1: raise RuntimeError(f"[{status_enum[result[0]] if status_enum else 'ERROR'}]{result[msg_idx] if msg_idx else ''}")
|
||||
if result[0] != 1: raise RuntimeError(f"[{status_enum.get(result[0]) if status_enum else 'ERROR'}]{result[msg_idx] if msg_idx else ''}")
|
||||
return result[res_idx] if res_idx else None
|
||||
|
||||
def copy_buffer_to_buffer(dev:WGPUDevPtr, src:WGPUBufPtr, src_offset:int, dst:WGPUBufPtr, dst_offset:int, size:int):
|
||||
|
|
@ -54,8 +54,8 @@ def read_buffer(dev:WGPUDevPtr, buf:WGPUBufPtr) -> memoryview:
|
|||
tmp_buffer = webgpu.wgpuDeviceCreateBuffer(dev, webgpu.WGPUBufferDescriptor(size=size,
|
||||
usage=webgpu.WGPUBufferUsage_CopyDst | webgpu.WGPUBufferUsage_MapRead, mappedAtCreation=False))
|
||||
copy_buffer_to_buffer(dev, buf, 0, tmp_buffer, 0, size)
|
||||
_run(webgpu.wgpuBufferMapAsync2, webgpu.WGPUBufferMapCallbackInfo2, webgpu.WGPUBufferMapCallback2, webgpu.WGPUBufferMapAsyncStatus__enumvalues,
|
||||
None, 0, tmp_buffer, webgpu.WGPUMapMode_Read, 0, size)
|
||||
_run(webgpu.wgpuBufferMapAsync2, webgpu.WGPUBufferMapCallbackInfo2, webgpu.WGPUBufferMapCallback2, webgpu.WGPUBufferMapAsyncStatus, None, 0,
|
||||
tmp_buffer, webgpu.WGPUMapMode_Read, 0, size)
|
||||
void_ptr = ctypes.cast(webgpu.wgpuBufferGetConstMappedRange(tmp_buffer, 0, size), ctypes.c_void_p)
|
||||
buf_copy = bytearray((ctypes.c_uint8 * size).from_address(void_ptr.value))
|
||||
webgpu.wgpuBufferUnmap(tmp_buffer)
|
||||
|
|
@ -140,7 +140,7 @@ class WebGPUProgram:
|
|||
compute_desc = webgpu.WGPUComputePipelineDescriptor(layout=pipeline_layout,
|
||||
compute=webgpu.WGPUComputeState(module=self.prg, entryPoint=to_wgpu_str(self.name)))
|
||||
pipeline_result = _run(webgpu.wgpuDeviceCreateComputePipelineAsync2, webgpu.WGPUCreateComputePipelineAsyncCallbackInfo2,
|
||||
webgpu.WGPUCreateComputePipelineAsyncCallback2, webgpu.WGPUCreatePipelineAsyncStatus__enumvalues, 1, None, self.dev, compute_desc)
|
||||
webgpu.WGPUCreateComputePipelineAsyncCallback2, webgpu.WGPUCreatePipelineAsyncStatus, 1, None, self.dev, compute_desc)
|
||||
|
||||
command_encoder = webgpu.wgpuDeviceCreateCommandEncoder(self.dev, webgpu.WGPUCommandEncoderDescriptor())
|
||||
comp_pass_desc = webgpu.WGPUComputePassDescriptor(nextInChain=None)
|
||||
|
|
@ -195,9 +195,7 @@ class WebGpuDevice(Compiled):
|
|||
def __init__(self, device:str):
|
||||
# Requesting an adapter
|
||||
adapter_res = _run(webgpu.wgpuInstanceRequestAdapterF, webgpu.WGPURequestAdapterCallbackInfo, webgpu.WGPURequestAdapterCallback,
|
||||
webgpu.WGPURequestAdapterStatus__enumvalues, 1, 2, instance,
|
||||
|
||||
webgpu.WGPURequestAdapterOptions(powerPreference=webgpu.WGPUPowerPreference_HighPerformance,
|
||||
webgpu.WGPURequestAdapterStatus, 1, 2, instance, webgpu.WGPURequestAdapterOptions(powerPreference=webgpu.WGPUPowerPreference_HighPerformance,
|
||||
backendType=backend_types.get(os.getenv("WEBGPU_BACKEND", ""), 0)))
|
||||
|
||||
# Get supported features
|
||||
|
|
@ -215,11 +213,11 @@ class WebGpuDevice(Compiled):
|
|||
|
||||
# Requesting a device
|
||||
device_res = _run(webgpu.wgpuAdapterRequestDeviceF, webgpu.WGPURequestDeviceCallbackInfo, webgpu.WGPURequestDeviceCallback,
|
||||
webgpu.WGPURequestDeviceStatus__enumvalues, 1, 2, adapter_res, dev_desc)
|
||||
webgpu.WGPURequestDeviceStatus, 1, 2, adapter_res, dev_desc)
|
||||
|
||||
super().__init__(device, WebGpuAllocator(device_res), [(WGSLRenderer, Compiler)],
|
||||
functools.partial(WebGPUProgram, (device_res, webgpu.WGPUFeatureName_TimestampQuery in supported)))
|
||||
|
||||
def synchronize(self):
|
||||
_run(webgpu.wgpuQueueOnSubmittedWorkDone2, webgpu.WGPUQueueWorkDoneCallbackInfo2, webgpu.WGPUQueueWorkDoneCallback2,
|
||||
webgpu.WGPUQueueWorkDoneStatus__enumvalues, None, None, webgpu.wgpuDeviceGetQueue(self.runtime.args[0][0]))
|
||||
webgpu.WGPUQueueWorkDoneStatus, None, None, webgpu.wgpuDeviceGetQueue(self.runtime.args[0][0]))
|
||||
|
|
|
|||
|
|
@ -238,8 +238,6 @@ class AMDev(PCIDevImplBase):
|
|||
ihdr = am.struct_ip_discovery_header.from_address(ctypes.addressof(self.bhdr) + self.bhdr.table_list[am.IP_DISCOVERY].offset)
|
||||
assert self.bhdr.binary_signature == am.BINARY_SIGNATURE and ihdr.signature == am.DISCOVERY_TABLE_SIGNATURE, "discovery signatures mismatch"
|
||||
|
||||
# Mapping of HW IP to Discovery HW IP
|
||||
hw_id_map = {am.__dict__[x]: int(y) for x,y in am.hw_id_map}
|
||||
self.regs_offset:dict[int, dict[int, tuple]] = collections.defaultdict(dict)
|
||||
self.ip_ver:dict[int, tuple[int, int, int]] = {}
|
||||
|
||||
|
|
@ -251,7 +249,7 @@ class AMDev(PCIDevImplBase):
|
|||
ip = am.struct_ip_v4.from_address(ip_offset)
|
||||
ba = ((ctypes.c_uint64 if ihdr.base_addr_64_bit else ctypes.c_uint32) * ip.num_base_address).from_address(ip_offset + 8)
|
||||
for hw_ip in range(1, am.MAX_HWIP):
|
||||
if hw_ip in hw_id_map and hw_id_map[hw_ip] == ip.hw_id:
|
||||
if hw_ip in am.hw_id_map and am.hw_id_map[hw_ip] == ip.hw_id:
|
||||
self.regs_offset[hw_ip][ip.instance_number] = tuple(list(ba))
|
||||
self.ip_ver[hw_ip] = (ip.major, ip.minor, ip.revision)
|
||||
|
||||
|
|
|
|||
|
|
@ -427,7 +427,7 @@ class AM_PSP(AM_IP):
|
|||
|
||||
self._wait_for_bootloader()
|
||||
|
||||
if DEBUG >= 2: print(f"am {self.adev.devfmt}: loading sos component: {am.psp_fw_type__enumvalues[fw]}")
|
||||
if DEBUG >= 2: print(f"am {self.adev.devfmt}: loading sos component: {am.enum_psp_fw_type.get(fw)}")
|
||||
|
||||
self._prep_msg1(self.adev.fw.sos_fw[fw])
|
||||
self.adev.reg(f"{self.reg_pref}_36").write(self.msg1_addr >> 20)
|
||||
|
|
@ -482,7 +482,7 @@ class AM_PSP(AM_IP):
|
|||
def _load_ip_fw_cmd(self, fw_types:list[int], fw_bytes:memoryview):
|
||||
self._prep_msg1(fw_bytes)
|
||||
for fw_type in fw_types:
|
||||
if DEBUG >= 2: print(f"am {self.adev.devfmt}: loading fw: {am.psp_gfx_fw_type__enumvalues[fw_type]}")
|
||||
if DEBUG >= 2: print(f"am {self.adev.devfmt}: loading fw: {am.enum_psp_gfx_fw_type.get(fw_type)}")
|
||||
cmd = am.struct_psp_gfx_cmd_resp(cmd_id=am.GFX_CMD_ID_LOAD_IP_FW)
|
||||
cmd.cmd.cmd_load_ip_fw.fw_phy_addr_hi, cmd.cmd.cmd_load_ip_fw.fw_phy_addr_lo = data64(self.msg1_addr)
|
||||
cmd.cmd.cmd_load_ip_fw.fw_size = len(fw_bytes)
|
||||
|
|
|
|||
|
|
@ -47,7 +47,6 @@ def gen(dll, files, args=[], prolog=[], rules=[], epilog=[], recsym=False, use_e
|
|||
return types[t.spelling][0]
|
||||
case TK.RECORD:
|
||||
# TODO: packed unions
|
||||
# TODO: pragma pack support
|
||||
# check for forward declaration
|
||||
if t.spelling in types: types[t.spelling] = (nm:=types[t.spelling][0]), len(list(t.get_fields())) != 0
|
||||
else:
|
||||
|
|
@ -56,11 +55,15 @@ def gen(dll, files, args=[], prolog=[], rules=[], epilog=[], recsym=False, use_e
|
|||
else: types[t.spelling] = (nm:=t.spelling.replace(' ', '_').replace('::', '_')), len(list(t.get_fields())) != 0
|
||||
lines.append(f"class {nm}({'Struct' if decl.kind==CK.STRUCT_DECL else 'ctypes.Union'}): pass")
|
||||
if typedef: lines.append(f"{typedef} = {nm}")
|
||||
if (is_packed:=(CK.PACKED_ATTR in attrs(decl)) or ((N:=t.get_align()) != max([f.type.get_align() for f in t.get_fields()], default=N))):
|
||||
if t.get_align() != 1:
|
||||
print(f"WARNING: ignoring alignment={t.get_align()} on {t.spelling}")
|
||||
is_packed = False
|
||||
acnt = itertools.count().__next__
|
||||
ll=[" ("+((fn:=f"'_{acnt()}'")+f", {tname(f.type, nm+fn[1:-1])}" if f.is_anonymous_record_decl() else f"'{f.spelling}', "+
|
||||
tname(f.type, f'{nm}_{f.spelling}'))+(f',{f.get_bitfield_width()}' if f.is_bitfield() else '')+")," for f in t.get_fields()]
|
||||
lines.extend(([f"{nm}._anonymous_ = ["+", ".join(f"'_{i}'" for i in range(n))+"]"] if (n:=acnt()) else [])+
|
||||
([f"{nm}._packed_ = True"] * (CK.PACKED_ATTR in attrs(decl)))+([f"{nm}._fields_ = [",*ll,"]"] if ll else []))
|
||||
([f"{nm}._packed_ = True"] * is_packed)+([f"{nm}._fields_ = [",*ll,"]"] if ll else []))
|
||||
return nm
|
||||
case TK.ENUM:
|
||||
# TODO: C++ and GNU C have forward declared enums
|
||||
|
|
|
|||
|
|
@ -1,10 +1,13 @@
|
|||
import ctypes, functools, sys
|
||||
from typing import TYPE_CHECKING
|
||||
from tinygrad.helpers import flatten
|
||||
from _ctypes import _SimpleCData
|
||||
|
||||
def _do_ioctl(__idir, __base, __nr, __struct, __fd, **kwargs):
|
||||
def _do_ioctl(__idir, __base, __nr, __struct, __fd, *args, __payload=None, **kwargs):
|
||||
import tinygrad.runtime.support.hcq as hcq, fcntl
|
||||
ioctl = __fd.ioctl if isinstance(__fd, hcq.FileIOInterface) else functools.partial(fcntl.ioctl, __fd)
|
||||
if (rc:=ioctl((__idir<<30)|(ctypes.sizeof(out:=__struct(**kwargs))<<16)|(__base<<8)|__nr, out)): raise RuntimeError(f"ioctl returned {rc}")
|
||||
if (rc:=ioctl((__idir<<30)|(ctypes.sizeof(out:=(__payload or __struct(*args, **kwargs)))<<16)|(__base<<8)|__nr, out)):
|
||||
raise RuntimeError(f"ioctl returned {rc}")
|
||||
return out
|
||||
|
||||
def _IO(base, nr): return functools.partial(_do_ioctl, 0, ord(base) if isinstance(base, str) else base, nr, None)
|
||||
|
|
@ -50,16 +53,28 @@ else:
|
|||
|
||||
@staticmethod
|
||||
def _build(cls, fields):
|
||||
o = 0
|
||||
for n,t,b in [(f[0], f[1], f[2] if len(f) == 3 else 0) for f in fields]:
|
||||
if b == 0: o = (o + 7) & ~7
|
||||
m = (1 << (sz:=ctypes.sizeof(t)*8 if b == 0 else b)) - 1
|
||||
def _s(self,v,m,s,b): self._data[:] = ((int.from_bytes(self._data,sys.byteorder)&~(m<<s))|((v&m)<<s)).to_bytes(len(self._data), sys.byteorder)
|
||||
setattr(cls, n, property(functools.partial(lambda self,m,s:(int.from_bytes(self._data,sys.byteorder)>>s)&m,m=m,s=o),
|
||||
functools.partial(_s,m=m,s=o,b=b)))
|
||||
o += sz
|
||||
offset = 0
|
||||
for nm, ty, bf in [(f[0], f[1], f[2] if len(f) == 3 else 0) for f in fields]:
|
||||
if bf == 0: offset = (offset + 7) & ~7
|
||||
mask = (1 << (sz:=ctypes.sizeof(ty)*8 if bf == 0 else bf)) - 1
|
||||
def fget(self, mask, off, ty): return ((int.from_bytes(self._data, sys.byteorder)>>off)&mask if issubclass(ty, _SimpleCData) else
|
||||
ty.from_buffer(memoryview(self._data)[(st:=off//8):st+ctypes.sizeof(ty)]))
|
||||
def fset(self, val, mask, off): self._data[:] = (((int.from_bytes(self._data, sys.byteorder) & ~(mask<<off))|((val&mask)<<off))
|
||||
.to_bytes(len(self._data), sys.byteorder))
|
||||
setattr(cls, nm, property(functools.partial(fget, mask=mask, off=offset, ty=ty), functools.partial(fset, mask=mask, off=offset)))
|
||||
offset += sz
|
||||
|
||||
type(ctypes.Structure).__setattr__(cls, '_fields_', [('_data', ctypes.c_ubyte * ((o + 7) // 8))])
|
||||
def pget(ty, s): return getattr(ty, f'_packed_{s}_', getattr(ty, f'_{s}_', []))
|
||||
def get_aty(anm, fs=fields): return next(f[1] for f in fs if f[0] == anm)
|
||||
def get_fnms(ty): return [f[0] for f in pget(ty, 'fields') if f[0] not in pget(ty, 'anonymous')]
|
||||
|
||||
if hasattr(cls, '_anonymous_'):
|
||||
for anm, aty in [(a, get_aty(a)) for a in cls._anonymous_]:
|
||||
for fnm in (get_fnms(aty) + flatten([get_fnms(get_aty(aanm, pget(aty, 'fields'))) for aanm in pget(aty, 'anonymous')])):
|
||||
setattr(cls, fnm, property(lambda self: getattr(getattr(self, anm), fnm), lambda self, v: setattr(getattr(self, anm), fnm, v)))
|
||||
setattr(cls, '_packed_anonymous_', cls._anonymous_)
|
||||
setattr(cls, '_anonymous_', [])
|
||||
type(ctypes.Structure).__setattr__(cls, '_fields_', [('_data', ctypes.c_ubyte * ((offset + 7) // 8))])
|
||||
type(ctypes.Structure).__setattr__(cls, '_packed_', True)
|
||||
setattr(cls, '_packed_fields_', fields)
|
||||
|
||||
|
|
@ -69,4 +84,3 @@ else:
|
|||
for f,v in zip(self._packed_fields_, args): setattr(self, f[0], v)
|
||||
for k,v in kwargs.items(): setattr(self, k, v)
|
||||
else: super().__init__(*args, **kwargs)
|
||||
|
||||
|
|
|
|||
|
|
@ -1,14 +1,10 @@
|
|||
import ctypes
|
||||
from tinygrad.helpers import system
|
||||
import tinygrad.runtime.autogen.comgr as comgr
|
||||
assert comgr.AMD_COMGR_LANGUAGE_HIP == 4
|
||||
from tinygrad.runtime.autogen import comgr
|
||||
try:
|
||||
comgr.amd_comgr_get_version(ctypes.byref(major:=ctypes.c_uint64()), ctypes.byref(minor:=ctypes.c_uint64()))
|
||||
if major.value >= 3:
|
||||
# in comgr 3 the values of enums in headers were changed: https://github.com/ROCm/llvm-project/issues/272
|
||||
import tinygrad.runtime.autogen.comgr_3 as comgr # type: ignore[no-redef]
|
||||
assert comgr.AMD_COMGR_LANGUAGE_HIP == 3
|
||||
except AttributeError: pass # ignore if ROCm isn't installed
|
||||
assert comgr.AMD_COMGR_LANGUAGE_HIP == 3 if major.value >= 3 else 4
|
||||
except AttributeError: assert comgr.AMD_COMGR_LANGUAGE_HIP == 3 # if rocm is not installed, use old values
|
||||
from tinygrad.device import Compiler, CompileError
|
||||
from tinygrad.runtime.support.compiler_cpu import LLVMCompiler
|
||||
from tinygrad.helpers import OSX, to_char_p_p
|
||||
|
|
|
|||
|
|
@ -1,8 +1,8 @@
|
|||
import ctypes, platform, sys, subprocess
|
||||
from tinygrad.device import Compiler
|
||||
from tinygrad.helpers import OSX, getenv, capstone_flatdump, DEBUG
|
||||
from tinygrad.helpers import OSX, getenv, capstone_flatdump, DEBUG, unwrap
|
||||
from tinygrad.runtime.support.elf import jit_loader
|
||||
try: import tinygrad.runtime.autogen.llvm as llvm
|
||||
try: from tinygrad.runtime.autogen import llvm
|
||||
except (ImportError, FileNotFoundError): llvm = None #type:ignore[assignment]
|
||||
|
||||
class ClangJITCompiler(Compiler):
|
||||
|
|
@ -24,7 +24,7 @@ class ClangJITCompiler(Compiler):
|
|||
def cerr(): return ctypes.pointer(ctypes.pointer(ctypes.c_char()))
|
||||
|
||||
def expect(x, err, ret=None):
|
||||
if x: raise RuntimeError(llvm.string_cast(err.contents) if not isinstance(err, str) else err)
|
||||
if x: raise RuntimeError(unwrap(ctypes.cast(err.contents, ctypes.c_char_p).value).decode() if not isinstance(err, str) else err)
|
||||
return ret
|
||||
|
||||
class LLVMCompiler(Compiler):
|
||||
|
|
@ -50,7 +50,7 @@ class LLVMCompiler(Compiler):
|
|||
self.passes = b'default<O0>'
|
||||
|
||||
self.diag_msgs: list[str] = []
|
||||
@ctypes.CFUNCTYPE(None, llvm.LLVMDiagnosticInfoRef, ctypes.c_void_p)
|
||||
@llvm.LLVMDiagnosticHandler
|
||||
def handle_diag(diag_ref, _arg):
|
||||
severity = llvm.LLVMGetDiagInfoSeverity(diag_ref)
|
||||
msg = ctypes.string_at(llvm.LLVMGetDiagInfoDescription(diag_ref)).decode()
|
||||
|
|
@ -70,7 +70,7 @@ class LLVMCompiler(Compiler):
|
|||
expect(llvm.LLVMRunPasses(mod, self.passes, self.target_machine, self.pbo), 'failed to run passes')
|
||||
if DEBUG >= 7: print(ctypes.string_at(llvm.LLVMPrintModuleToString(mod)).decode())
|
||||
obj_buf = expect(llvm.LLVMTargetMachineEmitToMemoryBuffer(self.target_machine, mod, llvm.LLVMObjectFile, err:=cerr(),
|
||||
ctypes.pointer(buf:=llvm.LLVMMemoryBufferRef())), err, buf)
|
||||
buf:=llvm.LLVMMemoryBufferRef()), err, buf)
|
||||
llvm.LLVMDisposeModule(mod)
|
||||
obj = ctypes.string_at(llvm.LLVMGetBufferStart(obj_buf), llvm.LLVMGetBufferSize(obj_buf))
|
||||
llvm.LLVMDisposeMemoryBuffer(obj_buf)
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
import subprocess, hashlib, tempfile, ctypes, re, pathlib
|
||||
from typing import Callable
|
||||
from tinygrad.helpers import to_char_p_p, colored, init_c_var, getenv, system
|
||||
import tinygrad.runtime.autogen.nvrtc as nvrtc
|
||||
from tinygrad.runtime.autogen import nvrtc, nvjitlink as jitlink
|
||||
from tinygrad.device import Compiler, CompileError
|
||||
|
||||
CUDA_PATH = getenv("CUDA_PATH", "")
|
||||
|
|
@ -17,8 +17,8 @@ def nvrtc_check(status, ctx=None):
|
|||
|
||||
def jitlink_check(status, ctx=None):
|
||||
if status != 0:
|
||||
err_log = _get_bytes(ctx, nvrtc.nvJitLinkGetErrorLog, nvrtc.nvJitLinkGetErrorLogSize, lambda _: None).decode() if ctx else ""
|
||||
raise CompileError(f"NvJitLink Error {status}, {nvrtc.nvJitLinkResult__enumvalues.get(status, 'Unknown')}\n{err_log}")
|
||||
err_log = _get_bytes(ctx, jitlink.nvJitLinkGetErrorLog, jitlink.nvJitLinkGetErrorLogSize, lambda _: None).decode() if ctx else ""
|
||||
raise CompileError(f"jitlink Error {status}, {jitlink.nvJitLinkResult.get(status)}\n{err_log}")
|
||||
|
||||
def pretty_ptx(s):
|
||||
# all expressions match `<valid_before><expr><valid_after>` and replace it with `<valid_before>color(<expr>)<valid_after>`
|
||||
|
|
@ -83,12 +83,12 @@ class PTXCompiler(Compiler):
|
|||
|
||||
class NVPTXCompiler(PTXCompiler):
|
||||
def __init__(self, arch:str):
|
||||
nvrtc_check(nvrtc.nvJitLinkVersion(ctypes.byref(ctypes.c_uint()), ctypes.byref(ctypes.c_uint())))
|
||||
nvrtc_check(jitlink.nvJitLinkVersion(ctypes.byref(ctypes.c_uint()), ctypes.byref(ctypes.c_uint())))
|
||||
super().__init__(arch, cache_key="nv_ptx")
|
||||
def compile(self, src:str) -> bytes:
|
||||
jitlink_check(nvrtc.nvJitLinkCreate(handle := nvrtc.nvJitLinkHandle(), 1, to_char_p_p([f'-arch={self.arch}'.encode()])), handle)
|
||||
jitlink_check(nvrtc.nvJitLinkAddData(handle, nvrtc.NVJITLINK_INPUT_PTX, ptxsrc:=super().compile(src), len(ptxsrc), "<null>".encode()), handle)
|
||||
jitlink_check(nvrtc.nvJitLinkComplete(handle), handle)
|
||||
data = _get_bytes(handle, nvrtc.nvJitLinkGetLinkedCubin, nvrtc.nvJitLinkGetLinkedCubinSize, jitlink_check)
|
||||
jitlink_check(nvrtc.nvJitLinkDestroy(handle))
|
||||
jitlink_check(jitlink.nvJitLinkCreate(handle := jitlink.nvJitLinkHandle(), 1, to_char_p_p([f'-arch={self.arch}'.encode()])), handle)
|
||||
jitlink_check(jitlink.nvJitLinkAddData(handle, jitlink.NVJITLINK_INPUT_PTX, ptxsrc:=super().compile(src), len(ptxsrc), "<null>".encode()), handle)
|
||||
jitlink_check(jitlink.nvJitLinkComplete(handle), handle)
|
||||
data = _get_bytes(handle, jitlink.nvJitLinkGetLinkedCubin, jitlink.nvJitLinkGetLinkedCubinSize, jitlink_check)
|
||||
jitlink_check(jitlink.nvJitLinkDestroy(handle))
|
||||
return data
|
||||
|
|
|
|||
|
|
@ -1,9 +1,9 @@
|
|||
import base64, ctypes, pathlib, tempfile, hashlib
|
||||
from tinygrad.device import Compiler
|
||||
from tinygrad.helpers import cpu_objdump, system
|
||||
import tinygrad.runtime.autogen.mesa as mesa
|
||||
from tinygrad.runtime.autogen import mesa
|
||||
from tinygrad.runtime.support.compiler_cpu import CPULLVMCompiler, expect, cerr
|
||||
try: import tinygrad.runtime.autogen.llvm as llvm
|
||||
try: from tinygrad.runtime.autogen import llvm
|
||||
except (ImportError, FileNotFoundError): llvm = None #type:ignore[assignment]
|
||||
|
||||
def deserialize(enc_src, opts):
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
from __future__ import annotations
|
||||
import resource, ctypes, weakref, functools, itertools, tinygrad.runtime.autogen.ib as ib
|
||||
import resource, ctypes, weakref, functools, itertools
|
||||
from tinygrad.runtime.autogen import ib
|
||||
from typing import Iterator
|
||||
from dataclasses import dataclass
|
||||
from weakref import WeakKeyDictionary
|
||||
|
|
@ -141,7 +142,7 @@ class IBConn:
|
|||
while (wr_id in self.pending_wrids) if wr_id is not None else self.pending_wrids:
|
||||
if self.ctx.ctx.contents.ops.poll_cq(self.cq, _num_entries:=1, ctypes.byref(wc:=ib.struct_ibv_wc())):
|
||||
if wc.status != ib.IBV_WC_SUCCESS:
|
||||
raise RuntimeError(f'Work Request completed with error: wr_id={wc.wr_id} status={ib.ibv_wc_status__enumvalues.get(wc.status, wc.status)}')
|
||||
raise RuntimeError(f'Work Request completed with error: wr_id={wc.wr_id} status={ib.enum_ibv_wc_status.get(wc.status, wc.status)}')
|
||||
self.pending_wrids.remove(wc.wr_id)
|
||||
|
||||
def rdma_write(self, sgl:list[SGE]):
|
||||
|
|
@ -162,7 +163,7 @@ class IBConn:
|
|||
# Scatter-Gather Entry for local memory
|
||||
sge = ctypes.pointer(ib.struct_ibv_sge(addr=sg.src_iova+off, length=min(sg.size-off, self.ctx.port_attr.max_msg_sz), lkey=sg.src_key))
|
||||
# RDMA struct for remote memory
|
||||
wr = ib.union_ibv_send_wr_wr(rdma=ib.struct_ibv_send_wr_1_rdma(remote_addr=sg.dst_iova+off, rkey=sg.dst_key))
|
||||
wr = ib.struct_ibv_send_wr_wr(rdma=ib.struct_ibv_send_wr_wr_rdma(remote_addr=sg.dst_iova+off, rkey=sg.dst_key))
|
||||
# Signal (with chosen work request id) if it's the last wr (first in the loop since it's reversed)
|
||||
wid, flags = (wr_id, ib.IBV_SEND_SIGNALED) if swr is None else (0, 0)
|
||||
# Create Send Request
|
||||
|
|
|
|||
|
|
@ -1,11 +1,10 @@
|
|||
from __future__ import annotations
|
||||
import ctypes, time, array, struct, itertools, dataclasses
|
||||
from typing import cast, Any
|
||||
from tinygrad.runtime.autogen.nv import nv
|
||||
from tinygrad.runtime.autogen import nv, nv_gpu, pci
|
||||
from tinygrad.helpers import to_mv, lo32, hi32, DEBUG, round_up, round_down, mv_address, fetch, wait_cond
|
||||
from tinygrad.runtime.support.system import System
|
||||
from tinygrad.runtime.support.elf import elf_loader
|
||||
from tinygrad.runtime.autogen import nv_gpu, pci
|
||||
|
||||
@dataclasses.dataclass(frozen=True)
|
||||
class GRBufDesc: size:int; virt:bool; phys:bool; local:bool=False # noqa: E702
|
||||
|
|
@ -69,8 +68,8 @@ class NVRpcQueue:
|
|||
System.memory_barrier()
|
||||
|
||||
if DEBUG >= 3:
|
||||
rpc_names = {**nv.c__Ea_NV_VGPU_MSG_FUNCTION_NOP__enumvalues, **nv.c__Ea_NV_VGPU_MSG_EVENT_FIRST_EVENT__enumvalues}
|
||||
print(f"nv {self.gsp.nvdev.devfmt}: in RPC: {rpc_names.get(hdr.function, f'ev:{hdr.function:x}')}, res:{hdr.rpc_result:#x}")
|
||||
nm = nv.rpc_fns.get(hdr.function, nv.rpc_events.get(hdr.function, f'ev:{hdr.function:x}'))
|
||||
print(f"nv {self.gsp.nvdev.devfmt}: in RPC: {nm}, res:{hdr.rpc_result:#x}")
|
||||
|
||||
if hdr.rpc_result != 0: raise RuntimeError(f"RPC call {hdr.function} failed with result {hdr.rpc_result}")
|
||||
if hdr.function == cmd: return msg
|
||||
|
|
@ -443,7 +442,7 @@ class NV_GSP(NV_IP):
|
|||
bufs_p = nv_gpu.struct_NV90F1_CTRL_VASPACE_COPY_SERVER_RESERVED_PDES_PARAMS(pageSize=res_sz, numLevelsToCopy=3,
|
||||
virtAddrLo=res_va, virtAddrHi=res_va + res_sz - 1)
|
||||
for i,pt in enumerate(self.nvdev.mm.page_tables(res_va, size=res_sz)):
|
||||
bufs_p.levels[i] = nv_gpu.struct_NV90F1_CTRL_VASPACE_COPY_SERVER_RESERVED_PDES_PARAMS_0(physAddress=pt.paddr,
|
||||
bufs_p.levels[i] = nv_gpu.struct_NV90F1_CTRL_VASPACE_COPY_SERVER_RESERVED_PDES_PARAMS_level(physAddress=pt.paddr,
|
||||
size=self.nvdev.mm.pte_cnt[0] * 8 if i == 0 else 0x1000, pageShift=self.nvdev.mm.pte_covers[i].bit_length() - 1, aperture=1)
|
||||
self.rpc_rm_control(hObject=vaspace, cmd=nv_gpu.NV90F1_CTRL_CMD_VASPACE_COPY_SERVER_RESERVED_PDES, params=bufs_p)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue