move tests to test/backend (#14691)

* move tests to test/backend

* fix imports

* fix CI

* revert that one

* Fix formatting in README for test command
This commit is contained in:
George Hotz 2026-02-12 11:09:44 +08:00 committed by GitHub
commit c331798201
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
61 changed files with 76 additions and 90 deletions

View file

@ -106,7 +106,7 @@ jobs:
sudo apt update || true sudo apt update || true
sudo apt install -y --no-install-recommends ninja-build sudo apt install -y --no-install-recommends ninja-build
- name: Test one op - name: Test one op
run: FORWARD_ONLY=1 TINY_BACKEND=1 python3 test/test_ops.py TestOps.test_add run: FORWARD_ONLY=1 TINY_BACKEND=1 python3 test/test_tiny.py TestTiny.test_plus
- name: Test ResNet-18 - name: Test ResNet-18
run: DEBUG=2 python3 extra/torch_backend/example.py run: DEBUG=2 python3 extra/torch_backend/example.py
- name: custom tests - name: custom tests
@ -114,7 +114,7 @@ jobs:
- name: Test one op in torch tests - name: Test one op in torch tests
run: DEBUG=2 python3 extra/torch_backend/torch_tests.py TestTinyBackendPRIVATEUSE1.test_unary_log_tiny_float32 run: DEBUG=2 python3 extra/torch_backend/torch_tests.py TestTinyBackendPRIVATEUSE1.test_unary_log_tiny_float32
- name: Test Ops with TINY_BACKEND - name: Test Ops with TINY_BACKEND
run: CPU=1 CPU_LLVM=1 LLVMOPT=0 TINY_BACKEND=1 python3 -m pytest -n auto test/test_ops.py --durations=20 run: CPU=1 CPU_LLVM=1 LLVMOPT=0 TINY_BACKEND=1 python3 -m pytest -n auto test/backend/test_ops.py --durations=20
- name: Test in-place operations on views - name: Test in-place operations on views
run: TORCH_DEBUG=1 python3 extra/torch_backend/test_inplace.py run: TORCH_DEBUG=1 python3 extra/torch_backend/test_inplace.py
- name: Test multi-gpu - name: Test multi-gpu
@ -158,25 +158,25 @@ jobs:
key: be-minimal key: be-minimal
deps: testing_unit deps: testing_unit
- name: Test dtype with Python emulator - name: Test dtype with Python emulator
run: DEBUG=1 PYTHON=1 python3 -m pytest -n=auto test/test_dtype.py test/test_dtype_alu.py run: DEBUG=1 PYTHON=1 python3 -m pytest -n=auto test/backend/test_dtype.py test/backend/test_dtype_alu.py
- name: Test ops with Python emulator - name: Test ops with Python emulator
run: DEBUG=2 SKIP_SLOW_TEST=1 PYTHON=1 python3 -m pytest -n=auto test/test_ops.py --durations=20 run: DEBUG=2 SKIP_SLOW_TEST=1 PYTHON=1 python3 -m pytest -n=auto test/backend/test_ops.py --durations=20
- name: Test uops with Python emulator - name: Test uops with Python emulator
run: PYTHON=1 python3 -m pytest test/test_uops.py --durations=20 run: PYTHON=1 python3 -m pytest test/backend/test_uops.py --durations=20
- name: Test symbolic with Python emulator - name: Test symbolic with Python emulator
run: PYTHON=1 python3 test/test_symbolic_ops.py run: PYTHON=1 python3 test/backend/test_symbolic_ops.py
- name: test_renderer_failures with Python emulator - name: test_renderer_failures with Python emulator
run: PYTHON=1 python3 -m pytest -rA test/test_renderer_failures.py::TestRendererFailures run: PYTHON=1 python3 -m pytest -rA test/backend/test_renderer_failures.py::TestRendererFailures
- name: Test IMAGE=2 support - name: Test IMAGE=2 support
run: | run: |
IMAGE=2 PYTHON=1 python3 test/test_ops.py TestOps.test_gemm IMAGE=2 PYTHON=1 python3 test/backend/test_ops.py TestOps.test_gemm
IMAGE=2 PYTHON=1 python3 test/test_ops.py TestOps.test_simple_conv2d IMAGE=2 PYTHON=1 python3 test/backend/test_ops.py TestOps.test_simple_conv2d
- name: Test emulated METAL tensor cores - name: Test emulated METAL tensor cores
run: | run: |
DEBUG=2 EMULATE=METAL FORWARD_ONLY=1 PYTHON=1 python3 test/test_ops.py TestOps.test_big_gemm DEBUG=2 EMULATE=METAL FORWARD_ONLY=1 PYTHON=1 python3 test/backend/test_ops.py TestOps.test_big_gemm
DEBUG=2 EMULATE=METAL FORWARD_ONLY=1 PYTHON=1 python3 test/opt/test_tensor_cores.py DEBUG=2 EMULATE=METAL FORWARD_ONLY=1 PYTHON=1 python3 test/opt/test_tensor_cores.py
- name: Test emulated AMX tensor cores - name: Test emulated AMX tensor cores
run: DEBUG=2 AMX=1 EMULATE=AMX FORWARD_ONLY=1 PYTHON=1 python3 test/test_ops.py TestOps.test_gemm run: DEBUG=2 AMX=1 EMULATE=AMX FORWARD_ONLY=1 PYTHON=1 python3 test/backend/test_ops.py TestOps.test_gemm
- name: Test emulated AMD tensor cores - name: Test emulated AMD tensor cores
run: | run: |
DEBUG=2 EMULATE=AMD FORWARD_ONLY=1 PYTHON=1 N=16 HALF=1 ACC_HALF=0 python3 ./extra/gemm/simple_matmul.py DEBUG=2 EMULATE=AMD FORWARD_ONLY=1 PYTHON=1 N=16 HALF=1 ACC_HALF=0 python3 ./extra/gemm/simple_matmul.py
@ -197,9 +197,9 @@ jobs:
DEBUG=2 EMULATE=AMD_RDNA4 FORWARD_ONLY=1 PYTHON=1 python3 test/opt/test_tensor_cores.py DEBUG=2 EMULATE=AMD_RDNA4 FORWARD_ONLY=1 PYTHON=1 python3 test/opt/test_tensor_cores.py
- name: Test emulated CUDA tensor cores - name: Test emulated CUDA tensor cores
run: | run: |
DEBUG=2 EMULATE=CUDA FORWARD_ONLY=1 PYTHON=1 python3 test/test_ops.py TestOps.test_gemm_fp16 DEBUG=2 EMULATE=CUDA FORWARD_ONLY=1 PYTHON=1 python3 test/backend/test_ops.py TestOps.test_gemm_fp16
DEBUG=2 EMULATE=CUDA ALLOW_TF32=1 FORWARD_ONLY=1 PYTHON=1 python3 test/test_ops.py TestOps.test_gemm DEBUG=2 EMULATE=CUDA ALLOW_TF32=1 FORWARD_ONLY=1 PYTHON=1 python3 test/backend/test_ops.py TestOps.test_gemm
DEBUG=2 EMULATE=CUDA_SM75 FORWARD_ONLY=1 PYTHON=1 python3 test/test_ops.py TestOps.test_gemm_fp16 DEBUG=2 EMULATE=CUDA_SM75 FORWARD_ONLY=1 PYTHON=1 python3 test/backend/test_ops.py TestOps.test_gemm_fp16
DEBUG=2 EMULATE=CUDA_SM89 ALLOW_TF32=1 FORWARD_ONLY=1 PYTHON=1 python3 test/opt/test_tensor_cores.py DEBUG=2 EMULATE=CUDA_SM89 ALLOW_TF32=1 FORWARD_ONLY=1 PYTHON=1 python3 test/opt/test_tensor_cores.py
- name: Test emulated INTEL OpenCL tensor cores - name: Test emulated INTEL OpenCL tensor cores
run: DEBUG=2 EMULATE=INTEL FORWARD_ONLY=1 PYTHON=1 HALF=1 N=64 python3 ./extra/gemm/simple_matmul.py run: DEBUG=2 EMULATE=INTEL FORWARD_ONLY=1 PYTHON=1 HALF=1 N=64 python3 ./extra/gemm/simple_matmul.py
@ -271,7 +271,7 @@ jobs:
- name: Run NULL backend tests - name: Run NULL backend tests
run: NULL=1 python -m pytest -n=auto test/null/ --durations=20 run: NULL=1 python -m pytest -n=auto test/null/ --durations=20
- name: Run targetted tests on NULL backend - name: Run targetted tests on NULL backend
run: NULL=1 python3 -m unittest test.test_multitensor.TestMultiTensor.test_data_parallel_resnet_train_step run: NULL=1 python3 -m unittest test.backend.test_multitensor.TestMultiTensor.test_data_parallel_resnet_train_step
# TODO: too slow # TODO: too slow
# - name: Run SDXL on NULL backend # - name: Run SDXL on NULL backend
# run: NULL=1 DEBUG=1 python3 examples/sdxl.py --seed 0 --noshow --timing --fakeweights # run: NULL=1 DEBUG=1 python3 examples/sdxl.py --seed 0 --noshow --timing --fakeweights
@ -316,7 +316,7 @@ jobs:
deps: testing_unit deps: testing_unit
python-version: '3.14' python-version: '3.14'
- name: Test SPEC=2 - name: Test SPEC=2
run: SPEC=2 pytest --maxfail=10 -n auto --durations=30 --ignore=test/models --ignore=test/null --ignore test/test_custom_kernel.py --ignore test/unit/test_hashing.py --timeout 60 -k "not test_setitem_big" --splits 2 --group ${{ matrix.group }} run: SPEC=2 pytest --maxfail=10 -n auto --durations=30 --ignore=test/models --ignore=test/null --ignore test/backend/test_custom_kernel.py --ignore test/unit/test_hashing.py --timeout 60 -k "not test_setitem_big" --splits 2 --group ${{ matrix.group }}
fuzzing: fuzzing:
name: Fuzzing name: Fuzzing
@ -354,7 +354,7 @@ jobs:
opencl: 'true' opencl: 'true'
- name: Test CL IMAGE=2 ops - name: Test CL IMAGE=2 ops
run: | run: |
CL=1 IMAGE=2 python -m pytest -n=auto test/test_ops.py --durations=20 CL=1 IMAGE=2 python -m pytest -n=auto test/backend/test_ops.py --durations=20
# TODO: training is broken # TODO: training is broken
# CL=1 IMAGE=2 python test/models/test_end2end.py TestEnd2End.test_linear_mnist # CL=1 IMAGE=2 python test/models/test_end2end.py TestEnd2End.test_linear_mnist
- name: Run process replay tests - name: Run process replay tests
@ -378,7 +378,7 @@ jobs:
- name: Run Kernel Count Test - name: Run Kernel Count Test
run: CL=1 python -m pytest -n=auto test/external/external_test_opt.py run: CL=1 python -m pytest -n=auto test/external/external_test_opt.py
- name: Run fused optimizer tests - name: Run fused optimizer tests
run: CL=1 FUSE_OPTIM=1 python -m pytest -n=auto test/models/test_mnist.py test/test_optim.py -k "not muon" run: CL=1 FUSE_OPTIM=1 python -m pytest -n=auto test/models/test_mnist.py test/backend/test_optim.py -k "not muon"
- name: Upload artifact - name: Upload artifact
uses: actions/upload-artifact@v4 uses: actions/upload-artifact@v4
with: with:
@ -437,7 +437,7 @@ jobs:
- name: Test Additional ONNX Ops (CPU) - name: Test Additional ONNX Ops (CPU)
run: CPU=1 CPU_LLVM=0 python3 test/external/external_test_onnx_ops.py run: CPU=1 CPU_LLVM=0 python3 test/external/external_test_onnx_ops.py
- name: Test Quantize ONNX - name: Test Quantize ONNX
run: CPU=1 CPU_LLVM=0 python3 test/test_quantize_onnx.py run: CPU=1 CPU_LLVM=0 python3 test/backend/test_quantize_onnx.py
- name: Run process replay tests - name: Run process replay tests
uses: ./.github/actions/process-replay uses: ./.github/actions/process-replay
@ -551,11 +551,11 @@ jobs:
pydeps: "pillow" pydeps: "pillow"
llvm: "true" llvm: "true"
- name: Test LLVM=1 DEVECTORIZE=0 - name: Test LLVM=1 DEVECTORIZE=0
run: CPU=1 CPU_LLVM=1 DEVECTORIZE=0 python3 -m pytest -n auto test/test_tiny.py test/test_ops.py run: CPU=1 CPU_LLVM=1 DEVECTORIZE=0 python3 -m pytest -n auto test/test_tiny.py test/backend/test_ops.py
- name: Test LLVM=1 DEVECTORIZE=0 for model - name: Test LLVM=1 DEVECTORIZE=0 for model
run: CPU=1 CPU_LLVM=1 DEVECTORIZE=0 python3 test/models/test_efficientnet.py run: CPU=1 CPU_LLVM=1 DEVECTORIZE=0 python3 test/models/test_efficientnet.py
- name: Test CPU=1 DEVECTORIZE=0 - name: Test CPU=1 DEVECTORIZE=0
run: CPU=1 CPU_LLVM=0 DEVECTORIZE=0 python3 -m pytest -n auto test/test_tiny.py test/test_ops.py run: CPU=1 CPU_LLVM=0 DEVECTORIZE=0 python3 -m pytest -n auto test/test_tiny.py test/backend/test_ops.py
testdsp: testdsp:
name: Linux (DSP) name: Linux (DSP)
@ -587,9 +587,9 @@ jobs:
- name: Run test_tiny on DSP - name: Run test_tiny on DSP
run: DEBUG=2 DSP=1 python test/test_tiny.py run: DEBUG=2 DSP=1 python test/test_tiny.py
- name: Test transcendentals - name: Test transcendentals
run: CC=clang-20 DEBUG=2 DSP=1 python test/test_transcendental.py TestTranscendentalVectorized run: CC=clang-20 DEBUG=2 DSP=1 python test/backend/test_transcendental.py TestTranscendentalVectorized
- name: Test quantize onnx - name: Test quantize onnx
run: DEBUG=2 DSP=1 python3 test/test_quantize_onnx.py run: DEBUG=2 DSP=1 python3 test/backend/test_quantize_onnx.py
testwebgpu: testwebgpu:
name: Linux (WebGPU) name: Linux (WebGPU)
@ -608,7 +608,7 @@ jobs:
- name: Check Device.DEFAULT (WEBGPU) and print some source - name: Check Device.DEFAULT (WEBGPU) and print some source
run: | run: |
WEBGPU=1 python -c "from tinygrad import Device; assert Device.DEFAULT == 'WEBGPU', Device.DEFAULT" WEBGPU=1 python -c "from tinygrad import Device; assert Device.DEFAULT == 'WEBGPU', Device.DEFAULT"
WEBGPU=1 DEBUG=4 FORWARD_ONLY=1 python3 test/test_ops.py TestOps.test_add WEBGPU=1 DEBUG=4 FORWARD_ONLY=1 python3 test/test_tiny.py TestTiny.test_plus
- name: Run selected webgpu tests - name: Run selected webgpu tests
run: | run: |
WEBGPU=1 WEBGPU_BACKEND="WGPUBackendType_Vulkan" python3 -m pytest -n=auto test/ --ignore=test/models --ignore=test/unit --ignore=test/null --durations=20 WEBGPU=1 WEBGPU_BACKEND="WGPUBackendType_Vulkan" python3 -m pytest -n=auto test/ --ignore=test/models --ignore=test/unit --ignore=test/null --durations=20
@ -642,19 +642,19 @@ jobs:
- name: Check Device.DEFAULT and print some source - name: Check Device.DEFAULT and print some source
run: | run: |
python3 -c "from tinygrad import Device; assert Device.DEFAULT in ['AMD'], Device.DEFAULT" python3 -c "from tinygrad import Device; assert Device.DEFAULT in ['AMD'], Device.DEFAULT"
DEBUG=5 FORWARD_ONLY=1 python3 test/test_ops.py TestOps.test_add DEBUG=5 FORWARD_ONLY=1 python3 test/test_tiny.py TestTiny.test_plus
- name: Run LLVM test - name: Run LLVM test
if: matrix.backend=='amdllvm' if: matrix.backend=='amdllvm'
run: python test/device/test_amd_llvm.py run: python test/device/test_amd_llvm.py
- name: Run pytest (amd) - name: Run pytest (amd)
run: python -m pytest -n=auto test/test_ops.py test/test_dtype.py test/test_dtype_alu.py test/test_linearizer.py test/test_randomness.py test/test_jit.py test/test_graph.py test/test_multitensor.py test/device/test_hcq.py test/testextra/test_cfg_viz.py --durations=20 run: python -m pytest -n=auto test/backend/test_ops.py test/backend/test_dtype.py test/backend/test_dtype_alu.py test/backend/test_linearizer.py test/backend/test_randomness.py test/backend/test_jit.py test/backend/test_graph.py test/backend/test_multitensor.py test/device/test_hcq.py test/testextra/test_cfg_viz.py --durations=20
- name: Run pytest (amd) - name: Run pytest (amd)
run: python -m pytest test/external/external_test_am.py --durations=20 run: python -m pytest test/external/external_test_am.py --durations=20
- name: Run TRANSCENDENTAL math - name: Run TRANSCENDENTAL math
run: TRANSCENDENTAL=2 python -m pytest -n=auto test/test_ops.py::TestOps::test_sin test/test_ops.py::TestOps::test_cos test/test_ops.py::TestOps::test_tan test/test_ops.py::TestOps::test_exp test/test_ops.py::TestOps::test_log --durations=20 run: TRANSCENDENTAL=2 python -m pytest -n=auto test/backend/test_ops.py::TestOps::test_sin test/backend/test_ops.py::TestOps::test_cos test/backend/test_ops.py::TestOps::test_tan test/backend/test_ops.py::TestOps::test_exp test/backend/test_ops.py::TestOps::test_log --durations=20
- name: Run TestOps.test_add with SQTT - name: Run TestOps.test_add with SQTT
run: | run: |
VIZ=-2 DEBUG=5 python3 test/test_ops.py TestOps.test_add VIZ=-2 DEBUG=5 python3 test/backend/test_ops.py TestOps.test_add
extra/sqtt/rgptool.py create "/tmp/profile.pkl.$USER" -o /tmp/gpu0.rgp extra/sqtt/rgptool.py create "/tmp/profile.pkl.$USER" -o /tmp/gpu0.rgp
- name: Run AMD emulated mmapeak on NULL backend - name: Run AMD emulated mmapeak on NULL backend
env: env:
@ -700,12 +700,12 @@ jobs:
- name: Run RDNA3 emulator tests (AMD_LLVM=1) - name: Run RDNA3 emulator tests (AMD_LLVM=1)
run: AMD_LLVM=1 python -m pytest -n=auto extra/assembly/amd/ --durations 20 run: AMD_LLVM=1 python -m pytest -n=auto extra/assembly/amd/ --durations 20
- name: Run RDNA3 dtype tests - name: Run RDNA3 dtype tests
run: AMD_LLVM=0 pytest -n=auto test/test_dtype_alu.py test/test_dtype.py --durations 20 run: AMD_LLVM=0 pytest -n=auto test/backend/test_dtype_alu.py test/backend/test_dtype.py --durations 20
- name: Run RDNA3 dtype tests (AMD_LLVM=1) - name: Run RDNA3 dtype tests (AMD_LLVM=1)
run: AMD_LLVM=1 pytest -n=auto test/test_dtype_alu.py test/test_dtype.py --durations 20 run: AMD_LLVM=1 pytest -n=auto test/backend/test_dtype_alu.py test/backend/test_dtype.py --durations 20
# TODO: run all once emulator is faster # TODO: run all once emulator is faster
- name: Run RDNA3 ops tests - name: Run RDNA3 ops tests
run: SKIP_SLOW_TEST=1 AMD_LLVM=0 pytest -n=auto test/test_ops.py -k "test_sparse_categorical_crossentropy or test_tril or test_nonzero or test_softmax_argmax" --durations 20 run: SKIP_SLOW_TEST=1 AMD_LLVM=0 pytest -n=auto test/backend/test_ops.py -k "test_sparse_categorical_crossentropy or test_tril or test_nonzero or test_softmax_argmax" --durations 20
- name: Run RDNA4 emulator tests - name: Run RDNA4 emulator tests
run: MOCKGPU_ARCH=rdna4 python -m pytest test/test_tiny.py -v --durations 20 run: MOCKGPU_ARCH=rdna4 python -m pytest test/test_tiny.py -v --durations 20
@ -736,12 +736,12 @@ jobs:
- name: Check Device.DEFAULT and print some source - name: Check Device.DEFAULT and print some source
run: | run: |
python3 -c "from tinygrad import Device; assert Device.DEFAULT in ['CUDA','NV'], Device.DEFAULT" python3 -c "from tinygrad import Device; assert Device.DEFAULT in ['CUDA','NV'], Device.DEFAULT"
DEBUG=5 FORWARD_ONLY=1 python3 test/test_ops.py TestOps.test_add DEBUG=5 FORWARD_ONLY=1 python3 test/test_tiny.py TestTiny.test_plus
- name: Run pytest (cuda) - name: Run pytest (cuda)
# skip multitensor because it's slow # skip multitensor because it's slow
run: python -m pytest -n=auto test/ --ignore=test/models --ignore=test/unit --ignore=test/null --ignore test/test_multitensor.py --durations=20 run: python -m pytest -n=auto test/ --ignore=test/models --ignore=test/unit --ignore=test/null --ignore test/backend/test_multitensor.py --durations=20
- name: Run TestOps.test_add with PMA - name: Run TestOps.test_add with PMA
run: VIZ=-1 PMA=1 DEBUG=5 python3 test/test_ops.py TestOps.test_add run: VIZ=-1 PMA=1 DEBUG=5 python3 test/backend/test_ops.py TestOps.test_add
- name: Run process replay tests - name: Run process replay tests
uses: ./.github/actions/process-replay uses: ./.github/actions/process-replay
@ -770,11 +770,11 @@ jobs:
- name: Check Device.DEFAULT and print some source - name: Check Device.DEFAULT and print some source
run: | run: |
python3 -c "from tinygrad import Device; assert Device.DEFAULT in ['CPU','CL'], Device.DEFAULT" python3 -c "from tinygrad import Device; assert Device.DEFAULT in ['CPU','CL'], Device.DEFAULT"
DEBUG=5 FORWARD_ONLY=1 python3 test/test_ops.py TestOps.test_add DEBUG=5 FORWARD_ONLY=1 python3 test/test_tiny.py TestTiny.test_plus
- name: Run pytest (${{ matrix.backend }}) - name: Run pytest (${{ matrix.backend }})
run: python -m pytest -n=auto test/ --ignore=test/models --ignore=test/unit --ignore=test/null --durations=20 run: python -m pytest -n=auto test/ --ignore=test/models --ignore=test/unit --ignore=test/null --durations=20
- name: Run TRANSCENDENTAL math - name: Run TRANSCENDENTAL math
run: TRANSCENDENTAL=2 python -m pytest -n=auto test/test_ops.py::TestOps::test_sin test/test_ops.py::TestOps::test_cos test/test_ops.py::TestOps::test_tan test/test_ops.py::TestOps::test_exp test/test_ops.py::TestOps::test_log --durations=20 run: TRANSCENDENTAL=2 python -m pytest -n=auto test/backend/test_ops.py::TestOps::test_sin test/backend/test_ops.py::TestOps::test_cos test/backend/test_ops.py::TestOps::test_tan test/backend/test_ops.py::TestOps::test_exp test/backend/test_ops.py::TestOps::test_log --durations=20
- name: Run process replay tests - name: Run process replay tests
uses: ./.github/actions/process-replay uses: ./.github/actions/process-replay
@ -804,15 +804,15 @@ jobs:
- name: Run ONNX - name: Run ONNX
run: METAL=1 python -m pytest -n=auto test/external/external_test_onnx_backend.py --durations=20 run: METAL=1 python -m pytest -n=auto test/external/external_test_onnx_backend.py --durations=20
- name: Test tensor core ops (fake) - name: Test tensor core ops (fake)
run: METAL=1 DEBUG=3 TC=2 python test/test_ops.py TestOps.test_gemm run: METAL=1 DEBUG=3 TC=2 python test/backend/test_ops.py TestOps.test_gemm
- name: Test tensor core ops (real) - name: Test tensor core ops (real)
run: METAL=1 DEBUG=3 python test/test_ops.py TestOps.test_big_gemm run: METAL=1 DEBUG=3 python test/backend/test_ops.py TestOps.test_big_gemm
- name: Test Beam Search - name: Test Beam Search
run: METAL=1 IGNORE_BEAM_CACHE=1 python3 -m pytest extra/optimization/test_beam_search.py run: METAL=1 IGNORE_BEAM_CACHE=1 python3 -m pytest extra/optimization/test_beam_search.py
#- name: Fuzz Test linearizer #- name: Fuzz Test linearizer
# run: METAL=1 DEPTH=4 FUZZ_N=50 FUZZ_MAX_SIZE=1000000 python test/external/fuzz_linearizer.py # run: METAL=1 DEPTH=4 FUZZ_N=50 FUZZ_MAX_SIZE=1000000 python test/external/fuzz_linearizer.py
- name: Run TRANSCENDENTAL math - name: Run TRANSCENDENTAL math
run: METAL=1 TRANSCENDENTAL=2 python -m pytest -n=auto test/test_ops.py::TestOps::test_sin test/test_ops.py::TestOps::test_cos test/test_ops.py::TestOps::test_tan test/test_ops.py::TestOps::test_exp test/test_ops.py::TestOps::test_log --durations=20 run: METAL=1 TRANSCENDENTAL=2 python -m pytest -n=auto test/backend/test_ops.py::TestOps::test_sin test/backend/test_ops.py::TestOps::test_cos test/backend/test_ops.py::TestOps::test_tan test/backend/test_ops.py::TestOps::test_exp test/backend/test_ops.py::TestOps::test_log --durations=20
- name: Run pytest (amd) - name: Run pytest (amd)
env: env:
MOCKGPU: 1 MOCKGPU: 1
@ -854,7 +854,7 @@ jobs:
deps: testing deps: testing
webgpu: 'true' webgpu: 'true'
- name: Test infinity math in WGSL - name: Test infinity math in WGSL
run: WEBGPU=1 python -m pytest -n=auto test/test_renderer_failures.py::TestWGSLFailures::test_multiply_infinity --durations=20 run: WEBGPU=1 python -m pytest -n=auto test/backend/test_renderer_failures.py::TestWGSLFailures::test_multiply_infinity --durations=20
- name: Build WEBGPU Efficientnet - name: Build WEBGPU Efficientnet
run: WEBGPU=1 WEBGPU_BACKEND="WGPUBackendType_Metal" python3 -m examples.compile_efficientnet run: WEBGPU=1 WEBGPU_BACKEND="WGPUBackendType_Metal" python3 -m examples.compile_efficientnet
- name: Clean npm cache - name: Clean npm cache
@ -944,7 +944,7 @@ jobs:
shell: bash shell: bash
run: | run: |
python -c "from tinygrad import Device; assert Device.DEFAULT == {'LLVM':'CPU'}.get(x:='${{ matrix.backend }}'.upper(), x), Device.DEFAULT" python -c "from tinygrad import Device; assert Device.DEFAULT == {'LLVM':'CPU'}.get(x:='${{ matrix.backend }}'.upper(), x), Device.DEFAULT"
python -m pytest -n=auto test/test_tiny.py test/test_ops.py --durations=20 python -m pytest -n=auto test/test_tiny.py test/backend/test_ops.py --durations=20
# ****** Compile-only Tests ****** # ****** Compile-only Tests ******
@ -973,5 +973,5 @@ jobs:
shell: bash shell: bash
run: | run: |
python -c "from tinygrad import Device; assert Device.DEFAULT == 'NULL'" python -c "from tinygrad import Device; assert Device.DEFAULT == 'NULL'"
DEBUG=4 python3 test/test_ops.py TestOps.test_add DEBUG=4 python3 test/backend/test_ops.py TestOps.test_add
python -m pytest -n=auto test/test_ops.py --durations=20 python -m pytest -n=auto test/backend/test_ops.py --durations=20

View file

@ -28,7 +28,7 @@ repos:
pass_filenames: false pass_filenames: false
- id: tests - id: tests
name: comprehensive test suite name: comprehensive test suite
entry: env OMP_NUM_THREADS=1 SKIP_SLOW_TEST=1 PYTHONPATH="." python3 -m pytest -n=6 test/test_ops.py test/test_schedule.py test/unit/test_assign.py test/test_tensor.py test/test_jit.py test/unit/test_schedule_cache.py test/null/test_pattern_matcher.py test/null/test_uop_symbolic.py test/unit/test_helpers.py entry: env OMP_NUM_THREADS=1 SKIP_SLOW_TEST=1 PYTHONPATH="." python3 -m pytest -n=6 test/backend/test_ops.py test/backend/test_schedule.py test/unit/test_assign.py test/backend/test_tensor.py test/backend/test_jit.py test/unit/test_schedule_cache.py test/null/test_pattern_matcher.py test/null/test_uop_symbolic.py test/unit/test_helpers.py
language: system language: system
always_run: true always_run: true
pass_filenames: false pass_filenames: false

View file

@ -41,10 +41,10 @@ Schedules are cached by graph structure. BIND nodes (variables with bound values
python -m pytest test/unit/test_schedule_cache.py -xvs python -m pytest test/unit/test_schedule_cache.py -xvs
# Run with timeout # Run with timeout
python -m pytest test/test_symbolic_ops.py -x --timeout=60 python -m pytest test/backend/test_symbolic_ops.py -x --timeout=60
# Debug with print # Debug with print
DEBUG=2 python -m pytest test/test_schedule.py::test_name -xvs DEBUG=2 python -m pytest test/backend/test_schedule.py::test_name -xvs
# Visualize UOp graphs # Visualize UOp graphs
VIZ=1 python -c "from tinygrad import Tensor; Tensor.ones(10).sum().realize()" VIZ=1 python -c "from tinygrad import Tensor; Tensor.ones(10).sum().realize()"

View file

@ -192,7 +192,7 @@ For more examples on how to run the full test suite please refer to the [CI work
Some examples of running tests locally: Some examples of running tests locally:
```sh ```sh
python3 -m pip install -e '.[testing]' # install extra deps for testing python3 -m pip install -e '.[testing]' # install extra deps for testing
python3 test/test_ops.py # just the ops tests python3 test/backend/test_ops.py # just the ops tests
python3 -m pytest test/ # whole test suite python3 -m pytest test/ # whole test suite
``` ```

View file

@ -7,7 +7,7 @@ export CAPTURE_PROCESS_REPLAY=1
rm "$LOGOPS" 2>/dev/null || true rm "$LOGOPS" 2>/dev/null || true
test/external/process_replay/reset.py test/external/process_replay/reset.py
CI=1 python3 -m pytest -n=auto test/test_ops.py test/test_nn.py test/unit/test_winograd.py test/null/test_real_world.py --durations=20 CI=1 python3 -m pytest -n=auto test/backend/test_ops.py test/backend/test_nn.py test/unit/test_winograd.py test/null/test_real_world.py --durations=20
CL=1 python3 -m pytest test/test_tiny.py CL=1 python3 -m pytest test/test_tiny.py
# extract, sort and uniq # extract, sort and uniq

View file

@ -6,7 +6,7 @@ EXAMPLES_DIR = Path(__file__).parent
PROFILE_PATH = Path(temp("profile.pkl", append_user=True)) PROFILE_PATH = Path(temp("profile.pkl", append_user=True))
EXAMPLES = [ EXAMPLES = [
"test.test_custom_kernel.TestCustomKernel.test_empty", "test.backend.test_custom_kernel.TestCustomKernel.test_empty",
"test.test_tiny.TestTiny.test_plus", "test.test_tiny.TestTiny.test_plus",
"test.test_tiny.TestTiny.test_gemm", "test.test_tiny.TestTiny.test_gemm",
] ]

View file

@ -1,7 +1,7 @@
#!/bin/bash #!/bin/bash
AMD=1 AMD_LLVM=1 python -m pytest -n=1 test/test_ops.py test/test_dtype.py test/test_dtype_alu.py test/test_linearizer.py test/test_randomness.py test/test_jit.py test/test_graph.py test/test_multitensor.py --durations=20 AMD=1 AMD_LLVM=1 python -m pytest -n=1 test/backend/test_ops.py test/backend/test_dtype.py test/backend/test_dtype_alu.py test/backend/test_linearizer.py test/backend/test_randomness.py test/backend/test_jit.py test/backend/test_graph.py test/backend/test_multitensor.py --durations=20
AMD=1 AMD_LLVM=0 python -m pytest -n=1 test/test_ops.py test/test_dtype.py test/test_dtype_alu.py test/test_linearizer.py test/test_randomness.py test/test_jit.py test/test_graph.py test/test_multitensor.py --durations=20 AMD=1 AMD_LLVM=0 python -m pytest -n=1 test/backend/test_ops.py test/backend/test_dtype.py test/backend/test_dtype_alu.py test/backend/test_linearizer.py test/backend/test_randomness.py test/backend/test_jit.py test/backend/test_graph.py test/backend/test_multitensor.py --durations=20
CNT=1 AMD_LLVM=0 DEBUG=2 FP8E4M3=1 HALF=0 BFLOAT16=0 SHOULD_USE_TC=1 python extra/gemm/simple_matmul.py CNT=1 AMD_LLVM=0 DEBUG=2 FP8E4M3=1 HALF=0 BFLOAT16=0 SHOULD_USE_TC=1 python extra/gemm/simple_matmul.py
CNT=1 AMD_LLVM=0 DEBUG=2 FP8E4M3=0 HALF=1 BFLOAT16=0 SHOULD_USE_TC=1 python extra/gemm/simple_matmul.py CNT=1 AMD_LLVM=0 DEBUG=2 FP8E4M3=0 HALF=1 BFLOAT16=0 SHOULD_USE_TC=1 python extra/gemm/simple_matmul.py

View file

@ -122,7 +122,7 @@ do_not_mutate = [
"tinygrad/helpers.py", "tinygrad/helpers.py",
"tinygrad/tensor.py", "tinygrad/tensor.py",
] ]
tests_dir = ["test/test_tiny.py", "test/test_ops.py"] tests_dir = ["test/test_tiny.py", "test/backend/test_ops.py"]
debug = true debug = true

View file

@ -1,12 +0,0 @@
FROM ubuntu:22.04
# Install python3.10, and pip3
RUN apt-get update && apt-get install -y --no-install-recommends \
python3.10 \
python3-pip \
&& rm -rf /var/lib/apt/lists/*
# Install python dependencies
COPY . ./tinygrad
WORKDIR tinygrad
RUN pip install -e .

5
test/README Normal file
View file

@ -0,0 +1,5 @@
Three groups of tests run in CI
backend -- tests that run on each backend
null -- tests that don't require any backend
unit -- tests that only run on a single backend in CI

0
test/backend/__init__.py Normal file
View file

View file

@ -198,7 +198,7 @@ class TestProfiler(unittest.TestCase):
@unittest.skip("this test is flaky") @unittest.skip("this test is flaky")
@unittest.skipUnless(Device[Device.DEFAULT].graph is not None, "graph support required") @unittest.skipUnless(Device[Device.DEFAULT].graph is not None, "graph support required")
def test_graph(self): def test_graph(self):
from test.test_graph import helper_alloc_rawbuffer, helper_exec_op, helper_test_graphs from test.backend.test_graph import helper_alloc_rawbuffer, helper_exec_op, helper_test_graphs
device = TestProfiler.d0.device device = TestProfiler.d0.device
bufs = [helper_alloc_rawbuffer(device, fill=True) for _ in range(5)] bufs = [helper_alloc_rawbuffer(device, fill=True) for _ in range(5)]
graphs = [[helper_exec_op(device, bufs[0], [bufs[1], bufs[2]]), helper_exec_op(device, bufs[0], [bufs[3], bufs[4]]),]] graphs = [[helper_exec_op(device, bufs[0], [bufs[1], bufs[2]]), helper_exec_op(device, bufs[0], [bufs[3], bufs[4]]),]]

View file

@ -2,8 +2,8 @@ import unittest
from tinygrad import Tensor, Device, dtypes from tinygrad import Tensor, Device, dtypes
from tinygrad.tensor import _to_np_dtype from tinygrad.tensor import _to_np_dtype
from tinygrad.helpers import Context, getenv, CI, OSX from tinygrad.helpers import Context, getenv, CI, OSX
from test.test_schedule import check_schedule from test.backend.test_schedule import check_schedule
from test.test_dtype_alu import ht, dtypes_float from test.backend.test_dtype_alu import ht, dtypes_float
from tinygrad.device import is_dtype_supported from tinygrad.device import is_dtype_supported
import numpy as np import numpy as np
import math import math

View file

@ -12,16 +12,9 @@ from tinygrad.engine.schedule import ExecItem
from tinygrad.device import is_dtype_supported from tinygrad.device import is_dtype_supported
from tinygrad.codegen.opt import Opt, OptOps from tinygrad.codegen.opt import Opt, OptOps
from tinygrad.renderer.ptx import PTXRenderer from tinygrad.renderer.ptx import PTXRenderer
from test.helpers import get_uops from test.helpers import to_uops_list
from dataclasses import replace from dataclasses import replace
def to_uops_list(u:list[UOp], ren=None) -> list[UOp]:
sink = UOp.group(*u)
for r in sink.ranges: sink = sink.end(r)
ret = get_uops(sink.sink(arg=KernelInfo(opts_to_apply=())), ren)
assert ret[-1].op is Ops.SINK
return ret
def _uops_to_prg(uops_list): def _uops_to_prg(uops_list):
prg = get_program(UOp.sink(*uops_list, arg=KernelInfo()), Device[Device.DEFAULT].renderer) prg = get_program(UOp.sink(*uops_list, arg=KernelInfo()), Device[Device.DEFAULT].renderer)
return CompiledRunner(replace(prg, device=Device.DEFAULT)) return CompiledRunner(replace(prg, device=Device.DEFAULT))

View file

@ -2,7 +2,7 @@ from tinygrad import Tensor, dtypes, GlobalCounters
dtypes.default_float = dtypes.float16 dtypes.default_float = dtypes.float16
from tinygrad.dtype import to_dtype from tinygrad.dtype import to_dtype
from tinygrad.helpers import getenv from tinygrad.helpers import getenv
from test.test_softmax_fusion import single_kernel_softmax from test.backend.test_softmax_fusion import single_kernel_softmax
if __name__ == "__main__": if __name__ == "__main__":
# softmax in bert layers # softmax in bert layers

View file

@ -3,7 +3,7 @@
set -e set -e
HEAD=$(git rev-parse --abbrev-ref HEAD) HEAD=$(git rev-parse --abbrev-ref HEAD)
python test/external/process_replay/reset.py python test/external/process_replay/reset.py
CAPTURE_PROCESS_REPLAY=1 python test/test_ops.py TestOps.test_add CAPTURE_PROCESS_REPLAY=1 python test/backend/test_ops.py TestOps.test_add
git checkout master git checkout master
git checkout $HEAD -- test/external/process_replay/process_replay.py git checkout $HEAD -- test/external/process_replay/process_replay.py
ASSERT_PROCESS_REPLAY=${ASSERT_PROCESS_REPLAY:-1} python test/external/process_replay/process_replay.py ASSERT_PROCESS_REPLAY=${ASSERT_PROCESS_REPLAY:-1} python test/external/process_replay/process_replay.py

View file

@ -67,6 +67,13 @@ def eval_uop(uop:UOp, inputs:list[tuple[DType, list[Any]]]|None=None):
prog(out_buf:=allocator.alloc(uop.dtype.itemsize), *bufs) prog(out_buf:=allocator.alloc(uop.dtype.itemsize), *bufs)
return out_buf.cast(uop.dtype.fmt or "").tolist()[0] return out_buf.cast(uop.dtype.fmt or "").tolist()[0]
def to_uops_list(u:list[UOp], ren=None) -> list[UOp]:
  """Run the given UOps through get_uops and return the resulting uop list.

  The inputs are grouped into one UOp, every range reported by that group is
  ended, and the result is sunk with a KernelInfo whose opts_to_apply is empty.

  Args:
    u: the UOps to group.
    ren: forwarded unchanged as the second argument of get_uops
      (presumably a renderer -- confirm against get_uops).

  Returns:
    The list produced by get_uops; its last element is asserted to be an Ops.SINK.
  """
  grouped = UOp.group(*u)
  # the ranges are read once from the initial group, then each one is ended in turn
  open_ranges = grouped.ranges
  for rng in open_ranges:
    grouped = grouped.end(rng)
  kernel_sink = grouped.sink(arg=KernelInfo(opts_to_apply=()))
  uops = get_uops(kernel_sink, ren)
  assert uops[-1].op is Ops.SINK
  return uops
def not_support_multi_device():
  """Tell whether the current setup lacks multi-device support.

  CL and CUDA don't support multi device when running in CI, so the result is
  truthy only when CI is set and the default device is one of those two.
  Outside CI the (falsy) CI value itself is returned.
  """
  if not CI:
    return CI
  return Device.DEFAULT in ("CL", "CUDA")

View file

@ -5,7 +5,7 @@ from tinygrad.helpers import DEBUG, Context
from tinygrad.uop.ops import Ops, UOp, UPat, PatternMatcher, track_rewrites, graph_rewrite, GroupOp, AxisType from tinygrad.uop.ops import Ops, UOp, UPat, PatternMatcher, track_rewrites, graph_rewrite, GroupOp, AxisType
from tinygrad.uop.symbolic import sym from tinygrad.uop.symbolic import sym
from tinygrad.codegen.late.expander import expander from tinygrad.codegen.late.expander import expander
from test.test_uops import to_uops_list from test.helpers import to_uops_list
simple_pm = PatternMatcher([ simple_pm = PatternMatcher([
(UPat.cvar('x', dtypes.int), lambda x: UOp.const(dtypes.float, 1.0) + UOp.const(dtypes.float, 2.0)), (UPat.cvar('x', dtypes.int), lambda x: UOp.const(dtypes.float, 1.0) + UOp.const(dtypes.float, 2.0)),

View file

@ -5,17 +5,10 @@ from tinygrad.tensor import Tensor
from tinygrad.helpers import Timing, Context from tinygrad.helpers import Timing, Context
from tinygrad.dtype import dtypes, ConstFloat # noqa: F401 from tinygrad.dtype import dtypes, ConstFloat # noqa: F401
from tinygrad.device import Device from tinygrad.device import Device
from tinygrad.uop.ops import Ops, UOp, UPat, KernelInfo, exec_alu from tinygrad.uop.ops import Ops, UOp, UPat, exec_alu
from tinygrad.uop.spec import shared_spec from tinygrad.uop.spec import shared_spec
from tinygrad.uop.symbolic import sym from tinygrad.uop.symbolic import sym
from test.helpers import get_uops from test.helpers import to_uops_list
def to_uops_list(u:list[UOp], ren=None) -> list[UOp]:
  """Run the given UOps through get_uops and return the resulting uop list.

  The inputs are grouped into one UOp, every range reported by that group is
  ended, and the result is sunk with a KernelInfo whose opts_to_apply is empty.

  Args:
    u: the UOps to group.
    ren: forwarded unchanged as the second argument of get_uops
      (presumably a renderer -- confirm against get_uops).

  Returns:
    The list produced by get_uops; its last element is asserted to be an Ops.SINK.
  """
  grouped = UOp.group(*u)
  # the ranges are read once from the initial group, then each one is ended in turn
  open_ranges = grouped.ranges
  for rng in open_ranges:
    grouped = grouped.end(rng)
  kernel_sink = grouped.sink(arg=KernelInfo(opts_to_apply=()))
  uops = get_uops(kernel_sink, ren)
  assert uops[-1].op is Ops.SINK
  return uops
class TestSafeCast(unittest.TestCase): class TestSafeCast(unittest.TestCase):
def test_cast_folds(self): def test_cast_folds(self):

View file

@ -3,7 +3,7 @@ from tinygrad import dtypes, Variable
from tinygrad.dtype import AddrSpace from tinygrad.dtype import AddrSpace
from tinygrad.helpers import Context from tinygrad.helpers import Context
from tinygrad.uop.ops import Ops, UOp, AxisType from tinygrad.uop.ops import Ops, UOp, AxisType
from test.test_uops import to_uops_list from test.helpers import to_uops_list
class TestValidateOOB(unittest.TestCase): class TestValidateOOB(unittest.TestCase):
"""Test z3 validation of index bounds for different ALU ops and patterns.""" """Test z3 validation of index bounds for different ALU ops and patterns."""

0
test/opt/__init__.py Normal file
View file

View file

@ -3,7 +3,7 @@ from tinygrad import Device, Tensor, dtypes
from tinygrad.codegen.opt import Opt, OptOps, KernelOptError from tinygrad.codegen.opt import Opt, OptOps, KernelOptError
# TODO: write a clean version of this # TODO: write a clean version of this
from test.test_linearizer import helper_linearizer_opt from test.backend.test_linearizer import helper_linearizer_opt
class TestKernelOpts(unittest.TestCase): class TestKernelOpts(unittest.TestCase):
@unittest.skipUnless(Device[Device.DEFAULT].renderer.has_local, "test requires locals") @unittest.skipUnless(Device[Device.DEFAULT].renderer.has_local, "test requires locals")

View file

@ -14,7 +14,7 @@ from tinygrad.codegen.opt import Opt, OptOps, KernelOptError
from tinygrad.codegen.opt.tc import amd_cdna_1616128 from tinygrad.codegen.opt.tc import amd_cdna_1616128
# TODO: write a clean version of this # TODO: write a clean version of this
from test.test_linearizer import helper_realized_ast, helper_linearizer_opt from test.backend.test_linearizer import helper_realized_ast, helper_linearizer_opt
# NOTE: get_program always passes in Device[Device.DEFAULT].renderer explicitly for process_replay!!! # NOTE: get_program always passes in Device[Device.DEFAULT].renderer explicitly for process_replay!!!