Compare commits

...

1 commit

Author SHA1 Message Date
George Hotz
15309ea0d8 cleanup tests, bump caches 2025-08-19 21:08:57 -07:00
10 changed files with 33 additions and 37 deletions

View file

@ -121,7 +121,7 @@ runs:
echo 'Acquire::GzipIndexes "true";' | sudo tee /etc/apt/apt.conf.d/gzip echo 'Acquire::GzipIndexes "true";' | sudo tee /etc/apt/apt.conf.d/gzip
echo 'Acquire::http::Pipeline-Depth "5";' | sudo tee -a /etc/apt/apt.conf.d/99parallel echo 'Acquire::http::Pipeline-Depth "5";' | sudo tee -a /etc/apt/apt.conf.d/99parallel
echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' | sudo tee -a /etc/apt/apt.conf.d/99keep-debs echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' | sudo tee -a /etc/apt/apt.conf.d/99keep-debs
- name: Add OpenCL Repo - name: Add OpenCL Repo
if: inputs.opencl == 'true' && runner.os == 'Linux' if: inputs.opencl == 'true' && runner.os == 'Linux'
shell: bash shell: bash
@ -174,7 +174,7 @@ runs:
if [[ "${{ inputs.llvm }}" == "true" ]]; then if [[ "${{ inputs.llvm }}" == "true" ]]; then
pkgs+=" libllvm20 clang-20 lld-20" pkgs+=" libllvm20 clang-20 lld-20"
fi fi
echo "pkgs=$pkgs" >> "$GITHUB_OUTPUT" echo "pkgs=$pkgs" >> "$GITHUB_OUTPUT"
echo "hash=$(echo -n "$pkgs" | sha256sum | cut -d' ' -f1)" >> "$GITHUB_OUTPUT" echo "hash=$(echo -n "$pkgs" | sha256sum | cut -d' ' -f1)" >> "$GITHUB_OUTPUT"
@ -183,21 +183,21 @@ runs:
uses: actions/cache@v4 uses: actions/cache@v4
with: with:
path: /var/cache/apt/archives/ path: /var/cache/apt/archives/
key: ${{ runner.os }}-apt-${{ steps.apt-pkgs.outputs.hash }} key: ${{ runner.os }}-apt-${{ steps.apt-pkgs.outputs.hash }}-${{ env.APT_CACHE_VERSION }}
- name: Run apt Update + Install - name: Run apt Update + Install
if: runner.os == 'Linux' && (inputs.opencl == 'true' || inputs.amd == 'true' || inputs.cuda == 'true' || inputs.webgpu == 'true' || inputs.llvm == 'true') if: runner.os == 'Linux' && (inputs.opencl == 'true' || inputs.amd == 'true' || inputs.cuda == 'true' || inputs.webgpu == 'true' || inputs.llvm == 'true')
shell: bash shell: bash
run: | run: |
sudo apt -qq update || true sudo apt -qq update || true
# ******** do install ******** # ******** do install ********
if [[ -n "${{ steps.apt-pkgs.outputs.pkgs }}" ]]; then if [[ -n "${{ steps.apt-pkgs.outputs.pkgs }}" ]]; then
sudo apt-get -y --allow-unauthenticated --no-install-recommends install ${{ steps.apt-pkgs.outputs.pkgs }} sudo apt-get -y --allow-unauthenticated --no-install-recommends install ${{ steps.apt-pkgs.outputs.pkgs }}
fi fi
sudo chown -R $USER:$USER /var/cache/apt/archives/ sudo chown -R $USER:$USER /var/cache/apt/archives/
# **** AMD **** # **** AMD ****
- name: Setup AMD (Linux) - name: Setup AMD (Linux)
if: inputs.amd == 'true' && runner.os == 'Linux' if: inputs.amd == 'true' && runner.os == 'Linux'
@ -234,7 +234,7 @@ runs:
cache-name: cache-gpuocelot-build cache-name: cache-gpuocelot-build
with: with:
path: ${{ github.workspace }}/gpuocelot/ocelot path: ${{ github.workspace }}/gpuocelot/ocelot
key: ${{ runner.os }}-gpuocelot-b16039dc940dc6bc4ea0a98380495769ff35ed99-rebuild-0 key: ${{ runner.os }}-gpuocelot-b16039dc940dc6bc4ea0a98380495769ff35ed99-rebuild-${{ env.BUILD_CACHE_VERSION }}
- name: Clone/compile gpuocelot - name: Clone/compile gpuocelot
if: inputs.ocelot == 'true' && steps.cache-build.outputs.cache-hit != 'true' if: inputs.ocelot == 'true' && steps.cache-build.outputs.cache-hit != 'true'
shell: bash shell: bash

View file

@ -63,7 +63,7 @@ jobs:
- name: Run model inference benchmark - name: Run model inference benchmark
run: METAL=1 python3.11 test/external/external_model_benchmark.py run: METAL=1 python3.11 test/external/external_model_benchmark.py
- name: Test speed vs torch - name: Test speed vs torch
run: BIG=2 MPS=1 python3.11 test/external/external_test_speed_v_torch.py | tee torch_speed.txt run: BIG=2 MPS=1 python3.11 test/speed/external_test_speed_v_torch.py | tee torch_speed.txt
- name: Test tensor cores - name: Test tensor cores
run: METAL=1 python3.11 test/test_linearizer.py TestLinearizer.test_tensor_cores TestLinearizer.test_tensor_cores_padded TestLinearizer.test_tensor_cores_padded_uops run: METAL=1 python3.11 test/test_linearizer.py TestLinearizer.test_tensor_cores TestLinearizer.test_tensor_cores_padded TestLinearizer.test_tensor_cores_padded_uops
- name: Test AMX tensor cores - name: Test AMX tensor cores
@ -187,7 +187,7 @@ jobs:
- name: Run model inference benchmark - name: Run model inference benchmark
run: NV=1 CAPTURE_PROCESS_REPLAY=0 NOCLANG=1 python3 test/external/external_model_benchmark.py run: NV=1 CAPTURE_PROCESS_REPLAY=0 NOCLANG=1 python3 test/external/external_model_benchmark.py
- name: Test speed vs torch - name: Test speed vs torch
run: NV=1 CAPTURE_PROCESS_REPLAY=0 HALF=1 BIG=2 TORCHCUDA=1 python3 test/external/external_test_speed_v_torch.py | tee torch_speed.txt run: NV=1 CAPTURE_PROCESS_REPLAY=0 HALF=1 BIG=2 TORCHCUDA=1 python3 test/speed/external_test_speed_v_torch.py | tee torch_speed.txt
- name: Test speed vs theoretical - name: Test speed vs theoretical
run: NV=1 IGNORE_BEAM_CACHE=1 BEAM_DEBUG=1 DEBUG=1 python -m pytest -rA test/external/speed_v_theoretical.py --durations=20 run: NV=1 IGNORE_BEAM_CACHE=1 BEAM_DEBUG=1 DEBUG=1 python -m pytest -rA test/external/speed_v_theoretical.py --durations=20
- name: Test benchmark allreduce - name: Test benchmark allreduce
@ -389,7 +389,7 @@ jobs:
#- name: Test speed vs torch #- name: Test speed vs torch
# run: | # run: |
# python3 -c "import torch; print(torch.__version__)" # python3 -c "import torch; print(torch.__version__)"
# LD_PRELOAD="/opt/rocm/lib/libhsa-runtime64.so" HSA=1 BIG=2 TORCHCUDA=1 python3 test/external/external_test_speed_v_torch.py | tee torch_speed.txt # LD_PRELOAD="/opt/rocm/lib/libhsa-runtime64.so" HSA=1 BIG=2 TORCHCUDA=1 python3 test/speed/external_test_speed_v_torch.py | tee torch_speed.txt
- name: Test speed vs theoretical - name: Test speed vs theoretical
run: AMD=1 IGNORE_BEAM_CACHE=1 BEAM_DEBUG=1 DEBUG=1 python -m pytest -rA test/external/speed_v_theoretical.py --durations=20 run: AMD=1 IGNORE_BEAM_CACHE=1 BEAM_DEBUG=1 DEBUG=1 python -m pytest -rA test/external/speed_v_theoretical.py --durations=20
- name: Test tensor cores - name: Test tensor cores

View file

@ -1,8 +1,10 @@
name: Unit Tests name: Unit Tests
env: env:
# increment this when downloads substantially change to avoid the internet # increment this when downloads substantially change to avoid the internet
DOWNLOAD_CACHE_VERSION: '11' DOWNLOAD_CACHE_VERSION: '12'
PYTHON_CACHE_VERSION: '2' PYTHON_CACHE_VERSION: '3'
APT_CACHE_VERSION: '1'
BUILD_CACHE_VERSION: '1'
CAPTURE_PROCESS_REPLAY: 1 CAPTURE_PROCESS_REPLAY: 1
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
@ -30,9 +32,9 @@ jobs:
- name: External Benchmark Schedule - name: External Benchmark Schedule
run: PYTHONPATH="." python3 test/external/external_benchmark_schedule.py run: PYTHONPATH="." python3 test/external/external_benchmark_schedule.py
- name: Speed Test - name: Speed Test
run: LLVM=1 python3 test/external/external_test_speed_v_torch.py run: LLVM=1 python3 test/speed/external_test_speed_v_torch.py
- name: Speed Test (BEAM=2) - name: Speed Test (BEAM=2)
run: BEAM=2 LLVM=1 python3 test/external/external_test_speed_v_torch.py run: BEAM=2 LLVM=1 python3 test/speed/external_test_speed_v_torch.py
docs: docs:
name: Docs name: Docs

View file

@ -2,7 +2,7 @@ import time
from tinygrad import Tensor, TinyJit, Device, Context from tinygrad import Tensor, TinyJit, Device, Context
from tinygrad.helpers import Profiling, Timing, GlobalCounters from tinygrad.helpers import Profiling, Timing, GlobalCounters
# python3 test/external/external_test_speed_v_torch.py TestSpeed.test_add_a # python3 test/speed/external_test_speed_v_torch.py TestSpeed.test_add_a
@TinyJit @TinyJit
def plus(a:Tensor, b:Tensor): return a+b def plus(a:Tensor, b:Tensor): return a+b

View file

@ -1,7 +1,10 @@
import unittest import unittest, io
from contextlib import redirect_stdout
from tinygrad import Tensor, dtypes, Device from tinygrad import Tensor, dtypes, Device
from tinygrad.helpers import OSX
from tinygrad.engine.realize import lower_schedule from tinygrad.engine.realize import lower_schedule
from tinygrad.device import is_dtype_supported from tinygrad.device import is_dtype_supported
from tinygrad.engine.realize import get_program
class TestCompileFailures(unittest.TestCase): class TestCompileFailures(unittest.TestCase):
def compile(self, out:Tensor): def compile(self, out:Tensor):
@ -14,5 +17,17 @@ class TestCompileFailures(unittest.TestCase):
def test_add_max_uchar(self): def test_add_max_uchar(self):
self.compile((Tensor.empty(1024, dtype='uint8') + Tensor.empty(1024, dtype='uint8')).max()) self.compile((Tensor.empty(1024, dtype='uint8') + Tensor.empty(1024, dtype='uint8')).max())
class TestDisassembly(unittest.TestCase):
  """Checks on the disassembly produced by the default device's compiler."""

  # TODO: fails on llvm. llvm.LLVMGetHostCPUName() returns "generic"
  @unittest.skipUnless(Device.DEFAULT in ("CPU",) and OSX, "m series cpus support fp16 arithmetic")
  def test_float16_alu(self):
    """Compile a float16 add and check that its disassembly does not contain "fcvt".

    NOTE(review): "fcvt" is presumably a float-conversion mnemonic, so its absence would mean the
    add is done directly in fp16 -- confirm against the target ISA.
    """
    c = Tensor([1], dtype=dtypes.float16) + Tensor([1], dtype=dtypes.float16)
    # only the last schedule item is rendered and compiled
    s = c.schedule()[-1]
    p = get_program(s.ast, Device[Device.DEFAULT].renderer)
    lib = Device[Device.DEFAULT].compiler.compile(p.src)
    # capture whatever disassemble() prints to stdout so it can be inspected
    out = io.StringIO()
    with redirect_stdout(out): Device[Device.DEFAULT].compiler.disassemble(lib)
    # assertNotIn is not stripped under `python -O` and reports the captured text on failure
    self.assertNotIn("fcvt", out.getvalue())
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View file

@ -1,21 +0,0 @@
import unittest, io
from tinygrad import Tensor, dtypes
from contextlib import redirect_stdout
from tinygrad.device import Device
from tinygrad.helpers import OSX
from tinygrad.engine.realize import get_program
class TestDisassembly(unittest.TestCase):
  """Checks on the disassembly produced by the default device's compiler."""

  # TODO: fails on llvm. llvm.LLVMGetHostCPUName() returns "generic"
  @unittest.skipUnless(Device.DEFAULT in ("CPU",) and OSX, "m series cpus support fp16 arithmetic")
  def test_float16_alu(self):
    """Compile a float16 add and check that its disassembly does not contain "fcvt".

    NOTE(review): "fcvt" is presumably a float-conversion mnemonic, so its absence would mean the
    add is done directly in fp16 -- confirm against the target ISA.
    """
    c = Tensor([1], dtype=dtypes.float16) + Tensor([1], dtype=dtypes.float16)
    # only the last schedule item is rendered and compiled
    s = c.schedule()[-1]
    p = get_program(s.ast, Device[Device.DEFAULT].renderer)
    lib = Device[Device.DEFAULT].compiler.compile(p.src)
    # capture whatever disassemble() prints to stdout so it can be inspected
    out = io.StringIO()
    with redirect_stdout(out): Device[Device.DEFAULT].compiler.disassemble(lib)
    # assertNotIn is not stripped under `python -O` and reports the captured text on failure
    self.assertNotIn("fcvt", out.getvalue())

# run the tests in this file when it is executed directly as a script
if __name__ == "__main__":
  unittest.main()