update tests, WEBGPU=1 pytest . passes (#14089)

* update tests, `WEBGPU=1 pytest .` passes

* minor update
This commit is contained in:
chenyu 2026-01-10 00:03:02 -05:00 committed by GitHub
commit 92246ea731
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
15 changed files with 43 additions and 26 deletions

View file

@ -605,9 +605,7 @@ jobs:
WEBGPU=1 DEBUG=4 FORWARD_ONLY=1 python3 test/test_ops.py TestOps.test_add
- name: Run selected webgpu tests
run: |
WEBGPU=1 WEBGPU_BACKEND="WGPUBackendType_Vulkan" python3 -m pytest -n=auto test/ --ignore=test/models --ignore=test/unit \
--ignore=test/test_copy_speed.py --ignore=test/test_rearrange_einops.py \
--ignore=test/test_fuzz_shape_ops.py --durations=20
WEBGPU=1 WEBGPU_BACKEND="WGPUBackendType_Vulkan" python3 -m pytest -n=auto test/ --ignore=test/models --ignore=test/unit --durations=20
- name: Run process replay tests
uses: ./.github/actions/process-replay

View file

@ -8,9 +8,9 @@ import torch
def get_question_samp(bsz, seq_len, vocab_size, seed):
np.random.seed(seed)
in_ids= np.random.randint(vocab_size, size=(bsz, seq_len))
in_ids = np.random.randint(vocab_size, size=(bsz, seq_len), dtype=np.int32)
mask = np.random.choice([True, False], size=(bsz, seq_len))
seg_ids = np.random.randint(2, size=(bsz, seq_len)) # type_vocab_size
seg_ids = np.random.randint(2, size=(bsz, seq_len), dtype=np.int32) # type_vocab_size
return in_ids, mask, seg_ids
def set_equal_weights(mdl, torch_mdl):

View file

@ -1,6 +1,10 @@
import unittest
from extra.models import resnet
from tinygrad import dtypes
from tinygrad.device import is_dtype_supported
# pretrained weights contain num_batches_tracked as int64
@unittest.skipUnless(is_dtype_supported(dtypes.int64), "need int64 support")
class TestResnet(unittest.TestCase):
def test_model_load(self):
model = resnet.ResNet18()

View file

@ -52,6 +52,8 @@ def wer_helper(result: str, reference: str)->float:
@unittest.skipIf(Device.DEFAULT in ["CPU"], "slow")
@unittest.skipUnless(is_dtype_supported(dtypes.float16), "need float16 support")
# TODO: WEBGPU GPU dispatch dimensions limit
@unittest.skipIf(Device.DEFAULT == "WEBGPU", "WEBGPU GPU dispatch dimensions limit")
class TestWhisper(unittest.TestCase):
@classmethod
def setUpClass(cls):

View file

@ -308,6 +308,7 @@ class TestTautologicalCompare(unittest.TestCase):
np.testing.assert_equal((Tensor(True) < Tensor(False)).numpy(), False)
np.testing.assert_equal((Tensor(True) < Tensor(True)).numpy(), False)
@unittest.skipIf(Device.DEFAULT == "WEBGPU", "WEBGPU doesn't support NaN comparison correctly")
def test_a_eq_a(self):
# self eq is always true for int or bool
a = Tensor([1, 2, 3])

View file

@ -639,6 +639,8 @@ class TestOps(unittest.TestCase):
helper_test_op([(45,65), (45,65)], lambda x,y: x**y)
helper_test_op([(45,65), (45,65)], lambda x,y: x.pow(y))
# TODO: WEBGPU NaN handling in pow operations
@unittest.skipIf(Device.DEFAULT == "WEBGPU", "WEBGPU NaN handling differs")
def test_pow(self):
helper_test_op([(45,65)], lambda x: x**0)
helper_test_op([(45,65)], lambda x: x**1)

View file

@ -118,7 +118,7 @@ class TestSchedule(unittest.TestCase):
a = Tensor.randn(4, 2, 1).realize().permute((1, 0, 2))
b = a.cast(dtypes.half).expand((2, 4, 4))+2
run_schedule(check_schedule(b, 1))
np.testing.assert_allclose(b.numpy(), np.broadcast_to(a.numpy().astype(np.float16), (2, 4, 4))+2)
np.testing.assert_allclose(b.numpy(), np.broadcast_to(a.numpy().astype(np.float16), (2, 4, 4))+2, rtol=1e-3)
def test_indexing_scalars_simple(self):
X = Tensor.randn(2, 2).realize()

View file

@ -1,7 +1,7 @@
import unittest
import random
from os import getenv
from tinygrad import Tensor, TinyJit, Variable, dtypes
from tinygrad import Tensor, TinyJit, Variable, dtypes, Device
from tinygrad.helpers import Context
import numpy as np
@ -159,6 +159,8 @@ class TestSetitem(unittest.TestCase):
t[:-1] = t[1:]
self.assertEqual(t.tolist(), [[2.0], [1.0], [1.0]])
# TODO: WEBGPU pipeline validation error
@unittest.skipIf(Device.DEFAULT == "WEBGPU", "WEBGPU pipeline validation error")
def test_setitem_big(self):
idx_size, val = 256, 4
t = Tensor.arange(0, idx_size+1)

View file

@ -47,6 +47,8 @@ class TestTranscendentalMath(unittest.TestCase):
op[1](np.array([x], dtype=_to_np_dtype(dtypes.float16))),
atol=1e-2, rtol=5e-3) # exp can have bigger rtol
# TODO: WEBGPU produces incorrect values near infinity
@unittest.skipIf(Device.DEFAULT == "WEBGPU", "WEBGPU incorrect values near inf")
@given(strat.sampled_from([(dtypes.float64, 709.5), (dtypes.float32, 88.7), (dtypes.float16, 11)]))
def test_exp_near_inf(self, dtype_x):
# reordering compute might return inf

View file

@ -6,6 +6,8 @@ from tinygrad.helpers import Context, CI
from tinygrad.tensor import Tensor
from tinygrad.device import Device
_SKIP_KERNEL_TIMING = Device.DEFAULT == "WEBGPU" # WEBGPU kernel timing not supported
class TestBenchLog(unittest.TestCase):
def setUp(self):
clear_events()
@ -35,7 +37,7 @@ class TestBenchLog(unittest.TestCase):
self.assertGreater(_events[event]["wall"][0], 0)
self.assertGreater(_events[event]["wall"][1], 0)
@skipIf(CI, "ci timing is not accurate")
@skipIf(CI or _SKIP_KERNEL_TIMING, "ci timing is not accurate")
def test_log_single_kernel_time(self):
wall_times = []
@ -52,7 +54,7 @@ class TestBenchLog(unittest.TestCase):
self.assertLess(_events[event]["kernel"][0], wall_times[0])
self.assertGreater(_events[event]["kernel"][0], 0)
@skipIf(CI and Device.DEFAULT == "CUDA", "ci cuda timing is not accurate")
@skipIf((CI and Device.DEFAULT == "CUDA") or _SKIP_KERNEL_TIMING, "ci cuda timing is not accurate")
def test_interleaved_wall_kernel_time(self):
wall_times = []
with Context(DEBUG=2):
@ -74,7 +76,7 @@ class TestBenchLog(unittest.TestCase):
self.assertLess(_events[event]["kernel"][0], wall_times[0])
self.assertGreater(_events[event]["kernel"][0], 0)
@skipIf(CI and Device.DEFAULT == "CUDA", "ci cuda timing is not accurate")
@skipIf((CI and Device.DEFAULT == "CUDA") or _SKIP_KERNEL_TIMING, "ci cuda timing is not accurate")
def test_stacked_wall_kernel_time(self):
with Context(DEBUG=2):
for event in BenchEvent:

View file

@ -44,7 +44,7 @@ class TestConv(unittest.TestCase):
x = Tensor.rand(1,1,32,32)
w = Tensor.rand(1,1,3,3)
out = x.conv2d(w, padding=(1,1))
np.testing.assert_allclose(out.relu().numpy(), np.maximum(out.numpy(), 0))
np.testing.assert_allclose(out.relu().numpy(), np.maximum(out.numpy(), 0), atol=1e-6)
def test_two_binops_no_rerun(self):
x = Tensor.randn(1,12,16,32)

View file

@ -167,6 +167,7 @@ class TestSafetensors(unittest.TestCase):
for dtype in dtypes.fields().values():
if dtype in [dtypes.bfloat16]: continue # not supported in numpy
if dtype in [dtypes.double, *dtypes.fp8s] and Device.DEFAULT == "METAL": continue # not supported on METAL
if not is_dtype_supported(dtype): continue
path = temp(f"ones.{dtype}.safetensors")
ones = Tensor(np.random.rand(10,10), dtype=dtype)
safe_save(get_state_dict(ones), path)

View file

@ -40,6 +40,8 @@ def ggml_tensor_to_numpy(tensor: ggml.ggml_tensor_p):
return np.lib.stride_tricks.as_strided(output, shape=shape, strides=strides), ctx
@unittest.skipIf(any(not is_dtype_supported(t) for t in [ dtypes.uint8, dtypes.half ]), "Backend must support uint8 and half")
# TODO: WEBGPU GGUF dequantization produces incorrect values
@unittest.skipIf(Device.DEFAULT == "WEBGPU", "WEBGPU GGUF dequantization issue")
class TestGGUF(unittest.TestCase):
def setUp(self) -> None:
params = ggml.ggml_init_params(mem_size=0, mem_buffer=None, no_alloc=False)

View file

@ -699,6 +699,7 @@ class TestIndexing(unittest.TestCase):
i, j = indices
numpy_testing_assert_equal_helper(x[i:j], x[0:1])
@unittest.skipUnless(is_dtype_supported(dtypes.int64), "tensor indexing uses int64 internally")
def test_ellipsis_tensor(self):
x = Tensor.arange(0, 9).reshape(3, 3)
idx = Tensor([0, 2])

View file

@ -11,50 +11,50 @@ from tinygrad import Tensor
class test_rearrange_examples(unittest.TestCase):
def test1(self):
# transpose
x = Tensor(np.arange(10 * 20 * 30 * 40).reshape([10, 20, 30, 40]))
x = Tensor(np.arange(10 * 20 * 30 * 40, dtype=np.int32).reshape([10, 20, 30, 40]))
y = x.rearrange("b c h w -> b h w c")
assert tuple(y.shape) == (10, 30, 40, 20)
def test2(self):
# view / reshape
x = Tensor(np.arange(10 * 20 * 30 * 40).reshape([10, 20, 30, 40]))
x = Tensor(np.arange(10 * 20 * 30 * 40, dtype=np.int32).reshape([10, 20, 30, 40]))
y = x.rearrange("b c h w -> b (c h w)")
assert tuple(y.shape) == (10, 20 * 30 * 40)
def test3(self):
# depth-to-space
x = Tensor(np.arange(10 * 20 * 30 * 40).reshape([10, 20, 30, 40]))
x = Tensor(np.arange(10 * 20 * 30 * 40, dtype=np.int32).reshape([10, 20, 30, 40]))
y = x.rearrange("b (c h1 w1) h w -> b c (h h1) (w w1)", h1=2, w1=2)
assert tuple(y.shape) == (10, 5, 30 * 2, 40 * 2)
def test4(self):
# space-to-depth
x = Tensor(np.arange(10 * 20 * 30 * 40).reshape([10, 20, 30, 40]))
x = Tensor(np.arange(10 * 20 * 30 * 40, dtype=np.int32).reshape([10, 20, 30, 40]))
y = x.rearrange("b c (h h1) (w w1) -> b (h1 w1 c) h w", h1=2, w1=2)
assert tuple(y.shape) == (10, 20 * 4, 30 // 2, 40 // 2)
def test5(self):
# simple transposition
x = Tensor(np.arange(10 * 20 * 30 * 40).reshape([10, 20, 30, 40]))
x = Tensor(np.arange(10 * 20 * 30 * 40, dtype=np.int32).reshape([10, 20, 30, 40]))
y = x.rearrange("b1 sound b2 letter -> b1 b2 sound letter")
assert tuple(y.shape) == (10, 30, 20, 40)
def test6(self):
# parsing parameters
x = Tensor(np.arange(10 * 20 * 30 * 40).reshape([10, 20, 30, 40]))
x = Tensor(np.arange(10 * 20 * 30 * 40, dtype=np.int32).reshape([10, 20, 30, 40]))
t = x.rearrange("b c h w -> (b h w) c")
t = t[:, ::2] # replacement for dot-product, just changes size of second axis
assert tuple(t.shape) == (10 * 30 * 40, 10)
def test7(self):
x = Tensor(np.arange(10 * 20 * 30 * 40).reshape([10, 20, 30, 40]))
x = Tensor(np.arange(10 * 20 * 30 * 40, dtype=np.int32).reshape([10, 20, 30, 40]))
# split of embedding into groups
y1, y2 = x.rearrange("b (c g) h w -> g b c h w", g=2)
assert tuple(y1.shape) == (10, 10, 30, 40)
assert tuple(y2.shape) == (10, 10, 30, 40)
def test8(self):
x = Tensor(np.arange(10 * 20 * 1 * 1).reshape([10, 20, 1, 1]))
x = Tensor(np.arange(10 * 20 * 1 * 1, dtype=np.int32).reshape([10, 20, 1, 1]))
# squeeze - unsqueeze
y = x.rearrange("b c () () -> b c")
assert tuple(y.shape) == (10, 20)
@ -62,7 +62,7 @@ class test_rearrange_examples(unittest.TestCase):
assert tuple(y.shape) == (20, 10, 1, 1)
def test9(self):
x = Tensor(np.arange(10 * 20 * 1 * 1).reshape([10, 20, 1, 1]))
x = Tensor(np.arange(10 * 20 * 1 * 1, dtype=np.int32).reshape([10, 20, 1, 1]))
# squeeze - unsqueeze
y = x.rearrange("b c 1 1 -> b c")
assert tuple(y.shape) == (10, 20)
@ -164,7 +164,7 @@ class test_rearrange_ops(unittest.TestCase):
("a b c d e -> b (a c d) e", "a b ... e -> b (a ...) e"),
]
xnp = np.arange(2 * 3 * 4 * 5 * 6).reshape([2, 3, 4, 5, 6])
xnp = np.arange(2 * 3 * 4 * 5 * 6, dtype=np.int32).reshape([2, 3, 4, 5, 6])
x = Tensor(xnp)
for pattern in identity_patterns:
assert np.array_equal(xnp, x.rearrange(pattern).numpy()), pattern
@ -174,7 +174,7 @@ class test_rearrange_ops(unittest.TestCase):
def test_rearrange_consistency(self):
shape = [1, 2, 3, 5, 7, 11]
xnp = np.arange(np.prod(shape)).reshape(shape)
xnp = np.arange(np.prod(shape), dtype=np.int32).reshape(shape)
x = Tensor(xnp)
for pattern in [
"a b c d e f -> a b c d e f",
@ -205,7 +205,7 @@ class test_rearrange_ops(unittest.TestCase):
result = temp.rearrange("(f d) c (e b) a -> a b c d e f", **sizes).numpy()
assert np.array_equal(xnp, result)
x2 = np.arange(2 * 3 * 4).reshape([2, 3, 4])
x2 = np.arange(2 * 3 * 4, dtype=np.int32).reshape([2, 3, 4])
result = Tensor(x2).rearrange("a b c -> b c a").numpy()
assert x2[1, 2, 3] == result[2, 3, 1]
assert x2[0, 1, 2] == result[1, 2, 0]
@ -213,7 +213,7 @@ class test_rearrange_ops(unittest.TestCase):
def test_rearrange_permutations(self):
# tests random permutation of axes against two independent numpy ways
for n_axes in range(1, 10):
x = np.arange(2**n_axes).reshape([2] * n_axes)
x = np.arange(2**n_axes, dtype=np.int32).reshape([2] * n_axes)
permutation = np.random.permutation(n_axes)
left_expression = " ".join("i" + str(axis) for axis in range(n_axes))
right_expression = " ".join("i" + str(axis) for axis in permutation)
@ -224,7 +224,7 @@ class test_rearrange_ops(unittest.TestCase):
assert x[tuple(pick)] == result[tuple(pick[permutation])]
for n_axes in range(1, 10):
x = np.arange(2**n_axes).reshape([2] * n_axes)
x = np.arange(2**n_axes, dtype=np.int32).reshape([2] * n_axes)
permutation = np.random.permutation(n_axes)
left_expression = " ".join("i" + str(axis) for axis in range(n_axes)[::-1])
right_expression = " ".join("i" + str(axis) for axis in permutation[::-1])
@ -310,7 +310,7 @@ class test_rearrange_parsing(unittest.TestCase):
("a b … e -> b (a …) e", "a b ... e -> b (a ...) e"),
]
xnp = np.arange(2 * 3 * 4 * 5 * 6).reshape([2, 3, 4, 5, 6])
xnp = np.arange(2 * 3 * 4 * 5 * 6, dtype=np.int32).reshape([2, 3, 4, 5, 6])
x = Tensor(xnp)
for pattern1, pattern2 in equivalent_rearrange_patterns: