mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-06-24 02:14:17 +00:00
* 1
* 83 failed
* learning how git works
* lol idk
* zero shape aaaa
* space lol
* aaa
* test check
* haha
* fixed gather
* 73 failing
* 71 failing
* 68 failing
* added some debug
* fking resize
* lol
* 62 failing
* 58 failling fucking did nearest resize hell yeah
* clean up
* 56 failing
* janitor duty
* lol
* 53 failing
* hi mom
* 50 failing
* added linear interp, but coord_trans is wrong
* did lin interpolation woohoo
* 43 failing
* 40 failing
* temporary Gather fix
* 39 failing
* fixed slice onnxver<10
* 37 failing
* 35 failing
* excluded tests that use float64
* 32 failing with hacks
* added _batchnorm() for 3D 5D batchnorm, 29 failing
* changed ALLOWED_KERNEL_COUNT from 199 to 207
* added improved Gather op, reverted ALLOWED_KERNEL_COUNT commit
* support Round op
* added storage_order/indices maxpool, 27 failing
* support maxunpool, 25 failures
* support Gradient, 23 failures
* merged new where
* added Adam
* cleanups
* added Momentum and Nesterov Momentum
* added Adagrad
* support sequence_type, 20 failing
* ugh git
* I give up on cubic interp :D, 9 failing
* sexy 1 liner gather, much improved, wow
* polished gather to make it shine bright like a diamond
* clean 1 liner for gather
* improved readability of gather
* uhh
* clean up
* more clean up
* WHITEspace
* implemented SoftmaxCrossEntropyLoss op
* added comments and cleaned up if statements
* update
* thank based wozeparrot for pow and new GatherElements
* CPU and TORCH all pass | cast float64 -> float32 for all fromCPU()
* _nearest_gather() failing on yolo
* reverted ops_cpu change and added assert in Resize
* added comments for resize for multiple channels
* oops
* merge
* test
* switched np.pad to Tensor.pad for constant padding
* gah
* gah2
* sexy reflect pad with movementops -> add
* delete commented out lines
* edge mode pad sexy as well
* trying out model_benchmark
* revert gitignore change lol
* init
* Revert "init"
This reverts commit 682bf2073a.
* wrote cast workaround for CPU, CPU and TORCH all pass
* wrote cast workaround for CPU, CPU and TORCH all pass
* skipped tests w/ 0 shape for METAL and GPU
* excluded tests for CLANG, CPU, TORCH, CLANG pass
* fixed hacky ConvTranspose
* gotta figure out autopad
* UOps.STORE support cast bool -> float
* small fix for fast gather
* reverted 0 shape skipped tests
* oops missed a file
* added comment
* fixed slice op hack
* First commit to pr
* More trig ops
* More trig ops
* format
* isinf support
* More ops
* changed onnx_ops to use our new gather :D
* Det op bug fix
* rebase
* fixed some tests
* det broken and slow
* fixed compress to use new gather
* implemented argmax argmin
* support variable types in type_proto
* support Upsample and Identity sequence
* we support float64 now and tinygrad support automatic broadcasting
* added EyeLike op
* resize does support multiple channels now actually
* yolov8 onnx runs successfully
* added batch size 1
* oops
* finally fixed type_proto I think
* fixed some llvm bugs
* del whitespaces
* added ZenginU Format PR
* test
* oops
* added float64 exclude tests back
* more skipped tests
* try
* ok openpilot pass
* flake8 pass
* woooooohooo
* revert external_model_benchmark changes
* perf tested gather
* removed promote types from ops_cpu
* numerical errors from 1681 is fixed
---------
Co-authored-by: ZenginU <umutzengin00@gmail.com>
50 lines
3.5 KiB
Python
50 lines
3.5 KiB
Python
import numpy as np
|
|
import operator
|
|
from typing import Callable, Dict, Tuple, Optional
|
|
from tinygrad.helpers import dtypes, DType
|
|
from tinygrad.ops import UnaryOps, BinaryOps, MovementOps, ReduceOps, TernaryOps, Op, Interpreted
|
|
from tinygrad.runtime.lib import RawBuffer
|
|
|
|
def shape_to_axis(old_shape:Tuple[int, ...], new_shape:Tuple[int, ...]) -> Tuple[int, ...]:
|
|
assert len(old_shape) == len(new_shape), "reduce shapes must have same dimensions"
|
|
return tuple(i for i,(a,b) in enumerate(zip(old_shape, new_shape)) if a != b)
|
|
|
|
base_fxn_for_op: Dict[Op, Callable] = {
|
|
UnaryOps.NEG: operator.neg, BinaryOps.ADD: operator.add, BinaryOps.SUB: operator.sub, BinaryOps.MUL: operator.mul, BinaryOps.DIV: operator.truediv,
|
|
ReduceOps.SUM: lambda x, new_shape: x.sum(shape_to_axis(x.shape, new_shape), keepdims=True) if tuple(x.shape) != tuple(new_shape) else x[:],
|
|
ReduceOps.MAX: lambda x, new_shape: (x.amax if hasattr(x, 'amax') else x.max)(shape_to_axis(x.shape, new_shape), keepdims=True) if tuple(x.shape) != tuple(new_shape) else x[:],
|
|
MovementOps.RESHAPE: lambda x, arg: x.reshape(arg), MovementOps.SHRINK: lambda x, arg: x[tuple(slice(p[0], p[1], None) for p in arg)],
|
|
}
|
|
|
|
def match_types(x, y):
|
|
up = x.dtype if dtypes.from_np(x.dtype).priority > dtypes.from_np(y.dtype).priority else y.dtype
|
|
return x.astype(up, copy=False), y.astype(up, copy=False)
|
|
|
|
def einsum_mulacc(einsum, get_strides, expand):
|
|
def einscripts(x): return ''.join(["abcdefghijklmnopqrstuvwxyz"[i] for i in x])
|
|
def axes_slice(strides): return [i for i,s in enumerate(strides) if s != 0], tuple([slice(None) if s != 0 else 0 for i,s in enumerate(strides)])
|
|
def mulacc(a, b, new_shape):
|
|
(a_axes, a_slices), (b_axes, b_slices) = axes_slice(get_strides(a)), axes_slice(get_strides(b))
|
|
out = [i for i in range(len(new_shape)) if a.shape[i] == new_shape[i] and (i in a_axes or i in b_axes)]
|
|
ret = einsum(f"{einscripts(a_axes)}, {einscripts(b_axes)} -> {einscripts(out)}", a[a_slices], b[b_slices])
|
|
return expand(ret.reshape([(1 if i not in a_axes and i not in b_axes else s) for i,s in enumerate(new_shape)]), new_shape)
|
|
return mulacc
|
|
|
|
numpy_fxn_for_op: Dict[Op, Callable] = {**base_fxn_for_op, **{
|
|
UnaryOps.NOOP: lambda x: np.require(x, requirements='C'), UnaryOps.EXP2: np.exp2, UnaryOps.LOG2: np.log2, UnaryOps.SIN: np.sin,
|
|
UnaryOps.CAST: lambda x,y: x.view(y[0].np) if y[1] else x.astype(y[0].np, copy=False),
|
|
BinaryOps.MAX: np.maximum, BinaryOps.CMPLT: lambda x,y: (x<y).astype(np.promote_types(x.dtype,y.dtype)), BinaryOps.ADD: lambda x, y: np.add(*match_types(x, y)),
|
|
BinaryOps.SUB: lambda x, y: np.subtract(*match_types(x, y)), BinaryOps.MUL: lambda x, y: np.multiply(*match_types(x, y)),
|
|
BinaryOps.DIV: lambda x, y: np.divide(*match_types(x, y)), UnaryOps.SQRT: np.sqrt,
|
|
MovementOps.PERMUTE: lambda x, order: x.transpose(order), MovementOps.PAD: np.pad, MovementOps.EXPAND: np.broadcast_to,
|
|
MovementOps.STRIDE: lambda x, arg: x[tuple(slice(None, None, i) for i in arg)],
|
|
TernaryOps.MULACC: einsum_mulacc(lambda s,a,b: np.einsum(s, *match_types(a.copy(), b.copy()), optimize=True), lambda x: x.strides, np.broadcast_to),
|
|
TernaryOps.WHERE: np.where,
|
|
}}
|
|
|
|
class RawNumpyBuffer(RawBuffer):
|
|
def __init__(self, size:int, dtype:DType, buf:Optional[np.ndarray]=None): super().__init__(size, dtype, buf if buf is not None else np.empty([size], dtype.np))
|
|
@classmethod
|
|
def fromCPU(cls, x): return cls(x.size, dtypes.from_np(x.dtype), x)
|
|
def toCPU(self): return self._buf
|
|
CPUBuffer = Interpreted(RawNumpyBuffer, numpy_fxn_for_op, from_underlying=RawNumpyBuffer.fromCPU)
|