mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-06-24 02:14:17 +00:00
DEV is ContextVar, setting Device.DEFAULT is deprecated (#15508)
This commit is contained in:
parent
9583489068
commit
adbfd82d1d
10 changed files with 72 additions and 59 deletions
|
|
@ -2,13 +2,14 @@
|
|||
import os
|
||||
if "NOOPT" not in os.environ: os.environ["NOOPT"] = "1"
|
||||
from tinygrad import Device, nn, Tensor, dtypes
|
||||
Device.DEFAULT = "CPU"
|
||||
from train_gpt2 import GPT, GPTConfig
|
||||
from tinygrad.helpers import dedup, flatten, getenv, GlobalCounters, to_function_name
|
||||
from tinygrad.helpers import DEV, dedup, flatten, getenv, GlobalCounters, to_function_name
|
||||
from tinygrad.engine.realize import get_kernel
|
||||
from tinygrad.engine.memory import memory_planner
|
||||
from tinygrad.uop.ops import Ops
|
||||
|
||||
DEV.value = "CPU"
|
||||
|
||||
TIMING = getenv("TIMING")
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
|
|||
|
|
@ -325,19 +325,18 @@ def eval_stable_diffusion():
|
|||
# NOTE: the clip weights are the same between model.cond_stage_model and clip_encoder
|
||||
eval_timesteps = list(reversed(range(1, 1000, 20)))
|
||||
|
||||
original_device, Device.DEFAULT = Device.DEFAULT, "CPU"
|
||||
# The choice of alphas_prev[0] = alphas_cumprod[0] seems arbitrary, but it's how the mlperf ref does it:
|
||||
# alphas_prev = np.asarray([alphacums[0]] + alphacums[ddim_timesteps[:-1]].tolist())
|
||||
eval_alphas_prev = model.alphas_cumprod[0:1].cat(model.alphas_cumprod[list(range(1, 1000, 20))[:-1]]).to(GPUS).realize()
|
||||
inception = FidInceptionV3().load_from_pretrained(CKPTDIR / "inception" / "pt_inception-2015-12-05-6726825d.pth")
|
||||
vision_cfg = {'width': 1280, 'layers': 32, 'd_head': 80, 'image_size': 224, 'patch_size': 14}
|
||||
text_cfg = {'width': 1024, 'n_heads': 16, 'layers': 24, 'vocab_size': 49408, 'ctx_length': 77}
|
||||
clip.gelu = gelu_erf
|
||||
clip_encoder = OpenClipEncoder(1024, text_cfg, vision_cfg)
|
||||
loaded = torch_load(CKPTDIR / "clip" / "open_clip_pytorch_model.bin")
|
||||
loaded.update({"attn_mask": clip_encoder.attn_mask, "mean": clip_encoder.mean, "std": clip_encoder.std})
|
||||
load_state_dict(clip_encoder, loaded)
|
||||
Device.DEFAULT=original_device
|
||||
with Context(DEV="CPU"):
|
||||
# The choice of alphas_prev[0] = alphas_cumprod[0] seems arbitrary, but it's how the mlperf ref does it:
|
||||
# alphas_prev = np.asarray([alphacums[0]] + alphacums[ddim_timesteps[:-1]].tolist())
|
||||
eval_alphas_prev = model.alphas_cumprod[0:1].cat(model.alphas_cumprod[list(range(1, 1000, 20))[:-1]]).to(GPUS).realize()
|
||||
inception = FidInceptionV3().load_from_pretrained(CKPTDIR / "inception" / "pt_inception-2015-12-05-6726825d.pth")
|
||||
vision_cfg = {'width': 1280, 'layers': 32, 'd_head': 80, 'image_size': 224, 'patch_size': 14}
|
||||
text_cfg = {'width': 1024, 'n_heads': 16, 'layers': 24, 'vocab_size': 49408, 'ctx_length': 77}
|
||||
clip.gelu = gelu_erf
|
||||
clip_encoder = OpenClipEncoder(1024, text_cfg, vision_cfg)
|
||||
loaded = torch_load(CKPTDIR / "clip" / "open_clip_pytorch_model.bin")
|
||||
loaded.update({"attn_mask": clip_encoder.attn_mask, "mean": clip_encoder.mean, "std": clip_encoder.std})
|
||||
load_state_dict(clip_encoder, loaded)
|
||||
|
||||
@TinyJit
|
||||
def denoise_step(x:Tensor, x_x:Tensor, t_t:Tensor, uc_c:Tensor, sqrt_alphas_cumprod_t:Tensor, sqrt_one_minus_alphas_cumprod_t:Tensor,
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@ from extra.export_model import export_model
|
|||
from examples.llama3 import build_transformer, Tokenizer
|
||||
from tinygrad.nn.state import get_state_dict, load_state_dict
|
||||
from tinygrad import Device, Variable, Tensor, dtypes, TinyJit
|
||||
from tinygrad.helpers import fetch, Context
|
||||
from tinygrad.helpers import DEV, fetch, Context
|
||||
from tiktoken.load import load_tiktoken_bpe, dump_tiktoken_bpe
|
||||
|
||||
def prepare_browser_chunks(model):
|
||||
|
|
@ -115,7 +115,7 @@ if __name__=="__main__":
|
|||
start_pos = Variable("start_pos", 0, max_context).bind(0)
|
||||
model_input = lambda: [Tensor([[tok]]), start_pos, TEMPERATURE, TOP_K, TOP_P, ALPHA_F, ALPHA_P]
|
||||
|
||||
Device.DEFAULT="CPU"
|
||||
DEV.value = "CPU"
|
||||
model = build_transformer(model_path, model_size="1B", quantize="int8", scale_dtype=dtypes.float32, device=Device.DEFAULT, max_context=max_context)
|
||||
state_dict = get_state_dict(model)
|
||||
validate_model(model, tokenizer)
|
||||
|
|
@ -129,7 +129,7 @@ if __name__=="__main__":
|
|||
with open(os.path.join(os.path.dirname(__file__), f"{model_name}.c"), "w") as f: f.write(cprog)
|
||||
with open(os.path.join(os.path.dirname(__file__), "net_clang.js"), "w") as f: f.write(js_wrapper)
|
||||
|
||||
Device.DEFAULT="WEBGPU"
|
||||
DEV.value = "WEBGPU"
|
||||
# float16 is not yet supported for dawn/Vulkan/NVIDIA stack, see: https://issues.chromium.org/issues/42251215
|
||||
# therefore for now, we used CLANG to quantize the float16 llama to int8 with float32 scales, then load to WEBGPU
|
||||
model = build_transformer(model_path, model_size="1B", quantize="int8", max_context=max_context, load_weights=False)
|
||||
|
|
|
|||
|
|
@ -4,8 +4,8 @@ from extra.f16_decompress import u32_to_f16
|
|||
from examples.stable_diffusion import StableDiffusion
|
||||
from tinygrad.nn.state import get_state_dict, safe_save, safe_load_metadata, torch_load, load_state_dict
|
||||
from tinygrad.tensor import Tensor
|
||||
from tinygrad import Device, dtypes
|
||||
from tinygrad.helpers import fetch
|
||||
from tinygrad import dtypes
|
||||
from tinygrad.helpers import DEV, fetch
|
||||
from typing import NamedTuple, Any, List
|
||||
import requests
|
||||
import argparse
|
||||
|
|
@ -80,7 +80,7 @@ if __name__ == "__main__":
|
|||
parser = argparse.ArgumentParser(description='Run Stable Diffusion', formatter_class=argparse.ArgumentDefaultsHelpFormatter)
|
||||
parser.add_argument('--remoteweights', action='store_true', help="Use safetensors from Huggingface, or from local")
|
||||
args = parser.parse_args()
|
||||
Device.DEFAULT = "WEBGPU"
|
||||
DEV.value = "WEBGPU"
|
||||
|
||||
model = StableDiffusion()
|
||||
|
||||
|
|
|
|||
|
|
@ -4,10 +4,11 @@ from tinygrad.tensor import Tensor
|
|||
from tinygrad.nn.state import safe_save
|
||||
from extra.export_model import export_model
|
||||
from tinygrad.device import Device
|
||||
from tinygrad.helpers import DEV
|
||||
from tinygrad.nn.state import safe_load, load_state_dict
|
||||
|
||||
if __name__ == "__main__":
|
||||
Device.DEFAULT = "WEBGPU"
|
||||
DEV.value = "WEBGPU"
|
||||
yolo_variant = 'n'
|
||||
yolo_infer = YOLOv8(w=0.25, r=2.0, d=0.33, num_classes=80)
|
||||
state_dict = safe_load(get_weights_location(yolo_variant))
|
||||
|
|
|
|||
|
|
@ -1,11 +1,11 @@
|
|||
from tinygrad import nn, Tensor, Device, dtypes
|
||||
from tinygrad.helpers import Timing
|
||||
from tinygrad import nn, Tensor, dtypes
|
||||
from tinygrad.helpers import DEV, Timing
|
||||
|
||||
from extra.models.llama import Transformer
|
||||
from examples.llama3 import MODEL_PARAMS
|
||||
|
||||
if __name__ == "__main__":
|
||||
Device.DEFAULT = "NULL"
|
||||
DEV.value = "NULL"
|
||||
Tensor.training = True
|
||||
#model_size = "8B"
|
||||
model_size = "405B"
|
||||
|
|
|
|||
48
test/external/external_model_benchmark.py
vendored
48
test/external/external_model_benchmark.py
vendored
|
|
@ -7,7 +7,7 @@ from onnx2torch import convert
|
|||
from tinygrad.nn.onnx import OnnxRunner
|
||||
from tinygrad.helpers import OSX, DEBUG, fetch, getenv
|
||||
from tinygrad.dtype import _to_np_dtype
|
||||
from tinygrad import Tensor, Device, dtypes
|
||||
from tinygrad import Tensor, Device, Context, dtypes
|
||||
|
||||
MODELS = {
|
||||
"resnet50": "https://github.com/onnx/models/raw/main/validated/vision/classification/resnet/model/resnet50-caffe2-v1-9.onnx",
|
||||
|
|
@ -60,16 +60,16 @@ def benchmark_model(m, devices, validate_outs=False):
|
|||
# print input names
|
||||
if DEBUG >= 2: print(list(runner.graph_inputs))
|
||||
for device in devices:
|
||||
Device.DEFAULT = device
|
||||
inputs = {k:Tensor(inp) for k,inp in np_inputs.items()}
|
||||
tinygrad_model = runner.to(device)
|
||||
benchmark(m, f"tinygrad_{device.lower()}_jitless", lambda: {k:v.numpy() for k,v in tinygrad_model(inputs).items()})
|
||||
with Context(DEV=device):
|
||||
inputs = {k:Tensor(inp) for k,inp in np_inputs.items()}
|
||||
tinygrad_model = runner.to(device)
|
||||
benchmark(m, f"tinygrad_{device.lower()}_jitless", lambda: {k:v.numpy() for k,v in tinygrad_model(inputs).items()})
|
||||
|
||||
from tinygrad.engine.jit import TinyJit
|
||||
tinygrad_jitted_model = TinyJit(lambda **kwargs: {k:v.realize() for k,v in tinygrad_model(kwargs).items()})
|
||||
for _ in range(3): {k:v.numpy() for k,v in tinygrad_jitted_model(**inputs).items()}
|
||||
benchmark(m, f"tinygrad_{device.lower()}_jit", lambda: {k:v.numpy() for k,v in tinygrad_jitted_model(**inputs).items()}) # noqa: F821
|
||||
del inputs, tinygrad_model, tinygrad_jitted_model
|
||||
from tinygrad.engine.jit import TinyJit
|
||||
tinygrad_jitted_model = TinyJit(lambda **kwargs: {k:v.realize() for k,v in tinygrad_model(kwargs).items()})
|
||||
for _ in range(3): {k:v.numpy() for k,v in tinygrad_jitted_model(**inputs).items()}
|
||||
benchmark(m, f"tinygrad_{device.lower()}_jit", lambda: {k:v.numpy() for k,v in tinygrad_jitted_model(**inputs).items()}) # noqa: F821
|
||||
del inputs, tinygrad_model, tinygrad_jitted_model
|
||||
|
||||
# convert model to torch
|
||||
try:
|
||||
|
|
@ -104,22 +104,22 @@ def benchmark_model(m, devices, validate_outs=False):
|
|||
if validate_outs:
|
||||
for device in devices:
|
||||
rtol, atol = 2e-3, 2e-3 # tolerance for fp16 models
|
||||
Device.DEFAULT = device
|
||||
# force half inputs to float for numerical stability when validating
|
||||
# this will rely on automatic dtype promotion for converting half weights inside the graph
|
||||
if m in half_models:
|
||||
inputs = {k:Tensor(inp, dtype=dtypes.float32) if inp.dtype == np.float16 else Tensor(inp) for k,inp in np_inputs.items()}
|
||||
else:
|
||||
inputs = {k:Tensor(inp) for k,inp in np_inputs.items()}
|
||||
tinygrad_model = runner.to(device)
|
||||
tinygrad_out = tinygrad_model(inputs)
|
||||
with Context(DEV=device):
|
||||
# force half inputs to float for numerical stability when validating
|
||||
# this will rely on automatic dtype promotion for converting half weights inside the graph
|
||||
if m in half_models:
|
||||
inputs = {k:Tensor(inp, dtype=dtypes.float32) if inp.dtype == np.float16 else Tensor(inp) for k,inp in np_inputs.items()}
|
||||
else:
|
||||
inputs = {k:Tensor(inp) for k,inp in np_inputs.items()}
|
||||
tinygrad_model = runner.to(device)
|
||||
tinygrad_out = tinygrad_model(inputs)
|
||||
|
||||
ort_sess = ort.InferenceSession(str(fn), ort_options, ["CPUExecutionProvider"])
|
||||
onnx_out = ort_sess.run(output_names, np_inputs)
|
||||
onnx_out = dict([*list(zip(output_names, onnx_out))])
|
||||
ort_sess = ort.InferenceSession(str(fn), ort_options, ["CPUExecutionProvider"])
|
||||
onnx_out = ort_sess.run(output_names, np_inputs)
|
||||
onnx_out = dict([*list(zip(output_names, onnx_out))])
|
||||
|
||||
assert_allclose(tinygrad_out, onnx_out, rtol=rtol, atol=atol)
|
||||
print(f"{m:16s}outputs validated on {device=} with rtol={rtol:.1e}, atol={atol:.1e}")
|
||||
assert_allclose(tinygrad_out, onnx_out, rtol=rtol, atol=atol)
|
||||
print(f"{m:16s}outputs validated on {device=} with rtol={rtol:.1e}, atol={atol:.1e}")
|
||||
|
||||
if open_csv is None:
|
||||
open_csv = csv.DictWriter(open('onnx_inference_speed.csv', 'w', newline=''), fieldnames=list(CSV.keys()))
|
||||
|
|
|
|||
|
|
@ -26,13 +26,16 @@ class TestDevice(unittest.TestCase):
|
|||
|
||||
def test_lowercase_canonicalizes(self):
|
||||
device = Device.DEFAULT
|
||||
Device.DEFAULT = device.lower()
|
||||
self.assertEqual(Device.canonicalize(None), device)
|
||||
Device.DEFAULT = device
|
||||
with Context(DEV=device.lower()):
|
||||
self.assertEqual(Device.canonicalize(None), device)
|
||||
|
||||
def test_set_device_default_raises(self):
|
||||
with self.assertRaisesRegex(AttributeError, "setting Device.DEFAULT is deprecated"):
|
||||
Device.DEFAULT = "CPU"
|
||||
|
||||
def test_old_device_env_raises(self):
|
||||
result = subprocess.run(['python3', '-c', 'from tinygrad import Device; Device.DEFAULT'],
|
||||
env={**os.environ, "CPU": "1"}, capture_output=True)
|
||||
env={**os.environ, "CPU": "1", "DEV": ""}, capture_output=True)
|
||||
self.assertNotEqual(result.returncode, 0)
|
||||
self.assertIn(b"deprecated", result.stderr)
|
||||
|
||||
|
|
@ -95,6 +98,12 @@ class TestDevice(unittest.TestCase):
|
|||
with patch("tinygrad.renderer.cstyle.ClangJITRenderer.__init__", side_effect=RuntimeError("broken")):
|
||||
self.assertIsInstance(dev.renderer.compiler, CPULLVMCompiler)
|
||||
|
||||
def test_dev_contextvar(self):
|
||||
orig_dev = Device.DEFAULT
|
||||
with Context(DEV="CPU"): self.assertEqual(Tensor.empty(1).device, "CPU")
|
||||
with Context(DEV="NULL"): self.assertEqual(Tensor.empty(1).device, "NULL")
|
||||
self.assertEqual(Tensor.empty(1).device, orig_dev)
|
||||
|
||||
class MockCompiler(Compiler):
|
||||
def __init__(self, key): super().__init__(key)
|
||||
def compile(self, src) -> bytes: return src.encode()
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ from typing import Any, Generic, TypeVar, Iterator, Generator, TYPE_CHECKING
|
|||
import importlib, inspect, functools, pathlib, os, platform, contextlib, sys, re, atexit, pickle, decimal
|
||||
from tinygrad.helpers import BENCHMARKS, CI, OSX, LRU, getenv, diskcache_get, diskcache_put, DEBUG, GlobalCounters, flat_mv, PROFILE, temp, colored
|
||||
from tinygrad.helpers import Context, CCACHE, ALLOW_DEVICE_USAGE, MAX_BUFFER_SIZE, cpu_events, ProfileEvent, ProfilePointEvent, ContextVar
|
||||
from tinygrad.helpers import unwrap_class_type, suppress_finalizing, select_first_inited, VIZ, CPU_LLVM, CPU_LVP, NV_PTX, CUDA_PTX, NV_NAK
|
||||
from tinygrad.helpers import unwrap_class_type, suppress_finalizing, select_first_inited, DEV, VIZ, CPU_LLVM, CPU_LVP, NV_PTX, CUDA_PTX, NV_NAK
|
||||
from tinygrad.helpers import EMULATE, EMULATED_DTYPES, NULL_IR3, NULL_QCOMCL, IMAGE, FLOAT16, TracingKey, size_to_str
|
||||
from tinygrad.dtype import DType, PtrDType, dtypes, _to_np_dtype
|
||||
if TYPE_CHECKING: from tinygrad.renderer import Renderer
|
||||
|
|
@ -39,11 +39,14 @@ class _Device:
|
|||
def get_available_devices(self) -> Iterator[str]:
|
||||
for device in ALL_DEVICES:
|
||||
with contextlib.suppress(Exception): yield self[device].device
|
||||
@property
|
||||
def DEFAULT(self) -> str: return DEV.value.upper() if DEV else self._select_device
|
||||
@DEFAULT.setter
|
||||
def DEFAULT(self, v): raise AttributeError(f'setting Device.DEFAULT is deprecated, use "with Context(DEV={v!r})" or "DEV.value = {v!r}"')
|
||||
@functools.cached_property
|
||||
def DEFAULT(self) -> str:
|
||||
def _select_device(self) -> str:
|
||||
assert (dev:=next((d for d in self._devices if d not in ["DISK", "TINYFS", "NPY"] and getenv(d) == 1), None)) is None, \
|
||||
f"{dev}=1 is deprecated, use DEV={dev} instead"
|
||||
if (dev:=getenv("DEV", "").upper()): return dev
|
||||
try:
|
||||
device = next(self.get_available_devices())
|
||||
os.environ["DEV"] = device # we set this in environment for spawned children
|
||||
|
|
|
|||
|
|
@ -176,7 +176,7 @@ class ContextVar(Generic[T]):
|
|||
assert isinstance(self.value, str)
|
||||
return [getattr(obj, x) if obj else x for x in self.value.split(',') if x]
|
||||
|
||||
DEBUG, BEAM, NOOPT = ContextVar("DEBUG", 0), ContextVar("BEAM", 0), ContextVar("NOOPT", 0)
|
||||
DEV, DEBUG, BEAM, NOOPT = ContextVar("DEV", ""), ContextVar("DEBUG", 0), ContextVar("BEAM", 0), ContextVar("NOOPT", 0)
|
||||
IMAGE, FLOAT16, OPENPILOT_HACKS = ContextVar("IMAGE", 0), ContextVar("FLOAT16", 0), ContextVar("OPENPILOT_HACKS", 0)
|
||||
JIT, JIT_BATCH_SIZE = ContextVar("JIT", 2 if OSX and ARCH_X86 else 1), ContextVar("JIT_BATCH_SIZE", 32)
|
||||
WINO, CAPTURING, TRACEMETA = ContextVar("WINO", 0), ContextVar("CAPTURING", 1), ContextVar("TRACEMETA", 1)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue