size*itemsize -> nbytes (#15729)

Also removes some UOp.size uses to prepare for the size-to-mixin change.
2026-06-24 02:14:17 +00:00 · 2026-04-14 16:27:54 -04:00 · 2026-04-14 16:27:54 -04:00 · 3394d18066
commit 3394d18066
parent e9ecc990ea
8 changed files with 10 additions and 10 deletions
--- a/examples/llama.py
+++ b/examples/llama.py
@ -445,7 +445,7 @@ After you are done speaking, output [EOS]. You are not Chad.
  print(f"using LLaMA{LLAMA_SUFFIX}-{args.size} model")
  device = tuple(f"{Device.DEFAULT}:{i}" for i in range(args.shard)) if args.shard > 1 else Device.DEFAULT
  llama = LLaMa.build(MODEL_PATH, TOKENIZER_PATH, model_gen=args.gen, model_size=args.size, quantize=args.quantize, device=device)
-  param_bytes = sum(x.uop.size * x.dtype.itemsize for x in get_parameters(llama.model))
+  param_bytes = sum(x.nbytes() for x in get_parameters(llama.model))

  outputted = pre_prompt if chatbot else args.prompt
  start_pos, toks = 0, [llama.tokenizer.bos_id()] + llama.tokenizer.encode(outputted)
--- a/examples/llama3.py
+++ b/examples/llama3.py
@ -324,7 +324,7 @@ if __name__ == "__main__":

  device = tuple(f"{Device.DEFAULT}:{i}" for i in range(args.shard)) if args.shard > 1 else Device.DEFAULT
  model = build_transformer(args.model, model_size=args.size, quantize=args.quantize, device=device)
-  param_bytes = sum(x.uop.size * x.dtype.itemsize for x in get_parameters(model))
+  param_bytes = sum(x.nbytes() for x in get_parameters(model))

  if not args.no_api and not args.benchmark:
    from bottle import Bottle, request, response, HTTPResponse, abort, static_file
--- a/examples/qwq.py
+++ b/examples/qwq.py
@ -66,7 +66,7 @@ if __name__ == "__main__":
  model_path = Path(args.weights) if args.weights else download_weights(model_info["total_num_weights"])
  transformer = load_model(model_path, model_info["model_params"])
  tokenizer = AutoTokenizer.from_pretrained(model_info["tokenizer"])
-  param_bytes = sum(x.uop.size * x.dtype.itemsize for x in get_parameters(transformer))
+  param_bytes = sum(x.nbytes() for x in get_parameters(transformer))

  outputted = args.prompt
  start_pos, toks = 0, tokenizer(outputted)["input_ids"]
--- a/extra/gemm/amx.py
+++ b/extra/gemm/amx.py
@ -44,9 +44,9 @@ nc = np.random.randn(N, N).astype(np.float32)

 ns = nb.reshape(-1, 32).sum(axis=0)

-a = MallocAllocator.alloc(na.size * np.dtype(np.float32).itemsize)
-b = MallocAllocator.alloc(nb.size * np.dtype(np.float32).itemsize)
-c = MallocAllocator.alloc(nc.size * np.dtype(np.float32).itemsize)
+a = MallocAllocator.alloc(na.nbytes)
+b = MallocAllocator.alloc(nb.nbytes)
+c = MallocAllocator.alloc(nc.nbytes)

 MallocAllocator._copyin(b, flat_mv(nb.data))
 MallocAllocator._copyin(c, flat_mv(nc.data))
--- a/test/backend/test_graph.py
+++ b/test/backend/test_graph.py
@ -60,7 +60,7 @@ def helper_make_view(base, offset_elems, size_elems):

 def helper_run_jit(jis, bufs, out_buffers):
  for rawbuf in out_buffers:
-    mv = memoryview(bytearray(rawbuf.size * rawbuf.dtype.itemsize))
+    mv = memoryview(bytearray(rawbuf.nbytes))
    ctypes.memset(from_mv(mv), 0, len(mv))
    rawbuf.copyin(mv)

--- a/test/external/fuzz_graph.py
+++ b/test/external/fuzz_graph.py
@ -85,7 +85,7 @@ def run_jit(jis, all_buffers, input_buffers, var_vals):
  with Context(DEBUG=0):
    for rawbuf in all_buffers:
      if rawbuf in input_buffers: continue
-      mv = memoryview(bytearray(rawbuf.size * rawbuf.dtype.itemsize))
+      mv = memoryview(bytearray(rawbuf.nbytes))
      ctypes.memset(from_mv(mv), 0, len(mv))
      rawbuf.copyin(mv)

--- a/tinygrad/engine/callify.py
+++ b/tinygrad/engine/callify.py
@ -65,7 +65,7 @@ def _make_buffer_view(src:UOp) -> UOp|None:
  if (offset := src.contiguous_view_offset()) is None: return None
  buf = src.base
  if buf.op is Ops.BUFFER_VIEW: offset, buf = offset + buf.arg[1], buf.src[0]
-  return UOp(Ops.BUFFER_VIEW, src.dtype, (buf,), (src.size, offset)).reshape(src.shape)
+  return UOp(Ops.BUFFER_VIEW, src.dtype, (buf,), (src.numel(), offset)).reshape(src.shape)

 def contiguous_mops_to_view(c:UOp, src:UOp):
  """CONTIGUOUS(MOPS(BUFFER)) → CONTIGUOUS(BUFFER_VIEW) when movement ops collapse to a contiguous range."""
--- a/tinygrad/tensor.py
+++ b/tinygrad/tensor.py
@ -57,7 +57,7 @@ def _frompy(x:list|tuple|bytes, dtype:DType, device:str|tuple[str,...]) -> UOp:
    ret = UOp.new_buffer("PYTHON", prod(shape:=get_shape(x)), dtype).reshape(shape)
    assert dtype.fmt is not None, f"{dtype=} has None fmt"
    truncate_function = truncate[dtype]
-    data = struct.pack(f"{ret.size}{dtype.fmt}", *[truncate_function(dtype.const(xi)) for xi in fully_flatten(x)])
+    data = struct.pack(f"{prod(shape)}{dtype.fmt}", *[truncate_function(dtype.const(xi)) for xi in fully_flatten(x)])
  # fake realize. if target device is PYTHON it needs bytearray to be writable
  ret.buffer.allocate(memoryview(data if device != "PYTHON" else bytearray(data)))
  return ret