size*itemsize -> nbytes (#15729)

Also removes some uses of UOp.size, in preparation for the change that moves size to a mixin.
This commit is contained in:
chenyu 2026-04-14 16:27:54 -04:00 committed by GitHub
commit 3394d18066
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 10 additions and 10 deletions

View file

@ -445,7 +445,7 @@ After you are done speaking, output [EOS]. You are not Chad.
print(f"using LLaMA{LLAMA_SUFFIX}-{args.size} model")
device = tuple(f"{Device.DEFAULT}:{i}" for i in range(args.shard)) if args.shard > 1 else Device.DEFAULT
llama = LLaMa.build(MODEL_PATH, TOKENIZER_PATH, model_gen=args.gen, model_size=args.size, quantize=args.quantize, device=device)
param_bytes = sum(x.uop.size * x.dtype.itemsize for x in get_parameters(llama.model))
param_bytes = sum(x.nbytes() for x in get_parameters(llama.model))
outputted = pre_prompt if chatbot else args.prompt
start_pos, toks = 0, [llama.tokenizer.bos_id()] + llama.tokenizer.encode(outputted)

View file

@ -324,7 +324,7 @@ if __name__ == "__main__":
device = tuple(f"{Device.DEFAULT}:{i}" for i in range(args.shard)) if args.shard > 1 else Device.DEFAULT
model = build_transformer(args.model, model_size=args.size, quantize=args.quantize, device=device)
param_bytes = sum(x.uop.size * x.dtype.itemsize for x in get_parameters(model))
param_bytes = sum(x.nbytes() for x in get_parameters(model))
if not args.no_api and not args.benchmark:
from bottle import Bottle, request, response, HTTPResponse, abort, static_file

View file

@ -66,7 +66,7 @@ if __name__ == "__main__":
model_path = Path(args.weights) if args.weights else download_weights(model_info["total_num_weights"])
transformer = load_model(model_path, model_info["model_params"])
tokenizer = AutoTokenizer.from_pretrained(model_info["tokenizer"])
param_bytes = sum(x.uop.size * x.dtype.itemsize for x in get_parameters(transformer))
param_bytes = sum(x.nbytes() for x in get_parameters(transformer))
outputted = args.prompt
start_pos, toks = 0, tokenizer(outputted)["input_ids"]

View file

@ -44,9 +44,9 @@ nc = np.random.randn(N, N).astype(np.float32)
ns = nb.reshape(-1, 32).sum(axis=0)
a = MallocAllocator.alloc(na.size * np.dtype(np.float32).itemsize)
b = MallocAllocator.alloc(nb.size * np.dtype(np.float32).itemsize)
c = MallocAllocator.alloc(nc.size * np.dtype(np.float32).itemsize)
a = MallocAllocator.alloc(na.nbytes)
b = MallocAllocator.alloc(nb.nbytes)
c = MallocAllocator.alloc(nc.nbytes)
MallocAllocator._copyin(b, flat_mv(nb.data))
MallocAllocator._copyin(c, flat_mv(nc.data))

View file

@ -60,7 +60,7 @@ def helper_make_view(base, offset_elems, size_elems):
def helper_run_jit(jis, bufs, out_buffers):
for rawbuf in out_buffers:
mv = memoryview(bytearray(rawbuf.size * rawbuf.dtype.itemsize))
mv = memoryview(bytearray(rawbuf.nbytes))
ctypes.memset(from_mv(mv), 0, len(mv))
rawbuf.copyin(mv)

View file

@ -85,7 +85,7 @@ def run_jit(jis, all_buffers, input_buffers, var_vals):
with Context(DEBUG=0):
for rawbuf in all_buffers:
if rawbuf in input_buffers: continue
mv = memoryview(bytearray(rawbuf.size * rawbuf.dtype.itemsize))
mv = memoryview(bytearray(rawbuf.nbytes))
ctypes.memset(from_mv(mv), 0, len(mv))
rawbuf.copyin(mv)

View file

@ -65,7 +65,7 @@ def _make_buffer_view(src:UOp) -> UOp|None:
if (offset := src.contiguous_view_offset()) is None: return None
buf = src.base
if buf.op is Ops.BUFFER_VIEW: offset, buf = offset + buf.arg[1], buf.src[0]
return UOp(Ops.BUFFER_VIEW, src.dtype, (buf,), (src.size, offset)).reshape(src.shape)
return UOp(Ops.BUFFER_VIEW, src.dtype, (buf,), (src.numel(), offset)).reshape(src.shape)
def contiguous_mops_to_view(c:UOp, src:UOp):
"""CONTIGUOUS(MOPS(BUFFER)) → CONTIGUOUS(BUFFER_VIEW) when movement ops collapse to a contiguous range."""

View file

@ -57,7 +57,7 @@ def _frompy(x:list|tuple|bytes, dtype:DType, device:str|tuple[str,...]) -> UOp:
ret = UOp.new_buffer("PYTHON", prod(shape:=get_shape(x)), dtype).reshape(shape)
assert dtype.fmt is not None, f"{dtype=} has None fmt"
truncate_function = truncate[dtype]
data = struct.pack(f"{ret.size}{dtype.fmt}", *[truncate_function(dtype.const(xi)) for xi in fully_flatten(x)])
data = struct.pack(f"{prod(shape)}{dtype.fmt}", *[truncate_function(dtype.const(xi)) for xi in fully_flatten(x)])
# fake realize. if target device is PYTHON it needs bytearray to be writable
ret.buffer.allocate(memoryview(data if device != "PYTHON" else bytearray(data)))
return ret