clean up gguf (#16160)

This commit is contained in:
b1tg 2026-05-13 12:16:10 +08:00 committed by GitHub
commit 3c806ff406
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 39 additions and 38 deletions

View file

@ -115,33 +115,35 @@ class TestGGUF(unittest.TestCase):
with self.assertRaises(ValueError):
ggml_data_to_tensor(Tensor.empty(512, dtype=dtypes.uint8), 256, 1337)
def test_multi_part_load(self):
def build(n_total, part_no, tensors):
# [header] [kv_data] [tensor_infos] [padding] [tensor_data_blob]
buf = bytearray()
# Header: magic "GGUF" + version=3 + n_tensors + n_kv=2
buf += struct.pack("<4siqq", b"GGUF", 3, len(tensors), 2)
# KV entries: [key_len: uint64][key bytes][type: int32][value]
for k, v in [("split.count", n_total), ("split.no", part_no)]:
kb = k.encode()
buf += struct.pack("<Q", len(kb)) + kb + struct.pack("<i", 4) + struct.pack("<I", v)
data_off = 0
# Tensor infos: [name_len][name][ndims][dims reversed][qtype][offset_into_data_blob]
for name, dims, qtype, data in tensors:
nb = name.encode()
buf += struct.pack("<Q", len(nb)) + nb + struct.pack("<I", len(dims))
for d in reversed(dims): buf += struct.pack("<Q", d)
buf += struct.pack("<i", qtype) + struct.pack("<Q", data_off)
data_off += len(data)
buf += b"\x00" * ((32 - len(buf) % 32) % 32)
for _, _, _, data in tensors: buf += data
return bytes(buf)
@staticmethod
def _build_gguf(tensors, kvs):
  """
  Serialize a minimal GGUF v3 file image for tests.

  Layout: [header] [kv_data] [tensor_infos] [padding] [tensor_data_blob]

  Args:
    tensors: sequence of (name, dims, qtype, data) tuples, where data is the raw tensor bytes.
    kvs: sequence of (key, value) pairs. str values are written as strings (value type 8),
      every other value is written as a uint32 (value type 4).

  Returns:
    The complete file contents as bytes.
  """
  buf = bytearray()
  # Header: magic "GGUF" + version=3 + n_tensors + n_kv
  buf += struct.pack("<4siqq", b"GGUF", 3, len(tensors), len(kvs))
  # KV entries: [key_len: uint64][key bytes][type: int32][value]
  for k, v in kvs:
    kb = k.encode()
    buf += struct.pack("<Q", len(kb)) + kb
    if isinstance(v, str):
      # string value: [len: uint64][bytes]. The length prefix must be the encoded byte count,
      # which differs from len(v) as soon as the string contains non-ASCII characters.
      vb = v.encode()
      buf += struct.pack("<i", 8) + struct.pack("<Q", len(vb)) + vb
    else:
      buf += struct.pack("<i", 4) + struct.pack("<I", v)
  data_off = 0
  # Tensor infos: [name_len][name][ndims][dims reversed][qtype][offset_into_data_blob]
  for name, dims, qtype, data in tensors:
    nb = name.encode()
    buf += struct.pack("<Q", len(nb)) + nb + struct.pack("<I", len(dims))
    for d in reversed(dims): buf += struct.pack("<Q", d)
    buf += struct.pack("<i", qtype) + struct.pack("<Q", data_off)
    # tensors are packed back to back, so the next offset is the running byte total
    data_off += len(data)
  # zero-pad so the data blob starts on a 32-byte boundary
  buf += b"\x00" * (-len(buf) % 32)
  for _, _, _, data in tensors: buf += data
  return bytes(buf)
def test_multi_part_load(self):
with tempfile.TemporaryDirectory() as d:
d = pathlib.Path(d)
a, b = np.array([1.0, 2.0, 3.0, 4.0], dtype=np.float32), np.array([5.0, 6.0], dtype=np.float32)
(d / "test-00001-of-00002.gguf").write_bytes(build(2, 0, [("a", (4,), 0, a.tobytes())]))
(d / "test-00002-of-00002.gguf").write_bytes(build(2, 1, [("b", (2,), 0, b.tobytes())]))
(d / "test-00001-of-00002.gguf").write_bytes(self._build_gguf([("a", (4,), 0, a.tobytes())], [("split.count", 2), ("split.no", 0)]))
(d / "test-00002-of-00002.gguf").write_bytes(self._build_gguf([("b", (2,), 0, b.tobytes())], [("split.count", 2), ("split.no", 1)]))
kv, ts = gguf_load(d / "test-00001-of-00002.gguf")
self.assertEqual(kv["split.count"], 2)
np.testing.assert_equal(ts["a"].numpy(), a)

View file

@ -12,6 +12,14 @@ def _ggml_iq_grid(device: str, grid: tuple[int, ...], grid_shape: tuple[int, int
values = [float((w >> (8*i)) & 0xFF) for w in grid for i in range(grid_shape[1])]
return Tensor(values, dtype=dtypes.float32, device=device).reshape(grid_shape)
# ggml types stored as plain arrays, keyed by ggml_type id.
# The value is the tinygrad dtype that the raw tensor bytes are reinterpreted as.
_GGML_NATIVE = {
  0: dtypes.float32, 1: dtypes.float16,
  24: dtypes.int8, 25: dtypes.int16, 26: dtypes.int32, 27: dtypes.int64,
  28: dtypes.float64, 30: dtypes.bfloat16,
}
# block-quantized ggml types, keyed by ggml_type id.
# The value is (number of elements per block, number of bytes per block).
_GGML_QUANT = {
  # 32-element blocks
  2: (32, 18), 3: (32, 20), 6: (32, 22), 7: (32, 24), 8: (32, 34),
  # 256-element blocks
  12: (256, 144), 13: (256, 176), 14: (256, 210), 18: (256, 98), 21: (256, 110), 22: (256, 82), 23: (256, 136),
  # higher type ids: one 32-element and one 128-element layout
  39: (32, 17), 41: (128, 18),
}
def ggml_data_to_tensor(t: Tensor, n: int, ggml_type: int) -> Tensor:
"""
Converts ggml tensor data to a tinygrad tensor.
@ -24,11 +32,7 @@ def ggml_data_to_tensor(t: Tensor, n: int, ggml_type: int) -> Tensor:
"""
# https://github.com/ggerganov/ggml/blob/323951f1bdcdfbd5b5ff3a9a7c3770e63b1a560e/include/ggml.h#L356
# native types
if (dtype := {
0: dtypes.float32, 1: dtypes.float16, 24: dtypes.int8,
25: dtypes.int16, 26: dtypes.int32, 27: dtypes.int64, 28: dtypes.float64, 30: dtypes.bfloat16,
}.get(ggml_type)) is not None:
if (dtype := _GGML_NATIVE.get(ggml_type)) is not None:
return t[:dtype.itemsize * n].contiguous().bitcast(dtype)
def q_to_uint8(t: Tensor, b: int) -> Tensor:
@ -36,12 +40,7 @@ def ggml_data_to_tensor(t: Tensor, n: int, ggml_type: int) -> Tensor:
shift_tensor, bitmask = Tensor.stack(*[ Tensor(2**(i*b), device=t.device, dtype=t.dtype) for i in range(8//b) ]), 0xff >> (8 - b)
return t.unsqueeze(-1).expand((*t.shape,8//b)).div(shift_tensor, rounding_mode="trunc").bitwise_and(bitmask).transpose(-1, -2).flatten(-2)
# map to (number of elements, number of bytes)
if (nelements_nbytes := {
2:(32,18), 3:(32,20), 6:(32,22), 7:(32,24), 8:(32,34),
12:(256,144), 13:(256,176), 14:(256,210), 18:(256,98), 21:(256,110), 22:(256,82), 23:(256,136), 39:(32,17),
41:(128,18)
}.get(ggml_type)) is not None:
if (nelements_nbytes := _GGML_QUANT.get(ggml_type)) is not None:
from tinygrad.runtime.autogen import ggml_common as _ggml
blocks = t[:(n//nelements_nbytes[0])*nelements_nbytes[1]].reshape((-1, nelements_nbytes[1])).contiguous()
if ggml_type == 2: return (q_to_uint8(blocks[:,2:], 4).bitcast(dtypes.int8) - 8) * blocks[:,:2].bitcast(dtypes.float16).cast(dtypes.float32)
@ -132,6 +131,8 @@ readers: dict[int, Callable[[io.BufferedIOBase], Any]] = { 8: read_str, 9: read_
read_uint32, read_int32, read_uint64, read_int64 = readers[4], readers[5], readers[10], readers[11]
def _gguf_parse(tensor: Tensor) -> tuple[dict, dict[str, Tensor]]:
# TODO: remove the need for copy to default device
tensor = tensor.to(None).realize()
r = io.BufferedReader(TensorIO(tensor), 1_000_000)
magic, version, n_tensors, n_kv = r.read(4), read_int32(r), read_int64(r), read_int64(r)
if magic != b"GGUF" or version not in [2, 3]: raise ValueError("Invalid GGUF format!")
@ -169,10 +170,8 @@ def gguf_load(fn: Tensor|str|pathlib.Path) -> tuple[dict, dict[str, Tensor]]:
NOTE: The provided tensor must be on a device that supports execution.
"""
# TODO: remove the need for copy to default device
def load(p): return _gguf_parse(p if isinstance(p, Tensor) else Tensor(p).to(None).realize())
kv, sd = load(fn)
kv, sd = _gguf_parse(fn if isinstance(fn, Tensor) else Tensor(pathlib.Path(fn)))
if kv.get('split.count', 1) <= 1: return kv, sd
if isinstance(fn, Tensor): raise ValueError("multi-part GGUF requires a path argument (got Tensor)")
for pp in _gguf_split_paths(pathlib.Path(fn), kv)[1:]: sd.update(load(pp)[1])
for pp in _gguf_split_paths(pathlib.Path(fn), kv)[1:]: sd.update(_gguf_parse(Tensor(pp))[1])
return kv, sd