mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-06-24 02:14:17 +00:00
hotfix: fix dataset reading for new llm.c
This commit is contained in:
parent
6e0c16dfb0
commit
eecfdd2f6e
1 changed file with 3 additions and 2 deletions
|
|
@@ -142,7 +142,8 @@ if __name__ == "__main__":
|
|||
assert os.path.isfile(tokens_bin)
|
||||
print(f"loading cached tokens in {tokens_bin}")
|
||||
with open(tokens_bin, "rb") as f:
|
||||
tokens = np.frombuffer(f.read(), dtype=np.int32)
|
||||
f.seek(0x400)
|
||||
tokens = np.frombuffer(f.read(), dtype=np.uint16).astype(np.int32)
|
||||
tokens = Tensor(tokens)
|
||||
|
||||
# lightweight dataloader
|
||||
|
|
@@ -161,7 +162,7 @@ if __name__ == "__main__":
|
|||
# forward backward for a few iterations
|
||||
data_iter = iter(get_batch())
|
||||
x, y = next(data_iter) # we'll overfit this batch below
|
||||
optimizer = nn.optim.Adam(nn.state.get_parameters(model), lr=1e-4)
|
||||
optimizer = nn.optim.AdamW(nn.state.get_parameters(model), lr=1e-4, weight_decay=0)
|
||||
|
||||
@TinyJit
|
||||
def step(x, y):
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue