hotfix: fix dataset reading for new llm.c

2026-06-24 02:14:17 +00:00 · 2024-06-03 14:03:16 +02:00 · 2024-06-03 14:03:16 +02:00 · eecfdd2f6e
commit eecfdd2f6e
parent 6e0c16dfb0
1 changed file with 3 additions and 2 deletions
--- a/examples/llm.c/train_gpt2.py
+++ b/examples/llm.c/train_gpt2.py
@@ -142,7 +142,8 @@ if __name__ == "__main__":
  assert os.path.isfile(tokens_bin)
  print(f"loading cached tokens in {tokens_bin}")
  with open(tokens_bin, "rb") as f:
-    tokens = np.frombuffer(f.read(), dtype=np.int32)
+    f.seek(0x400)
+    tokens = np.frombuffer(f.read(), dtype=np.uint16).astype(np.int32)
  tokens = Tensor(tokens)

  # lightweight dataloader
@@ -161,7 +162,7 @@ if __name__ == "__main__":
  # forward backward for a few iterations
  data_iter = iter(get_batch())
  x, y = next(data_iter) # we'll overfit this batch below
-  optimizer = nn.optim.Adam(nn.state.get_parameters(model), lr=1e-4)
+  optimizer = nn.optim.AdamW(nn.state.get_parameters(model), lr=1e-4, weight_decay=0)

  @TinyJit
  def step(x, y):