mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-06-24 02:14:17 +00:00
llama3: data seed (#14681)
This commit is contained in:
parent
0c63f63ee4
commit
4b5d3bda1f
3 changed files with 5 additions and 2 deletions
|
|
@@ -1294,6 +1294,7 @@ def train_llama3():
|
|||
grad_acc = config["GRADIENT_ACC_STEPS"] = getenv("GRADIENT_ACC_STEPS", 1)
|
||||
GBS = config["GLOBAL_BATCH_SIZE"] = BS * grad_acc
|
||||
SEED = config["SEED"] = getenv("SEED", 5760)
|
||||
+DATA_SEED = config["DATA_SEED"] = getenv("DATA_SEED", SEED)
|
||||
SEQLEN = config["SEQLEN"] = getenv("SEQLEN", 8192)
|
||||
TRAIN_ON_VAL = config["TRAIN_ON_VAL"] = getenv("TRAIN_ON_VAL", 0)
|
||||
SMALL = config["SMALL"] = getenv("SMALL", 0)
|
||||
|
|
@@ -1455,7 +1456,7 @@ def train_llama3():
|
|||
return fake_data(BS, SAMPLES)
|
||||
else:
|
||||
from examples.mlperf.dataloader import batch_load_llama3
|
||||
-return batch_load_llama3(BS, SAMPLES, SEQLEN, BASEDIR, seed=SEED, val=bool(TRAIN_ON_VAL), small=bool(SMALL))
|
||||
+return batch_load_llama3(BS, SAMPLES, SEQLEN, BASEDIR, seed=DATA_SEED, val=bool(TRAIN_ON_VAL), small=bool(SMALL))
|
||||
|
||||
if getenv("FAKEDATA", 0):
|
||||
eval_dataset = None
|
||||
|
|
|
|||
|
|
@@ -27,6 +27,7 @@ export SAMPLES=$((MAX_STEPS * GBS))
|
|||
export SEQLEN=${SEQLEN:-8192}
|
||||
|
||||
export SEED=${SEED:-5760}
|
||||
+export DATA_SEED=${DATA_SEED:-5760}
|
||||
|
||||
export JITBEAM=${JITBEAM:-3}
|
||||
export BEAM_UOPS_MAX=6000 BEAM_UPCAST_MAX=256 BEAM_LOCAL_MAX=1024 BEAM_MIN_PROGRESS=5
|
||||
|
|
|
|||
|
|
@@ -26,7 +26,8 @@ export WARMUP_STEPS=$((WARMUP_SAMPLES / GBS))
|
|||
export SAMPLES=$((MAX_STEPS * GBS))
|
||||
export SEQLEN=${SEQLEN:-8192}
|
||||
|
||||
-export SEED=${SEED:-5760}
|
||||
+export SEED=${SEED:-$RANDOM}
|
||||
+export DATA_SEED=${DATA_SEED:-5760}
|
||||
|
||||
export JITBEAM=${JITBEAM:-3}
|
||||
export BEAM_UOPS_MAX=6000 BEAM_UPCAST_MAX=256 BEAM_LOCAL_MAX=1024 BEAM_MIN_PROGRESS=5
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue