Compare commits

...

7 commits

Author SHA1 Message Date
Lewis Tunstall
e40f087bb9 Add recipe 2025-02-24 10:38:44 +00:00
Lewis Tunstall
c2d554b32a Disable packing 2025-02-23 21:07:43 +00:00
Lewis Tunstall
5d6e197d94 Add 7B recipe 2025-02-23 17:38:45 +00:00
Lewis Tunstall
f6cd4494de Disable liger 2025-02-23 16:43:27 +00:00
Lewis Tunstall
af5a23d01a Disable Liger 2025-02-23 14:23:13 +00:00
Lewis Tunstall
ad8b430a3e Fix 2025-02-23 07:21:18 +00:00
Lewis Tunstall
0c39870682 Add Synthetic-1 recipe 2025-02-22 21:54:46 +00:00
6 changed files with 223 additions and 5 deletions

View file

@ -0,0 +1,56 @@
# GRPO recipe: starts from the Qwen2.5-Coder-3B SFT checkpoint named below and
# trains on open-r1/verifiable-coding-problems-python with code + format rewards.

# Model arguments
# NOTE(review): the 3B SYNTHETIC-1 SFT recipe publishes to
# "...-SFT-SYNTHETIC-1-v00.00"; confirm the "synthetic_1" spelling here points
# at the intended Hub repository.
model_name_or_path: open-r1/Qwen2.5-Coder-3B-Instruct-SFT-synthetic_1-v00.00
model_revision: main-step-000006288
torch_dtype: bfloat16
attn_implementation: flash_attention_2

# Data training arguments
dataset_name: open-r1/verifiable-coding-problems-python
dataset_configs:
- default

# GRPO trainer config
beta: 0.001
bf16: true
use_vllm: true
vllm_device: auto
vllm_gpu_memory_utilization: 0.9
do_eval: false
gradient_accumulation_steps: 4
gradient_checkpointing: true
# Nested mapping: use_reentrant is an option of gradient checkpointing, not a
# top-level trainer argument.
gradient_checkpointing_kwargs:
  use_reentrant: false
hub_model_id: open-r1/Qwen2.5-Coder-3B-GRPO-SYNTHETIC-1-v00.00
hub_strategy: every_save
learning_rate: 1.0e-06
log_completions: true
log_level: info
logging_first_step: true
logging_steps: 1
logging_strategy: steps
lr_scheduler_type: cosine_with_min_lr
# Nested mapping: min_lr_rate is consumed by the cosine_with_min_lr scheduler.
lr_scheduler_kwargs:
  min_lr_rate: 0.1
max_prompt_length: 1024
max_completion_length: 2048
# Both max_steps and num_train_epochs are set; per the Transformers docs a
# positive max_steps takes precedence over num_train_epochs.
max_steps: 500
# NOTE(review): TRL expects the global train batch size to be divisible by
# num_generations - confirm this holds for the launch topology in use.
num_generations: 14
num_train_epochs: 1
# Kept in sync with the hub_model_id basename; overwrite_output_dir is enabled,
# so a directory name shared with another recipe would be clobbered.
output_dir: data/Qwen2.5-Coder-3B-GRPO-SYNTHETIC-1-v00.00
overwrite_output_dir: true
per_device_train_batch_size: 16
push_to_hub: true
report_to:
- wandb
# reward_weights pairs positionally with reward_funcs (code: 1.0, format: 0.1).
reward_funcs:
- code
- format
reward_weights:
- 1.0
- 0.1
save_strategy: "steps"
save_steps: 50
save_total_limit: 1
seed: 42
temperature: 1.0
warmup_ratio: 0.03

View file

@ -0,0 +1,54 @@
# SFT recipe: fine-tunes Qwen/Qwen2.5-Coder-3B-Instruct on the
# open-r1/long-short-cot-mix-alpha-0.2 dataset ("all" config).

# Model arguments
model_name_or_path: Qwen/Qwen2.5-Coder-3B-Instruct
model_revision: main
torch_dtype: bfloat16
attn_implementation: flash_attention_2

# Data training arguments
dataset_name: open-r1/long-short-cot-mix-alpha-0.2
dataset_configs:
- all
dataset_num_proc: 8

# SFT trainer config
callbacks:
- push_to_hub_revision
benchmarks:
- aime24
- aime25
- math_500
- gpqa
- lcb
bf16: true
do_eval: false
# Quoted so YAML loaders do not coerce the string "no" into boolean false.
eval_strategy: 'no'
gradient_accumulation_steps: 4
gradient_checkpointing: true
# Nested mapping: use_reentrant is an option of gradient checkpointing, not a
# top-level trainer argument.
gradient_checkpointing_kwargs:
  use_reentrant: false
hub_model_id: open-r1/Qwen2.5-Coder-3B-Instruct-SFT-cot-mix-0.2-v00.00
hub_strategy: every_save
learning_rate: 1.0e-06
log_level: info
logging_first_step: true
logging_steps: 10
logging_strategy: steps
lr_scheduler_type: cosine_with_min_lr
# Nested mapping: min_lr_rate is consumed by the cosine_with_min_lr scheduler.
lr_scheduler_kwargs:
  min_lr_rate: 0.1
packing: false
max_seq_length: 32768
# -1 leaves the run length to num_train_epochs.
max_steps: -1
num_train_epochs: 3
output_dir: data/Qwen2.5-Coder-3B-Instruct-SFT-cot-mix-0.2-v00.00
overwrite_output_dir: true
per_device_train_batch_size: 4
push_to_hub: true
report_to:
- wandb
save_strategy: "steps"
# Fractional value: per the Transformers docs, a save_steps below 1 is read as
# a ratio of the total number of training steps.
save_steps: 0.1
save_total_limit: 1
seed: 42
use_liger: false  # Bug in TRL when true
warmup_ratio: 0.05

View file

@ -0,0 +1,54 @@
# SFT recipe: fine-tunes Qwen/Qwen2.5-Coder-3B-Instruct on the
# PrimeIntellect/SYNTHETIC-1-SFT-Data dataset ("all" config).

# Model arguments
model_name_or_path: Qwen/Qwen2.5-Coder-3B-Instruct
model_revision: main
torch_dtype: bfloat16
attn_implementation: flash_attention_2

# Data training arguments
dataset_name: PrimeIntellect/SYNTHETIC-1-SFT-Data
dataset_configs:
- all
dataset_num_proc: 48

# SFT trainer config
callbacks:
- push_to_hub_revision
benchmarks:
- aime24
- aime25
- math_500
- gpqa
- lcb
bf16: true
do_eval: false
# Quoted so YAML loaders do not coerce the string "no" into boolean false.
eval_strategy: 'no'
gradient_accumulation_steps: 4
gradient_checkpointing: true
# Nested mapping: use_reentrant is an option of gradient checkpointing, not a
# top-level trainer argument.
gradient_checkpointing_kwargs:
  use_reentrant: false
hub_model_id: open-r1/Qwen2.5-Coder-3B-Instruct-SFT-SYNTHETIC-1-v00.00
hub_strategy: every_save
learning_rate: 1.0e-06
log_level: info
logging_first_step: true
logging_steps: 10
logging_strategy: steps
lr_scheduler_type: cosine_with_min_lr
# Nested mapping: min_lr_rate is consumed by the cosine_with_min_lr scheduler.
lr_scheduler_kwargs:
  min_lr_rate: 0.1
packing: false
max_seq_length: 32768
# -1 leaves the run length to num_train_epochs.
max_steps: -1
num_train_epochs: 3
output_dir: data/Qwen2.5-Coder-3B-Instruct-SFT-SYNTHETIC-1-v00.00
overwrite_output_dir: true
per_device_train_batch_size: 4
push_to_hub: true
report_to:
- wandb
save_strategy: "steps"
# Fractional value: per the Transformers docs, a save_steps below 1 is read as
# a ratio of the total number of training steps.
save_steps: 0.1
save_total_limit: 1
seed: 42
use_liger: false  # Bug in TRL when true
warmup_ratio: 0.05

View file

@ -0,0 +1,54 @@
# SFT recipe: fine-tunes Qwen/Qwen2.5-Coder-7B-Instruct on the
# PrimeIntellect/SYNTHETIC-1-SFT-Data dataset ("all" config).

# Model arguments
model_name_or_path: Qwen/Qwen2.5-Coder-7B-Instruct
model_revision: main
torch_dtype: bfloat16
attn_implementation: flash_attention_2

# Data training arguments
dataset_name: PrimeIntellect/SYNTHETIC-1-SFT-Data
dataset_configs:
- all
dataset_num_proc: 48

# SFT trainer config
callbacks:
- push_to_hub_revision
benchmarks:
- aime24
- aime25
- math_500
- gpqa
- lcb
bf16: true
do_eval: false
# Quoted so YAML loaders do not coerce the string "no" into boolean false.
eval_strategy: 'no'
gradient_accumulation_steps: 4
gradient_checkpointing: true
# Nested mapping: use_reentrant is an option of gradient checkpointing, not a
# top-level trainer argument.
gradient_checkpointing_kwargs:
  use_reentrant: false
hub_model_id: open-r1/Qwen2.5-Coder-7B-Instruct-SFT-SYNTHETIC-1-v00.00
hub_strategy: every_save
learning_rate: 1.0e-06
log_level: info
logging_first_step: true
logging_steps: 10
logging_strategy: steps
lr_scheduler_type: cosine_with_min_lr
# Nested mapping: min_lr_rate is consumed by the cosine_with_min_lr scheduler.
lr_scheduler_kwargs:
  min_lr_rate: 0.1
packing: false
max_seq_length: 32768
# -1 leaves the run length to num_train_epochs.
max_steps: -1
num_train_epochs: 3
output_dir: data/Qwen2.5-Coder-7B-Instruct-SFT-SYNTHETIC-1-v00.00
overwrite_output_dir: true
per_device_train_batch_size: 4
push_to_hub: true
report_to:
- wandb
save_strategy: "steps"
# Fractional value: per the Transformers docs, a save_steps below 1 is read as
# a ratio of the total number of training steps.
save_steps: 0.1
save_total_limit: 1
seed: 42
use_liger: false  # Bug in TRL when true
warmup_ratio: 0.05

View file

@ -54,7 +54,7 @@ _deps = [
"isort>=5.12.0",
"langdetect", # Needed for LightEval's extended tasks
"latex2sympy2_extended>=1.0.6",
"liger_kernel==0.5.2",
"liger_kernel==0.5.3",
"lighteval @ git+https://github.com/huggingface/lighteval.git@ebb7377b39a48ab0691e6fbd9dea57e9fe290a7e",
"math-verify==0.5.2", # Used for math verification in grpo
"packaging>=23.0",

View file

@ -32,11 +32,11 @@ WORLD_SIZE=$(($NUM_NODES*$GPUS_PER_NODE))
# Due to conflicts between Accelerate's DeepSpeed configs and Transformers' TrainingArguments, we need to parse the gradient accumulation steps from the config file to ensure they match
CONFIG_FILE=recipes/$MODEL/$TASK/config_$CONFIG_SUFFIX.yaml
GRAD_ACC_STEPS=$(grep 'gradient_accumulation_steps' $CONFIG_FILE | awk '{print $2}')
USE_VLLM=$(grep 'use_vllm:\s*true' $CONFIG_FILE) # Match "use_vllm: true" (with optional whitespace)
# USE_VLLM=$(grep 'use_vllm:\s*true' $CONFIG_FILE) # Match "use_vllm: true" (with optional whitespace)
if [ -n "$USE_VLLM" ]; then # Check if USE_VLLM is *not* empty (found)
WORLD_SIZE=$(($WORLD_SIZE-1))
fi
# if [ -n "$USE_VLLM" ]; then # Check if USE_VLLM is *not* empty (found)
# WORLD_SIZE=$(($WORLD_SIZE-1))
# fi
# Split the string into individual arguments
IFS=' ' read -ra ARGS <<< "$OPTIONAL_ARGS"