mirror of
https://github.com/huggingface/open-r1.git
synced 2026-06-24 01:54:06 +00:00
Compare commits
7 commits
main
...
qwen-coder
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e40f087bb9 | ||
|
|
c2d554b32a | ||
|
|
5d6e197d94 | ||
|
|
f6cd4494de | ||
|
|
af5a23d01a | ||
|
|
ad8b430a3e | ||
|
|
0c39870682 |
6 changed files with 223 additions and 5 deletions
|
|
@@ -0,0 +1,56 @@
|
|||
# Model arguments
|
||||
model_name_or_path: open-r1/Qwen2.5-Coder-3B-Instruct-SFT-synthetic_1-v00.00
|
||||
model_revision: main-step-000006288
|
||||
torch_dtype: bfloat16
|
||||
attn_implementation: flash_attention_2
|
||||
|
||||
# Data training arguments
|
||||
dataset_name: open-r1/verifiable-coding-problems-python
|
||||
dataset_configs:
|
||||
- default
|
||||
|
||||
# GRPO trainer config
|
||||
beta: 0.001
|
||||
bf16: true
|
||||
use_vllm: true
|
||||
vllm_device: auto
|
||||
vllm_gpu_memory_utilization: 0.9
|
||||
do_eval: false
|
||||
gradient_accumulation_steps: 4
|
||||
gradient_checkpointing: true
|
||||
gradient_checkpointing_kwargs:
|
||||
  use_reentrant: false
|
||||
hub_model_id: open-r1/Qwen2.5-Coder-3B-GRPO-SYNTHETIC-1-v00.00
|
||||
hub_strategy: every_save
|
||||
learning_rate: 1.0e-06
|
||||
log_completions: true
|
||||
log_level: info
|
||||
logging_first_step: true
|
||||
logging_steps: 1
|
||||
logging_strategy: steps
|
||||
lr_scheduler_type: cosine_with_min_lr
|
||||
lr_scheduler_kwargs:
|
||||
  min_lr_rate: 0.1
|
||||
max_prompt_length: 1024
|
||||
max_completion_length: 2048
|
||||
max_steps: 500
|
||||
num_generations: 14
|
||||
num_train_epochs: 1
|
||||
output_dir: data/Qwen2.5-1.5B-Open-R1-Code-GRPO
|
||||
overwrite_output_dir: true
|
||||
per_device_train_batch_size: 16
|
||||
push_to_hub: true
|
||||
report_to:
|
||||
- wandb
|
||||
reward_funcs:
|
||||
- code
|
||||
- format
|
||||
reward_weights:
|
||||
- 1.0
|
||||
- 0.1
|
||||
save_strategy: "steps"
|
||||
save_steps: 50
|
||||
save_total_limit: 1
|
||||
seed: 42
|
||||
temperature: 1.0
|
||||
warmup_ratio: 0.03
|
||||
|
|
@@ -0,0 +1,54 @@
|
|||
# Model arguments
|
||||
model_name_or_path: Qwen/Qwen2.5-Coder-3B-Instruct
|
||||
model_revision: main
|
||||
torch_dtype: bfloat16
|
||||
attn_implementation: flash_attention_2
|
||||
|
||||
# Data training arguments
|
||||
dataset_name: open-r1/long-short-cot-mix-alpha-0.2
|
||||
dataset_configs:
|
||||
- all
|
||||
dataset_num_proc: 8
|
||||
|
||||
# SFT trainer config
|
||||
callbacks:
|
||||
- push_to_hub_revision
|
||||
benchmarks:
|
||||
- aime24
|
||||
- aime25
|
||||
- math_500
|
||||
- gpqa
|
||||
- lcb
|
||||
bf16: true
|
||||
do_eval: false
|
||||
eval_strategy: 'no'
|
||||
gradient_accumulation_steps: 4
|
||||
gradient_checkpointing: true
|
||||
gradient_checkpointing_kwargs:
|
||||
  use_reentrant: false
|
||||
hub_model_id: open-r1/Qwen2.5-Coder-3B-Instruct-SFT-cot-mix-0.2-v00.00
|
||||
hub_strategy: every_save
|
||||
learning_rate: 1.0e-06
|
||||
log_level: info
|
||||
logging_first_step: true
|
||||
logging_steps: 10
|
||||
logging_strategy: steps
|
||||
lr_scheduler_type: cosine_with_min_lr
|
||||
lr_scheduler_kwargs:
|
||||
  min_lr_rate: 0.1
|
||||
packing: false
|
||||
max_seq_length: 32768
|
||||
max_steps: -1
|
||||
num_train_epochs: 3
|
||||
output_dir: data/Qwen2.5-Coder-3B-Instruct-SFT-cot-mix-0.2-v00.00
|
||||
overwrite_output_dir: true
|
||||
per_device_train_batch_size: 4
|
||||
push_to_hub: true
|
||||
report_to:
|
||||
- wandb
|
||||
save_strategy: "steps"
|
||||
save_steps: 0.1
|
||||
save_total_limit: 1
|
||||
seed: 42
|
||||
use_liger: false # Bug in TRL when true
|
||||
warmup_ratio: 0.05
|
||||
|
|
@@ -0,0 +1,54 @@
|
|||
# Model arguments
|
||||
model_name_or_path: Qwen/Qwen2.5-Coder-3B-Instruct
|
||||
model_revision: main
|
||||
torch_dtype: bfloat16
|
||||
attn_implementation: flash_attention_2
|
||||
|
||||
# Data training arguments
|
||||
dataset_name: PrimeIntellect/SYNTHETIC-1-SFT-Data
|
||||
dataset_configs:
|
||||
- all
|
||||
dataset_num_proc: 48
|
||||
|
||||
# SFT trainer config
|
||||
callbacks:
|
||||
- push_to_hub_revision
|
||||
benchmarks:
|
||||
- aime24
|
||||
- aime25
|
||||
- math_500
|
||||
- gpqa
|
||||
- lcb
|
||||
bf16: true
|
||||
do_eval: false
|
||||
eval_strategy: 'no'
|
||||
gradient_accumulation_steps: 4
|
||||
gradient_checkpointing: true
|
||||
gradient_checkpointing_kwargs:
|
||||
  use_reentrant: false
|
||||
hub_model_id: open-r1/Qwen2.5-Coder-3B-Instruct-SFT-SYNTHETIC-1-v00.00
|
||||
hub_strategy: every_save
|
||||
learning_rate: 1.0e-06
|
||||
log_level: info
|
||||
logging_first_step: true
|
||||
logging_steps: 10
|
||||
logging_strategy: steps
|
||||
lr_scheduler_type: cosine_with_min_lr
|
||||
lr_scheduler_kwargs:
|
||||
  min_lr_rate: 0.1
|
||||
packing: false
|
||||
max_seq_length: 32768
|
||||
max_steps: -1
|
||||
num_train_epochs: 3
|
||||
output_dir: data/Qwen2.5-Coder-3B-Instruct-SFT-SYNTHETIC-1-v00.00
|
||||
overwrite_output_dir: true
|
||||
per_device_train_batch_size: 4
|
||||
push_to_hub: true
|
||||
report_to:
|
||||
- wandb
|
||||
save_strategy: "steps"
|
||||
save_steps: 0.1
|
||||
save_total_limit: 1
|
||||
seed: 42
|
||||
use_liger: false # Bug in TRL when true
|
||||
warmup_ratio: 0.05
|
||||
|
|
@@ -0,0 +1,54 @@
|
|||
# Model arguments
|
||||
model_name_or_path: Qwen/Qwen2.5-Coder-7B-Instruct
|
||||
model_revision: main
|
||||
torch_dtype: bfloat16
|
||||
attn_implementation: flash_attention_2
|
||||
|
||||
# Data training arguments
|
||||
dataset_name: PrimeIntellect/SYNTHETIC-1-SFT-Data
|
||||
dataset_configs:
|
||||
- all
|
||||
dataset_num_proc: 48
|
||||
|
||||
# SFT trainer config
|
||||
callbacks:
|
||||
- push_to_hub_revision
|
||||
benchmarks:
|
||||
- aime24
|
||||
- aime25
|
||||
- math_500
|
||||
- gpqa
|
||||
- lcb
|
||||
bf16: true
|
||||
do_eval: false
|
||||
eval_strategy: 'no'
|
||||
gradient_accumulation_steps: 4
|
||||
gradient_checkpointing: true
|
||||
gradient_checkpointing_kwargs:
|
||||
  use_reentrant: false
|
||||
hub_model_id: open-r1/Qwen2.5-Coder-7B-Instruct-SFT-SYNTHETIC-1-v00.00
|
||||
hub_strategy: every_save
|
||||
learning_rate: 1.0e-06
|
||||
log_level: info
|
||||
logging_first_step: true
|
||||
logging_steps: 10
|
||||
logging_strategy: steps
|
||||
lr_scheduler_type: cosine_with_min_lr
|
||||
lr_scheduler_kwargs:
|
||||
  min_lr_rate: 0.1
|
||||
packing: false
|
||||
max_seq_length: 32768
|
||||
max_steps: -1
|
||||
num_train_epochs: 3
|
||||
output_dir: data/Qwen2.5-Coder-7B-Instruct-SFT-SYNTHETIC-1-v00.00
|
||||
overwrite_output_dir: true
|
||||
per_device_train_batch_size: 4
|
||||
push_to_hub: true
|
||||
report_to:
|
||||
- wandb
|
||||
save_strategy: "steps"
|
||||
save_steps: 0.1
|
||||
save_total_limit: 1
|
||||
seed: 42
|
||||
use_liger: false # Bug in TRL when true
|
||||
warmup_ratio: 0.05
|
||||
2
setup.py
2
setup.py
|
|
@@ -54,7 +54,7 @@ _deps = [
|
|||
"isort>=5.12.0",
|
||||
"langdetect", # Needed for LightEval's extended tasks
|
||||
"latex2sympy2_extended>=1.0.6",
|
||||
-"liger_kernel==0.5.2",
|
||||
+"liger_kernel==0.5.3",
|
||||
"lighteval @ git+https://github.com/huggingface/lighteval.git@ebb7377b39a48ab0691e6fbd9dea57e9fe290a7e",
|
||||
"math-verify==0.5.2", # Used for math verification in grpo
|
||||
"packaging>=23.0",
|
||||
|
|
|
|||
|
|
@@ -32,11 +32,11 @@ WORLD_SIZE=$(($NUM_NODES*$GPUS_PER_NODE))
|
|||
# Due to conflicts between Accelerate's DeepSpeed configs and Transformers' TrainingArguments, we need to parse the gradient accumulation steps from the config file to ensure they match
|
||||
CONFIG_FILE=recipes/$MODEL/$TASK/config_$CONFIG_SUFFIX.yaml
|
||||
GRAD_ACC_STEPS=$(grep 'gradient_accumulation_steps' $CONFIG_FILE | awk '{print $2}')
|
||||
-USE_VLLM=$(grep 'use_vllm:\s*true' $CONFIG_FILE) # Match "use_vllm: true" (with optional whitespace)
|
||||
+# USE_VLLM=$(grep 'use_vllm:\s*true' $CONFIG_FILE) # Match "use_vllm: true" (with optional whitespace)
|
||||
|
||||
-if [ -n "$USE_VLLM" ]; then # Check if USE_VLLM is *not* empty (found)
|
||||
-WORLD_SIZE=$(($WORLD_SIZE-1))
|
||||
-fi
|
||||
+# if [ -n "$USE_VLLM" ]; then # Check if USE_VLLM is *not* empty (found)
|
||||
+# WORLD_SIZE=$(($WORLD_SIZE-1))
|
||||
+# fi
|
||||
|
||||
# Split the string into individual arguments
|
||||
IFS=' ' read -ra ARGS <<< "$OPTIONAL_ARGS"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue