mirror of
https://github.com/huggingface/open-r1.git
synced 2026-06-24 01:54:06 +00:00
Compare commits
23 commits
main
...
qwen-coder
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
43dd44e420 | ||
|
|
be64fefb9f | ||
|
|
abee7a20cb | ||
|
|
0fdad2f978 | ||
|
|
eb2a3aa400 | ||
|
|
887dbe97f8 | ||
|
|
7e1bd3791b | ||
|
|
83c271bca5 | ||
|
|
8677506cfc | ||
|
|
6dab011078 | ||
|
|
4daec5ad9d | ||
|
|
a6b8da745f | ||
|
|
4bb2495296 | ||
|
|
6bab2d828a | ||
|
|
0523624e21 | ||
|
|
f82658d608 | ||
|
|
bc281a2f26 | ||
|
|
ba27a99028 | ||
|
|
d9b707420d | ||
|
|
a6f44b2b0a | ||
|
|
2080600c47 | ||
|
|
dae6e9af8c | ||
|
|
56f9257e67 | ||
37 changed files with 1839 additions and 0 deletions
53
recipes/QwQ-32B/sft/config_v00.00.yaml
Normal file
53
recipes/QwQ-32B/sft/config_v00.00.yaml
Normal file
|
|
@ -0,0 +1,53 @@
|
|||
# Config for 16 nodes to go fast
|
||||
# Model arguments
|
||||
model_name_or_path: Qwen/QwQ-32B
|
||||
model_revision: main
|
||||
torch_dtype: bfloat16
|
||||
attn_implementation: flash_attention_2
|
||||
|
||||
# Data training arguments
|
||||
dataset_name: open-r1/codeforces-cots_decontaminated
|
||||
dataset_config: solutions
|
||||
dataset_num_proc: 1
|
||||
|
||||
# SFT trainer config
|
||||
callbacks:
|
||||
- push_to_hub_revision
|
||||
benchmarks:
|
||||
- lcb_v4
|
||||
bf16: true
|
||||
do_eval: false
|
||||
eval_strategy: 'no'
|
||||
gradient_accumulation_steps: 1
|
||||
gradient_checkpointing: true
|
||||
gradient_checkpointing_kwargs:
|
||||
use_reentrant: false
|
||||
hub_model_id: open-r1/QwQ-32B-SFT
|
||||
hub_model_revision: v00.00
|
||||
hub_strategy: every_save
|
||||
learning_rate: 4.0e-05
|
||||
log_level: info
|
||||
logging_steps: 1
|
||||
logging_strategy: steps
|
||||
lr_scheduler_type: cosine_with_min_lr
|
||||
lr_scheduler_kwargs:
|
||||
min_lr_rate: 0.1
|
||||
packing: false
|
||||
max_grad_norm: 0.2
|
||||
max_length: 32768
|
||||
max_steps: -1
|
||||
num_train_epochs: 10
|
||||
output_dir: data/open-r1/QwQ-SFT-v00.00
|
||||
overwrite_output_dir: true
|
||||
per_device_eval_batch_size: 1
|
||||
per_device_train_batch_size: 1
|
||||
push_to_hub: true
|
||||
report_to:
|
||||
- wandb
|
||||
save_strategy: epoch
|
||||
save_total_limit: 1
|
||||
seed: 42
|
||||
use_liger: false # fails on multi-node
|
||||
wandb_entity: huggingface
|
||||
wandb_project: open-r1
|
||||
warmup_ratio: 0.03
|
||||
57
recipes/Qwen2.5-Coder-32B-Instruct/sft/config_v00.00.yaml
Normal file
57
recipes/Qwen2.5-Coder-32B-Instruct/sft/config_v00.00.yaml
Normal file
|
|
@ -0,0 +1,57 @@
|
|||
# Config for 16 nodes to go fast
|
||||
# Model arguments
|
||||
model_name_or_path: Qwen/Qwen2.5-Coder-32B-Instruct
|
||||
model_revision: main
|
||||
torch_dtype: bfloat16
|
||||
attn_implementation: flash_attention_2
|
||||
|
||||
# Data training arguments
|
||||
dataset_name: open-r1/codeforces-cots_decontaminated
|
||||
dataset_config: solutions
|
||||
dataset_num_proc: 12
|
||||
|
||||
# SFT trainer config
|
||||
callbacks:
|
||||
- push_to_hub_revision
|
||||
benchmarks:
|
||||
- lcb_v4
|
||||
- lcb
|
||||
bf16: true
|
||||
do_eval: false
|
||||
eval_strategy: 'no'
|
||||
gradient_accumulation_steps: 1
|
||||
gradient_checkpointing: true
|
||||
gradient_checkpointing_kwargs:
|
||||
use_reentrant: false
|
||||
hub_always_push: true
|
||||
hub_model_id: open-r1/Qwen2.5-Coder-32B-Instruct-SFT
|
||||
hub_model_revision: v00.00
|
||||
hub_strategy: every_save
|
||||
learning_rate: 4.0e-05
|
||||
log_level: info
|
||||
logging_steps: 1
|
||||
logging_strategy: steps
|
||||
lr_scheduler_type: cosine_with_min_lr
|
||||
lr_scheduler_kwargs:
|
||||
min_lr_rate: 0.1
|
||||
packing: false
|
||||
max_grad_norm: 0.2
|
||||
max_length: 32768
|
||||
max_steps: -1
|
||||
num_train_epochs: 10
|
||||
optim: paged_adamw_8bit
|
||||
output_dir: data/open-r1/Qwen2.5-Coder-32B-Instruct-SFT-v00.00
|
||||
overwrite_output_dir: true
|
||||
per_device_eval_batch_size: 1
|
||||
per_device_train_batch_size: 1
|
||||
push_to_hub: true
|
||||
report_to:
|
||||
- wandb
|
||||
save_only_model: true # works around FSDP errors when saving paged optimizer state
|
||||
save_strategy: epoch
|
||||
save_total_limit: 1
|
||||
seed: 42
|
||||
use_liger: false # fails on multi-node
|
||||
wandb_entity: huggingface
|
||||
wandb_project: open-r1
|
||||
warmup_ratio: 0.03
|
||||
57
recipes/Qwen2.5-Coder-32B-Instruct/sft/config_v01.00.yaml
Normal file
57
recipes/Qwen2.5-Coder-32B-Instruct/sft/config_v01.00.yaml
Normal file
|
|
@ -0,0 +1,57 @@
|
|||
# Config for 16 nodes to go fast
|
||||
# Model arguments
|
||||
model_name_or_path: Qwen/Qwen2.5-Coder-32B-Instruct
|
||||
model_revision: main
|
||||
torch_dtype: bfloat16
|
||||
attn_implementation: flash_attention_2
|
||||
|
||||
# Data training arguments
|
||||
dataset_name: open-r1/codeforces-ioi-cots-mix
|
||||
dataset_config: solutions_stop_compile
|
||||
dataset_num_proc: 12
|
||||
|
||||
# SFT trainer config
|
||||
callbacks:
|
||||
- push_to_hub_revision
|
||||
benchmarks:
|
||||
- lcb_v4
|
||||
- lcb
|
||||
bf16: true
|
||||
do_eval: false
|
||||
eval_strategy: 'no'
|
||||
gradient_accumulation_steps: 1
|
||||
gradient_checkpointing: true
|
||||
gradient_checkpointing_kwargs:
|
||||
use_reentrant: false
|
||||
hub_always_push: true
|
||||
hub_model_id: open-r1/Qwen2.5-Coder-32B-Instruct-SFT
|
||||
hub_model_revision: v01.00
|
||||
hub_strategy: every_save
|
||||
learning_rate: 4.0e-05
|
||||
log_level: info
|
||||
logging_steps: 1
|
||||
logging_strategy: steps
|
||||
lr_scheduler_type: cosine_with_min_lr
|
||||
lr_scheduler_kwargs:
|
||||
min_lr_rate: 0.1
|
||||
packing: false
|
||||
max_grad_norm: 0.2
|
||||
max_length: 32768
|
||||
max_steps: -1
|
||||
num_train_epochs: 10
|
||||
optim: paged_adamw_8bit
|
||||
output_dir: data/open-r1/Qwen2.5-Coder-32B-Instruct-SFT-v01.00
|
||||
overwrite_output_dir: true
|
||||
per_device_eval_batch_size: 1
|
||||
per_device_train_batch_size: 1
|
||||
push_to_hub: true
|
||||
report_to:
|
||||
- wandb
|
||||
save_only_model: true # works around FSDP errors when saving paged optimizer state
|
||||
save_strategy: epoch
|
||||
save_total_limit: 1
|
||||
seed: 42
|
||||
use_liger: false # fails on multi-node
|
||||
wandb_entity: huggingface
|
||||
wandb_project: open-r1
|
||||
warmup_ratio: 0.03
|
||||
56
recipes/Qwen2.5-Coder-32B-Instruct/sft/config_v02.00.yaml
Normal file
56
recipes/Qwen2.5-Coder-32B-Instruct/sft/config_v02.00.yaml
Normal file
|
|
@ -0,0 +1,56 @@
|
|||
# Config for 4 nodes since it's only 10k samples
|
||||
# Model arguments
|
||||
model_name_or_path: Qwen/Qwen2.5-Coder-32B-Instruct
|
||||
model_revision: main
|
||||
torch_dtype: bfloat16
|
||||
attn_implementation: flash_attention_2
|
||||
|
||||
# Data training arguments
|
||||
dataset_name: open-r1/ioi-cots
|
||||
dataset_num_proc: 12
|
||||
|
||||
# SFT trainer config
|
||||
callbacks:
|
||||
- push_to_hub_revision
|
||||
benchmarks:
|
||||
- lcb_v4
|
||||
- lcb
|
||||
bf16: true
|
||||
do_eval: false
|
||||
eval_strategy: 'no'
|
||||
gradient_accumulation_steps: 1
|
||||
gradient_checkpointing: true
|
||||
gradient_checkpointing_kwargs:
|
||||
use_reentrant: false
|
||||
hub_always_push: true
|
||||
hub_model_id: open-r1/Qwen2.5-Coder-32B-Instruct-SFT
|
||||
hub_model_revision: v02.00
|
||||
hub_strategy: every_save
|
||||
learning_rate: 4.0e-05
|
||||
log_level: info
|
||||
logging_steps: 1
|
||||
logging_strategy: steps
|
||||
lr_scheduler_type: cosine_with_min_lr
|
||||
lr_scheduler_kwargs:
|
||||
min_lr_rate: 0.1
|
||||
packing: false
|
||||
max_grad_norm: 0.2
|
||||
max_length: 32768
|
||||
max_steps: -1
|
||||
num_train_epochs: 10
|
||||
optim: paged_adamw_8bit
|
||||
output_dir: data/open-r1/Qwen2.5-Coder-32B-Instruct-SFT-v02.00
|
||||
overwrite_output_dir: true
|
||||
per_device_eval_batch_size: 1
|
||||
per_device_train_batch_size: 1
|
||||
push_to_hub: true
|
||||
report_to:
|
||||
- wandb
|
||||
save_only_model: true # works around FSDP errors when saving paged optimizer state
|
||||
save_strategy: epoch
|
||||
save_total_limit: 1
|
||||
seed: 42
|
||||
use_liger: false # fails on multi-node
|
||||
wandb_entity: huggingface
|
||||
wandb_project: open-r1
|
||||
warmup_ratio: 0.03
|
||||
57
recipes/Qwen2.5-Coder-32B-Instruct/sft/config_v03.00.yaml
Normal file
57
recipes/Qwen2.5-Coder-32B-Instruct/sft/config_v03.00.yaml
Normal file
|
|
@ -0,0 +1,57 @@
|
|||
# Config for 4 nodes since it's only 10k samples
|
||||
# Model arguments
|
||||
model_name_or_path: Qwen/Qwen2.5-Coder-32B-Instruct
|
||||
model_revision: main
|
||||
torch_dtype: bfloat16
|
||||
attn_implementation: flash_attention_2
|
||||
|
||||
# Data training arguments
|
||||
dataset_name: open-r1/ioi-cots-filtered
|
||||
dataset_config: stop_compile
|
||||
dataset_num_proc: 12
|
||||
|
||||
# SFT trainer config
|
||||
callbacks:
|
||||
- push_to_hub_revision
|
||||
benchmarks:
|
||||
- lcb_v4
|
||||
- lcb
|
||||
bf16: true
|
||||
do_eval: false
|
||||
eval_strategy: 'no'
|
||||
gradient_accumulation_steps: 1
|
||||
gradient_checkpointing: true
|
||||
gradient_checkpointing_kwargs:
|
||||
use_reentrant: false
|
||||
hub_always_push: true
|
||||
hub_model_id: open-r1/Qwen2.5-Coder-32B-Instruct-SFT
|
||||
hub_model_revision: v03.00
|
||||
hub_strategy: every_save
|
||||
learning_rate: 4.0e-05
|
||||
log_level: info
|
||||
logging_steps: 1
|
||||
logging_strategy: steps
|
||||
lr_scheduler_type: cosine_with_min_lr
|
||||
lr_scheduler_kwargs:
|
||||
min_lr_rate: 0.1
|
||||
packing: false
|
||||
max_grad_norm: 0.2
|
||||
max_length: 32768
|
||||
max_steps: -1
|
||||
num_train_epochs: 10
|
||||
optim: paged_adamw_8bit
|
||||
output_dir: data/open-r1/Qwen2.5-Coder-32B-Instruct-SFT-v03.00
|
||||
overwrite_output_dir: true
|
||||
per_device_eval_batch_size: 1
|
||||
per_device_train_batch_size: 1
|
||||
push_to_hub: true
|
||||
report_to:
|
||||
- wandb
|
||||
save_only_model: true # works around FSDP errors when saving paged optimizer state
|
||||
save_strategy: epoch
|
||||
save_total_limit: 1
|
||||
seed: 42
|
||||
use_liger: false # fails on multi-node
|
||||
wandb_entity: huggingface
|
||||
wandb_project: open-r1
|
||||
warmup_ratio: 0.03
|
||||
57
recipes/Qwen2.5-Coder-32B-Instruct/sft/config_v04.00.yaml
Normal file
57
recipes/Qwen2.5-Coder-32B-Instruct/sft/config_v04.00.yaml
Normal file
|
|
@ -0,0 +1,57 @@
|
|||
# Config for 4 nodes since it's only 10k samples
|
||||
# Model arguments
|
||||
model_name_or_path: Qwen/Qwen2.5-Coder-32B-Instruct
|
||||
model_revision: main
|
||||
torch_dtype: bfloat16
|
||||
attn_implementation: flash_attention_2
|
||||
|
||||
# Data training arguments
|
||||
dataset_name: open-r1/ioi-cots-filtered
|
||||
dataset_config: stop_compile_subtask_score_gt_0
|
||||
dataset_num_proc: 12
|
||||
|
||||
# SFT trainer config
|
||||
callbacks:
|
||||
- push_to_hub_revision
|
||||
benchmarks:
|
||||
- lcb_v4
|
||||
- lcb
|
||||
bf16: true
|
||||
do_eval: false
|
||||
eval_strategy: 'no'
|
||||
gradient_accumulation_steps: 1
|
||||
gradient_checkpointing: true
|
||||
gradient_checkpointing_kwargs:
|
||||
use_reentrant: false
|
||||
hub_always_push: true
|
||||
hub_model_id: open-r1/Qwen2.5-Coder-32B-Instruct-SFT
|
||||
hub_model_revision: v04.00
|
||||
hub_strategy: every_save
|
||||
learning_rate: 4.0e-05
|
||||
log_level: info
|
||||
logging_steps: 1
|
||||
logging_strategy: steps
|
||||
lr_scheduler_type: cosine_with_min_lr
|
||||
lr_scheduler_kwargs:
|
||||
min_lr_rate: 0.1
|
||||
packing: false
|
||||
max_grad_norm: 0.2
|
||||
max_length: 32768
|
||||
max_steps: -1
|
||||
num_train_epochs: 10
|
||||
optim: paged_adamw_8bit
|
||||
output_dir: data/open-r1/Qwen2.5-Coder-32B-Instruct-SFT-v04.00
|
||||
overwrite_output_dir: true
|
||||
per_device_eval_batch_size: 1
|
||||
per_device_train_batch_size: 1
|
||||
push_to_hub: true
|
||||
report_to:
|
||||
- wandb
|
||||
save_only_model: true # works around FSDP errors when saving paged optimizer state
|
||||
save_strategy: epoch
|
||||
save_total_limit: 1
|
||||
seed: 42
|
||||
use_liger: false # fails on multi-node
|
||||
wandb_entity: huggingface
|
||||
wandb_project: open-r1
|
||||
warmup_ratio: 0.03
|
||||
57
recipes/Qwen2.5-Coder-32B-Instruct/sft/config_v05.00.yaml
Normal file
57
recipes/Qwen2.5-Coder-32B-Instruct/sft/config_v05.00.yaml
Normal file
|
|
@ -0,0 +1,57 @@
|
|||
# Config for 4 nodes since it's only 10k samples
|
||||
# Model arguments
|
||||
model_name_or_path: Qwen/Qwen2.5-Coder-32B-Instruct
|
||||
model_revision: main
|
||||
torch_dtype: bfloat16
|
||||
attn_implementation: flash_attention_2
|
||||
|
||||
# Data training arguments
|
||||
dataset_name: open-r1/ioi-cots-filtered
|
||||
dataset_config: stop_compile_subtask_score_gt_0_subtask_points_gt_0
|
||||
dataset_num_proc: 12
|
||||
|
||||
# SFT trainer config
|
||||
callbacks:
|
||||
- push_to_hub_revision
|
||||
benchmarks:
|
||||
- lcb_v4
|
||||
- lcb
|
||||
bf16: true
|
||||
do_eval: false
|
||||
eval_strategy: 'no'
|
||||
gradient_accumulation_steps: 1
|
||||
gradient_checkpointing: true
|
||||
gradient_checkpointing_kwargs:
|
||||
use_reentrant: false
|
||||
hub_always_push: true
|
||||
hub_model_id: open-r1/Qwen2.5-Coder-32B-Instruct-SFT
|
||||
hub_model_revision: v05.00
|
||||
hub_strategy: every_save
|
||||
learning_rate: 4.0e-05
|
||||
log_level: info
|
||||
logging_steps: 1
|
||||
logging_strategy: steps
|
||||
lr_scheduler_type: cosine_with_min_lr
|
||||
lr_scheduler_kwargs:
|
||||
min_lr_rate: 0.1
|
||||
packing: false
|
||||
max_grad_norm: 0.2
|
||||
max_length: 32768
|
||||
max_steps: -1
|
||||
num_train_epochs: 10
|
||||
optim: paged_adamw_8bit
|
||||
output_dir: data/open-r1/Qwen2.5-Coder-32B-Instruct-SFT-v05.00
|
||||
overwrite_output_dir: true
|
||||
per_device_eval_batch_size: 1
|
||||
per_device_train_batch_size: 1
|
||||
push_to_hub: true
|
||||
report_to:
|
||||
- wandb
|
||||
save_only_model: true # works around FSDP errors when saving paged optimizer state
|
||||
save_strategy: epoch
|
||||
save_total_limit: 1
|
||||
seed: 42
|
||||
use_liger: false # fails on multi-node
|
||||
wandb_entity: huggingface
|
||||
wandb_project: open-r1
|
||||
warmup_ratio: 0.03
|
||||
55
recipes/Qwen2.5-Coder-3B-Instruct/sft/config_debug.yaml
Normal file
55
recipes/Qwen2.5-Coder-3B-Instruct/sft/config_debug.yaml
Normal file
|
|
@ -0,0 +1,55 @@
|
|||
# Debug config for a short 4-step run; written for 16 nodes to go fast (TODO: confirm node count for this 3B model)
|
||||
# Model arguments
|
||||
model_name_or_path: Qwen/Qwen2.5-Coder-3B-Instruct
|
||||
model_revision: main
|
||||
torch_dtype: bfloat16
|
||||
attn_implementation: flash_attention_2
|
||||
|
||||
# Data training arguments
|
||||
dataset_name: open-r1/codeforces-cots_decontaminated
|
||||
dataset_config: solutions
|
||||
dataset_num_proc: 48
|
||||
|
||||
# SFT trainer config
|
||||
callbacks:
|
||||
- push_to_hub_revision
|
||||
benchmarks:
|
||||
- lcb
|
||||
bf16: true
|
||||
do_eval: false
|
||||
eval_strategy: 'no'
|
||||
gradient_accumulation_steps: 1
|
||||
gradient_checkpointing: true
|
||||
gradient_checkpointing_kwargs:
|
||||
use_reentrant: false
|
||||
hub_model_id: open-r1/Qwen2.5-Coder-3B-Instruct-SFT
|
||||
hub_model_revision: debug
|
||||
hub_strategy: every_save
|
||||
learning_rate: 4.0e-05
|
||||
log_level: info
|
||||
logging_steps: 1
|
||||
logging_strategy: steps
|
||||
lr_scheduler_type: cosine_with_min_lr
|
||||
lr_scheduler_kwargs:
|
||||
min_lr_rate: 0.1
|
||||
packing: false
|
||||
max_grad_norm: 0.2
|
||||
max_length: 23552 # Any larger gives OOM
|
||||
max_steps: 4
|
||||
num_train_epochs: 10
|
||||
# optim: paged_adamw_8bit
|
||||
output_dir: data/Qwen2.5-Coder-3B-Instruct-SFT-debug
|
||||
overwrite_output_dir: true
|
||||
per_device_eval_batch_size: 1
|
||||
per_device_train_batch_size: 1
|
||||
push_to_hub: true
|
||||
report_to:
|
||||
- wandb
|
||||
save_strategy: steps
|
||||
save_steps: 2
|
||||
save_total_limit: 1
|
||||
seed: 42
|
||||
use_liger: false # fails on multi-node?
|
||||
wandb_entity: huggingface
|
||||
wandb_project: open-r1
|
||||
warmup_ratio: 0.03
|
||||
55
recipes/Qwen2.5-Coder-3B-Instruct/sft/config_v02.00.yaml
Normal file
55
recipes/Qwen2.5-Coder-3B-Instruct/sft/config_v02.00.yaml
Normal file
|
|
@ -0,0 +1,55 @@
|
|||
# Model arguments
|
||||
model_name_or_path: Qwen/Qwen2.5-Coder-3B-Instruct
|
||||
model_revision: main
|
||||
torch_dtype: bfloat16
|
||||
attn_implementation: flash_attention_2
|
||||
|
||||
# Data training arguments
|
||||
dataset_name: open-r1/OpenThoughts-114k-Curated
|
||||
dataset_configs:
|
||||
- all
|
||||
dataset_num_proc: 12
|
||||
|
||||
# SFT trainer config
|
||||
callbacks:
|
||||
- push_to_hub_revision
|
||||
benchmarks:
|
||||
- math_500
|
||||
- aime24
|
||||
- aime25
|
||||
- gpqa
|
||||
- lcb
|
||||
bf16: true
|
||||
do_eval: false
|
||||
gradient_accumulation_steps: 4
|
||||
gradient_checkpointing: true
|
||||
gradient_checkpointing_kwargs:
|
||||
use_reentrant: false
|
||||
hub_model_id: open-r1/Qwen2.5-Coder-3B-Instruct-SFT
|
||||
hub_model_revision: v02.00
|
||||
hub_strategy: every_save
|
||||
learning_rate: 5.0e-07
|
||||
log_level: info
|
||||
logging_steps: 1
|
||||
logging_strategy: steps
|
||||
lr_scheduler_type: cosine_with_min_lr
|
||||
lr_scheduler_kwargs:
|
||||
min_lr_rate: 0.1
|
||||
packing: true
|
||||
max_seq_length: 32768
|
||||
max_steps: -1
|
||||
num_train_epochs: 3
|
||||
output_dir: data/open-r1/Qwen2.5-Coder-3B-Instruct-SFT-v02.00
|
||||
overwrite_output_dir: true
|
||||
per_device_eval_batch_size: 1
|
||||
per_device_train_batch_size: 4
|
||||
push_to_hub: true
|
||||
report_to:
|
||||
- wandb
|
||||
save_strategy: "epoch"
|
||||
save_total_limit: 1
|
||||
seed: 42
|
||||
use_liger: true
|
||||
wandb_entity: huggingface
|
||||
wandb_project: open-r1
|
||||
warmup_ratio: 0.03
|
||||
55
recipes/Qwen2.5-Coder-3B-Instruct/sft/config_v02.01.yaml
Normal file
55
recipes/Qwen2.5-Coder-3B-Instruct/sft/config_v02.01.yaml
Normal file
|
|
@ -0,0 +1,55 @@
|
|||
# Model arguments
|
||||
model_name_or_path: Qwen/Qwen2.5-Coder-3B-Instruct
|
||||
model_revision: main
|
||||
torch_dtype: bfloat16
|
||||
attn_implementation: flash_attention_2
|
||||
|
||||
# Data training arguments
|
||||
dataset_name: open-r1/OpenThoughts-114k-Curated
|
||||
dataset_configs:
|
||||
- all
|
||||
dataset_num_proc: 12
|
||||
|
||||
# SFT trainer config
|
||||
callbacks:
|
||||
- push_to_hub_revision
|
||||
benchmarks:
|
||||
- math_500
|
||||
- aime24
|
||||
- aime25
|
||||
- gpqa
|
||||
- lcb
|
||||
bf16: true
|
||||
do_eval: false
|
||||
gradient_accumulation_steps: 4
|
||||
gradient_checkpointing: true
|
||||
gradient_checkpointing_kwargs:
|
||||
use_reentrant: false
|
||||
hub_model_id: open-r1/Qwen2.5-Coder-3B-Instruct-SFT
|
||||
hub_model_revision: v02.01
|
||||
hub_strategy: every_save
|
||||
learning_rate: 5.0e-07
|
||||
log_level: info
|
||||
logging_steps: 1
|
||||
logging_strategy: steps
|
||||
lr_scheduler_type: cosine_with_min_lr
|
||||
lr_scheduler_kwargs:
|
||||
min_lr_rate: 0.1
|
||||
packing: false
|
||||
max_seq_length: 32768
|
||||
max_steps: -1
|
||||
num_train_epochs: 3
|
||||
output_dir: data/open-r1/Qwen2.5-Coder-3B-Instruct-SFT-v02.01
|
||||
overwrite_output_dir: true
|
||||
per_device_eval_batch_size: 1
|
||||
per_device_train_batch_size: 4
|
||||
push_to_hub: true
|
||||
report_to:
|
||||
- wandb
|
||||
save_strategy: "epoch"
|
||||
save_total_limit: 1
|
||||
seed: 42
|
||||
use_liger: true
|
||||
wandb_entity: huggingface
|
||||
wandb_project: open-r1
|
||||
warmup_ratio: 0.03
|
||||
55
recipes/Qwen2.5-Coder-3B-Instruct/sft/config_v02.02.yaml
Normal file
55
recipes/Qwen2.5-Coder-3B-Instruct/sft/config_v02.02.yaml
Normal file
|
|
@ -0,0 +1,55 @@
|
|||
# Model arguments
|
||||
model_name_or_path: Qwen/Qwen2.5-Coder-3B-Instruct
|
||||
model_revision: main
|
||||
torch_dtype: bfloat16
|
||||
attn_implementation: flash_attention_2
|
||||
|
||||
# Data training arguments
|
||||
dataset_name: open-r1/OpenThoughts-114k-Curated
|
||||
dataset_configs:
|
||||
- all
|
||||
dataset_num_proc: 12
|
||||
|
||||
# SFT trainer config
|
||||
callbacks:
|
||||
- push_to_hub_revision
|
||||
benchmarks:
|
||||
- math_500
|
||||
- aime24
|
||||
- aime25
|
||||
- gpqa
|
||||
- lcb
|
||||
bf16: true
|
||||
do_eval: false
|
||||
gradient_accumulation_steps: 4
|
||||
gradient_checkpointing: true
|
||||
gradient_checkpointing_kwargs:
|
||||
use_reentrant: false
|
||||
hub_model_id: open-r1/Qwen2.5-Coder-3B-Instruct-SFT
|
||||
hub_model_revision: v02.02
|
||||
hub_strategy: every_save
|
||||
learning_rate: 5.0e-07
|
||||
log_level: info
|
||||
logging_steps: 1
|
||||
logging_strategy: steps
|
||||
lr_scheduler_type: cosine_with_min_lr
|
||||
lr_scheduler_kwargs:
|
||||
min_lr_rate: 0.1
|
||||
packing: false
|
||||
max_seq_length: 32768
|
||||
max_steps: -1
|
||||
num_train_epochs: 9
|
||||
output_dir: data/open-r1/Qwen2.5-Coder-3B-Instruct-SFT-v02.02
|
||||
overwrite_output_dir: true
|
||||
per_device_eval_batch_size: 1
|
||||
per_device_train_batch_size: 4
|
||||
push_to_hub: true
|
||||
report_to:
|
||||
- wandb
|
||||
save_strategy: "epoch"
|
||||
save_total_limit: 1
|
||||
seed: 42
|
||||
use_liger: true
|
||||
wandb_entity: huggingface
|
||||
wandb_project: open-r1
|
||||
warmup_ratio: 0.03
|
||||
53
recipes/Qwen2.5-Coder-3B-Instruct/sft/config_v03.00.yaml
Normal file
53
recipes/Qwen2.5-Coder-3B-Instruct/sft/config_v03.00.yaml
Normal file
|
|
@ -0,0 +1,53 @@
|
|||
# Config for 1 node
|
||||
# Model arguments
|
||||
model_name_or_path: Qwen/Qwen2.5-Coder-3B-Instruct
|
||||
model_revision: main
|
||||
torch_dtype: bfloat16
|
||||
attn_implementation: flash_attention_2
|
||||
|
||||
# Data training arguments
|
||||
dataset_name: open-r1/codeforces-cots_decontaminated
|
||||
dataset_config: solutions_all_cpp_py
|
||||
dataset_num_proc: 12
|
||||
|
||||
# SFT trainer config
|
||||
callbacks:
|
||||
- push_to_hub_revision
|
||||
benchmarks:
|
||||
- lcb_v4
|
||||
bf16: true
|
||||
do_eval: false
|
||||
eval_strategy: 'no'
|
||||
gradient_accumulation_steps: 8
|
||||
gradient_checkpointing: true
|
||||
gradient_checkpointing_kwargs:
|
||||
use_reentrant: false
|
||||
hub_model_id: open-r1/Qwen2.5-Coder-3B-Instruct-SFT
|
||||
hub_model_revision: v03.00
|
||||
hub_strategy: every_save
|
||||
learning_rate: 4.0e-05
|
||||
log_level: info
|
||||
logging_steps: 1
|
||||
logging_strategy: steps
|
||||
lr_scheduler_type: cosine_with_min_lr
|
||||
lr_scheduler_kwargs:
|
||||
min_lr_rate: 0.1
|
||||
packing: false
|
||||
max_grad_norm: 0.2
|
||||
max_length: 32768
|
||||
max_steps: -1
|
||||
num_train_epochs: 10
|
||||
output_dir: data/open-r1/Qwen2.5-Coder-3B-Instruct-SFT-v03.00
|
||||
overwrite_output_dir: true
|
||||
per_device_eval_batch_size: 1
|
||||
per_device_train_batch_size: 2
|
||||
push_to_hub: true
|
||||
report_to:
|
||||
- wandb
|
||||
save_strategy: epoch
|
||||
save_total_limit: 1
|
||||
seed: 42
|
||||
use_liger: true
|
||||
wandb_entity: huggingface
|
||||
wandb_project: open-r1
|
||||
warmup_ratio: 0.03
|
||||
52
recipes/Qwen2.5-Coder-7B-Instruct/sft/config_v00.00.yaml
Normal file
52
recipes/Qwen2.5-Coder-7B-Instruct/sft/config_v00.00.yaml
Normal file
|
|
@ -0,0 +1,52 @@
|
|||
# Model arguments
|
||||
model_name_or_path: Qwen/Qwen2.5-Coder-7B-Instruct
|
||||
model_revision: main
|
||||
torch_dtype: bfloat16
|
||||
attn_implementation: flash_attention_2
|
||||
|
||||
# Data training arguments
|
||||
dataset_name: open-r1/OpenThoughts-114k-Code_decontaminated
|
||||
dataset_configs:
|
||||
- all
|
||||
dataset_num_proc: 12
|
||||
|
||||
# SFT trainer config
|
||||
callbacks:
|
||||
- push_to_hub_revision
|
||||
benchmarks:
|
||||
- lcb
|
||||
bf16: true
|
||||
do_eval: false
|
||||
gradient_accumulation_steps: 8
|
||||
gradient_checkpointing: true
|
||||
gradient_checkpointing_kwargs:
|
||||
use_reentrant: false
|
||||
hub_model_id: open-r1/Qwen2.5-Coder-7B-Instruct-SFT
|
||||
hub_model_revision: v00.00
|
||||
hub_strategy: every_save
|
||||
learning_rate: 1.0e-06
|
||||
log_level: info
|
||||
logging_steps: 1
|
||||
logging_strategy: steps
|
||||
lr_scheduler_type: cosine_with_min_lr
|
||||
lr_scheduler_kwargs:
|
||||
min_lr_rate: 0.1
|
||||
packing: true
|
||||
max_length: 32768
|
||||
max_steps: -1
|
||||
num_train_epochs: 10
|
||||
output_dir: data/open-r1/Qwen2.5-Coder-7B-Instruct-SFT-v00.00
|
||||
overwrite_output_dir: true
|
||||
per_device_eval_batch_size: 1
|
||||
per_device_train_batch_size: 2
|
||||
push_to_hub: true
|
||||
report_to:
|
||||
- wandb
|
||||
save_strategy: "steps"
|
||||
save_steps: 0.2
|
||||
save_total_limit: 1
|
||||
seed: 42
|
||||
use_liger: true
|
||||
wandb_entity: huggingface
|
||||
wandb_project: open-r1
|
||||
warmup_ratio: 0.03
|
||||
52
recipes/Qwen2.5-Coder-7B-Instruct/sft/config_v00.01.yaml
Normal file
52
recipes/Qwen2.5-Coder-7B-Instruct/sft/config_v00.01.yaml
Normal file
|
|
@ -0,0 +1,52 @@
|
|||
# Model arguments
|
||||
model_name_or_path: Qwen/Qwen2.5-Coder-7B-Instruct
|
||||
model_revision: main
|
||||
torch_dtype: bfloat16
|
||||
attn_implementation: flash_attention_2
|
||||
|
||||
# Data training arguments
|
||||
dataset_name: open-r1/OpenThoughts-114k-Code_decontaminated
|
||||
dataset_configs:
|
||||
- all
|
||||
dataset_num_proc: 12
|
||||
|
||||
# SFT trainer config
|
||||
callbacks:
|
||||
- push_to_hub_revision
|
||||
benchmarks:
|
||||
- lcb
|
||||
bf16: true
|
||||
do_eval: false
|
||||
gradient_accumulation_steps: 8
|
||||
gradient_checkpointing: true
|
||||
gradient_checkpointing_kwargs:
|
||||
use_reentrant: false
|
||||
hub_model_id: open-r1/Qwen2.5-Coder-7B-Instruct-SFT
|
||||
hub_model_revision: v00.01
|
||||
hub_strategy: every_save
|
||||
learning_rate: 1.0e-06
|
||||
log_level: info
|
||||
logging_steps: 1
|
||||
logging_strategy: steps
|
||||
lr_scheduler_type: cosine_with_min_lr
|
||||
lr_scheduler_kwargs:
|
||||
min_lr_rate: 0.1
|
||||
packing: false
|
||||
max_length: 32768
|
||||
max_steps: -1
|
||||
num_train_epochs: 10
|
||||
output_dir: data/open-r1/Qwen2.5-Coder-7B-Instruct-SFT-v00.01
|
||||
overwrite_output_dir: true
|
||||
per_device_eval_batch_size: 1
|
||||
per_device_train_batch_size: 2
|
||||
push_to_hub: true
|
||||
report_to:
|
||||
- wandb
|
||||
save_strategy: "steps"
|
||||
save_steps: 0.2
|
||||
save_total_limit: 1
|
||||
seed: 42
|
||||
use_liger: true
|
||||
wandb_entity: huggingface
|
||||
wandb_project: open-r1
|
||||
warmup_ratio: 0.03
|
||||
52
recipes/Qwen2.5-Coder-7B-Instruct/sft/config_v01.00.yaml
Normal file
52
recipes/Qwen2.5-Coder-7B-Instruct/sft/config_v01.00.yaml
Normal file
|
|
@ -0,0 +1,52 @@
|
|||
# Model arguments
|
||||
model_name_or_path: Qwen/Qwen2.5-Coder-7B-Instruct
|
||||
model_revision: main
|
||||
torch_dtype: bfloat16
|
||||
attn_implementation: flash_attention_2
|
||||
|
||||
# Data training arguments
|
||||
dataset_name: open-r1/SYNTHETIC-1-SFT-Data-Code_decontaminated
|
||||
dataset_configs:
|
||||
- all
|
||||
dataset_num_proc: 12
|
||||
|
||||
# SFT trainer config
|
||||
callbacks:
|
||||
- push_to_hub_revision
|
||||
benchmarks:
|
||||
- lcb
|
||||
bf16: true
|
||||
do_eval: false
|
||||
gradient_accumulation_steps: 8
|
||||
gradient_checkpointing: true
|
||||
gradient_checkpointing_kwargs:
|
||||
use_reentrant: false
|
||||
hub_model_id: open-r1/Qwen2.5-Coder-7B-Instruct-SFT
|
||||
hub_model_revision: v01.00
|
||||
hub_strategy: every_save
|
||||
learning_rate: 1.0e-06
|
||||
log_level: info
|
||||
logging_steps: 1
|
||||
logging_strategy: steps
|
||||
lr_scheduler_type: cosine_with_min_lr
|
||||
lr_scheduler_kwargs:
|
||||
min_lr_rate: 0.1
|
||||
packing: true
|
||||
max_length: 32768
|
||||
max_steps: -1
|
||||
num_train_epochs: 10
|
||||
output_dir: data/open-r1/Qwen2.5-Coder-7B-Instruct-SFT-v01.00
|
||||
overwrite_output_dir: true
|
||||
per_device_eval_batch_size: 1
|
||||
per_device_train_batch_size: 2
|
||||
push_to_hub: true
|
||||
report_to:
|
||||
- wandb
|
||||
save_strategy: "steps"
|
||||
save_steps: 0.2
|
||||
save_total_limit: 1
|
||||
seed: 42
|
||||
use_liger: true
|
||||
wandb_entity: huggingface
|
||||
wandb_project: open-r1
|
||||
warmup_ratio: 0.03
|
||||
52
recipes/Qwen2.5-Coder-7B-Instruct/sft/config_v01.01.yaml
Normal file
52
recipes/Qwen2.5-Coder-7B-Instruct/sft/config_v01.01.yaml
Normal file
|
|
@ -0,0 +1,52 @@
|
|||
# Model arguments
|
||||
model_name_or_path: Qwen/Qwen2.5-Coder-7B-Instruct
|
||||
model_revision: main
|
||||
torch_dtype: bfloat16
|
||||
attn_implementation: flash_attention_2
|
||||
|
||||
# Data training arguments
|
||||
dataset_name: open-r1/SYNTHETIC-1-SFT-Data-Code_decontaminated
|
||||
dataset_configs:
|
||||
- all
|
||||
dataset_num_proc: 12
|
||||
|
||||
# SFT trainer config
|
||||
callbacks:
|
||||
- push_to_hub_revision
|
||||
benchmarks:
|
||||
- lcb
|
||||
bf16: true
|
||||
do_eval: false
|
||||
gradient_accumulation_steps: 8
|
||||
gradient_checkpointing: true
|
||||
gradient_checkpointing_kwargs:
|
||||
use_reentrant: false
|
||||
hub_model_id: open-r1/Qwen2.5-Coder-7B-Instruct-SFT
|
||||
hub_model_revision: v01.01
|
||||
hub_strategy: every_save
|
||||
learning_rate: 1.0e-06
|
||||
log_level: info
|
||||
logging_steps: 1
|
||||
logging_strategy: steps
|
||||
lr_scheduler_type: cosine_with_min_lr
|
||||
lr_scheduler_kwargs:
|
||||
min_lr_rate: 0.1
|
||||
packing: false
|
||||
max_length: 32768
|
||||
max_steps: -1
|
||||
num_train_epochs: 10
|
||||
output_dir: data/open-r1/Qwen2.5-Coder-7B-Instruct-SFT-v01.01
|
||||
overwrite_output_dir: true
|
||||
per_device_eval_batch_size: 1
|
||||
per_device_train_batch_size: 2
|
||||
push_to_hub: true
|
||||
report_to:
|
||||
- wandb
|
||||
save_strategy: "steps"
|
||||
save_steps: 0.2
|
||||
save_total_limit: 1
|
||||
seed: 42
|
||||
use_liger: true
|
||||
wandb_entity: huggingface
|
||||
wandb_project: open-r1
|
||||
warmup_ratio: 0.03
|
||||
52
recipes/Qwen2.5-Coder-7B-Instruct/sft/config_v02.00.yaml
Normal file
52
recipes/Qwen2.5-Coder-7B-Instruct/sft/config_v02.00.yaml
Normal file
|
|
@ -0,0 +1,52 @@
|
|||
# Model arguments
|
||||
model_name_or_path: Qwen/Qwen2.5-Coder-7B-Instruct
|
||||
model_revision: main
|
||||
torch_dtype: bfloat16
|
||||
attn_implementation: flash_attention_2
|
||||
|
||||
# Data training arguments
|
||||
dataset_name: open-r1/codeforces-cots_decontaminated
|
||||
dataset_config: solutions
|
||||
dataset_num_proc: 48
|
||||
|
||||
# SFT trainer config
|
||||
callbacks:
|
||||
- push_to_hub_revision
|
||||
benchmarks:
|
||||
- lcb
|
||||
bf16: true
|
||||
do_eval: false
|
||||
eval_strategy: 'no'
|
||||
gradient_accumulation_steps: 8
|
||||
gradient_checkpointing: true
|
||||
gradient_checkpointing_kwargs:
|
||||
use_reentrant: false
|
||||
hub_model_id: open-r1/Qwen2.5-Coder-7B-Instruct-SFT
|
||||
hub_model_revision: v02.00
|
||||
hub_strategy: every_save
|
||||
learning_rate: 1.0e-05
|
||||
log_level: info
|
||||
logging_steps: 1
|
||||
logging_strategy: steps
|
||||
lr_scheduler_type: cosine_with_min_lr
|
||||
lr_scheduler_kwargs:
|
||||
min_lr_rate: 0.1
|
||||
packing: true
|
||||
max_length: 32768
|
||||
max_steps: -1
|
||||
num_train_epochs: 10
|
||||
output_dir: data/open-r1/Qwen2.5-Coder-7B-Instruct-SFT-v02.00
|
||||
overwrite_output_dir: true
|
||||
per_device_eval_batch_size: 1
|
||||
per_device_train_batch_size: 2
|
||||
push_to_hub: true
|
||||
report_to:
|
||||
- wandb
|
||||
save_strategy: "steps"
|
||||
save_steps: 0.2
|
||||
save_total_limit: 1
|
||||
seed: 42
|
||||
use_liger: true
|
||||
wandb_entity: huggingface
|
||||
wandb_project: open-r1
|
||||
warmup_ratio: 0.03
|
||||
52
recipes/Qwen2.5-Coder-7B-Instruct/sft/config_v03.00.yaml
Normal file
52
recipes/Qwen2.5-Coder-7B-Instruct/sft/config_v03.00.yaml
Normal file
|
|
@ -0,0 +1,52 @@
|
|||
# Model arguments
|
||||
model_name_or_path: Qwen/Qwen2.5-Coder-7B-Instruct
|
||||
model_revision: main
|
||||
torch_dtype: bfloat16
|
||||
attn_implementation: flash_attention_2
|
||||
|
||||
# Data training arguments
|
||||
dataset_name: open-r1/codeforces-cots_decontaminated
|
||||
dataset_config: solutions_w_editorials
|
||||
dataset_num_proc: 48
|
||||
|
||||
# SFT trainer config
|
||||
callbacks:
|
||||
- push_to_hub_revision
|
||||
benchmarks:
|
||||
- lcb
|
||||
bf16: true
|
||||
do_eval: false
|
||||
eval_strategy: 'no'
|
||||
gradient_accumulation_steps: 8
|
||||
gradient_checkpointing: true
|
||||
gradient_checkpointing_kwargs:
|
||||
use_reentrant: false
|
||||
hub_model_id: open-r1/Qwen2.5-Coder-7B-Instruct-SFT
|
||||
hub_model_revision: v03.00
|
||||
hub_strategy: every_save
|
||||
learning_rate: 1.0e-05
|
||||
log_level: info
|
||||
logging_steps: 1
|
||||
logging_strategy: steps
|
||||
lr_scheduler_type: cosine_with_min_lr
|
||||
lr_scheduler_kwargs:
|
||||
min_lr_rate: 0.1
|
||||
packing: true
|
||||
max_length: 32768
|
||||
max_steps: -1
|
||||
num_train_epochs: 10
|
||||
output_dir: data/open-r1/Qwen2.5-Coder-7B-Instruct-SFT-v03.00
|
||||
overwrite_output_dir: true
|
||||
per_device_eval_batch_size: 1
|
||||
per_device_train_batch_size: 2
|
||||
push_to_hub: true
|
||||
report_to:
|
||||
- wandb
|
||||
save_strategy: "steps"
|
||||
save_steps: 0.2
|
||||
save_total_limit: 1
|
||||
seed: 42
|
||||
use_liger: true
|
||||
wandb_entity: huggingface
|
||||
wandb_project: open-r1
|
||||
warmup_ratio: 0.03
|
||||
52
recipes/Qwen2.5-Coder-7B-Instruct/sft/config_v04.00.yaml
Normal file
52
recipes/Qwen2.5-Coder-7B-Instruct/sft/config_v04.00.yaml
Normal file
|
|
@ -0,0 +1,52 @@
|
|||
# Model arguments
|
||||
model_name_or_path: Qwen/Qwen2.5-Coder-7B-Instruct
|
||||
model_revision: main
|
||||
torch_dtype: bfloat16
|
||||
attn_implementation: flash_attention_2
|
||||
|
||||
# Data training arguments
|
||||
dataset_name: open-r1/codeforces-cots_decontaminated
|
||||
dataset_config: test_input_generator
|
||||
dataset_num_proc: 48
|
||||
|
||||
# SFT trainer config
|
||||
callbacks:
|
||||
- push_to_hub_revision
|
||||
benchmarks:
|
||||
- lcb
|
||||
bf16: true
|
||||
do_eval: false
|
||||
eval_strategy: 'no'
|
||||
gradient_accumulation_steps: 8
|
||||
gradient_checkpointing: true
|
||||
gradient_checkpointing_kwargs:
|
||||
use_reentrant: false
|
||||
hub_model_id: open-r1/Qwen2.5-Coder-7B-Instruct-SFT
|
||||
hub_model_revision: v04.00
|
||||
hub_strategy: every_save
|
||||
learning_rate: 1.0e-05
|
||||
log_level: info
|
||||
logging_steps: 1
|
||||
logging_strategy: steps
|
||||
lr_scheduler_type: cosine_with_min_lr
|
||||
lr_scheduler_kwargs:
|
||||
min_lr_rate: 0.1
|
||||
packing: true
|
||||
max_length: 32768
|
||||
max_steps: -1
|
||||
num_train_epochs: 10
|
||||
output_dir: data/open-r1/Qwen2.5-Coder-7B-Instruct-SFT-v04.00
|
||||
overwrite_output_dir: true
|
||||
per_device_eval_batch_size: 1
|
||||
per_device_train_batch_size: 2
|
||||
push_to_hub: true
|
||||
report_to:
|
||||
- wandb
|
||||
save_strategy: "steps"
|
||||
save_steps: 0.2
|
||||
save_total_limit: 1
|
||||
seed: 42
|
||||
use_liger: true
|
||||
wandb_entity: huggingface
|
||||
wandb_project: open-r1
|
||||
warmup_ratio: 0.03
|
||||
52
recipes/Qwen2.5-Coder-7B-Instruct/sft/config_v05.00.yaml
Normal file
52
recipes/Qwen2.5-Coder-7B-Instruct/sft/config_v05.00.yaml
Normal file
|
|
@ -0,0 +1,52 @@
|
|||
# Model arguments
|
||||
model_name_or_path: Qwen/Qwen2.5-Coder-7B-Instruct
|
||||
model_revision: main
|
||||
torch_dtype: bfloat16
|
||||
attn_implementation: flash_attention_2
|
||||
|
||||
# Data training arguments
|
||||
dataset_name: open-r1/codeforces-cots_decontaminated
|
||||
dataset_config: checker_interactor
|
||||
dataset_num_proc: 48
|
||||
|
||||
# SFT trainer config
|
||||
callbacks:
|
||||
- push_to_hub_revision
|
||||
benchmarks:
|
||||
- lcb
|
||||
bf16: true
|
||||
do_eval: false
|
||||
eval_strategy: 'no'
|
||||
gradient_accumulation_steps: 8
|
||||
gradient_checkpointing: true
|
||||
gradient_checkpointing_kwargs:
|
||||
use_reentrant: false
|
||||
hub_model_id: open-r1/Qwen2.5-Coder-7B-Instruct-SFT
|
||||
hub_model_revision: v05.00
|
||||
hub_strategy: every_save
|
||||
learning_rate: 1.0e-05
|
||||
log_level: info
|
||||
logging_steps: 1
|
||||
logging_strategy: steps
|
||||
lr_scheduler_type: cosine_with_min_lr
|
||||
lr_scheduler_kwargs:
|
||||
min_lr_rate: 0.1
|
||||
packing: true
|
||||
max_length: 32768
|
||||
max_steps: -1
|
||||
num_train_epochs: 10
|
||||
output_dir: data/open-r1/Qwen2.5-Coder-7B-Instruct-SFT-v05.00
|
||||
overwrite_output_dir: true
|
||||
per_device_eval_batch_size: 1
|
||||
per_device_train_batch_size: 2
|
||||
push_to_hub: true
|
||||
report_to:
|
||||
- wandb
|
||||
save_strategy: "steps"
|
||||
save_steps: 0.2
|
||||
save_total_limit: 1
|
||||
seed: 42
|
||||
use_liger: true
|
||||
wandb_entity: huggingface
|
||||
wandb_project: open-r1
|
||||
warmup_ratio: 0.03
|
||||
51
recipes/Qwen2.5-Coder-7B-Instruct/sft/config_v06.00.yaml
Normal file
51
recipes/Qwen2.5-Coder-7B-Instruct/sft/config_v06.00.yaml
Normal file
|
|
@ -0,0 +1,51 @@
|
|||
# Model arguments
|
||||
model_name_or_path: Qwen/Qwen2.5-Coder-7B-Instruct
|
||||
model_revision: main
|
||||
torch_dtype: bfloat16
|
||||
attn_implementation: flash_attention_2
|
||||
|
||||
# Data training arguments
|
||||
dataset_name: open-r1/codeforces_cots_w_openthoughts_decontaminated
|
||||
dataset_config: solutions
|
||||
dataset_num_proc: 48
|
||||
|
||||
# SFT trainer config
|
||||
callbacks:
|
||||
- push_to_hub_revision
|
||||
benchmarks:
|
||||
- lcb
|
||||
bf16: true
|
||||
do_eval: false
|
||||
eval_strategy: 'no'
|
||||
gradient_accumulation_steps: 8
|
||||
gradient_checkpointing: true
|
||||
gradient_checkpointing_kwargs:
|
||||
use_reentrant: false
|
||||
hub_model_id: open-r1/Qwen2.5-Coder-7B-Instruct-SFT
|
||||
hub_model_revision: v06.00
|
||||
hub_strategy: every_save
|
||||
learning_rate: 1.0e-05
|
||||
log_level: info
|
||||
logging_steps: 1
|
||||
logging_strategy: steps
|
||||
lr_scheduler_type: cosine_with_min_lr
|
||||
lr_scheduler_kwargs:
|
||||
min_lr_rate: 0.1
|
||||
packing: true
|
||||
max_length: 32768
|
||||
max_steps: -1
|
||||
num_train_epochs: 10
|
||||
output_dir: data/open-r1/Qwen2.5-Coder-7B-Instruct-SFT-v06.00
|
||||
overwrite_output_dir: true
|
||||
per_device_eval_batch_size: 1
|
||||
per_device_train_batch_size: 2
|
||||
push_to_hub: true
|
||||
report_to:
|
||||
- wandb
|
||||
save_strategy: epoch
|
||||
save_total_limit: 1
|
||||
seed: 42
|
||||
use_liger: true
|
||||
wandb_entity: huggingface
|
||||
wandb_project: open-r1
|
||||
warmup_ratio: 0.03
|
||||
51
recipes/Qwen2.5-Coder-7B-Instruct/sft/config_v07.00.yaml
Normal file
51
recipes/Qwen2.5-Coder-7B-Instruct/sft/config_v07.00.yaml
Normal file
|
|
@ -0,0 +1,51 @@
|
|||
# Model arguments
|
||||
model_name_or_path: Qwen/Qwen2.5-Coder-7B-Instruct
|
||||
model_revision: main
|
||||
torch_dtype: bfloat16
|
||||
attn_implementation: flash_attention_2
|
||||
|
||||
# Data training arguments
|
||||
dataset_name: open-r1/codeforces_cots_w_openthoughts_decontaminated
|
||||
dataset_config: solutions_w_editorials
|
||||
dataset_num_proc: 48
|
||||
|
||||
# SFT trainer config
|
||||
callbacks:
|
||||
- push_to_hub_revision
|
||||
benchmarks:
|
||||
- lcb
|
||||
bf16: true
|
||||
do_eval: false
|
||||
eval_strategy: 'no'
|
||||
gradient_accumulation_steps: 8
|
||||
gradient_checkpointing: true
|
||||
gradient_checkpointing_kwargs:
|
||||
use_reentrant: false
|
||||
hub_model_id: open-r1/Qwen2.5-Coder-7B-Instruct-SFT
|
||||
hub_model_revision: v07.00
|
||||
hub_strategy: every_save
|
||||
learning_rate: 1.0e-05
|
||||
log_level: info
|
||||
logging_steps: 1
|
||||
logging_strategy: steps
|
||||
lr_scheduler_type: cosine_with_min_lr
|
||||
lr_scheduler_kwargs:
|
||||
min_lr_rate: 0.1
|
||||
packing: true
|
||||
max_length: 32768
|
||||
max_steps: -1
|
||||
num_train_epochs: 10
|
||||
output_dir: data/open-r1/Qwen2.5-Coder-7B-Instruct-SFT-v07.00
|
||||
overwrite_output_dir: true
|
||||
per_device_eval_batch_size: 1
|
||||
per_device_train_batch_size: 2
|
||||
push_to_hub: true
|
||||
report_to:
|
||||
- wandb
|
||||
save_strategy: epoch
|
||||
save_total_limit: 1
|
||||
seed: 42
|
||||
use_liger: true
|
||||
wandb_entity: huggingface
|
||||
wandb_project: open-r1
|
||||
warmup_ratio: 0.03
|
||||
51
recipes/Qwen2.5-Coder-7B-Instruct/sft/config_v08.00.yaml
Normal file
51
recipes/Qwen2.5-Coder-7B-Instruct/sft/config_v08.00.yaml
Normal file
|
|
@ -0,0 +1,51 @@
|
|||
# Model arguments
|
||||
model_name_or_path: Qwen/Qwen2.5-Coder-7B-Instruct
|
||||
model_revision: main
|
||||
torch_dtype: bfloat16
|
||||
attn_implementation: flash_attention_2
|
||||
|
||||
# Data training arguments
|
||||
dataset_name: open-r1/codeforces-cots_decontaminated
|
||||
dataset_config: solutions_w_editorials_cpp_py
|
||||
dataset_num_proc: 48
|
||||
|
||||
# SFT trainer config
|
||||
callbacks:
|
||||
- push_to_hub_revision
|
||||
benchmarks:
|
||||
- lcb
|
||||
bf16: true
|
||||
do_eval: false
|
||||
eval_strategy: 'no'
|
||||
gradient_accumulation_steps: 8
|
||||
gradient_checkpointing: true
|
||||
gradient_checkpointing_kwargs:
|
||||
use_reentrant: false
|
||||
hub_model_id: open-r1/Qwen2.5-Coder-7B-Instruct-SFT
|
||||
hub_model_revision: v08.00
|
||||
hub_strategy: every_save
|
||||
learning_rate: 1.0e-05
|
||||
log_level: info
|
||||
logging_steps: 1
|
||||
logging_strategy: steps
|
||||
lr_scheduler_type: cosine_with_min_lr
|
||||
lr_scheduler_kwargs:
|
||||
min_lr_rate: 0.1
|
||||
packing: true
|
||||
max_length: 32768
|
||||
max_steps: -1
|
||||
num_train_epochs: 10
|
||||
output_dir: data/open-r1/Qwen2.5-Coder-7B-Instruct-SFT-v08.00
|
||||
overwrite_output_dir: true
|
||||
per_device_eval_batch_size: 1
|
||||
per_device_train_batch_size: 2
|
||||
push_to_hub: true
|
||||
report_to:
|
||||
- wandb
|
||||
save_strategy: epoch
|
||||
save_total_limit: 1
|
||||
seed: 42
|
||||
use_liger: true
|
||||
wandb_entity: huggingface
|
||||
wandb_project: open-r1
|
||||
warmup_ratio: 0.03
|
||||
52
recipes/Qwen2.5-Coder-7B-Instruct/sft/config_v09.00.yaml
Normal file
52
recipes/Qwen2.5-Coder-7B-Instruct/sft/config_v09.00.yaml
Normal file
|
|
@ -0,0 +1,52 @@
|
|||
# Config for 4 nodes to go fast
|
||||
# Model arguments
|
||||
model_name_or_path: Qwen/Qwen2.5-Coder-7B-Instruct
|
||||
model_revision: main
|
||||
torch_dtype: bfloat16
|
||||
attn_implementation: flash_attention_2
|
||||
|
||||
# Data training arguments
|
||||
dataset_name: open-r1/ioi-cots
|
||||
dataset_num_proc: 12
|
||||
|
||||
# SFT trainer config
|
||||
callbacks:
|
||||
- push_to_hub_revision
|
||||
benchmarks:
|
||||
- lcb_v4
|
||||
- lcb
|
||||
bf16: true
|
||||
do_eval: false
|
||||
eval_strategy: 'no'
|
||||
gradient_accumulation_steps: 2
|
||||
gradient_checkpointing: true
|
||||
gradient_checkpointing_kwargs:
|
||||
use_reentrant: false
|
||||
hub_model_id: open-r1/Qwen2.5-Coder-7B-Instruct-SFT
|
||||
hub_model_revision: v09.00
|
||||
hub_strategy: every_save
|
||||
learning_rate: 4.0e-05
|
||||
log_level: info
|
||||
logging_steps: 1
|
||||
logging_strategy: steps
|
||||
lr_scheduler_type: cosine_with_min_lr
|
||||
lr_scheduler_kwargs:
|
||||
min_lr_rate: 0.1
|
||||
packing: false
|
||||
max_length: 32768
|
||||
max_steps: -1
|
||||
num_train_epochs: 10
|
||||
output_dir: data/open-r1/Qwen2.5-Coder-7B-Instruct-SFT-v09.00
|
||||
overwrite_output_dir: true
|
||||
per_device_eval_batch_size: 1
|
||||
per_device_train_batch_size: 2
|
||||
push_to_hub: true
|
||||
report_to:
|
||||
- wandb
|
||||
save_strategy: epoch
|
||||
save_total_limit: 1
|
||||
seed: 42
|
||||
use_liger: true
|
||||
wandb_entity: huggingface
|
||||
wandb_project: open-r1
|
||||
warmup_ratio: 0.03
|
||||
53
recipes/Qwen2.5-Coder-7B-Instruct/sft/config_v10.00.yaml
Normal file
53
recipes/Qwen2.5-Coder-7B-Instruct/sft/config_v10.00.yaml
Normal file
|
|
@ -0,0 +1,53 @@
|
|||
# Config for 1 node
|
||||
# Model arguments
|
||||
model_name_or_path: Qwen/Qwen2.5-Coder-7B-Instruct
|
||||
model_revision: main
|
||||
torch_dtype: bfloat16
|
||||
attn_implementation: flash_attention_2
|
||||
|
||||
# Data training arguments
|
||||
dataset_name: open-r1/codeforces-ioi-cots-mix
|
||||
dataset_config: solutions_stop_compile
|
||||
dataset_num_proc: 12
|
||||
|
||||
# SFT trainer config
|
||||
callbacks:
|
||||
- push_to_hub_revision
|
||||
benchmarks:
|
||||
- lcb_v4
|
||||
bf16: true
|
||||
do_eval: false
|
||||
eval_strategy: 'no'
|
||||
gradient_accumulation_steps: 8
|
||||
gradient_checkpointing: true
|
||||
gradient_checkpointing_kwargs:
|
||||
use_reentrant: false
|
||||
hub_model_id: open-r1/Qwen2.5-Coder-7B-Instruct-SFT
|
||||
hub_model_revision: v10.00
|
||||
hub_strategy: every_save
|
||||
learning_rate: 4.0e-05
|
||||
log_level: info
|
||||
logging_steps: 1
|
||||
logging_strategy: steps
|
||||
lr_scheduler_type: cosine_with_min_lr
|
||||
lr_scheduler_kwargs:
|
||||
min_lr_rate: 0.1
|
||||
packing: false
|
||||
max_grad_norm: 0.2
|
||||
max_length: 32768
|
||||
max_steps: -1
|
||||
num_train_epochs: 10
|
||||
output_dir: data/open-r1/Qwen2.5-Coder-7B-Instruct-SFT-v10.00
|
||||
overwrite_output_dir: true
|
||||
per_device_eval_batch_size: 1
|
||||
per_device_train_batch_size: 2
|
||||
push_to_hub: true
|
||||
report_to:
|
||||
- wandb
|
||||
save_strategy: epoch
|
||||
save_total_limit: 1
|
||||
seed: 42
|
||||
use_liger: true
|
||||
wandb_entity: huggingface
|
||||
wandb_project: open-r1
|
||||
warmup_ratio: 0.03
|
||||
53
recipes/Qwen2.5-Coder-7B-Instruct/sft/config_v11.00.yaml
Normal file
53
recipes/Qwen2.5-Coder-7B-Instruct/sft/config_v11.00.yaml
Normal file
|
|
@ -0,0 +1,53 @@
|
|||
# Config for 1 node
|
||||
# Model arguments
|
||||
model_name_or_path: Qwen/Qwen2.5-Coder-7B-Instruct
|
||||
model_revision: main
|
||||
torch_dtype: bfloat16
|
||||
attn_implementation: flash_attention_2
|
||||
|
||||
# Data training arguments
|
||||
dataset_name: open-r1/codeforces-cots_decontaminated
|
||||
dataset_config: solutions_all
|
||||
dataset_num_proc: 12
|
||||
|
||||
# SFT trainer config
|
||||
callbacks:
|
||||
- push_to_hub_revision
|
||||
benchmarks:
|
||||
- lcb_v4
|
||||
bf16: true
|
||||
do_eval: false
|
||||
eval_strategy: 'no'
|
||||
gradient_accumulation_steps: 8
|
||||
gradient_checkpointing: true
|
||||
gradient_checkpointing_kwargs:
|
||||
use_reentrant: false
|
||||
hub_model_id: open-r1/Qwen2.5-Coder-7B-Instruct-SFT
|
||||
hub_model_revision: v11.00
|
||||
hub_strategy: every_save
|
||||
learning_rate: 4.0e-05
|
||||
log_level: info
|
||||
logging_steps: 1
|
||||
logging_strategy: steps
|
||||
lr_scheduler_type: cosine_with_min_lr
|
||||
lr_scheduler_kwargs:
|
||||
min_lr_rate: 0.1
|
||||
packing: false
|
||||
max_grad_norm: 0.2
|
||||
max_length: 32768
|
||||
max_steps: -1
|
||||
num_train_epochs: 10
|
||||
output_dir: data/open-r1/Qwen2.5-Coder-7B-Instruct-SFT-v11.00
|
||||
overwrite_output_dir: true
|
||||
per_device_eval_batch_size: 1
|
||||
per_device_train_batch_size: 2
|
||||
push_to_hub: true
|
||||
report_to:
|
||||
- wandb
|
||||
save_strategy: epoch
|
||||
save_total_limit: 1
|
||||
seed: 42
|
||||
use_liger: true
|
||||
wandb_entity: huggingface
|
||||
wandb_project: open-r1
|
||||
warmup_ratio: 0.03
|
||||
53
recipes/Qwen2.5-Coder-7B-Instruct/sft/config_v12.00.yaml
Normal file
53
recipes/Qwen2.5-Coder-7B-Instruct/sft/config_v12.00.yaml
Normal file
|
|
@ -0,0 +1,53 @@
|
|||
# Config for 1 node
|
||||
# Model arguments
|
||||
model_name_or_path: Qwen/Qwen2.5-Coder-7B-Instruct
|
||||
model_revision: main
|
||||
torch_dtype: bfloat16
|
||||
attn_implementation: flash_attention_2
|
||||
|
||||
# Data training arguments
|
||||
dataset_name: open-r1/codeforces-cots_decontaminated
|
||||
dataset_config: solutions_all_cpp_py
|
||||
dataset_num_proc: 12
|
||||
|
||||
# SFT trainer config
|
||||
callbacks:
|
||||
- push_to_hub_revision
|
||||
benchmarks:
|
||||
- lcb_v4
|
||||
bf16: true
|
||||
do_eval: false
|
||||
eval_strategy: 'no'
|
||||
gradient_accumulation_steps: 8
|
||||
gradient_checkpointing: true
|
||||
gradient_checkpointing_kwargs:
|
||||
use_reentrant: false
|
||||
hub_model_id: open-r1/Qwen2.5-Coder-7B-Instruct-SFT
|
||||
hub_model_revision: v12.00
|
||||
hub_strategy: every_save
|
||||
learning_rate: 4.0e-05
|
||||
log_level: info
|
||||
logging_steps: 1
|
||||
logging_strategy: steps
|
||||
lr_scheduler_type: cosine_with_min_lr
|
||||
lr_scheduler_kwargs:
|
||||
min_lr_rate: 0.1
|
||||
packing: false
|
||||
max_grad_norm: 0.2
|
||||
max_length: 32768
|
||||
max_steps: -1
|
||||
num_train_epochs: 10
|
||||
output_dir: data/open-r1/Qwen2.5-Coder-7B-Instruct-SFT-v12.00
|
||||
overwrite_output_dir: true
|
||||
per_device_eval_batch_size: 1
|
||||
per_device_train_batch_size: 2
|
||||
push_to_hub: true
|
||||
report_to:
|
||||
- wandb
|
||||
save_strategy: epoch
|
||||
save_total_limit: 1
|
||||
seed: 42
|
||||
use_liger: true
|
||||
wandb_entity: huggingface
|
||||
wandb_project: open-r1
|
||||
warmup_ratio: 0.03
|
||||
53
recipes/Qwen2.5-Coder-7B-Instruct/sft/config_v13.00.yaml
Normal file
53
recipes/Qwen2.5-Coder-7B-Instruct/sft/config_v13.00.yaml
Normal file
|
|
@ -0,0 +1,53 @@
|
|||
# Config for 1 node
|
||||
# Model arguments
|
||||
model_name_or_path: Qwen/Qwen2.5-Coder-7B-Instruct
|
||||
model_revision: main
|
||||
torch_dtype: bfloat16
|
||||
attn_implementation: flash_attention_2
|
||||
|
||||
# Data training arguments
|
||||
dataset_name: open-r1/codeforces-cots_decontaminated
|
||||
dataset_config: solutions_cpp_py
|
||||
dataset_num_proc: 12
|
||||
|
||||
# SFT trainer config
|
||||
callbacks:
|
||||
- push_to_hub_revision
|
||||
benchmarks:
|
||||
- lcb_v4
|
||||
bf16: true
|
||||
do_eval: false
|
||||
eval_strategy: 'no'
|
||||
gradient_accumulation_steps: 8
|
||||
gradient_checkpointing: true
|
||||
gradient_checkpointing_kwargs:
|
||||
use_reentrant: false
|
||||
hub_model_id: open-r1/Qwen2.5-Coder-7B-Instruct-SFT
|
||||
hub_model_revision: v13.00
|
||||
hub_strategy: every_save
|
||||
learning_rate: 4.0e-05
|
||||
log_level: info
|
||||
logging_steps: 1
|
||||
logging_strategy: steps
|
||||
lr_scheduler_type: cosine_with_min_lr
|
||||
lr_scheduler_kwargs:
|
||||
min_lr_rate: 0.1
|
||||
packing: false
|
||||
max_grad_norm: 0.2
|
||||
max_length: 32768
|
||||
max_steps: -1
|
||||
num_train_epochs: 10
|
||||
output_dir: data/open-r1/Qwen2.5-Coder-7B-Instruct-SFT-v13.00
|
||||
overwrite_output_dir: true
|
||||
per_device_eval_batch_size: 1
|
||||
per_device_train_batch_size: 2
|
||||
push_to_hub: true
|
||||
report_to:
|
||||
- wandb
|
||||
save_strategy: epoch
|
||||
save_total_limit: 1
|
||||
seed: 42
|
||||
use_liger: true
|
||||
wandb_entity: huggingface
|
||||
wandb_project: open-r1
|
||||
warmup_ratio: 0.03
|
||||
53
recipes/Qwen2.5-Coder-7B-Instruct/sft/config_v14.00.yaml
Normal file
53
recipes/Qwen2.5-Coder-7B-Instruct/sft/config_v14.00.yaml
Normal file
|
|
@ -0,0 +1,53 @@
|
|||
# Config for 1 node
|
||||
# Model arguments
|
||||
model_name_or_path: Qwen/Qwen2.5-Coder-7B-Instruct
|
||||
model_revision: main
|
||||
torch_dtype: bfloat16
|
||||
attn_implementation: flash_attention_2
|
||||
|
||||
# Data training arguments
|
||||
dataset_name: open-r1/codeforces-cots_decontaminated
|
||||
dataset_config: solutions_shortest
|
||||
dataset_num_proc: 12
|
||||
|
||||
# SFT trainer config
|
||||
callbacks:
|
||||
- push_to_hub_revision
|
||||
benchmarks:
|
||||
- lcb_v4
|
||||
bf16: true
|
||||
do_eval: false
|
||||
eval_strategy: 'no'
|
||||
gradient_accumulation_steps: 8
|
||||
gradient_checkpointing: true
|
||||
gradient_checkpointing_kwargs:
|
||||
use_reentrant: false
|
||||
hub_model_id: open-r1/Qwen2.5-Coder-7B-Instruct-SFT
|
||||
hub_model_revision: v14.00
|
||||
hub_strategy: every_save
|
||||
learning_rate: 4.0e-05
|
||||
log_level: info
|
||||
logging_steps: 1
|
||||
logging_strategy: steps
|
||||
lr_scheduler_type: cosine_with_min_lr
|
||||
lr_scheduler_kwargs:
|
||||
min_lr_rate: 0.1
|
||||
packing: false
|
||||
max_grad_norm: 0.2
|
||||
max_length: 32768
|
||||
max_steps: -1
|
||||
num_train_epochs: 10
|
||||
output_dir: data/open-r1/Qwen2.5-Coder-7B-Instruct-SFT-v14.00
|
||||
overwrite_output_dir: true
|
||||
per_device_eval_batch_size: 1
|
||||
per_device_train_batch_size: 2
|
||||
push_to_hub: true
|
||||
report_to:
|
||||
- wandb
|
||||
save_strategy: epoch
|
||||
save_total_limit: 1
|
||||
seed: 42
|
||||
use_liger: true
|
||||
wandb_entity: huggingface
|
||||
wandb_project: open-r1
|
||||
warmup_ratio: 0.03
|
||||
52
recipes/Qwen2.5-Coder-7B-Instruct/sft/config_v15.00.yaml
Normal file
52
recipes/Qwen2.5-Coder-7B-Instruct/sft/config_v15.00.yaml
Normal file
|
|
@ -0,0 +1,52 @@
|
|||
# Config for 1 node
|
||||
# Model arguments
|
||||
model_name_or_path: Qwen/Qwen2.5-Coder-7B-Instruct
|
||||
model_revision: main
|
||||
torch_dtype: bfloat16
|
||||
attn_implementation: flash_attention_2
|
||||
|
||||
# Data training arguments
|
||||
dataset_name: open-r1/KodCode-V1-SFT-R1
|
||||
dataset_num_proc: 12
|
||||
|
||||
# SFT trainer config
|
||||
callbacks:
|
||||
- push_to_hub_revision
|
||||
benchmarks:
|
||||
- lcb_v4
|
||||
bf16: true
|
||||
do_eval: false
|
||||
eval_strategy: 'no'
|
||||
gradient_accumulation_steps: 8
|
||||
gradient_checkpointing: true
|
||||
gradient_checkpointing_kwargs:
|
||||
use_reentrant: false
|
||||
hub_model_id: open-r1/Qwen2.5-Coder-7B-Instruct-SFT
|
||||
hub_model_revision: v15.00
|
||||
hub_strategy: every_save
|
||||
learning_rate: 4.0e-05
|
||||
log_level: info
|
||||
logging_steps: 1
|
||||
logging_strategy: steps
|
||||
lr_scheduler_type: cosine_with_min_lr
|
||||
lr_scheduler_kwargs:
|
||||
min_lr_rate: 0.1
|
||||
packing: false
|
||||
max_grad_norm: 0.2
|
||||
max_length: 32768
|
||||
max_steps: -1
|
||||
num_train_epochs: 10
|
||||
output_dir: data/open-r1/Qwen2.5-Coder-7B-Instruct-SFT-v15.00
|
||||
overwrite_output_dir: true
|
||||
per_device_eval_batch_size: 1
|
||||
per_device_train_batch_size: 2
|
||||
push_to_hub: true
|
||||
report_to:
|
||||
- wandb
|
||||
save_strategy: epoch
|
||||
save_total_limit: 1
|
||||
seed: 42
|
||||
use_liger: true
|
||||
wandb_entity: huggingface
|
||||
wandb_project: open-r1
|
||||
warmup_ratio: 0.03
|
||||
53
recipes/Qwen2.5-Coder-7B-Instruct/sft/config_v16.00.yaml
Normal file
53
recipes/Qwen2.5-Coder-7B-Instruct/sft/config_v16.00.yaml
Normal file
|
|
@ -0,0 +1,53 @@
|
|||
# Config for 1 node
|
||||
# Model arguments
|
||||
model_name_or_path: Qwen/Qwen2.5-Coder-7B-Instruct
|
||||
model_revision: main
|
||||
torch_dtype: bfloat16
|
||||
attn_implementation: flash_attention_2
|
||||
|
||||
# Data training arguments
|
||||
dataset_name: open-r1/codeforces-cots_decontaminated
|
||||
dataset_config: solutions_longest
|
||||
dataset_num_proc: 12
|
||||
|
||||
# SFT trainer config
|
||||
callbacks:
|
||||
- push_to_hub_revision
|
||||
benchmarks:
|
||||
- lcb_v4
|
||||
bf16: true
|
||||
do_eval: false
|
||||
eval_strategy: 'no'
|
||||
gradient_accumulation_steps: 8
|
||||
gradient_checkpointing: true
|
||||
gradient_checkpointing_kwargs:
|
||||
use_reentrant: false
|
||||
hub_model_id: open-r1/Qwen2.5-Coder-7B-Instruct-SFT
|
||||
hub_model_revision: v16.00
|
||||
hub_strategy: every_save
|
||||
learning_rate: 4.0e-05
|
||||
log_level: info
|
||||
logging_steps: 1
|
||||
logging_strategy: steps
|
||||
lr_scheduler_type: cosine_with_min_lr
|
||||
lr_scheduler_kwargs:
|
||||
min_lr_rate: 0.1
|
||||
packing: false
|
||||
max_grad_norm: 0.2
|
||||
max_length: 32768
|
||||
max_steps: -1
|
||||
num_train_epochs: 10
|
||||
output_dir: data/open-r1/Qwen2.5-Coder-7B-Instruct-SFT-v16.00
|
||||
overwrite_output_dir: true
|
||||
per_device_eval_batch_size: 1
|
||||
per_device_train_batch_size: 2
|
||||
push_to_hub: true
|
||||
report_to:
|
||||
- wandb
|
||||
save_strategy: epoch
|
||||
save_total_limit: 1
|
||||
seed: 42
|
||||
use_liger: true
|
||||
wandb_entity: huggingface
|
||||
wandb_project: open-r1
|
||||
warmup_ratio: 0.03
|
||||
53
recipes/Qwen2.5-Coder-7B-Instruct/sft/config_v17.00.yaml
Normal file
53
recipes/Qwen2.5-Coder-7B-Instruct/sft/config_v17.00.yaml
Normal file
|
|
@ -0,0 +1,53 @@
|
|||
# Config for 1 node
|
||||
# Model arguments
|
||||
model_name_or_path: Qwen/Qwen2.5-Coder-7B-Instruct
|
||||
model_revision: main
|
||||
torch_dtype: bfloat16
|
||||
attn_implementation: flash_attention_2
|
||||
|
||||
# Data training arguments
|
||||
dataset_name: open-r1/codeforces-cots_decontaminated
|
||||
dataset_config: solutions_shortest_and_longest
|
||||
dataset_num_proc: 12
|
||||
|
||||
# SFT trainer config
|
||||
callbacks:
|
||||
- push_to_hub_revision
|
||||
benchmarks:
|
||||
- lcb_v4
|
||||
bf16: true
|
||||
do_eval: false
|
||||
eval_strategy: 'no'
|
||||
gradient_accumulation_steps: 8
|
||||
gradient_checkpointing: true
|
||||
gradient_checkpointing_kwargs:
|
||||
use_reentrant: false
|
||||
hub_model_id: open-r1/Qwen2.5-Coder-7B-Instruct-SFT
|
||||
hub_model_revision: v17.00
|
||||
hub_strategy: every_save
|
||||
learning_rate: 4.0e-05
|
||||
log_level: info
|
||||
logging_steps: 1
|
||||
logging_strategy: steps
|
||||
lr_scheduler_type: cosine_with_min_lr
|
||||
lr_scheduler_kwargs:
|
||||
min_lr_rate: 0.1
|
||||
packing: false
|
||||
max_grad_norm: 0.2
|
||||
max_length: 32768
|
||||
max_steps: -1
|
||||
num_train_epochs: 10
|
||||
output_dir: data/open-r1/Qwen2.5-Coder-7B-Instruct-SFT-v17.00
|
||||
overwrite_output_dir: true
|
||||
per_device_eval_batch_size: 1
|
||||
per_device_train_batch_size: 2
|
||||
push_to_hub: true
|
||||
report_to:
|
||||
- wandb
|
||||
save_strategy: epoch
|
||||
save_total_limit: 1
|
||||
seed: 42
|
||||
use_liger: true
|
||||
wandb_entity: huggingface
|
||||
wandb_project: open-r1
|
||||
warmup_ratio: 0.03
|
||||
27
recipes/accelerate_configs/fsdp.yaml
Normal file
27
recipes/accelerate_configs/fsdp.yaml
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
compute_environment: LOCAL_MACHINE
|
||||
debug: false
|
||||
distributed_type: FSDP
|
||||
downcast_bf16: 'no'
|
||||
enable_cpu_affinity: false
|
||||
fsdp_config:
|
||||
fsdp_activation_checkpointing: false # Need fix from: https://github.com/huggingface/transformers/pull/36610
|
||||
fsdp_auto_wrap_policy: TRANSFORMER_BASED_WRAP
|
||||
fsdp_backward_prefetch: BACKWARD_PRE
|
||||
fsdp_cpu_ram_efficient_loading: true
|
||||
fsdp_forward_prefetch: true
|
||||
fsdp_offload_params: false
|
||||
fsdp_sharding_strategy: FULL_SHARD
|
||||
fsdp_state_dict_type: FULL_STATE_DICT
|
||||
fsdp_sync_module_states: true
|
||||
fsdp_use_orig_params: true
|
||||
machine_rank: 0
|
||||
main_training_function: main
|
||||
mixed_precision: bf16
|
||||
num_machines: 1
|
||||
num_processes: 8
|
||||
rdzv_backend: static
|
||||
same_network: true
|
||||
tpu_env: []
|
||||
tpu_use_cluster: false
|
||||
tpu_use_sudo: false
|
||||
use_cpu: false
|
||||
27
recipes/accelerate_configs/fsdp_offload.yaml
Normal file
27
recipes/accelerate_configs/fsdp_offload.yaml
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
compute_environment: LOCAL_MACHINE
|
||||
debug: false
|
||||
distributed_type: FSDP
|
||||
downcast_bf16: 'no'
|
||||
enable_cpu_affinity: false
|
||||
fsdp_config:
|
||||
fsdp_activation_checkpointing: false # Need fix from: https://github.com/huggingface/transformers/pull/36610
|
||||
fsdp_auto_wrap_policy: TRANSFORMER_BASED_WRAP
|
||||
fsdp_backward_prefetch: BACKWARD_PRE
|
||||
fsdp_cpu_ram_efficient_loading: true
|
||||
fsdp_forward_prefetch: true
|
||||
fsdp_offload_params: true
|
||||
fsdp_sharding_strategy: FULL_SHARD
|
||||
fsdp_state_dict_type: FULL_STATE_DICT
|
||||
fsdp_sync_module_states: true
|
||||
fsdp_use_orig_params: true
|
||||
machine_rank: 0
|
||||
main_training_function: main
|
||||
mixed_precision: bf16
|
||||
num_machines: 1
|
||||
num_processes: 8
|
||||
rdzv_backend: static
|
||||
same_network: true
|
||||
tpu_env: []
|
||||
tpu_use_cluster: false
|
||||
tpu_use_sudo: false
|
||||
use_cpu: false
|
||||
22
recipes/accelerate_configs/zero3_offload.yaml
Normal file
22
recipes/accelerate_configs/zero3_offload.yaml
Normal file
|
|
@ -0,0 +1,22 @@
|
|||
compute_environment: LOCAL_MACHINE
|
||||
debug: false
|
||||
deepspeed_config:
|
||||
deepspeed_multinode_launcher: standard
|
||||
offload_optimizer_device: cpu
|
||||
offload_param_device: cpu
|
||||
zero3_init_flag: true
|
||||
zero3_save_16bit_model: true
|
||||
zero_stage: 3
|
||||
distributed_type: DEEPSPEED
|
||||
downcast_bf16: 'no'
|
||||
machine_rank: 0
|
||||
main_training_function: main
|
||||
mixed_precision: bf16
|
||||
num_machines: 1
|
||||
num_processes: 8
|
||||
rdzv_backend: static
|
||||
same_network: true
|
||||
tpu_env: []
|
||||
tpu_use_cluster: false
|
||||
tpu_use_sudo: false
|
||||
use_cpu: false
|
||||
50
recipes/gemma-3-12b-it/sft/config_v00.00.yaml
Normal file
50
recipes/gemma-3-12b-it/sft/config_v00.00.yaml
Normal file
|
|
@ -0,0 +1,50 @@
|
|||
# Model arguments
|
||||
model_name_or_path: google/gemma-3-12b-it
|
||||
model_revision: main
|
||||
torch_dtype: bfloat16
|
||||
attn_implementation: flash_attention_2
|
||||
|
||||
# Data training arguments
|
||||
dataset_name: open-r1/math-code-mix
|
||||
dataset_config: default_solutions
|
||||
dataset_num_proc: 12
|
||||
|
||||
# SFT trainer config
|
||||
callbacks:
|
||||
- push_to_hub_revision
|
||||
benchmarks:
|
||||
- lcb
|
||||
bf16: true
|
||||
do_eval: false
|
||||
gradient_accumulation_steps: 8
|
||||
gradient_checkpointing: true
|
||||
gradient_checkpointing_kwargs:
|
||||
use_reentrant: false
|
||||
hub_model_id: open-r1/gemma-3-12b-it-SFT
|
||||
hub_model_revision: v00.00
|
||||
hub_strategy: every_save
|
||||
learning_rate: 4.0e-05
|
||||
log_level: info
|
||||
logging_steps: 1
|
||||
logging_strategy: steps
|
||||
lr_scheduler_type: cosine_with_min_lr
|
||||
lr_scheduler_kwargs:
|
||||
min_lr_rate: 0.1
|
||||
packing: true
|
||||
max_length: 32768
|
||||
max_steps: -1
|
||||
num_train_epochs: 10
|
||||
output_dir: data/gemma-3-12b-it-SFT-v00.00
|
||||
overwrite_output_dir: true
|
||||
per_device_eval_batch_size: 1
|
||||
per_device_train_batch_size: 2
|
||||
push_to_hub: true
|
||||
report_to:
|
||||
- wandb
|
||||
save_strategy: epoch
|
||||
save_total_limit: 1
|
||||
seed: 42
|
||||
use_liger: true
|
||||
wandb_entity: huggingface
|
||||
wandb_project: open-r1
|
||||
warmup_ratio: 0.03
|
||||
|
|
@ -140,6 +140,8 @@ if __name__ == "__main__":
|
|||
if args.cleanup:
|
||||
ds = cleanup(ds)
|
||||
|
||||
ds = ds.rename_column(f"prompt", "problem")
|
||||
|
||||
new_ds_name = args.new_dataset_name or f"{args.dataset}_decontaminated"
|
||||
config_name = args.config if args.config is not None else "default"
|
||||
url = ds.push_to_hub(new_ds_name, config_name=config_name, split="train")
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue