Compare commits

...

23 commits

Author SHA1 Message Date
Lewis Tunstall
43dd44e420 Add OlympicCoder 3B 2025-03-31 15:36:34 +00:00
Lewis Tunstall
be64fefb9f Add long and short recipes 2025-03-23 07:46:04 +00:00
Lewis Tunstall
abee7a20cb Add v11-v13 ablations 2025-03-16 07:52:11 +00:00
Lewis Tunstall
0fdad2f978 Tune recipe 2025-03-13 15:44:13 +00:00
Lewis Tunstall
eb2a3aa400 Add QwQ 2025-03-12 21:07:12 +00:00
Lewis Tunstall
887dbe97f8 Add moar recipes 2025-03-12 12:52:04 +00:00
Lewis Tunstall
7e1bd3791b Add IOI configs 2025-03-10 20:52:01 +00:00
Lewis Tunstall
83c271bca5 Reduce context for OOM 2025-03-10 13:38:23 +00:00
Lewis Tunstall
8677506cfc fix 2025-03-10 12:30:48 +00:00
Lewis Tunstall
6dab011078 Reveett 2025-03-10 12:28:27 +00:00
Lewis Tunstall
4daec5ad9d Align ds configs 2025-03-10 10:36:02 +00:00
Lewis Tunstall
a6b8da745f Fix optim 2025-03-10 07:54:55 +00:00
Lewis Tunstall
4bb2495296 Add fsdp 2025-03-09 21:54:54 +00:00
Lewis Tunstall
6bab2d828a Disable Liger 2025-03-09 15:35:59 +00:00
Lewis Tunstall
0523624e21 Add 32B recipe 2025-03-09 14:47:58 +00:00
Lewis Tunstall
f82658d608 Add v08 2025-03-05 09:36:00 +00:00
lewtun
bc281a2f26
Merge branch 'main' into qwen-coder-sft-configs 2025-03-04 16:22:04 +01:00
Lewis Tunstall
ba27a99028 Add v07 2025-03-03 15:22:15 +00:00
lewtun
d9b707420d
Merge branch 'main' into qwen-coder-sft-configs 2025-03-03 14:20:43 +01:00
Lewis Tunstall
a6f44b2b0a Add v06 2025-03-02 22:30:59 +00:00
Lewis Tunstall
2080600c47 Add codeforces recipes 2025-03-01 12:30:59 +00:00
lewtun
dae6e9af8c
Merge branch 'main' into qwen-coder-sft-configs 2025-03-01 11:18:16 +01:00
edbeeching
56f9257e67 configs 2025-02-26 09:11:49 +00:00
37 changed files with 1839 additions and 0 deletions

View file

@ -0,0 +1,53 @@
# Config for 16 nodes to go fast
# SFT recipe: Qwen/QwQ-32B on open-r1/codeforces-cots_decontaminated
# (config: solutions), hub revision v00.00.
# Model arguments
model_name_or_path: Qwen/QwQ-32B
model_revision: main
torch_dtype: bfloat16
attn_implementation: flash_attention_2
# Data training arguments
dataset_name: open-r1/codeforces-cots_decontaminated
dataset_config: solutions
dataset_num_proc: 1
# SFT trainer config
callbacks:
- push_to_hub_revision
benchmarks:
- lcb_v4
bf16: true
do_eval: false
eval_strategy: 'no'  # quoted so YAML keeps the string instead of a boolean
gradient_accumulation_steps: 1
gradient_checkpointing: true
gradient_checkpointing_kwargs:
  use_reentrant: false
hub_model_id: open-r1/QwQ-32B-SFT
hub_model_revision: v00.00
hub_strategy: every_save
learning_rate: 4.0e-05
log_level: info
logging_steps: 1
logging_strategy: steps
lr_scheduler_type: cosine_with_min_lr
lr_scheduler_kwargs:
  min_lr_rate: 0.1  # LR floor as a ratio of the initial learning_rate
packing: false
max_grad_norm: 0.2
max_length: 32768
max_steps: -1  # no step cap; run length is set by num_train_epochs
num_train_epochs: 10
output_dir: data/open-r1/QwQ-SFT-v00.00
overwrite_output_dir: true
per_device_eval_batch_size: 1
per_device_train_batch_size: 1
push_to_hub: true
report_to:
- wandb
save_strategy: epoch
save_total_limit: 1
seed: 42
use_liger: false  # fails on multi-node
wandb_entity: huggingface
wandb_project: open-r1
warmup_ratio: 0.03

View file

@ -0,0 +1,57 @@
# Config for 16 nodes to go fast
# SFT recipe: Qwen/Qwen2.5-Coder-32B-Instruct on
# open-r1/codeforces-cots_decontaminated (config: solutions),
# hub revision v00.00.
# Model arguments
model_name_or_path: Qwen/Qwen2.5-Coder-32B-Instruct
model_revision: main
torch_dtype: bfloat16
attn_implementation: flash_attention_2
# Data training arguments
dataset_name: open-r1/codeforces-cots_decontaminated
dataset_config: solutions
dataset_num_proc: 12
# SFT trainer config
callbacks:
- push_to_hub_revision
benchmarks:
- lcb_v4
- lcb
bf16: true
do_eval: false
eval_strategy: 'no'  # quoted so YAML keeps the string instead of a boolean
gradient_accumulation_steps: 1
gradient_checkpointing: true
gradient_checkpointing_kwargs:
  use_reentrant: false
hub_always_push: true
hub_model_id: open-r1/Qwen2.5-Coder-32B-Instruct-SFT
hub_model_revision: v00.00
hub_strategy: every_save
learning_rate: 4.0e-05
log_level: info
logging_steps: 1
logging_strategy: steps
lr_scheduler_type: cosine_with_min_lr
lr_scheduler_kwargs:
  min_lr_rate: 0.1  # LR floor as a ratio of the initial learning_rate
packing: false
max_grad_norm: 0.2
max_length: 32768
max_steps: -1  # no step cap; run length is set by num_train_epochs
num_train_epochs: 10
optim: paged_adamw_8bit
output_dir: data/open-r1/Qwen2.5-Coder-32B-Instruct-SFT-v00.00
overwrite_output_dir: true
per_device_eval_batch_size: 1
per_device_train_batch_size: 1
push_to_hub: true
report_to:
- wandb
save_only_model: true  # needed to bypass FSDP errors with saving paged optimizers
save_strategy: epoch
save_total_limit: 1
seed: 42
use_liger: false  # fails on multi-node
wandb_entity: huggingface
wandb_project: open-r1
warmup_ratio: 0.03

View file

@ -0,0 +1,57 @@
# Config for 16 nodes to go fast
# SFT recipe: Qwen/Qwen2.5-Coder-32B-Instruct on
# open-r1/codeforces-ioi-cots-mix (config: solutions_stop_compile),
# hub revision v01.00.
# Model arguments
model_name_or_path: Qwen/Qwen2.5-Coder-32B-Instruct
model_revision: main
torch_dtype: bfloat16
attn_implementation: flash_attention_2
# Data training arguments
dataset_name: open-r1/codeforces-ioi-cots-mix
dataset_config: solutions_stop_compile
dataset_num_proc: 12
# SFT trainer config
callbacks:
- push_to_hub_revision
benchmarks:
- lcb_v4
- lcb
bf16: true
do_eval: false
eval_strategy: 'no'  # quoted so YAML keeps the string instead of a boolean
gradient_accumulation_steps: 1
gradient_checkpointing: true
gradient_checkpointing_kwargs:
  use_reentrant: false
hub_always_push: true
hub_model_id: open-r1/Qwen2.5-Coder-32B-Instruct-SFT
hub_model_revision: v01.00
hub_strategy: every_save
learning_rate: 4.0e-05
log_level: info
logging_steps: 1
logging_strategy: steps
lr_scheduler_type: cosine_with_min_lr
lr_scheduler_kwargs:
  min_lr_rate: 0.1  # LR floor as a ratio of the initial learning_rate
packing: false
max_grad_norm: 0.2
max_length: 32768
max_steps: -1  # no step cap; run length is set by num_train_epochs
num_train_epochs: 10
optim: paged_adamw_8bit
output_dir: data/open-r1/Qwen2.5-Coder-32B-Instruct-SFT-v01.00
overwrite_output_dir: true
per_device_eval_batch_size: 1
per_device_train_batch_size: 1
push_to_hub: true
report_to:
- wandb
save_only_model: true  # needed to bypass FSDP errors with saving paged optimizers
save_strategy: epoch
save_total_limit: 1
seed: 42
use_liger: false  # fails on multi-node
wandb_entity: huggingface
wandb_project: open-r1
warmup_ratio: 0.03

View file

@ -0,0 +1,56 @@
# Config for 4 nodes since it's only 10k samples
# SFT recipe: Qwen/Qwen2.5-Coder-32B-Instruct on open-r1/ioi-cots,
# hub revision v02.00.
# Model arguments
model_name_or_path: Qwen/Qwen2.5-Coder-32B-Instruct
model_revision: main
torch_dtype: bfloat16
attn_implementation: flash_attention_2
# Data training arguments
dataset_name: open-r1/ioi-cots
dataset_num_proc: 12
# SFT trainer config
callbacks:
- push_to_hub_revision
benchmarks:
- lcb_v4
- lcb
bf16: true
do_eval: false
eval_strategy: 'no'  # quoted so YAML keeps the string instead of a boolean
gradient_accumulation_steps: 1
gradient_checkpointing: true
gradient_checkpointing_kwargs:
  use_reentrant: false
hub_always_push: true
hub_model_id: open-r1/Qwen2.5-Coder-32B-Instruct-SFT
hub_model_revision: v02.00
hub_strategy: every_save
learning_rate: 4.0e-05
log_level: info
logging_steps: 1
logging_strategy: steps
lr_scheduler_type: cosine_with_min_lr
lr_scheduler_kwargs:
  min_lr_rate: 0.1  # LR floor as a ratio of the initial learning_rate
packing: false
max_grad_norm: 0.2
max_length: 32768
max_steps: -1  # no step cap; run length is set by num_train_epochs
num_train_epochs: 10
optim: paged_adamw_8bit
output_dir: data/open-r1/Qwen2.5-Coder-32B-Instruct-SFT-v02.00
overwrite_output_dir: true
per_device_eval_batch_size: 1
per_device_train_batch_size: 1
push_to_hub: true
report_to:
- wandb
save_only_model: true  # needed to bypass FSDP errors with saving paged optimizers
save_strategy: epoch
save_total_limit: 1
seed: 42
use_liger: false  # fails on multi-node
wandb_entity: huggingface
wandb_project: open-r1
warmup_ratio: 0.03

View file

@ -0,0 +1,57 @@
# Config for 4 nodes since it's only 10k samples
# SFT recipe: Qwen/Qwen2.5-Coder-32B-Instruct on
# open-r1/ioi-cots-filtered (config: stop_compile), hub revision v03.00.
# Model arguments
model_name_or_path: Qwen/Qwen2.5-Coder-32B-Instruct
model_revision: main
torch_dtype: bfloat16
attn_implementation: flash_attention_2
# Data training arguments
dataset_name: open-r1/ioi-cots-filtered
dataset_config: stop_compile
dataset_num_proc: 12
# SFT trainer config
callbacks:
- push_to_hub_revision
benchmarks:
- lcb_v4
- lcb
bf16: true
do_eval: false
eval_strategy: 'no'  # quoted so YAML keeps the string instead of a boolean
gradient_accumulation_steps: 1
gradient_checkpointing: true
gradient_checkpointing_kwargs:
  use_reentrant: false
hub_always_push: true
hub_model_id: open-r1/Qwen2.5-Coder-32B-Instruct-SFT
hub_model_revision: v03.00
hub_strategy: every_save
learning_rate: 4.0e-05
log_level: info
logging_steps: 1
logging_strategy: steps
lr_scheduler_type: cosine_with_min_lr
lr_scheduler_kwargs:
  min_lr_rate: 0.1  # LR floor as a ratio of the initial learning_rate
packing: false
max_grad_norm: 0.2
max_length: 32768
max_steps: -1  # no step cap; run length is set by num_train_epochs
num_train_epochs: 10
optim: paged_adamw_8bit
output_dir: data/open-r1/Qwen2.5-Coder-32B-Instruct-SFT-v03.00
overwrite_output_dir: true
per_device_eval_batch_size: 1
per_device_train_batch_size: 1
push_to_hub: true
report_to:
- wandb
save_only_model: true  # needed to bypass FSDP errors with saving paged optimizers
save_strategy: epoch
save_total_limit: 1
seed: 42
use_liger: false  # fails on multi-node
wandb_entity: huggingface
wandb_project: open-r1
warmup_ratio: 0.03

View file

@ -0,0 +1,57 @@
# Config for 4 nodes since it's only 10k samples
# SFT recipe: Qwen/Qwen2.5-Coder-32B-Instruct on
# open-r1/ioi-cots-filtered (config: stop_compile_subtask_score_gt_0),
# hub revision v04.00.
# Model arguments
model_name_or_path: Qwen/Qwen2.5-Coder-32B-Instruct
model_revision: main
torch_dtype: bfloat16
attn_implementation: flash_attention_2
# Data training arguments
dataset_name: open-r1/ioi-cots-filtered
dataset_config: stop_compile_subtask_score_gt_0
dataset_num_proc: 12
# SFT trainer config
callbacks:
- push_to_hub_revision
benchmarks:
- lcb_v4
- lcb
bf16: true
do_eval: false
eval_strategy: 'no'  # quoted so YAML keeps the string instead of a boolean
gradient_accumulation_steps: 1
gradient_checkpointing: true
gradient_checkpointing_kwargs:
  use_reentrant: false
hub_always_push: true
hub_model_id: open-r1/Qwen2.5-Coder-32B-Instruct-SFT
hub_model_revision: v04.00
hub_strategy: every_save
learning_rate: 4.0e-05
log_level: info
logging_steps: 1
logging_strategy: steps
lr_scheduler_type: cosine_with_min_lr
lr_scheduler_kwargs:
  min_lr_rate: 0.1  # LR floor as a ratio of the initial learning_rate
packing: false
max_grad_norm: 0.2
max_length: 32768
max_steps: -1  # no step cap; run length is set by num_train_epochs
num_train_epochs: 10
optim: paged_adamw_8bit
output_dir: data/open-r1/Qwen2.5-Coder-32B-Instruct-SFT-v04.00
overwrite_output_dir: true
per_device_eval_batch_size: 1
per_device_train_batch_size: 1
push_to_hub: true
report_to:
- wandb
save_only_model: true  # needed to bypass FSDP errors with saving paged optimizers
save_strategy: epoch
save_total_limit: 1
seed: 42
use_liger: false  # fails on multi-node
wandb_entity: huggingface
wandb_project: open-r1
warmup_ratio: 0.03

View file

@ -0,0 +1,57 @@
# Config for 4 nodes since it's only 10k samples
# SFT recipe: Qwen/Qwen2.5-Coder-32B-Instruct on
# open-r1/ioi-cots-filtered
# (config: stop_compile_subtask_score_gt_0_subtask_points_gt_0),
# hub revision v05.00.
# Model arguments
model_name_or_path: Qwen/Qwen2.5-Coder-32B-Instruct
model_revision: main
torch_dtype: bfloat16
attn_implementation: flash_attention_2
# Data training arguments
dataset_name: open-r1/ioi-cots-filtered
dataset_config: stop_compile_subtask_score_gt_0_subtask_points_gt_0
dataset_num_proc: 12
# SFT trainer config
callbacks:
- push_to_hub_revision
benchmarks:
- lcb_v4
- lcb
bf16: true
do_eval: false
eval_strategy: 'no'  # quoted so YAML keeps the string instead of a boolean
gradient_accumulation_steps: 1
gradient_checkpointing: true
gradient_checkpointing_kwargs:
  use_reentrant: false
hub_always_push: true
hub_model_id: open-r1/Qwen2.5-Coder-32B-Instruct-SFT
hub_model_revision: v05.00
hub_strategy: every_save
learning_rate: 4.0e-05
log_level: info
logging_steps: 1
logging_strategy: steps
lr_scheduler_type: cosine_with_min_lr
lr_scheduler_kwargs:
  min_lr_rate: 0.1  # LR floor as a ratio of the initial learning_rate
packing: false
max_grad_norm: 0.2
max_length: 32768
max_steps: -1  # no step cap; run length is set by num_train_epochs
num_train_epochs: 10
optim: paged_adamw_8bit
output_dir: data/open-r1/Qwen2.5-Coder-32B-Instruct-SFT-v05.00
overwrite_output_dir: true
per_device_eval_batch_size: 1
per_device_train_batch_size: 1
push_to_hub: true
report_to:
- wandb
save_only_model: true  # needed to bypass FSDP errors with saving paged optimizers
save_strategy: epoch
save_total_limit: 1
seed: 42
use_liger: false  # fails on multi-node
wandb_entity: huggingface
wandb_project: open-r1
warmup_ratio: 0.03

View file

@ -0,0 +1,55 @@
# Config for 16 nodes to go fast
# Debug SFT recipe: Qwen/Qwen2.5-Coder-3B-Instruct on
# open-r1/codeforces-cots_decontaminated (config: solutions),
# hub revision "debug". Runs only a handful of steps (see max_steps).
# Model arguments
model_name_or_path: Qwen/Qwen2.5-Coder-3B-Instruct
model_revision: main
torch_dtype: bfloat16
attn_implementation: flash_attention_2
# Data training arguments
dataset_name: open-r1/codeforces-cots_decontaminated
dataset_config: solutions
dataset_num_proc: 48
# SFT trainer config
callbacks:
- push_to_hub_revision
benchmarks:
- lcb
bf16: true
do_eval: false
eval_strategy: 'no'  # quoted so YAML keeps the string instead of a boolean
gradient_accumulation_steps: 1
gradient_checkpointing: true
gradient_checkpointing_kwargs:
  use_reentrant: false
hub_model_id: open-r1/Qwen2.5-Coder-3B-Instruct-SFT
hub_model_revision: debug
hub_strategy: every_save
learning_rate: 4.0e-05
log_level: info
logging_steps: 1
logging_strategy: steps
lr_scheduler_type: cosine_with_min_lr
lr_scheduler_kwargs:
  min_lr_rate: 0.1  # LR floor as a ratio of the initial learning_rate
packing: false
max_grad_norm: 0.2
max_length: 23552  # Any larger gives OOM
max_steps: 4  # positive value overrides num_train_epochs below
num_train_epochs: 10
# optim: paged_adamw_8bit
output_dir: data/Qwen2.5-Coder-3B-Instruct-SFT-debug
overwrite_output_dir: true
per_device_eval_batch_size: 1
per_device_train_batch_size: 1
push_to_hub: true
report_to:
- wandb
save_strategy: steps
save_steps: 2
save_total_limit: 1
seed: 42
use_liger: false  # fails on multi-node?
wandb_entity: huggingface
wandb_project: open-r1
warmup_ratio: 0.03

View file

@ -0,0 +1,55 @@
# SFT recipe: Qwen/Qwen2.5-Coder-3B-Instruct on
# open-r1/OpenThoughts-114k-Curated (configs: all), packed sequences,
# hub revision v02.00.
# Model arguments
model_name_or_path: Qwen/Qwen2.5-Coder-3B-Instruct
model_revision: main
torch_dtype: bfloat16
attn_implementation: flash_attention_2
# Data training arguments
dataset_name: open-r1/OpenThoughts-114k-Curated
dataset_configs:
- all
dataset_num_proc: 12
# SFT trainer config
callbacks:
- push_to_hub_revision
benchmarks:
- math_500
- aime24
- aime25
- gpqa
- lcb
bf16: true
do_eval: false
gradient_accumulation_steps: 4
gradient_checkpointing: true
gradient_checkpointing_kwargs:
  use_reentrant: false
hub_model_id: open-r1/Qwen2.5-Coder-3B-Instruct-SFT
hub_model_revision: v02.00
hub_strategy: every_save
learning_rate: 5.0e-07
log_level: info
logging_steps: 1
logging_strategy: steps
lr_scheduler_type: cosine_with_min_lr
lr_scheduler_kwargs:
  min_lr_rate: 0.1  # LR floor as a ratio of the initial learning_rate
packing: true
# NOTE(review): was `max_seq_length`; renamed to `max_length` to match the
# other recipes and the current TRL SFTConfig field name - confirm against
# the pinned TRL version.
max_length: 32768
max_steps: -1  # no step cap; run length is set by num_train_epochs
num_train_epochs: 3
output_dir: data/open-r1/Qwen2.5-Coder-3B-Instruct-SFT-v02.00
overwrite_output_dir: true
per_device_eval_batch_size: 1
per_device_train_batch_size: 4
push_to_hub: true
report_to:
- wandb
save_strategy: "epoch"
save_total_limit: 1
seed: 42
use_liger: true
wandb_entity: huggingface
wandb_project: open-r1
warmup_ratio: 0.03

View file

@ -0,0 +1,55 @@
# SFT recipe: Qwen/Qwen2.5-Coder-3B-Instruct on
# open-r1/OpenThoughts-114k-Curated (configs: all), no packing,
# hub revision v02.01.
# Model arguments
model_name_or_path: Qwen/Qwen2.5-Coder-3B-Instruct
model_revision: main
torch_dtype: bfloat16
attn_implementation: flash_attention_2
# Data training arguments
dataset_name: open-r1/OpenThoughts-114k-Curated
dataset_configs:
- all
dataset_num_proc: 12
# SFT trainer config
callbacks:
- push_to_hub_revision
benchmarks:
- math_500
- aime24
- aime25
- gpqa
- lcb
bf16: true
do_eval: false
gradient_accumulation_steps: 4
gradient_checkpointing: true
gradient_checkpointing_kwargs:
  use_reentrant: false
hub_model_id: open-r1/Qwen2.5-Coder-3B-Instruct-SFT
hub_model_revision: v02.01
hub_strategy: every_save
learning_rate: 5.0e-07
log_level: info
logging_steps: 1
logging_strategy: steps
lr_scheduler_type: cosine_with_min_lr
lr_scheduler_kwargs:
  min_lr_rate: 0.1  # LR floor as a ratio of the initial learning_rate
packing: false
# NOTE(review): was `max_seq_length`; renamed to `max_length` to match the
# other recipes and the current TRL SFTConfig field name - confirm against
# the pinned TRL version.
max_length: 32768
max_steps: -1  # no step cap; run length is set by num_train_epochs
num_train_epochs: 3
output_dir: data/open-r1/Qwen2.5-Coder-3B-Instruct-SFT-v02.01
overwrite_output_dir: true
per_device_eval_batch_size: 1
per_device_train_batch_size: 4
push_to_hub: true
report_to:
- wandb
save_strategy: "epoch"
save_total_limit: 1
seed: 42
use_liger: true
wandb_entity: huggingface
wandb_project: open-r1
warmup_ratio: 0.03

View file

@ -0,0 +1,55 @@
# SFT recipe: Qwen/Qwen2.5-Coder-3B-Instruct on
# open-r1/OpenThoughts-114k-Curated (configs: all), no packing, 9 epochs,
# hub revision v02.02.
# Model arguments
model_name_or_path: Qwen/Qwen2.5-Coder-3B-Instruct
model_revision: main
torch_dtype: bfloat16
attn_implementation: flash_attention_2
# Data training arguments
dataset_name: open-r1/OpenThoughts-114k-Curated
dataset_configs:
- all
dataset_num_proc: 12
# SFT trainer config
callbacks:
- push_to_hub_revision
benchmarks:
- math_500
- aime24
- aime25
- gpqa
- lcb
bf16: true
do_eval: false
gradient_accumulation_steps: 4
gradient_checkpointing: true
gradient_checkpointing_kwargs:
  use_reentrant: false
hub_model_id: open-r1/Qwen2.5-Coder-3B-Instruct-SFT
hub_model_revision: v02.02
hub_strategy: every_save
learning_rate: 5.0e-07
log_level: info
logging_steps: 1
logging_strategy: steps
lr_scheduler_type: cosine_with_min_lr
lr_scheduler_kwargs:
  min_lr_rate: 0.1  # LR floor as a ratio of the initial learning_rate
packing: false
# NOTE(review): was `max_seq_length`; renamed to `max_length` to match the
# other recipes and the current TRL SFTConfig field name - confirm against
# the pinned TRL version.
max_length: 32768
max_steps: -1  # no step cap; run length is set by num_train_epochs
num_train_epochs: 9
output_dir: data/open-r1/Qwen2.5-Coder-3B-Instruct-SFT-v02.02
overwrite_output_dir: true
per_device_eval_batch_size: 1
per_device_train_batch_size: 4
push_to_hub: true
report_to:
- wandb
save_strategy: "epoch"
save_total_limit: 1
seed: 42
use_liger: true
wandb_entity: huggingface
wandb_project: open-r1
warmup_ratio: 0.03

View file

@ -0,0 +1,53 @@
# Config for 1 node
# SFT recipe: Qwen/Qwen2.5-Coder-3B-Instruct on
# open-r1/codeforces-cots_decontaminated (config: solutions_all_cpp_py),
# hub revision v03.00.
# Model arguments
model_name_or_path: Qwen/Qwen2.5-Coder-3B-Instruct
model_revision: main
torch_dtype: bfloat16
attn_implementation: flash_attention_2
# Data training arguments
dataset_name: open-r1/codeforces-cots_decontaminated
dataset_config: solutions_all_cpp_py
dataset_num_proc: 12
# SFT trainer config
callbacks:
- push_to_hub_revision
benchmarks:
- lcb_v4
bf16: true
do_eval: false
eval_strategy: 'no'  # quoted so YAML keeps the string instead of a boolean
gradient_accumulation_steps: 8
gradient_checkpointing: true
gradient_checkpointing_kwargs:
  use_reentrant: false
hub_model_id: open-r1/Qwen2.5-Coder-3B-Instruct-SFT
hub_model_revision: v03.00
hub_strategy: every_save
learning_rate: 4.0e-05
log_level: info
logging_steps: 1
logging_strategy: steps
lr_scheduler_type: cosine_with_min_lr
lr_scheduler_kwargs:
  min_lr_rate: 0.1  # LR floor as a ratio of the initial learning_rate
packing: false
max_grad_norm: 0.2
max_length: 32768
max_steps: -1  # no step cap; run length is set by num_train_epochs
num_train_epochs: 10
output_dir: data/open-r1/Qwen2.5-Coder-3B-Instruct-SFT-v03.00
overwrite_output_dir: true
per_device_eval_batch_size: 1
per_device_train_batch_size: 2
push_to_hub: true
report_to:
- wandb
save_strategy: epoch
save_total_limit: 1
seed: 42
use_liger: true
wandb_entity: huggingface
wandb_project: open-r1
warmup_ratio: 0.03

View file

@ -0,0 +1,52 @@
# SFT recipe: Qwen/Qwen2.5-Coder-7B-Instruct on
# open-r1/OpenThoughts-114k-Code_decontaminated (configs: all),
# packed sequences, hub revision v00.00.
# Model arguments
model_name_or_path: Qwen/Qwen2.5-Coder-7B-Instruct
model_revision: main
torch_dtype: bfloat16
attn_implementation: flash_attention_2
# Data training arguments
dataset_name: open-r1/OpenThoughts-114k-Code_decontaminated
dataset_configs:
- all
dataset_num_proc: 12
# SFT trainer config
callbacks:
- push_to_hub_revision
benchmarks:
- lcb
bf16: true
do_eval: false
gradient_accumulation_steps: 8
gradient_checkpointing: true
gradient_checkpointing_kwargs:
  use_reentrant: false
hub_model_id: open-r1/Qwen2.5-Coder-7B-Instruct-SFT
hub_model_revision: v00.00
hub_strategy: every_save
learning_rate: 1.0e-06
log_level: info
logging_steps: 1
logging_strategy: steps
lr_scheduler_type: cosine_with_min_lr
lr_scheduler_kwargs:
  min_lr_rate: 0.1  # LR floor as a ratio of the initial learning_rate
packing: true
max_length: 32768
max_steps: -1  # no step cap; run length is set by num_train_epochs
num_train_epochs: 10
output_dir: data/open-r1/Qwen2.5-Coder-7B-Instruct-SFT-v00.00
overwrite_output_dir: true
per_device_eval_batch_size: 1
per_device_train_batch_size: 2
push_to_hub: true
report_to:
- wandb
save_strategy: "steps"
save_steps: 0.2  # float below 1 = fraction of total training steps
save_total_limit: 1
seed: 42
use_liger: true
wandb_entity: huggingface
wandb_project: open-r1
warmup_ratio: 0.03

View file

@ -0,0 +1,52 @@
# SFT recipe: Qwen/Qwen2.5-Coder-7B-Instruct on
# open-r1/OpenThoughts-114k-Code_decontaminated (configs: all),
# no packing, hub revision v00.01.
# Model arguments
model_name_or_path: Qwen/Qwen2.5-Coder-7B-Instruct
model_revision: main
torch_dtype: bfloat16
attn_implementation: flash_attention_2
# Data training arguments
dataset_name: open-r1/OpenThoughts-114k-Code_decontaminated
dataset_configs:
- all
dataset_num_proc: 12
# SFT trainer config
callbacks:
- push_to_hub_revision
benchmarks:
- lcb
bf16: true
do_eval: false
gradient_accumulation_steps: 8
gradient_checkpointing: true
gradient_checkpointing_kwargs:
  use_reentrant: false
hub_model_id: open-r1/Qwen2.5-Coder-7B-Instruct-SFT
hub_model_revision: v00.01
hub_strategy: every_save
learning_rate: 1.0e-06
log_level: info
logging_steps: 1
logging_strategy: steps
lr_scheduler_type: cosine_with_min_lr
lr_scheduler_kwargs:
  min_lr_rate: 0.1  # LR floor as a ratio of the initial learning_rate
packing: false
max_length: 32768
max_steps: -1  # no step cap; run length is set by num_train_epochs
num_train_epochs: 10
output_dir: data/open-r1/Qwen2.5-Coder-7B-Instruct-SFT-v00.01
overwrite_output_dir: true
per_device_eval_batch_size: 1
per_device_train_batch_size: 2
push_to_hub: true
report_to:
- wandb
save_strategy: "steps"
save_steps: 0.2  # float below 1 = fraction of total training steps
save_total_limit: 1
seed: 42
use_liger: true
wandb_entity: huggingface
wandb_project: open-r1
warmup_ratio: 0.03

View file

@ -0,0 +1,52 @@
# SFT recipe: Qwen/Qwen2.5-Coder-7B-Instruct on
# open-r1/SYNTHETIC-1-SFT-Data-Code_decontaminated (configs: all),
# packed sequences, hub revision v01.00.
# Model arguments
model_name_or_path: Qwen/Qwen2.5-Coder-7B-Instruct
model_revision: main
torch_dtype: bfloat16
attn_implementation: flash_attention_2
# Data training arguments
dataset_name: open-r1/SYNTHETIC-1-SFT-Data-Code_decontaminated
dataset_configs:
- all
dataset_num_proc: 12
# SFT trainer config
callbacks:
- push_to_hub_revision
benchmarks:
- lcb
bf16: true
do_eval: false
gradient_accumulation_steps: 8
gradient_checkpointing: true
gradient_checkpointing_kwargs:
  use_reentrant: false
hub_model_id: open-r1/Qwen2.5-Coder-7B-Instruct-SFT
hub_model_revision: v01.00
hub_strategy: every_save
learning_rate: 1.0e-06
log_level: info
logging_steps: 1
logging_strategy: steps
lr_scheduler_type: cosine_with_min_lr
lr_scheduler_kwargs:
  min_lr_rate: 0.1  # LR floor as a ratio of the initial learning_rate
packing: true
max_length: 32768
max_steps: -1  # no step cap; run length is set by num_train_epochs
num_train_epochs: 10
output_dir: data/open-r1/Qwen2.5-Coder-7B-Instruct-SFT-v01.00
overwrite_output_dir: true
per_device_eval_batch_size: 1
per_device_train_batch_size: 2
push_to_hub: true
report_to:
- wandb
save_strategy: "steps"
save_steps: 0.2  # float below 1 = fraction of total training steps
save_total_limit: 1
seed: 42
use_liger: true
wandb_entity: huggingface
wandb_project: open-r1
warmup_ratio: 0.03

View file

@ -0,0 +1,52 @@
# SFT recipe: Qwen/Qwen2.5-Coder-7B-Instruct on
# open-r1/SYNTHETIC-1-SFT-Data-Code_decontaminated (configs: all),
# no packing, hub revision v01.01.
# Model arguments
model_name_or_path: Qwen/Qwen2.5-Coder-7B-Instruct
model_revision: main
torch_dtype: bfloat16
attn_implementation: flash_attention_2
# Data training arguments
dataset_name: open-r1/SYNTHETIC-1-SFT-Data-Code_decontaminated
dataset_configs:
- all
dataset_num_proc: 12
# SFT trainer config
callbacks:
- push_to_hub_revision
benchmarks:
- lcb
bf16: true
do_eval: false
gradient_accumulation_steps: 8
gradient_checkpointing: true
gradient_checkpointing_kwargs:
  use_reentrant: false
hub_model_id: open-r1/Qwen2.5-Coder-7B-Instruct-SFT
hub_model_revision: v01.01
hub_strategy: every_save
learning_rate: 1.0e-06
log_level: info
logging_steps: 1
logging_strategy: steps
lr_scheduler_type: cosine_with_min_lr
lr_scheduler_kwargs:
  min_lr_rate: 0.1  # LR floor as a ratio of the initial learning_rate
packing: false
max_length: 32768
max_steps: -1  # no step cap; run length is set by num_train_epochs
num_train_epochs: 10
output_dir: data/open-r1/Qwen2.5-Coder-7B-Instruct-SFT-v01.01
overwrite_output_dir: true
per_device_eval_batch_size: 1
per_device_train_batch_size: 2
push_to_hub: true
report_to:
- wandb
save_strategy: "steps"
save_steps: 0.2  # float below 1 = fraction of total training steps
save_total_limit: 1
seed: 42
use_liger: true
wandb_entity: huggingface
wandb_project: open-r1
warmup_ratio: 0.03

View file

@ -0,0 +1,52 @@
# SFT recipe: Qwen/Qwen2.5-Coder-7B-Instruct on
# open-r1/codeforces-cots_decontaminated (config: solutions),
# packed sequences, hub revision v02.00.
# Model arguments
model_name_or_path: Qwen/Qwen2.5-Coder-7B-Instruct
model_revision: main
torch_dtype: bfloat16
attn_implementation: flash_attention_2
# Data training arguments
dataset_name: open-r1/codeforces-cots_decontaminated
dataset_config: solutions
dataset_num_proc: 48
# SFT trainer config
callbacks:
- push_to_hub_revision
benchmarks:
- lcb
bf16: true
do_eval: false
eval_strategy: 'no'  # quoted so YAML keeps the string instead of a boolean
gradient_accumulation_steps: 8
gradient_checkpointing: true
gradient_checkpointing_kwargs:
  use_reentrant: false
hub_model_id: open-r1/Qwen2.5-Coder-7B-Instruct-SFT
hub_model_revision: v02.00
hub_strategy: every_save
learning_rate: 1.0e-05
log_level: info
logging_steps: 1
logging_strategy: steps
lr_scheduler_type: cosine_with_min_lr
lr_scheduler_kwargs:
  min_lr_rate: 0.1  # LR floor as a ratio of the initial learning_rate
packing: true
max_length: 32768
max_steps: -1  # no step cap; run length is set by num_train_epochs
num_train_epochs: 10
output_dir: data/open-r1/Qwen2.5-Coder-7B-Instruct-SFT-v02.00
overwrite_output_dir: true
per_device_eval_batch_size: 1
per_device_train_batch_size: 2
push_to_hub: true
report_to:
- wandb
save_strategy: "steps"
save_steps: 0.2  # float below 1 = fraction of total training steps
save_total_limit: 1
seed: 42
use_liger: true
wandb_entity: huggingface
wandb_project: open-r1
warmup_ratio: 0.03

View file

@ -0,0 +1,52 @@
# SFT recipe: Qwen/Qwen2.5-Coder-7B-Instruct on
# open-r1/codeforces-cots_decontaminated (config: solutions_w_editorials),
# packed sequences, hub revision v03.00.
# Model arguments
model_name_or_path: Qwen/Qwen2.5-Coder-7B-Instruct
model_revision: main
torch_dtype: bfloat16
attn_implementation: flash_attention_2
# Data training arguments
dataset_name: open-r1/codeforces-cots_decontaminated
dataset_config: solutions_w_editorials
dataset_num_proc: 48
# SFT trainer config
callbacks:
- push_to_hub_revision
benchmarks:
- lcb
bf16: true
do_eval: false
eval_strategy: 'no'  # quoted so YAML keeps the string instead of a boolean
gradient_accumulation_steps: 8
gradient_checkpointing: true
gradient_checkpointing_kwargs:
  use_reentrant: false
hub_model_id: open-r1/Qwen2.5-Coder-7B-Instruct-SFT
hub_model_revision: v03.00
hub_strategy: every_save
learning_rate: 1.0e-05
log_level: info
logging_steps: 1
logging_strategy: steps
lr_scheduler_type: cosine_with_min_lr
lr_scheduler_kwargs:
  min_lr_rate: 0.1  # LR floor as a ratio of the initial learning_rate
packing: true
max_length: 32768
max_steps: -1  # no step cap; run length is set by num_train_epochs
num_train_epochs: 10
output_dir: data/open-r1/Qwen2.5-Coder-7B-Instruct-SFT-v03.00
overwrite_output_dir: true
per_device_eval_batch_size: 1
per_device_train_batch_size: 2
push_to_hub: true
report_to:
- wandb
save_strategy: "steps"
save_steps: 0.2  # float below 1 = fraction of total training steps
save_total_limit: 1
seed: 42
use_liger: true
wandb_entity: huggingface
wandb_project: open-r1
warmup_ratio: 0.03

View file

@ -0,0 +1,52 @@
# SFT recipe: Qwen/Qwen2.5-Coder-7B-Instruct on
# open-r1/codeforces-cots_decontaminated (config: test_input_generator),
# packed sequences, hub revision v04.00.
# Model arguments
model_name_or_path: Qwen/Qwen2.5-Coder-7B-Instruct
model_revision: main
torch_dtype: bfloat16
attn_implementation: flash_attention_2
# Data training arguments
dataset_name: open-r1/codeforces-cots_decontaminated
dataset_config: test_input_generator
dataset_num_proc: 48
# SFT trainer config
callbacks:
- push_to_hub_revision
benchmarks:
- lcb
bf16: true
do_eval: false
eval_strategy: 'no'  # quoted so YAML keeps the string instead of a boolean
gradient_accumulation_steps: 8
gradient_checkpointing: true
gradient_checkpointing_kwargs:
  use_reentrant: false
hub_model_id: open-r1/Qwen2.5-Coder-7B-Instruct-SFT
hub_model_revision: v04.00
hub_strategy: every_save
learning_rate: 1.0e-05
log_level: info
logging_steps: 1
logging_strategy: steps
lr_scheduler_type: cosine_with_min_lr
lr_scheduler_kwargs:
  min_lr_rate: 0.1  # LR floor as a ratio of the initial learning_rate
packing: true
max_length: 32768
max_steps: -1  # no step cap; run length is set by num_train_epochs
num_train_epochs: 10
output_dir: data/open-r1/Qwen2.5-Coder-7B-Instruct-SFT-v04.00
overwrite_output_dir: true
per_device_eval_batch_size: 1
per_device_train_batch_size: 2
push_to_hub: true
report_to:
- wandb
save_strategy: "steps"
save_steps: 0.2  # float below 1 = fraction of total training steps
save_total_limit: 1
seed: 42
use_liger: true
wandb_entity: huggingface
wandb_project: open-r1
warmup_ratio: 0.03

View file

@ -0,0 +1,52 @@
# SFT recipe: Qwen/Qwen2.5-Coder-7B-Instruct on
# open-r1/codeforces-cots_decontaminated (config: checker_interactor),
# packed sequences, hub revision v05.00.
# Model arguments
model_name_or_path: Qwen/Qwen2.5-Coder-7B-Instruct
model_revision: main
torch_dtype: bfloat16
attn_implementation: flash_attention_2
# Data training arguments
dataset_name: open-r1/codeforces-cots_decontaminated
dataset_config: checker_interactor
dataset_num_proc: 48
# SFT trainer config
callbacks:
- push_to_hub_revision
benchmarks:
- lcb
bf16: true
do_eval: false
eval_strategy: 'no'  # quoted so YAML keeps the string instead of a boolean
gradient_accumulation_steps: 8
gradient_checkpointing: true
gradient_checkpointing_kwargs:
  use_reentrant: false
hub_model_id: open-r1/Qwen2.5-Coder-7B-Instruct-SFT
hub_model_revision: v05.00
hub_strategy: every_save
learning_rate: 1.0e-05
log_level: info
logging_steps: 1
logging_strategy: steps
lr_scheduler_type: cosine_with_min_lr
lr_scheduler_kwargs:
  min_lr_rate: 0.1  # LR floor as a ratio of the initial learning_rate
packing: true
max_length: 32768
max_steps: -1  # no step cap; run length is set by num_train_epochs
num_train_epochs: 10
output_dir: data/open-r1/Qwen2.5-Coder-7B-Instruct-SFT-v05.00
overwrite_output_dir: true
per_device_eval_batch_size: 1
per_device_train_batch_size: 2
push_to_hub: true
report_to:
- wandb
save_strategy: "steps"
save_steps: 0.2  # float below 1 = fraction of total training steps
save_total_limit: 1
seed: 42
use_liger: true
wandb_entity: huggingface
wandb_project: open-r1
warmup_ratio: 0.03

View file

@ -0,0 +1,51 @@
# SFT recipe: Qwen/Qwen2.5-Coder-7B-Instruct on
# open-r1/codeforces_cots_w_openthoughts_decontaminated
# (config: solutions), packed sequences, hub revision v06.00.
# Model arguments
model_name_or_path: Qwen/Qwen2.5-Coder-7B-Instruct
model_revision: main
torch_dtype: bfloat16
attn_implementation: flash_attention_2
# Data training arguments
dataset_name: open-r1/codeforces_cots_w_openthoughts_decontaminated
dataset_config: solutions
dataset_num_proc: 48
# SFT trainer config
callbacks:
- push_to_hub_revision
benchmarks:
- lcb
bf16: true
do_eval: false
eval_strategy: 'no'  # quoted so YAML keeps the string instead of a boolean
gradient_accumulation_steps: 8
gradient_checkpointing: true
gradient_checkpointing_kwargs:
  use_reentrant: false
hub_model_id: open-r1/Qwen2.5-Coder-7B-Instruct-SFT
hub_model_revision: v06.00
hub_strategy: every_save
learning_rate: 1.0e-05
log_level: info
logging_steps: 1
logging_strategy: steps
lr_scheduler_type: cosine_with_min_lr
lr_scheduler_kwargs:
  min_lr_rate: 0.1  # LR floor as a ratio of the initial learning_rate
packing: true
max_length: 32768
max_steps: -1  # no step cap; run length is set by num_train_epochs
num_train_epochs: 10
output_dir: data/open-r1/Qwen2.5-Coder-7B-Instruct-SFT-v06.00
overwrite_output_dir: true
per_device_eval_batch_size: 1
per_device_train_batch_size: 2
push_to_hub: true
report_to:
- wandb
save_strategy: epoch
save_total_limit: 1
seed: 42
use_liger: true
wandb_entity: huggingface
wandb_project: open-r1
warmup_ratio: 0.03

View file

@ -0,0 +1,51 @@
# SFT recipe: Qwen/Qwen2.5-Coder-7B-Instruct on
# open-r1/codeforces_cots_w_openthoughts_decontaminated
# (config: solutions_w_editorials), packed sequences, hub revision v07.00.
# Model arguments
model_name_or_path: Qwen/Qwen2.5-Coder-7B-Instruct
model_revision: main
torch_dtype: bfloat16
attn_implementation: flash_attention_2
# Data training arguments
dataset_name: open-r1/codeforces_cots_w_openthoughts_decontaminated
dataset_config: solutions_w_editorials
dataset_num_proc: 48
# SFT trainer config
callbacks:
- push_to_hub_revision
benchmarks:
- lcb
bf16: true
do_eval: false
eval_strategy: 'no'  # quoted so YAML keeps the string instead of a boolean
gradient_accumulation_steps: 8
gradient_checkpointing: true
gradient_checkpointing_kwargs:
  use_reentrant: false
hub_model_id: open-r1/Qwen2.5-Coder-7B-Instruct-SFT
hub_model_revision: v07.00
hub_strategy: every_save
learning_rate: 1.0e-05
log_level: info
logging_steps: 1
logging_strategy: steps
lr_scheduler_type: cosine_with_min_lr
lr_scheduler_kwargs:
  min_lr_rate: 0.1  # LR floor as a ratio of the initial learning_rate
packing: true
max_length: 32768
max_steps: -1  # no step cap; run length is set by num_train_epochs
num_train_epochs: 10
output_dir: data/open-r1/Qwen2.5-Coder-7B-Instruct-SFT-v07.00
overwrite_output_dir: true
per_device_eval_batch_size: 1
per_device_train_batch_size: 2
push_to_hub: true
report_to:
- wandb
save_strategy: epoch
save_total_limit: 1
seed: 42
use_liger: true
wandb_entity: huggingface
wandb_project: open-r1
warmup_ratio: 0.03

View file

@ -0,0 +1,51 @@
# SFT recipe: Qwen/Qwen2.5-Coder-7B-Instruct on
# open-r1/codeforces-cots_decontaminated
# (config: solutions_w_editorials_cpp_py), packed sequences,
# hub revision v08.00.
# Model arguments
model_name_or_path: Qwen/Qwen2.5-Coder-7B-Instruct
model_revision: main
torch_dtype: bfloat16
attn_implementation: flash_attention_2
# Data training arguments
dataset_name: open-r1/codeforces-cots_decontaminated
dataset_config: solutions_w_editorials_cpp_py
dataset_num_proc: 48
# SFT trainer config
callbacks:
- push_to_hub_revision
benchmarks:
- lcb
bf16: true
do_eval: false
eval_strategy: 'no'  # quoted so YAML keeps the string instead of a boolean
gradient_accumulation_steps: 8
gradient_checkpointing: true
gradient_checkpointing_kwargs:
  use_reentrant: false
hub_model_id: open-r1/Qwen2.5-Coder-7B-Instruct-SFT
hub_model_revision: v08.00
hub_strategy: every_save
learning_rate: 1.0e-05
log_level: info
logging_steps: 1
logging_strategy: steps
lr_scheduler_type: cosine_with_min_lr
lr_scheduler_kwargs:
  min_lr_rate: 0.1  # LR floor as a ratio of the initial learning_rate
packing: true
max_length: 32768
max_steps: -1  # no step cap; run length is set by num_train_epochs
num_train_epochs: 10
output_dir: data/open-r1/Qwen2.5-Coder-7B-Instruct-SFT-v08.00
overwrite_output_dir: true
per_device_eval_batch_size: 1
per_device_train_batch_size: 2
push_to_hub: true
report_to:
- wandb
save_strategy: epoch
save_total_limit: 1
seed: 42
use_liger: true
wandb_entity: huggingface
wandb_project: open-r1
warmup_ratio: 0.03

View file

@ -0,0 +1,52 @@
# Config for 4 nodes to go fast
# SFT recipe: Qwen/Qwen2.5-Coder-7B-Instruct on open-r1/ioi-cots,
# hub revision v09.00.
# Model arguments
model_name_or_path: Qwen/Qwen2.5-Coder-7B-Instruct
model_revision: main
torch_dtype: bfloat16
attn_implementation: flash_attention_2
# Data training arguments
dataset_name: open-r1/ioi-cots
dataset_num_proc: 12
# SFT trainer config
callbacks:
- push_to_hub_revision
benchmarks:
- lcb_v4
- lcb
bf16: true
do_eval: false
eval_strategy: 'no'  # quoted so YAML keeps the string instead of a boolean
gradient_accumulation_steps: 2
gradient_checkpointing: true
gradient_checkpointing_kwargs:
  use_reentrant: false
hub_model_id: open-r1/Qwen2.5-Coder-7B-Instruct-SFT
hub_model_revision: v09.00
hub_strategy: every_save
learning_rate: 4.0e-05
log_level: info
logging_steps: 1
logging_strategy: steps
lr_scheduler_type: cosine_with_min_lr
lr_scheduler_kwargs:
  min_lr_rate: 0.1  # LR floor as a ratio of the initial learning_rate
packing: false
max_length: 32768
max_steps: -1  # no step cap; run length is set by num_train_epochs
num_train_epochs: 10
output_dir: data/open-r1/Qwen2.5-Coder-7B-Instruct-SFT-v09.00
overwrite_output_dir: true
per_device_eval_batch_size: 1
per_device_train_batch_size: 2
push_to_hub: true
report_to:
- wandb
save_strategy: epoch
save_total_limit: 1
seed: 42
use_liger: true
wandb_entity: huggingface
wandb_project: open-r1
warmup_ratio: 0.03

View file

@ -0,0 +1,53 @@
# Config for 1 node
# SFT recipe: fine-tunes Qwen/Qwen2.5-Coder-7B-Instruct on the
# `solutions_stop_compile` config of open-r1/codeforces-ioi-cots-mix and
# targets revision v10.00 of open-r1/Qwen2.5-Coder-7B-Instruct-SFT.

# Model arguments
model_name_or_path: Qwen/Qwen2.5-Coder-7B-Instruct
model_revision: main
torch_dtype: bfloat16
attn_implementation: flash_attention_2

# Data training arguments
dataset_name: open-r1/codeforces-ioi-cots-mix
dataset_config: solutions_stop_compile
dataset_num_proc: 12

# SFT trainer config
callbacks:
- push_to_hub_revision
benchmarks:
- lcb_v4
bf16: true
do_eval: false
# Quoted on purpose: a bare `no` is read as boolean false by YAML 1.1 loaders.
eval_strategy: 'no'
gradient_accumulation_steps: 8
gradient_checkpointing: true
# use_reentrant must stay nested here; at column 0 it becomes a separate
# top-level key and this mapping parses as null.
gradient_checkpointing_kwargs:
  use_reentrant: false
hub_model_id: open-r1/Qwen2.5-Coder-7B-Instruct-SFT
hub_model_revision: v10.00
hub_strategy: every_save
learning_rate: 4.0e-05
log_level: info
logging_steps: 1
logging_strategy: steps
lr_scheduler_type: cosine_with_min_lr
# min_lr_rate is a scheduler kwarg, so it is nested rather than top-level.
lr_scheduler_kwargs:
  min_lr_rate: 0.1
packing: false
max_grad_norm: 0.2
max_length: 32768
# -1 leaves the run length to num_train_epochs.
max_steps: -1
num_train_epochs: 10
output_dir: data/open-r1/Qwen2.5-Coder-7B-Instruct-SFT-v10.00
overwrite_output_dir: true
per_device_eval_batch_size: 1
per_device_train_batch_size: 2
push_to_hub: true
report_to:
- wandb
save_strategy: epoch
save_total_limit: 1
seed: 42
use_liger: true
wandb_entity: huggingface
wandb_project: open-r1
warmup_ratio: 0.03

View file

@ -0,0 +1,53 @@
# Config for 1 node
# SFT recipe: fine-tunes Qwen/Qwen2.5-Coder-7B-Instruct on the
# `solutions_all` config of open-r1/codeforces-cots_decontaminated and
# targets revision v11.00 of open-r1/Qwen2.5-Coder-7B-Instruct-SFT.

# Model arguments
model_name_or_path: Qwen/Qwen2.5-Coder-7B-Instruct
model_revision: main
torch_dtype: bfloat16
attn_implementation: flash_attention_2

# Data training arguments
dataset_name: open-r1/codeforces-cots_decontaminated
dataset_config: solutions_all
dataset_num_proc: 12

# SFT trainer config
callbacks:
- push_to_hub_revision
benchmarks:
- lcb_v4
bf16: true
do_eval: false
# Quoted on purpose: a bare `no` is read as boolean false by YAML 1.1 loaders.
eval_strategy: 'no'
gradient_accumulation_steps: 8
gradient_checkpointing: true
# use_reentrant must stay nested here; at column 0 it becomes a separate
# top-level key and this mapping parses as null.
gradient_checkpointing_kwargs:
  use_reentrant: false
hub_model_id: open-r1/Qwen2.5-Coder-7B-Instruct-SFT
hub_model_revision: v11.00
hub_strategy: every_save
learning_rate: 4.0e-05
log_level: info
logging_steps: 1
logging_strategy: steps
lr_scheduler_type: cosine_with_min_lr
# min_lr_rate is a scheduler kwarg, so it is nested rather than top-level.
lr_scheduler_kwargs:
  min_lr_rate: 0.1
packing: false
max_grad_norm: 0.2
max_length: 32768
# -1 leaves the run length to num_train_epochs.
max_steps: -1
num_train_epochs: 10
output_dir: data/open-r1/Qwen2.5-Coder-7B-Instruct-SFT-v11.00
overwrite_output_dir: true
per_device_eval_batch_size: 1
per_device_train_batch_size: 2
push_to_hub: true
report_to:
- wandb
save_strategy: epoch
save_total_limit: 1
seed: 42
use_liger: true
wandb_entity: huggingface
wandb_project: open-r1
warmup_ratio: 0.03

View file

@ -0,0 +1,53 @@
# Config for 1 node
# SFT recipe: fine-tunes Qwen/Qwen2.5-Coder-7B-Instruct on the
# `solutions_all_cpp_py` config of open-r1/codeforces-cots_decontaminated and
# targets revision v12.00 of open-r1/Qwen2.5-Coder-7B-Instruct-SFT.

# Model arguments
model_name_or_path: Qwen/Qwen2.5-Coder-7B-Instruct
model_revision: main
torch_dtype: bfloat16
attn_implementation: flash_attention_2

# Data training arguments
dataset_name: open-r1/codeforces-cots_decontaminated
dataset_config: solutions_all_cpp_py
dataset_num_proc: 12

# SFT trainer config
callbacks:
- push_to_hub_revision
benchmarks:
- lcb_v4
bf16: true
do_eval: false
# Quoted on purpose: a bare `no` is read as boolean false by YAML 1.1 loaders.
eval_strategy: 'no'
gradient_accumulation_steps: 8
gradient_checkpointing: true
# use_reentrant must stay nested here; at column 0 it becomes a separate
# top-level key and this mapping parses as null.
gradient_checkpointing_kwargs:
  use_reentrant: false
hub_model_id: open-r1/Qwen2.5-Coder-7B-Instruct-SFT
hub_model_revision: v12.00
hub_strategy: every_save
learning_rate: 4.0e-05
log_level: info
logging_steps: 1
logging_strategy: steps
lr_scheduler_type: cosine_with_min_lr
# min_lr_rate is a scheduler kwarg, so it is nested rather than top-level.
lr_scheduler_kwargs:
  min_lr_rate: 0.1
packing: false
max_grad_norm: 0.2
max_length: 32768
# -1 leaves the run length to num_train_epochs.
max_steps: -1
num_train_epochs: 10
output_dir: data/open-r1/Qwen2.5-Coder-7B-Instruct-SFT-v12.00
overwrite_output_dir: true
per_device_eval_batch_size: 1
per_device_train_batch_size: 2
push_to_hub: true
report_to:
- wandb
save_strategy: epoch
save_total_limit: 1
seed: 42
use_liger: true
wandb_entity: huggingface
wandb_project: open-r1
warmup_ratio: 0.03

View file

@ -0,0 +1,53 @@
# Config for 1 node
# SFT recipe: fine-tunes Qwen/Qwen2.5-Coder-7B-Instruct on the
# `solutions_cpp_py` config of open-r1/codeforces-cots_decontaminated and
# targets revision v13.00 of open-r1/Qwen2.5-Coder-7B-Instruct-SFT.

# Model arguments
model_name_or_path: Qwen/Qwen2.5-Coder-7B-Instruct
model_revision: main
torch_dtype: bfloat16
attn_implementation: flash_attention_2

# Data training arguments
dataset_name: open-r1/codeforces-cots_decontaminated
dataset_config: solutions_cpp_py
dataset_num_proc: 12

# SFT trainer config
callbacks:
- push_to_hub_revision
benchmarks:
- lcb_v4
bf16: true
do_eval: false
# Quoted on purpose: a bare `no` is read as boolean false by YAML 1.1 loaders.
eval_strategy: 'no'
gradient_accumulation_steps: 8
gradient_checkpointing: true
# use_reentrant must stay nested here; at column 0 it becomes a separate
# top-level key and this mapping parses as null.
gradient_checkpointing_kwargs:
  use_reentrant: false
hub_model_id: open-r1/Qwen2.5-Coder-7B-Instruct-SFT
hub_model_revision: v13.00
hub_strategy: every_save
learning_rate: 4.0e-05
log_level: info
logging_steps: 1
logging_strategy: steps
lr_scheduler_type: cosine_with_min_lr
# min_lr_rate is a scheduler kwarg, so it is nested rather than top-level.
lr_scheduler_kwargs:
  min_lr_rate: 0.1
packing: false
max_grad_norm: 0.2
max_length: 32768
# -1 leaves the run length to num_train_epochs.
max_steps: -1
num_train_epochs: 10
output_dir: data/open-r1/Qwen2.5-Coder-7B-Instruct-SFT-v13.00
overwrite_output_dir: true
per_device_eval_batch_size: 1
per_device_train_batch_size: 2
push_to_hub: true
report_to:
- wandb
save_strategy: epoch
save_total_limit: 1
seed: 42
use_liger: true
wandb_entity: huggingface
wandb_project: open-r1
warmup_ratio: 0.03

View file

@ -0,0 +1,53 @@
# Config for 1 node
# SFT recipe: fine-tunes Qwen/Qwen2.5-Coder-7B-Instruct on the
# `solutions_shortest` config of open-r1/codeforces-cots_decontaminated and
# targets revision v14.00 of open-r1/Qwen2.5-Coder-7B-Instruct-SFT.

# Model arguments
model_name_or_path: Qwen/Qwen2.5-Coder-7B-Instruct
model_revision: main
torch_dtype: bfloat16
attn_implementation: flash_attention_2

# Data training arguments
dataset_name: open-r1/codeforces-cots_decontaminated
dataset_config: solutions_shortest
dataset_num_proc: 12

# SFT trainer config
callbacks:
- push_to_hub_revision
benchmarks:
- lcb_v4
bf16: true
do_eval: false
# Quoted on purpose: a bare `no` is read as boolean false by YAML 1.1 loaders.
eval_strategy: 'no'
gradient_accumulation_steps: 8
gradient_checkpointing: true
# use_reentrant must stay nested here; at column 0 it becomes a separate
# top-level key and this mapping parses as null.
gradient_checkpointing_kwargs:
  use_reentrant: false
hub_model_id: open-r1/Qwen2.5-Coder-7B-Instruct-SFT
hub_model_revision: v14.00
hub_strategy: every_save
learning_rate: 4.0e-05
log_level: info
logging_steps: 1
logging_strategy: steps
lr_scheduler_type: cosine_with_min_lr
# min_lr_rate is a scheduler kwarg, so it is nested rather than top-level.
lr_scheduler_kwargs:
  min_lr_rate: 0.1
packing: false
max_grad_norm: 0.2
max_length: 32768
# -1 leaves the run length to num_train_epochs.
max_steps: -1
num_train_epochs: 10
output_dir: data/open-r1/Qwen2.5-Coder-7B-Instruct-SFT-v14.00
overwrite_output_dir: true
per_device_eval_batch_size: 1
per_device_train_batch_size: 2
push_to_hub: true
report_to:
- wandb
save_strategy: epoch
save_total_limit: 1
seed: 42
use_liger: true
wandb_entity: huggingface
wandb_project: open-r1
warmup_ratio: 0.03

View file

@ -0,0 +1,52 @@
# Config for 1 node
# SFT recipe: fine-tunes Qwen/Qwen2.5-Coder-7B-Instruct on
# open-r1/KodCode-V1-SFT-R1 (no dataset_config is set) and targets revision
# v15.00 of open-r1/Qwen2.5-Coder-7B-Instruct-SFT.

# Model arguments
model_name_or_path: Qwen/Qwen2.5-Coder-7B-Instruct
model_revision: main
torch_dtype: bfloat16
attn_implementation: flash_attention_2

# Data training arguments
dataset_name: open-r1/KodCode-V1-SFT-R1
dataset_num_proc: 12

# SFT trainer config
callbacks:
- push_to_hub_revision
benchmarks:
- lcb_v4
bf16: true
do_eval: false
# Quoted on purpose: a bare `no` is read as boolean false by YAML 1.1 loaders.
eval_strategy: 'no'
gradient_accumulation_steps: 8
gradient_checkpointing: true
# use_reentrant must stay nested here; at column 0 it becomes a separate
# top-level key and this mapping parses as null.
gradient_checkpointing_kwargs:
  use_reentrant: false
hub_model_id: open-r1/Qwen2.5-Coder-7B-Instruct-SFT
hub_model_revision: v15.00
hub_strategy: every_save
learning_rate: 4.0e-05
log_level: info
logging_steps: 1
logging_strategy: steps
lr_scheduler_type: cosine_with_min_lr
# min_lr_rate is a scheduler kwarg, so it is nested rather than top-level.
lr_scheduler_kwargs:
  min_lr_rate: 0.1
packing: false
max_grad_norm: 0.2
max_length: 32768
# -1 leaves the run length to num_train_epochs.
max_steps: -1
num_train_epochs: 10
output_dir: data/open-r1/Qwen2.5-Coder-7B-Instruct-SFT-v15.00
overwrite_output_dir: true
per_device_eval_batch_size: 1
per_device_train_batch_size: 2
push_to_hub: true
report_to:
- wandb
save_strategy: epoch
save_total_limit: 1
seed: 42
use_liger: true
wandb_entity: huggingface
wandb_project: open-r1
warmup_ratio: 0.03

View file

@ -0,0 +1,53 @@
# Config for 1 node
# SFT recipe: fine-tunes Qwen/Qwen2.5-Coder-7B-Instruct on the
# `solutions_longest` config of open-r1/codeforces-cots_decontaminated and
# targets revision v16.00 of open-r1/Qwen2.5-Coder-7B-Instruct-SFT.

# Model arguments
model_name_or_path: Qwen/Qwen2.5-Coder-7B-Instruct
model_revision: main
torch_dtype: bfloat16
attn_implementation: flash_attention_2

# Data training arguments
dataset_name: open-r1/codeforces-cots_decontaminated
dataset_config: solutions_longest
dataset_num_proc: 12

# SFT trainer config
callbacks:
- push_to_hub_revision
benchmarks:
- lcb_v4
bf16: true
do_eval: false
# Quoted on purpose: a bare `no` is read as boolean false by YAML 1.1 loaders.
eval_strategy: 'no'
gradient_accumulation_steps: 8
gradient_checkpointing: true
# use_reentrant must stay nested here; at column 0 it becomes a separate
# top-level key and this mapping parses as null.
gradient_checkpointing_kwargs:
  use_reentrant: false
hub_model_id: open-r1/Qwen2.5-Coder-7B-Instruct-SFT
hub_model_revision: v16.00
hub_strategy: every_save
learning_rate: 4.0e-05
log_level: info
logging_steps: 1
logging_strategy: steps
lr_scheduler_type: cosine_with_min_lr
# min_lr_rate is a scheduler kwarg, so it is nested rather than top-level.
lr_scheduler_kwargs:
  min_lr_rate: 0.1
packing: false
max_grad_norm: 0.2
max_length: 32768
# -1 leaves the run length to num_train_epochs.
max_steps: -1
num_train_epochs: 10
output_dir: data/open-r1/Qwen2.5-Coder-7B-Instruct-SFT-v16.00
overwrite_output_dir: true
per_device_eval_batch_size: 1
per_device_train_batch_size: 2
push_to_hub: true
report_to:
- wandb
save_strategy: epoch
save_total_limit: 1
seed: 42
use_liger: true
wandb_entity: huggingface
wandb_project: open-r1
warmup_ratio: 0.03

View file

@ -0,0 +1,53 @@
# Config for 1 node
# SFT recipe: fine-tunes Qwen/Qwen2.5-Coder-7B-Instruct on the
# `solutions_shortest_and_longest` config of
# open-r1/codeforces-cots_decontaminated and targets revision v17.00 of
# open-r1/Qwen2.5-Coder-7B-Instruct-SFT.

# Model arguments
model_name_or_path: Qwen/Qwen2.5-Coder-7B-Instruct
model_revision: main
torch_dtype: bfloat16
attn_implementation: flash_attention_2

# Data training arguments
dataset_name: open-r1/codeforces-cots_decontaminated
dataset_config: solutions_shortest_and_longest
dataset_num_proc: 12

# SFT trainer config
callbacks:
- push_to_hub_revision
benchmarks:
- lcb_v4
bf16: true
do_eval: false
# Quoted on purpose: a bare `no` is read as boolean false by YAML 1.1 loaders.
eval_strategy: 'no'
gradient_accumulation_steps: 8
gradient_checkpointing: true
# use_reentrant must stay nested here; at column 0 it becomes a separate
# top-level key and this mapping parses as null.
gradient_checkpointing_kwargs:
  use_reentrant: false
hub_model_id: open-r1/Qwen2.5-Coder-7B-Instruct-SFT
hub_model_revision: v17.00
hub_strategy: every_save
learning_rate: 4.0e-05
log_level: info
logging_steps: 1
logging_strategy: steps
lr_scheduler_type: cosine_with_min_lr
# min_lr_rate is a scheduler kwarg, so it is nested rather than top-level.
lr_scheduler_kwargs:
  min_lr_rate: 0.1
packing: false
max_grad_norm: 0.2
max_length: 32768
# -1 leaves the run length to num_train_epochs.
max_steps: -1
num_train_epochs: 10
output_dir: data/open-r1/Qwen2.5-Coder-7B-Instruct-SFT-v17.00
overwrite_output_dir: true
per_device_eval_batch_size: 1
per_device_train_batch_size: 2
push_to_hub: true
report_to:
- wandb
save_strategy: epoch
save_total_limit: 1
seed: 42
use_liger: true
wandb_entity: huggingface
wandb_project: open-r1
warmup_ratio: 0.03

View file

@ -0,0 +1,27 @@
# Launcher config: FSDP with FULL_SHARD on one machine and 8 processes,
# bf16 mixed precision, parameters not offloaded.
compute_environment: LOCAL_MACHINE
debug: false
distributed_type: FSDP
# Quoted on purpose: a bare `no` is read as boolean false by YAML 1.1 loaders.
downcast_bf16: 'no'
enable_cpu_affinity: false
# Every fsdp_* option must be nested under fsdp_config; flattened to column 0
# they become stray top-level keys and fsdp_config itself parses as null.
fsdp_config:
  fsdp_activation_checkpointing: false  # Need fix from: https://github.com/huggingface/transformers/pull/36610
  fsdp_auto_wrap_policy: TRANSFORMER_BASED_WRAP
  fsdp_backward_prefetch: BACKWARD_PRE
  fsdp_cpu_ram_efficient_loading: true
  fsdp_forward_prefetch: true
  fsdp_offload_params: false
  fsdp_sharding_strategy: FULL_SHARD
  fsdp_state_dict_type: FULL_STATE_DICT
  fsdp_sync_module_states: true
  fsdp_use_orig_params: true
machine_rank: 0
main_training_function: main
mixed_precision: bf16
num_machines: 1
num_processes: 8
rdzv_backend: static
same_network: true
tpu_env: []
tpu_use_cluster: false
tpu_use_sudo: false
use_cpu: false

View file

@ -0,0 +1,27 @@
# Launcher config: FSDP with FULL_SHARD on one machine and 8 processes,
# bf16 mixed precision, with parameter offload enabled
# (fsdp_offload_params: true).
compute_environment: LOCAL_MACHINE
debug: false
distributed_type: FSDP
# Quoted on purpose: a bare `no` is read as boolean false by YAML 1.1 loaders.
downcast_bf16: 'no'
enable_cpu_affinity: false
# Every fsdp_* option must be nested under fsdp_config; flattened to column 0
# they become stray top-level keys and fsdp_config itself parses as null.
fsdp_config:
  fsdp_activation_checkpointing: false  # Need fix from: https://github.com/huggingface/transformers/pull/36610
  fsdp_auto_wrap_policy: TRANSFORMER_BASED_WRAP
  fsdp_backward_prefetch: BACKWARD_PRE
  fsdp_cpu_ram_efficient_loading: true
  fsdp_forward_prefetch: true
  fsdp_offload_params: true
  fsdp_sharding_strategy: FULL_SHARD
  fsdp_state_dict_type: FULL_STATE_DICT
  fsdp_sync_module_states: true
  fsdp_use_orig_params: true
machine_rank: 0
main_training_function: main
mixed_precision: bf16
num_machines: 1
num_processes: 8
rdzv_backend: static
same_network: true
tpu_env: []
tpu_use_cluster: false
tpu_use_sudo: false
use_cpu: false

View file

@ -0,0 +1,22 @@
# Launcher config: DeepSpeed ZeRO stage 3 on one machine and 8 processes,
# bf16 mixed precision, with optimizer state and parameters offloaded to CPU.
compute_environment: LOCAL_MACHINE
debug: false
# The DeepSpeed options must be nested under deepspeed_config; flattened to
# column 0 they become stray top-level keys and deepspeed_config parses as
# null.
deepspeed_config:
  deepspeed_multinode_launcher: standard
  offload_optimizer_device: cpu
  offload_param_device: cpu
  zero3_init_flag: true
  zero3_save_16bit_model: true
  zero_stage: 3
distributed_type: DEEPSPEED
# Quoted on purpose: a bare `no` is read as boolean false by YAML 1.1 loaders.
downcast_bf16: 'no'
machine_rank: 0
main_training_function: main
mixed_precision: bf16
num_machines: 1
num_processes: 8
rdzv_backend: static
same_network: true
tpu_env: []
tpu_use_cluster: false
tpu_use_sudo: false
use_cpu: false

View file

@ -0,0 +1,50 @@
# SFT recipe: fine-tunes google/gemma-3-12b-it on the `default_solutions`
# config of open-r1/math-code-mix and targets revision v00.00 of
# open-r1/gemma-3-12b-it-SFT.

# Model arguments
model_name_or_path: google/gemma-3-12b-it
model_revision: main
torch_dtype: bfloat16
attn_implementation: flash_attention_2

# Data training arguments
dataset_name: open-r1/math-code-mix
dataset_config: default_solutions
dataset_num_proc: 12

# SFT trainer config
callbacks:
- push_to_hub_revision
benchmarks:
- lcb
bf16: true
do_eval: false
gradient_accumulation_steps: 8
gradient_checkpointing: true
# use_reentrant must stay nested here; at column 0 it becomes a separate
# top-level key and this mapping parses as null.
gradient_checkpointing_kwargs:
  use_reentrant: false
hub_model_id: open-r1/gemma-3-12b-it-SFT
hub_model_revision: v00.00
hub_strategy: every_save
learning_rate: 4.0e-05
log_level: info
logging_steps: 1
logging_strategy: steps
lr_scheduler_type: cosine_with_min_lr
# min_lr_rate is a scheduler kwarg, so it is nested rather than top-level.
lr_scheduler_kwargs:
  min_lr_rate: 0.1
packing: true
max_length: 32768
# -1 leaves the run length to num_train_epochs.
max_steps: -1
num_train_epochs: 10
# NOTE(review): unlike the Qwen recipes, this path has no `open-r1/` segment
# under data/ - confirm that is intentional.
output_dir: data/gemma-3-12b-it-SFT-v00.00
overwrite_output_dir: true
per_device_eval_batch_size: 1
per_device_train_batch_size: 2
push_to_hub: true
report_to:
- wandb
save_strategy: epoch
save_total_limit: 1
seed: 42
use_liger: true
wandb_entity: huggingface
wandb_project: open-r1
warmup_ratio: 0.03

View file

@ -140,6 +140,8 @@ if __name__ == "__main__":
if args.cleanup:
ds = cleanup(ds)
ds = ds.rename_column(f"prompt", "problem")
new_ds_name = args.new_dataset_name or f"{args.dataset}_decontaminated"
config_name = args.config if args.config is not None else "default"
url = ds.push_to_hub(new_ds_name, config_name=config_name, split="train")