Add OlympicCoder 3B

Add long and short recipes
Add v11-v13 ablations
2026-06-24 01:54:06 +00:00 · 2025-03-31 15:36:34 +00:00 · 2025-03-23 07:46:04 +00:00 · 2025-03-16 07:52:11 +00:00 · 2025-03-13 15:44:13 +00:00 · 2025-03-12 21:07:12 +00:00
37 changed files with 1839 additions and 0 deletions
--- a/recipes/QwQ-32B/sft/config_v00.00.yaml
+++ b/recipes/QwQ-32B/sft/config_v00.00.yaml
@ -0,0 +1,53 @@
+# Config for 16 nodes to go fast
+# Model arguments
+model_name_or_path: Qwen/QwQ-32B
+model_revision: main
+torch_dtype: bfloat16
+attn_implementation: flash_attention_2
+
+# Data training arguments
+dataset_name: open-r1/codeforces-cots_decontaminated
+dataset_config: solutions
+dataset_num_proc: 1
+
+# SFT trainer config
+callbacks:
+- push_to_hub_revision
+benchmarks:
+- lcb_v4
+bf16: true
+do_eval: false
+eval_strategy: 'no'
+gradient_accumulation_steps: 1
+gradient_checkpointing: true
+gradient_checkpointing_kwargs:
+  use_reentrant: false
+hub_model_id: open-r1/QwQ-32B-SFT
+hub_model_revision: v00.00
+hub_strategy: every_save
+learning_rate: 4.0e-05
+log_level: info
+logging_steps: 1
+logging_strategy: steps
+lr_scheduler_type: cosine_with_min_lr
+lr_scheduler_kwargs:
+  min_lr_rate: 0.1
+packing: false
+max_grad_norm: 0.2
+max_length: 32768
+max_steps: -1
+num_train_epochs: 10
+output_dir: data/open-r1/QwQ-SFT-v00.00
+overwrite_output_dir: true
+per_device_eval_batch_size: 1
+per_device_train_batch_size: 1
+push_to_hub: true
+report_to:
+- wandb
+save_strategy: epoch
+save_total_limit: 1
+seed: 42
+use_liger: false # fails on multi-node
+wandb_entity: huggingface
+wandb_project: open-r1
+warmup_ratio: 0.03
--- a/recipes/Qwen2.5-Coder-32B-Instruct/sft/config_v00.00.yaml
+++ b/recipes/Qwen2.5-Coder-32B-Instruct/sft/config_v00.00.yaml
@ -0,0 +1,57 @@
+# Config for 16 nodes to go fast
+# Model arguments
+model_name_or_path: Qwen/Qwen2.5-Coder-32B-Instruct
+model_revision: main
+torch_dtype: bfloat16
+attn_implementation: flash_attention_2
+
+# Data training arguments
+dataset_name: open-r1/codeforces-cots_decontaminated
+dataset_config: solutions
+dataset_num_proc: 12
+
+# SFT trainer config
+callbacks:
+- push_to_hub_revision
+benchmarks:
+- lcb_v4
+- lcb
+bf16: true
+do_eval: false
+eval_strategy: 'no'
+gradient_accumulation_steps: 1
+gradient_checkpointing: true
+gradient_checkpointing_kwargs:
+  use_reentrant: false
+hub_always_push: true
+hub_model_id: open-r1/Qwen2.5-Coder-32B-Instruct-SFT
+hub_model_revision: v00.00
+hub_strategy: every_save
+learning_rate: 4.0e-05
+log_level: info
+logging_steps: 1
+logging_strategy: steps
+lr_scheduler_type: cosine_with_min_lr
+lr_scheduler_kwargs:
+  min_lr_rate: 0.1
+packing: false
+max_grad_norm: 0.2
+max_length: 32768
+max_steps: -1
+num_train_epochs: 10
+optim: paged_adamw_8bit
+output_dir: data/open-r1/Qwen2.5-Coder-32B-Instruct-SFT-v00.00
+overwrite_output_dir: true
+per_device_eval_batch_size: 1
+per_device_train_batch_size: 1
+push_to_hub: true
+report_to:
+- wandb
+save_only_model: true # needed to bypass FSDP errors with saving paged optimizers
+save_strategy: epoch
+save_total_limit: 1
+seed: 42
+use_liger: false # fails on multi-node
+wandb_entity: huggingface
+wandb_project: open-r1
+warmup_ratio: 0.03
--- a/recipes/Qwen2.5-Coder-32B-Instruct/sft/config_v01.00.yaml
+++ b/recipes/Qwen2.5-Coder-32B-Instruct/sft/config_v01.00.yaml
@ -0,0 +1,57 @@
+# Config for 16 nodes to go fast
+# Model arguments
+model_name_or_path: Qwen/Qwen2.5-Coder-32B-Instruct
+model_revision: main
+torch_dtype: bfloat16
+attn_implementation: flash_attention_2
+
+# Data training arguments
+dataset_name: open-r1/codeforces-ioi-cots-mix
+dataset_config: solutions_stop_compile
+dataset_num_proc: 12
+
+# SFT trainer config
+callbacks:
+- push_to_hub_revision
+benchmarks:
+- lcb_v4
+- lcb
+bf16: true
+do_eval: false
+eval_strategy: 'no'
+gradient_accumulation_steps: 1
+gradient_checkpointing: true
+gradient_checkpointing_kwargs:
+  use_reentrant: false
+hub_always_push: true
+hub_model_id: open-r1/Qwen2.5-Coder-32B-Instruct-SFT
+hub_model_revision: v01.00
+hub_strategy: every_save
+learning_rate: 4.0e-05
+log_level: info
+logging_steps: 1
+logging_strategy: steps
+lr_scheduler_type: cosine_with_min_lr
+lr_scheduler_kwargs:
+  min_lr_rate: 0.1
+packing: false
+max_grad_norm: 0.2
+max_length: 32768
+max_steps: -1
+num_train_epochs: 10
+optim: paged_adamw_8bit
+output_dir: data/open-r1/Qwen2.5-Coder-32B-Instruct-SFT-v01.00
+overwrite_output_dir: true
+per_device_eval_batch_size: 1
+per_device_train_batch_size: 1
+push_to_hub: true
+report_to:
+- wandb
+save_only_model: true # needed to bypass FSDP errors with saving paged optimizers
+save_strategy: epoch
+save_total_limit: 1
+seed: 42
+use_liger: false # fails on multi-node
+wandb_entity: huggingface
+wandb_project: open-r1
+warmup_ratio: 0.03
--- a/recipes/Qwen2.5-Coder-32B-Instruct/sft/config_v02.00.yaml
+++ b/recipes/Qwen2.5-Coder-32B-Instruct/sft/config_v02.00.yaml
@ -0,0 +1,56 @@
+# Config for 4 nodes since it's only 10k samples
+# Model arguments
+model_name_or_path: Qwen/Qwen2.5-Coder-32B-Instruct
+model_revision: main
+torch_dtype: bfloat16
+attn_implementation: flash_attention_2
+
+# Data training arguments
+dataset_name: open-r1/ioi-cots
+dataset_num_proc: 12
+
+# SFT trainer config
+callbacks:
+- push_to_hub_revision
+benchmarks:
+- lcb_v4
+- lcb
+bf16: true
+do_eval: false
+eval_strategy: 'no'
+gradient_accumulation_steps: 1
+gradient_checkpointing: true
+gradient_checkpointing_kwargs:
+  use_reentrant: false
+hub_always_push: true
+hub_model_id: open-r1/Qwen2.5-Coder-32B-Instruct-SFT
+hub_model_revision: v02.00
+hub_strategy: every_save
+learning_rate: 4.0e-05
+log_level: info
+logging_steps: 1
+logging_strategy: steps
+lr_scheduler_type: cosine_with_min_lr
+lr_scheduler_kwargs:
+  min_lr_rate: 0.1
+packing: false
+max_grad_norm: 0.2
+max_length: 32768
+max_steps: -1
+num_train_epochs: 10
+optim: paged_adamw_8bit
+output_dir: data/open-r1/Qwen2.5-Coder-32B-Instruct-SFT-v02.00
+overwrite_output_dir: true
+per_device_eval_batch_size: 1
+per_device_train_batch_size: 1
+push_to_hub: true
+report_to:
+- wandb
+save_only_model: true # needed to bypass FSDP errors with saving paged optimizers
+save_strategy: epoch
+save_total_limit: 1
+seed: 42
+use_liger: false # fails on multi-node
+wandb_entity: huggingface
+wandb_project: open-r1
+warmup_ratio: 0.03
--- a/recipes/Qwen2.5-Coder-32B-Instruct/sft/config_v03.00.yaml
+++ b/recipes/Qwen2.5-Coder-32B-Instruct/sft/config_v03.00.yaml
@ -0,0 +1,57 @@
+# Config for 4 nodes since it's only 10k samples
+# Model arguments
+model_name_or_path: Qwen/Qwen2.5-Coder-32B-Instruct
+model_revision: main
+torch_dtype: bfloat16
+attn_implementation: flash_attention_2
+
+# Data training arguments
+dataset_name: open-r1/ioi-cots-filtered
+dataset_config: stop_compile
+dataset_num_proc: 12
+
+# SFT trainer config
+callbacks:
+- push_to_hub_revision
+benchmarks:
+- lcb_v4
+- lcb
+bf16: true
+do_eval: false
+eval_strategy: 'no'
+gradient_accumulation_steps: 1
+gradient_checkpointing: true
+gradient_checkpointing_kwargs:
+  use_reentrant: false
+hub_always_push: true
+hub_model_id: open-r1/Qwen2.5-Coder-32B-Instruct-SFT
+hub_model_revision: v03.00
+hub_strategy: every_save
+learning_rate: 4.0e-05
+log_level: info
+logging_steps: 1
+logging_strategy: steps
+lr_scheduler_type: cosine_with_min_lr
+lr_scheduler_kwargs:
+  min_lr_rate: 0.1
+packing: false
+max_grad_norm: 0.2
+max_length: 32768
+max_steps: -1
+num_train_epochs: 10
+optim: paged_adamw_8bit
+output_dir: data/open-r1/Qwen2.5-Coder-32B-Instruct-SFT-v03.00
+overwrite_output_dir: true
+per_device_eval_batch_size: 1
+per_device_train_batch_size: 1
+push_to_hub: true
+report_to:
+- wandb
+save_only_model: true # needed to bypass FSDP errors with saving paged optimizers
+save_strategy: epoch
+save_total_limit: 1
+seed: 42
+use_liger: false # fails on multi-node
+wandb_entity: huggingface
+wandb_project: open-r1
+warmup_ratio: 0.03
--- a/recipes/Qwen2.5-Coder-32B-Instruct/sft/config_v04.00.yaml
+++ b/recipes/Qwen2.5-Coder-32B-Instruct/sft/config_v04.00.yaml
@ -0,0 +1,57 @@
+# Config for 4 nodes since it's only 10k samples
+# Model arguments
+model_name_or_path: Qwen/Qwen2.5-Coder-32B-Instruct
+model_revision: main
+torch_dtype: bfloat16
+attn_implementation: flash_attention_2
+
+# Data training arguments
+dataset_name: open-r1/ioi-cots-filtered
+dataset_config: stop_compile_subtask_score_gt_0
+dataset_num_proc: 12
+
+# SFT trainer config
+callbacks:
+- push_to_hub_revision
+benchmarks:
+- lcb_v4
+- lcb
+bf16: true
+do_eval: false
+eval_strategy: 'no'
+gradient_accumulation_steps: 1
+gradient_checkpointing: true
+gradient_checkpointing_kwargs:
+  use_reentrant: false
+hub_always_push: true
+hub_model_id: open-r1/Qwen2.5-Coder-32B-Instruct-SFT
+hub_model_revision: v04.00
+hub_strategy: every_save
+learning_rate: 4.0e-05
+log_level: info
+logging_steps: 1
+logging_strategy: steps
+lr_scheduler_type: cosine_with_min_lr
+lr_scheduler_kwargs:
+  min_lr_rate: 0.1
+packing: false
+max_grad_norm: 0.2
+max_length: 32768
+max_steps: -1
+num_train_epochs: 10
+optim: paged_adamw_8bit
+output_dir: data/open-r1/Qwen2.5-Coder-32B-Instruct-SFT-v04.00
+overwrite_output_dir: true
+per_device_eval_batch_size: 1
+per_device_train_batch_size: 1
+push_to_hub: true
+report_to:
+- wandb
+save_only_model: true # needed to bypass FSDP errors with saving paged optimizers
+save_strategy: epoch
+save_total_limit: 1
+seed: 42
+use_liger: false # fails on multi-node
+wandb_entity: huggingface
+wandb_project: open-r1
+warmup_ratio: 0.03
--- a/recipes/Qwen2.5-Coder-32B-Instruct/sft/config_v05.00.yaml
+++ b/recipes/Qwen2.5-Coder-32B-Instruct/sft/config_v05.00.yaml
@ -0,0 +1,57 @@
+# Config for 4 nodes since it's only 10k samples
+# Model arguments
+model_name_or_path: Qwen/Qwen2.5-Coder-32B-Instruct
+model_revision: main
+torch_dtype: bfloat16
+attn_implementation: flash_attention_2
+
+# Data training arguments
+dataset_name: open-r1/ioi-cots-filtered
+dataset_config: stop_compile_subtask_score_gt_0_subtask_points_gt_0
+dataset_num_proc: 12
+
+# SFT trainer config
+callbacks:
+- push_to_hub_revision
+benchmarks:
+- lcb_v4
+- lcb
+bf16: true
+do_eval: false
+eval_strategy: 'no'
+gradient_accumulation_steps: 1
+gradient_checkpointing: true
+gradient_checkpointing_kwargs:
+  use_reentrant: false
+hub_always_push: true
+hub_model_id: open-r1/Qwen2.5-Coder-32B-Instruct-SFT
+hub_model_revision: v05.00
+hub_strategy: every_save
+learning_rate: 4.0e-05
+log_level: info
+logging_steps: 1
+logging_strategy: steps
+lr_scheduler_type: cosine_with_min_lr
+lr_scheduler_kwargs:
+  min_lr_rate: 0.1
+packing: false
+max_grad_norm: 0.2
+max_length: 32768
+max_steps: -1
+num_train_epochs: 10
+optim: paged_adamw_8bit
+output_dir: data/open-r1/Qwen2.5-Coder-32B-Instruct-SFT-v05.00
+overwrite_output_dir: true
+per_device_eval_batch_size: 1
+per_device_train_batch_size: 1
+push_to_hub: true
+report_to:
+- wandb
+save_only_model: true # needed to bypass FSDP errors with saving paged optimizers
+save_strategy: epoch
+save_total_limit: 1
+seed: 42
+use_liger: false # fails on multi-node
+wandb_entity: huggingface
+wandb_project: open-r1
+warmup_ratio: 0.03
--- a/recipes/Qwen2.5-Coder-3B-Instruct/sft/config_debug.yaml
+++ b/recipes/Qwen2.5-Coder-3B-Instruct/sft/config_debug.yaml
@ -0,0 +1,55 @@
+# Config for 16 nodes to go fast
+# Model arguments
+model_name_or_path: Qwen/Qwen2.5-Coder-3B-Instruct
+model_revision: main
+torch_dtype: bfloat16
+attn_implementation: flash_attention_2
+
+# Data training arguments
+dataset_name: open-r1/codeforces-cots_decontaminated
+dataset_config: solutions
+dataset_num_proc: 48
+
+# SFT trainer config
+callbacks:
+- push_to_hub_revision
+benchmarks:
+- lcb
+bf16: true
+do_eval: false
+eval_strategy: 'no'
+gradient_accumulation_steps: 1
+gradient_checkpointing: true
+gradient_checkpointing_kwargs:
+  use_reentrant: false
+hub_model_id: open-r1/Qwen2.5-Coder-3B-Instruct-SFT
+hub_model_revision: debug
+hub_strategy: every_save
+learning_rate: 4.0e-05
+log_level: info
+logging_steps: 1
+logging_strategy: steps
+lr_scheduler_type: cosine_with_min_lr
+lr_scheduler_kwargs:
+  min_lr_rate: 0.1
+packing: false
+max_grad_norm: 0.2
+max_length: 23552 # Any larger gives OOM
+max_steps: 4 # debug run: a positive max_steps takes precedence over num_train_epochs
+num_train_epochs: 10
+# optim: paged_adamw_8bit
+output_dir: data/Qwen2.5-Coder-3B-Instruct-SFT-debug
+overwrite_output_dir: true
+per_device_eval_batch_size: 1
+per_device_train_batch_size: 1
+push_to_hub: true
+report_to:
+- wandb
+save_strategy: steps
+save_steps: 2
+save_total_limit: 1
+seed: 42
+use_liger: false # fails on multi-node?
+wandb_entity: huggingface
+wandb_project: open-r1
+warmup_ratio: 0.03
--- a/recipes/Qwen2.5-Coder-3B-Instruct/sft/config_v02.00.yaml
+++ b/recipes/Qwen2.5-Coder-3B-Instruct/sft/config_v02.00.yaml
@ -0,0 +1,55 @@
+# Model arguments
+model_name_or_path: Qwen/Qwen2.5-Coder-3B-Instruct
+model_revision: main
+torch_dtype: bfloat16
+attn_implementation: flash_attention_2
+
+# Data training arguments
+dataset_name: open-r1/OpenThoughts-114k-Curated
+dataset_configs:
+- all
+dataset_num_proc: 12
+
+# SFT trainer config
+callbacks:
+- push_to_hub_revision
+benchmarks:
+- math_500
+- aime24
+- aime25
+- gpqa
+- lcb
+bf16: true
+do_eval: false
+gradient_accumulation_steps: 4
+gradient_checkpointing: true
+gradient_checkpointing_kwargs:
+  use_reentrant: false
+hub_model_id: open-r1/Qwen2.5-Coder-3B-Instruct-SFT
+hub_model_revision: v02.00
+hub_strategy: every_save
+learning_rate: 5.0e-07
+log_level: info
+logging_steps: 1
+logging_strategy: steps
+lr_scheduler_type: cosine_with_min_lr
+lr_scheduler_kwargs:
+  min_lr_rate: 0.1
+packing: true
+max_length: 32768 # TRL's SFTConfig renamed max_seq_length to max_length
+max_steps: -1
+num_train_epochs: 3
+output_dir: data/open-r1/Qwen2.5-Coder-3B-Instruct-SFT-v02.00
+overwrite_output_dir: true
+per_device_eval_batch_size: 1
+per_device_train_batch_size: 4
+push_to_hub: true
+report_to:
+- wandb
+save_strategy: "epoch"
+save_total_limit: 1
+seed: 42
+use_liger: true
+wandb_entity: huggingface
+wandb_project: open-r1
+warmup_ratio: 0.03
--- a/recipes/Qwen2.5-Coder-3B-Instruct/sft/config_v02.01.yaml
+++ b/recipes/Qwen2.5-Coder-3B-Instruct/sft/config_v02.01.yaml
@ -0,0 +1,55 @@
+# Model arguments
+model_name_or_path: Qwen/Qwen2.5-Coder-3B-Instruct
+model_revision: main
+torch_dtype: bfloat16
+attn_implementation: flash_attention_2
+
+# Data training arguments
+dataset_name: open-r1/OpenThoughts-114k-Curated
+dataset_configs:
+- all
+dataset_num_proc: 12
+
+# SFT trainer config
+callbacks:
+- push_to_hub_revision
+benchmarks:
+- math_500
+- aime24
+- aime25
+- gpqa
+- lcb
+bf16: true
+do_eval: false
+gradient_accumulation_steps: 4
+gradient_checkpointing: true
+gradient_checkpointing_kwargs:
+  use_reentrant: false
+hub_model_id: open-r1/Qwen2.5-Coder-3B-Instruct-SFT
+hub_model_revision: v02.01
+hub_strategy: every_save
+learning_rate: 5.0e-07
+log_level: info
+logging_steps: 1
+logging_strategy: steps
+lr_scheduler_type: cosine_with_min_lr
+lr_scheduler_kwargs:
+  min_lr_rate: 0.1
+packing: false
+max_length: 32768 # TRL's SFTConfig renamed max_seq_length to max_length
+max_steps: -1
+num_train_epochs: 3
+output_dir: data/open-r1/Qwen2.5-Coder-3B-Instruct-SFT-v02.01
+overwrite_output_dir: true
+per_device_eval_batch_size: 1
+per_device_train_batch_size: 4
+push_to_hub: true
+report_to:
+- wandb
+save_strategy: "epoch"
+save_total_limit: 1
+seed: 42
+use_liger: true
+wandb_entity: huggingface
+wandb_project: open-r1
+warmup_ratio: 0.03
--- a/recipes/Qwen2.5-Coder-3B-Instruct/sft/config_v02.02.yaml
+++ b/recipes/Qwen2.5-Coder-3B-Instruct/sft/config_v02.02.yaml
@ -0,0 +1,55 @@
+# Model arguments
+model_name_or_path: Qwen/Qwen2.5-Coder-3B-Instruct
+model_revision: main
+torch_dtype: bfloat16
+attn_implementation: flash_attention_2
+
+# Data training arguments
+dataset_name: open-r1/OpenThoughts-114k-Curated
+dataset_configs:
+- all
+dataset_num_proc: 12
+
+# SFT trainer config
+callbacks:
+- push_to_hub_revision
+benchmarks:
+- math_500
+- aime24
+- aime25
+- gpqa
+- lcb
+bf16: true
+do_eval: false
+gradient_accumulation_steps: 4
+gradient_checkpointing: true
+gradient_checkpointing_kwargs:
+  use_reentrant: false
+hub_model_id: open-r1/Qwen2.5-Coder-3B-Instruct-SFT
+hub_model_revision: v02.02
+hub_strategy: every_save
+learning_rate: 5.0e-07
+log_level: info
+logging_steps: 1
+logging_strategy: steps
+lr_scheduler_type: cosine_with_min_lr
+lr_scheduler_kwargs:
+  min_lr_rate: 0.1
+packing: false
+max_length: 32768 # TRL's SFTConfig renamed max_seq_length to max_length
+max_steps: -1
+num_train_epochs: 9
+output_dir: data/open-r1/Qwen2.5-Coder-3B-Instruct-SFT-v02.02
+overwrite_output_dir: true
+per_device_eval_batch_size: 1
+per_device_train_batch_size: 4
+push_to_hub: true
+report_to:
+- wandb
+save_strategy: "epoch"
+save_total_limit: 1
+seed: 42
+use_liger: true
+wandb_entity: huggingface
+wandb_project: open-r1
+warmup_ratio: 0.03
--- a/recipes/Qwen2.5-Coder-3B-Instruct/sft/config_v03.00.yaml
+++ b/recipes/Qwen2.5-Coder-3B-Instruct/sft/config_v03.00.yaml
@ -0,0 +1,53 @@
+# Config for 1 node
+# Model arguments
+model_name_or_path: Qwen/Qwen2.5-Coder-3B-Instruct
+model_revision: main
+torch_dtype: bfloat16
+attn_implementation: flash_attention_2
+
+# Data training arguments
+dataset_name: open-r1/codeforces-cots_decontaminated
+dataset_config: solutions_all_cpp_py
+dataset_num_proc: 12
+
+# SFT trainer config
+callbacks:
+- push_to_hub_revision
+benchmarks:
+- lcb_v4
+bf16: true
+do_eval: false
+eval_strategy: 'no'
+gradient_accumulation_steps: 8
+gradient_checkpointing: true
+gradient_checkpointing_kwargs:
+  use_reentrant: false
+hub_model_id: open-r1/Qwen2.5-Coder-3B-Instruct-SFT
+hub_model_revision: v03.00
+hub_strategy: every_save
+learning_rate: 4.0e-05
+log_level: info
+logging_steps: 1
+logging_strategy: steps
+lr_scheduler_type: cosine_with_min_lr
+lr_scheduler_kwargs:
+  min_lr_rate: 0.1
+packing: false
+max_grad_norm: 0.2
+max_length: 32768
+max_steps: -1
+num_train_epochs: 10
+output_dir: data/open-r1/Qwen2.5-Coder-3B-Instruct-SFT-v03.00
+overwrite_output_dir: true
+per_device_eval_batch_size: 1
+per_device_train_batch_size: 2
+push_to_hub: true
+report_to:
+- wandb
+save_strategy: epoch
+save_total_limit: 1
+seed: 42
+use_liger: true
+wandb_entity: huggingface
+wandb_project: open-r1
+warmup_ratio: 0.03
--- a/recipes/Qwen2.5-Coder-7B-Instruct/sft/config_v00.00.yaml
+++ b/recipes/Qwen2.5-Coder-7B-Instruct/sft/config_v00.00.yaml
@ -0,0 +1,52 @@
+# Model arguments
+model_name_or_path: Qwen/Qwen2.5-Coder-7B-Instruct
+model_revision: main
+torch_dtype: bfloat16
+attn_implementation: flash_attention_2
+
+# Data training arguments
+dataset_name: open-r1/OpenThoughts-114k-Code_decontaminated
+dataset_configs:
+- all
+dataset_num_proc: 12
+
+# SFT trainer config
+callbacks:
+- push_to_hub_revision
+benchmarks:
+- lcb
+bf16: true
+do_eval: false
+gradient_accumulation_steps: 8
+gradient_checkpointing: true
+gradient_checkpointing_kwargs:
+  use_reentrant: false
+hub_model_id: open-r1/Qwen2.5-Coder-7B-Instruct-SFT
+hub_model_revision: v00.00
+hub_strategy: every_save
+learning_rate: 1.0e-06
+log_level: info
+logging_steps: 1
+logging_strategy: steps
+lr_scheduler_type: cosine_with_min_lr
+lr_scheduler_kwargs:
+  min_lr_rate: 0.1
+packing: true
+max_length: 32768
+max_steps: -1
+num_train_epochs: 10
+output_dir: data/open-r1/Qwen2.5-Coder-7B-Instruct-SFT-v00.00
+overwrite_output_dir: true
+per_device_eval_batch_size: 1
+per_device_train_batch_size: 2
+push_to_hub: true
+report_to:
+- wandb
+save_strategy: "steps"
+save_steps: 0.2 # a float < 1 is interpreted as a fraction of total training steps
+save_total_limit: 1
+seed: 42
+use_liger: true
+wandb_entity: huggingface
+wandb_project: open-r1
+warmup_ratio: 0.03
--- a/recipes/Qwen2.5-Coder-7B-Instruct/sft/config_v00.01.yaml
+++ b/recipes/Qwen2.5-Coder-7B-Instruct/sft/config_v00.01.yaml
@ -0,0 +1,52 @@
+# Model arguments
+model_name_or_path: Qwen/Qwen2.5-Coder-7B-Instruct
+model_revision: main
+torch_dtype: bfloat16
+attn_implementation: flash_attention_2
+
+# Data training arguments
+dataset_name: open-r1/OpenThoughts-114k-Code_decontaminated
+dataset_configs:
+- all
+dataset_num_proc: 12
+
+# SFT trainer config
+callbacks:
+- push_to_hub_revision
+benchmarks:
+- lcb
+bf16: true
+do_eval: false
+gradient_accumulation_steps: 8
+gradient_checkpointing: true
+gradient_checkpointing_kwargs:
+  use_reentrant: false
+hub_model_id: open-r1/Qwen2.5-Coder-7B-Instruct-SFT
+hub_model_revision: v00.01
+hub_strategy: every_save
+learning_rate: 1.0e-06
+log_level: info
+logging_steps: 1
+logging_strategy: steps
+lr_scheduler_type: cosine_with_min_lr
+lr_scheduler_kwargs:
+  min_lr_rate: 0.1
+packing: false
+max_length: 32768
+max_steps: -1
+num_train_epochs: 10
+output_dir: data/open-r1/Qwen2.5-Coder-7B-Instruct-SFT-v00.01
+overwrite_output_dir: true
+per_device_eval_batch_size: 1
+per_device_train_batch_size: 2
+push_to_hub: true
+report_to:
+- wandb
+save_strategy: "steps"
+save_steps: 0.2 # a float < 1 is interpreted as a fraction of total training steps
+save_total_limit: 1
+seed: 42
+use_liger: true
+wandb_entity: huggingface
+wandb_project: open-r1
+warmup_ratio: 0.03
--- a/recipes/Qwen2.5-Coder-7B-Instruct/sft/config_v01.00.yaml
+++ b/recipes/Qwen2.5-Coder-7B-Instruct/sft/config_v01.00.yaml
@ -0,0 +1,52 @@
+# Model arguments
+model_name_or_path: Qwen/Qwen2.5-Coder-7B-Instruct
+model_revision: main
+torch_dtype: bfloat16
+attn_implementation: flash_attention_2
+
+# Data training arguments
+dataset_name: open-r1/SYNTHETIC-1-SFT-Data-Code_decontaminated
+dataset_configs:
+- all
+dataset_num_proc: 12
+
+# SFT trainer config
+callbacks:
+- push_to_hub_revision
+benchmarks:
+- lcb
+bf16: true
+do_eval: false
+gradient_accumulation_steps: 8
+gradient_checkpointing: true
+gradient_checkpointing_kwargs:
+  use_reentrant: false
+hub_model_id: open-r1/Qwen2.5-Coder-7B-Instruct-SFT
+hub_model_revision: v01.00
+hub_strategy: every_save
+learning_rate: 1.0e-06
+log_level: info
+logging_steps: 1
+logging_strategy: steps
+lr_scheduler_type: cosine_with_min_lr
+lr_scheduler_kwargs:
+  min_lr_rate: 0.1
+packing: true
+max_length: 32768
+max_steps: -1
+num_train_epochs: 10
+output_dir: data/open-r1/Qwen2.5-Coder-7B-Instruct-SFT-v01.00
+overwrite_output_dir: true
+per_device_eval_batch_size: 1
+per_device_train_batch_size: 2
+push_to_hub: true
+report_to:
+- wandb
+save_strategy: "steps"
+save_steps: 0.2 # a float < 1 is interpreted as a fraction of total training steps
+save_total_limit: 1
+seed: 42
+use_liger: true
+wandb_entity: huggingface
+wandb_project: open-r1
+warmup_ratio: 0.03
--- a/recipes/Qwen2.5-Coder-7B-Instruct/sft/config_v01.01.yaml
+++ b/recipes/Qwen2.5-Coder-7B-Instruct/sft/config_v01.01.yaml
@ -0,0 +1,52 @@
+# Model arguments
+model_name_or_path: Qwen/Qwen2.5-Coder-7B-Instruct
+model_revision: main
+torch_dtype: bfloat16
+attn_implementation: flash_attention_2
+
+# Data training arguments
+dataset_name: open-r1/SYNTHETIC-1-SFT-Data-Code_decontaminated
+dataset_configs:
+- all
+dataset_num_proc: 12
+
+# SFT trainer config
+callbacks:
+- push_to_hub_revision
+benchmarks:
+- lcb
+bf16: true
+do_eval: false
+gradient_accumulation_steps: 8
+gradient_checkpointing: true
+gradient_checkpointing_kwargs:
+  use_reentrant: false
+hub_model_id: open-r1/Qwen2.5-Coder-7B-Instruct-SFT
+hub_model_revision: v01.01
+hub_strategy: every_save
+learning_rate: 1.0e-06
+log_level: info
+logging_steps: 1
+logging_strategy: steps
+lr_scheduler_type: cosine_with_min_lr
+lr_scheduler_kwargs:
+  min_lr_rate: 0.1
+packing: false
+max_length: 32768
+max_steps: -1
+num_train_epochs: 10
+output_dir: data/open-r1/Qwen2.5-Coder-7B-Instruct-SFT-v01.01
+overwrite_output_dir: true
+per_device_eval_batch_size: 1
+per_device_train_batch_size: 2
+push_to_hub: true
+report_to:
+- wandb
+save_strategy: "steps"
+save_steps: 0.2 # a float < 1 is interpreted as a fraction of total training steps
+save_total_limit: 1
+seed: 42
+use_liger: true
+wandb_entity: huggingface
+wandb_project: open-r1
+warmup_ratio: 0.03
--- a/recipes/Qwen2.5-Coder-7B-Instruct/sft/config_v02.00.yaml
+++ b/recipes/Qwen2.5-Coder-7B-Instruct/sft/config_v02.00.yaml
@ -0,0 +1,52 @@
+# Model arguments
+model_name_or_path: Qwen/Qwen2.5-Coder-7B-Instruct
+model_revision: main
+torch_dtype: bfloat16
+attn_implementation: flash_attention_2
+
+# Data training arguments
+dataset_name: open-r1/codeforces-cots_decontaminated
+dataset_config: solutions
+dataset_num_proc: 48
+
+# SFT trainer config
+callbacks:
+- push_to_hub_revision
+benchmarks:
+- lcb
+bf16: true
+do_eval: false
+eval_strategy: 'no'
+gradient_accumulation_steps: 8
+gradient_checkpointing: true
+gradient_checkpointing_kwargs:
+  use_reentrant: false
+hub_model_id: open-r1/Qwen2.5-Coder-7B-Instruct-SFT
+hub_model_revision: v02.00
+hub_strategy: every_save
+learning_rate: 1.0e-05
+log_level: info
+logging_steps: 1
+logging_strategy: steps
+lr_scheduler_type: cosine_with_min_lr
+lr_scheduler_kwargs:
+  min_lr_rate: 0.1
+packing: true
+max_length: 32768
+max_steps: -1
+num_train_epochs: 10
+output_dir: data/open-r1/Qwen2.5-Coder-7B-Instruct-SFT-v02.00
+overwrite_output_dir: true
+per_device_eval_batch_size: 1
+per_device_train_batch_size: 2
+push_to_hub: true
+report_to:
+- wandb
+save_strategy: "steps"
+save_steps: 0.2 # a float < 1 is interpreted as a fraction of total training steps
+save_total_limit: 1
+seed: 42
+use_liger: true
+wandb_entity: huggingface
+wandb_project: open-r1
+warmup_ratio: 0.03
--- a/recipes/Qwen2.5-Coder-7B-Instruct/sft/config_v03.00.yaml
+++ b/recipes/Qwen2.5-Coder-7B-Instruct/sft/config_v03.00.yaml
@ -0,0 +1,52 @@
+# Model arguments
+model_name_or_path: Qwen/Qwen2.5-Coder-7B-Instruct
+model_revision: main
+torch_dtype: bfloat16
+attn_implementation: flash_attention_2
+
+# Data training arguments
+dataset_name: open-r1/codeforces-cots_decontaminated
+dataset_config: solutions_w_editorials
+dataset_num_proc: 48
+
+# SFT trainer config
+callbacks:
+- push_to_hub_revision
+benchmarks:
+- lcb
+bf16: true
+do_eval: false
+eval_strategy: 'no'
+gradient_accumulation_steps: 8
+gradient_checkpointing: true
+gradient_checkpointing_kwargs:
+  use_reentrant: false
+hub_model_id: open-r1/Qwen2.5-Coder-7B-Instruct-SFT
+hub_model_revision: v03.00
+hub_strategy: every_save
+learning_rate: 1.0e-05
+log_level: info
+logging_steps: 1
+logging_strategy: steps
+lr_scheduler_type: cosine_with_min_lr
+lr_scheduler_kwargs:
+  min_lr_rate: 0.1
+packing: true
+max_length: 32768
+max_steps: -1
+num_train_epochs: 10
+output_dir: data/open-r1/Qwen2.5-Coder-7B-Instruct-SFT-v03.00
+overwrite_output_dir: true
+per_device_eval_batch_size: 1
+per_device_train_batch_size: 2
+push_to_hub: true
+report_to:
+- wandb
+save_strategy: "steps"
+save_steps: 0.2 # a float < 1 is interpreted as a fraction of total training steps
+save_total_limit: 1
+seed: 42
+use_liger: true
+wandb_entity: huggingface
+wandb_project: open-r1
+warmup_ratio: 0.03
--- a/recipes/Qwen2.5-Coder-7B-Instruct/sft/config_v04.00.yaml
+++ b/recipes/Qwen2.5-Coder-7B-Instruct/sft/config_v04.00.yaml
@ -0,0 +1,52 @@
+# Model arguments
+model_name_or_path: Qwen/Qwen2.5-Coder-7B-Instruct
+model_revision: main
+torch_dtype: bfloat16
+attn_implementation: flash_attention_2
+
+# Data training arguments
+dataset_name: open-r1/codeforces-cots_decontaminated
+dataset_config: test_input_generator
+dataset_num_proc: 48
+
+# SFT trainer config
+callbacks:
+- push_to_hub_revision
+benchmarks:
+- lcb
+bf16: true
+do_eval: false
+eval_strategy: 'no'
+gradient_accumulation_steps: 8
+gradient_checkpointing: true
+gradient_checkpointing_kwargs:
+  use_reentrant: false
+hub_model_id: open-r1/Qwen2.5-Coder-7B-Instruct-SFT
+hub_model_revision: v04.00
+hub_strategy: every_save
+learning_rate: 1.0e-05
+log_level: info
+logging_steps: 1
+logging_strategy: steps
+lr_scheduler_type: cosine_with_min_lr
+lr_scheduler_kwargs:
+  min_lr_rate: 0.1
+packing: true
+max_length: 32768
+max_steps: -1
+num_train_epochs: 10
+output_dir: data/open-r1/Qwen2.5-Coder-7B-Instruct-SFT-v04.00
+overwrite_output_dir: true
+per_device_eval_batch_size: 1
+per_device_train_batch_size: 2
+push_to_hub: true
+report_to:
+- wandb
+save_strategy: "steps"
+save_steps: 0.2 # a float < 1 is interpreted as a fraction of total training steps
+save_total_limit: 1
+seed: 42
+use_liger: true
+wandb_entity: huggingface
+wandb_project: open-r1
+warmup_ratio: 0.03
--- a/recipes/Qwen2.5-Coder-7B-Instruct/sft/config_v05.00.yaml
+++ b/recipes/Qwen2.5-Coder-7B-Instruct/sft/config_v05.00.yaml
@ -0,0 +1,52 @@
+# Model arguments
+model_name_or_path: Qwen/Qwen2.5-Coder-7B-Instruct
+model_revision: main
+torch_dtype: bfloat16
+attn_implementation: flash_attention_2
+
+# Data training arguments
+dataset_name: open-r1/codeforces-cots_decontaminated
+dataset_config: checker_interactor
+dataset_num_proc: 48
+
+# SFT trainer config
+callbacks:
+- push_to_hub_revision
+benchmarks:
+- lcb
+bf16: true
+do_eval: false
+eval_strategy: 'no'
+gradient_accumulation_steps: 8
+gradient_checkpointing: true
+gradient_checkpointing_kwargs:
+  use_reentrant: false
+hub_model_id: open-r1/Qwen2.5-Coder-7B-Instruct-SFT
+hub_model_revision: v05.00
+hub_strategy: every_save
+learning_rate: 1.0e-05
+log_level: info
+logging_steps: 1
+logging_strategy: steps
+lr_scheduler_type: cosine_with_min_lr
+lr_scheduler_kwargs:
+  min_lr_rate: 0.1
+packing: true
+max_length: 32768
+max_steps: -1
+num_train_epochs: 10
+output_dir: data/open-r1/Qwen2.5-Coder-7B-Instruct-SFT-v05.00
+overwrite_output_dir: true
+per_device_eval_batch_size: 1
+per_device_train_batch_size: 2
+push_to_hub: true
+report_to:
+- wandb
+save_strategy: "steps"
+save_steps: 0.2 # a float < 1 is interpreted as a fraction of total training steps
+save_total_limit: 1
+seed: 42
+use_liger: true
+wandb_entity: huggingface
+wandb_project: open-r1
+warmup_ratio: 0.03
--- a/recipes/Qwen2.5-Coder-7B-Instruct/sft/config_v06.00.yaml
+++ b/recipes/Qwen2.5-Coder-7B-Instruct/sft/config_v06.00.yaml
@ -0,0 +1,51 @@
+# Model arguments
+model_name_or_path: Qwen/Qwen2.5-Coder-7B-Instruct
+model_revision: main
+torch_dtype: bfloat16
+attn_implementation: flash_attention_2
+
+# Data training arguments
+dataset_name: open-r1/codeforces_cots_w_openthoughts_decontaminated
+dataset_config: solutions
+dataset_num_proc: 48
+
+# SFT trainer config
+callbacks:
+- push_to_hub_revision
+benchmarks:
+- lcb
+bf16: true
+do_eval: false
+eval_strategy: 'no'
+gradient_accumulation_steps: 8
+gradient_checkpointing: true
+gradient_checkpointing_kwargs:
+  use_reentrant: false
+hub_model_id: open-r1/Qwen2.5-Coder-7B-Instruct-SFT
+hub_model_revision: v06.00
+hub_strategy: every_save
+learning_rate: 1.0e-05
+log_level: info
+logging_steps: 1
+logging_strategy: steps
+lr_scheduler_type: cosine_with_min_lr
+lr_scheduler_kwargs:
+  min_lr_rate: 0.1
+packing: true
+max_length: 32768
+max_steps: -1
+num_train_epochs: 10
+output_dir: data/open-r1/Qwen2.5-Coder-7B-Instruct-SFT-v06.00
+overwrite_output_dir: true
+per_device_eval_batch_size: 1
+per_device_train_batch_size: 2
+push_to_hub: true
+report_to:
+- wandb
+save_strategy: epoch
+save_total_limit: 1
+seed: 42
+use_liger: true
+wandb_entity: huggingface
+wandb_project: open-r1
+warmup_ratio: 0.03
--- a/recipes/Qwen2.5-Coder-7B-Instruct/sft/config_v07.00.yaml
+++ b/recipes/Qwen2.5-Coder-7B-Instruct/sft/config_v07.00.yaml
@ -0,0 +1,51 @@
+# Model arguments
+model_name_or_path: Qwen/Qwen2.5-Coder-7B-Instruct
+model_revision: main
+torch_dtype: bfloat16
+attn_implementation: flash_attention_2
+
+# Data training arguments
+dataset_name: open-r1/codeforces_cots_w_openthoughts_decontaminated
+dataset_config: solutions_w_editorials
+dataset_num_proc: 48
+
+# SFT trainer config
+callbacks:
+- push_to_hub_revision
+benchmarks:
+- lcb
+bf16: true
+do_eval: false
+eval_strategy: 'no'
+gradient_accumulation_steps: 8
+gradient_checkpointing: true
+gradient_checkpointing_kwargs:
+  use_reentrant: false
+hub_model_id: open-r1/Qwen2.5-Coder-7B-Instruct-SFT
+hub_model_revision: v07.00
+hub_strategy: every_save
+learning_rate: 1.0e-05
+log_level: info
+logging_steps: 1
+logging_strategy: steps
+lr_scheduler_type: cosine_with_min_lr
+lr_scheduler_kwargs:
+  min_lr_rate: 0.1
+packing: true
+max_length: 32768
+max_steps: -1
+num_train_epochs: 10
+output_dir: data/open-r1/Qwen2.5-Coder-7B-Instruct-SFT-v07.00
+overwrite_output_dir: true
+per_device_eval_batch_size: 1
+per_device_train_batch_size: 2
+push_to_hub: true
+report_to:
+- wandb
+save_strategy: epoch
+save_total_limit: 1
+seed: 42
+use_liger: true
+wandb_entity: huggingface
+wandb_project: open-r1
+warmup_ratio: 0.03
--- a/recipes/Qwen2.5-Coder-7B-Instruct/sft/config_v08.00.yaml
+++ b/recipes/Qwen2.5-Coder-7B-Instruct/sft/config_v08.00.yaml
@ -0,0 +1,51 @@
+# Model arguments
+model_name_or_path: Qwen/Qwen2.5-Coder-7B-Instruct
+model_revision: main
+torch_dtype: bfloat16
+attn_implementation: flash_attention_2
+
+# Data training arguments
+dataset_name: open-r1/codeforces-cots_decontaminated
+dataset_config: solutions_w_editorials_cpp_py
+dataset_num_proc: 48
+
+# SFT trainer config
+callbacks:
+- push_to_hub_revision
+benchmarks:
+- lcb
+bf16: true
+do_eval: false
+eval_strategy: 'no'
+gradient_accumulation_steps: 8
+gradient_checkpointing: true
+gradient_checkpointing_kwargs:
+  use_reentrant: false
+hub_model_id: open-r1/Qwen2.5-Coder-7B-Instruct-SFT
+hub_model_revision: v08.00
+hub_strategy: every_save
+learning_rate: 1.0e-05
+log_level: info
+logging_steps: 1
+logging_strategy: steps
+lr_scheduler_type: cosine_with_min_lr
+lr_scheduler_kwargs:
+  min_lr_rate: 0.1
+packing: true
+max_length: 32768
+max_steps: -1
+num_train_epochs: 10
+output_dir: data/open-r1/Qwen2.5-Coder-7B-Instruct-SFT-v08.00
+overwrite_output_dir: true
+per_device_eval_batch_size: 1
+per_device_train_batch_size: 2
+push_to_hub: true
+report_to:
+- wandb
+save_strategy: epoch
+save_total_limit: 1
+seed: 42
+use_liger: true
+wandb_entity: huggingface
+wandb_project: open-r1
+warmup_ratio: 0.03
--- a/recipes/Qwen2.5-Coder-7B-Instruct/sft/config_v09.00.yaml
+++ b/recipes/Qwen2.5-Coder-7B-Instruct/sft/config_v09.00.yaml
@ -0,0 +1,52 @@
+# Config for 4 nodes to go fast
+# Model arguments
+model_name_or_path: Qwen/Qwen2.5-Coder-7B-Instruct
+model_revision: main
+torch_dtype: bfloat16
+attn_implementation: flash_attention_2
+
+# Data training arguments
+dataset_name: open-r1/ioi-cots
+dataset_num_proc: 12
+
+# SFT trainer config
+callbacks:
+- push_to_hub_revision
+benchmarks:
+- lcb_v4
+- lcb
+bf16: true
+do_eval: false
+eval_strategy: 'no'
+gradient_accumulation_steps: 2
+gradient_checkpointing: true
+gradient_checkpointing_kwargs:
+  use_reentrant: false
+hub_model_id: open-r1/Qwen2.5-Coder-7B-Instruct-SFT
+hub_model_revision: v09.00
+hub_strategy: every_save
+learning_rate: 4.0e-05
+log_level: info
+logging_steps: 1
+logging_strategy: steps
+lr_scheduler_type: cosine_with_min_lr
+lr_scheduler_kwargs:
+  min_lr_rate: 0.1
+packing: false
+max_length: 32768
+max_steps: -1
+num_train_epochs: 10
+output_dir: data/open-r1/Qwen2.5-Coder-7B-Instruct-SFT-v09.00
+overwrite_output_dir: true
+per_device_eval_batch_size: 1
+per_device_train_batch_size: 2
+push_to_hub: true
+report_to:
+- wandb
+save_strategy: epoch
+save_total_limit: 1
+seed: 42
+use_liger: true
+wandb_entity: huggingface
+wandb_project: open-r1
+warmup_ratio: 0.03
--- a/recipes/Qwen2.5-Coder-7B-Instruct/sft/config_v10.00.yaml
+++ b/recipes/Qwen2.5-Coder-7B-Instruct/sft/config_v10.00.yaml
@ -0,0 +1,53 @@
+# Config for 1 node
+# Model arguments
+model_name_or_path: Qwen/Qwen2.5-Coder-7B-Instruct
+model_revision: main
+torch_dtype: bfloat16
+attn_implementation: flash_attention_2
+
+# Data training arguments
+dataset_name: open-r1/codeforces-ioi-cots-mix
+dataset_config: solutions_stop_compile
+dataset_num_proc: 12
+
+# SFT trainer config
+callbacks:
+- push_to_hub_revision
+benchmarks:
+- lcb_v4
+bf16: true
+do_eval: false
+eval_strategy: 'no'
+gradient_accumulation_steps: 8
+gradient_checkpointing: true
+gradient_checkpointing_kwargs:
+  use_reentrant: false
+hub_model_id: open-r1/Qwen2.5-Coder-7B-Instruct-SFT
+hub_model_revision: v10.00
+hub_strategy: every_save
+learning_rate: 4.0e-05
+log_level: info
+logging_steps: 1
+logging_strategy: steps
+lr_scheduler_type: cosine_with_min_lr
+lr_scheduler_kwargs:
+  min_lr_rate: 0.1
+packing: false
+max_grad_norm: 0.2
+max_length: 32768
+max_steps: -1
+num_train_epochs: 10
+output_dir: data/open-r1/Qwen2.5-Coder-7B-Instruct-SFT-v10.00
+overwrite_output_dir: true
+per_device_eval_batch_size: 1
+per_device_train_batch_size: 2
+push_to_hub: true
+report_to:
+- wandb
+save_strategy: epoch
+save_total_limit: 1
+seed: 42
+use_liger: true
+wandb_entity: huggingface
+wandb_project: open-r1
+warmup_ratio: 0.03
--- a/recipes/Qwen2.5-Coder-7B-Instruct/sft/config_v11.00.yaml
+++ b/recipes/Qwen2.5-Coder-7B-Instruct/sft/config_v11.00.yaml
@ -0,0 +1,53 @@
+# Config for 1 node
+# Model arguments
+model_name_or_path: Qwen/Qwen2.5-Coder-7B-Instruct
+model_revision: main
+torch_dtype: bfloat16
+attn_implementation: flash_attention_2
+
+# Data training arguments
+dataset_name: open-r1/codeforces-cots_decontaminated
+dataset_config: solutions_all
+dataset_num_proc: 12
+
+# SFT trainer config
+callbacks:
+- push_to_hub_revision
+benchmarks:
+- lcb_v4
+bf16: true
+do_eval: false
+eval_strategy: 'no'
+gradient_accumulation_steps: 8
+gradient_checkpointing: true
+gradient_checkpointing_kwargs:
+  use_reentrant: false
+hub_model_id: open-r1/Qwen2.5-Coder-7B-Instruct-SFT
+hub_model_revision: v11.00
+hub_strategy: every_save
+learning_rate: 4.0e-05
+log_level: info
+logging_steps: 1
+logging_strategy: steps
+lr_scheduler_type: cosine_with_min_lr
+lr_scheduler_kwargs:
+  min_lr_rate: 0.1
+packing: false
+max_grad_norm: 0.2
+max_length: 32768
+max_steps: -1
+num_train_epochs: 10
+output_dir: data/open-r1/Qwen2.5-Coder-7B-Instruct-SFT-v11.00
+overwrite_output_dir: true
+per_device_eval_batch_size: 1
+per_device_train_batch_size: 2
+push_to_hub: true
+report_to:
+- wandb
+save_strategy: epoch
+save_total_limit: 1
+seed: 42
+use_liger: true
+wandb_entity: huggingface
+wandb_project: open-r1
+warmup_ratio: 0.03
--- a/recipes/Qwen2.5-Coder-7B-Instruct/sft/config_v12.00.yaml
+++ b/recipes/Qwen2.5-Coder-7B-Instruct/sft/config_v12.00.yaml
@ -0,0 +1,53 @@
+# Config for 1 node
+# Model arguments
+model_name_or_path: Qwen/Qwen2.5-Coder-7B-Instruct
+model_revision: main
+torch_dtype: bfloat16
+attn_implementation: flash_attention_2
+
+# Data training arguments
+dataset_name: open-r1/codeforces-cots_decontaminated
+dataset_config: solutions_all_cpp_py
+dataset_num_proc: 12
+
+# SFT trainer config
+callbacks:
+- push_to_hub_revision
+benchmarks:
+- lcb_v4
+bf16: true
+do_eval: false
+eval_strategy: 'no'
+gradient_accumulation_steps: 8
+gradient_checkpointing: true
+gradient_checkpointing_kwargs:
+  use_reentrant: false
+hub_model_id: open-r1/Qwen2.5-Coder-7B-Instruct-SFT
+hub_model_revision: v12.00
+hub_strategy: every_save
+learning_rate: 4.0e-05
+log_level: info
+logging_steps: 1
+logging_strategy: steps
+lr_scheduler_type: cosine_with_min_lr
+lr_scheduler_kwargs:
+  min_lr_rate: 0.1
+packing: false
+max_grad_norm: 0.2
+max_length: 32768
+max_steps: -1
+num_train_epochs: 10
+output_dir: data/open-r1/Qwen2.5-Coder-7B-Instruct-SFT-v12.00
+overwrite_output_dir: true
+per_device_eval_batch_size: 1
+per_device_train_batch_size: 2
+push_to_hub: true
+report_to:
+- wandb
+save_strategy: epoch
+save_total_limit: 1
+seed: 42
+use_liger: true
+wandb_entity: huggingface
+wandb_project: open-r1
+warmup_ratio: 0.03
--- a/recipes/Qwen2.5-Coder-7B-Instruct/sft/config_v13.00.yaml
+++ b/recipes/Qwen2.5-Coder-7B-Instruct/sft/config_v13.00.yaml
@ -0,0 +1,53 @@
+# Config for 1 node
+# Model arguments
+model_name_or_path: Qwen/Qwen2.5-Coder-7B-Instruct
+model_revision: main
+torch_dtype: bfloat16
+attn_implementation: flash_attention_2
+
+# Data training arguments
+dataset_name: open-r1/codeforces-cots_decontaminated
+dataset_config: solutions_cpp_py
+dataset_num_proc: 12
+
+# SFT trainer config
+callbacks:
+- push_to_hub_revision
+benchmarks:
+- lcb_v4
+bf16: true
+do_eval: false
+eval_strategy: 'no'
+gradient_accumulation_steps: 8
+gradient_checkpointing: true
+gradient_checkpointing_kwargs:
+  use_reentrant: false
+hub_model_id: open-r1/Qwen2.5-Coder-7B-Instruct-SFT
+hub_model_revision: v13.00
+hub_strategy: every_save
+learning_rate: 4.0e-05
+log_level: info
+logging_steps: 1
+logging_strategy: steps
+lr_scheduler_type: cosine_with_min_lr
+lr_scheduler_kwargs:
+  min_lr_rate: 0.1
+packing: false
+max_grad_norm: 0.2
+max_length: 32768
+max_steps: -1
+num_train_epochs: 10
+output_dir: data/open-r1/Qwen2.5-Coder-7B-Instruct-SFT-v13.00
+overwrite_output_dir: true
+per_device_eval_batch_size: 1
+per_device_train_batch_size: 2
+push_to_hub: true
+report_to:
+- wandb
+save_strategy: epoch
+save_total_limit: 1
+seed: 42
+use_liger: true
+wandb_entity: huggingface
+wandb_project: open-r1
+warmup_ratio: 0.03
--- a/recipes/Qwen2.5-Coder-7B-Instruct/sft/config_v14.00.yaml
+++ b/recipes/Qwen2.5-Coder-7B-Instruct/sft/config_v14.00.yaml
@ -0,0 +1,53 @@
+# Config for 1 node
+# Model arguments
+model_name_or_path: Qwen/Qwen2.5-Coder-7B-Instruct
+model_revision: main
+torch_dtype: bfloat16
+attn_implementation: flash_attention_2
+
+# Data training arguments
+dataset_name: open-r1/codeforces-cots_decontaminated
+dataset_config: solutions_shortest
+dataset_num_proc: 12
+
+# SFT trainer config
+callbacks:
+- push_to_hub_revision
+benchmarks:
+- lcb_v4
+bf16: true
+do_eval: false
+eval_strategy: 'no'
+gradient_accumulation_steps: 8
+gradient_checkpointing: true
+gradient_checkpointing_kwargs:
+  use_reentrant: false
+hub_model_id: open-r1/Qwen2.5-Coder-7B-Instruct-SFT
+hub_model_revision: v14.00
+hub_strategy: every_save
+learning_rate: 4.0e-05
+log_level: info
+logging_steps: 1
+logging_strategy: steps
+lr_scheduler_type: cosine_with_min_lr
+lr_scheduler_kwargs:
+  min_lr_rate: 0.1
+packing: false
+max_grad_norm: 0.2
+max_length: 32768
+max_steps: -1
+num_train_epochs: 10
+output_dir: data/open-r1/Qwen2.5-Coder-7B-Instruct-SFT-v14.00
+overwrite_output_dir: true
+per_device_eval_batch_size: 1
+per_device_train_batch_size: 2
+push_to_hub: true
+report_to:
+- wandb
+save_strategy: epoch
+save_total_limit: 1
+seed: 42
+use_liger: true
+wandb_entity: huggingface
+wandb_project: open-r1
+warmup_ratio: 0.03
--- a/recipes/Qwen2.5-Coder-7B-Instruct/sft/config_v15.00.yaml
+++ b/recipes/Qwen2.5-Coder-7B-Instruct/sft/config_v15.00.yaml
@ -0,0 +1,52 @@
+# Config for 1 node
+# Model arguments
+model_name_or_path: Qwen/Qwen2.5-Coder-7B-Instruct
+model_revision: main
+torch_dtype: bfloat16
+attn_implementation: flash_attention_2
+
+# Data training arguments
+dataset_name: open-r1/KodCode-V1-SFT-R1
+dataset_num_proc: 12
+
+# SFT trainer config
+callbacks:
+- push_to_hub_revision
+benchmarks:
+- lcb_v4
+bf16: true
+do_eval: false
+eval_strategy: 'no'
+gradient_accumulation_steps: 8
+gradient_checkpointing: true
+gradient_checkpointing_kwargs:
+  use_reentrant: false
+hub_model_id: open-r1/Qwen2.5-Coder-7B-Instruct-SFT
+hub_model_revision: v15.00
+hub_strategy: every_save
+learning_rate: 4.0e-05
+log_level: info
+logging_steps: 1
+logging_strategy: steps
+lr_scheduler_type: cosine_with_min_lr
+lr_scheduler_kwargs:
+  min_lr_rate: 0.1
+packing: false
+max_grad_norm: 0.2
+max_length: 32768
+max_steps: -1
+num_train_epochs: 10
+output_dir: data/open-r1/Qwen2.5-Coder-7B-Instruct-SFT-v15.00
+overwrite_output_dir: true
+per_device_eval_batch_size: 1
+per_device_train_batch_size: 2
+push_to_hub: true
+report_to:
+- wandb
+save_strategy: epoch
+save_total_limit: 1
+seed: 42
+use_liger: true
+wandb_entity: huggingface
+wandb_project: open-r1
+warmup_ratio: 0.03
--- a/recipes/Qwen2.5-Coder-7B-Instruct/sft/config_v16.00.yaml
+++ b/recipes/Qwen2.5-Coder-7B-Instruct/sft/config_v16.00.yaml
@ -0,0 +1,53 @@
+# Config for 1 node
+# Model arguments
+model_name_or_path: Qwen/Qwen2.5-Coder-7B-Instruct
+model_revision: main
+torch_dtype: bfloat16
+attn_implementation: flash_attention_2
+
+# Data training arguments
+dataset_name: open-r1/codeforces-cots_decontaminated
+dataset_config: solutions_longest
+dataset_num_proc: 12
+
+# SFT trainer config
+callbacks:
+- push_to_hub_revision
+benchmarks:
+- lcb_v4
+bf16: true
+do_eval: false
+eval_strategy: 'no'
+gradient_accumulation_steps: 8
+gradient_checkpointing: true
+gradient_checkpointing_kwargs:
+  use_reentrant: false
+hub_model_id: open-r1/Qwen2.5-Coder-7B-Instruct-SFT
+hub_model_revision: v16.00
+hub_strategy: every_save
+learning_rate: 4.0e-05
+log_level: info
+logging_steps: 1
+logging_strategy: steps
+lr_scheduler_type: cosine_with_min_lr
+lr_scheduler_kwargs:
+  min_lr_rate: 0.1
+packing: false
+max_grad_norm: 0.2
+max_length: 32768
+max_steps: -1
+num_train_epochs: 10
+output_dir: data/open-r1/Qwen2.5-Coder-7B-Instruct-SFT-v16.00
+overwrite_output_dir: true
+per_device_eval_batch_size: 1
+per_device_train_batch_size: 2
+push_to_hub: true
+report_to:
+- wandb
+save_strategy: epoch
+save_total_limit: 1
+seed: 42
+use_liger: true
+wandb_entity: huggingface
+wandb_project: open-r1
+warmup_ratio: 0.03
--- a/recipes/Qwen2.5-Coder-7B-Instruct/sft/config_v17.00.yaml
+++ b/recipes/Qwen2.5-Coder-7B-Instruct/sft/config_v17.00.yaml
@ -0,0 +1,53 @@
+# Config for 1 node
+# Model arguments
+model_name_or_path: Qwen/Qwen2.5-Coder-7B-Instruct
+model_revision: main
+torch_dtype: bfloat16
+attn_implementation: flash_attention_2
+
+# Data training arguments
+dataset_name: open-r1/codeforces-cots_decontaminated
+dataset_config: solutions_shortest_and_longest
+dataset_num_proc: 12
+
+# SFT trainer config
+callbacks:
+- push_to_hub_revision
+benchmarks:
+- lcb_v4
+bf16: true
+do_eval: false
+eval_strategy: 'no'
+gradient_accumulation_steps: 8
+gradient_checkpointing: true
+gradient_checkpointing_kwargs:
+  use_reentrant: false
+hub_model_id: open-r1/Qwen2.5-Coder-7B-Instruct-SFT
+hub_model_revision: v17.00
+hub_strategy: every_save
+learning_rate: 4.0e-05
+log_level: info
+logging_steps: 1
+logging_strategy: steps
+lr_scheduler_type: cosine_with_min_lr
+lr_scheduler_kwargs:
+  min_lr_rate: 0.1
+packing: false
+max_grad_norm: 0.2
+max_length: 32768
+max_steps: -1
+num_train_epochs: 10
+output_dir: data/open-r1/Qwen2.5-Coder-7B-Instruct-SFT-v17.00
+overwrite_output_dir: true
+per_device_eval_batch_size: 1
+per_device_train_batch_size: 2
+push_to_hub: true
+report_to:
+- wandb
+save_strategy: epoch
+save_total_limit: 1
+seed: 42
+use_liger: true
+wandb_entity: huggingface
+wandb_project: open-r1
+warmup_ratio: 0.03
--- a/recipes/accelerate_configs/fsdp.yaml
+++ b/recipes/accelerate_configs/fsdp.yaml
@ -0,0 +1,27 @@
+compute_environment: LOCAL_MACHINE
+debug: false
+distributed_type: FSDP
+downcast_bf16: 'no'
+enable_cpu_affinity: false
+fsdp_config:
+  fsdp_activation_checkpointing: false # Disabled for now: enabling it needs the fix from https://github.com/huggingface/transformers/pull/36610
+  fsdp_auto_wrap_policy: TRANSFORMER_BASED_WRAP
+  fsdp_backward_prefetch: BACKWARD_PRE
+  fsdp_cpu_ram_efficient_loading: true
+  fsdp_forward_prefetch: true
+  fsdp_offload_params: false
+  fsdp_sharding_strategy: FULL_SHARD
+  fsdp_state_dict_type: FULL_STATE_DICT
+  fsdp_sync_module_states: true
+  fsdp_use_orig_params: true
+machine_rank: 0
+main_training_function: main
+mixed_precision: bf16
+num_machines: 1
+num_processes: 8
+rdzv_backend: static
+same_network: true
+tpu_env: []
+tpu_use_cluster: false
+tpu_use_sudo: false
+use_cpu: false
--- a/recipes/accelerate_configs/fsdp_offload.yaml
+++ b/recipes/accelerate_configs/fsdp_offload.yaml
@ -0,0 +1,27 @@
+compute_environment: LOCAL_MACHINE
+debug: false
+distributed_type: FSDP
+downcast_bf16: 'no'
+enable_cpu_affinity: false
+fsdp_config:
+  fsdp_activation_checkpointing: false # Disabled for now: enabling it needs the fix from https://github.com/huggingface/transformers/pull/36610
+  fsdp_auto_wrap_policy: TRANSFORMER_BASED_WRAP
+  fsdp_backward_prefetch: BACKWARD_PRE
+  fsdp_cpu_ram_efficient_loading: true
+  fsdp_forward_prefetch: true
+  fsdp_offload_params: true
+  fsdp_sharding_strategy: FULL_SHARD
+  fsdp_state_dict_type: FULL_STATE_DICT
+  fsdp_sync_module_states: true
+  fsdp_use_orig_params: true
+machine_rank: 0
+main_training_function: main
+mixed_precision: bf16
+num_machines: 1
+num_processes: 8
+rdzv_backend: static
+same_network: true
+tpu_env: []
+tpu_use_cluster: false
+tpu_use_sudo: false
+use_cpu: false
--- a/recipes/accelerate_configs/zero3_offload.yaml
+++ b/recipes/accelerate_configs/zero3_offload.yaml
@ -0,0 +1,22 @@
+compute_environment: LOCAL_MACHINE
+debug: false
+deepspeed_config:
+  deepspeed_multinode_launcher: standard
+  offload_optimizer_device: cpu
+  offload_param_device: cpu
+  zero3_init_flag: true
+  zero3_save_16bit_model: true
+  zero_stage: 3
+distributed_type: DEEPSPEED
+downcast_bf16: 'no'
+machine_rank: 0
+main_training_function: main
+mixed_precision: bf16
+num_machines: 1
+num_processes: 8
+rdzv_backend: static
+same_network: true
+tpu_env: []
+tpu_use_cluster: false
+tpu_use_sudo: false
+use_cpu: false
--- a/recipes/gemma-3-12b-it/sft/config_v00.00.yaml
+++ b/recipes/gemma-3-12b-it/sft/config_v00.00.yaml
@ -0,0 +1,50 @@
+# Model arguments
+model_name_or_path: google/gemma-3-12b-it
+model_revision: main
+torch_dtype: bfloat16
+attn_implementation: flash_attention_2
+
+# Data training arguments
+dataset_name: open-r1/math-code-mix
+dataset_config: default_solutions
+dataset_num_proc: 12
+
+# SFT trainer config
+callbacks:
+- push_to_hub_revision
+benchmarks:
+- lcb
+bf16: true
+do_eval: false
+gradient_accumulation_steps: 8
+gradient_checkpointing: true
+gradient_checkpointing_kwargs:
+  use_reentrant: false
+hub_model_id: open-r1/gemma-3-12b-it-SFT
+hub_model_revision: v00.00
+hub_strategy: every_save
+learning_rate: 4.0e-05
+log_level: info
+logging_steps: 1
+logging_strategy: steps
+lr_scheduler_type: cosine_with_min_lr
+lr_scheduler_kwargs:
+  min_lr_rate: 0.1
+packing: true
+max_length: 32768
+max_steps: -1
+num_train_epochs: 10
+output_dir: data/gemma-3-12b-it-SFT-v00.00
+overwrite_output_dir: true
+per_device_eval_batch_size: 1
+per_device_train_batch_size: 2
+push_to_hub: true
+report_to:
+- wandb
+save_strategy: epoch
+save_total_limit: 1
+seed: 42
+use_liger: true
+wandb_entity: huggingface
+wandb_project: open-r1
+warmup_ratio: 0.03
--- a/scripts/decontaminate.py
+++ b/scripts/decontaminate.py
@ -140,6 +140,8 @@ if __name__ == "__main__":
    if args.cleanup:
        ds = cleanup(ds)

+    ds = ds.rename_column("prompt", "problem")  # dataset is pushed with the column named "problem"
+
    new_ds_name = args.new_dataset_name or f"{args.dataset}_decontaminated"
    config_name = args.config if args.config is not None else "default"
    url = ds.push_to_hub(new_ds_name, config_name=config_name, split="train")
Author	SHA1	Message	Date
Lewis Tunstall	43dd44e420	Add OlympicCoder 3B	2025-03-31 15:36:34 +00:00
Lewis Tunstall	be64fefb9f	Add long and short recipes	2025-03-23 07:46:04 +00:00
Lewis Tunstall	abee7a20cb	Add v11-v13 ablations	2025-03-16 07:52:11 +00:00
Lewis Tunstall	0fdad2f978	Tune recipe	2025-03-13 15:44:13 +00:00
Lewis Tunstall	eb2a3aa400	Add QwQ	2025-03-12 21:07:12 +00:00
Lewis Tunstall	887dbe97f8	Add moar recipes	2025-03-12 12:52:04 +00:00
Lewis Tunstall	7e1bd3791b	Add IOI configs	2025-03-10 20:52:01 +00:00
Lewis Tunstall	83c271bca5	Reduce context for OOM	2025-03-10 13:38:23 +00:00
Lewis Tunstall	8677506cfc	fix	2025-03-10 12:30:48 +00:00
Lewis Tunstall	6dab011078	Reveett	2025-03-10 12:28:27 +00:00
Lewis Tunstall	4daec5ad9d	Align ds configs	2025-03-10 10:36:02 +00:00
Lewis Tunstall	a6b8da745f	Fix optim	2025-03-10 07:54:55 +00:00
Lewis Tunstall	4bb2495296	Add fsdp	2025-03-09 21:54:54 +00:00
Lewis Tunstall	6bab2d828a	Disable Liger	2025-03-09 15:35:59 +00:00
Lewis Tunstall	0523624e21	Add 32B recipe	2025-03-09 14:47:58 +00:00
Lewis Tunstall	f82658d608	Add v08	2025-03-05 09:36:00 +00:00
lewtun	bc281a2f26	Merge branch 'main' into qwen-coder-sft-configs	2025-03-04 16:22:04 +01:00
Lewis Tunstall	ba27a99028	Add v07	2025-03-03 15:22:15 +00:00
lewtun	d9b707420d	Merge branch 'main' into qwen-coder-sft-configs	2025-03-03 14:20:43 +01:00
Lewis Tunstall	a6f44b2b0a	Add v06	2025-03-02 22:30:59 +00:00
Lewis Tunstall	2080600c47	Add codeforces recipes	2025-03-01 12:30:59 +00:00
lewtun	dae6e9af8c	Merge branch 'main' into qwen-coder-sft-configs	2025-03-01 11:18:16 +01:00
edbeeching	56f9257e67	configs	2025-02-26 09:11:49 +00:00