Add recipe

Disable packing
Add 7B recipe
2026-06-24 01:54:06 +00:00 · 2025-02-24 10:38:44 +00:00 · 2025-02-23 21:07:43 +00:00 · 2025-02-23 17:38:45 +00:00 · 2025-02-23 16:43:27 +00:00 · 2025-02-23 14:23:13 +00:00
6 changed files with 223 additions and 5 deletions
--- a/recipes/Qwen2.5-Coder-3B-Instruct/grpo/config_synthetic_1.yaml
+++ b/recipes/Qwen2.5-Coder-3B-Instruct/grpo/config_synthetic_1.yaml
@@ -0,0 +1,56 @@
+# Model arguments — GRPO recipe that continues training from an SFT checkpoint
+model_name_or_path: open-r1/Qwen2.5-Coder-3B-Instruct-SFT-synthetic_1-v00.00  # NOTE(review): sibling SFT recipe pushes to ...-SFT-SYNTHETIC-1-v00.00 — confirm this repo id
+model_revision: main-step-000006288  # pinned revision; presumably a mid-training checkpoint branch — verify
+torch_dtype: bfloat16
+attn_implementation: flash_attention_2
+
+# Data training arguments
+dataset_name: open-r1/verifiable-coding-problems-python
+dataset_configs:
+- default
+
+# GRPO trainer config
+beta: 0.001
+bf16: true
+use_vllm: true
+vllm_device: auto
+vllm_gpu_memory_utilization: 0.9
+do_eval: false
+gradient_accumulation_steps: 4
+gradient_checkpointing: true
+gradient_checkpointing_kwargs:
+  use_reentrant: false
+hub_model_id: open-r1/Qwen2.5-Coder-3B-GRPO-SYNTHETIC-1-v00.00
+hub_strategy: every_save
+learning_rate: 1.0e-06
+log_completions: true
+log_level: info
+logging_first_step: true
+logging_steps: 1
+logging_strategy: steps
+lr_scheduler_type: cosine_with_min_lr
+lr_scheduler_kwargs:
+  min_lr_rate: 0.1  # floor of the cosine schedule, as a ratio of learning_rate
+max_prompt_length: 1024
+max_completion_length: 2048
+max_steps: 500  # a positive value takes precedence over num_train_epochs (HF TrainingArguments)
+num_generations: 14  # NOTE(review): TRL expects the global train batch to be divisible by this — confirm for the launch topology
+num_train_epochs: 1
+output_dir: data/Qwen2.5-Coder-3B-GRPO-SYNTHETIC-1-v00.00  # mirrors the repo name in hub_model_id; must be unique because overwriting is enabled below
+overwrite_output_dir: true
+per_device_train_batch_size: 16
+push_to_hub: true
+report_to:
+- wandb
+reward_funcs:
+- code
+- format
+reward_weights:  # presumably positional with reward_funcs (code -> 1.0, format -> 0.1) — verify
+- 1.0
+- 0.1
+save_strategy: "steps"
+save_steps: 50
+save_total_limit: 1
+seed: 42
+temperature: 1.0
+warmup_ratio: 0.03
--- a/recipes/Qwen2.5-Coder-3B-Instruct/sft/config_cot_mix_0.2.yaml
+++ b/recipes/Qwen2.5-Coder-3B-Instruct/sft/config_cot_mix_0.2.yaml
@@ -0,0 +1,54 @@
+# Model arguments — SFT recipe for Qwen/Qwen2.5-Coder-3B-Instruct on open-r1/long-short-cot-mix-alpha-0.2
+model_name_or_path: Qwen/Qwen2.5-Coder-3B-Instruct
+model_revision: main
+torch_dtype: bfloat16
+attn_implementation: flash_attention_2
+
+# Data training arguments
+dataset_name: open-r1/long-short-cot-mix-alpha-0.2
+dataset_configs:
+- all
+dataset_num_proc: 8
+
+# SFT trainer config
+callbacks:
+- push_to_hub_revision
+benchmarks:  # NOTE(review): presumably evaluation suites consumed by the callback above — confirm
+- aime24
+- aime25
+- math_500
+- gpqa
+- lcb
+bf16: true
+do_eval: false
+eval_strategy: 'no'  # quoted so YAML keeps it a string rather than parsing it as boolean false
+gradient_accumulation_steps: 4
+gradient_checkpointing: true
+gradient_checkpointing_kwargs:
+  use_reentrant: false
+hub_model_id: open-r1/Qwen2.5-Coder-3B-Instruct-SFT-cot-mix-0.2-v00.00
+hub_strategy: every_save
+learning_rate: 1.0e-06
+log_level: info
+logging_first_step: true
+logging_steps: 10
+logging_strategy: steps
+lr_scheduler_type: cosine_with_min_lr
+lr_scheduler_kwargs:
+  min_lr_rate: 0.1  # floor of the cosine schedule, as a ratio of learning_rate
+packing: false
+max_seq_length: 32768
+max_steps: -1  # -1 leaves the run length to num_train_epochs (HF TrainingArguments convention)
+num_train_epochs: 3
+output_dir: data/Qwen2.5-Coder-3B-Instruct-SFT-cot-mix-0.2-v00.00  # mirrors the repo name in hub_model_id
+overwrite_output_dir: true
+per_device_train_batch_size: 4
+push_to_hub: true
+report_to:
+- wandb
+save_strategy: "steps"
+save_steps: 0.1  # a float below 1 is read as a fraction of total training steps (HF TrainingArguments convention)
+save_total_limit: 1
+seed: 42
+use_liger: false # Bug in TRL when true
+warmup_ratio: 0.05
--- a/recipes/Qwen2.5-Coder-3B-Instruct/sft/config_synthetic_1.yaml
+++ b/recipes/Qwen2.5-Coder-3B-Instruct/sft/config_synthetic_1.yaml
@@ -0,0 +1,54 @@
+# Model arguments — SFT recipe for Qwen/Qwen2.5-Coder-3B-Instruct on PrimeIntellect/SYNTHETIC-1-SFT-Data
+model_name_or_path: Qwen/Qwen2.5-Coder-3B-Instruct
+model_revision: main
+torch_dtype: bfloat16
+attn_implementation: flash_attention_2
+
+# Data training arguments
+dataset_name: PrimeIntellect/SYNTHETIC-1-SFT-Data
+dataset_configs:
+- all
+dataset_num_proc: 48
+
+# SFT trainer config
+callbacks:
+- push_to_hub_revision
+benchmarks:  # NOTE(review): presumably evaluation suites consumed by the callback above — confirm
+- aime24
+- aime25
+- math_500
+- gpqa
+- lcb
+bf16: true
+do_eval: false
+eval_strategy: 'no'  # quoted so YAML keeps it a string rather than parsing it as boolean false
+gradient_accumulation_steps: 4
+gradient_checkpointing: true
+gradient_checkpointing_kwargs:
+  use_reentrant: false
+hub_model_id: open-r1/Qwen2.5-Coder-3B-Instruct-SFT-SYNTHETIC-1-v00.00
+hub_strategy: every_save
+learning_rate: 1.0e-06
+log_level: info
+logging_first_step: true
+logging_steps: 10
+logging_strategy: steps
+lr_scheduler_type: cosine_with_min_lr
+lr_scheduler_kwargs:
+  min_lr_rate: 0.1  # floor of the cosine schedule, as a ratio of learning_rate
+packing: false
+max_seq_length: 32768
+max_steps: -1  # -1 leaves the run length to num_train_epochs (HF TrainingArguments convention)
+num_train_epochs: 3
+output_dir: data/Qwen2.5-Coder-3B-Instruct-SFT-SYNTHETIC-1-v00.00  # mirrors the repo name in hub_model_id
+overwrite_output_dir: true
+per_device_train_batch_size: 4
+push_to_hub: true
+report_to:
+- wandb
+save_strategy: "steps"
+save_steps: 0.1  # a float below 1 is read as a fraction of total training steps (HF TrainingArguments convention)
+save_total_limit: 1
+seed: 42
+use_liger: false # Bug in TRL when true
+warmup_ratio: 0.05
--- a/recipes/Qwen2.5-Coder-7B-Instruct/sft/config_synthetic_1.yaml
+++ b/recipes/Qwen2.5-Coder-7B-Instruct/sft/config_synthetic_1.yaml
@@ -0,0 +1,54 @@
+# Model arguments — SFT recipe for Qwen/Qwen2.5-Coder-7B-Instruct on PrimeIntellect/SYNTHETIC-1-SFT-Data
+model_name_or_path: Qwen/Qwen2.5-Coder-7B-Instruct
+model_revision: main
+torch_dtype: bfloat16
+attn_implementation: flash_attention_2
+
+# Data training arguments
+dataset_name: PrimeIntellect/SYNTHETIC-1-SFT-Data
+dataset_configs:
+- all
+dataset_num_proc: 48
+
+# SFT trainer config
+callbacks:
+- push_to_hub_revision
+benchmarks:  # NOTE(review): presumably evaluation suites consumed by the callback above — confirm
+- aime24
+- aime25
+- math_500
+- gpqa
+- lcb
+bf16: true
+do_eval: false
+eval_strategy: 'no'  # quoted so YAML keeps it a string rather than parsing it as boolean false
+gradient_accumulation_steps: 4
+gradient_checkpointing: true
+gradient_checkpointing_kwargs:
+  use_reentrant: false
+hub_model_id: open-r1/Qwen2.5-Coder-7B-Instruct-SFT-SYNTHETIC-1-v00.00
+hub_strategy: every_save
+learning_rate: 1.0e-06
+log_level: info
+logging_first_step: true
+logging_steps: 10
+logging_strategy: steps
+lr_scheduler_type: cosine_with_min_lr
+lr_scheduler_kwargs:
+  min_lr_rate: 0.1  # floor of the cosine schedule, as a ratio of learning_rate
+packing: false
+max_seq_length: 32768
+max_steps: -1  # -1 leaves the run length to num_train_epochs (HF TrainingArguments convention)
+num_train_epochs: 3
+output_dir: data/Qwen2.5-Coder-7B-Instruct-SFT-SYNTHETIC-1-v00.00  # mirrors the repo name in hub_model_id
+overwrite_output_dir: true
+per_device_train_batch_size: 4
+push_to_hub: true
+report_to:
+- wandb
+save_strategy: "steps"
+save_steps: 0.1  # a float below 1 is read as a fraction of total training steps (HF TrainingArguments convention)
+save_total_limit: 1
+seed: 42
+use_liger: false # Bug in TRL when true
+warmup_ratio: 0.05
--- a/setup.py
+++ b/setup.py
@@ -54,7 +54,7 @@ _deps = [
    "isort>=5.12.0",
    "langdetect",  # Needed for LightEval's extended tasks
    "latex2sympy2_extended>=1.0.6",
-    "liger_kernel==0.5.2",
+    "liger_kernel==0.5.3",
    "lighteval @ git+https://github.com/huggingface/lighteval.git@ebb7377b39a48ab0691e6fbd9dea57e9fe290a7e",
    "math-verify==0.5.2",  # Used for math verification in grpo
    "packaging>=23.0",
--- a/slurm/train.slurm
+++ b/slurm/train.slurm
@@ -32,11 +32,11 @@ WORLD_SIZE=$(($NUM_NODES*$GPUS_PER_NODE))
 # Due to conflicts between Accelerate's DeepSpeed configs and Transformers' TrainingArguments, we need to parse the gradient accumulation steps from the config file to ensure they match
 CONFIG_FILE=recipes/$MODEL/$TASK/config_$CONFIG_SUFFIX.yaml
 GRAD_ACC_STEPS=$(grep 'gradient_accumulation_steps' $CONFIG_FILE | awk '{print $2}')
-USE_VLLM=$(grep 'use_vllm:\s*true' $CONFIG_FILE) # Match "use_vllm: true" (with optional whitespace)
+# USE_VLLM=$(grep 'use_vllm:\s*true' $CONFIG_FILE) # Match "use_vllm: true" (with optional whitespace)

-if [ -n "$USE_VLLM" ]; then  # Check if USE_VLLM is *not* empty (found)
-    WORLD_SIZE=$(($WORLD_SIZE-1))
-fi
+# if [ -n "$USE_VLLM" ]; then  # Check if USE_VLLM is *not* empty (found)
+#     WORLD_SIZE=$(($WORLD_SIZE-1))
+# fi

 # Split the string into individual arguments
 IFS=' ' read -ra ARGS <<< "$OPTIONAL_ARGS"
Author	SHA1	Message	Date
Lewis Tunstall	e40f087bb9	Add recipe	2025-02-24 10:38:44 +00:00
Lewis Tunstall	c2d554b32a	Disable packing	2025-02-23 21:07:43 +00:00
Lewis Tunstall	5d6e197d94	Add 7B recipe	2025-02-23 17:38:45 +00:00
Lewis Tunstall	f6cd4494de	Disable liger	2025-02-23 16:43:27 +00:00
Lewis Tunstall	af5a23d01a	Disable Liger	2025-02-23 14:23:13 +00:00
Lewis Tunstall	ad8b430a3e	Fix	2025-02-23 07:21:18 +00:00
Lewis Tunstall	0c39870682	Add Synthetic-1 recipe	2025-02-22 21:54:46 +00:00