Add E2B test recipe

2026-06-24 01:54:06 +00:00 · 2025-03-20 08:04:46 +00:00
2 changed files with 66 additions and 0 deletions
--- a/recipes/Qwen2.5-Coder-7B-Instruct/grpo/config_synthetic_1.yaml
+++ b/recipes/Qwen2.5-Coder-7B-Instruct/grpo/config_synthetic_1.yaml
@ -0,0 +1,64 @@
+# Model arguments
+model_name_or_path: Qwen/Qwen2.5-Coder-7B-Instruct
+model_revision: main
+torch_dtype: bfloat16
+attn_implementation: flash_attention_2
+
+# Data training arguments
+dataset_name: open-r1/verifiable-coding-problems-python_decontaminated
+system_prompt: "You are a helpful AI Assistant that provides well-reasoned and detailed responses. You first think about the reasoning process as an internal monologue and then provide the user with the answer. Respond in the following format: <think>\n...\n</think>\n<answer>\n...\n</answer>"
+
+# GRPO trainer config
+callbacks:
+- push_to_hub_revision
+benchmarks:
+- lcb_v4
+beta: 0.001
+bf16: true
+do_eval: false
+eval_strategy: "no"
+use_vllm: true
+vllm_device: auto
+vllm_gpu_memory_utilization: 0.7
+do_eval: false
+gradient_accumulation_steps: 14
+gradient_checkpointing: true
+gradient_checkpointing_kwargs:
+  use_reentrant: false
+hub_model_id: open-r1/Qwen2.5-Coder-7B-Instruct-GRPO
+hub_model_revision: v10.00
+hub_strategy: every_save
+learning_rate: 1.0e-06
+log_completions: true
+log_level: info
+logging_first_step: true
+logging_steps: 1
+logging_strategy: steps
+lr_scheduler_type: constant_with_warmup
+max_grad_norm: 0.2
+max_prompt_length: 1024
+max_completion_length: 4096
+max_steps: -1
+num_generations: 14
+num_train_epochs: 1
+output_dir: data/Qwen2.5-Coder-7B-Instruct-GRPO
+overwrite_output_dir: true
+per_device_train_batch_size: 4
+push_to_hub: true
+report_to:
+- wandb
+reward_funcs:
+- code
+- format
+reward_weights:
+- 1.0
+- 0.2
+save_strategy: "steps"
+save_steps: 0.1
+save_total_limit: 1
+seed: 42
+temperature: 0.7
+wandb_entity: huggingface 
+wandb_project: open-r1
+wandb_run_group: Qwen2.5-Coder-7B-Instruct-GRPO
+warmup_ratio: 0.1
--- a/src/open_r1/utils/wandb_logging.py
+++ b/src/open_r1/utils/wandb_logging.py
@ -9,3 +9,5 @@ def init_wandb_training(training_args):
        os.environ["WANDB_ENTITY"] = training_args.wandb_entity
    if training_args.wandb_project is not None:
        os.environ["WANDB_PROJECT"] = training_args.wandb_project
+    if training_args.wandb_run_group is not None:
+        os.environ["WANDB_RUN_GROUP"] = training_args.wandb_run_group