mirror of
https://github.com/huggingface/open-r1.git
synced 2026-06-24 01:54:06 +00:00
219 lines
9.2 KiB
Python
219 lines
9.2 KiB
Python
# Copyright 2025 The HuggingFace Team. All rights reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
|
|
import unittest
|
|
|
|
from datasets import load_dataset
|
|
|
|
from e2b_code_interpreter.models import Execution, ExecutionError
|
|
from open_r1.rewards import code_reward, ioi_code_reward
|
|
from open_r1.utils.routed_morph import RoutedMorphSandbox
|
|
from open_r1.utils.routed_sandbox import RoutedSandbox
|
|
|
|
|
|
class TestCodeRewards(unittest.TestCase):
    """Integration tests for the code reward functions and the routed sandboxes.

    Each test depends on an external service (E2B, MorphCloud, a locally running
    router, or piston workers); the prerequisite is noted at the top of the test.
    """

    # Python problems whose gold-standard solutions are expected to receive a reward of 1.0.
    _PYTHON_DATASET = "open-r1/verifiable-coding-problems-python_decontaminated-tested-shuffled"

    def _load_python_samples(self, num_samples):
        """Return the first ``num_samples`` rows of the train split of ``_PYTHON_DATASET``."""
        return load_dataset(self._PYTHON_DATASET)["train"].select(range(num_samples))

    def test_python_code_reward(self):
        """Gold-standard solutions must all receive a reward of 1.0 from ``code_reward``."""
        # requires E2B, see the README.md file
        num_samples = 20
        samples = self._load_python_samples(num_samples)
        test_completions = [[{"content": sample["gold_standard_solution"]}] for sample in samples]
        reward_kwargs = {"verification_info": [sample["verification_info"] for sample in samples]}
        rewards = code_reward(test_completions, **reward_kwargs)
        print(rewards)
        self.assertEqual(rewards, [1.0] * num_samples)

    def test_e2b_router(self):
        """Same check as ``test_python_code_reward`` but with ``e2b_router_url`` set."""
        # run router locally: python scripts/e2b_router.py
        num_samples = 128
        samples = self._load_python_samples(num_samples)
        test_completions = [[{"content": sample["gold_standard_solution"]}] for sample in samples]
        reward_kwargs = {"verification_info": [sample["verification_info"] for sample in samples]}
        rewards = code_reward(test_completions, e2b_router_url="0.0.0.0:8000", **reward_kwargs)
        print(rewards)
        self.assertEqual(rewards, [1.0] * num_samples)

    def test_e2b_router_parallel(self):
        """Score batches through the E2B router from several ``Dataset.map`` worker processes."""
        # run router locally: python scripts/e2b_router.py
        batch_size = 32
        num_samples = 256

        def batch_code_reward(examples):
            test_completions = [[{"content": solution}] for solution in examples["gold_standard_solution"]]
            reward_kwargs = {"verification_info": list(examples["verification_info"])}
            rewards = code_reward(test_completions, e2b_router_url="0.0.0.0:8000", **reward_kwargs)
            # Size the expectation from the batch itself: the last batch handed to a
            # worker may be shorter than batch_size. Raise explicitly (instead of
            # `assert`) so the check also runs under `python -O`, and avoid touching
            # `self` because this function is shipped to worker processes.
            expected = [1.0] * len(test_completions)
            if rewards != expected:
                raise AssertionError(f"expected rewards {expected}, got {rewards}")
            return examples

        self._load_python_samples(num_samples).map(
            batch_code_reward,
            batched=True,
            batch_size=batch_size,
            num_proc=4,
            load_from_cache_file=False,
        )

    def test_ioi_code_reward(self):
        """Sample solutions from the IOI test dataset must all receive a reward of 1.0."""
        # This slow test case requires spinning up a bunch (I tested with ~64) of piston workers, see docs here
        # slurm/piston/README.md
        num_samples = 16
        samples = load_dataset("open-r1/ioi-reward-test-dataset")["train"].select(range(num_samples))
        test_completions = [[{"content": f"```cpp\n{sample['sample_solution']}```"}] for sample in samples]
        # Every column except the prompt/completion pair is forwarded as a keyword argument.
        keys = [key for key in samples[0] if key not in ["prompt", "completion"]]
        reward_kwargs = {key: [example[key] for example in samples] for key in keys}
        rewards = ioi_code_reward(test_completions, **reward_kwargs)
        print(rewards)
        self.assertEqual(rewards, [1.0] * num_samples)

    def test_e2b_router_run_code_success(self):
        """``RoutedSandbox.run_code`` returns one error-free ``Execution`` per script, in order."""
        # run router locally: python scripts/e2b_router.py
        routed_sandbox = RoutedSandbox(router_url="localhost:8000")
        scripts = [
            "print('hello from integration test')",
            "result = 2 + 2\nprint(result)",
        ]
        # Substring expected in the first stdout entry of each script, in script order.
        expected_outputs = ["hello", "4"]

        results = routed_sandbox.run_code(scripts)

        self.assertEqual(len(results), 2)

        for result, expected in zip(results, expected_outputs):
            self.assertIsInstance(result, Execution)
            # NOTE: Execution has no `exit_code` attribute, so success is checked via `error`.
            self.assertIsNone(result.error)
            self.assertIn(expected, result.logs["stdout"][0])

    def test_e2b_router_run_code_with_error(self):
        """A script with a syntax error reports an ``ExecutionError`` without affecting its neighbour."""
        # run router locally: python scripts/e2b_router.py
        routed_sandbox = RoutedSandbox(router_url="localhost:8000")
        scripts = ["print('this is fine')", "print('unterminated string"]

        results = routed_sandbox.run_code(scripts)

        self.assertEqual(len(results), 2)

        # First one should be okay.
        # NOTE: Execution has no `exit_code` attribute, so success is checked via `error`.
        self.assertIsNone(results[0].error)
        self.assertIn("this is fine", results[0].logs["stdout"][0])

        # Second one should have a syntax error
        self.assertIsNotNone(results[1].error)
        self.assertIsInstance(results[1].error, ExecutionError)
        self.assertIn("SyntaxError", results[1].error.name)

    def test_python_code_reward_morph(self):
        """Gold-standard solutions must all receive a reward of 1.0 with ``provider_type="morph"``."""
        # requires MorphCloud, see the README.md file
        num_samples = 20
        samples = self._load_python_samples(num_samples)
        test_completions = [[{"content": sample["gold_standard_solution"]}] for sample in samples]
        reward_kwargs = {
            "verification_info": [sample["verification_info"] for sample in samples],
            "provider_type": "morph",
        }
        rewards = code_reward(test_completions, **reward_kwargs)
        print(rewards)
        self.assertEqual(rewards, [1.0] * num_samples)

    def test_morph_router(self):
        """Same check as ``test_python_code_reward_morph`` but with ``morph_router_url`` set."""
        # run router locally: python scripts/morph_router.py --port 8001 --max_num_sandboxes 20
        num_samples = 32
        samples = self._load_python_samples(num_samples)
        test_completions = [[{"content": sample["gold_standard_solution"]}] for sample in samples]
        reward_kwargs = {
            "verification_info": [sample["verification_info"] for sample in samples],
            "provider_type": "morph",
            "morph_router_url": "0.0.0.0:8001",
        }
        rewards = code_reward(test_completions, **reward_kwargs)
        print(rewards)
        self.assertEqual(rewards, [1.0] * num_samples)

    def test_morph_router_parallel(self):
        """Score batches through the Morph router from several ``Dataset.map`` worker processes."""
        # run router locally: python scripts/morph_router.py --port 8001 --max_num_sandboxes 20
        batch_size = 32
        num_samples = 256

        def batch_code_reward(examples):
            test_completions = [[{"content": solution}] for solution in examples["gold_standard_solution"]]
            reward_kwargs = {
                "verification_info": list(examples["verification_info"]),
                "provider_type": "morph",
                "morph_router_url": "0.0.0.0:8001",
            }
            rewards = code_reward(test_completions, **reward_kwargs)
            # Size the expectation from the batch itself: the last batch handed to a
            # worker may be shorter than batch_size. Raise explicitly (instead of
            # `assert`) so the check also runs under `python -O`, and avoid touching
            # `self` because this function is shipped to worker processes.
            expected = [1.0] * len(test_completions)
            if rewards != expected:
                raise AssertionError(f"expected rewards {expected}, got {rewards}")
            return examples

        self._load_python_samples(num_samples).map(
            batch_code_reward,
            batched=True,
            batch_size=batch_size,
            num_proc=4,
            load_from_cache_file=False,
        )

    def test_morph_router_run_code_success(self):
        """``RoutedMorphSandbox.run_code`` returns one exception-free result per script, in order."""
        # run router locally: python scripts/morph_router.py --port 8001 --max_num_sandboxes 20
        routed_sandbox = RoutedMorphSandbox(router_url="localhost:8001")
        scripts = [
            "print('hello from morph integration test')",
            "result = 2 + 2\nprint(result)",
        ]
        # Substring expected in the output text of each script, in script order.
        expected_outputs = ["hello", "4"]

        results = routed_sandbox.run_code(scripts)

        self.assertEqual(len(results), 2)

        for result, expected in zip(results, expected_outputs):
            self.assertIsNone(result.exception_str)
            self.assertIn(expected, result.text)

    def test_morph_router_run_code_with_error(self):
        """A script with a syntax error surfaces ``SyntaxError`` in its text without affecting its neighbour."""
        # run router locally: python scripts/morph_router.py --port 8001 --max_num_sandboxes 20
        routed_sandbox = RoutedMorphSandbox(router_url="localhost:8001")
        scripts = ["print('this is fine with morph')", "print('unterminated string"]

        results = routed_sandbox.run_code(scripts)

        self.assertEqual(len(results), 2)

        # First one should be okay
        self.assertIsNone(results[0].exception_str)
        self.assertIn("this is fine with morph", results[0].text)

        # Second one should have a syntax error
        self.assertIn("SyntaxError", results[1].text)
|
|
|
|
|
|
# Run the test suite when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
|