open-r1/scripts/agents/function_parser.py
amir.mahla@huggingface.co c13574e28a ADD function_parser
2025-07-17 22:43:57 +00:00

529 lines
No EOL
16 KiB
Python

#!/usr/bin/env python3
"""
Function parser for extracting function names, parameter names, and values from string function calls.
Supports both mobile and pyautogui function patterns.
"""
import re
from typing import Dict, List, Tuple, Any, Union
from dataclasses import dataclass
@dataclass
class FunctionCall:
    """Represents a parsed function call with its parameters."""

    # Name of the called function, dots included (e.g. "mobile.wait").
    function_name: str
    # Maps parameter names to values. to_string() treats keys of the exact
    # form "arg_<digits>" as positional arguments ordered by that index.
    parameters: Dict[str, Any]
    # The text this call was built from; to_string() never reads it.
    original_string: str

    def to_string(self) -> str:
        """
        Reconstruct the function call string from the parsed data.

        Keys of the exact form "arg_<digits>" are emitted first, as positional
        arguments sorted by their index. Every other key (including names that
        merely start with "arg_", such as "arg_name") is emitted afterwards as
        ``name=value`` in dictionary order.

        Returns:
            String representation of the function call

        Examples:
            >>> call = FunctionCall("mobile.wait", {"seconds": 3}, "mobile.wait(seconds=3)")
            >>> call.to_string()
            'mobile.wait(seconds=3)'
            >>> call = FunctionCall("function", {"arg_0": 1, "arg_1": 2, "x": 0.5}, "function(1, 2, x=0.5)")
            >>> call.to_string()
            'function(1, 2, x=0.5)'
        """
        if not self.parameters:
            return f"{self.function_name}()"

        positional_args = []
        named_args = []
        for name, value in self.parameters.items():
            # Only "arg_<digits>" is positional. A plain prefix test would
            # crash on int() for names such as "arg_name".
            index_match = re.fullmatch(r"arg_([0-9]+)", name)
            if index_match:
                positional_args.append((int(index_match.group(1)), value))
            else:
                named_args.append((name, value))

        # Positional arguments are ordered by their numeric index, not by
        # insertion order.
        positional_args.sort(key=lambda item: item[0])

        param_parts = [self._value_to_string(value) for _, value in positional_args]
        param_parts.extend(
            f"{name}={self._value_to_string(value)}" for name, value in named_args
        )
        return f"{self.function_name}({', '.join(param_parts)})"

    @staticmethod
    def _quote(text: str) -> str:
        """
        Wrap text in quotes for use inside a function call string.

        Single quotes are used by default. Double quotes are used when the
        text contains an apostrophe but no double quote, so the apostrophe
        does not end the quoted string early.
        """
        if "'" in text and '"' not in text:
            return f'"{text}"'
        return f"'{text}'"

    def _value_to_string(self, value: Any) -> str:
        """
        Convert a value to its string representation for function calls.

        Args:
            value: The value to convert

        Returns:
            String representation of the value
        """
        if isinstance(value, str):
            return self._quote(value)
        if isinstance(value, (list, tuple)):
            # Lists and tuples are both rendered with square brackets.
            items = [self._value_to_string(item) for item in value]
            return f"[{', '.join(items)}]"
        if isinstance(value, dict):
            # Keys are always rendered as quoted strings.
            items = [
                f"{self._quote(str(key))}: {self._value_to_string(item)}"
                for key, item in value.items()
            ]
            return f"{{{', '.join(items)}}}"
        if isinstance(value, bool):
            # Booleans are rendered in lowercase ("true" / "false").
            return str(value).lower()
        if value is None:
            return "None"
        # Numbers and any other type fall back to str().
        return str(value)
def parse_function_call(function_string: str) -> FunctionCall:
    """
    Turn a function call string into a FunctionCall.

    Surrounding whitespace is stripped first. The stripped text must consist
    of a (possibly dotted) identifier followed by a parenthesised argument
    list that runs to the end of the string.

    Args:
        function_string: String representation of a function call

    Returns:
        FunctionCall holding the function name, the parsed parameters and the
        stripped input string

    Raises:
        ValueError: If the stripped text does not look like ``name(...)``

    Examples:
        >>> parse_function_call("mobile.wait(seconds=3)")
        FunctionCall(function_name='mobile.wait', parameters={'seconds': 3}, ...)
        >>> parse_function_call("pyautogui.click(x=0.8102, y=0.9463)")
        FunctionCall(function_name='pyautogui.click', parameters={'x': 0.8102, 'y': 0.9463}, ...)
    """
    call_text = function_string.strip()

    # Group 1: function name (dots allowed); group 2: raw argument list.
    parsed = re.match(r'^([a-zA-Z_][a-zA-Z0-9_.]*)\s*\((.*)\)$', call_text)
    if parsed is None:
        raise ValueError(f"Invalid function call format: {call_text}")

    name, raw_arguments = parsed.groups()
    return FunctionCall(
        function_name=name,
        parameters=parse_parameters(raw_arguments),
        original_string=call_text,
    )
def parse_parameters(params_string: str) -> Dict[str, Any]:
    """
    Parse parameter string and extract parameter names and values.

    Positional arguments are stored under the keys "arg_0", "arg_1", ... in
    the order they appear. A name is numbered this way only when it has the
    exact form "arg_<digits>". Keyword arguments whose name merely starts with
    "arg_" (for example "arg_name") keep their own name.

    Args:
        params_string: String containing parameters (e.g., "x=0.5, y=0.6, text='hello'")

    Returns:
        Dictionary mapping parameter names to their values. Empty or
        whitespace-only input yields an empty dictionary.

    Examples:
        >>> parse_parameters("x=0.5, y=0.6")
        {'x': 0.5, 'y': 0.6}
        >>> parse_parameters("app_name='drupe'")
        {'app_name': 'drupe'}
        >>> parse_parameters("'text'")
        {'arg_0': 'text'}
        >>> parse_parameters("1, 3, 4")
        {'arg_0': 1, 'arg_1': 3, 'arg_2': 4}
        >>> parse_parameters("arg1, arg2, x=0.5")
        {'arg_0': 'arg1', 'arg_1': 'arg2', 'x': 0.5}
    """
    if not params_string.strip():
        return {}

    parameters: Dict[str, Any] = {}
    positional_index = 0

    # split_parameters keeps commas inside quotes/brackets intact.
    for part in split_parameters(params_string):
        part = part.strip()
        if not part:
            # Empty segments (e.g. from "a,,b") carry no argument.
            continue

        name, value = parse_single_parameter(part)

        # Renumber only "arg_<digits>" names. Testing for a bare "arg_" prefix
        # would rename real keyword arguments such as "arg_name".
        if re.fullmatch(r"arg_[0-9]+", name):
            name = f"arg_{positional_index}"
            positional_index += 1

        parameters[name] = value

    return parameters
def split_parameters(params_string: str) -> List[str]:
    """
    Split parameter string by commas, respecting quotes and brackets.

    A comma separates two parameters only when it is outside any quoted
    string and outside any (), [] or {} group. Inside a quoted string a
    backslash escapes the next character, so an escaped quote (``\\'``) does
    not end the string.

    Args:
        params_string: String containing parameters

    Returns:
        List of individual parameter strings, each stripped of surrounding
        whitespace. Empty segments between two commas are kept as empty
        strings; an empty trailing segment is dropped.
    """
    openers = {'(': 0, '[': 0, '{': 0}  # nesting depth per bracket kind
    closer_to_opener = {')': '(', ']': '[', '}': '{'}

    parts: List[str] = []
    current: List[str] = []  # characters of the segment being built
    quote_char = None        # the quote that opened the current string, if any
    escaped = False          # True right after a backslash inside a string

    for char in params_string:
        if quote_char is not None:
            current.append(char)
            if escaped:
                escaped = False
            elif char == '\\':
                escaped = True
            elif char == quote_char:
                quote_char = None
            continue

        if char in ('"', "'"):
            quote_char = char
        elif char in openers:
            openers[char] += 1
        elif char in closer_to_opener:
            openers[closer_to_opener[char]] -= 1
        elif char == ',' and not any(openers.values()):
            # Top-level comma: close the current segment.
            parts.append(''.join(current).strip())
            current = []
            continue

        current.append(char)

    tail = ''.join(current).strip()
    if tail:
        parts.append(tail)

    return parts
def parse_single_parameter(param_string: str) -> Tuple[str, Any]:
    """
    Parse a single parameter string into name and value.

    Text of the form ``identifier = value`` is a named parameter. Anything
    else is a positional parameter and is returned under the placeholder name
    "arg_0".

    Args:
        param_string: String like "x=0.5" or "app_name='drupe'" or just "value"

    Returns:
        Tuple of (parameter_name, parameter_value)

    Examples:
        >>> parse_single_parameter("x=0.5")
        ('x', 0.5)
        >>> parse_single_parameter("app_name='drupe'")
        ('app_name', 'drupe')
        >>> parse_single_parameter("'text'")
        ('arg_0', 'text')
        >>> parse_single_parameter("123")
        ('arg_0', 123)
        >>> parse_single_parameter("3")
        ('arg_0', 3)
    """
    # DOTALL lets the value span line breaks. Without it a named value that
    # contains a newline fails to match and is treated as a positional string.
    named = re.match(
        r'^([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*(.+)$',
        param_string,
        re.DOTALL,
    )
    if named:
        return named.group(1), parse_value(named.group(2).strip())

    # Positional parameter: "arg_0" is only a placeholder name.
    return "arg_0", parse_value(param_string)
def parse_value(value_string: str) -> Any:
    """
    Parse a value string into appropriate Python type.

    Checks are applied in this order after stripping whitespace: quoted
    string, list, dictionary, boolean, None, number. Anything that matches
    none of them is returned unchanged as a string.

    Args:
        value_string: String representation of a value

    Returns:
        Parsed value (int, float, str, bool, None, list or dict)

    Examples:
        >>> parse_value("3")
        3
        >>> parse_value("3.14")
        3.14
        >>> parse_value("1e5")
        100000.0
        >>> parse_value("'hello'")
        'hello'
        >>> parse_value("[0.581, 0.898]")
        [0.581, 0.898]
    """
    value_string = value_string.strip()

    # Quoted string: needs two matching quote characters, so a lone quote
    # character is not mistaken for an empty string.
    if (
        len(value_string) >= 2
        and value_string[0] == value_string[-1]
        and value_string[0] in ("'", '"')
    ):
        return value_string[1:-1]

    if value_string.startswith('[') and value_string.endswith(']'):
        return parse_list(value_string)

    if value_string.startswith('{') and value_string.endswith('}'):
        return parse_dict(value_string)

    # Booleans and None are matched case-insensitively.
    lowered = value_string.lower()
    if lowered in ('true', 'false'):
        return lowered == 'true'
    if lowered == 'none':
        return None

    try:
        # A dot or an explicit exponent ("1e5") means float; the exponent
        # pattern is strict so bare words such as "inf" stay strings.
        if '.' in value_string or re.fullmatch(
            r'[+-]?[0-9]+[eE][+-]?[0-9]+', value_string
        ):
            return float(value_string)
        return int(value_string)
    except ValueError:
        # Not a number: hand the text back untouched.
        return value_string
def parse_list(list_string: str) -> List[Any]:
    """
    Convert the text of a bracketed list into a Python list.

    The first and last characters are dropped without inspection; the caller
    is expected to pass text that starts with "[" and ends with "]".

    Args:
        list_string: String like "[0.581, 0.898]"

    Returns:
        List with each element parsed by parse_value; an empty list when
        nothing but whitespace sits between the brackets

    Examples:
        >>> parse_list("[0.581, 0.898]")
        [0.581, 0.898]
    """
    inner = list_string[1:-1].strip()
    if inner == "":
        return []

    # Elements are separated on top-level commas only, so nested lists and
    # quoted commas stay in one piece.
    elements = []
    for piece in split_parameters(inner):
        elements.append(parse_value(piece.strip()))
    return elements
def parse_dict(dict_string: str) -> Dict[str, Any]:
    """
    Parse a dictionary string into a Python dict.

    The first and last characters are dropped without inspection; the caller
    is expected to pass text that starts with "{" and ends with "}". Each
    entry is split into key and value at the first colon that is outside
    quotes and brackets, so a colon inside a quoted key (``{'a:b': 1}``) stays
    part of the key. Entries without any colon are skipped.

    Args:
        dict_string: String like "{'key': 'value'}"

    Returns:
        Dictionary of parsed key-value pairs; keys and values are both parsed
        by parse_value
    """

    def split_key_value(entry: str):
        """Split entry at its first top-level colon; None if there is none."""
        quote_char = None
        escaped = False
        depth = 0
        for index, char in enumerate(entry):
            if quote_char is not None:
                if escaped:
                    escaped = False
                elif char == '\\':
                    escaped = True
                elif char == quote_char:
                    quote_char = None
            elif char in ('"', "'"):
                quote_char = char
            elif char in '([{':
                depth += 1
            elif char in ')]}':
                depth -= 1
            elif char == ':' and depth == 0:
                return entry[:index], entry[index + 1:]
        return None

    content = dict_string[1:-1].strip()
    if not content:
        return {}

    result: Dict[str, Any] = {}
    for part in split_parameters(content):
        part = part.strip()
        pair = split_key_value(part)
        if pair is None:
            if ':' not in part:
                # Best effort: an entry without a colon has no key/value.
                continue
            # Unbalanced quotes or brackets: fall back to the plain
            # first-colon split.
            pair = tuple(part.split(':', 1))
        key_str, value_str = pair
        result[parse_value(key_str.strip())] = parse_value(value_str.strip())

    return result
def parse_multiple_functions(function_strings: List[str]) -> List[FunctionCall]:
    """
    Parse a batch of function call strings, skipping the ones that fail.

    A string that cannot be parsed does not abort the batch: a warning is
    printed and parsing moves on to the next string.

    Args:
        function_strings: List of function call strings

    Returns:
        List of FunctionCall objects for the strings that parsed, in input
        order
    """
    parsed_calls = []
    for candidate in function_strings:
        try:
            call = parse_function_call(candidate)
        except Exception as error:
            # Best effort by design: report the failure and keep going.
            print(f"Warning: Could not parse function call '{candidate}': {error}")
        else:
            parsed_calls.append(call)
    return parsed_calls
def _find_call_end(text: str, start: int) -> int:
    """
    Find the ")" that closes a call whose "(" sits just before start.

    Parentheses inside quoted strings are ignored and nested parentheses are
    balanced. Returns -1 when no closing parenthesis is found, or when a
    quoted string runs into a line break (taken as a stray quote, not a
    string).
    """
    depth = 1
    quote_char = None
    escaped = False
    for index in range(start, len(text)):
        char = text[index]
        if quote_char is not None:
            if char == '\n':
                return -1
            if escaped:
                escaped = False
            elif char == '\\':
                escaped = True
            elif char == quote_char:
                quote_char = None
        elif char in ('"', "'"):
            quote_char = char
        elif char == '(':
            depth += 1
        elif char == ')':
            depth -= 1
            if depth == 0:
                return index
    return -1


def extract_function_calls_from_text(text: str) -> List[FunctionCall]:
    """
    Extract and parse function calls from a text block.

    A candidate starts at a (possibly dotted) identifier followed by "(" and
    ends at the matching ")". Closing parentheses inside quoted strings or
    nested calls do not end the candidate, so ``write(message='hi :)')`` is
    extracted whole. When the quotes or parentheses are unbalanced, the
    candidate ends at the first ")" after the opening parenthesis instead.

    Args:
        text: Text containing function calls

    Returns:
        List of FunctionCall objects for the candidates that parsed; parse
        failures are reported by parse_multiple_functions
    """
    call_head = re.compile(r'[a-zA-Z_][a-zA-Z0-9_.]*\s*\(')

    candidates = []
    position = 0
    while True:
        opening = call_head.search(text, position)
        if opening is None:
            break

        closing = _find_call_end(text, opening.end())
        if closing == -1:
            # Unbalanced input: fall back to the first ")" after the "(".
            closing = text.find(')', opening.end())
        if closing == -1:
            # No ")" anywhere ahead, so no further call can be complete.
            break

        candidates.append(text[opening.start():closing + 1])
        position = closing + 1

    return parse_multiple_functions(candidates)
# Example usage and testing
if __name__ == "__main__":
    divider = "=" * 50

    # Demo 1: parse each sample call and show its name and parameters.
    parser_inputs = [
        "mobile.wait(seconds=3)",
        "mobile.home()",
        "mobile.open_app(app_name='drupe')",
        "mobile.swipe(from_coord=[0.581, 0.898], to_coord=[0.601, 0.518])",
        "mobile.back()",
        "mobile.long_press(x=0.799, y=0.911)",
        "mobile.terminate(status='success')",
        "answer('text')",
        "pyautogui.hscroll(page=-0.1)",
        "pyautogui.scroll(page=-0.1)",
        "pyautogui.scroll(0.13)",
        "pyautogui.click(x=0.8102, y=0.9463)",
        "pyautogui.hotkey(keys=['ctrl', 'c'])",
        "pyautogui.doubleClick()",
        "pyautogui.press(keys='enter')",
        "pyautogui.press(keys=['enter'])",
        "pyautogui.moveTo(x=0.04, y=0.405)",
        "pyautogui.write(message='bread buns')",
        "pyautogui.dragTo(x=0.8102, y=0.9463)",
        # Additional test cases for multiple positional arguments
        "function(arg1, arg2, arg3)",
        "function('hello', 123, x=0.5)",
        "function(arg1, arg2, named_param='value')",
        "function(1, 2, 3, 4, 5)",
        "function('a', 'b', 'c', x=1, y=2)",
    ]

    print("Testing function parser:")
    print(divider)
    for call_text in parser_inputs:
        # The input is echoed first whether or not parsing succeeds.
        print(f"{call_text}")
        try:
            parsed_call = parse_function_call(call_text)
        except Exception as error:
            print(f" Error: {error}")
        else:
            print(f" Function: {parsed_call.function_name}")
            print(f" Parameters: {parsed_call.parameters}")
        print()

    # Demo 2: pull calls out of a multi-line text block.
    print("Testing text extraction:")
    print(divider)
    sample_text = """
mobile.wait(seconds=3)
mobile.open_app(app_name='drupe')
pyautogui.click(x=0.8102, y=0.9463)
pyautogui.write(message='bread buns')
"""
    for found_call in extract_function_calls_from_text(sample_text):
        print(f"Found: {found_call.function_name} with params: {found_call.parameters}")

    # Demo 3: parse, rebuild with to_string(), and compare with the input.
    print("\nTesting function call reconstruction:")
    print(divider)
    roundtrip_inputs = [
        "mobile.wait(seconds=3)",
        "mobile.home()",
        "mobile.open_app(app_name='drupe')",
        "mobile.swipe(from_coord=[0.581, 0.898], to_coord=[0.601, 0.518])",
        "answer('text')",
        "pyautogui.scroll(0.13)",
        "pyautogui.click(x=0.8102, y=0.9463)",
        "pyautogui.hotkey(keys=['ctrl', 'c'])",
        "function(1, 2, 3)",
        "function('hello', 123, x=0.5, y=0.8)",
        "function([1, 3], 'arg2', named_param='value')",
    ]
    for call_text in roundtrip_inputs:
        rebuilt = parse_function_call(call_text).to_string()
        print(f"Original: {call_text}")
        print(f"Reconstructed: {rebuilt}")
        print(f"Match: {call_text == rebuilt}")
        print()