P14-apr-using-qlora/patch_validator/patch_validator_gui.py
2026-02-02 17:36:42 +09:00

594 lines
23 KiB
Python
Executable file

#!/usr/bin/env python3
"""
Plausible Patch Validator GUI
A Tkinter-based GUI for human evaluation of plausible patches.
Allows evaluators to mark patches as correct, incorrect, or pending.
"""
import csv
import re
import tkinter as tk
from tkinter import ttk, messagebox, font
from collections import defaultdict
from pathlib import Path
# Location of the evaluations CSV that the GUI loads on start-up and rewrites
# after every verdict. The "file not found" dialog names convert_to_csv.py as
# the script to run first when this file is missing.
CSV_PATH = "./evaluations.csv"
class PatchValidatorGUI:
    """Tkinter window for manually labelling model-generated patches.

    Rows are loaded from a CSV file, grouped as problem -> model -> patch and
    shown in a tree on the left.  Selecting a patch shows the surrounding
    code, the original buggy lines, the expected fix and the model patch on
    the right.  Every verdict ("correct", "incorrect", "pending", "skip") is
    written back to the CSV immediately.

    Args:
        root: The Tk root window the interface is built in.
        csv_path: Optional path of the evaluations CSV.  When omitted the
            module-level ``CSV_PATH`` is used.
    """

    # Column order used until a CSV header has been loaded.
    DEFAULT_FIELDNAMES = (
        "problem_id", "benchmark", "model", "patch_index",
        "patch", "input", "fixed_line", "evaluation",
    )
    # Columns the GUI reads from every row; "evaluation" is added when absent.
    REQUIRED_COLUMNS = (
        "problem_id", "benchmark", "model", "patch_index",
        "patch", "input", "fixed_line",
    )
    # Verdict -> glyph shown in front of each patch in the tree.
    STATUS_ICONS = {
        "correct": "\u2713",    # checkmark
        "incorrect": "\u2717",  # X
        "pending": "?",
        "skip": "\u21b7",       # skip arrow
    }
    UNEVALUATED_ICON = "\u25cb"  # empty circle
    # Keyboard key -> verdict; both lower and upper case are bound.
    SHORTCUTS = (
        ("c", "correct"),
        ("x", "incorrect"),
        ("z", "pending"),
        ("s", "skip"),
    )

    def __init__(self, root: tk.Tk, csv_path=None):
        self.root = root
        self.root.title("Plausible Patch Validator")
        self.root.geometry("1400x900")

        self.csv_path = CSV_PATH if csv_path is None else csv_path
        self.fieldnames = list(self.DEFAULT_FIELDNAMES)  # header written on save
        self.data = []             # all CSV rows, one dict per row
        self.tree_items = {}       # tree item id -> index into self.data
        self.current_index = None  # index into self.data of the shown patch
        # problem_id -> model -> list of (data_index, patch_index)
        self.hierarchy = defaultdict(lambda: defaultdict(list))

        # load_data() destroys the root window when the CSV is unusable;
        # building widgets on a destroyed application raises TclError, so stop.
        if not self.load_data():
            return
        self.build_hierarchy()
        self.setup_ui()
        self.select_first_unevaluated()

    # ------------------------------------------------------------------
    # Data handling
    # ------------------------------------------------------------------
    def load_data(self):
        """Load all rows of the evaluations CSV into ``self.data``.

        On a missing file, an empty file or a header without the required
        columns an error dialog is shown and the root window is destroyed.

        Returns:
            True when the rows were loaded, False otherwise.
        """
        path = Path(self.csv_path)
        if not path.exists():
            messagebox.showerror(
                "Error",
                f"CSV file not found: {self.csv_path}\nPlease run convert_to_csv.py first.",
            )
            self.root.destroy()
            return False

        # newline="" is required by the csv module so that line breaks inside
        # quoted fields (multi-line code) are returned unchanged.
        with path.open("r", newline="", encoding="utf-8") as f:
            reader = csv.DictReader(f, restval="")
            self.data = list(reader)
            header = list(reader.fieldnames or [])

        if not self.data:
            messagebox.showerror("Error", "CSV file is empty!")
            self.root.destroy()
            return False

        missing = [name for name in self.REQUIRED_COLUMNS if name not in header]
        if missing:
            messagebox.showerror(
                "Error", f"CSV file is missing columns: {', '.join(missing)}"
            )
            self.root.destroy()
            return False

        if "evaluation" not in header:
            header.append("evaluation")
        self.fieldnames = header
        for row in self.data:
            # An absent or short column must read as "not evaluated".
            row["evaluation"] = row.get("evaluation") or ""
        return True

    def build_hierarchy(self):
        """Group row indices as problem_id -> model -> [(data_idx, patch_idx)].

        Patches of each model are sorted by their numeric patch index.

        Raises:
            ValueError: If a row's ``patch_index`` is not an integer.
        """
        self.hierarchy.clear()
        for idx, row in enumerate(self.data):
            entry = (idx, int(row["patch_index"]))
            self.hierarchy[row["problem_id"]][row["model"]].append(entry)
        for models in self.hierarchy.values():
            for entries in models.values():
                entries.sort(key=lambda entry: entry[1])

    def save_csv(self):
        """Write all rows back to the CSV file.

        The rows go to a temporary sibling file that then replaces the
        original, so an interrupted write cannot truncate earlier verdicts.

        Raises:
            OSError: If the file cannot be written or replaced.
        """
        target = Path(self.csv_path)
        tmp = target.with_name(target.name + ".tmp")
        try:
            with tmp.open("w", newline="", encoding="utf-8") as f:
                writer = csv.DictWriter(f, fieldnames=self.fieldnames)
                writer.writeheader()
                writer.writerows(self.data)
            tmp.replace(target)
        except Exception:
            # Do not leave a half-written temporary file behind.
            if tmp.exists():
                tmp.unlink()
            raise

    def _ordered_indices(self):
        """Return all data indices in display order (problem, model, patch)."""
        ordered = []
        for problem_id in sorted(self.hierarchy):
            models = self.hierarchy[problem_id]
            for model in sorted(models):
                ordered.extend(data_idx for data_idx, _ in models[model])
        return ordered

    def _count_evaluated(self, entries):
        """Return ``(evaluated, total)`` for a list of (data_idx, patch_idx)."""
        evaluated = sum(1 for data_idx, _ in entries if self.data[data_idx]["evaluation"])
        return evaluated, len(entries)

    def _first_unevaluated_index(self):
        """Return the first unevaluated data index in display order.

        Falls back to the first item when everything is evaluated and to
        None when there are no rows.
        """
        order = self._ordered_indices()
        for data_idx in order:
            if self.data[data_idx]["evaluation"] == "":
                return data_idx
        return order[0] if order else None

    def _find_next_unevaluated(self):
        """Return the next unevaluated data index after the current one.

        The search follows display order and wraps around to the start; the
        current item itself is checked last.  Returns None when every patch
        has a verdict.
        """
        order = self._ordered_indices()
        if not order:
            return None
        try:
            start = order.index(self.current_index)
        except ValueError:
            start = -1
        count = len(order)
        for offset in range(1, count + 1):
            candidate = order[(start + offset) % count]
            if self.data[candidate]["evaluation"] == "":
                return candidate
        return None

    def _neighbor_index(self, step):
        """Return the data index ``step`` positions away in display order.

        Returns None when there is no current item or the target position
        falls outside the list.
        """
        order = self._ordered_indices()
        try:
            position = order.index(self.current_index)
        except ValueError:
            return None
        target = position + step
        if 0 <= target < len(order):
            return order[target]
        return None

    def _progress_text(self):
        """Build the text of the progress label from the current verdicts."""
        counts = {"correct": 0, "incorrect": 0, "pending": 0, "skip": 0}
        evaluated = 0
        for row in self.data:
            status = row["evaluation"]
            if status:
                evaluated += 1
            if status in counts:
                counts[status] += 1
        return (
            f"Progress: {evaluated}/{len(self.data)} | Correct: {counts['correct']} | "
            f"Incorrect: {counts['incorrect']} | Pending: {counts['pending']} | "
            f"Skip: {counts['skip']}"
        )

    # ------------------------------------------------------------------
    # UI construction
    # ------------------------------------------------------------------
    def setup_ui(self):
        """Build all widgets, bind the shortcuts and fill the tree."""
        main_paned = ttk.PanedWindow(self.root, orient=tk.HORIZONTAL)
        main_paned.pack(fill=tk.BOTH, expand=True, padx=5, pady=5)
        self._build_navigation_panel(main_paned)
        self._build_viewer_panel(main_paned)
        self._bind_shortcuts()
        self.populate_tree()

    def _build_navigation_panel(self, main_paned):
        """Create the left pane: progress label, filter box and tree."""
        left_frame = ttk.Frame(main_paned, width=350)
        main_paned.add(left_frame, weight=1)

        nav_label = ttk.Label(left_frame, text="Navigation", font=("TkDefaultFont", 12, "bold"))
        nav_label.pack(pady=(5, 5))

        self.progress_label = ttk.Label(left_frame, text="Progress: 0/0")
        self.progress_label.pack(pady=(0, 5))

        filter_frame = ttk.Frame(left_frame)
        filter_frame.pack(fill=tk.X, padx=5, pady=5)
        ttk.Label(filter_frame, text="Filter:").pack(side=tk.LEFT)
        self.filter_var = tk.StringVar(value="all")
        filter_combo = ttk.Combobox(
            filter_frame,
            textvariable=self.filter_var,
            values=["all", "unevaluated", "correct", "incorrect", "pending", "skip"],
            state="readonly",
            width=12,
        )
        filter_combo.pack(side=tk.LEFT, padx=5)
        filter_combo.bind("<<ComboboxSelected>>", self.on_filter_change)

        tree_frame = ttk.Frame(left_frame)
        tree_frame.pack(fill=tk.BOTH, expand=True, padx=5, pady=5)
        self.tree = ttk.Treeview(tree_frame, selectmode="browse")
        self.tree.heading("#0", text="Problems / Models / Patches")
        tree_scroll = ttk.Scrollbar(tree_frame, orient=tk.VERTICAL, command=self.tree.yview)
        self.tree.configure(yscrollcommand=tree_scroll.set)
        self.tree.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
        tree_scroll.pack(side=tk.RIGHT, fill=tk.Y)
        self.tree.bind("<<TreeviewSelect>>", self.on_tree_select)

    def _build_viewer_panel(self, main_paned):
        """Create the right pane: info labels, code view, legend, buttons."""
        right_frame = ttk.Frame(main_paned)
        main_paned.add(right_frame, weight=3)

        info_frame = ttk.Frame(right_frame)
        info_frame.pack(fill=tk.X, padx=5, pady=5)
        self.info_label = ttk.Label(info_frame, text="Select a patch to view", font=("TkDefaultFont", 11))
        self.info_label.pack(anchor=tk.W)
        self.benchmark_label = ttk.Label(info_frame, text="")
        self.benchmark_label.pack(anchor=tk.W)

        code_frame = ttk.LabelFrame(right_frame, text="Code View")
        code_frame.pack(fill=tk.BOTH, expand=True, padx=5, pady=5)
        # Read-only text widget; display_code() enables it only while writing.
        self.code_text = tk.Text(code_frame, wrap=tk.NONE, font=("Consolas", 11), state=tk.DISABLED)
        code_scroll_y = ttk.Scrollbar(code_frame, orient=tk.VERTICAL, command=self.code_text.yview)
        code_scroll_x = ttk.Scrollbar(code_frame, orient=tk.HORIZONTAL, command=self.code_text.xview)
        self.code_text.configure(yscrollcommand=code_scroll_y.set, xscrollcommand=code_scroll_x.set)
        code_scroll_y.pack(side=tk.RIGHT, fill=tk.Y)
        code_scroll_x.pack(side=tk.BOTTOM, fill=tk.X)
        self.code_text.pack(fill=tk.BOTH, expand=True)

        # Tags used by display_code() to colour the three compared sections.
        self.code_text.tag_configure("buggy", foreground="red", font=("Consolas", 11, "bold"))
        self.code_text.tag_configure("gold", foreground="green", font=("Consolas", 11, "bold"))
        self.code_text.tag_configure("patch", foreground="blue", font=("Consolas", 11, "bold"))
        self.code_text.tag_configure("normal", foreground="black")
        self.code_text.tag_configure("label", foreground="gray", font=("Consolas", 10, "italic"))

        legend_frame = ttk.Frame(right_frame)
        legend_frame.pack(fill=tk.X, padx=5, pady=2)
        ttk.Label(legend_frame, text="Legend: ", font=("TkDefaultFont", 10, "bold")).pack(side=tk.LEFT)
        ttk.Label(legend_frame, text="Buggy Line", foreground="red").pack(side=tk.LEFT, padx=5)
        ttk.Label(legend_frame, text="Gold Answer", foreground="green").pack(side=tk.LEFT, padx=5)
        ttk.Label(legend_frame, text="Model Patch", foreground="blue").pack(side=tk.LEFT, padx=5)

        self._build_evaluation_bar(right_frame)

    def _build_evaluation_bar(self, parent):
        """Create the status label and the four verdict buttons."""
        button_frame = ttk.Frame(parent)
        button_frame.pack(fill=tk.X, padx=5, pady=10)

        self.status_label = ttk.Label(button_frame, text="Current: Not evaluated", font=("TkDefaultFont", 10))
        self.status_label.pack(side=tk.LEFT, padx=10)

        def make_button(text, verdict):
            button = ttk.Button(button_frame, text=text, command=lambda: self.evaluate(verdict))
            button.pack(side=tk.RIGHT, padx=5)
            return button

        # Packed right-to-left, so the first button ends up rightmost.
        self.btn_correct = make_button("Correct (C)", "correct")
        self.btn_incorrect = make_button("Incorrect (X)", "incorrect")
        self.btn_pending = make_button("Pending (Z)", "pending")
        self.btn_skip = make_button("Skip (S)", "skip")

    def _bind_shortcuts(self):
        """Bind verdict keys (either case) and arrow-key navigation."""
        for key, verdict in self.SHORTCUTS:
            # Bind the upper-case keysym too so Caps Lock does not disable it.
            for sequence in (key, key.upper()):
                self.root.bind(sequence, lambda _event, v=verdict: self.evaluate(v))
        for sequence in ("<Up>", "<Left>"):
            self.root.bind(sequence, lambda _event: self.navigate_prev())
        for sequence in ("<Down>", "<Right>"):
            self.root.bind(sequence, lambda _event: self.navigate_next())

    # ------------------------------------------------------------------
    # Tree handling
    # ------------------------------------------------------------------
    def get_status_icon(self, evaluation: str) -> str:
        """Return the glyph shown in the tree for an evaluation value."""
        return self.STATUS_ICONS.get(evaluation, self.UNEVALUATED_ICON)

    def populate_tree(self):
        """Rebuild the tree from the hierarchy, honouring the active filter.

        Problems and models without any patch matching the filter are left
        out.  The ``[evaluated/total]`` counters always cover every patch of
        the problem or model, independent of the filter.
        """
        for item in self.tree.get_children():
            self.tree.delete(item)
        self.tree_items.clear()

        filter_value = self.filter_var.get()
        for problem_id in sorted(self.hierarchy):
            models = self.hierarchy[problem_id]

            # model -> [(data_idx, patch_idx, status)] for patches passing the filter
            visible = {}
            for model in sorted(models):
                matching = []
                for data_idx, patch_idx in models[model]:
                    status = self.data[data_idx]["evaluation"]
                    if self._matches_filter(status, filter_value):
                        matching.append((data_idx, patch_idx, status))
                if matching:
                    visible[model] = matching
            if not visible:
                continue

            # Truncate long problem ids for display.
            display_id = problem_id if len(problem_id) <= 40 else problem_id[:37] + "..."
            evaluated = 0
            total = 0
            for entries in models.values():
                done, count = self._count_evaluated(entries)
                evaluated += done
                total += count
            problem_node = self.tree.insert(
                "", "end", text=f"{display_id} [{evaluated}/{total}]",
                open=False, tags=("problem",),
            )

            for model, matching in visible.items():
                model_evaluated, model_total = self._count_evaluated(models[model])
                model_node = self.tree.insert(
                    problem_node, "end",
                    text=f"{model} [{model_evaluated}/{model_total}]",
                    open=False, tags=("model",),
                )
                for data_idx, patch_idx, status in matching:
                    icon = self.get_status_icon(status)
                    patch_node = self.tree.insert(
                        model_node, "end",
                        text=f"{icon} patch {patch_idx}",
                        tags=("patch",),
                    )
                    self.tree_items[patch_node] = data_idx

        self.update_progress()

    def _matches_filter(self, evaluation: str, filter_value: str) -> bool:
        """Return True when ``evaluation`` passes the selected filter."""
        if filter_value == "all":
            return True
        if filter_value == "unevaluated":
            return evaluation == ""
        return evaluation == filter_value

    def on_filter_change(self, event=None):
        """Rebuild the tree for the new filter and re-select the shown patch."""
        self.populate_tree()
        # Rebuilding drops the selection; restore it when still visible.
        if self.current_index is not None:
            self.select_tree_item(self.current_index)

    def on_tree_select(self, event=None):
        """Show the patch belonging to the selected tree item, if any."""
        selection = self.tree.selection()
        if not selection:
            return
        item_id = selection[0]
        # Problem and model nodes are not in tree_items and are ignored.
        if item_id in self.tree_items:
            self.current_index = self.tree_items[item_id]
            self.display_current()

    def select_first_unevaluated(self):
        """Show the first unevaluated patch in display order.

        When every patch already has a verdict the first patch is shown.
        """
        target = self._first_unevaluated_index()
        if target is not None:
            self._show_index(target)

    def select_tree_item(self, data_idx: int):
        """Select and reveal the tree item for ``data_idx`` if it is listed."""
        for item_id, idx in self.tree_items.items():
            if idx == data_idx:
                # Expand all ancestors so the item is actually visible.
                parent = self.tree.parent(item_id)
                while parent:
                    self.tree.item(parent, open=True)
                    parent = self.tree.parent(parent)
                self.tree.selection_set(item_id)
                self.tree.see(item_id)
                return

    def _show_index(self, data_idx):
        """Make ``data_idx`` the current patch, display it and select it."""
        self.current_index = data_idx
        self.display_current()
        self.select_tree_item(data_idx)

    # ------------------------------------------------------------------
    # Patch display
    # ------------------------------------------------------------------
    def display_current(self):
        """Refresh the info labels, status label and code view."""
        if self.current_index is None:
            return
        row = self.data[self.current_index]

        self.info_label.config(text=f"Problem: {row['problem_id']}")
        self.benchmark_label.config(
            text=f"Benchmark: {row['benchmark']} | Model: {row['model']} | Patch: {row['patch_index']}"
        )

        eval_status = row["evaluation"]
        if eval_status:
            self.status_label.config(text=f"Current: {eval_status.upper()}")
        else:
            self.status_label.config(text="Current: Not evaluated")

        self.display_code(row)

    @staticmethod
    def _split_input(input_text):
        """Split the model input into code before, inside and after the bug.

        The input is searched for lines containing ``// buggy lines start:``,
        ``// buggy lines end:`` and ``// fixed lines:`` (the last occurrence
        of each wins).  Everything from the fixed-lines marker on is dropped.

        Returns:
            ``(before, buggy, after)`` as lists of lines.  ``buggy`` is None
            when the start/end markers are missing or out of order; in that
            case ``before`` holds all remaining lines and ``after`` is empty.
        """
        lines = input_text.split("\n")
        buggy_start = buggy_end = fixed_start = -1
        for i, line in enumerate(lines):
            if "// buggy lines start:" in line:
                buggy_start = i
            elif "// buggy lines end:" in line:
                buggy_end = i
            elif "// fixed lines:" in line:
                fixed_start = i

        end_idx = fixed_start if fixed_start >= 0 else len(lines)
        if buggy_start < 0 or buggy_end < buggy_start:
            return lines[:end_idx], None, []
        return (
            lines[:buggy_start],
            lines[buggy_start + 1:buggy_end],
            lines[buggy_end + 1:end_idx],
        )

    def _insert_block(self, text, tag, placeholder):
        """Insert ``text`` line by line with ``tag``, or a grey placeholder."""
        if not text:
            self.code_text.insert(tk.END, placeholder, "label")
            return
        # Drop trailing newlines for a cleaner display.
        for line in text.rstrip("\n").split("\n"):
            self.code_text.insert(tk.END, line + "\n", tag)

    def display_code(self, row: dict):
        """Render the code of ``row`` with buggy, gold and patch highlighted.

        Args:
            row: A CSV row providing the ``input``, ``fixed_line`` and
                ``patch`` fields.
        """
        before, buggy, after = self._split_input(row.get("input") or "")

        self.code_text.config(state=tk.NORMAL)
        try:
            self.code_text.delete("1.0", tk.END)

            for line in before:
                self.code_text.insert(tk.END, line + "\n", "normal")

            self.code_text.insert(tk.END, " // === BUGGY LINE (Original) ===\n", "label")
            if buggy is None:
                # Without markers the whole input was shown above; say so
                # instead of leaving the pane empty.
                self.code_text.insert(tk.END, "(No buggy-line markers found in input)\n", "label")
            else:
                for line in buggy:
                    self.code_text.insert(tk.END, line + "\n", "buggy")

            self.code_text.insert(tk.END, " // === GOLD ANSWER (Expected) ===\n", "label")
            self._insert_block(row.get("fixed_line"), "gold", "(No fixed_line provided)\n")

            self.code_text.insert(tk.END, " // === MODEL PATCH (To Evaluate) ===\n", "label")
            self._insert_block(row.get("patch"), "patch", "(Empty patch)\n")

            self.code_text.insert(tk.END, " // === END OF COMPARISON ===\n", "label")

            for line in after:
                self.code_text.insert(tk.END, line + "\n", "normal")
        finally:
            # Always return the widget to read-only, even after an error.
            self.code_text.config(state=tk.DISABLED)

    # ------------------------------------------------------------------
    # Evaluation and navigation
    # ------------------------------------------------------------------
    def evaluate(self, evaluation: str):
        """Record a verdict for the current patch, save, and move on.

        A "correct" verdict also marks the still-unevaluated patches of the
        same problem and model as "skip".

        Args:
            evaluation: The verdict to store in the row's ``evaluation`` field.
        """
        if self.current_index is None:
            return

        self.data[self.current_index]["evaluation"] = evaluation
        if evaluation == "correct":
            self._skip_remaining_in_same_model()

        try:
            self.save_csv()
        except OSError as exc:
            # Keep the verdict in memory; the next successful save includes it.
            messagebox.showerror("Error", f"Could not save {self.csv_path}:\n{exc}")

        self.populate_tree()
        self.navigate_next_unevaluated()

    def _skip_remaining_in_same_model(self):
        """Mark unevaluated siblings of the current patch as "skip"."""
        if self.current_index is None:
            return
        current_row = self.data[self.current_index]
        siblings = self.hierarchy[current_row["problem_id"]][current_row["model"]]
        for data_idx, _ in siblings:
            if data_idx != self.current_index and self.data[data_idx]["evaluation"] == "":
                self.data[data_idx]["evaluation"] = "skip"

    def navigate_next_unevaluated(self):
        """Jump to the next unevaluated patch, wrapping around at the end.

        Shows a completion dialog when no unevaluated patch is left.
        """
        if self.current_index is None:
            return
        target = self._find_next_unevaluated()
        if target is None:
            messagebox.showinfo("Complete", "All patches have been evaluated!")
            return
        self._show_index(target)

    def navigate_prev(self):
        """Move to the previous patch in display order, if there is one."""
        target = self._neighbor_index(-1)
        if target is not None:
            self._show_index(target)

    def navigate_next(self):
        """Move to the next patch in display order, if there is one."""
        target = self._neighbor_index(1)
        if target is not None:
            self._show_index(target)

    def update_progress(self):
        """Refresh the progress label with the per-verdict counts."""
        self.progress_label.config(text=self._progress_text())
def main():
    """Create the Tk root window, attach the validator GUI and run the event loop."""
    window = tk.Tk()
    # Hold a reference to the GUI object for as long as the event loop runs.
    validator = PatchValidatorGUI(window)
    window.mainloop()


# Start the GUI only when the file is executed as a script, not when imported.
if __name__ == "__main__":
    main()