# rewards/reward_function.py · WalkXR-AI

from dataclasses import dataclass

@dataclass
class RewardResult:
    """Pair a numeric reward with a human-readable description of it."""

    # Numeric reward score.
    value: float
    # Free-form text accompanying the score (e.g. which components went into it).
    explanation: str

def compute_reward(
    qa_score: float,
    agent_emotion: str,
    mood_arc_target: str,
    latency: float,
    *,
    match_bonus: float = 1.0,
    latency_threshold: float = 2.0,
    latency_bonus: float = 0.5,
    latency_penalty: float = 0.5,
) -> RewardResult:
    """Combine QA quality, emotion alignment and latency into a single reward.

    The reward starts at ``qa_score``; ``match_bonus`` is added when
    ``agent_emotion`` equals ``mood_arc_target`` (exact, case-sensitive string
    equality); then ``latency_bonus`` is added if ``latency`` is strictly below
    ``latency_threshold``, otherwise ``latency_penalty`` is subtracted.

    With the default keyword values the result is identical to the previous
    hard-coded behaviour (+1.0 on match, +/-0.5 around a 2.0 threshold).

    Args:
        qa_score: Base score the reward is built on.
        agent_emotion: Emotion label produced by the agent.
        mood_arc_target: Emotion label the agent was expected to produce.
        latency: Response latency; rendered with an ``s`` suffix in the
            explanation, so presumably seconds -- confirm with callers.
        match_bonus: Amount added when the emotion labels are equal.
        latency_threshold: Latencies strictly below this earn the bonus.
        latency_bonus: Amount added for a latency below the threshold.
        latency_penalty: Amount subtracted otherwise.

    Returns:
        A ``RewardResult`` holding the final score and a short explanation
        listing the QA score, whether the emotions matched, and the latency
        formatted to two decimals.
    """
    # Evaluate the match once so the score and the explanation cannot diverge.
    emotion_matches = agent_emotion == mood_arc_target

    score = qa_score
    if emotion_matches:
        score += match_bonus

    # Strict comparison: a latency exactly at the threshold is penalised, and
    # so is NaN (every ordering comparison with NaN is False).
    if latency < latency_threshold:
        score += latency_bonus
    else:
        score -= latency_penalty

    explanation = (
        f"QA: {qa_score}, Match: {emotion_matches}, "
        f"Latency: {latency:.2f}s"
    )

    return RewardResult(value=score, explanation=explanation)