diff --git a/people_data_enrichment/run_eval.py b/people_data_enrichment/run_eval.py index d29db5c..1136f84 100644 --- a/people_data_enrichment/run_eval.py +++ b/people_data_enrichment/run_eval.py @@ -62,8 +62,8 @@ EVALUATION_PROMPT = f"""You are an evaluator tasked with assessing the accuracy def evaluate_agent(outputs: dict, reference_outputs: dict): - if "info" not in outputs or not isinstance(outputs["info"], dict): - return 0.0 + if "info" not in outputs: + raise ValueError("Agent output must contain 'info' key") class Score(BaseModel): """Evaluate the agent's output against the expected output.""" diff --git a/public_company_data_enrichment/run_eval.py b/public_company_data_enrichment/run_eval.py index a6f2220..ba5765e 100644 --- a/public_company_data_enrichment/run_eval.py +++ b/public_company_data_enrichment/run_eval.py @@ -35,8 +35,8 @@ EVALUATION_PROMPT = f"""You are an evaluator tasked with assessing the accuracy def evaluate_agent(outputs: dict, reference_outputs: dict): - if "info" not in outputs or not isinstance(outputs["info"], dict): - return 0.0 + if "info" not in outputs: + raise ValueError("Agent output must contain 'info' key") class Score(BaseModel): """Evaluate the agent's output against the expected output.""" diff --git a/startup_data_enrichment/run_eval.py b/startup_data_enrichment/run_eval.py index e6e26b4..843fd0d 100644 --- a/startup_data_enrichment/run_eval.py +++ b/startup_data_enrichment/run_eval.py @@ -41,8 +41,8 @@ EVALUATION_PROMPT = f"""You are an evaluator tasked with assessing the accuracy def evaluate_agent(outputs: dict, reference_outputs: dict): - if "info" not in outputs or not isinstance(outputs["info"], dict): - return 0.0 + if "info" not in outputs: + raise ValueError("Agent output must contain 'info' key") class Score(BaseModel): """Evaluate the agent's output against the expected output."""