Running a suite of evals
Here is a sample of all the code you need to run a suite of evals.
import os
from athina.evals import (
    RagasAnswerCorrectness,
    RagasContextPrecision,
    RagasContextRelevancy,
    RagasContextRecall,
    RagasFaithfulness,
    ResponseFaithfulness,
    Groundedness,
    ContextSufficiency,
)
from athina.keys import AthinaApiKey, OpenAiApiKey
from athina.runner.run import EvalRunner
from dotenv import load_dotenv

load_dotenv()

# Configure your API keys
OpenAiApiKey.set_key(os.getenv('OPENAI_API_KEY'))
AthinaApiKey.set_key(os.getenv('ATHINA_API_KEY'))
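# Note: load_dotenv() reads the keys above from a local .env file.
# An assumed minimal layout (keep this file out of version control):
#   OPENAI_API_KEY=sk-...
#   ATHINA_API_KEY=...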
# Load the dataset
dataset = [
    {
        "query": "query_string",
        "context": ["chunk_1", "chunk_2"],
        "response": "llm_generated_response_string",
        "expected_response": "ground truth (optional)",
    },
    { ... },
    { ... },
    { ... },
]
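# Hedged alternative: if your datapoints live in a JSON file with the
# same structure, the standard library can load them directly
# ("eval_dataset.json" is a hypothetical path):
#
#   import json
#   with open("eval_dataset.json") as f:
#       dataset = json.load(f)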
# Evaluate a dataset across a suite of eval criteria
EvalRunner.run_suite(
    evals=[
        RagasAnswerCorrectness(),
        RagasContextPrecision(),
        RagasContextRelevancy(),
        RagasContextRecall(),
        RagasFaithfulness(),
        ResponseFaithfulness(),
        Groundedness(),
        ContextSufficiency(),
    ],
    data=dataset,
    max_parallel_evals=10
)
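Since run_suite returns a batch result (described below), you can also capture the return value and tabulate it for quick inspection. A minimal sketch, reusing the imports and dataset from the sample above; it assumes the result can be passed to pandas as row-like records, and the exact shape of the return value may differ:

import pandas as pd

# Capture the batch result instead of discarding it
batch_eval_result = EvalRunner.run_suite(
    evals=[RagasFaithfulness(), ContextSufficiency()],
    data=dataset,
    max_parallel_evals=10
)

# Assumption: the result tabulates row-wise into a DataFrame
df = pd.DataFrame(batch_eval_result)
print(df.head())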
Response Format
Running the suite returns a batch result object like this: