Python: Run an eval suite

Running a suite of evals

The sample below contains all the code you need to run a suite of evals against a dataset.

import os
from athina.evals import (
    # Evals used in the suite below.
    RagasAnswerCorrectness,
    RagasContextPrecision,
    RagasContextRelevancy,
    RagasContextRecall,
    RagasFaithfulness,
    ResponseFaithfulness,
    Groundedness,
    ContextSufficiency,
    # Other commonly used evals you may want to add to the suite.
    DoesResponseAnswerQuery,
    ContextContainsEnoughInformation,
    Faithfulness,
)
from athina.loaders import Loader
from athina.keys import AthinaApiKey, OpenAiApiKey
from athina.runner.run import EvalRunner
from athina.datasets import yc_query_mini
import pandas as pd

from dotenv import load_dotenv
load_dotenv()

# Configure API keys.
# OPENAI_API_KEY is required to run LLM-graded evals.
OpenAiApiKey.set_key(os.getenv('OPENAI_API_KEY'))
# ATHINA_API_KEY is used to log results to the Athina platform
# (optional — omit if you are only running evals locally).
AthinaApiKey.set_key(os.getenv('ATHINA_API_KEY'))

# Load the dataset.
# Each entry is a dict; "expected_response" is only needed by evals that
# compare against a ground-truth answer (e.g. RagasAnswerCorrectness).
dataset = [
    {
        "query": "query_string",
        "context": ["chunk_1", "chunk_2"],
        "response": "llm_generated_response_string",
        "expected_response": "ground truth (optional)",
    },
    { ... },
    { ... },
    { ... },
]

# Evaluate the dataset across a suite of eval criteria.
# max_parallel_evals controls how many evals run concurrently.
EvalRunner.run_suite(
    evals=[
        RagasAnswerCorrectness(),
        RagasContextPrecision(),
        RagasContextRelevancy(),
        RagasContextRecall(),
        RagasFaithfulness(),
        ResponseFaithfulness(),
        Groundedness(),
        ContextSufficiency(),
    ],
    data=dataset,
    max_parallel_evals=10
)
Response Format

Running the suite returns a batch result object with the following structure: