Skip to content

describeEval

describeEval<THarness>(name, options, define): SuiteCollector<object>

Creates a harness-backed eval suite on top of a fixture-backed Vitest test API.

Type parameter: THarness extends Harness<any, any, any>

name: string

Suite name shown by Vitest and reporters.

options: DescribeEvalOptions<HarnessInput<THarness>, HarnessOutput<THarness>, HarnessMetadataFor<THarness>, THarness>

Harness, automatic judges, threshold, and suite skip settings.

define: (it) => void

Callback that receives the eval-aware it API.

Returns: SuiteCollector<object>

import { piAiHarness } from "@vitest-evals/harness-pi-ai";
import { getModel } from "@mariozechner/pi-ai";
import { piAiJudgeHarness } from "@vitest-evals/harness-pi-ai";
import { expect } from "vitest";
import {
describeEval,
FactualityJudge,
ToolCallJudge,
toolCalls,
} from "vitest-evals";
import { createRefundAgent } from "../src/refundAgent";
// Judge harness shared by the suite: the model handle comes from getModel and
// the temperature is pinned to 0.
const judgeModel = getModel("anthropic", "claude-sonnet-4-5");
const judgeHarness = piAiJudgeHarness({ model: judgeModel, temperature: 0 });

// Eval suite for the refund agent. The options object supplies the harness
// under test, the judge harness, and the suite-level judges; the callback
// receives the eval-aware `it` used to register cases.
describeEval(
  "refund agent",
  {
    harness: piAiHarness({
      // The factory calls createRefundAgent() whenever it is invoked.
      agent: () => createRefundAgent(),
    }),
    judgeHarness,
    judges: [ToolCallJudge()],
  },
  (it) => {
    it("approves a refundable invoice", async ({ run }) => {
      const prompt = "Refund invoice inv_123";
      const factualityThreshold = 0.6;

      // Run the prompt through the harness, attaching the expected statement
      // as run metadata.
      const evalRun = await run(prompt, {
        metadata: { expected: "Invoice inv_123 should be refunded." },
      });

      // Plain Vitest assertions on the output and on the recorded tool calls.
      expect(evalRun.output).toMatchObject({ status: "approved" });
      expect(toolCalls(evalRun.session)).toHaveLength(2);

      // Explicit judge assertion with a per-assertion threshold.
      await expect(evalRun).toSatisfyJudge(FactualityJudge(), {
        threshold: factualityThreshold,
      });
    });
  },
);