describeEval
describeEval<THarness>(name, options, define): SuiteCollector<object>
Creates a harness-backed eval suite on top of a fixture-backed Vitest test API.
Type Parameters
Section titled “Type Parameters”THarness
Section titled “THarness”THarness extends Harness<any, any, any>
Parameters
Section titled “Parameters”name
Section titled “name”string
Suite name shown by Vitest and reporters.
options
Section titled “options”DescribeEvalOptions<HarnessInput<THarness>, HarnessOutput<THarness>, HarnessMetadataFor<THarness>, THarness>
Harness, automatic judges, threshold, and suite skip settings.
define
Section titled “define”(it) => void
Callback that receives the eval-aware `it` API.
Returns
Section titled “Returns”SuiteCollector<object>
Example
Section titled “Example”
import { piAiHarness } from "@vitest-evals/harness-pi-ai";
import { getModel } from "@mariozechner/pi-ai";
import { piAiJudgeHarness } from "@vitest-evals/harness-pi-ai";
import { expect } from "vitest";
import {
  describeEval,
  FactualityJudge,
  ToolCallJudge,
  toolCalls,
} from "vitest-evals";
import { createRefundAgent } from "../src/refundAgent";

const judgeHarness = piAiJudgeHarness({
  model: getModel("anthropic", "claude-sonnet-4-5"),
  temperature: 0,
});

describeEval("refund agent", {
  harness: piAiHarness({
    agent: () => createRefundAgent(),
  }),
  judgeHarness,
  judges: [ToolCallJudge()],
}, (it) => {
  it("approves a refundable invoice", async ({ run }) => {
    const result = await run("Refund invoice inv_123", {
      metadata: {
        expected: "Invoice inv_123 should be refunded.",
      },
    });

    expect(result.output).toMatchObject({ status: "approved" });
    expect(toolCalls(result.session)).toHaveLength(2);
    await expect(result).toSatisfyJudge(FactualityJudge(), {
      threshold: 0.6,
    });
  });
});