describeEval

describeEval<THarness>(name, options, define): SuiteCollector<object>

Creates a harness-backed eval suite on top of a fixture-backed Vitest test API.

Type Parameters

THarness

THarness extends Harness<any, any, any>

Parameters

name

string

Suite name shown by Vitest and reporters.

options

DescribeEvalOptions<HarnessInput<THarness>, HarnessOutput<THarness>, HarnessMetadataFor<THarness>, THarness>

Harness, automatic judges, threshold, and suite skip settings.

define

(it) => void

Callback that receives the eval-aware `it` API.

Returns

SuiteCollector<object>

Example

import { piAiHarness } from "@vitest-evals/harness-pi-ai";
import { getModel } from "@mariozechner/pi-ai";
import { piAiJudgeHarness } from "@vitest-evals/harness-pi-ai";
import { expect } from "vitest";
import {
  describeEval,
  FactualityJudge,
  ToolCallJudge,
  toolCalls,
} from "vitest-evals";
import { createRefundAgent } from "../src/refundAgent";

const judgeHarness = piAiJudgeHarness({
  model: getModel("anthropic", "claude-sonnet-4-5"),
  temperature: 0,
});

describeEval("refund agent", {
  harness: piAiHarness({
    agent: () => createRefundAgent(),
  }),
  judgeHarness,
  judges: [ToolCallJudge()],
}, (it) => {
  it("approves a refundable invoice", async ({ run }) => {
    const result = await run("Refund invoice inv_123", {
      metadata: {
        expected: "Invoice inv_123 should be refunded.",
      },
    });

    expect(result.output).toMatchObject({ status: "approved" });
    expect(toolCalls(result.session)).toHaveLength(2);
    await expect(result).toSatisfyJudge(FactualityJudge(), {
      threshold: 0.6,
    });
  });
});