[
 {
  "cohort": "fintech",
  "arm": "current",
  "config": "claude-sonnet-4-6 / claude-opus-4-8 / claude-opus-4-8",
  "model_01": "claude-sonnet-4-6",
  "model_02": "claude-opus-4-8",
  "model_03": "claude-opus-4-8",
  "claims": 43,
  "receipts": 38,
  "hallucinations": 0,
  "mistags": 3,
  "raw_escapes": 3,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -13.0,
  "gold": 32,
  "cost_usd": 0.412818,
  "exact_cost": false,
  "vendor_anon": "fintech-g32"
 },
 {
  "cohort": "construction",
  "arm": "current",
  "config": "claude-sonnet-4-6 / claude-opus-4-8 / claude-opus-4-8",
  "model_01": "claude-sonnet-4-6",
  "model_02": "claude-opus-4-8",
  "model_03": "claude-opus-4-8",
  "claims": 47,
  "receipts": 19,
  "hallucinations": 0,
  "mistags": 5,
  "raw_escapes": 5,
  "postgate_escapes": 5,
  "gate_blocked": false,
  "score_delta": -9.75,
  "gold": 21,
  "cost_usd": 0.385405,
  "exact_cost": false,
  "vendor_anon": "construction-g21"
 },
 {
  "cohort": "field-service",
  "arm": "current",
  "config": "claude-sonnet-4-6 / claude-opus-4-8 / claude-opus-4-8",
  "model_01": "claude-sonnet-4-6",
  "model_02": "claude-opus-4-8",
  "model_03": "claude-opus-4-8",
  "claims": 47,
  "receipts": 43,
  "hallucinations": 0,
  "mistags": 3,
  "raw_escapes": 3,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 6.0,
  "gold": 9,
  "cost_usd": 0.425471,
  "exact_cost": false,
  "vendor_anon": "field-service-g9"
 },
 {
  "cohort": "ad-sales-publisher",
  "arm": "current",
  "config": "claude-sonnet-4-6 / claude-opus-4-8 / claude-opus-4-8",
  "model_01": "claude-sonnet-4-6",
  "model_02": "claude-opus-4-8",
  "model_03": "claude-opus-4-8",
  "claims": 52,
  "receipts": 40,
  "hallucinations": 0,
  "mistags": 13,
  "raw_escapes": 13,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 0.25,
  "gold": 51,
  "cost_usd": 0.458105,
  "exact_cost": false,
  "vendor_anon": "ad-sales-publisher-g51"
 },
 {
  "cohort": "bd",
  "arm": "current",
  "config": "claude-sonnet-4-6 / claude-opus-4-8 / claude-opus-4-8",
  "model_01": "claude-sonnet-4-6",
  "model_02": "claude-opus-4-8",
  "model_03": "claude-opus-4-8",
  "claims": 49,
  "receipts": 44,
  "hallucinations": 1,
  "mistags": 2,
  "raw_escapes": 3,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -7.75,
  "gold": 31,
  "cost_usd": 0.434461,
  "exact_cost": false,
  "vendor_anon": "bd-g31"
 },
 {
  "cohort": "insurance-brokerage",
  "arm": "current",
  "config": "claude-sonnet-4-6 / claude-opus-4-8 / claude-opus-4-8",
  "model_01": "claude-sonnet-4-6",
  "model_02": "claude-opus-4-8",
  "model_03": "claude-opus-4-8",
  "claims": 49,
  "receipts": 24,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -12.25,
  "gold": 41,
  "cost_usd": 0.368823,
  "exact_cost": false,
  "vendor_anon": "insurance-brokerage-g41"
 },
 {
  "cohort": "pharma-lifesci-crm",
  "arm": "current",
  "config": "claude-sonnet-4-6 / claude-opus-4-8 / claude-opus-4-8",
  "model_01": "claude-sonnet-4-6",
  "model_02": "claude-opus-4-8",
  "model_03": "claude-opus-4-8",
  "claims": 49,
  "receipts": 45,
  "hallucinations": 0,
  "mistags": 3,
  "raw_escapes": 3,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -2.75,
  "gold": 51,
  "cost_usd": 0.40026,
  "exact_cost": false,
  "vendor_anon": "pharma-lifesci-crm-g51"
 },
 {
  "cohort": "cx",
  "arm": "current",
  "config": "claude-sonnet-4-6 / claude-opus-4-8 / claude-opus-4-8",
  "model_01": "claude-sonnet-4-6",
  "model_02": "claude-opus-4-8",
  "model_03": "claude-opus-4-8",
  "claims": 79,
  "receipts": 62,
  "hallucinations": 0,
  "mistags": 14,
  "raw_escapes": 14,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -0.5,
  "gold": 33,
  "cost_usd": 0.523836,
  "exact_cost": false,
  "vendor_anon": "cx-g33"
 },
 {
  "cohort": "pharma-lifesci-crm",
  "arm": "current",
  "config": "claude-sonnet-4-6 / claude-opus-4-8 / claude-opus-4-8",
  "model_01": "claude-sonnet-4-6",
  "model_02": "claude-opus-4-8",
  "model_03": "claude-opus-4-8",
  "claims": 39,
  "receipts": 34,
  "hallucinations": 1,
  "mistags": 5,
  "raw_escapes": 6,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -5.5,
  "gold": 40,
  "cost_usd": 0.391099,
  "exact_cost": false,
  "vendor_anon": "pharma-lifesci-crm-g40"
 },
 {
  "cohort": "customer-success",
  "arm": "current",
  "config": "claude-sonnet-4-6 / claude-opus-4-8 / claude-opus-4-8",
  "model_01": "claude-sonnet-4-6",
  "model_02": "claude-opus-4-8",
  "model_03": "claude-opus-4-8",
  "claims": 39,
  "receipts": 34,
  "hallucinations": 1,
  "mistags": 2,
  "raw_escapes": 3,
  "postgate_escapes": 3,
  "gate_blocked": false,
  "score_delta": -4.0,
  "gold": 33,
  "cost_usd": 0.37159,
  "exact_cost": false,
  "vendor_anon": "customer-success-g33"
 },
 {
  "cohort": "healthcare-clinical",
  "arm": "current",
  "config": "claude-sonnet-4-6 / claude-opus-4-8 / claude-opus-4-8",
  "model_01": "claude-sonnet-4-6",
  "model_02": "claude-opus-4-8",
  "model_03": "claude-opus-4-8",
  "claims": 41,
  "receipts": 41,
  "hallucinations": 0,
  "mistags": 6,
  "raw_escapes": 6,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -5.0,
  "gold": 27,
  "cost_usd": 0.411746,
  "exact_cost": false,
  "vendor_anon": "healthcare-clinical-g27"
 },
 {
  "cohort": "healthcare-rcm",
  "arm": "current",
  "config": "claude-sonnet-4-6 / claude-opus-4-8 / claude-opus-4-8",
  "model_01": "claude-sonnet-4-6",
  "model_02": "claude-opus-4-8",
  "model_03": "claude-opus-4-8",
  "claims": 49,
  "receipts": 40,
  "hallucinations": 1,
  "mistags": 5,
  "raw_escapes": 6,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -5.25,
  "gold": 31,
  "cost_usd": 0.471528,
  "exact_cost": false,
  "vendor_anon": "healthcare-rcm-g31"
 },
 {
  "cohort": "cx",
  "arm": "current",
  "config": "claude-sonnet-4-6 / claude-opus-4-8 / claude-opus-4-8",
  "model_01": "claude-sonnet-4-6",
  "model_02": "claude-opus-4-8",
  "model_03": "claude-opus-4-8",
  "claims": 46,
  "receipts": 40,
  "hallucinations": 0,
  "mistags": 5,
  "raw_escapes": 5,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 0.25,
  "gold": 56,
  "cost_usd": 0.473631,
  "exact_cost": false,
  "vendor_anon": "cx-g56"
 },
 {
  "cohort": "real-estate",
  "arm": "current",
  "config": "claude-sonnet-4-6 / claude-opus-4-8 / claude-opus-4-8",
  "model_01": "claude-sonnet-4-6",
  "model_02": "claude-opus-4-8",
  "model_03": "claude-opus-4-8",
  "claims": 43,
  "receipts": 40,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -4.0,
  "gold": 34,
  "cost_usd": 0.380328,
  "exact_cost": false,
  "vendor_anon": "real-estate-g34"
 },
 {
  "cohort": "healthcare-clinical",
  "arm": "current",
  "config": "claude-sonnet-4-6 / claude-opus-4-8 / claude-opus-4-8",
  "model_01": "claude-sonnet-4-6",
  "model_02": "claude-opus-4-8",
  "model_03": "claude-opus-4-8",
  "claims": 45,
  "receipts": 33,
  "hallucinations": 2,
  "mistags": 15,
  "raw_escapes": 17,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -0.5,
  "gold": 38,
  "cost_usd": 0.446923,
  "exact_cost": false,
  "vendor_anon": "healthcare-clinical-g38"
 },
 {
  "cohort": "higher-ed-crm",
  "arm": "current",
  "config": "claude-sonnet-4-6 / claude-opus-4-8 / claude-opus-4-8",
  "model_01": "claude-sonnet-4-6",
  "model_02": "claude-opus-4-8",
  "model_03": "claude-opus-4-8",
  "claims": 47,
  "receipts": 37,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 1.25,
  "gold": 27,
  "cost_usd": 0.403545,
  "exact_cost": false,
  "vendor_anon": "higher-ed-crm-g27"
 },
 {
  "cohort": "field-service",
  "arm": "current",
  "config": "claude-sonnet-4-6 / claude-opus-4-8 / claude-opus-4-8",
  "model_01": "claude-sonnet-4-6",
  "model_02": "claude-opus-4-8",
  "model_03": "claude-opus-4-8",
  "claims": 45,
  "receipts": 28,
  "hallucinations": 0,
  "mistags": 4,
  "raw_escapes": 4,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -5.25,
  "gold": 1,
  "cost_usd": 0.361526,
  "exact_cost": false,
  "vendor_anon": "field-service-g1"
 },
 {
  "cohort": "bd",
  "arm": "current",
  "config": "claude-sonnet-4-6 / claude-opus-4-8 / claude-opus-4-8",
  "model_01": "claude-sonnet-4-6",
  "model_02": "claude-opus-4-8",
  "model_03": "claude-opus-4-8",
  "claims": 43,
  "receipts": 38,
  "hallucinations": 0,
  "mistags": 4,
  "raw_escapes": 4,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -6.5,
  "gold": 54,
  "cost_usd": 0.467538,
  "exact_cost": false,
  "vendor_anon": "bd-g54"
 },
 {
  "cohort": "higher-ed-crm",
  "arm": "current",
  "config": "claude-sonnet-4-6 / claude-opus-4-8 / claude-opus-4-8",
  "model_01": "claude-sonnet-4-6",
  "model_02": "claude-opus-4-8",
  "model_03": "claude-opus-4-8",
  "claims": 55,
  "receipts": 21,
  "hallucinations": 0,
  "mistags": 4,
  "raw_escapes": 4,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -2.25,
  "gold": 46,
  "cost_usd": 0.468876,
  "exact_cost": false,
  "vendor_anon": "higher-ed-crm-g46"
 },
 {
  "cohort": "ad-sales-publisher",
  "arm": "current",
  "config": "claude-sonnet-4-6 / claude-opus-4-8 / claude-opus-4-8",
  "model_01": "claude-sonnet-4-6",
  "model_02": "claude-opus-4-8",
  "model_03": "claude-opus-4-8",
  "claims": 42,
  "receipts": 24,
  "hallucinations": 0,
  "mistags": 1,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -6.5,
  "gold": 44,
  "cost_usd": 0.427998,
  "exact_cost": false,
  "vendor_anon": "ad-sales-publisher-g44"
 },
 {
  "cohort": "fintech",
  "arm": "current",
  "config": "claude-sonnet-4-6 / claude-opus-4-8 / claude-opus-4-8",
  "model_01": "claude-sonnet-4-6",
  "model_02": "claude-opus-4-8",
  "model_03": "claude-opus-4-8",
  "claims": 45,
  "receipts": 39,
  "hallucinations": 0,
  "mistags": 9,
  "raw_escapes": 9,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -13.0,
  "gold": 40,
  "cost_usd": 0.465554,
  "exact_cost": false,
  "vendor_anon": "fintech-g40"
 },
 {
  "cohort": "real-estate",
  "arm": "current",
  "config": "claude-sonnet-4-6 / claude-opus-4-8 / claude-opus-4-8",
  "model_01": "claude-sonnet-4-6",
  "model_02": "claude-opus-4-8",
  "model_03": "claude-opus-4-8",
  "claims": 37,
  "receipts": 33,
  "hallucinations": 0,
  "mistags": 7,
  "raw_escapes": 7,
  "postgate_escapes": 7,
  "gate_blocked": false,
  "score_delta": -12.5,
  "gold": 65,
  "cost_usd": 0.383812,
  "exact_cost": false,
  "vendor_anon": "real-estate-g65"
 },
 {
  "cohort": "healthcare-rcm",
  "arm": "current",
  "config": "claude-sonnet-4-6 / claude-opus-4-8 / claude-opus-4-8",
  "model_01": "claude-sonnet-4-6",
  "model_02": "claude-opus-4-8",
  "model_03": "claude-opus-4-8",
  "claims": 52,
  "receipts": 40,
  "hallucinations": 0,
  "mistags": 8,
  "raw_escapes": 8,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -5.5,
  "gold": 38,
  "cost_usd": 0.427887,
  "exact_cost": false,
  "vendor_anon": "healthcare-rcm-g38"
 },
 {
  "cohort": "construction",
  "arm": "current",
  "config": "claude-sonnet-4-6 / claude-opus-4-8 / claude-opus-4-8",
  "model_01": "claude-sonnet-4-6",
  "model_02": "claude-opus-4-8",
  "model_03": "claude-opus-4-8",
  "claims": 42,
  "receipts": 44,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": false,
  "score_delta": 9.5,
  "gold": 13,
  "cost_usd": 0.442438,
  "exact_cost": false,
  "vendor_anon": "construction-g13"
 },
 {
  "cohort": "customer-success",
  "arm": "current",
  "config": "claude-sonnet-4-6 / claude-opus-4-8 / claude-opus-4-8",
  "model_01": "claude-sonnet-4-6",
  "model_02": "claude-opus-4-8",
  "model_03": "claude-opus-4-8",
  "claims": 51,
  "receipts": 45,
  "hallucinations": 0,
  "mistags": 2,
  "raw_escapes": 2,
  "postgate_escapes": 2,
  "gate_blocked": false,
  "score_delta": -14.25,
  "gold": 43,
  "cost_usd": 0.468667,
  "exact_cost": false,
  "vendor_anon": "customer-success-g43"
 },
 {
  "cohort": "tax",
  "arm": "current",
  "config": "claude-sonnet-4-6 / claude-opus-4-8 / claude-opus-4-8",
  "model_01": "claude-sonnet-4-6",
  "model_02": "claude-opus-4-8",
  "model_03": "claude-opus-4-8",
  "claims": 51,
  "receipts": 37,
  "hallucinations": 0,
  "mistags": 1,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 3.25,
  "gold": 48,
  "cost_usd": 0.449793,
  "exact_cost": false,
  "vendor_anon": "tax-g48"
 },
 {
  "cohort": "healthcare-rcm",
  "arm": "current",
  "config": "claude-sonnet-4-6 / claude-opus-4-8 / claude-opus-4-8",
  "model_01": "claude-sonnet-4-6",
  "model_02": "claude-opus-4-8",
  "model_03": "claude-opus-4-8",
  "claims": 39,
  "receipts": 34,
  "hallucinations": 0,
  "mistags": 2,
  "raw_escapes": 2,
  "postgate_escapes": 2,
  "gate_blocked": false,
  "score_delta": -0.5,
  "gold": 23,
  "cost_usd": 0.416,
  "exact_cost": false,
  "vendor_anon": "healthcare-rcm-g23"
 },
 {
  "cohort": "insurance-brokerage",
  "arm": "current",
  "config": "claude-sonnet-4-6 / claude-opus-4-8 / claude-opus-4-8",
  "model_01": "claude-sonnet-4-6",
  "model_02": "claude-opus-4-8",
  "model_03": "claude-opus-4-8",
  "claims": 30,
  "receipts": 21,
  "hallucinations": 0,
  "mistags": 1,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -28.5,
  "gold": 53,
  "cost_usd": 0.333428,
  "exact_cost": false,
  "vendor_anon": "insurance-brokerage-g53"
 },
 {
  "cohort": "tax",
  "arm": "current",
  "config": "claude-sonnet-4-6 / claude-opus-4-8 / claude-opus-4-8",
  "model_01": "claude-sonnet-4-6",
  "model_02": "claude-opus-4-8",
  "model_03": "claude-opus-4-8",
  "claims": 43,
  "receipts": 41,
  "hallucinations": 0,
  "mistags": 3,
  "raw_escapes": 3,
  "postgate_escapes": 3,
  "gate_blocked": false,
  "score_delta": -0.25,
  "gold": 59,
  "cost_usd": 0.46352,
  "exact_cost": false,
  "vendor_anon": "tax-g59"
 },
 {
  "cohort": "construction",
  "arm": "new",
  "config": "gpt-5-mini / qwen3-235b-a22b-2507 / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "qwen/qwen3-235b-a22b-2507",
  "model_03": "openai/o4-mini",
  "claims": 27,
  "receipts": 13,
  "hallucinations": 0,
  "mistags": 2,
  "raw_escapes": 2,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -0.06,
  "gold": 21,
  "cost_usd": 0.044922,
  "exact_cost": true,
  "vendor_anon": "construction-g21"
 },
 {
  "cohort": "field-service",
  "arm": "new",
  "config": "gpt-5-mini / qwen3-235b-a22b-2507 / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "qwen/qwen3-235b-a22b-2507",
  "model_03": "openai/o4-mini",
  "claims": 30,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 3,
  "raw_escapes": 3,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 3.5,
  "gold": 9,
  "cost_usd": 0.042721,
  "exact_cost": true,
  "vendor_anon": "field-service-g9"
 },
 {
  "cohort": "ad-sales-publisher",
  "arm": "new",
  "config": "gpt-5-mini / qwen3-235b-a22b-2507 / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "qwen/qwen3-235b-a22b-2507",
  "model_03": "openai/o4-mini",
  "claims": 32,
  "receipts": 13,
  "hallucinations": 0,
  "mistags": 2,
  "raw_escapes": 2,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -19.75,
  "gold": 51,
  "cost_usd": 0.041706,
  "exact_cost": true,
  "vendor_anon": "ad-sales-publisher-g51"
 },
 {
  "cohort": "insurance-brokerage",
  "arm": "new",
  "config": "gpt-5-mini / qwen3-235b-a22b-2507 / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "qwen/qwen3-235b-a22b-2507",
  "model_03": "openai/o4-mini",
  "claims": 34,
  "receipts": 6,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 11.5,
  "gold": 41,
  "cost_usd": 0.042801,
  "exact_cost": true,
  "vendor_anon": "insurance-brokerage-g41"
 },
 {
  "cohort": "pharma-lifesci-crm",
  "arm": "new",
  "config": "gpt-5-mini / qwen3-235b-a22b-2507 / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "qwen/qwen3-235b-a22b-2507",
  "model_03": "openai/o4-mini",
  "claims": 25,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 3,
  "raw_escapes": 3,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 6.5,
  "gold": 51,
  "cost_usd": 0.035844,
  "exact_cost": true,
  "vendor_anon": "pharma-lifesci-crm-g51"
 },
 {
  "cohort": "cx",
  "arm": "new",
  "config": "gpt-5-mini / qwen3-235b-a22b-2507 / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "qwen/qwen3-235b-a22b-2507",
  "model_03": "openai/o4-mini",
  "claims": 49,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 10,
  "raw_escapes": 10,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -5.5,
  "gold": 33,
  "cost_usd": 0.050754,
  "exact_cost": true,
  "vendor_anon": "cx-g33"
 },
 {
  "cohort": "cx",
  "arm": "new",
  "config": "gpt-5-mini / qwen3-235b-a22b-2507 / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "qwen/qwen3-235b-a22b-2507",
  "model_03": "openai/o4-mini",
  "claims": 42,
  "receipts": 7,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 12.75,
  "gold": 56,
  "cost_usd": 0.050137,
  "exact_cost": true,
  "vendor_anon": "cx-g56"
 },
 {
  "cohort": "real-estate",
  "arm": "new",
  "config": "gpt-5-mini / qwen3-235b-a22b-2507 / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "qwen/qwen3-235b-a22b-2507",
  "model_03": "openai/o4-mini",
  "claims": 23,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 1,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 6.5,
  "gold": 34,
  "cost_usd": 0.036644,
  "exact_cost": true,
  "vendor_anon": "real-estate-g34"
 },
 {
  "cohort": "fintech",
  "arm": "new",
  "config": "gpt-5-mini / qwen3-235b-a22b-2507 / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "qwen/qwen3-235b-a22b-2507",
  "model_03": "openai/o4-mini",
  "claims": 28,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 5,
  "raw_escapes": 5,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -25.5,
  "gold": 40,
  "cost_usd": 0.041072,
  "exact_cost": true,
  "vendor_anon": "fintech-g40"
 },
 {
  "cohort": "real-estate",
  "arm": "new",
  "config": "gpt-5-mini / qwen3-235b-a22b-2507 / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "qwen/qwen3-235b-a22b-2507",
  "model_03": "openai/o4-mini",
  "claims": 33,
  "receipts": 20,
  "hallucinations": 0,
  "mistags": 9,
  "raw_escapes": 9,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": null,
  "gold": 65,
  "cost_usd": 0.042414,
  "exact_cost": true,
  "vendor_anon": "real-estate-g65"
 },
 {
  "cohort": "construction",
  "arm": "new",
  "config": "gpt-5-mini / qwen3-235b-a22b-2507 / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "qwen/qwen3-235b-a22b-2507",
  "model_03": "openai/o4-mini",
  "claims": 30,
  "receipts": 25,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 2.0,
  "gold": 13,
  "cost_usd": 0.056919,
  "exact_cost": true,
  "vendor_anon": "construction-g13"
 },
 {
  "cohort": "customer-success",
  "arm": "new",
  "config": "gpt-5-mini / qwen3-235b-a22b-2507 / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "qwen/qwen3-235b-a22b-2507",
  "model_03": "openai/o4-mini",
  "claims": 23,
  "receipts": 5,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -8.5,
  "gold": 43,
  "cost_usd": 0.043817,
  "exact_cost": true,
  "vendor_anon": "customer-success-g43"
 },
 {
  "cohort": "tax",
  "arm": "new",
  "config": "gpt-5-mini / qwen3-235b-a22b-2507 / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "qwen/qwen3-235b-a22b-2507",
  "model_03": "openai/o4-mini",
  "claims": 28,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 1,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -20.5,
  "gold": 48,
  "cost_usd": 0.052137,
  "exact_cost": true,
  "vendor_anon": "tax-g48"
 },
 {
  "cohort": "tax",
  "arm": "new",
  "config": "gpt-5-mini / qwen3-235b-a22b-2507 / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "qwen/qwen3-235b-a22b-2507",
  "model_03": "openai/o4-mini",
  "claims": 31,
  "receipts": 4,
  "hallucinations": 1,
  "mistags": 0,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 9.75,
  "gold": 59,
  "cost_usd": 0.048532,
  "exact_cost": true,
  "vendor_anon": "tax-g59"
 },
 {
  "cohort": "field-service",
  "arm": "new",
  "config": "gpt-5-mini / gemma-3-27b-it / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "google/gemma-3-27b-it",
  "model_03": "openai/o4-mini",
  "claims": 4,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 15.0,
  "gold": 9,
  "cost_usd": 0.048664,
  "exact_cost": true,
  "vendor_anon": "field-service-g9"
 },
 {
  "cohort": "ad-sales-publisher",
  "arm": "new",
  "config": "gpt-5-mini / gemma-3-27b-it / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "google/gemma-3-27b-it",
  "model_03": "openai/o4-mini",
  "claims": 4,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -12.25,
  "gold": 51,
  "cost_usd": 0.046398,
  "exact_cost": true,
  "vendor_anon": "ad-sales-publisher-g51"
 },
 {
  "cohort": "healthcare-clinical",
  "arm": "new",
  "config": "gpt-5-mini / gemma-3-27b-it / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "google/gemma-3-27b-it",
  "model_03": "openai/o4-mini",
  "claims": 4,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 26.75,
  "gold": 27,
  "cost_usd": 0.040035,
  "exact_cost": true,
  "vendor_anon": "healthcare-clinical-g27"
 },
 {
  "cohort": "healthcare-rcm",
  "arm": "new",
  "config": "gpt-5-mini / gemma-3-27b-it / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "google/gemma-3-27b-it",
  "model_03": "openai/o4-mini",
  "claims": 12,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 7.75,
  "gold": 31,
  "cost_usd": 0.04654,
  "exact_cost": true,
  "vendor_anon": "healthcare-rcm-g31"
 },
 {
  "cohort": "cx",
  "arm": "new",
  "config": "gpt-5-mini / gemma-3-27b-it / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "google/gemma-3-27b-it",
  "model_03": "openai/o4-mini",
  "claims": 18,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 1,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -2.0,
  "gold": 56,
  "cost_usd": 0.052516,
  "exact_cost": true,
  "vendor_anon": "cx-g56"
 },
 {
  "cohort": "higher-ed-crm",
  "arm": "new",
  "config": "gpt-5-mini / gemma-3-27b-it / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "google/gemma-3-27b-it",
  "model_03": "openai/o4-mini",
  "claims": 4,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 6.0,
  "gold": 27,
  "cost_usd": 0.036021,
  "exact_cost": true,
  "vendor_anon": "higher-ed-crm-g27"
 },
 {
  "cohort": "higher-ed-crm",
  "arm": "new",
  "config": "gpt-5-mini / gemma-3-27b-it / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "google/gemma-3-27b-it",
  "model_03": "openai/o4-mini",
  "claims": 16,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 1.5,
  "gold": 46,
  "cost_usd": 0.044814,
  "exact_cost": true,
  "vendor_anon": "higher-ed-crm-g46"
 },
 {
  "cohort": "construction",
  "arm": "new",
  "config": "gpt-5-mini / gemma-3-27b-it / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "google/gemma-3-27b-it",
  "model_03": "openai/o4-mini",
  "claims": 11,
  "receipts": 0,
  "hallucinations": 1,
  "mistags": 0,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": null,
  "gold": 13,
  "cost_usd": 0.043606,
  "exact_cost": true,
  "vendor_anon": "construction-g13"
 },
 {
  "cohort": "construction",
  "arm": "new",
  "config": "gpt-5-mini / llama-3.3-70b-instruct / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "meta-llama/llama-3.3-70b-instruct",
  "model_03": "openai/o4-mini",
  "claims": 28,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 5,
  "raw_escapes": 5,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": null,
  "gold": 21,
  "cost_usd": 0.038279,
  "exact_cost": true,
  "vendor_anon": "construction-g21"
 },
 {
  "cohort": "insurance-brokerage",
  "arm": "new",
  "config": "gpt-5-mini / llama-3.3-70b-instruct / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "meta-llama/llama-3.3-70b-instruct",
  "model_03": "openai/o4-mini",
  "claims": 12,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": null,
  "gold": 41,
  "cost_usd": 0.036046,
  "exact_cost": true,
  "vendor_anon": "insurance-brokerage-g41"
 },
 {
  "cohort": "pharma-lifesci-crm",
  "arm": "new",
  "config": "gpt-5-mini / llama-3.3-70b-instruct / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "meta-llama/llama-3.3-70b-instruct",
  "model_03": "openai/o4-mini",
  "claims": 24,
  "receipts": 7,
  "hallucinations": 1,
  "mistags": 2,
  "raw_escapes": 3,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": null,
  "gold": 51,
  "cost_usd": 0.045459,
  "exact_cost": true,
  "vendor_anon": "pharma-lifesci-crm-g51"
 },
 {
  "cohort": "construction",
  "arm": "new",
  "config": "gpt-5-mini / llama-4-maverick / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "meta-llama/llama-4-maverick",
  "model_03": "openai/o4-mini",
  "claims": 21,
  "receipts": 4,
  "hallucinations": 0,
  "mistags": 1,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 13.25,
  "gold": 21,
  "cost_usd": 0.052014,
  "exact_cost": true,
  "vendor_anon": "construction-g21"
 },
 {
  "cohort": "ad-sales-publisher",
  "arm": "new",
  "config": "gpt-5-mini / llama-4-maverick / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "meta-llama/llama-4-maverick",
  "model_03": "openai/o4-mini",
  "claims": 21,
  "receipts": 4,
  "hallucinations": 2,
  "mistags": 2,
  "raw_escapes": 4,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -13.5,
  "gold": 51,
  "cost_usd": 0.04758,
  "exact_cost": true,
  "vendor_anon": "ad-sales-publisher-g51"
 },
 {
  "cohort": "ad-sales-publisher",
  "arm": "new",
  "config": "gpt-5-mini / deepseek-chat-v3.1 / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "deepseek/deepseek-chat-v3.1",
  "model_03": "openai/o4-mini",
  "claims": 28,
  "receipts": 1,
  "hallucinations": 0,
  "mistags": 3,
  "raw_escapes": 3,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -3.5,
  "gold": 51,
  "cost_usd": 0.045094,
  "exact_cost": true,
  "vendor_anon": "ad-sales-publisher-g51"
 },
 {
  "cohort": "insurance-brokerage",
  "arm": "new",
  "config": "gpt-5-mini / deepseek-chat-v3.1 / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "deepseek/deepseek-chat-v3.1",
  "model_03": "openai/o4-mini",
  "claims": 28,
  "receipts": 5,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -37.75,
  "gold": 41,
  "cost_usd": 0.043979,
  "exact_cost": true,
  "vendor_anon": "insurance-brokerage-g41"
 },
 {
  "cohort": "cx",
  "arm": "new",
  "config": "gpt-5-mini / deepseek-chat-v3.1 / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "deepseek/deepseek-chat-v3.1",
  "model_03": "openai/o4-mini",
  "claims": 32,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 4,
  "raw_escapes": 4,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": null,
  "gold": 33,
  "cost_usd": 0.050902,
  "exact_cost": true,
  "vendor_anon": "cx-g33"
 },
 {
  "cohort": "healthcare-rcm",
  "arm": "new",
  "config": "gpt-5-mini / deepseek-chat-v3.1 / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "deepseek/deepseek-chat-v3.1",
  "model_03": "openai/o4-mini",
  "claims": 23,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 1,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 18.5,
  "gold": 31,
  "cost_usd": 0.046547,
  "exact_cost": true,
  "vendor_anon": "healthcare-rcm-g31"
 },
 {
  "cohort": "healthcare-clinical",
  "arm": "new",
  "config": "gpt-5-mini / deepseek-chat-v3.1 / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "deepseek/deepseek-chat-v3.1",
  "model_03": "openai/o4-mini",
  "claims": 24,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 7,
  "raw_escapes": 7,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -13.5,
  "gold": 38,
  "cost_usd": 0.051809,
  "exact_cost": true,
  "vendor_anon": "healthcare-clinical-g38"
 },
 {
  "cohort": "higher-ed-crm",
  "arm": "new",
  "config": "gpt-5-mini / deepseek-chat-v3.1 / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "deepseek/deepseek-chat-v3.1",
  "model_03": "openai/o4-mini",
  "claims": 54,
  "receipts": 1,
  "hallucinations": 0,
  "mistags": 5,
  "raw_escapes": 5,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 12.75,
  "gold": 46,
  "cost_usd": 0.046048,
  "exact_cost": true,
  "vendor_anon": "higher-ed-crm-g46"
 },
 {
  "cohort": "ad-sales-publisher",
  "arm": "new",
  "config": "gpt-5-mini / deepseek-chat-v3.1 / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "deepseek/deepseek-chat-v3.1",
  "model_03": "openai/o4-mini",
  "claims": 28,
  "receipts": 20,
  "hallucinations": 0,
  "mistags": 1,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -2.62,
  "gold": 44,
  "cost_usd": 0.04877,
  "exact_cost": true,
  "vendor_anon": "ad-sales-publisher-g44"
 },
 {
  "cohort": "fintech",
  "arm": "new",
  "config": "gpt-5-mini / deepseek-chat-v3.1 / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "deepseek/deepseek-chat-v3.1",
  "model_03": "openai/o4-mini",
  "claims": 34,
  "receipts": 3,
  "hallucinations": 0,
  "mistags": 8,
  "raw_escapes": 8,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -19.75,
  "gold": 40,
  "cost_usd": 0.043386,
  "exact_cost": true,
  "vendor_anon": "fintech-g40"
 },
 {
  "cohort": "real-estate",
  "arm": "new",
  "config": "gpt-5-mini / deepseek-chat-v3.1 / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "deepseek/deepseek-chat-v3.1",
  "model_03": "openai/o4-mini",
  "claims": 29,
  "receipts": 13,
  "hallucinations": 0,
  "mistags": 5,
  "raw_escapes": 5,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -35.5,
  "gold": 65,
  "cost_usd": 0.049425,
  "exact_cost": true,
  "vendor_anon": "real-estate-g65"
 },
 {
  "cohort": "healthcare-rcm",
  "arm": "new",
  "config": "gpt-5-mini / deepseek-chat-v3.1 / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "deepseek/deepseek-chat-v3.1",
  "model_03": "openai/o4-mini",
  "claims": 43,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 9,
  "raw_escapes": 9,
  "postgate_escapes": 9,
  "gate_blocked": false,
  "score_delta": -9.25,
  "gold": 38,
  "cost_usd": 0.044794,
  "exact_cost": true,
  "vendor_anon": "healthcare-rcm-g38"
 },
 {
  "cohort": "construction",
  "arm": "new",
  "config": "gpt-5-mini / deepseek-chat-v3.1 / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "deepseek/deepseek-chat-v3.1",
  "model_03": "openai/o4-mini",
  "claims": 24,
  "receipts": 18,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 3.25,
  "gold": 13,
  "cost_usd": 0.052031,
  "exact_cost": true,
  "vendor_anon": "construction-g13"
 },
 {
  "cohort": "customer-success",
  "arm": "new",
  "config": "gpt-5-mini / deepseek-chat-v3.1 / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "deepseek/deepseek-chat-v3.1",
  "model_03": "openai/o4-mini",
  "claims": 24,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 1,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -0.5,
  "gold": 43,
  "cost_usd": 0.047596,
  "exact_cost": true,
  "vendor_anon": "customer-success-g43"
 },
 {
  "cohort": "tax",
  "arm": "new",
  "config": "gpt-5-mini / deepseek-chat-v3.1 / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "deepseek/deepseek-chat-v3.1",
  "model_03": "openai/o4-mini",
  "claims": 40,
  "receipts": 26,
  "hallucinations": 0,
  "mistags": 1,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 12.0,
  "gold": 48,
  "cost_usd": 0.050138,
  "exact_cost": true,
  "vendor_anon": "tax-g48"
 },
 {
  "cohort": "healthcare-rcm",
  "arm": "new",
  "config": "gpt-5-mini / deepseek-chat-v3.1 / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "deepseek/deepseek-chat-v3.1",
  "model_03": "openai/o4-mini",
  "claims": 29,
  "receipts": 13,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 0.75,
  "gold": 23,
  "cost_usd": 0.04902,
  "exact_cost": true,
  "vendor_anon": "healthcare-rcm-g23"
 },
 {
  "cohort": "tax",
  "arm": "new",
  "config": "gpt-5-mini / deepseek-chat-v3.1 / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "deepseek/deepseek-chat-v3.1",
  "model_03": "openai/o4-mini",
  "claims": 35,
  "receipts": 4,
  "hallucinations": 1,
  "mistags": 4,
  "raw_escapes": 5,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -1.0,
  "gold": 59,
  "cost_usd": 0.053113,
  "exact_cost": true,
  "vendor_anon": "tax-g59"
 },
 {
  "cohort": "construction",
  "arm": "new",
  "config": "gpt-5-mini / gpt-5-mini / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-5-mini",
  "model_03": "openai/o4-mini",
  "claims": 80,
  "receipts": 15,
  "hallucinations": 0,
  "mistags": 13,
  "raw_escapes": 13,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -5.06,
  "gold": 21,
  "cost_usd": 0.073229,
  "exact_cost": true,
  "vendor_anon": "construction-g21"
 },
 {
  "cohort": "field-service",
  "arm": "new",
  "config": "gpt-5-mini / gpt-5-mini / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-5-mini",
  "model_03": "openai/o4-mini",
  "claims": 75,
  "receipts": 25,
  "hallucinations": 2,
  "mistags": 4,
  "raw_escapes": 6,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 8.5,
  "gold": 9,
  "cost_usd": 0.072666,
  "exact_cost": true,
  "vendor_anon": "field-service-g9"
 },
 {
  "cohort": "construction",
  "arm": "new",
  "config": "gpt-5-mini / mistral-large-2512 / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "mistralai/mistral-large-2512",
  "model_03": "openai/o4-mini",
  "claims": 37,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 5,
  "raw_escapes": 5,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 10.0,
  "gold": 21,
  "cost_usd": 0.059288,
  "exact_cost": true,
  "vendor_anon": "construction-g21"
 },
 {
  "cohort": "field-service",
  "arm": "new",
  "config": "gpt-5-mini / mistral-large-2512 / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "mistralai/mistral-large-2512",
  "model_03": "openai/o4-mini",
  "claims": 32,
  "receipts": 13,
  "hallucinations": 0,
  "mistags": 3,
  "raw_escapes": 3,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 4.75,
  "gold": 9,
  "cost_usd": 0.061857,
  "exact_cost": true,
  "vendor_anon": "field-service-g9"
 },
 {
  "cohort": "pharma-lifesci-crm",
  "arm": "new",
  "config": "gpt-5-mini / mistral-large-2512 / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "mistralai/mistral-large-2512",
  "model_03": "openai/o4-mini",
  "claims": 26,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 3,
  "raw_escapes": 3,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 3.5,
  "gold": 51,
  "cost_usd": 0.052386,
  "exact_cost": true,
  "vendor_anon": "pharma-lifesci-crm-g51"
 },
 {
  "cohort": "cx",
  "arm": "new",
  "config": "gpt-5-mini / mistral-large-2512 / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "mistralai/mistral-large-2512",
  "model_03": "openai/o4-mini",
  "claims": 38,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 6,
  "raw_escapes": 6,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 19.0,
  "gold": 33,
  "cost_usd": 0.059978,
  "exact_cost": true,
  "vendor_anon": "cx-g33"
 },
 {
  "cohort": "pharma-lifesci-crm",
  "arm": "new",
  "config": "gpt-5-mini / mistral-large-2512 / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "mistralai/mistral-large-2512",
  "model_03": "openai/o4-mini",
  "claims": 31,
  "receipts": 13,
  "hallucinations": 4,
  "mistags": 4,
  "raw_escapes": 8,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -0.5,
  "gold": 40,
  "cost_usd": 0.052953,
  "exact_cost": true,
  "vendor_anon": "pharma-lifesci-crm-g40"
 },
 {
  "cohort": "construction",
  "arm": "new",
  "config": "gpt-5-mini / gemini-2.5-flash / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "google/gemini-2.5-flash",
  "model_03": "openai/o4-mini",
  "claims": 32,
  "receipts": 0,
  "hallucinations": 11,
  "mistags": 4,
  "raw_escapes": 15,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -3.5,
  "gold": 21,
  "cost_usd": 0.051559,
  "exact_cost": true,
  "vendor_anon": "construction-g21"
 },
 {
  "cohort": "field-service",
  "arm": "new",
  "config": "gpt-5-mini / deepseek-r1 / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "deepseek/deepseek-r1",
  "model_03": "openai/o4-mini",
  "claims": 27,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 2,
  "raw_escapes": 2,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -2.5,
  "gold": 9,
  "cost_usd": 0.06894,
  "exact_cost": true,
  "vendor_anon": "field-service-g9"
 },
 {
  "cohort": "bd",
  "arm": "new",
  "config": "gpt-5-mini / deepseek-r1 / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "deepseek/deepseek-r1",
  "model_03": "openai/o4-mini",
  "claims": 19,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 1,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -15.5,
  "gold": 31,
  "cost_usd": 0.067586,
  "exact_cost": true,
  "vendor_anon": "bd-g31"
 },
 {
  "cohort": "insurance-brokerage",
  "arm": "new",
  "config": "gpt-5-mini / deepseek-r1 / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "deepseek/deepseek-r1",
  "model_03": "openai/o4-mini",
  "claims": 17,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": null,
  "gold": 41,
  "cost_usd": 0.056978,
  "exact_cost": true,
  "vendor_anon": "insurance-brokerage-g41"
 },
 {
  "cohort": "pharma-lifesci-crm",
  "arm": "new",
  "config": "gpt-5-mini / deepseek-r1 / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "deepseek/deepseek-r1",
  "model_03": "openai/o4-mini",
  "claims": 25,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 1,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -6.0,
  "gold": 51,
  "cost_usd": 0.064501,
  "exact_cost": true,
  "vendor_anon": "pharma-lifesci-crm-g51"
 },
 {
  "cohort": "customer-success",
  "arm": "new",
  "config": "gpt-5-mini / deepseek-r1 / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "deepseek/deepseek-r1",
  "model_03": "openai/o4-mini",
  "claims": 23,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 1,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 18.0,
  "gold": 33,
  "cost_usd": 0.064777,
  "exact_cost": true,
  "vendor_anon": "customer-success-g33"
 },
 {
  "cohort": "healthcare-rcm",
  "arm": "new",
  "config": "gpt-5-mini / deepseek-r1 / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "deepseek/deepseek-r1",
  "model_03": "openai/o4-mini",
  "claims": 28,
  "receipts": 0,
  "hallucinations": 2,
  "mistags": 0,
  "raw_escapes": 2,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -4.0,
  "gold": 31,
  "cost_usd": 0.058482,
  "exact_cost": true,
  "vendor_anon": "healthcare-rcm-g31"
 },
 {
  "cohort": "field-service",
  "arm": "new",
  "config": "gpt-5-mini / grok-4.3 / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "x-ai/grok-4.3",
  "model_03": "openai/o4-mini",
  "claims": 21,
  "receipts": 1,
  "hallucinations": 0,
  "mistags": 1,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -0.25,
  "gold": 9,
  "cost_usd": 0.081568,
  "exact_cost": true,
  "vendor_anon": "field-service-g9"
 },
 {
  "cohort": "pharma-lifesci-crm",
  "arm": "new",
  "config": "gpt-5-mini / grok-4.3 / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "x-ai/grok-4.3",
  "model_03": "openai/o4-mini",
  "claims": 20,
  "receipts": 1,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -15.5,
  "gold": 40,
  "cost_usd": 0.066068,
  "exact_cost": true,
  "vendor_anon": "pharma-lifesci-crm-g40"
 },
 {
  "cohort": "cx",
  "arm": "new",
  "config": "gpt-5-mini / grok-4.3 / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "x-ai/grok-4.3",
  "model_03": "openai/o4-mini",
  "claims": 22,
  "receipts": 6,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 0.0,
  "gold": 56,
  "cost_usd": 0.081177,
  "exact_cost": true,
  "vendor_anon": "cx-g56"
 },
 {
  "cohort": "healthcare-clinical",
  "arm": "new",
  "config": "gpt-5-mini / grok-4.3 / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "x-ai/grok-4.3",
  "model_03": "openai/o4-mini",
  "claims": 23,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 9,
  "raw_escapes": 9,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -8.0,
  "gold": 38,
  "cost_usd": 0.077411,
  "exact_cost": true,
  "vendor_anon": "healthcare-clinical-g38"
 },
 {
  "cohort": "higher-ed-crm",
  "arm": "new",
  "config": "gpt-5-mini / grok-4.3 / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "x-ai/grok-4.3",
  "model_03": "openai/o4-mini",
  "claims": 28,
  "receipts": 1,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -0.5,
  "gold": 27,
  "cost_usd": 0.068242,
  "exact_cost": true,
  "vendor_anon": "higher-ed-crm-g27"
 },
 {
  "cohort": "field-service",
  "arm": "new",
  "config": "gpt-5-mini / grok-4.3 / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "x-ai/grok-4.3",
  "model_03": "openai/o4-mini",
  "claims": 19,
  "receipts": 2,
  "hallucinations": 0,
  "mistags": 2,
  "raw_escapes": 2,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 4.0,
  "gold": 1,
  "cost_usd": 0.063937,
  "exact_cost": true,
  "vendor_anon": "field-service-g1"
 },
 {
  "cohort": "bd",
  "arm": "new",
  "config": "gpt-5-mini / grok-4.3 / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "x-ai/grok-4.3",
  "model_03": "openai/o4-mini",
  "claims": 33,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -0.25,
  "gold": 54,
  "cost_usd": 0.08142,
  "exact_cost": true,
  "vendor_anon": "bd-g54"
 },
 {
  "cohort": "ad-sales-publisher",
  "arm": "new",
  "config": "gpt-5-mini / grok-4.3 / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "x-ai/grok-4.3",
  "model_03": "openai/o4-mini",
  "claims": 19,
  "receipts": 1,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -5.25,
  "gold": 44,
  "cost_usd": 0.071644,
  "exact_cost": true,
  "vendor_anon": "ad-sales-publisher-g44"
 },
 {
  "cohort": "fintech",
  "arm": "new",
  "config": "gpt-5-mini / grok-4.3 / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "x-ai/grok-4.3",
  "model_03": "openai/o4-mini",
  "claims": 26,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 8,
  "raw_escapes": 8,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -7.5,
  "gold": 40,
  "cost_usd": 0.067763,
  "exact_cost": true,
  "vendor_anon": "fintech-g40"
 },
 {
  "cohort": "construction",
  "arm": "new",
  "config": "gpt-5-mini / grok-4.3 / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "x-ai/grok-4.3",
  "model_03": "openai/o4-mini",
  "claims": 33,
  "receipts": 14,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 15.75,
  "gold": 13,
  "cost_usd": 0.082533,
  "exact_cost": true,
  "vendor_anon": "construction-g13"
 },
 {
  "cohort": "customer-success",
  "arm": "new",
  "config": "gpt-5-mini / grok-4.3 / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "x-ai/grok-4.3",
  "model_03": "openai/o4-mini",
  "claims": 25,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 3.75,
  "gold": 43,
  "cost_usd": 0.076823,
  "exact_cost": true,
  "vendor_anon": "customer-success-g43"
 },
 {
  "cohort": "tax",
  "arm": "new",
  "config": "gpt-5-mini / grok-4.3 / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "x-ai/grok-4.3",
  "model_03": "openai/o4-mini",
  "claims": 33,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 1,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -0.5,
  "gold": 48,
  "cost_usd": 0.07434,
  "exact_cost": true,
  "vendor_anon": "tax-g48"
 },
 {
  "cohort": "healthcare-rcm",
  "arm": "new",
  "config": "gpt-5-mini / grok-4.3 / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "x-ai/grok-4.3",
  "model_03": "openai/o4-mini",
  "claims": 26,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 1,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 0.75,
  "gold": 23,
  "cost_usd": 0.078303,
  "exact_cost": true,
  "vendor_anon": "healthcare-rcm-g23"
 },
 {
  "cohort": "insurance-brokerage",
  "arm": "new",
  "config": "gpt-5-mini / grok-4.3 / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "x-ai/grok-4.3",
  "model_03": "openai/o4-mini",
  "claims": 22,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -53.5,
  "gold": 53,
  "cost_usd": 0.061296,
  "exact_cost": true,
  "vendor_anon": "insurance-brokerage-g53"
 },
 {
  "cohort": "fintech",
  "arm": "new",
  "config": "gpt-5-mini / grok-4.3 / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "x-ai/grok-4.3",
  "model_03": "openai/o4-mini",
  "claims": 33,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 1,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -5.0,
  "gold": 32,
  "cost_usd": 0.080754,
  "exact_cost": true,
  "vendor_anon": "fintech-g32"
 },
 {
  "cohort": "construction",
  "arm": "new",
  "config": "gpt-5-mini / grok-4.3 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "x-ai/grok-4.3",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 25,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 7,
  "raw_escapes": 7,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -3.5,
  "gold": 21,
  "cost_usd": 0.03575,
  "exact_cost": true,
  "vendor_anon": "construction-g21"
 },
 {
  "cohort": "field-service",
  "arm": "new",
  "config": "gpt-5-mini / grok-4.3 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "x-ai/grok-4.3",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 21,
  "receipts": 2,
  "hallucinations": 0,
  "mistags": 1,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 4.75,
  "gold": 9,
  "cost_usd": 0.035525,
  "exact_cost": true,
  "vendor_anon": "field-service-g9"
 },
 {
  "cohort": "ad-sales-publisher",
  "arm": "new",
  "config": "gpt-5-mini / grok-4.3 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "x-ai/grok-4.3",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 36,
  "receipts": 1,
  "hallucinations": 0,
  "mistags": 4,
  "raw_escapes": 4,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 9.0,
  "gold": 51,
  "cost_usd": 0.037452,
  "exact_cost": true,
  "vendor_anon": "ad-sales-publisher-g51"
 },
 {
  "cohort": "insurance-brokerage",
  "arm": "new",
  "config": "gpt-5-mini / grok-4.3 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "x-ai/grok-4.3",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 29,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -7.25,
  "gold": 41,
  "cost_usd": 0.031544,
  "exact_cost": true,
  "vendor_anon": "insurance-brokerage-g41"
 },
 {
  "cohort": "pharma-lifesci-crm",
  "arm": "new",
  "config": "gpt-5-mini / grok-4.3 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "x-ai/grok-4.3",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 22,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 2,
  "raw_escapes": 2,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 0.25,
  "gold": 51,
  "cost_usd": 0.033898,
  "exact_cost": true,
  "vendor_anon": "pharma-lifesci-crm-g51"
 },
 {
  "cohort": "cx",
  "arm": "new",
  "config": "gpt-5-mini / grok-4.3 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "x-ai/grok-4.3",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 26,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 5,
  "raw_escapes": 5,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -5.5,
  "gold": 33,
  "cost_usd": 0.04421,
  "exact_cost": true,
  "vendor_anon": "cx-g33"
 },
 {
  "cohort": "cx",
  "arm": "new",
  "config": "gpt-5-mini / grok-4.3 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "x-ai/grok-4.3",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 26,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 1,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -5.0,
  "gold": 56,
  "cost_usd": 0.042016,
  "exact_cost": true,
  "vendor_anon": "cx-g56"
 },
 {
  "cohort": "real-estate",
  "arm": "new",
  "config": "gpt-5-mini / grok-4.3 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "x-ai/grok-4.3",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 30,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 1,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -25.0,
  "gold": 34,
  "cost_usd": 0.032951,
  "exact_cost": true,
  "vendor_anon": "real-estate-g34"
 },
 {
  "cohort": "fintech",
  "arm": "new",
  "config": "gpt-5-mini / grok-4.3 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "x-ai/grok-4.3",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 26,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 8,
  "raw_escapes": 8,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -13.0,
  "gold": 40,
  "cost_usd": 0.037437,
  "exact_cost": true,
  "vendor_anon": "fintech-g40"
 },
 {
  "cohort": "real-estate",
  "arm": "new",
  "config": "gpt-5-mini / grok-4.3 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "x-ai/grok-4.3",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 30,
  "receipts": 3,
  "hallucinations": 0,
  "mistags": 11,
  "raw_escapes": 11,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -3.75,
  "gold": 65,
  "cost_usd": 0.033669,
  "exact_cost": true,
  "vendor_anon": "real-estate-g65"
 },
 {
  "cohort": "construction",
  "arm": "new",
  "config": "gpt-5-mini / grok-4.3 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "x-ai/grok-4.3",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 30,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 6.0,
  "gold": 13,
  "cost_usd": 0.039682,
  "exact_cost": true,
  "vendor_anon": "construction-g13"
 },
 {
  "cohort": "customer-success",
  "arm": "new",
  "config": "gpt-5-mini / grok-4.3 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "x-ai/grok-4.3",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 24,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 2.5,
  "gold": 43,
  "cost_usd": 0.037328,
  "exact_cost": true,
  "vendor_anon": "customer-success-g43"
 },
 {
  "cohort": "tax",
  "arm": "new",
  "config": "gpt-5-mini / grok-4.3 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "x-ai/grok-4.3",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 26,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -5.5,
  "gold": 48,
  "cost_usd": 0.039572,
  "exact_cost": true,
  "vendor_anon": "tax-g48"
 },
 {
  "cohort": "tax",
  "arm": "new",
  "config": "gpt-5-mini / grok-4.3 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "x-ai/grok-4.3",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 33,
  "receipts": 0,
  "hallucinations": 1,
  "mistags": 2,
  "raw_escapes": 3,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -6.5,
  "gold": 59,
  "cost_usd": 0.042772,
  "exact_cost": true,
  "vendor_anon": "tax-g59"
 },
 {
  "cohort": "construction",
  "arm": "new",
  "config": "gpt-5-mini / grok-4.3 / qwen3-235b-a22b-2507",
  "model_01": "openai/gpt-5-mini",
  "model_02": "x-ai/grok-4.3",
  "model_03": "qwen/qwen3-235b-a22b-2507",
  "claims": 21,
  "receipts": 2,
  "hallucinations": 0,
  "mistags": 4,
  "raw_escapes": 4,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -3.5,
  "gold": 21,
  "cost_usd": 0.035663,
  "exact_cost": true,
  "vendor_anon": "construction-g21"
 },
 {
  "cohort": "field-service",
  "arm": "new",
  "config": "gpt-5-mini / grok-4.3 / qwen3-235b-a22b-2507",
  "model_01": "openai/gpt-5-mini",
  "model_02": "x-ai/grok-4.3",
  "model_03": "qwen/qwen3-235b-a22b-2507",
  "claims": 21,
  "receipts": 2,
  "hallucinations": 0,
  "mistags": 1,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 9.75,
  "gold": 9,
  "cost_usd": 0.036858,
  "exact_cost": true,
  "vendor_anon": "field-service-g9"
 },
 {
  "cohort": "ad-sales-publisher",
  "arm": "new",
  "config": "gpt-5-mini / grok-4.3 / qwen3-235b-a22b-2507",
  "model_01": "openai/gpt-5-mini",
  "model_02": "x-ai/grok-4.3",
  "model_03": "qwen/qwen3-235b-a22b-2507",
  "claims": 21,
  "receipts": 1,
  "hallucinations": 0,
  "mistags": 1,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -21.0,
  "gold": 51,
  "cost_usd": 0.035933,
  "exact_cost": true,
  "vendor_anon": "ad-sales-publisher-g51"
 },
 {
  "cohort": "insurance-brokerage",
  "arm": "new",
  "config": "gpt-5-mini / grok-4.3 / qwen3-235b-a22b-2507",
  "model_01": "openai/gpt-5-mini",
  "model_02": "x-ai/grok-4.3",
  "model_03": "qwen/qwen3-235b-a22b-2507",
  "claims": 24,
  "receipts": 2,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 5.25,
  "gold": 41,
  "cost_usd": 0.030006,
  "exact_cost": true,
  "vendor_anon": "insurance-brokerage-g41"
 },
 {
  "cohort": "healthcare-clinical",
  "arm": "new",
  "config": "gpt-5-mini / grok-4.3 / qwen3-235b-a22b-2507",
  "model_01": "openai/gpt-5-mini",
  "model_02": "x-ai/grok-4.3",
  "model_03": "qwen/qwen3-235b-a22b-2507",
  "claims": 23,
  "receipts": 1,
  "hallucinations": 0,
  "mistags": 2,
  "raw_escapes": 2,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 8.0,
  "gold": 27,
  "cost_usd": 0.036524,
  "exact_cost": true,
  "vendor_anon": "healthcare-clinical-g27"
 },
 {
  "cohort": "healthcare-rcm",
  "arm": "new",
  "config": "gpt-5-mini / grok-4.3 / qwen3-235b-a22b-2507",
  "model_01": "openai/gpt-5-mini",
  "model_02": "x-ai/grok-4.3",
  "model_03": "qwen/qwen3-235b-a22b-2507",
  "claims": 30,
  "receipts": 0,
  "hallucinations": 1,
  "mistags": 0,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 1.5,
  "gold": 31,
  "cost_usd": 0.041689,
  "exact_cost": true,
  "vendor_anon": "healthcare-rcm-g31"
 },
 {
  "cohort": "construction",
  "arm": "new",
  "config": "gpt-5-mini / grok-4.3 / llama-3.3-70b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "x-ai/grok-4.3",
  "model_03": "meta-llama/llama-3.3-70b-instruct",
  "claims": 23,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 5,
  "raw_escapes": 5,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -3.5,
  "gold": 21,
  "cost_usd": 0.035526,
  "exact_cost": true,
  "vendor_anon": "construction-g21"
 },
 {
  "cohort": "insurance-brokerage",
  "arm": "new",
  "config": "gpt-5-mini / grok-4.3 / llama-3.3-70b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "x-ai/grok-4.3",
  "model_03": "meta-llama/llama-3.3-70b-instruct",
  "claims": 25,
  "receipts": 2,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -1.75,
  "gold": 41,
  "cost_usd": 0.030315,
  "exact_cost": true,
  "vendor_anon": "insurance-brokerage-g41"
 },
 {
  "cohort": "pharma-lifesci-crm",
  "arm": "new",
  "config": "gpt-5-mini / grok-4.3 / llama-3.3-70b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "x-ai/grok-4.3",
  "model_03": "meta-llama/llama-3.3-70b-instruct",
  "claims": 27,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 2,
  "raw_escapes": 2,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 5.25,
  "gold": 51,
  "cost_usd": 0.034687,
  "exact_cost": true,
  "vendor_anon": "pharma-lifesci-crm-g51"
 },
 {
  "cohort": "cx",
  "arm": "new",
  "config": "gpt-5-mini / grok-4.3 / llama-3.3-70b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "x-ai/grok-4.3",
  "model_03": "meta-llama/llama-3.3-70b-instruct",
  "claims": 25,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 5,
  "raw_escapes": 5,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -5.5,
  "gold": 33,
  "cost_usd": 0.044867,
  "exact_cost": true,
  "vendor_anon": "cx-g33"
 },
 {
  "cohort": "healthcare-clinical",
  "arm": "new",
  "config": "gpt-5-mini / grok-4.3 / llama-3.3-70b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "x-ai/grok-4.3",
  "model_03": "meta-llama/llama-3.3-70b-instruct",
  "claims": 26,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 1,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 8.0,
  "gold": 27,
  "cost_usd": 0.035038,
  "exact_cost": true,
  "vendor_anon": "healthcare-clinical-g27"
 },
 {
  "cohort": "cx",
  "arm": "new",
  "config": "gpt-5-mini / grok-4.3 / llama-3.3-70b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "x-ai/grok-4.3",
  "model_03": "meta-llama/llama-3.3-70b-instruct",
  "claims": 33,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 2,
  "raw_escapes": 2,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 0.0,
  "gold": 56,
  "cost_usd": 0.043832,
  "exact_cost": true,
  "vendor_anon": "cx-g56"
 },
 {
  "cohort": "real-estate",
  "arm": "new",
  "config": "gpt-5-mini / grok-4.3 / llama-3.3-70b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "x-ai/grok-4.3",
  "model_03": "meta-llama/llama-3.3-70b-instruct",
  "claims": 30,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -12.0,
  "gold": 34,
  "cost_usd": 0.035608,
  "exact_cost": true,
  "vendor_anon": "real-estate-g34"
 },
 {
  "cohort": "healthcare-clinical",
  "arm": "new",
  "config": "gpt-5-mini / grok-4.3 / llama-3.3-70b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "x-ai/grok-4.3",
  "model_03": "meta-llama/llama-3.3-70b-instruct",
  "claims": 23,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 6,
  "raw_escapes": 6,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -22.25,
  "gold": 38,
  "cost_usd": 0.041881,
  "exact_cost": true,
  "vendor_anon": "healthcare-clinical-g38"
 },
 {
  "cohort": "ad-sales-publisher",
  "arm": "new",
  "config": "gpt-5-mini / grok-4.3 / llama-3.3-70b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "x-ai/grok-4.3",
  "model_03": "meta-llama/llama-3.3-70b-instruct",
  "claims": 23,
  "receipts": 1,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -10.0,
  "gold": 44,
  "cost_usd": 0.035988,
  "exact_cost": true,
  "vendor_anon": "ad-sales-publisher-g44"
 },
 {
  "cohort": "healthcare-rcm",
  "arm": "new",
  "config": "gpt-5-mini / grok-4.3 / llama-3.3-70b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "x-ai/grok-4.3",
  "model_03": "meta-llama/llama-3.3-70b-instruct",
  "claims": 31,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 8,
  "raw_escapes": 8,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -3.5,
  "gold": 38,
  "cost_usd": 0.038059,
  "exact_cost": true,
  "vendor_anon": "healthcare-rcm-g38"
 },
 {
  "cohort": "construction",
  "arm": "new",
  "config": "gpt-5-mini / grok-4.3 / llama-3.3-70b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "x-ai/grok-4.3",
  "model_03": "meta-llama/llama-3.3-70b-instruct",
  "claims": 30,
  "receipts": 7,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 8.25,
  "gold": 13,
  "cost_usd": 0.038861,
  "exact_cost": true,
  "vendor_anon": "construction-g13"
 },
 {
  "cohort": "tax",
  "arm": "new",
  "config": "gpt-5-mini / grok-4.3 / llama-3.3-70b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "x-ai/grok-4.3",
  "model_03": "meta-llama/llama-3.3-70b-instruct",
  "claims": 37,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 1,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -10.5,
  "gold": 48,
  "cost_usd": 0.041756,
  "exact_cost": true,
  "vendor_anon": "tax-g48"
 },
 {
  "cohort": "healthcare-rcm",
  "arm": "new",
  "config": "gpt-5-mini / grok-4.3 / llama-3.3-70b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "x-ai/grok-4.3",
  "model_03": "meta-llama/llama-3.3-70b-instruct",
  "claims": 28,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 0.75,
  "gold": 23,
  "cost_usd": 0.038855,
  "exact_cost": true,
  "vendor_anon": "healthcare-rcm-g23"
 },
 {
  "cohort": "insurance-brokerage",
  "arm": "new",
  "config": "gpt-5-mini / grok-4.3 / llama-3.3-70b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "x-ai/grok-4.3",
  "model_03": "meta-llama/llama-3.3-70b-instruct",
  "claims": 20,
  "receipts": 1,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -30.5,
  "gold": 53,
  "cost_usd": 0.030882,
  "exact_cost": true,
  "vendor_anon": "insurance-brokerage-g53"
 },
 {
  "cohort": "fintech",
  "arm": "new",
  "config": "gpt-5-mini / grok-4.3 / llama-3.3-70b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "x-ai/grok-4.3",
  "model_03": "meta-llama/llama-3.3-70b-instruct",
  "claims": 26,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -5.0,
  "gold": 32,
  "cost_usd": 0.039879,
  "exact_cost": true,
  "vendor_anon": "fintech-g32"
 },
 {
  "cohort": "construction",
  "arm": "new",
  "config": "gpt-5-nano / grok-4.3 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-nano",
  "model_02": "x-ai/grok-4.3",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 23,
  "receipts": 3,
  "hallucinations": 0,
  "mistags": 5,
  "raw_escapes": 5,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -3.5,
  "gold": 21,
  "cost_usd": 0.033913,
  "exact_cost": true,
  "vendor_anon": "construction-g21"
 },
 {
  "cohort": "field-service",
  "arm": "new",
  "config": "gpt-5-nano / grok-4.3 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-nano",
  "model_02": "x-ai/grok-4.3",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 19,
  "receipts": 1,
  "hallucinations": 0,
  "mistags": 1,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 4.75,
  "gold": 9,
  "cost_usd": 0.033038,
  "exact_cost": true,
  "vendor_anon": "field-service-g9"
 },
 {
  "cohort": "ad-sales-publisher",
  "arm": "new",
  "config": "gpt-5-nano / grok-4.3 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-nano",
  "model_02": "x-ai/grok-4.3",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 31,
  "receipts": 1,
  "hallucinations": 4,
  "mistags": 4,
  "raw_escapes": 8,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -6.0,
  "gold": 51,
  "cost_usd": 0.033446,
  "exact_cost": true,
  "vendor_anon": "ad-sales-publisher-g51"
 },
 {
  "cohort": "construction",
  "arm": "new",
  "config": "mistral-small-3.2-24b-instruct / grok-4.3 / mistral-small-3.2-24b-instruct",
  "model_01": "mistralai/mistral-small-3.2-24b-instruct",
  "model_02": "x-ai/grok-4.3",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 22,
  "receipts": 2,
  "hallucinations": 0,
  "mistags": 4,
  "raw_escapes": 4,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -3.5,
  "gold": 21,
  "cost_usd": 0.032107,
  "exact_cost": true,
  "vendor_anon": "construction-g21"
 },
 {
  "cohort": "field-service",
  "arm": "new",
  "config": "mistral-small-3.2-24b-instruct / grok-4.3 / mistral-small-3.2-24b-instruct",
  "model_01": "mistralai/mistral-small-3.2-24b-instruct",
  "model_02": "x-ai/grok-4.3",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 26,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 1,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 4.75,
  "gold": 9,
  "cost_usd": 0.03301,
  "exact_cost": true,
  "vendor_anon": "field-service-g9"
 },
 {
  "cohort": "ad-sales-publisher",
  "arm": "new",
  "config": "mistral-small-3.2-24b-instruct / grok-4.3 / mistral-small-3.2-24b-instruct",
  "model_01": "mistralai/mistral-small-3.2-24b-instruct",
  "model_02": "x-ai/grok-4.3",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 28,
  "receipts": 0,
  "hallucinations": 4,
  "mistags": 1,
  "raw_escapes": 5,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 16.5,
  "gold": 51,
  "cost_usd": 0.032808,
  "exact_cost": true,
  "vendor_anon": "ad-sales-publisher-g51"
 },
 {
  "cohort": "ad-sales-publisher",
  "arm": "new",
  "config": "gpt-5-mini / grok-4.3 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "x-ai/grok-4.3",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 21,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -5.25,
  "gold": 44,
  "cost_usd": 0.034483,
  "exact_cost": true,
  "vendor_anon": "ad-sales-publisher-g44"
 },
 {
  "cohort": "bd",
  "arm": "new",
  "config": "gpt-5-mini / grok-4.3 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "x-ai/grok-4.3",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 29,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -9.75,
  "gold": 31,
  "cost_usd": 0.039093,
  "exact_cost": true,
  "vendor_anon": "bd-g31"
 },
 {
  "cohort": "bd",
  "arm": "new",
  "config": "gpt-5-mini / grok-4.3 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "x-ai/grok-4.3",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 25,
  "receipts": 2,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -0.25,
  "gold": 54,
  "cost_usd": 0.040389,
  "exact_cost": true,
  "vendor_anon": "bd-g54"
 },
 {
  "cohort": "customer-success",
  "arm": "new",
  "config": "gpt-5-mini / grok-4.3 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "x-ai/grok-4.3",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 23,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 1,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 7.5,
  "gold": 33,
  "cost_usd": 0.034941,
  "exact_cost": true,
  "vendor_anon": "customer-success-g33"
 },
 {
  "cohort": "field-service",
  "arm": "new",
  "config": "gpt-5-mini / grok-4.3 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "x-ai/grok-4.3",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 21,
  "receipts": 1,
  "hallucinations": 0,
  "mistags": 2,
  "raw_escapes": 2,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -4.0,
  "gold": 1,
  "cost_usd": 0.033061,
  "exact_cost": true,
  "vendor_anon": "field-service-g1"
 },
 {
  "cohort": "fintech",
  "arm": "new",
  "config": "gpt-5-mini / grok-4.3 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "x-ai/grok-4.3",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 33,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 2,
  "raw_escapes": 2,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -5.0,
  "gold": 32,
  "cost_usd": 0.03987,
  "exact_cost": true,
  "vendor_anon": "fintech-g32"
 },
 {
  "cohort": "healthcare-clinical",
  "arm": "new",
  "config": "gpt-5-mini / grok-4.3 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "x-ai/grok-4.3",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 30,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 2,
  "raw_escapes": 2,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 8.0,
  "gold": 27,
  "cost_usd": 0.036408,
  "exact_cost": true,
  "vendor_anon": "healthcare-clinical-g27"
 },
 {
  "cohort": "healthcare-clinical",
  "arm": "new",
  "config": "gpt-5-mini / grok-4.3 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "x-ai/grok-4.3",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 20,
  "receipts": 4,
  "hallucinations": 1,
  "mistags": 7,
  "raw_escapes": 8,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -5.0,
  "gold": 38,
  "cost_usd": 0.039896,
  "exact_cost": true,
  "vendor_anon": "healthcare-clinical-g38"
 },
 {
  "cohort": "healthcare-rcm",
  "arm": "new",
  "config": "gpt-5-mini / grok-4.3 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "x-ai/grok-4.3",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 29,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 1,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -0.5,
  "gold": 23,
  "cost_usd": 0.03695,
  "exact_cost": true,
  "vendor_anon": "healthcare-rcm-g23"
 },
 {
  "cohort": "healthcare-rcm",
  "arm": "new",
  "config": "gpt-5-mini / grok-4.3 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "x-ai/grok-4.3",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 23,
  "receipts": 1,
  "hallucinations": 1,
  "mistags": 0,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -3.5,
  "gold": 31,
  "cost_usd": 0.039926,
  "exact_cost": true,
  "vendor_anon": "healthcare-rcm-g31"
 },
 {
  "cohort": "healthcare-rcm",
  "arm": "new",
  "config": "gpt-5-mini / grok-4.3 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "x-ai/grok-4.3",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 31,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 7,
  "raw_escapes": 7,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -0.5,
  "gold": 38,
  "cost_usd": 0.036859,
  "exact_cost": true,
  "vendor_anon": "healthcare-rcm-g38"
 },
 {
  "cohort": "higher-ed-crm",
  "arm": "new",
  "config": "gpt-5-mini / grok-4.3 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "x-ai/grok-4.3",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 24,
  "receipts": 9,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -5.0,
  "gold": 27,
  "cost_usd": 0.037559,
  "exact_cost": true,
  "vendor_anon": "higher-ed-crm-g27"
 },
 {
  "cohort": "higher-ed-crm",
  "arm": "new",
  "config": "gpt-5-mini / grok-4.3 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "x-ai/grok-4.3",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 35,
  "receipts": 3,
  "hallucinations": 0,
  "mistags": 1,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 15.25,
  "gold": 46,
  "cost_usd": 0.041517,
  "exact_cost": true,
  "vendor_anon": "higher-ed-crm-g46"
 },
 {
  "cohort": "insurance-brokerage",
  "arm": "new",
  "config": "gpt-5-mini / grok-4.3 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "x-ai/grok-4.3",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 20,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -36.75,
  "gold": 53,
  "cost_usd": 0.029241,
  "exact_cost": true,
  "vendor_anon": "insurance-brokerage-g53"
 },
 {
  "cohort": "pharma-lifesci-crm",
  "arm": "new",
  "config": "gpt-5-mini / grok-4.3 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "x-ai/grok-4.3",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 33,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 1,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -1.25,
  "gold": 40,
  "cost_usd": 0.036873,
  "exact_cost": true,
  "vendor_anon": "pharma-lifesci-crm-g40"
 },
 {
  "cohort": "ad-sales-publisher",
  "arm": "new",
  "config": "gpt-5-mini / qwen3-235b-a22b-2507 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "qwen/qwen3-235b-a22b-2507",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 25,
  "receipts": 15,
  "hallucinations": 0,
  "mistags": 1,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -3.99,
  "gold": 44,
  "cost_usd": 0.007306,
  "exact_cost": true,
  "vendor_anon": "ad-sales-publisher-g44"
 },
 {
  "cohort": "ad-sales-publisher",
  "arm": "new",
  "config": "gpt-5-mini / qwen3-235b-a22b-2507 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "qwen/qwen3-235b-a22b-2507",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 24,
  "receipts": 13,
  "hallucinations": 0,
  "mistags": 3,
  "raw_escapes": 3,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -19.75,
  "gold": 51,
  "cost_usd": 0.007162,
  "exact_cost": true,
  "vendor_anon": "ad-sales-publisher-g51"
 },
 {
  "cohort": "bd",
  "arm": "new",
  "config": "gpt-5-mini / qwen3-235b-a22b-2507 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "qwen/qwen3-235b-a22b-2507",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 19,
  "receipts": 1,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 16.0,
  "gold": 31,
  "cost_usd": 0.007216,
  "exact_cost": true,
  "vendor_anon": "bd-g31"
 },
 {
  "cohort": "bd",
  "arm": "new",
  "config": "gpt-5-mini / qwen3-235b-a22b-2507 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "qwen/qwen3-235b-a22b-2507",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 22,
  "receipts": 4,
  "hallucinations": 0,
  "mistags": 2,
  "raw_escapes": 2,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": null,
  "gold": 54,
  "cost_usd": 0.008022,
  "exact_cost": true,
  "vendor_anon": "bd-g54"
 },
 {
  "cohort": "construction",
  "arm": "new",
  "config": "gpt-5-mini / qwen3-235b-a22b-2507 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "qwen/qwen3-235b-a22b-2507",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 24,
  "receipts": 1,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 9.5,
  "gold": 13,
  "cost_usd": 0.007921,
  "exact_cost": true,
  "vendor_anon": "construction-g13"
 },
 {
  "cohort": "construction",
  "arm": "new",
  "config": "gpt-5-mini / qwen3-235b-a22b-2507 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "qwen/qwen3-235b-a22b-2507",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 27,
  "receipts": 12,
  "hallucinations": 0,
  "mistags": 3,
  "raw_escapes": 3,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -5.38,
  "gold": 21,
  "cost_usd": 0.007286,
  "exact_cost": true,
  "vendor_anon": "construction-g21"
 },
 {
  "cohort": "customer-success",
  "arm": "new",
  "config": "gpt-5-mini / qwen3-235b-a22b-2507 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "qwen/qwen3-235b-a22b-2507",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 20,
  "receipts": 12,
  "hallucinations": 0,
  "mistags": 1,
  "raw_escapes": 1,
  "postgate_escapes": 1,
  "gate_blocked": false,
  "score_delta": -0.5,
  "gold": 33,
  "cost_usd": 0.006952,
  "exact_cost": true,
  "vendor_anon": "customer-success-g33"
 },
 {
  "cohort": "customer-success",
  "arm": "new",
  "config": "gpt-5-mini / qwen3-235b-a22b-2507 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "qwen/qwen3-235b-a22b-2507",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 28,
  "receipts": 19,
  "hallucinations": 2,
  "mistags": 1,
  "raw_escapes": 3,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -1.0,
  "gold": 43,
  "cost_usd": 0.007133,
  "exact_cost": true,
  "vendor_anon": "customer-success-g43"
 },
 {
  "cohort": "cx",
  "arm": "new",
  "config": "gpt-5-mini / qwen3-235b-a22b-2507 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "qwen/qwen3-235b-a22b-2507",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 47,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 9,
  "raw_escapes": 9,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -5.5,
  "gold": 33,
  "cost_usd": 0.008665,
  "exact_cost": true,
  "vendor_anon": "cx-g33"
 },
 {
  "cohort": "cx",
  "arm": "new",
  "config": "gpt-5-mini / qwen3-235b-a22b-2507 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "qwen/qwen3-235b-a22b-2507",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 34,
  "receipts": 30,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -4.75,
  "gold": 56,
  "cost_usd": 0.008455,
  "exact_cost": true,
  "vendor_anon": "cx-g56"
 },
 {
  "cohort": "field-service",
  "arm": "new",
  "config": "gpt-5-mini / qwen3-235b-a22b-2507 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "qwen/qwen3-235b-a22b-2507",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 27,
  "receipts": 18,
  "hallucinations": 0,
  "mistags": 6,
  "raw_escapes": 6,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -26.0,
  "gold": 1,
  "cost_usd": 0.006182,
  "exact_cost": true,
  "vendor_anon": "field-service-g1"
 },
 {
  "cohort": "field-service",
  "arm": "new",
  "config": "gpt-5-mini / qwen3-235b-a22b-2507 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "qwen/qwen3-235b-a22b-2507",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 32,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 1,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 4.62,
  "gold": 9,
  "cost_usd": 0.007191,
  "exact_cost": true,
  "vendor_anon": "field-service-g9"
 },
 {
  "cohort": "fintech",
  "arm": "new",
  "config": "gpt-5-mini / qwen3-235b-a22b-2507 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "qwen/qwen3-235b-a22b-2507",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 29,
  "receipts": 12,
  "hallucinations": 0,
  "mistags": 2,
  "raw_escapes": 2,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 13.75,
  "gold": 32,
  "cost_usd": 0.00823,
  "exact_cost": true,
  "vendor_anon": "fintech-g32"
 },
 {
  "cohort": "fintech",
  "arm": "new",
  "config": "gpt-5-mini / qwen3-235b-a22b-2507 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "qwen/qwen3-235b-a22b-2507",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 25,
  "receipts": 1,
  "hallucinations": 0,
  "mistags": 5,
  "raw_escapes": 5,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -25.25,
  "gold": 40,
  "cost_usd": 0.008013,
  "exact_cost": true,
  "vendor_anon": "fintech-g40"
 },
 {
  "cohort": "healthcare-clinical",
  "arm": "new",
  "config": "gpt-5-mini / qwen3-235b-a22b-2507 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "qwen/qwen3-235b-a22b-2507",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 19,
  "receipts": 15,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 4.25,
  "gold": 27,
  "cost_usd": 0.006311,
  "exact_cost": true,
  "vendor_anon": "healthcare-clinical-g27"
 },
 {
  "cohort": "healthcare-clinical",
  "arm": "new",
  "config": "gpt-5-mini / qwen3-235b-a22b-2507 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "qwen/qwen3-235b-a22b-2507",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 21,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 10,
  "raw_escapes": 10,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -21.75,
  "gold": 38,
  "cost_usd": 0.007735,
  "exact_cost": true,
  "vendor_anon": "healthcare-clinical-g38"
 },
 {
  "cohort": "healthcare-rcm",
  "arm": "new",
  "config": "gpt-5-mini / qwen3-235b-a22b-2507 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "qwen/qwen3-235b-a22b-2507",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 26,
  "receipts": 12,
  "hallucinations": 0,
  "mistags": 1,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -5.75,
  "gold": 23,
  "cost_usd": 0.006966,
  "exact_cost": true,
  "vendor_anon": "healthcare-rcm-g23"
 },
 {
  "cohort": "healthcare-rcm",
  "arm": "new",
  "config": "gpt-5-mini / qwen3-235b-a22b-2507 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "qwen/qwen3-235b-a22b-2507",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 28,
  "receipts": 1,
  "hallucinations": 2,
  "mistags": 3,
  "raw_escapes": 5,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -2.75,
  "gold": 31,
  "cost_usd": 0.007895,
  "exact_cost": true,
  "vendor_anon": "healthcare-rcm-g31"
 },
 {
  "cohort": "healthcare-rcm",
  "arm": "new",
  "config": "gpt-5-mini / qwen3-235b-a22b-2507 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "qwen/qwen3-235b-a22b-2507",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 22,
  "receipts": 1,
  "hallucinations": 0,
  "mistags": 6,
  "raw_escapes": 6,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 5.25,
  "gold": 38,
  "cost_usd": 0.006867,
  "exact_cost": true,
  "vendor_anon": "healthcare-rcm-g38"
 },
 {
  "cohort": "higher-ed-crm",
  "arm": "new",
  "config": "gpt-5-mini / qwen3-235b-a22b-2507 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "qwen/qwen3-235b-a22b-2507",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 21,
  "receipts": 12,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 25.75,
  "gold": 27,
  "cost_usd": 0.006794,
  "exact_cost": true,
  "vendor_anon": "higher-ed-crm-g27"
 },
 {
  "cohort": "higher-ed-crm",
  "arm": "new",
  "config": "gpt-5-mini / qwen3-235b-a22b-2507 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "qwen/qwen3-235b-a22b-2507",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 30,
  "receipts": 19,
  "hallucinations": 0,
  "mistags": 2,
  "raw_escapes": 2,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 2.75,
  "gold": 46,
  "cost_usd": 0.007388,
  "exact_cost": true,
  "vendor_anon": "higher-ed-crm-g46"
 },
 {
  "cohort": "insurance-brokerage",
  "arm": "new",
  "config": "gpt-5-mini / qwen3-235b-a22b-2507 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "qwen/qwen3-235b-a22b-2507",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 32,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 16.5,
  "gold": 41,
  "cost_usd": 0.005561,
  "exact_cost": true,
  "vendor_anon": "insurance-brokerage-g41"
 },
 {
  "cohort": "insurance-brokerage",
  "arm": "new",
  "config": "gpt-5-mini / qwen3-235b-a22b-2507 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "qwen/qwen3-235b-a22b-2507",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 28,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 1,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 2.75,
  "gold": 53,
  "cost_usd": 0.006142,
  "exact_cost": true,
  "vendor_anon": "insurance-brokerage-g53"
 },
 {
  "cohort": "pharma-lifesci-crm",
  "arm": "new",
  "config": "gpt-5-mini / qwen3-235b-a22b-2507 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "qwen/qwen3-235b-a22b-2507",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 27,
  "receipts": 6,
  "hallucinations": 2,
  "mistags": 0,
  "raw_escapes": 2,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -0.5,
  "gold": 40,
  "cost_usd": 0.006583,
  "exact_cost": true,
  "vendor_anon": "pharma-lifesci-crm-g40"
 },
 {
  "cohort": "pharma-lifesci-crm",
  "arm": "new",
  "config": "gpt-5-mini / qwen3-235b-a22b-2507 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "qwen/qwen3-235b-a22b-2507",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 22,
  "receipts": 8,
  "hallucinations": 0,
  "mistags": 3,
  "raw_escapes": 3,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -2.25,
  "gold": 51,
  "cost_usd": 0.006339,
  "exact_cost": true,
  "vendor_anon": "pharma-lifesci-crm-g51"
 },
 {
  "cohort": "real-estate",
  "arm": "new",
  "config": "gpt-5-mini / qwen3-235b-a22b-2507 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "qwen/qwen3-235b-a22b-2507",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 23,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 1,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 6.5,
  "gold": 34,
  "cost_usd": 0.006614,
  "exact_cost": true,
  "vendor_anon": "real-estate-g34"
 },
 {
  "cohort": "real-estate",
  "arm": "new",
  "config": "gpt-5-mini / qwen3-235b-a22b-2507 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "qwen/qwen3-235b-a22b-2507",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 31,
  "receipts": 24,
  "hallucinations": 2,
  "mistags": 10,
  "raw_escapes": 12,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 7.5,
  "gold": 65,
  "cost_usd": 0.006706,
  "exact_cost": true,
  "vendor_anon": "real-estate-g65"
 },
 {
  "cohort": "tax",
  "arm": "new",
  "config": "gpt-5-mini / qwen3-235b-a22b-2507 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "qwen/qwen3-235b-a22b-2507",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 22,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 2,
  "raw_escapes": 2,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -15.5,
  "gold": 48,
  "cost_usd": 0.007213,
  "exact_cost": true,
  "vendor_anon": "tax-g48"
 },
 {
  "cohort": "tax",
  "arm": "new",
  "config": "gpt-5-mini / qwen3-235b-a22b-2507 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "qwen/qwen3-235b-a22b-2507",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 24,
  "receipts": 1,
  "hallucinations": 1,
  "mistags": 0,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -7.75,
  "gold": 59,
  "cost_usd": 0.007824,
  "exact_cost": true,
  "vendor_anon": "tax-g59"
 },
 {
  "cohort": "construction",
  "arm": "new",
  "config": "gpt-5-mini / mistral-small-3.2-24b-instruct / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "mistralai/mistral-small-3.2-24b-instruct",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 30,
  "receipts": 21,
  "hallucinations": 1,
  "mistags": 1,
  "raw_escapes": 2,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 12.75,
  "gold": 21,
  "cost_usd": 0.006791,
  "exact_cost": true,
  "vendor_anon": "construction-g21"
 },
 {
  "cohort": "field-service",
  "arm": "new",
  "config": "gpt-5-mini / command-r-08-2024 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "cohere/command-r-08-2024",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 4,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": null,
  "gold": 9,
  "cost_usd": 0.009277,
  "exact_cost": true,
  "vendor_anon": "field-service-g9"
 },
 {
  "cohort": "field-service",
  "arm": "new",
  "config": "gpt-5-mini / gemini-2.5-flash-lite / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "google/gemini-2.5-flash-lite",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 24,
  "receipts": 0,
  "hallucinations": 1,
  "mistags": 1,
  "raw_escapes": 2,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": null,
  "gold": 9,
  "cost_usd": 0.007855,
  "exact_cost": true,
  "vendor_anon": "field-service-g9"
 },
 {
  "cohort": "field-service",
  "arm": "new",
  "config": "gpt-5-mini / gemini-2.5-pro / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "google/gemini-2.5-pro",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 37,
  "receipts": 9,
  "hallucinations": 0,
  "mistags": 3,
  "raw_escapes": 3,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 6.0,
  "gold": 9,
  "cost_usd": 0.116389,
  "exact_cost": true,
  "vendor_anon": "field-service-g9"
 },
 {
  "cohort": "field-service",
  "arm": "new",
  "config": "gpt-5-mini / llama-4-scout / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "meta-llama/llama-4-scout",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 17,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 1,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 131.0,
  "gold": 9,
  "cost_usd": 0.007533,
  "exact_cost": true,
  "vendor_anon": "field-service-g9"
 },
 {
  "cohort": "field-service",
  "arm": "new",
  "config": "gpt-5-mini / mistral-small-3.2-24b-instruct / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "mistralai/mistral-small-3.2-24b-instruct",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 67,
  "receipts": 3,
  "hallucinations": 0,
  "mistags": 5,
  "raw_escapes": 5,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 4.8,
  "gold": 9,
  "cost_usd": 0.007415,
  "exact_cost": true,
  "vendor_anon": "field-service-g9"
 },
 {
  "cohort": "field-service",
  "arm": "new",
  "config": "gpt-5-mini / gpt-4.1-mini / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-4.1-mini",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 47,
  "receipts": 0,
  "hallucinations": 29,
  "mistags": 1,
  "raw_escapes": 30,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 4.8,
  "gold": 9,
  "cost_usd": 0.018797,
  "exact_cost": true,
  "vendor_anon": "field-service-g9"
 },
 {
  "cohort": "field-service",
  "arm": "new",
  "config": "gpt-5-mini / gpt-4.1-nano / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-4.1-nano",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 29,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 21.25,
  "gold": 9,
  "cost_usd": 0.007815,
  "exact_cost": true,
  "vendor_anon": "field-service-g9"
 },
 {
  "cohort": "field-service",
  "arm": "new",
  "config": "gpt-5-mini / gpt-4o-mini / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-4o-mini",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 12,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 1,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 7.25,
  "gold": 9,
  "cost_usd": 0.008663,
  "exact_cost": true,
  "vendor_anon": "field-service-g9"
 },
 {
  "cohort": "field-service",
  "arm": "new",
  "config": "gpt-5-mini / gpt-5-nano / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-5-nano",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 26,
  "receipts": 0,
  "hallucinations": 1,
  "mistags": 0,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": null,
  "gold": 9,
  "cost_usd": 0.012639,
  "exact_cost": true,
  "vendor_anon": "field-service-g9"
 },
 {
  "cohort": "field-service",
  "arm": "new",
  "config": "gpt-5-mini / gpt-5 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-5",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 73,
  "receipts": 65,
  "hallucinations": 0,
  "mistags": 4,
  "raw_escapes": 4,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 6.0,
  "gold": 9,
  "cost_usd": 0.134834,
  "exact_cost": true,
  "vendor_anon": "field-service-g9"
 },
 {
  "cohort": "field-service",
  "arm": "new",
  "config": "gpt-5-mini / o3-mini / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/o3-mini",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 30,
  "receipts": 1,
  "hallucinations": 0,
  "mistags": 1,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 12.25,
  "gold": 9,
  "cost_usd": 0.061751,
  "exact_cost": true,
  "vendor_anon": "field-service-g9"
 },
 {
  "cohort": "field-service",
  "arm": "new",
  "config": "gpt-5-mini / o3 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/o3",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 25,
  "receipts": 4,
  "hallucinations": 0,
  "mistags": 2,
  "raw_escapes": 2,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -5.75,
  "gold": 9,
  "cost_usd": 0.079329,
  "exact_cost": true,
  "vendor_anon": "field-service-g9"
 },
 {
  "cohort": "field-service",
  "arm": "new",
  "config": "gpt-5-mini / o4-mini / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/o4-mini",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 22,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 1,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": null,
  "gold": 9,
  "cost_usd": 0.062232,
  "exact_cost": true,
  "vendor_anon": "field-service-g9"
 },
 {
  "cohort": "field-service",
  "arm": "new",
  "config": "gpt-5-mini / qwen3-32b / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "qwen/qwen3-32b",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 26,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 2,
  "raw_escapes": 2,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 7.88,
  "gold": 9,
  "cost_usd": 0.007259,
  "exact_cost": true,
  "vendor_anon": "field-service-g9"
 },
 {
  "cohort": "ad-sales-publisher",
  "arm": "new",
  "config": "gpt-5-mini / mistral-small-3.2-24b-instruct / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "mistralai/mistral-small-3.2-24b-instruct",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 48,
  "receipts": 0,
  "hallucinations": 1,
  "mistags": 5,
  "raw_escapes": 6,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 5.25,
  "gold": 51,
  "cost_usd": 0.007653,
  "exact_cost": true,
  "vendor_anon": "ad-sales-publisher-g51"
 },
 {
  "cohort": "bd",
  "arm": "new",
  "config": "gpt-5-mini / gemini-2.5-flash-lite / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "google/gemini-2.5-flash-lite",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 32,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -12.75,
  "gold": 31,
  "cost_usd": 0.008845,
  "exact_cost": true,
  "vendor_anon": "bd-g31"
 },
 {
  "cohort": "bd",
  "arm": "new",
  "config": "gpt-5-mini / gemini-2.5-pro / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "google/gemini-2.5-pro",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 35,
  "receipts": 8,
  "hallucinations": 0,
  "mistags": 1,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -4.75,
  "gold": 31,
  "cost_usd": 0.102163,
  "exact_cost": true,
  "vendor_anon": "bd-g31"
 },
 {
  "cohort": "bd",
  "arm": "new",
  "config": "gpt-5-mini / llama-4-scout / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "meta-llama/llama-4-scout",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 4,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": null,
  "gold": 31,
  "cost_usd": 0.007624,
  "exact_cost": true,
  "vendor_anon": "bd-g31"
 },
 {
  "cohort": "bd",
  "arm": "new",
  "config": "gpt-5-mini / mistral-small-3.2-24b-instruct / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "mistralai/mistral-small-3.2-24b-instruct",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 27,
  "receipts": 6,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 1.5,
  "gold": 31,
  "cost_usd": 0.007385,
  "exact_cost": true,
  "vendor_anon": "bd-g31"
 },
 {
  "cohort": "bd",
  "arm": "new",
  "config": "gpt-5-mini / gpt-4.1-mini / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-4.1-mini",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 36,
  "receipts": 0,
  "hallucinations": 9,
  "mistags": 0,
  "raw_escapes": 9,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -4.75,
  "gold": 31,
  "cost_usd": 0.019046,
  "exact_cost": true,
  "vendor_anon": "bd-g31"
 },
 {
  "cohort": "bd",
  "arm": "new",
  "config": "gpt-5-mini / gpt-4.1-nano / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-4.1-nano",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 36,
  "receipts": 0,
  "hallucinations": 10,
  "mistags": 0,
  "raw_escapes": 10,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": null,
  "gold": 31,
  "cost_usd": 0.008346,
  "exact_cost": true,
  "vendor_anon": "bd-g31"
 },
 {
  "cohort": "bd",
  "arm": "new",
  "config": "gpt-5-mini / gpt-4o-mini / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-4o-mini",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 15,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -17.25,
  "gold": 31,
  "cost_usd": 0.009413,
  "exact_cost": true,
  "vendor_anon": "bd-g31"
 },
 {
  "cohort": "bd",
  "arm": "new",
  "config": "gpt-5-mini / gpt-5-nano / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-5-nano",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 52,
  "receipts": 7,
  "hallucinations": 3,
  "mistags": 2,
  "raw_escapes": 5,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": null,
  "gold": 31,
  "cost_usd": 0.011767,
  "exact_cost": true,
  "vendor_anon": "bd-g31"
 },
 {
  "cohort": "bd",
  "arm": "new",
  "config": "gpt-5-mini / gpt-5 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-5",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 59,
  "receipts": 50,
  "hallucinations": 0,
  "mistags": 2,
  "raw_escapes": 2,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 0.25,
  "gold": 31,
  "cost_usd": 0.135039,
  "exact_cost": true,
  "vendor_anon": "bd-g31"
 },
 {
  "cohort": "bd",
  "arm": "new",
  "config": "gpt-5-mini / o3-mini / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/o3-mini",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 24,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -4.75,
  "gold": 31,
  "cost_usd": 0.082065,
  "exact_cost": true,
  "vendor_anon": "bd-g31"
 },
 {
  "cohort": "bd",
  "arm": "new",
  "config": "gpt-5-mini / o3 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/o3",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 26,
  "receipts": 5,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -9.75,
  "gold": 31,
  "cost_usd": 0.096457,
  "exact_cost": true,
  "vendor_anon": "bd-g31"
 },
 {
  "cohort": "bd",
  "arm": "new",
  "config": "gpt-5-mini / o4-mini / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/o4-mini",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 32,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -15.25,
  "gold": 31,
  "cost_usd": 0.076442,
  "exact_cost": true,
  "vendor_anon": "bd-g31"
 },
 {
  "cohort": "bd",
  "arm": "new",
  "config": "gpt-5-mini / qwen3-32b / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "qwen/qwen3-32b",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 13,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 6.5,
  "gold": 31,
  "cost_usd": 0.007318,
  "exact_cost": true,
  "vendor_anon": "bd-g31"
 },
 {
  "cohort": "insurance-brokerage",
  "arm": "new",
  "config": "gpt-5-mini / gemini-2.5-flash-lite / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "google/gemini-2.5-flash-lite",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 0,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": null,
  "gold": 41,
  "cost_usd": 0.005507,
  "exact_cost": true,
  "vendor_anon": "insurance-brokerage-g41"
 },
 {
  "cohort": "insurance-brokerage",
  "arm": "new",
  "config": "gpt-5-mini / gemini-2.5-pro / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "google/gemini-2.5-pro",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 39,
  "receipts": 7,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 5.25,
  "gold": 41,
  "cost_usd": 0.109789,
  "exact_cost": true,
  "vendor_anon": "insurance-brokerage-g41"
 },
 {
  "cohort": "insurance-brokerage",
  "arm": "new",
  "config": "gpt-5-mini / llama-4-scout / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "meta-llama/llama-4-scout",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 15,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": null,
  "gold": 41,
  "cost_usd": 0.006182,
  "exact_cost": true,
  "vendor_anon": "insurance-brokerage-g41"
 },
 {
  "cohort": "insurance-brokerage",
  "arm": "new",
  "config": "gpt-5-mini / mistral-small-3.2-24b-instruct / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "mistralai/mistral-small-3.2-24b-instruct",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 49,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 9.25,
  "gold": 41,
  "cost_usd": 0.006646,
  "exact_cost": true,
  "vendor_anon": "insurance-brokerage-g41"
 },
 {
  "cohort": "insurance-brokerage",
  "arm": "new",
  "config": "gpt-5-mini / gpt-4.1-mini / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-4.1-mini",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 37,
  "receipts": 10,
  "hallucinations": 1,
  "mistags": 0,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 10.25,
  "gold": 41,
  "cost_usd": 0.016173,
  "exact_cost": true,
  "vendor_anon": "insurance-brokerage-g41"
 },
 {
  "cohort": "insurance-brokerage",
  "arm": "new",
  "config": "gpt-5-mini / gpt-4.1-nano / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-4.1-nano",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 32,
  "receipts": 0,
  "hallucinations": 1,
  "mistags": 0,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -4.25,
  "gold": 41,
  "cost_usd": 0.006633,
  "exact_cost": true,
  "vendor_anon": "insurance-brokerage-g41"
 },
 {
  "cohort": "insurance-brokerage",
  "arm": "new",
  "config": "gpt-5-mini / gpt-4o-mini / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-4o-mini",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 12,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 20.25,
  "gold": 41,
  "cost_usd": 0.007097,
  "exact_cost": true,
  "vendor_anon": "insurance-brokerage-g41"
 },
 {
  "cohort": "insurance-brokerage",
  "arm": "new",
  "config": "gpt-5-mini / gpt-5-nano / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-5-nano",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 36,
  "receipts": 7,
  "hallucinations": 6,
  "mistags": 4,
  "raw_escapes": 10,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": null,
  "gold": 41,
  "cost_usd": 0.011869,
  "exact_cost": true,
  "vendor_anon": "insurance-brokerage-g41"
 },
 {
  "cohort": "insurance-brokerage",
  "arm": "new",
  "config": "gpt-5-mini / gpt-5 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-5",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 54,
  "receipts": 49,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -22.75,
  "gold": 41,
  "cost_usd": 0.130357,
  "exact_cost": true,
  "vendor_anon": "insurance-brokerage-g41"
 },
 {
  "cohort": "insurance-brokerage",
  "arm": "new",
  "config": "gpt-5-mini / o3-mini / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/o3-mini",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 25,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -26.5,
  "gold": 41,
  "cost_usd": 0.071055,
  "exact_cost": true,
  "vendor_anon": "insurance-brokerage-g41"
 },
 {
  "cohort": "insurance-brokerage",
  "arm": "new",
  "config": "gpt-5-mini / o3 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/o3",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 25,
  "receipts": 6,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -8.5,
  "gold": 41,
  "cost_usd": 0.070629,
  "exact_cost": true,
  "vendor_anon": "insurance-brokerage-g41"
 },
 {
  "cohort": "insurance-brokerage",
  "arm": "new",
  "config": "gpt-5-mini / o4-mini / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/o4-mini",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 37,
  "receipts": 20,
  "hallucinations": 1,
  "mistags": 0,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -30.75,
  "gold": 41,
  "cost_usd": 0.07232,
  "exact_cost": true,
  "vendor_anon": "insurance-brokerage-g41"
 },
 {
  "cohort": "insurance-brokerage",
  "arm": "new",
  "config": "gpt-5-mini / qwen3-32b / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "qwen/qwen3-32b",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 26,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -5.0,
  "gold": 41,
  "cost_usd": 0.006219,
  "exact_cost": true,
  "vendor_anon": "insurance-brokerage-g41"
 },
 {
  "cohort": "pharma-lifesci-crm",
  "arm": "new",
  "config": "gpt-5-mini / mistral-small-3.2-24b-instruct / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "mistralai/mistral-small-3.2-24b-instruct",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 13,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 1,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 15.25,
  "gold": 51,
  "cost_usd": 0.006261,
  "exact_cost": true,
  "vendor_anon": "pharma-lifesci-crm-g51"
 },
 {
  "cohort": "cx",
  "arm": "new",
  "config": "gpt-5-mini / gemini-2.5-flash-lite / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "google/gemini-2.5-flash-lite",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 50,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 7,
  "raw_escapes": 7,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 7.75,
  "gold": 33,
  "cost_usd": 0.0103,
  "exact_cost": true,
  "vendor_anon": "cx-g33"
 },
 {
  "cohort": "cx",
  "arm": "new",
  "config": "gpt-5-mini / gemini-2.5-pro / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "google/gemini-2.5-pro",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 47,
  "receipts": 2,
  "hallucinations": 0,
  "mistags": 6,
  "raw_escapes": 6,
  "postgate_escapes": 6,
  "gate_blocked": false,
  "score_delta": -5.5,
  "gold": 33,
  "cost_usd": 0.140033,
  "exact_cost": true,
  "vendor_anon": "cx-g33"
 },
 {
  "cohort": "cx",
  "arm": "new",
  "config": "gpt-5-mini / llama-4-scout / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "meta-llama/llama-4-scout",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 4,
  "receipts": 1,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 137.0,
  "gold": 33,
  "cost_usd": 0.008324,
  "exact_cost": true,
  "vendor_anon": "cx-g33"
 },
 {
  "cohort": "cx",
  "arm": "new",
  "config": "gpt-5-mini / mistral-small-3.2-24b-instruct / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "mistralai/mistral-small-3.2-24b-instruct",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 18,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 2,
  "raw_escapes": 2,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 4.5,
  "gold": 33,
  "cost_usd": 0.007849,
  "exact_cost": true,
  "vendor_anon": "cx-g33"
 },
 {
  "cohort": "cx",
  "arm": "new",
  "config": "gpt-5-mini / gpt-4.1-mini / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-4.1-mini",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 52,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 6,
  "raw_escapes": 6,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 5.75,
  "gold": 33,
  "cost_usd": 0.021764,
  "exact_cost": true,
  "vendor_anon": "cx-g33"
 },
 {
  "cohort": "cx",
  "arm": "new",
  "config": "gpt-5-mini / gpt-4.1-nano / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-4.1-nano",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 25,
  "receipts": 0,
  "hallucinations": 17,
  "mistags": 1,
  "raw_escapes": 18,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -19.75,
  "gold": 33,
  "cost_usd": 0.008994,
  "exact_cost": true,
  "vendor_anon": "cx-g33"
 },
 {
  "cohort": "cx",
  "arm": "new",
  "config": "gpt-5-mini / gpt-4o-mini / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-4o-mini",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 23,
  "receipts": 0,
  "hallucinations": 1,
  "mistags": 4,
  "raw_escapes": 5,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 21.75,
  "gold": 33,
  "cost_usd": 0.010918,
  "exact_cost": true,
  "vendor_anon": "cx-g33"
 },
 {
  "cohort": "cx",
  "arm": "new",
  "config": "gpt-5-mini / gpt-5-nano / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-5-nano",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 55,
  "receipts": 6,
  "hallucinations": 0,
  "mistags": 9,
  "raw_escapes": 9,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": null,
  "gold": 33,
  "cost_usd": 0.014552,
  "exact_cost": true,
  "vendor_anon": "cx-g33"
 },
 {
  "cohort": "cx",
  "arm": "new",
  "config": "gpt-5-mini / o3-mini / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/o3-mini",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 26,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 4,
  "raw_escapes": 4,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -5.5,
  "gold": 33,
  "cost_usd": 0.072935,
  "exact_cost": true,
  "vendor_anon": "cx-g33"
 },
 {
  "cohort": "cx",
  "arm": "new",
  "config": "gpt-5-mini / o3 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/o3",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 32,
  "receipts": 20,
  "hallucinations": 0,
  "mistags": 6,
  "raw_escapes": 6,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -4.25,
  "gold": 33,
  "cost_usd": 0.089562,
  "exact_cost": true,
  "vendor_anon": "cx-g33"
 },
 {
  "cohort": "cx",
  "arm": "new",
  "config": "gpt-5-mini / o4-mini / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/o4-mini",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 24,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 3,
  "raw_escapes": 3,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 0.75,
  "gold": 33,
  "cost_usd": 0.064982,
  "exact_cost": true,
  "vendor_anon": "cx-g33"
 },
 {
  "cohort": "cx",
  "arm": "new",
  "config": "gpt-5-mini / qwen3-32b / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "qwen/qwen3-32b",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 26,
  "receipts": 0,
  "hallucinations": 1,
  "mistags": 4,
  "raw_escapes": 5,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -9.25,
  "gold": 33,
  "cost_usd": 0.00804,
  "exact_cost": true,
  "vendor_anon": "cx-g33"
 },
 {
  "cohort": "pharma-lifesci-crm",
  "arm": "new",
  "config": "gpt-5-mini / mistral-small-3.2-24b-instruct / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "mistralai/mistral-small-3.2-24b-instruct",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 48,
  "receipts": 0,
  "hallucinations": 2,
  "mistags": 2,
  "raw_escapes": 4,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -0.5,
  "gold": 40,
  "cost_usd": 0.006912,
  "exact_cost": true,
  "vendor_anon": "pharma-lifesci-crm-g40"
 },
 {
  "cohort": "customer-success",
  "arm": "new",
  "config": "gpt-5-mini / mistral-small-3.2-24b-instruct / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "mistralai/mistral-small-3.2-24b-instruct",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 4,
  "receipts": 1,
  "hallucinations": 1,
  "mistags": 0,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 21.75,
  "gold": 33,
  "cost_usd": 0.006027,
  "exact_cost": true,
  "vendor_anon": "customer-success-g33"
 },
 {
  "cohort": "healthcare-clinical",
  "arm": "new",
  "config": "gpt-5-mini / gemini-2.5-flash-lite / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "google/gemini-2.5-flash-lite",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 50,
  "receipts": 0,
  "hallucinations": 1,
  "mistags": 7,
  "raw_escapes": 8,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 13.0,
  "gold": 27,
  "cost_usd": 0.007683,
  "exact_cost": true,
  "vendor_anon": "healthcare-clinical-g27"
 },
 {
  "cohort": "healthcare-clinical",
  "arm": "new",
  "config": "gpt-5-mini / gemini-2.5-pro / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "google/gemini-2.5-pro",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 35,
  "receipts": 2,
  "hallucinations": 0,
  "mistags": 7,
  "raw_escapes": 7,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 3.0,
  "gold": 27,
  "cost_usd": 0.100167,
  "exact_cost": true,
  "vendor_anon": "healthcare-clinical-g27"
 },
 {
  "cohort": "healthcare-clinical",
  "arm": "new",
  "config": "gpt-5-mini / llama-4-scout / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "meta-llama/llama-4-scout",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 4,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -12.75,
  "gold": 27,
  "cost_usd": 0.006429,
  "exact_cost": true,
  "vendor_anon": "healthcare-clinical-g27"
 },
 {
  "cohort": "healthcare-clinical",
  "arm": "new",
  "config": "gpt-5-mini / mistral-small-3.2-24b-instruct / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "mistralai/mistral-small-3.2-24b-instruct",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 4,
  "receipts": 1,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 5.5,
  "gold": 27,
  "cost_usd": 0.006611,
  "exact_cost": true,
  "vendor_anon": "healthcare-clinical-g27"
 },
 {
  "cohort": "healthcare-clinical",
  "arm": "new",
  "config": "gpt-5-mini / gpt-4.1-mini / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-4.1-mini",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 33,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 1.25,
  "gold": 27,
  "cost_usd": 0.017355,
  "exact_cost": true,
  "vendor_anon": "healthcare-clinical-g27"
 },
 {
  "cohort": "healthcare-clinical",
  "arm": "new",
  "config": "gpt-5-mini / gpt-4.1-nano / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-4.1-nano",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 30,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 5,
  "raw_escapes": 5,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 19.75,
  "gold": 27,
  "cost_usd": 0.006974,
  "exact_cost": true,
  "vendor_anon": "healthcare-clinical-g27"
 },
 {
  "cohort": "healthcare-clinical",
  "arm": "new",
  "config": "gpt-5-mini / gpt-4o-mini / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-4o-mini",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 22,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 1,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 34.25,
  "gold": 27,
  "cost_usd": 0.008594,
  "exact_cost": true,
  "vendor_anon": "healthcare-clinical-g27"
 },
 {
  "cohort": "healthcare-clinical",
  "arm": "new",
  "config": "gpt-5-mini / gpt-5-nano / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-5-nano",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 41,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 2,
  "raw_escapes": 2,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": null,
  "gold": 27,
  "cost_usd": 0.012693,
  "exact_cost": true,
  "vendor_anon": "healthcare-clinical-g27"
 },
 {
  "cohort": "healthcare-clinical",
  "arm": "new",
  "config": "gpt-5-mini / gpt-5 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-5",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 69,
  "receipts": 48,
  "hallucinations": 0,
  "mistags": 10,
  "raw_escapes": 10,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -3.75,
  "gold": 27,
  "cost_usd": 0.148065,
  "exact_cost": true,
  "vendor_anon": "healthcare-clinical-g27"
 },
 {
  "cohort": "healthcare-clinical",
  "arm": "new",
  "config": "gpt-5-mini / o3-mini / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/o3-mini",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 28,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 2,
  "raw_escapes": 2,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 9.25,
  "gold": 27,
  "cost_usd": 0.065851,
  "exact_cost": true,
  "vendor_anon": "healthcare-clinical-g27"
 },
 {
  "cohort": "healthcare-clinical",
  "arm": "new",
  "config": "gpt-5-mini / o3 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/o3",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 29,
  "receipts": 8,
  "hallucinations": 0,
  "mistags": 3,
  "raw_escapes": 3,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -5.0,
  "gold": 27,
  "cost_usd": 0.075399,
  "exact_cost": true,
  "vendor_anon": "healthcare-clinical-g27"
 },
 {
  "cohort": "healthcare-clinical",
  "arm": "new",
  "config": "gpt-5-mini / o4-mini / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/o4-mini",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 31,
  "receipts": 4,
  "hallucinations": 1,
  "mistags": 4,
  "raw_escapes": 5,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -7.5,
  "gold": 27,
  "cost_usd": 0.06651,
  "exact_cost": true,
  "vendor_anon": "healthcare-clinical-g27"
 },
 {
  "cohort": "healthcare-clinical",
  "arm": "new",
  "config": "gpt-5-mini / qwen3-32b / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "qwen/qwen3-32b",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 29,
  "receipts": 10,
  "hallucinations": 0,
  "mistags": 3,
  "raw_escapes": 3,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -2.5,
  "gold": 27,
  "cost_usd": 0.008011,
  "exact_cost": true,
  "vendor_anon": "healthcare-clinical-g27"
 },
 {
  "cohort": "healthcare-rcm",
  "arm": "new",
  "config": "gpt-5-mini / mistral-small-3.2-24b-instruct / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "mistralai/mistral-small-3.2-24b-instruct",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 4,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 21.5,
  "gold": 31,
  "cost_usd": 0.007821,
  "exact_cost": true,
  "vendor_anon": "healthcare-rcm-g31"
 },
 {
  "cohort": "cx",
  "arm": "new",
  "config": "gpt-5-mini / mistral-small-3.2-24b-instruct / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "mistralai/mistral-small-3.2-24b-instruct",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 40,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 1,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 0.25,
  "gold": 56,
  "cost_usd": 0.008141,
  "exact_cost": true,
  "vendor_anon": "cx-g56"
 },
 {
  "cohort": "real-estate",
  "arm": "new",
  "config": "gpt-5-mini / mistral-small-3.2-24b-instruct / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "mistralai/mistral-small-3.2-24b-instruct",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 37,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -7.75,
  "gold": 34,
  "cost_usd": 0.007181,
  "exact_cost": true,
  "vendor_anon": "real-estate-g34"
 },
 {
  "cohort": "healthcare-clinical",
  "arm": "new",
  "config": "gpt-5-mini / gemini-2.5-flash-lite / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "google/gemini-2.5-flash-lite",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 23,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 10,
  "raw_escapes": 10,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 3.88,
  "gold": 38,
  "cost_usd": 0.009553,
  "exact_cost": true,
  "vendor_anon": "healthcare-clinical-g38"
 },
 {
  "cohort": "healthcare-clinical",
  "arm": "new",
  "config": "gpt-5-mini / gemini-2.5-pro / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "google/gemini-2.5-pro",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 23,
  "receipts": 4,
  "hallucinations": 1,
  "mistags": 9,
  "raw_escapes": 10,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -2.87,
  "gold": 38,
  "cost_usd": 0.142118,
  "exact_cost": true,
  "vendor_anon": "healthcare-clinical-g38"
 },
 {
  "cohort": "healthcare-clinical",
  "arm": "new",
  "config": "gpt-5-mini / llama-4-scout / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "meta-llama/llama-4-scout",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 8,
  "receipts": 0,
  "hallucinations": 3,
  "mistags": 0,
  "raw_escapes": 3,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -38.0,
  "gold": 38,
  "cost_usd": 0.007997,
  "exact_cost": true,
  "vendor_anon": "healthcare-clinical-g38"
 },
 {
  "cohort": "healthcare-clinical",
  "arm": "new",
  "config": "gpt-5-mini / mistral-small-3.2-24b-instruct / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "mistralai/mistral-small-3.2-24b-instruct",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 21,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 7,
  "raw_escapes": 7,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 18.25,
  "gold": 38,
  "cost_usd": 0.007957,
  "exact_cost": true,
  "vendor_anon": "healthcare-clinical-g38"
 },
 {
  "cohort": "healthcare-clinical",
  "arm": "new",
  "config": "gpt-5-mini / gpt-4.1-mini / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-4.1-mini",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 32,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 10,
  "raw_escapes": 10,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -7.25,
  "gold": 38,
  "cost_usd": 0.020064,
  "exact_cost": true,
  "vendor_anon": "healthcare-clinical-g38"
 },
 {
  "cohort": "healthcare-clinical",
  "arm": "new",
  "config": "gpt-5-mini / gpt-4.1-nano / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-4.1-nano",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 24,
  "receipts": 0,
  "hallucinations": 1,
  "mistags": 0,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -38.0,
  "gold": 38,
  "cost_usd": 0.008472,
  "exact_cost": true,
  "vendor_anon": "healthcare-clinical-g38"
 },
 {
  "cohort": "healthcare-clinical",
  "arm": "new",
  "config": "gpt-5-mini / gpt-4o-mini / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-4o-mini",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 14,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 5,
  "raw_escapes": 5,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -48.0,
  "gold": 38,
  "cost_usd": 0.00948,
  "exact_cost": true,
  "vendor_anon": "healthcare-clinical-g38"
 },
 {
  "cohort": "healthcare-clinical",
  "arm": "new",
  "config": "gpt-5-mini / gpt-5-nano / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-5-nano",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 41,
  "receipts": 0,
  "hallucinations": 20,
  "mistags": 8,
  "raw_escapes": 28,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": null,
  "gold": 38,
  "cost_usd": 0.013898,
  "exact_cost": true,
  "vendor_anon": "healthcare-clinical-g38"
 },
 {
  "cohort": "healthcare-clinical",
  "arm": "new",
  "config": "gpt-5-mini / gpt-5 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-5",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 69,
  "receipts": 30,
  "hallucinations": 0,
  "mistags": 21,
  "raw_escapes": 21,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -14.25,
  "gold": 38,
  "cost_usd": 0.154302,
  "exact_cost": true,
  "vendor_anon": "healthcare-clinical-g38"
 },
 {
  "cohort": "healthcare-clinical",
  "arm": "new",
  "config": "gpt-5-mini / o3-mini / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/o3-mini",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 26,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 7,
  "raw_escapes": 7,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -18.0,
  "gold": 38,
  "cost_usd": 0.068944,
  "exact_cost": true,
  "vendor_anon": "healthcare-clinical-g38"
 },
 {
  "cohort": "healthcare-clinical",
  "arm": "new",
  "config": "gpt-5-mini / o3 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/o3",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 28,
  "receipts": 9,
  "hallucinations": 1,
  "mistags": 10,
  "raw_escapes": 11,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -2.37,
  "gold": 38,
  "cost_usd": 0.090884,
  "exact_cost": true,
  "vendor_anon": "healthcare-clinical-g38"
 },
 {
  "cohort": "healthcare-clinical",
  "arm": "new",
  "config": "gpt-5-mini / o4-mini / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/o4-mini",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 37,
  "receipts": 9,
  "hallucinations": 6,
  "mistags": 9,
  "raw_escapes": 15,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -21.0,
  "gold": 38,
  "cost_usd": 0.072792,
  "exact_cost": true,
  "vendor_anon": "healthcare-clinical-g38"
 },
 {
  "cohort": "healthcare-clinical",
  "arm": "new",
  "config": "gpt-5-mini / qwen3-32b / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "qwen/qwen3-32b",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 22,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 9,
  "raw_escapes": 9,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 21.5,
  "gold": 38,
  "cost_usd": 0.008086,
  "exact_cost": true,
  "vendor_anon": "healthcare-clinical-g38"
 },
 {
  "cohort": "higher-ed-crm",
  "arm": "new",
  "config": "gpt-5-mini / mistral-small-3.2-24b-instruct / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "mistralai/mistral-small-3.2-24b-instruct",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 10,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 1,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 6.75,
  "gold": 27,
  "cost_usd": 0.006912,
  "exact_cost": true,
  "vendor_anon": "higher-ed-crm-g27"
 },
 {
  "cohort": "field-service",
  "arm": "new",
  "config": "gpt-5-mini / command-r-08-2024 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "cohere/command-r-08-2024",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 4,
  "receipts": 0,
  "hallucinations": 1,
  "mistags": 0,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": null,
  "gold": 1,
  "cost_usd": 0.007712,
  "exact_cost": true,
  "vendor_anon": "field-service-g1"
 },
 {
  "cohort": "field-service",
  "arm": "new",
  "config": "gpt-5-mini / gemini-2.5-flash-lite / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "google/gemini-2.5-flash-lite",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 23,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 1,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -7.75,
  "gold": 1,
  "cost_usd": 0.007303,
  "exact_cost": true,
  "vendor_anon": "field-service-g1"
 },
 {
  "cohort": "field-service",
  "arm": "new",
  "config": "gpt-5-mini / gemini-2.5-pro / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "google/gemini-2.5-pro",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 22,
  "receipts": 8,
  "hallucinations": 0,
  "mistags": 4,
  "raw_escapes": 4,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -1.0,
  "gold": 1,
  "cost_usd": 0.096633,
  "exact_cost": true,
  "vendor_anon": "field-service-g1"
 },
 {
  "cohort": "field-service",
  "arm": "new",
  "config": "gpt-5-mini / llama-4-scout / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "meta-llama/llama-4-scout",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 18,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 1,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 12.5,
  "gold": 1,
  "cost_usd": 0.006252,
  "exact_cost": true,
  "vendor_anon": "field-service-g1"
 },
 {
  "cohort": "field-service",
  "arm": "new",
  "config": "gpt-5-mini / mistral-small-3.2-24b-instruct / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "mistralai/mistral-small-3.2-24b-instruct",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 34,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 2,
  "raw_escapes": 2,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -1.0,
  "gold": 1,
  "cost_usd": 0.005973,
  "exact_cost": true,
  "vendor_anon": "field-service-g1"
 },
 {
  "cohort": "field-service",
  "arm": "new",
  "config": "gpt-5-mini / gpt-4.1-mini / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-4.1-mini",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 29,
  "receipts": 0,
  "hallucinations": 7,
  "mistags": 1,
  "raw_escapes": 8,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -4.0,
  "gold": 1,
  "cost_usd": 0.015375,
  "exact_cost": true,
  "vendor_anon": "field-service-g1"
 },
 {
  "cohort": "field-service",
  "arm": "new",
  "config": "gpt-5-mini / gpt-4.1-nano / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-4.1-nano",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 28,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 3,
  "raw_escapes": 3,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 7.75,
  "gold": 1,
  "cost_usd": 0.007374,
  "exact_cost": true,
  "vendor_anon": "field-service-g1"
 },
 {
  "cohort": "field-service",
  "arm": "new",
  "config": "gpt-5-mini / gpt-4o-mini / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-4o-mini",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 11,
  "receipts": 0,
  "hallucinations": 1,
  "mistags": 3,
  "raw_escapes": 4,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -41.0,
  "gold": 1,
  "cost_usd": 0.009235,
  "exact_cost": true,
  "vendor_anon": "field-service-g1"
 },
 {
  "cohort": "field-service",
  "arm": "new",
  "config": "gpt-5-mini / gpt-5 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-5",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 48,
  "receipts": 39,
  "hallucinations": 0,
  "mistags": 10,
  "raw_escapes": 10,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -4.0,
  "gold": 1,
  "cost_usd": 0.139045,
  "exact_cost": true,
  "vendor_anon": "field-service-g1"
 },
 {
  "cohort": "field-service",
  "arm": "new",
  "config": "gpt-5-mini / o3-mini / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/o3-mini",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 20,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 4,
  "raw_escapes": 4,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -6.0,
  "gold": 1,
  "cost_usd": 0.073891,
  "exact_cost": true,
  "vendor_anon": "field-service-g1"
 },
 {
  "cohort": "field-service",
  "arm": "new",
  "config": "gpt-5-mini / o3 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/o3",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 18,
  "receipts": 3,
  "hallucinations": 0,
  "mistags": 3,
  "raw_escapes": 3,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -1.0,
  "gold": 1,
  "cost_usd": 0.068253,
  "exact_cost": true,
  "vendor_anon": "field-service-g1"
 },
 {
  "cohort": "field-service",
  "arm": "new",
  "config": "gpt-5-mini / o4-mini / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/o4-mini",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 33,
  "receipts": 4,
  "hallucinations": 0,
  "mistags": 4,
  "raw_escapes": 4,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -6.0,
  "gold": 1,
  "cost_usd": 0.066313,
  "exact_cost": true,
  "vendor_anon": "field-service-g1"
 },
 {
  "cohort": "field-service",
  "arm": "new",
  "config": "gpt-5-mini / qwen3-32b / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "qwen/qwen3-32b",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 20,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 2,
  "raw_escapes": 2,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -1.0,
  "gold": 1,
  "cost_usd": 0.006235,
  "exact_cost": true,
  "vendor_anon": "field-service-g1"
 },
 {
  "cohort": "bd",
  "arm": "new",
  "config": "gpt-5-mini / gemini-2.5-flash-lite / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "google/gemini-2.5-flash-lite",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 41,
  "receipts": 0,
  "hallucinations": 1,
  "mistags": 0,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -7.75,
  "gold": 54,
  "cost_usd": 0.009295,
  "exact_cost": true,
  "vendor_anon": "bd-g54"
 },
 {
  "cohort": "bd",
  "arm": "new",
  "config": "gpt-5-mini / gemini-2.5-pro / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "google/gemini-2.5-pro",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 34,
  "receipts": 5,
  "hallucinations": 0,
  "mistags": 1,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -0.25,
  "gold": 54,
  "cost_usd": 0.120133,
  "exact_cost": true,
  "vendor_anon": "bd-g54"
 },
 {
  "cohort": "bd",
  "arm": "new",
  "config": "gpt-5-mini / llama-4-scout / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "meta-llama/llama-4-scout",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 16,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": null,
  "gold": 54,
  "cost_usd": 0.007756,
  "exact_cost": true,
  "vendor_anon": "bd-g54"
 },
 {
  "cohort": "bd",
  "arm": "new",
  "config": "gpt-5-mini / mistral-small-3.2-24b-instruct / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "mistralai/mistral-small-3.2-24b-instruct",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 18,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 1,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -1.0,
  "gold": 54,
  "cost_usd": 0.007226,
  "exact_cost": true,
  "vendor_anon": "bd-g54"
 },
 {
  "cohort": "bd",
  "arm": "new",
  "config": "gpt-5-mini / gpt-4.1-mini / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-4.1-mini",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 28,
  "receipts": 0,
  "hallucinations": 7,
  "mistags": 0,
  "raw_escapes": 7,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 8.25,
  "gold": 54,
  "cost_usd": 0.020584,
  "exact_cost": true,
  "vendor_anon": "bd-g54"
 },
 {
  "cohort": "bd",
  "arm": "new",
  "config": "gpt-5-mini / gpt-4.1-nano / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-4.1-nano",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 34,
  "receipts": 0,
  "hallucinations": 26,
  "mistags": 0,
  "raw_escapes": 26,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -4.5,
  "gold": 54,
  "cost_usd": 0.008427,
  "exact_cost": true,
  "vendor_anon": "bd-g54"
 },
 {
  "cohort": "bd",
  "arm": "new",
  "config": "gpt-5-mini / gpt-4o-mini / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-4o-mini",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 9,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -18.75,
  "gold": 54,
  "cost_usd": 0.009773,
  "exact_cost": true,
  "vendor_anon": "bd-g54"
 },
 {
  "cohort": "bd",
  "arm": "new",
  "config": "gpt-5-mini / gpt-5-nano / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-5-nano",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 53,
  "receipts": 0,
  "hallucinations": 3,
  "mistags": 0,
  "raw_escapes": 3,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": null,
  "gold": 54,
  "cost_usd": 0.012429,
  "exact_cost": true,
  "vendor_anon": "bd-g54"
 },
 {
  "cohort": "bd",
  "arm": "new",
  "config": "gpt-5-mini / gpt-5 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-5",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 59,
  "receipts": 54,
  "hallucinations": 1,
  "mistags": 1,
  "raw_escapes": 2,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -5.25,
  "gold": 54,
  "cost_usd": 0.161415,
  "exact_cost": true,
  "vendor_anon": "bd-g54"
 },
 {
  "cohort": "bd",
  "arm": "new",
  "config": "gpt-5-mini / o3-mini / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/o3-mini",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 26,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 1.0,
  "gold": 54,
  "cost_usd": 0.069658,
  "exact_cost": true,
  "vendor_anon": "bd-g54"
 },
 {
  "cohort": "bd",
  "arm": "new",
  "config": "gpt-5-mini / o3 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/o3",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 22,
  "receipts": 6,
  "hallucinations": 2,
  "mistags": 0,
  "raw_escapes": 2,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -7.75,
  "gold": 54,
  "cost_usd": 0.086378,
  "exact_cost": true,
  "vendor_anon": "bd-g54"
 },
 {
  "cohort": "bd",
  "arm": "new",
  "config": "gpt-5-mini / o4-mini / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/o4-mini",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 44,
  "receipts": 5,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -0.25,
  "gold": 54,
  "cost_usd": 0.075574,
  "exact_cost": true,
  "vendor_anon": "bd-g54"
 },
 {
  "cohort": "bd",
  "arm": "new",
  "config": "gpt-5-mini / qwen3-32b / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "qwen/qwen3-32b",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 17,
  "receipts": 4,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 2.0,
  "gold": 54,
  "cost_usd": 0.00822,
  "exact_cost": true,
  "vendor_anon": "bd-g54"
 },
 {
  "cohort": "higher-ed-crm",
  "arm": "new",
  "config": "gpt-5-mini / mistral-small-3.2-24b-instruct / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "mistralai/mistral-small-3.2-24b-instruct",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 22,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 15.25,
  "gold": 46,
  "cost_usd": 0.007086,
  "exact_cost": true,
  "vendor_anon": "higher-ed-crm-g46"
 },
 {
  "cohort": "ad-sales-publisher",
  "arm": "new",
  "config": "gpt-5-mini / gemini-2.5-flash-lite / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "google/gemini-2.5-flash-lite",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 25,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 1,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": null,
  "gold": 44,
  "cost_usd": 0.008156,
  "exact_cost": true,
  "vendor_anon": "ad-sales-publisher-g44"
 },
 {
  "cohort": "ad-sales-publisher",
  "arm": "new",
  "config": "gpt-5-mini / gemini-2.5-pro / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "google/gemini-2.5-pro",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 30,
  "receipts": 8,
  "hallucinations": 0,
  "mistags": 1,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -0.25,
  "gold": 44,
  "cost_usd": 0.121251,
  "exact_cost": true,
  "vendor_anon": "ad-sales-publisher-g44"
 },
 {
  "cohort": "ad-sales-publisher",
  "arm": "new",
  "config": "gpt-5-mini / llama-4-scout / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "meta-llama/llama-4-scout",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 14,
  "receipts": 0,
  "hallucinations": 1,
  "mistags": 0,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 176.0,
  "gold": 44,
  "cost_usd": 0.007195,
  "exact_cost": true,
  "vendor_anon": "ad-sales-publisher-g44"
 },
 {
  "cohort": "ad-sales-publisher",
  "arm": "new",
  "config": "gpt-5-mini / mistral-small-3.2-24b-instruct / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "mistralai/mistral-small-3.2-24b-instruct",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 53,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 6,
  "raw_escapes": 6,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": null,
  "gold": 44,
  "cost_usd": 0.007535,
  "exact_cost": true,
  "vendor_anon": "ad-sales-publisher-g44"
 },
 {
  "cohort": "ad-sales-publisher",
  "arm": "new",
  "config": "gpt-5-mini / gpt-4.1-mini / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-4.1-mini",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 42,
  "receipts": 0,
  "hallucinations": 34,
  "mistags": 0,
  "raw_escapes": 34,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -18.25,
  "gold": 44,
  "cost_usd": 0.016876,
  "exact_cost": true,
  "vendor_anon": "ad-sales-publisher-g44"
 },
 {
  "cohort": "ad-sales-publisher",
  "arm": "new",
  "config": "gpt-5-mini / gpt-4.1-nano / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-4.1-nano",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 28,
  "receipts": 0,
  "hallucinations": 2,
  "mistags": 0,
  "raw_escapes": 2,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -24.75,
  "gold": 44,
  "cost_usd": 0.007616,
  "exact_cost": true,
  "vendor_anon": "ad-sales-publisher-g44"
 },
 {
  "cohort": "ad-sales-publisher",
  "arm": "new",
  "config": "gpt-5-mini / gpt-4o-mini / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-4o-mini",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 17,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -14.62,
  "gold": 44,
  "cost_usd": 0.008908,
  "exact_cost": true,
  "vendor_anon": "ad-sales-publisher-g44"
 },
 {
  "cohort": "ad-sales-publisher",
  "arm": "new",
  "config": "gpt-5-mini / gpt-5-nano / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-5-nano",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 56,
  "receipts": 0,
  "hallucinations": 17,
  "mistags": 0,
  "raw_escapes": 17,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": null,
  "gold": 44,
  "cost_usd": 0.012255,
  "exact_cost": true,
  "vendor_anon": "ad-sales-publisher-g44"
 },
 {
  "cohort": "ad-sales-publisher",
  "arm": "new",
  "config": "gpt-5-mini / gpt-5 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-5",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 89,
  "receipts": 51,
  "hallucinations": 1,
  "mistags": 0,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 12.25,
  "gold": 44,
  "cost_usd": 0.138818,
  "exact_cost": true,
  "vendor_anon": "ad-sales-publisher-g44"
 },
 {
  "cohort": "ad-sales-publisher",
  "arm": "new",
  "config": "gpt-5-mini / o3-mini / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/o3-mini",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 24,
  "receipts": 8,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 10.38,
  "gold": 44,
  "cost_usd": 0.074103,
  "exact_cost": true,
  "vendor_anon": "ad-sales-publisher-g44"
 },
 {
  "cohort": "ad-sales-publisher",
  "arm": "new",
  "config": "gpt-5-mini / o3 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/o3",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 24,
  "receipts": 2,
  "hallucinations": 0,
  "mistags": 1,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 7.25,
  "gold": 44,
  "cost_usd": 0.083258,
  "exact_cost": true,
  "vendor_anon": "ad-sales-publisher-g44"
 },
 {
  "cohort": "ad-sales-publisher",
  "arm": "new",
  "config": "gpt-5-mini / o4-mini / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/o4-mini",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 27,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 8.5,
  "gold": 44,
  "cost_usd": 0.056531,
  "exact_cost": true,
  "vendor_anon": "ad-sales-publisher-g44"
 },
 {
  "cohort": "ad-sales-publisher",
  "arm": "new",
  "config": "gpt-5-mini / qwen3-32b / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "qwen/qwen3-32b",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 16,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -9.62,
  "gold": 44,
  "cost_usd": 0.007339,
  "exact_cost": true,
  "vendor_anon": "ad-sales-publisher-g44"
 },
 {
  "cohort": "fintech",
  "arm": "new",
  "config": "gpt-5-mini / gemini-2.5-flash-lite / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "google/gemini-2.5-flash-lite",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 34,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 7,
  "raw_escapes": 7,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -11.75,
  "gold": 40,
  "cost_usd": 0.008924,
  "exact_cost": true,
  "vendor_anon": "fintech-g40"
 },
 {
  "cohort": "fintech",
  "arm": "new",
  "config": "gpt-5-mini / gemini-2.5-pro / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "google/gemini-2.5-pro",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 25,
  "receipts": 2,
  "hallucinations": 0,
  "mistags": 7,
  "raw_escapes": 7,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -11.75,
  "gold": 40,
  "cost_usd": 0.10937,
  "exact_cost": true,
  "vendor_anon": "fintech-g40"
 },
 {
  "cohort": "fintech",
  "arm": "new",
  "config": "gpt-5-mini / llama-4-scout / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "meta-llama/llama-4-scout",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 4,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": null,
  "gold": 40,
  "cost_usd": 0.007794,
  "exact_cost": true,
  "vendor_anon": "fintech-g40"
 },
 {
  "cohort": "fintech",
  "arm": "new",
  "config": "gpt-5-mini / mistral-small-3.2-24b-instruct / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "mistralai/mistral-small-3.2-24b-instruct",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 24,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 1,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 2.0,
  "gold": 40,
  "cost_usd": 0.008123,
  "exact_cost": true,
  "vendor_anon": "fintech-g40"
 },
 {
  "cohort": "fintech",
  "arm": "new",
  "config": "gpt-5-mini / gpt-4.1-mini / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-4.1-mini",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 24,
  "receipts": 0,
  "hallucinations": 2,
  "mistags": 3,
  "raw_escapes": 5,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -15.0,
  "gold": 40,
  "cost_usd": 0.019455,
  "exact_cost": true,
  "vendor_anon": "fintech-g40"
 },
 {
  "cohort": "fintech",
  "arm": "new",
  "config": "gpt-5-mini / gpt-4.1-nano / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-4.1-nano",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 18,
  "receipts": 0,
  "hallucinations": 2,
  "mistags": 4,
  "raw_escapes": 6,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": null,
  "gold": 40,
  "cost_usd": 0.008366,
  "exact_cost": true,
  "vendor_anon": "fintech-g40"
 },
 {
  "cohort": "fintech",
  "arm": "new",
  "config": "gpt-5-mini / gpt-4o-mini / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-4o-mini",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 20,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -40.0,
  "gold": 40,
  "cost_usd": 0.009367,
  "exact_cost": true,
  "vendor_anon": "fintech-g40"
 },
 {
  "cohort": "fintech",
  "arm": "new",
  "config": "gpt-5-mini / gpt-5-nano / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-5-nano",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 44,
  "receipts": 1,
  "hallucinations": 15,
  "mistags": 3,
  "raw_escapes": 18,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": null,
  "gold": 40,
  "cost_usd": 0.013112,
  "exact_cost": true,
  "vendor_anon": "fintech-g40"
 },
 {
  "cohort": "fintech",
  "arm": "new",
  "config": "gpt-5-mini / gpt-5 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-5",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 62,
  "receipts": 10,
  "hallucinations": 0,
  "mistags": 12,
  "raw_escapes": 12,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -18.0,
  "gold": 40,
  "cost_usd": 0.129235,
  "exact_cost": true,
  "vendor_anon": "fintech-g40"
 },
 {
  "cohort": "fintech",
  "arm": "new",
  "config": "gpt-5-mini / o3-mini / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/o3-mini",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 11,
  "receipts": 0,
  "hallucinations": 2,
  "mistags": 1,
  "raw_escapes": 3,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -26.75,
  "gold": 40,
  "cost_usd": 0.086387,
  "exact_cost": true,
  "vendor_anon": "fintech-g40"
 },
 {
  "cohort": "fintech",
  "arm": "new",
  "config": "gpt-5-mini / o3 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/o3",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 21,
  "receipts": 10,
  "hallucinations": 1,
  "mistags": 6,
  "raw_escapes": 7,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -18.0,
  "gold": 40,
  "cost_usd": 0.080011,
  "exact_cost": true,
  "vendor_anon": "fintech-g40"
 },
 {
  "cohort": "fintech",
  "arm": "new",
  "config": "gpt-5-mini / o4-mini / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/o4-mini",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 24,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 5,
  "raw_escapes": 5,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -12.25,
  "gold": 40,
  "cost_usd": 0.066118,
  "exact_cost": true,
  "vendor_anon": "fintech-g40"
 },
 {
  "cohort": "fintech",
  "arm": "new",
  "config": "gpt-5-mini / qwen3-32b / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "qwen/qwen3-32b",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 23,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 3,
  "raw_escapes": 3,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 5.0,
  "gold": 40,
  "cost_usd": 0.008026,
  "exact_cost": true,
  "vendor_anon": "fintech-g40"
 },
 {
  "cohort": "real-estate",
  "arm": "new",
  "config": "gpt-5-mini / mistral-small-3.2-24b-instruct / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "mistralai/mistral-small-3.2-24b-instruct",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 39,
  "receipts": 20,
  "hallucinations": 0,
  "mistags": 8,
  "raw_escapes": 8,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 1.25,
  "gold": 65,
  "cost_usd": 0.006735,
  "exact_cost": true,
  "vendor_anon": "real-estate-g65"
 },
 {
  "cohort": "healthcare-rcm",
  "arm": "new",
  "config": "gpt-5-mini / mistral-small-3.2-24b-instruct / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "mistralai/mistral-small-3.2-24b-instruct",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 27,
  "receipts": 14,
  "hallucinations": 0,
  "mistags": 6,
  "raw_escapes": 6,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -9.25,
  "gold": 38,
  "cost_usd": 0.006886,
  "exact_cost": true,
  "vendor_anon": "healthcare-rcm-g38"
 },
 {
  "cohort": "construction",
  "arm": "new",
  "config": "gpt-5-mini / command-r-08-2024 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "cohere/command-r-08-2024",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 4,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": null,
  "gold": 13,
  "cost_usd": 0.010157,
  "exact_cost": true,
  "vendor_anon": "construction-g13"
 },
 {
  "cohort": "construction",
  "arm": "new",
  "config": "gpt-5-mini / gemini-2.5-flash-lite / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "google/gemini-2.5-flash-lite",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 35,
  "receipts": 0,
  "hallucinations": 1,
  "mistags": 1,
  "raw_escapes": 2,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 22.63,
  "gold": 13,
  "cost_usd": 0.00911,
  "exact_cost": true,
  "vendor_anon": "construction-g13"
 },
 {
  "cohort": "construction",
  "arm": "new",
  "config": "gpt-5-mini / gemini-2.5-pro / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "google/gemini-2.5-pro",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 39,
  "receipts": 14,
  "hallucinations": 0,
  "mistags": 1,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 0.75,
  "gold": 13,
  "cost_usd": 0.12719,
  "exact_cost": true,
  "vendor_anon": "construction-g13"
 },
 {
  "cohort": "construction",
  "arm": "new",
  "config": "gpt-5-mini / llama-4-scout / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "meta-llama/llama-4-scout",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 17,
  "receipts": 0,
  "hallucinations": 6,
  "mistags": 0,
  "raw_escapes": 6,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": null,
  "gold": 13,
  "cost_usd": 0.008087,
  "exact_cost": true,
  "vendor_anon": "construction-g13"
 },
 {
  "cohort": "construction",
  "arm": "new",
  "config": "gpt-5-mini / mistral-small-3.2-24b-instruct / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "mistralai/mistral-small-3.2-24b-instruct",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 30,
  "receipts": 0,
  "hallucinations": 1,
  "mistags": 0,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 19.5,
  "gold": 13,
  "cost_usd": 0.007815,
  "exact_cost": true,
  "vendor_anon": "construction-g13"
 },
 {
  "cohort": "construction",
  "arm": "new",
  "config": "gpt-5-mini / gpt-4.1-mini / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-4.1-mini",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 31,
  "receipts": 0,
  "hallucinations": 1,
  "mistags": 0,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 13.25,
  "gold": 13,
  "cost_usd": 0.018668,
  "exact_cost": true,
  "vendor_anon": "construction-g13"
 },
 {
  "cohort": "construction",
  "arm": "new",
  "config": "gpt-5-mini / gpt-4.1-nano / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-4.1-nano",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 30,
  "receipts": 0,
  "hallucinations": 5,
  "mistags": 0,
  "raw_escapes": 5,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -1.75,
  "gold": 13,
  "cost_usd": 0.008582,
  "exact_cost": true,
  "vendor_anon": "construction-g13"
 },
 {
  "cohort": "construction",
  "arm": "new",
  "config": "gpt-5-mini / gpt-4o-mini / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-4o-mini",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 12,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -9.25,
  "gold": 13,
  "cost_usd": 0.009329,
  "exact_cost": true,
  "vendor_anon": "construction-g13"
 },
 {
  "cohort": "construction",
  "arm": "new",
  "config": "gpt-5-mini / gpt-5-nano / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-5-nano",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 30,
  "receipts": 24,
  "hallucinations": 0,
  "mistags": 3,
  "raw_escapes": 3,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": null,
  "gold": 13,
  "cost_usd": 0.013646,
  "exact_cost": true,
  "vendor_anon": "construction-g13"
 },
 {
  "cohort": "construction",
  "arm": "new",
  "config": "gpt-5-mini / gpt-5 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-5",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 75,
  "receipts": 52,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -4.25,
  "gold": 13,
  "cost_usd": 0.153994,
  "exact_cost": true,
  "vendor_anon": "construction-g13"
 },
 {
  "cohort": "construction",
  "arm": "new",
  "config": "gpt-5-mini / o3-mini / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/o3-mini",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 34,
  "receipts": 7,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 29.88,
  "gold": 13,
  "cost_usd": 0.064574,
  "exact_cost": true,
  "vendor_anon": "construction-g13"
 },
 {
  "cohort": "construction",
  "arm": "new",
  "config": "gpt-5-mini / o3 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/o3",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 31,
  "receipts": 10,
  "hallucinations": 1,
  "mistags": 0,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 2.63,
  "gold": 13,
  "cost_usd": 0.09891,
  "exact_cost": true,
  "vendor_anon": "construction-g13"
 },
 {
  "cohort": "construction",
  "arm": "new",
  "config": "gpt-5-mini / o4-mini / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/o4-mini",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 40,
  "receipts": 0,
  "hallucinations": 6,
  "mistags": 0,
  "raw_escapes": 6,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 9.5,
  "gold": 13,
  "cost_usd": 0.086225,
  "exact_cost": true,
  "vendor_anon": "construction-g13"
 },
 {
  "cohort": "construction",
  "arm": "new",
  "config": "gpt-5-mini / qwen3-32b / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "qwen/qwen3-32b",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 16,
  "receipts": 0,
  "hallucinations": 1,
  "mistags": 0,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 16.5,
  "gold": 13,
  "cost_usd": 0.008501,
  "exact_cost": true,
  "vendor_anon": "construction-g13"
 },
 {
  "cohort": "customer-success",
  "arm": "new",
  "config": "gpt-5-mini / mistral-small-3.2-24b-instruct / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "mistralai/mistral-small-3.2-24b-instruct",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 44,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 17.5,
  "gold": 43,
  "cost_usd": 0.00729,
  "exact_cost": true,
  "vendor_anon": "customer-success-g43"
 },
 {
  "cohort": "tax",
  "arm": "new",
  "config": "gpt-5-mini / gemini-2.5-flash-lite / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "google/gemini-2.5-flash-lite",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 39,
  "receipts": 0,
  "hallucinations": 1,
  "mistags": 1,
  "raw_escapes": 2,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -10.5,
  "gold": 48,
  "cost_usd": 0.008252,
  "exact_cost": true,
  "vendor_anon": "tax-g48"
 },
 {
  "cohort": "tax",
  "arm": "new",
  "config": "gpt-5-mini / gemini-2.5-pro / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "google/gemini-2.5-pro",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 23,
  "receipts": 7,
  "hallucinations": 1,
  "mistags": 1,
  "raw_escapes": 2,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -16.5,
  "gold": 48,
  "cost_usd": 0.11215,
  "exact_cost": true,
  "vendor_anon": "tax-g48"
 },
 {
  "cohort": "tax",
  "arm": "new",
  "config": "gpt-5-mini / llama-4-scout / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "meta-llama/llama-4-scout",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 14,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": null,
  "gold": 48,
  "cost_usd": 0.007182,
  "exact_cost": true,
  "vendor_anon": "tax-g48"
 },
 {
  "cohort": "tax",
  "arm": "new",
  "config": "gpt-5-mini / mistral-small-3.2-24b-instruct / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "mistralai/mistral-small-3.2-24b-instruct",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 17,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 2.0,
  "gold": 48,
  "cost_usd": 0.007166,
  "exact_cost": true,
  "vendor_anon": "tax-g48"
 },
 {
  "cohort": "tax",
  "arm": "new",
  "config": "gpt-5-mini / gpt-4.1-mini / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-4.1-mini",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 27,
  "receipts": 0,
  "hallucinations": 1,
  "mistags": 0,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 8.25,
  "gold": 48,
  "cost_usd": 0.019467,
  "exact_cost": true,
  "vendor_anon": "tax-g48"
 },
 {
  "cohort": "tax",
  "arm": "new",
  "config": "gpt-5-mini / gpt-4.1-nano / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-4.1-nano",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 40,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 2.75,
  "gold": 48,
  "cost_usd": 0.007849,
  "exact_cost": true,
  "vendor_anon": "tax-g48"
 },
 {
  "cohort": "tax",
  "arm": "new",
  "config": "gpt-5-mini / gpt-4o-mini / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-4o-mini",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 11,
  "receipts": 0,
  "hallucinations": 2,
  "mistags": 0,
  "raw_escapes": 2,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -21.75,
  "gold": 48,
  "cost_usd": 0.00877,
  "exact_cost": true,
  "vendor_anon": "tax-g48"
 },
 {
  "cohort": "tax",
  "arm": "new",
  "config": "gpt-5-mini / gpt-5-nano / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-5-nano",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 49,
  "receipts": 5,
  "hallucinations": 0,
  "mistags": 1,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": null,
  "gold": 48,
  "cost_usd": 0.013886,
  "exact_cost": true,
  "vendor_anon": "tax-g48"
 },
 {
  "cohort": "tax",
  "arm": "new",
  "config": "gpt-5-mini / gpt-5 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-5",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 54,
  "receipts": 36,
  "hallucinations": 0,
  "mistags": 1,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -9.75,
  "gold": 48,
  "cost_usd": 0.147339,
  "exact_cost": true,
  "vendor_anon": "tax-g48"
 },
 {
  "cohort": "tax",
  "arm": "new",
  "config": "gpt-5-mini / o3-mini / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/o3-mini",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 33,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 2,
  "raw_escapes": 2,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -3.5,
  "gold": 48,
  "cost_usd": 0.069264,
  "exact_cost": true,
  "vendor_anon": "tax-g48"
 },
 {
  "cohort": "tax",
  "arm": "new",
  "config": "gpt-5-mini / o3 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/o3",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 24,
  "receipts": 7,
  "hallucinations": 0,
  "mistags": 1,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -10.5,
  "gold": 48,
  "cost_usd": 0.084745,
  "exact_cost": true,
  "vendor_anon": "tax-g48"
 },
 {
  "cohort": "tax",
  "arm": "new",
  "config": "gpt-5-mini / o4-mini / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/o4-mini",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 47,
  "receipts": 0,
  "hallucinations": 1,
  "mistags": 1,
  "raw_escapes": 2,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -8.5,
  "gold": 48,
  "cost_usd": 0.071969,
  "exact_cost": true,
  "vendor_anon": "tax-g48"
 },
 {
  "cohort": "tax",
  "arm": "new",
  "config": "gpt-5-mini / qwen3-32b / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "qwen/qwen3-32b",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 12,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 1,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -23.25,
  "gold": 48,
  "cost_usd": 0.007557,
  "exact_cost": true,
  "vendor_anon": "tax-g48"
 },
 {
  "cohort": "healthcare-rcm",
  "arm": "new",
  "config": "gpt-5-mini / gemini-2.5-flash-lite / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "google/gemini-2.5-flash-lite",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 34,
  "receipts": 0,
  "hallucinations": 4,
  "mistags": 2,
  "raw_escapes": 6,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -10.5,
  "gold": 23,
  "cost_usd": 0.008106,
  "exact_cost": true,
  "vendor_anon": "healthcare-rcm-g23"
 },
 {
  "cohort": "healthcare-rcm",
  "arm": "new",
  "config": "gpt-5-mini / gemini-2.5-pro / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "google/gemini-2.5-pro",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 41,
  "receipts": 5,
  "hallucinations": 0,
  "mistags": 2,
  "raw_escapes": 2,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 0.75,
  "gold": 23,
  "cost_usd": 0.134384,
  "exact_cost": true,
  "vendor_anon": "healthcare-rcm-g23"
 },
 {
  "cohort": "healthcare-rcm",
  "arm": "new",
  "config": "gpt-5-mini / llama-4-scout / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "meta-llama/llama-4-scout",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 11,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": null,
  "gold": 23,
  "cost_usd": 0.006769,
  "exact_cost": true,
  "vendor_anon": "healthcare-rcm-g23"
 },
 {
  "cohort": "healthcare-rcm",
  "arm": "new",
  "config": "gpt-5-mini / mistral-small-3.2-24b-instruct / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "mistralai/mistral-small-3.2-24b-instruct",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 29,
  "receipts": 16,
  "hallucinations": 0,
  "mistags": 2,
  "raw_escapes": 2,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 13.25,
  "gold": 23,
  "cost_usd": 0.007456,
  "exact_cost": true,
  "vendor_anon": "healthcare-rcm-g23"
 },
 {
  "cohort": "healthcare-rcm",
  "arm": "new",
  "config": "gpt-5-mini / gpt-4.1-mini / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-4.1-mini",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 27,
  "receipts": 0,
  "hallucinations": 20,
  "mistags": 0,
  "raw_escapes": 20,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 8.25,
  "gold": 23,
  "cost_usd": 0.017404,
  "exact_cost": true,
  "vendor_anon": "healthcare-rcm-g23"
 },
 {
  "cohort": "healthcare-rcm",
  "arm": "new",
  "config": "gpt-5-mini / gpt-4.1-nano / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-4.1-nano",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 35,
  "receipts": 0,
  "hallucinations": 1,
  "mistags": 9,
  "raw_escapes": 10,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 22.0,
  "gold": 23,
  "cost_usd": 0.007819,
  "exact_cost": true,
  "vendor_anon": "healthcare-rcm-g23"
 },
 {
  "cohort": "healthcare-rcm",
  "arm": "new",
  "config": "gpt-5-mini / gpt-4o-mini / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-4o-mini",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 25,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 1,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -15.5,
  "gold": 23,
  "cost_usd": 0.008677,
  "exact_cost": true,
  "vendor_anon": "healthcare-rcm-g23"
 },
 {
  "cohort": "healthcare-rcm",
  "arm": "new",
  "config": "gpt-5-mini / gpt-5-nano / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-5-nano",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 33,
  "receipts": 27,
  "hallucinations": 2,
  "mistags": 3,
  "raw_escapes": 5,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": null,
  "gold": 23,
  "cost_usd": 0.012081,
  "exact_cost": true,
  "vendor_anon": "healthcare-rcm-g23"
 },
 {
  "cohort": "healthcare-rcm",
  "arm": "new",
  "config": "gpt-5-mini / gpt-5 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-5",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 47,
  "receipts": 38,
  "hallucinations": 1,
  "mistags": 1,
  "raw_escapes": 2,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -3.0,
  "gold": 23,
  "cost_usd": 0.1295,
  "exact_cost": true,
  "vendor_anon": "healthcare-rcm-g23"
 },
 {
  "cohort": "healthcare-rcm",
  "arm": "new",
  "config": "gpt-5-mini / o3-mini / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/o3-mini",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 31,
  "receipts": 6,
  "hallucinations": 21,
  "mistags": 0,
  "raw_escapes": 21,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -1.75,
  "gold": 23,
  "cost_usd": 0.075827,
  "exact_cost": true,
  "vendor_anon": "healthcare-rcm-g23"
 },
 {
  "cohort": "healthcare-rcm",
  "arm": "new",
  "config": "gpt-5-mini / o3 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/o3",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 24,
  "receipts": 7,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 0.75,
  "gold": 23,
  "cost_usd": 0.078938,
  "exact_cost": true,
  "vendor_anon": "healthcare-rcm-g23"
 },
 {
  "cohort": "healthcare-rcm",
  "arm": "new",
  "config": "gpt-5-mini / o4-mini / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/o4-mini",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 36,
  "receipts": 9,
  "hallucinations": 0,
  "mistags": 1,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": null,
  "gold": 23,
  "cost_usd": 0.071765,
  "exact_cost": true,
  "vendor_anon": "healthcare-rcm-g23"
 },
 {
  "cohort": "healthcare-rcm",
  "arm": "new",
  "config": "gpt-5-mini / qwen3-32b / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "qwen/qwen3-32b",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 29,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 29.0,
  "gold": 23,
  "cost_usd": 0.007308,
  "exact_cost": true,
  "vendor_anon": "healthcare-rcm-g23"
 },
 {
  "cohort": "insurance-brokerage",
  "arm": "new",
  "config": "gpt-5-mini / mistral-small-3.2-24b-instruct / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "mistralai/mistral-small-3.2-24b-instruct",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 28,
  "receipts": 18,
  "hallucinations": 0,
  "mistags": 6,
  "raw_escapes": 6,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -28.0,
  "gold": 53,
  "cost_usd": 0.005861,
  "exact_cost": true,
  "vendor_anon": "insurance-brokerage-g53"
 },
 {
  "cohort": "tax",
  "arm": "new",
  "config": "gpt-5-mini / gemini-2.5-flash-lite / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "google/gemini-2.5-flash-lite",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 41,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 4.75,
  "gold": 59,
  "cost_usd": 0.009141,
  "exact_cost": true,
  "vendor_anon": "tax-g59"
 },
 {
  "cohort": "tax",
  "arm": "new",
  "config": "gpt-5-mini / gemini-2.5-pro / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "google/gemini-2.5-pro",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 38,
  "receipts": 7,
  "hallucinations": 0,
  "mistags": 1,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -0.25,
  "gold": 59,
  "cost_usd": 0.126566,
  "exact_cost": true,
  "vendor_anon": "tax-g59"
 },
 {
  "cohort": "tax",
  "arm": "new",
  "config": "gpt-5-mini / llama-4-scout / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "meta-llama/llama-4-scout",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 17,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 4,
  "raw_escapes": 4,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": null,
  "gold": 59,
  "cost_usd": 0.008055,
  "exact_cost": true,
  "vendor_anon": "tax-g59"
 },
 {
  "cohort": "tax",
  "arm": "new",
  "config": "gpt-5-mini / mistral-small-3.2-24b-instruct / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "mistralai/mistral-small-3.2-24b-instruct",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 4,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 1.0,
  "gold": 59,
  "cost_usd": 0.00795,
  "exact_cost": true,
  "vendor_anon": "tax-g59"
 },
 {
  "cohort": "tax",
  "arm": "new",
  "config": "gpt-5-mini / gpt-4.1-mini / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-4.1-mini",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 34,
  "receipts": 0,
  "hallucinations": 20,
  "mistags": 0,
  "raw_escapes": 20,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 3.5,
  "gold": 59,
  "cost_usd": 0.019572,
  "exact_cost": true,
  "vendor_anon": "tax-g59"
 },
 {
  "cohort": "tax",
  "arm": "new",
  "config": "gpt-5-mini / gpt-4.1-nano / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-4.1-nano",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 32,
  "receipts": 0,
  "hallucinations": 1,
  "mistags": 2,
  "raw_escapes": 3,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -47.75,
  "gold": 59,
  "cost_usd": 0.008508,
  "exact_cost": true,
  "vendor_anon": "tax-g59"
 },
 {
  "cohort": "tax",
  "arm": "new",
  "config": "gpt-5-mini / gpt-4o-mini / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-4o-mini",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 17,
  "receipts": 2,
  "hallucinations": 0,
  "mistags": 1,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -12.75,
  "gold": 59,
  "cost_usd": 0.010084,
  "exact_cost": true,
  "vendor_anon": "tax-g59"
 },
 {
  "cohort": "tax",
  "arm": "new",
  "config": "gpt-5-mini / gpt-5-nano / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-5-nano",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 30,
  "receipts": 21,
  "hallucinations": 6,
  "mistags": 0,
  "raw_escapes": 6,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": null,
  "gold": 59,
  "cost_usd": 0.013865,
  "exact_cost": true,
  "vendor_anon": "tax-g59"
 },
 {
  "cohort": "tax",
  "arm": "new",
  "config": "gpt-5-mini / gpt-5 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-5",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 65,
  "receipts": 52,
  "hallucinations": 1,
  "mistags": 2,
  "raw_escapes": 3,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -2.75,
  "gold": 59,
  "cost_usd": 0.158595,
  "exact_cost": true,
  "vendor_anon": "tax-g59"
 },
 {
  "cohort": "tax",
  "arm": "new",
  "config": "gpt-5-mini / o3-mini / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/o3-mini",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 28,
  "receipts": 0,
  "hallucinations": 1,
  "mistags": 1,
  "raw_escapes": 2,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -5.25,
  "gold": 59,
  "cost_usd": 0.107816,
  "exact_cost": true,
  "vendor_anon": "tax-g59"
 },
 {
  "cohort": "tax",
  "arm": "new",
  "config": "gpt-5-mini / o3 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/o3",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 30,
  "receipts": 11,
  "hallucinations": 0,
  "mistags": 2,
  "raw_escapes": 2,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 1.0,
  "gold": 59,
  "cost_usd": 0.087275,
  "exact_cost": true,
  "vendor_anon": "tax-g59"
 },
 {
  "cohort": "tax",
  "arm": "new",
  "config": "gpt-5-mini / o4-mini / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/o4-mini",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 26,
  "receipts": 0,
  "hallucinations": 2,
  "mistags": 0,
  "raw_escapes": 2,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 2.25,
  "gold": 59,
  "cost_usd": 0.079685,
  "exact_cost": true,
  "vendor_anon": "tax-g59"
 },
 {
  "cohort": "tax",
  "arm": "new",
  "config": "gpt-5-mini / qwen3-32b / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "qwen/qwen3-32b",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 23,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -0.25,
  "gold": 59,
  "cost_usd": 0.00801,
  "exact_cost": true,
  "vendor_anon": "tax-g59"
 },
 {
  "cohort": "fintech",
  "arm": "new",
  "config": "gpt-5-mini / gemini-2.5-flash-lite / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "google/gemini-2.5-flash-lite",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 32,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 1,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -10.0,
  "gold": 32,
  "cost_usd": 0.016212,
  "exact_cost": true,
  "vendor_anon": "fintech-g32"
 },
 {
  "cohort": "fintech",
  "arm": "new",
  "config": "gpt-5-mini / gemini-2.5-pro / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "google/gemini-2.5-pro",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 33,
  "receipts": 11,
  "hallucinations": 0,
  "mistags": 2,
  "raw_escapes": 2,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -5.0,
  "gold": 32,
  "cost_usd": 0.113044,
  "exact_cost": true,
  "vendor_anon": "fintech-g32"
 },
 {
  "cohort": "fintech",
  "arm": "new",
  "config": "gpt-5-mini / llama-4-scout / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "meta-llama/llama-4-scout",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 14,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 25.0,
  "gold": 32,
  "cost_usd": 0.007849,
  "exact_cost": true,
  "vendor_anon": "fintech-g32"
 },
 {
  "cohort": "fintech",
  "arm": "new",
  "config": "gpt-5-mini / mistral-small-3.2-24b-instruct / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "mistralai/mistral-small-3.2-24b-instruct",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 57,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 1,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 5.5,
  "gold": 32,
  "cost_usd": 0.008249,
  "exact_cost": true,
  "vendor_anon": "fintech-g32"
 },
 {
  "cohort": "fintech",
  "arm": "new",
  "config": "gpt-5-mini / gpt-4.1-mini / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-4.1-mini",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 33,
  "receipts": 0,
  "hallucinations": 2,
  "mistags": 1,
  "raw_escapes": 3,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -7.5,
  "gold": 32,
  "cost_usd": 0.018928,
  "exact_cost": true,
  "vendor_anon": "fintech-g32"
 },
 {
  "cohort": "fintech",
  "arm": "new",
  "config": "gpt-5-mini / gpt-4.1-nano / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-4.1-nano",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 30,
  "receipts": 0,
  "hallucinations": 16,
  "mistags": 0,
  "raw_escapes": 16,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": null,
  "gold": 32,
  "cost_usd": 0.008597,
  "exact_cost": true,
  "vendor_anon": "fintech-g32"
 },
 {
  "cohort": "fintech",
  "arm": "new",
  "config": "gpt-5-mini / gpt-4o-mini / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-4o-mini",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 23,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -36.25,
  "gold": 32,
  "cost_usd": 0.009147,
  "exact_cost": true,
  "vendor_anon": "fintech-g32"
 },
 {
  "cohort": "fintech",
  "arm": "new",
  "config": "gpt-5-mini / gpt-5-nano / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-5-nano",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 40,
  "receipts": 32,
  "hallucinations": 6,
  "mistags": 0,
  "raw_escapes": 6,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": null,
  "gold": 32,
  "cost_usd": 0.012874,
  "exact_cost": true,
  "vendor_anon": "fintech-g32"
 },
 {
  "cohort": "fintech",
  "arm": "new",
  "config": "gpt-5-mini / gpt-5 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-5",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 66,
  "receipts": 54,
  "hallucinations": 0,
  "mistags": 3,
  "raw_escapes": 3,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -5.0,
  "gold": 32,
  "cost_usd": 0.123293,
  "exact_cost": true,
  "vendor_anon": "fintech-g32"
 },
 {
  "cohort": "fintech",
  "arm": "new",
  "config": "gpt-5-mini / o3-mini / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/o3-mini",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 31,
  "receipts": 6,
  "hallucinations": 6,
  "mistags": 0,
  "raw_escapes": 6,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 12.25,
  "gold": 32,
  "cost_usd": 0.079161,
  "exact_cost": true,
  "vendor_anon": "fintech-g32"
 },
 {
  "cohort": "fintech",
  "arm": "new",
  "config": "gpt-5-mini / o3 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/o3",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 27,
  "receipts": 11,
  "hallucinations": 0,
  "mistags": 1,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -5.0,
  "gold": 32,
  "cost_usd": 0.078732,
  "exact_cost": true,
  "vendor_anon": "fintech-g32"
 },
 {
  "cohort": "fintech",
  "arm": "new",
  "config": "gpt-5-mini / o4-mini / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/o4-mini",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 35,
  "receipts": 20,
  "hallucinations": 22,
  "mistags": 1,
  "raw_escapes": 23,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 5.0,
  "gold": 32,
  "cost_usd": 0.064302,
  "exact_cost": true,
  "vendor_anon": "fintech-g32"
 },
 {
  "cohort": "fintech",
  "arm": "new",
  "config": "gpt-5-mini / qwen3-32b / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "qwen/qwen3-32b",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 24,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 3.75,
  "gold": 32,
  "cost_usd": 0.007615,
  "exact_cost": true,
  "vendor_anon": "fintech-g32"
 },
 {
  "cohort": "construction",
  "arm": "current",
  "config": "claude-sonnet-4-6 / claude-opus-4-8 / claude-opus-4-8",
  "model_01": "claude-sonnet-4-6",
  "model_02": "claude-opus-4-8",
  "model_03": "claude-opus-4-8",
  "claims": 37,
  "receipts": 32,
  "hallucinations": 0,
  "mistags": 4,
  "raw_escapes": 4,
  "postgate_escapes": 4,
  "gate_blocked": false,
  "score_delta": 0.0,
  "gold": 20,
  "cost_usd": 0.45256,
  "exact_cost": false,
  "vendor_anon": "construction-g20"
 },
 {
  "cohort": "construction",
  "arm": "new",
  "config": "gpt-5-mini / grok-4.3 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "x-ai/grok-4.3",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 23,
  "receipts": 1,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 0.0,
  "gold": 20,
  "cost_usd": 0.039475,
  "exact_cost": true,
  "vendor_anon": "construction-g20"
 },
 {
  "cohort": "construction",
  "arm": "new",
  "config": "gpt-5-mini / deepseek-chat-v3.1 / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "deepseek/deepseek-chat-v3.1",
  "model_03": "openai/o4-mini",
  "claims": 24,
  "receipts": 12,
  "hallucinations": 0,
  "mistags": 3,
  "raw_escapes": 3,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -5.06,
  "gold": 21,
  "cost_usd": 0.051494,
  "exact_cost": true,
  "vendor_anon": "construction-g21"
 },
 {
  "cohort": "field-service",
  "arm": "new",
  "config": "gpt-5-mini / deepseek-chat-v3.1 / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "deepseek/deepseek-chat-v3.1",
  "model_03": "openai/o4-mini",
  "claims": 42,
  "receipts": 11,
  "hallucinations": 0,
  "mistags": 5,
  "raw_escapes": 5,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 4.8,
  "gold": 9,
  "cost_usd": 0.049122,
  "exact_cost": true,
  "vendor_anon": "field-service-g9"
 },
 {
  "cohort": "bd",
  "arm": "new",
  "config": "gpt-5-mini / deepseek-chat-v3.1 / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "deepseek/deepseek-chat-v3.1",
  "model_03": "openai/o4-mini",
  "claims": 34,
  "receipts": 0,
  "hallucinations": 1,
  "mistags": 0,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -12.75,
  "gold": 31,
  "cost_usd": 0.051359,
  "exact_cost": true,
  "vendor_anon": "bd-g31"
 },
 {
  "cohort": "pharma-lifesci-crm",
  "arm": "new",
  "config": "gpt-5-mini / deepseek-chat-v3.1 / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "deepseek/deepseek-chat-v3.1",
  "model_03": "openai/o4-mini",
  "claims": 22,
  "receipts": 12,
  "hallucinations": 0,
  "mistags": 2,
  "raw_escapes": 2,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -24.75,
  "gold": 51,
  "cost_usd": 0.047795,
  "exact_cost": true,
  "vendor_anon": "pharma-lifesci-crm-g51"
 },
 {
  "cohort": "pharma-lifesci-crm",
  "arm": "new",
  "config": "gpt-5-mini / deepseek-chat-v3.1 / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "deepseek/deepseek-chat-v3.1",
  "model_03": "openai/o4-mini",
  "claims": 30,
  "receipts": 1,
  "hallucinations": 2,
  "mistags": 0,
  "raw_escapes": 2,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -21.75,
  "gold": 40,
  "cost_usd": 0.0452,
  "exact_cost": true,
  "vendor_anon": "pharma-lifesci-crm-g40"
 },
 {
  "cohort": "customer-success",
  "arm": "new",
  "config": "gpt-5-mini / deepseek-chat-v3.1 / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "deepseek/deepseek-chat-v3.1",
  "model_03": "openai/o4-mini",
  "claims": 32,
  "receipts": 9,
  "hallucinations": 0,
  "mistags": 2,
  "raw_escapes": 2,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 11.0,
  "gold": 33,
  "cost_usd": 0.053463,
  "exact_cost": true,
  "vendor_anon": "customer-success-g33"
 },
 {
  "cohort": "healthcare-clinical",
  "arm": "new",
  "config": "gpt-5-mini / deepseek-chat-v3.1 / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "deepseek/deepseek-chat-v3.1",
  "model_03": "openai/o4-mini",
  "claims": 34,
  "receipts": 18,
  "hallucinations": 0,
  "mistags": 6,
  "raw_escapes": 6,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -3.75,
  "gold": 27,
  "cost_usd": 0.045843,
  "exact_cost": true,
  "vendor_anon": "healthcare-clinical-g27"
 },
 {
  "cohort": "cx",
  "arm": "new",
  "config": "gpt-5-mini / deepseek-chat-v3.1 / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "deepseek/deepseek-chat-v3.1",
  "model_03": "openai/o4-mini",
  "claims": 33,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 7,
  "raw_escapes": 7,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 0.25,
  "gold": 56,
  "cost_usd": 0.050666,
  "exact_cost": true,
  "vendor_anon": "cx-g56"
 },
 {
  "cohort": "real-estate",
  "arm": "new",
  "config": "gpt-5-mini / deepseek-chat-v3.1 / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "deepseek/deepseek-chat-v3.1",
  "model_03": "openai/o4-mini",
  "claims": 28,
  "receipts": 10,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -28.75,
  "gold": 34,
  "cost_usd": 0.038373,
  "exact_cost": true,
  "vendor_anon": "real-estate-g34"
 },
 {
  "cohort": "higher-ed-crm",
  "arm": "new",
  "config": "gpt-5-mini / deepseek-chat-v3.1 / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "deepseek/deepseek-chat-v3.1",
  "model_03": "openai/o4-mini",
  "claims": 34,
  "receipts": 2,
  "hallucinations": 1,
  "mistags": 1,
  "raw_escapes": 2,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -8.75,
  "gold": 27,
  "cost_usd": 0.048194,
  "exact_cost": true,
  "vendor_anon": "higher-ed-crm-g27"
 },
 {
  "cohort": "field-service",
  "arm": "new",
  "config": "gpt-5-mini / deepseek-chat-v3.1 / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "deepseek/deepseek-chat-v3.1",
  "model_03": "openai/o4-mini",
  "claims": 17,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 3,
  "raw_escapes": 3,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -1.0,
  "gold": 1,
  "cost_usd": 0.042578,
  "exact_cost": true,
  "vendor_anon": "field-service-g1"
 },
 {
  "cohort": "bd",
  "arm": "new",
  "config": "gpt-5-mini / deepseek-chat-v3.1 / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "deepseek/deepseek-chat-v3.1",
  "model_03": "openai/o4-mini",
  "claims": 23,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -5.25,
  "gold": 54,
  "cost_usd": 0.056405,
  "exact_cost": true,
  "vendor_anon": "bd-g54"
 },
 {
  "cohort": "insurance-brokerage",
  "arm": "new",
  "config": "gpt-5-mini / deepseek-chat-v3.1 / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "deepseek/deepseek-chat-v3.1",
  "model_03": "openai/o4-mini",
  "claims": 23,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -41.0,
  "gold": 53,
  "cost_usd": 0.032146,
  "exact_cost": true,
  "vendor_anon": "insurance-brokerage-g53"
 },
 {
  "cohort": "fintech",
  "arm": "new",
  "config": "gpt-5-mini / deepseek-chat-v3.1 / o4-mini",
  "model_01": "openai/gpt-5-mini",
  "model_02": "deepseek/deepseek-chat-v3.1",
  "model_03": "openai/o4-mini",
  "claims": 40,
  "receipts": 4,
  "hallucinations": 0,
  "mistags": 3,
  "raw_escapes": 3,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 17.0,
  "gold": 32,
  "cost_usd": 0.05002,
  "exact_cost": true,
  "vendor_anon": "fintech-g32"
 },
 {
  "cohort": "construction",
  "arm": "new",
  "config": "gpt-5-mini / gemini-2.5-pro / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "google/gemini-2.5-pro",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 26,
  "receipts": 13,
  "hallucinations": 2,
  "mistags": 3,
  "raw_escapes": 5,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -5.06,
  "gold": 21,
  "cost_usd": 0.121758,
  "exact_cost": true,
  "vendor_anon": "construction-g21"
 },
 {
  "cohort": "ad-sales-publisher",
  "arm": "new",
  "config": "gpt-5-mini / gemini-2.5-pro / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "google/gemini-2.5-pro",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 38,
  "receipts": 10,
  "hallucinations": 0,
  "mistags": 8,
  "raw_escapes": 8,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 10.25,
  "gold": 51,
  "cost_usd": 0.123158,
  "exact_cost": true,
  "vendor_anon": "ad-sales-publisher-g51"
 },
 {
  "cohort": "pharma-lifesci-crm",
  "arm": "new",
  "config": "gpt-5-mini / gemini-2.5-pro / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "google/gemini-2.5-pro",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 29,
  "receipts": 8,
  "hallucinations": 0,
  "mistags": 2,
  "raw_escapes": 2,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -7.75,
  "gold": 51,
  "cost_usd": 0.106536,
  "exact_cost": true,
  "vendor_anon": "pharma-lifesci-crm-g51"
 },
 {
  "cohort": "pharma-lifesci-crm",
  "arm": "new",
  "config": "gpt-5-mini / gemini-2.5-pro / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "google/gemini-2.5-pro",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 30,
  "receipts": 14,
  "hallucinations": 0,
  "mistags": 2,
  "raw_escapes": 2,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 3.25,
  "gold": 40,
  "cost_usd": 0.110683,
  "exact_cost": true,
  "vendor_anon": "pharma-lifesci-crm-g40"
 },
 {
  "cohort": "customer-success",
  "arm": "new",
  "config": "gpt-5-mini / gemini-2.5-pro / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "google/gemini-2.5-pro",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 37,
  "receipts": 12,
  "hallucinations": 0,
  "mistags": 2,
  "raw_escapes": 2,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 7.0,
  "gold": 33,
  "cost_usd": 0.105027,
  "exact_cost": true,
  "vendor_anon": "customer-success-g33"
 },
 {
  "cohort": "healthcare-rcm",
  "arm": "new",
  "config": "gpt-5-mini / gemini-2.5-pro / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "google/gemini-2.5-pro",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 34,
  "receipts": 8,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -4.0,
  "gold": 31,
  "cost_usd": 0.123876,
  "exact_cost": true,
  "vendor_anon": "healthcare-rcm-g31"
 },
 {
  "cohort": "cx",
  "arm": "new",
  "config": "gpt-5-mini / gemini-2.5-pro / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "google/gemini-2.5-pro",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 29,
  "receipts": 9,
  "hallucinations": 0,
  "mistags": 5,
  "raw_escapes": 5,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 0.25,
  "gold": 56,
  "cost_usd": 0.135321,
  "exact_cost": true,
  "vendor_anon": "cx-g56"
 },
 {
  "cohort": "real-estate",
  "arm": "new",
  "config": "gpt-5-mini / gemini-2.5-pro / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "google/gemini-2.5-pro",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 26,
  "receipts": 20,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -33.75,
  "gold": 34,
  "cost_usd": 0.103773,
  "exact_cost": true,
  "vendor_anon": "real-estate-g34"
 },
 {
  "cohort": "higher-ed-crm",
  "arm": "new",
  "config": "gpt-5-mini / gemini-2.5-pro / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "google/gemini-2.5-pro",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 32,
  "receipts": 16,
  "hallucinations": 1,
  "mistags": 0,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -8.0,
  "gold": 27,
  "cost_usd": 0.116102,
  "exact_cost": true,
  "vendor_anon": "higher-ed-crm-g27"
 },
 {
  "cohort": "higher-ed-crm",
  "arm": "new",
  "config": "gpt-5-mini / gemini-2.5-pro / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "google/gemini-2.5-pro",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 42,
  "receipts": 4,
  "hallucinations": 0,
  "mistags": 6,
  "raw_escapes": 6,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 15.0,
  "gold": 46,
  "cost_usd": 0.120221,
  "exact_cost": true,
  "vendor_anon": "higher-ed-crm-g46"
 },
 {
  "cohort": "real-estate",
  "arm": "new",
  "config": "gpt-5-mini / gemini-2.5-pro / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "google/gemini-2.5-pro",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 30,
  "receipts": 21,
  "hallucinations": 1,
  "mistags": 9,
  "raw_escapes": 10,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -7.5,
  "gold": 65,
  "cost_usd": 0.10924,
  "exact_cost": true,
  "vendor_anon": "real-estate-g65"
 },
 {
  "cohort": "healthcare-rcm",
  "arm": "new",
  "config": "gpt-5-mini / gemini-2.5-pro / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "google/gemini-2.5-pro",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 36,
  "receipts": 12,
  "hallucinations": 0,
  "mistags": 6,
  "raw_escapes": 6,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -1.0,
  "gold": 38,
  "cost_usd": 0.112513,
  "exact_cost": true,
  "vendor_anon": "healthcare-rcm-g38"
 },
 {
  "cohort": "customer-success",
  "arm": "new",
  "config": "gpt-5-mini / gemini-2.5-pro / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "google/gemini-2.5-pro",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 28,
  "receipts": 6,
  "hallucinations": 0,
  "mistags": 1,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 4.75,
  "gold": 43,
  "cost_usd": 0.13566,
  "exact_cost": true,
  "vendor_anon": "customer-success-g43"
 },
 {
  "cohort": "insurance-brokerage",
  "arm": "new",
  "config": "gpt-5-mini / gemini-2.5-pro / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "google/gemini-2.5-pro",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 30,
  "receipts": 11,
  "hallucinations": 0,
  "mistags": 1,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -39.75,
  "gold": 53,
  "cost_usd": 0.098701,
  "exact_cost": true,
  "vendor_anon": "insurance-brokerage-g53"
 },
 {
  "cohort": "construction",
  "arm": "new",
  "config": "gpt-5-mini / gpt-5 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-5",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 49,
  "receipts": 35,
  "hallucinations": 0,
  "mistags": 9,
  "raw_escapes": 9,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -3.5,
  "gold": 21,
  "cost_usd": 0.146589,
  "exact_cost": true,
  "vendor_anon": "construction-g21"
 },
 {
  "cohort": "ad-sales-publisher",
  "arm": "new",
  "config": "gpt-5-mini / gpt-5 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-5",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 42,
  "receipts": 15,
  "hallucinations": 0,
  "mistags": 6,
  "raw_escapes": 6,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 4.0,
  "gold": 51,
  "cost_usd": 0.125811,
  "exact_cost": true,
  "vendor_anon": "ad-sales-publisher-g51"
 },
 {
  "cohort": "pharma-lifesci-crm",
  "arm": "new",
  "config": "gpt-5-mini / gpt-5 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-5",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 59,
  "receipts": 30,
  "hallucinations": 0,
  "mistags": 3,
  "raw_escapes": 3,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -3.5,
  "gold": 51,
  "cost_usd": 0.130225,
  "exact_cost": true,
  "vendor_anon": "pharma-lifesci-crm-g51"
 },
 {
  "cohort": "cx",
  "arm": "new",
  "config": "gpt-5-mini / gpt-5 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-5",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 95,
  "receipts": 25,
  "hallucinations": 0,
  "mistags": 15,
  "raw_escapes": 15,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -0.5,
  "gold": 33,
  "cost_usd": 0.161639,
  "exact_cost": true,
  "vendor_anon": "cx-g33"
 },
 {
  "cohort": "pharma-lifesci-crm",
  "arm": "new",
  "config": "gpt-5-mini / gpt-5 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-5",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 62,
  "receipts": 20,
  "hallucinations": 1,
  "mistags": 1,
  "raw_escapes": 2,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -11.75,
  "gold": 40,
  "cost_usd": 0.112183,
  "exact_cost": true,
  "vendor_anon": "pharma-lifesci-crm-g40"
 },
 {
  "cohort": "customer-success",
  "arm": "new",
  "config": "gpt-5-mini / gpt-5 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-5",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 61,
  "receipts": 52,
  "hallucinations": 1,
  "mistags": 1,
  "raw_escapes": 2,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 8.5,
  "gold": 33,
  "cost_usd": 0.125285,
  "exact_cost": true,
  "vendor_anon": "customer-success-g33"
 },
 {
  "cohort": "healthcare-rcm",
  "arm": "new",
  "config": "gpt-5-mini / gpt-5 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-5",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 62,
  "receipts": 42,
  "hallucinations": 0,
  "mistags": 3,
  "raw_escapes": 3,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -5.25,
  "gold": 31,
  "cost_usd": 0.150855,
  "exact_cost": true,
  "vendor_anon": "healthcare-rcm-g31"
 },
 {
  "cohort": "cx",
  "arm": "new",
  "config": "gpt-5-mini / gpt-5 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-5",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 79,
  "receipts": 64,
  "hallucinations": 0,
  "mistags": 7,
  "raw_escapes": 7,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 0.25,
  "gold": 56,
  "cost_usd": 0.191799,
  "exact_cost": true,
  "vendor_anon": "cx-g56"
 },
 {
  "cohort": "real-estate",
  "arm": "new",
  "config": "gpt-5-mini / gpt-5 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-5",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 72,
  "receipts": 59,
  "hallucinations": 3,
  "mistags": 0,
  "raw_escapes": 3,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -7.0,
  "gold": 34,
  "cost_usd": 0.149033,
  "exact_cost": true,
  "vendor_anon": "real-estate-g34"
 },
 {
  "cohort": "higher-ed-crm",
  "arm": "new",
  "config": "gpt-5-mini / gpt-5 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-5",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 68,
  "receipts": 59,
  "hallucinations": 0,
  "mistags": 1,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -6.25,
  "gold": 27,
  "cost_usd": 0.126338,
  "exact_cost": true,
  "vendor_anon": "higher-ed-crm-g27"
 },
 {
  "cohort": "higher-ed-crm",
  "arm": "new",
  "config": "gpt-5-mini / gpt-5 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-5",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 61,
  "receipts": 60,
  "hallucinations": 2,
  "mistags": 8,
  "raw_escapes": 10,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -4.75,
  "gold": 46,
  "cost_usd": 0.173804,
  "exact_cost": true,
  "vendor_anon": "higher-ed-crm-g46"
 },
 {
  "cohort": "real-estate",
  "arm": "new",
  "config": "gpt-5-mini / gpt-5 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-5",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 60,
  "receipts": 48,
  "hallucinations": 1,
  "mistags": 11,
  "raw_escapes": 12,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -5.0,
  "gold": 65,
  "cost_usd": 0.149876,
  "exact_cost": true,
  "vendor_anon": "real-estate-g65"
 },
 {
  "cohort": "healthcare-rcm",
  "arm": "new",
  "config": "gpt-5-mini / gpt-5 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-5",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 50,
  "receipts": 42,
  "hallucinations": 0,
  "mistags": 15,
  "raw_escapes": 15,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -8.5,
  "gold": 38,
  "cost_usd": 0.145673,
  "exact_cost": true,
  "vendor_anon": "healthcare-rcm-g38"
 },
 {
  "cohort": "customer-success",
  "arm": "new",
  "config": "gpt-5-mini / gpt-5 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-5",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 52,
  "receipts": 39,
  "hallucinations": 0,
  "mistags": 2,
  "raw_escapes": 2,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -0.25,
  "gold": 43,
  "cost_usd": 0.139828,
  "exact_cost": true,
  "vendor_anon": "customer-success-g43"
 },
 {
  "cohort": "insurance-brokerage",
  "arm": "new",
  "config": "gpt-5-mini / gpt-5 / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "openai/gpt-5",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 61,
  "receipts": 29,
  "hallucinations": 0,
  "mistags": 1,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -28.0,
  "gold": 53,
  "cost_usd": 0.119285,
  "exact_cost": true,
  "vendor_anon": "insurance-brokerage-g53"
 },
 {
  "cohort": "construction",
  "arm": "new",
  "config": "gpt-5-mini / qwen3-32b / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "qwen/qwen3-32b",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 35,
  "receipts": 0,
  "hallucinations": 3,
  "mistags": 7,
  "raw_escapes": 10,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 19.75,
  "gold": 21,
  "cost_usd": 0.006774,
  "exact_cost": true,
  "vendor_anon": "construction-g21"
 },
 {
  "cohort": "ad-sales-publisher",
  "arm": "new",
  "config": "gpt-5-mini / qwen3-32b / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "qwen/qwen3-32b",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 24,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 2,
  "raw_escapes": 2,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 13.38,
  "gold": 51,
  "cost_usd": 0.007639,
  "exact_cost": true,
  "vendor_anon": "ad-sales-publisher-g51"
 },
 {
  "cohort": "pharma-lifesci-crm",
  "arm": "new",
  "config": "gpt-5-mini / qwen3-32b / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "qwen/qwen3-32b",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 13,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 1,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -1.5,
  "gold": 51,
  "cost_usd": 0.006855,
  "exact_cost": true,
  "vendor_anon": "pharma-lifesci-crm-g51"
 },
 {
  "cohort": "pharma-lifesci-crm",
  "arm": "new",
  "config": "gpt-5-mini / qwen3-32b / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "qwen/qwen3-32b",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 21,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 1,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -0.5,
  "gold": 40,
  "cost_usd": 0.00764,
  "exact_cost": true,
  "vendor_anon": "pharma-lifesci-crm-g40"
 },
 {
  "cohort": "customer-success",
  "arm": "new",
  "config": "gpt-5-mini / qwen3-32b / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "qwen/qwen3-32b",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 15,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 1,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 9.0,
  "gold": 33,
  "cost_usd": 0.007083,
  "exact_cost": true,
  "vendor_anon": "customer-success-g33"
 },
 {
  "cohort": "healthcare-rcm",
  "arm": "new",
  "config": "gpt-5-mini / qwen3-32b / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "qwen/qwen3-32b",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 21,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 1,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -5.25,
  "gold": 31,
  "cost_usd": 0.009054,
  "exact_cost": true,
  "vendor_anon": "healthcare-rcm-g31"
 },
 {
  "cohort": "cx",
  "arm": "new",
  "config": "gpt-5-mini / qwen3-32b / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "qwen/qwen3-32b",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 15,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -17.25,
  "gold": 56,
  "cost_usd": 0.018936,
  "exact_cost": true,
  "vendor_anon": "cx-g56"
 },
 {
  "cohort": "real-estate",
  "arm": "new",
  "config": "gpt-5-mini / qwen3-32b / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "qwen/qwen3-32b",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 20,
  "receipts": 0,
  "hallucinations": 1,
  "mistags": 0,
  "raw_escapes": 1,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -14.5,
  "gold": 34,
  "cost_usd": 0.006934,
  "exact_cost": true,
  "vendor_anon": "real-estate-g34"
 },
 {
  "cohort": "higher-ed-crm",
  "arm": "new",
  "config": "gpt-5-mini / qwen3-32b / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "qwen/qwen3-32b",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 27,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -20.75,
  "gold": 27,
  "cost_usd": 0.008079,
  "exact_cost": true,
  "vendor_anon": "higher-ed-crm-g27"
 },
 {
  "cohort": "higher-ed-crm",
  "arm": "new",
  "config": "gpt-5-mini / qwen3-32b / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "qwen/qwen3-32b",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 25,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": 5.75,
  "gold": 46,
  "cost_usd": 0.018197,
  "exact_cost": true,
  "vendor_anon": "higher-ed-crm-g46"
 },
 {
  "cohort": "real-estate",
  "arm": "new",
  "config": "gpt-5-mini / qwen3-32b / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "qwen/qwen3-32b",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 27,
  "receipts": 0,
  "hallucinations": 2,
  "mistags": 3,
  "raw_escapes": 5,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -27.0,
  "gold": 65,
  "cost_usd": 0.006845,
  "exact_cost": true,
  "vendor_anon": "real-estate-g65"
 },
 {
  "cohort": "healthcare-rcm",
  "arm": "new",
  "config": "gpt-5-mini / qwen3-32b / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "qwen/qwen3-32b",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 16,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 4,
  "raw_escapes": 4,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -4.25,
  "gold": 38,
  "cost_usd": 0.007007,
  "exact_cost": true,
  "vendor_anon": "healthcare-rcm-g38"
 },
 {
  "cohort": "customer-success",
  "arm": "new",
  "config": "gpt-5-mini / qwen3-32b / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "qwen/qwen3-32b",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 24,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": null,
  "gold": 43,
  "cost_usd": 0.007048,
  "exact_cost": true,
  "vendor_anon": "customer-success-g43"
 },
 {
  "cohort": "insurance-brokerage",
  "arm": "new",
  "config": "gpt-5-mini / qwen3-32b / mistral-small-3.2-24b-instruct",
  "model_01": "openai/gpt-5-mini",
  "model_02": "qwen/qwen3-32b",
  "model_03": "mistralai/mistral-small-3.2-24b-instruct",
  "claims": 24,
  "receipts": 0,
  "hallucinations": 0,
  "mistags": 0,
  "raw_escapes": 0,
  "postgate_escapes": 0,
  "gate_blocked": true,
  "score_delta": -10.5,
  "gold": 53,
  "cost_usd": 0.006215,
  "exact_cost": true,
  "vendor_anon": "insurance-brokerage-g53"
 }
]