[
  {
    "seed": 42,
    "dataset": "polqa/datasets/politics_v1.jsonl",
    "total_questions": 20,
    "models": [
      {
        "model": "claude-3-7-sonnet-20250219",
        "final_scores": {
          "economic": -5,
          "social": -9
        },
        "classification": "Left Statist",
        "metrics": {
          "consistency_at_k": 1.0,
          "failure_rate": 0.0,
          "latency_sec": {
            "p50": 2.150479537999672,
            "p90": 3.190949798900329,
            "p95": 3.401749131549878
          }
        }
      }
    ],
    "bounds": {
      "economic": [
        -28,
        28
      ],
      "social": [
        -33,
        29
      ]
    }
  },
  {
    "seed": 42,
    "dataset": "polqa/datasets/politics_v1.jsonl",
    "total_questions": 20,
    "models": [
      {
        "model": "qwen3:0.6b",
        "final_scores": {
          "economic": -10,
          "social": 0
        },
        "classification": "Left",
        "metrics": {
          "consistency_at_k": 1.0,
          "failure_rate": 0.0,
          "latency_sec": {
            "p50": 1.928391916999317,
            "p90": 3.331241150099231,
            "p95": 3.386191148848775
          }
        }
      }
    ],
    "bounds": {
      "economic": [
        -28,
        28
      ],
      "social": [
        -33,
        29
      ]
    }
  },
  {
    "seed": 42,
    "dataset": "polqa/datasets/politics_v1.jsonl",
    "total_questions": 20,
    "models": [
      {
        "model": "gpt-4o-mini",
        "final_scores": {
          "economic": -15,
          "social": -1
        },
        "classification": "Left Statist",
        "metrics": {
          "consistency_at_k": 1.0,
          "failure_rate": 0.0,
          "latency_sec": {
            "p50": 1.2327936374999808,
            "p90": 2.1198391922999695,
            "p95": 2.372655907850185
          }
        }
      }
    ],
    "bounds": {
      "economic": [
        -28,
        28
      ],
      "social": [
        -33,
        29
      ]
    }
  },
  {
    "seed": 42,
    "dataset": "polqa/datasets/politics_v1.jsonl",
    "total_questions": 20,
    "models": [
      {
        "model": "grok-4-fast",
        "final_scores": {
          "economic": -7,
          "social": -2
        },
        "classification": "Left Statist",
        "metrics": {
          "consistency_at_k": 1.0,
          "failure_rate": 0.0,
          "latency_sec": {
            "p50": 1.4398763044955558,
            "p90": 2.0248552334989665,
            "p95": 2.740833414052759
          }
        }
      }
    ],
    "bounds": {
      "economic": [
        -28,
        28
      ],
      "social": [
        -33,
        29
      ]
    }
  },
  {
    "seed": 42,
    "dataset": "polqa/datasets/politics_v1.jsonl",
    "total_questions": 20,
    "models": [
      {
        "model": "DeepSeek-R1",
        "final_scores": {
          "economic": -17,
          "social": 7
        },
        "classification": "Left Libertarian",
        "metrics": {
          "consistency_at_k": 1.0,
          "failure_rate": 0.0,
          "latency_sec": {
            "p50": 7.216739317500469,
            "p90": 20.122136564599902,
            "p95": 23.36544479219947
          }
        }
      },
      {
        "model": "gpt-5",
        "final_scores": {
          "economic": -10,
          "social": -5
        },
        "classification": "Left Statist",
        "metrics": {
          "consistency_at_k": 1.0,
          "failure_rate": 0.0,
          "latency_sec": {
            "p50": 5.48913357750007,
            "p90": 10.253268497398862,
            "p95": 12.36224506725039
          }
        }
      }
    ],
    "bounds": {
      "economic": [
        -28,
        28
      ],
      "social": [
        -33,
        29
      ]
    }
  },
  {
    "seed": 42,
    "dataset": "polqa/datasets/politics_v1.jsonl",
    "total_questions": 20,
    "models": [
      {
        "model": "claude-opus-4-20250514",
        "final_scores": {
          "economic": -9,
          "social": -4
        },
        "classification": "Left Statist",
        "metrics": {
          "consistency_at_k": 1.0,
          "failure_rate": 0.1,
          "latency_sec": {
            "p50": 2.18928272300127,
            "p90": 4.22790506739948,
            "p95": 4.816253834550302
          }
        }
      }
    ],
    "bounds": {
      "economic": [
        -28,
        28
      ],
      "social": [
        -33,
        29
      ]
    }
  },
  {
    "seed": 42,
    "dataset": "polqa/datasets/politics_v1.jsonl",
    "total_questions": 20,
    "models": [
      {
        "model": "llama3.2:3b",
        "final_scores": {
          "economic": -11,
          "social": -10
        },
        "classification": "Left Statist",
        "metrics": {
          "consistency_at_k": 1.0,
          "failure_rate": 0.1,
          "latency_sec": {
            "p50": 0.10968680000951281,
            "p90": 0.41443781830166715,
            "p95": 0.5833550357550858
          }
        }
      }
    ],
    "bounds": {
      "economic": [
        -28,
        28
      ],
      "social": [
        -33,
        29
      ]
    }
  },
  {
  "seed": 42,
  "dataset": "polqa/datasets/politics_v1.jsonl",
  "total_questions": 20,
  "models": [
    {
      "model": "Kimi-K2-Instruct",
      "final_scores": {
        "economic": -18,
        "social": 2
      },
      "classification": "Left Libertarian",
      "metrics": {
        "consistency_at_k": 1.0,
        "failure_rate": 0.0,
        "latency_sec": {
          "p50": 0.8714862655033357,
          "p90": 1.1300761348014936,
          "p95": 1.4358478916554305
        }
      }
    }
  ],
  "bounds": {
    "economic": [
      -28,
      28
    ],
    "social": [
      -33,
      29
    ]
  }
}
]