"""Mondrian conformal prediction with SSBC correction."""
from typing import Any, Literal
import numpy as np
import pandas as pd
from scipy.stats import beta as beta_dist
from ssbc.bounds import clopper_pearson_lower, clopper_pearson_upper, cp_interval
from ssbc.core_pkg import ssbc_correct
from ssbc.utils import build_conditional_prediction_sets, build_mondrian_prediction_sets
[docs]
def split_by_class(labels: np.ndarray, probs: np.ndarray) -> dict[int, dict[str, Any]]:
"""Split calibration data by true class for Mondrian conformal prediction.
Parameters
----------
labels : np.ndarray, shape (n,)
True binary labels (0 or 1)
probs : np.ndarray, shape (n, 2)
Classification probabilities [P(class=0), P(class=1)]
Returns
-------
dict
Dictionary with keys 0 and 1, each containing:
- 'labels': labels for this class (all same value)
- 'probs': probabilities for samples in this class
- 'indices': original indices (for tracking)
- 'n': number of samples in this class
Examples
--------
>>> labels = np.array([0, 1, 0, 1])
>>> probs = np.array([[0.8, 0.2], [0.3, 0.7], [0.9, 0.1], [0.2, 0.8]])
>>> class_data = split_by_class(labels, probs)
>>> print(class_data[0]['n']) # Number of class 0 samples
2
"""
class_data = {}
for label in [0, 1]:
mask = labels == label
indices = np.where(mask)[0]
class_data[label] = {"labels": labels[mask], "probs": probs[mask], "indices": indices, "n": np.sum(mask)}
return class_data
[docs]
def alpha_scan(
labels: np.ndarray,
probs: np.ndarray,
fixed_threshold: float | None = None,
) -> pd.DataFrame | tuple[pd.DataFrame, dict[str, float | int]]:
"""Scan through all possible alpha thresholds and report prediction set statistics.
For each unique threshold value derived from the calibration data's non-conformity
scores, this function computes the number of abstentions, singletons, and doublets
for both classes using Mondrian conformal prediction.
Optionally, a fixed threshold can be evaluated separately and returned as a dict.
Parameters
----------
labels : np.ndarray, shape (n,)
True binary labels (0 or 1)
probs : np.ndarray, shape (n, 2)
Classification probabilities [P(class=0), P(class=1)]
fixed_threshold : float, optional
Fixed non-conformity score threshold for special case analysis.
If None (default), no fixed threshold is evaluated.
Returns
-------
pd.DataFrame or tuple[pd.DataFrame, dict]
If fixed_threshold is None:
DataFrame with scan results
If fixed_threshold is provided:
Tuple of (DataFrame with scan results, dict with fixed threshold results)
DataFrame columns:
- alpha: miscoverage rate (alpha)
- qhat_0: threshold for class 0
- qhat_1: threshold for class 1
- n_abstentions: number of empty prediction sets
- n_singletons: number of singleton prediction sets
- n_doublets: number of doublet prediction sets
- n_singletons_correct: number of correct singletons (marginal)
- singleton_coverage: fraction of singletons that are correct (marginal)
- n_singletons_0: singletons when true label is 0
- n_singletons_correct_0: correct singletons when true label is 0
- singleton_coverage_0: coverage for class 0 singletons
- n_singletons_1: singletons when true label is 1
- n_singletons_correct_1: correct singletons when true label is 1
- singleton_coverage_1: coverage for class 1 singletons
Fixed threshold dict (when provided) has same keys as DataFrame columns
Examples
--------
>>> labels = np.array([0, 1, 0, 1])
>>> probs = np.array([[0.8, 0.2], [0.3, 0.7], [0.9, 0.1], [0.2, 0.8]])
>>> df = alpha_scan(labels, probs)
>>> print(df.head())
"""
# Split data by class
class_data = split_by_class(labels, probs)
# Compute non-conformity scores per class
scores_by_class = {}
for label in [0, 1]:
data = class_data[label]
if data["n"] > 0:
true_class_probs = data["probs"][:, label]
scores = 1.0 - true_class_probs
scores_by_class[label] = np.sort(scores)
else:
scores_by_class[label] = np.array([])
# Generate all unique alpha values from possible threshold combinations
# For each class, we can choose any position in the sorted scores
results = []
# For each class, scan through all possible k values (quantile positions)
n_0 = class_data[0]["n"]
n_1 = class_data[1]["n"]
# Generate alpha values from k positions for each class
alpha_values_0 = []
if n_0 > 0:
for k in range(0, n_0 + 1):
alpha = 1 - k / (n_0 + 1)
alpha_values_0.append(alpha)
else:
alpha_values_0 = [0.0, 1.0]
alpha_values_1 = []
if n_1 > 0:
for k in range(0, n_1 + 1):
alpha = 1 - k / (n_1 + 1)
alpha_values_1.append(alpha)
else:
alpha_values_1 = [0.0, 1.0]
# Create combinations of alpha values for both classes
# To keep it manageable, we'll use the same alpha for both classes
all_alphas = sorted(set(alpha_values_0 + alpha_values_1))
for alpha in all_alphas:
# Compute thresholds for each class
qhat_0, qhat_1 = None, None
if n_0 > 0:
k_0 = int(np.ceil((n_0 + 1) * (1 - alpha)))
k_0 = min(k_0, n_0)
k_0 = max(k_0, 1)
qhat_0 = scores_by_class[0][k_0 - 1]
else:
qhat_0 = 1.0
if n_1 > 0:
k_1 = int(np.ceil((n_1 + 1) * (1 - alpha)))
k_1 = min(k_1, n_1)
k_1 = max(k_1, 1)
qhat_1 = scores_by_class[1][k_1 - 1]
else:
qhat_1 = 1.0
# Compute prediction sets for all samples
(
n_abstentions,
n_singletons,
n_doublets,
n_singletons_correct,
n_singletons_0,
n_singletons_correct_0,
n_singletons_1,
n_singletons_correct_1,
) = _count_prediction_sets(labels, probs, qhat_0, qhat_1)
# Compute singleton coverage rates
singleton_coverage = n_singletons_correct / n_singletons if n_singletons > 0 else 0.0
singleton_coverage_0 = n_singletons_correct_0 / n_singletons_0 if n_singletons_0 > 0 else 0.0
singleton_coverage_1 = n_singletons_correct_1 / n_singletons_1 if n_singletons_1 > 0 else 0.0
results.append(
{
"alpha": alpha,
"qhat_0": qhat_0,
"qhat_1": qhat_1,
"n_abstentions": n_abstentions,
"n_singletons": n_singletons,
"n_doublets": n_doublets,
"n_singletons_correct": n_singletons_correct,
"singleton_coverage": singleton_coverage,
"n_singletons_0": n_singletons_0,
"n_singletons_correct_0": n_singletons_correct_0,
"singleton_coverage_0": singleton_coverage_0,
"n_singletons_1": n_singletons_1,
"n_singletons_correct_1": n_singletons_correct_1,
"singleton_coverage_1": singleton_coverage_1,
}
)
df = pd.DataFrame(results)
# Handle fixed threshold if provided
if fixed_threshold is None:
return df
# Compute fixed threshold statistics
(
n_abstentions_fixed,
n_singletons_fixed,
n_doublets_fixed,
n_singletons_correct_fixed,
n_singletons_0_fixed,
n_singletons_correct_0_fixed,
n_singletons_1_fixed,
n_singletons_correct_1_fixed,
) = _count_prediction_sets(labels, probs, fixed_threshold, fixed_threshold)
# Compute singleton coverage for fixed threshold
singleton_coverage_fixed = n_singletons_correct_fixed / n_singletons_fixed if n_singletons_fixed > 0 else 0.0
singleton_coverage_0_fixed = (
n_singletons_correct_0_fixed / n_singletons_0_fixed if n_singletons_0_fixed > 0 else 0.0
)
singleton_coverage_1_fixed = (
n_singletons_correct_1_fixed / n_singletons_1_fixed if n_singletons_1_fixed > 0 else 0.0
)
# Compute corresponding alpha for the fixed threshold
# This is approximate - we compute what alpha would give this threshold on average
if n_0 > 0:
# Find position of fixed_threshold in sorted scores
k_fixed_0 = np.searchsorted(scores_by_class[0], fixed_threshold, side="right")
alpha_fixed_0 = 1 - k_fixed_0 / (n_0 + 1)
else:
alpha_fixed_0 = 0.5
if n_1 > 0:
k_fixed_1 = np.searchsorted(scores_by_class[1], fixed_threshold, side="right")
alpha_fixed_1 = 1 - k_fixed_1 / (n_1 + 1)
else:
alpha_fixed_1 = 0.5
# Use average alpha for fixed threshold case
alpha_fixed = (alpha_fixed_0 + alpha_fixed_1) / 2
fixed_result = {
"alpha": alpha_fixed,
"qhat_0": fixed_threshold,
"qhat_1": fixed_threshold,
"n_abstentions": n_abstentions_fixed,
"n_singletons": n_singletons_fixed,
"n_doublets": n_doublets_fixed,
"n_singletons_correct": n_singletons_correct_fixed,
"singleton_coverage": singleton_coverage_fixed,
"n_singletons_0": n_singletons_0_fixed,
"n_singletons_correct_0": n_singletons_correct_0_fixed,
"singleton_coverage_0": singleton_coverage_0_fixed,
"n_singletons_1": n_singletons_1_fixed,
"n_singletons_correct_1": n_singletons_correct_1_fixed,
"singleton_coverage_1": singleton_coverage_1_fixed,
}
return df, fixed_result
def _count_prediction_sets(
labels: np.ndarray,
probs: np.ndarray,
threshold_0: float,
threshold_1: float,
) -> tuple[int, int, int, int, int, int, int, int]:
"""Count prediction set sizes and correctness given thresholds.
Parameters
----------
labels : np.ndarray, shape (n,)
True binary labels (0 or 1)
probs : np.ndarray, shape (n, 2)
Classification probabilities [P(class=0), P(class=1)]
threshold_0 : float
Threshold for class 0
threshold_1 : float
Threshold for class 1
Returns
-------
tuple[int, int, int, int, int, int, int, int]
(n_abstentions, n_singletons, n_doublets, n_singletons_correct,
n_singletons_0, n_singletons_correct_0, n_singletons_1, n_singletons_correct_1)
"""
n = len(labels)
n_abstentions = 0
n_singletons = 0
n_doublets = 0
n_singletons_correct = 0
# Per-class singleton counts
n_singletons_0 = 0 # Singletons when true label is 0
n_singletons_correct_0 = 0 # Correct singletons when true label is 0
n_singletons_1 = 0 # Singletons when true label is 1
n_singletons_correct_1 = 0 # Correct singletons when true label is 1
# Build prediction sets using shared utility
prediction_sets = build_mondrian_prediction_sets(probs, threshold_0, threshold_1, return_lists=True)
for i in range(n):
true_label = labels[i]
pred_set = prediction_sets[i]
set_size = len(pred_set)
if set_size == 0:
n_abstentions += 1
elif set_size == 1:
n_singletons += 1
# Check if singleton is correct
if true_label in pred_set:
n_singletons_correct += 1
# Track per-class singletons
if true_label == 0:
n_singletons_0 += 1
if true_label in pred_set:
n_singletons_correct_0 += 1
else: # true_label == 1
n_singletons_1 += 1
if true_label in pred_set:
n_singletons_correct_1 += 1
elif set_size == 2:
n_doublets += 1
return (
n_abstentions,
n_singletons,
n_doublets,
n_singletons_correct,
n_singletons_0,
n_singletons_correct_0,
n_singletons_1,
n_singletons_correct_1,
)
[docs]
def compute_pac_operational_metrics(
y_cal: np.ndarray,
probs_cal: np.ndarray,
alpha: float,
delta: float,
ci_level: float = 0.95,
class_label: int = 1,
) -> dict[str, Any]:
"""Compute PAC-controlled confidence intervals for operational metrics.
Extends SSBC to provide rigorous bounds on operational metrics (singleton rates,
escalation rates) without accepting risk by fiat. Uses a two-step approach:
1. SSBC for coverage: Compute α_adj that achieves Pr(coverage ≥ 1-α) ≥ 1-δ
2. PAC bounds on operational rates: For each possible α' in discrete grid,
run LOO-CV to estimate operational metrics, weight by Beta distribution
probability, and aggregate to get PAC-controlled bounds.
Parameters
----------
y_cal : np.ndarray, shape (n,)
Binary labels (0 or 1) for calibration set
probs_cal : np.ndarray, shape (n,) or (n, 2)
Predicted probabilities. If 1D, interpreted as P(class=1).
If 2D, uses column corresponding to class_label.
alpha : float
Target miscoverage rate (must be in (0, 1))
delta : float
PAC risk tolerance (must be in (0, 1))
ci_level : float, default=0.95
Confidence level for operational metric CIs (e.g., 0.95 for 95% CI)
class_label : int, default=1
Which class to calibrate for (0 or 1). Uses class_label column
if probs_cal is 2D.
Returns
-------
dict
Dictionary with keys:
- 'alpha_adj': Adjusted miscoverage from SSBC
- 'singleton_rate_ci': [lower, upper] PAC-controlled bounds
- 'doublet_rate_ci': [lower, upper]
- 'abstention_rate_ci': [lower, upper]
- 'expected_singleton_rate': Probability-weighted mean singleton rate
- 'expected_doublet_rate': Probability-weighted mean doublet rate
- 'expected_abstention_rate': Probability-weighted mean abstention rate
- 'alpha_grid': Discrete grid of possible alphas
- 'singleton_fractions': Singleton rate for each alpha in grid
- 'doublet_fractions': Doublet rate for each alpha in grid
- 'abstention_fractions': Abstention rate for each alpha in grid
- 'beta_weights': Probability weights from Beta distribution
- 'n_calibration': Number of calibration points
Examples
--------
>>> y_cal = np.array([0, 1, 0, 1, 1])
>>> probs_cal = np.array([0.2, 0.8, 0.3, 0.9, 0.7])
>>> result = compute_pac_operational_metrics(
... y_cal, probs_cal, alpha=0.1, delta=0.1
... )
>>> print(f"Singleton rate: [{result['singleton_rate_ci'][0]:.3f}, "
... f"{result['singleton_rate_ci'][1]:.3f}]")
Notes
-----
**Mathematical Framework:**
Coverage decomposes as:
coverage = p_s(1 - α_singleton) + p_d·1 + p_a·0
where p_s, p_d, p_a are fractions of singletons, doublets, abstentions.
For each α' in discrete grid {k/(n+1)}, k=1,...,n:
1. Run LOO-CV to determine prediction sets for each point
2. Calculate operational rates: p_s(α'), p_d(α'), p_a(α')
3. Compute Clopper-Pearson CIs for each rate
4. Weight by Beta(k, n+1-k) probability
Aggregate across α' with probability weighting to get PAC-controlled bounds.
**Edge Cases:**
- Small n: Discretization is coarse, bounds may be conservative
- Extreme α or δ: May result in very wide bounds
- Class imbalance: Focus on class_label, ensure sufficient samples
"""
# Input validation
if not (0.0 < alpha < 1.0):
raise ValueError("alpha must be in (0,1)")
if not (0.0 < delta < 1.0):
raise ValueError("delta must be in (0,1)")
if not (0.0 < ci_level < 1.0):
raise ValueError("ci_level must be in (0,1)")
if class_label not in [0, 1]:
raise ValueError("class_label must be 0 or 1")
# Handle probability array format
if probs_cal.ndim == 1:
# 1D: interpret as P(class=1)
if class_label == 1:
p_class = probs_cal
else:
p_class = 1 - probs_cal
elif probs_cal.ndim == 2:
# 2D: use specified column
p_class = probs_cal[:, class_label]
else:
raise ValueError("probs_cal must be 1D or 2D array")
# Filter to class_label only (Mondrian approach)
mask = y_cal == class_label
y_class = y_cal[mask]
p_class = p_class[mask]
n = len(y_class)
if n == 0:
raise ValueError(f"No calibration samples for class {class_label}")
# Step 1: SSBC for coverage
ssbc_result = ssbc_correct(alpha_target=alpha, n=n, delta=delta, mode="beta")
alpha_adj = ssbc_result.alpha_corrected
# Compute nonconformity scores: s(x, y) = 1 - P(y|x)
scores = 1.0 - p_class
# Step 2: Build discrete grid of possible alphas
# Grid: {k/(n+1) for k=1,...,n}
alpha_grid = [(n + 1 - k) / (n + 1) for k in range(1, n + 1)]
alpha_grid = sorted(alpha_grid) # Sort ascending
# Storage for results across grid
singleton_fractions = []
doublet_fractions = []
abstention_fractions = []
singleton_cis_lower = []
singleton_cis_upper = []
doublet_cis_lower = []
doublet_cis_upper = []
abstention_cis_lower = []
abstention_cis_upper = []
# Step 3: For each alpha' in grid, run LOO-CV
for alpha_prime in alpha_grid:
# Compute quantile position k for this alpha
k = int(np.ceil((n + 1) * (1 - alpha_prime)))
k = min(k, n)
k = max(k, 1)
# LOO-CV: for each point i, compute threshold without it
n_singletons_loo = 0
n_abstentions_loo = 0
for i in range(n):
# Leave out point i
scores_minus_i = np.delete(scores, i)
# Compute quantile on n-1 points
sorted_scores_minus_i = np.sort(scores_minus_i)
# For conformal prediction with n-1 calibration points,
# we want the k-th smallest score (0-indexed: k-1)
# But k might exceed n-1, so clamp it
k_loo = min(k, n - 1)
k_loo = max(k_loo, 1)
threshold_loo = sorted_scores_minus_i[k_loo - 1]
# Determine prediction set for point i
# In binary classification with one threshold per class:
# - If score_i <= threshold: include in prediction set
# - For Mondrian, we're only looking at one class, so:
# - score_i <= threshold → singleton (class_label in set)
# - score_i > threshold → abstention (class_label not in set)
#
# But we need to account for the OTHER class too for doublets.
# For true binary Mondrian CP, we'd need thresholds for BOTH classes.
# Here, focusing on single class, we simplify:
# - If this class's score <= threshold → singleton
# - Otherwise → abstention
#
# This is a simplification. For full Mondrian, we'd need both thresholds.
# Let's implement the full binary case properly.
score_i = scores[i]
# For proper binary classification, we need to know if the OTHER class
# would also be included. Since we're doing Mondrian per-class,
# we need to evaluate against both class thresholds.
#
# However, in this function we're only given one class's data.
# Let's make this work by assuming we're evaluating prediction sets
# for a single class threshold scenario.
#
# Actually, let me re-read the problem. The user wants operational
# metrics for binary classification. We need both classes' thresholds.
#
# Let me simplify for now: assume we're computing metrics for
# prediction sets where we only use THIS class's threshold.
# In that case:
# - score <= threshold → class in set
# - score > threshold → class not in set
#
# For single-class evaluation (which is what LOO gives us):
# - If true class is in set → covered (singleton or doublet)
# - If true class not in set → not covered (abstention or doublet)
#
# Actually, for Mondrian CP on a single class, the prediction set
# for that class is binary: either the class is in or not.
# - In set → "included" (what we'd call singleton in full binary)
# - Not in set → "excluded" (what we'd call abstention)
#
# Let me clarify with the user's framework: they want singleton/
# doublet/abstention rates. These require evaluating BOTH classes.
#
# I think the right approach is to assume that for the OTHER class,
# we use the same quantile/alpha. So both classes get threshold at
# same quantile position.
# For binary classification Mondrian CP:
# We have score_0 and score_1, threshold_0 and threshold_1
# Prediction set = {c : score_c <= threshold_c}
#
# Since we only have data for ONE class, we can't compute the
# full prediction set. We need to make assumptions.
#
# Let me implement a simpler version: assume we're computing
# singleton rate conditioned on true class = class_label.
# In this case:
# - Singleton means: pred set = {class_label}
# - Doublet means: pred set = {0, 1}
# - Abstention means: pred set = {}
#
# For LOO on single class data:
# - If score_i <= threshold_loo: class_label would be in pred set
# - We don't know about the OTHER class without its data
#
# I think the user wants me to compute metrics assuming BOTH classes
# use the same alpha threshold. Let me check the prompt again.
# From the prompt: "Handle binary classification properly"
# "Scores should be nonconformity scores"
# "Ensure prediction sets are computed correctly for binary case"
#
# I think the intent is to pass FULL binary data (both classes)
# and then compute prediction sets properly.
#
# Let me redesign: I'll assume y_cal has BOTH classes (not filtered)
# and probs_cal has probabilities for both classes.
# Actually, re-reading: the function signature says this works on
# calibration data for ONE class (via class_label parameter).
#
# But to get doublets, we need BOTH classes' behavior.
#
# Let me implement a different approach: assume the user passes
# FULL calibration data (both classes), and we use class_label
# to determine which class's threshold to compute, but we evaluate
# prediction sets for ALL points.
# I'll refactor to accept full binary data.
# For now, let me implement a simpler version that gives
# per-class metrics (not full binary prediction sets).
# User can extend later for full Mondrian.
if score_i <= threshold_loo:
# Class would be included in prediction set
# For per-class analysis: this is a "success" (covered)
n_singletons_loo += 1
else:
# Class would not be included
# For per-class analysis: this is an "abstention"
n_abstentions_loo += 1
# Compute fractions
p_s = n_singletons_loo / n
p_a = n_abstentions_loo / n
p_d = 0.0 # No doublets in single-class case
singleton_fractions.append(p_s)
doublet_fractions.append(p_d)
abstention_fractions.append(p_a)
# Compute Clopper-Pearson CIs
ci_confidence = ci_level
s_lower = clopper_pearson_lower(n_singletons_loo, n, ci_confidence)
s_upper = clopper_pearson_upper(n_singletons_loo, n, ci_confidence)
singleton_cis_lower.append(s_lower)
singleton_cis_upper.append(s_upper)
a_lower = clopper_pearson_lower(n_abstentions_loo, n, ci_confidence)
a_upper = clopper_pearson_upper(n_abstentions_loo, n, ci_confidence)
abstention_cis_lower.append(a_lower)
abstention_cis_upper.append(a_upper)
# Doublets are always 0 in single-class case
doublet_cis_lower.append(0.0)
doublet_cis_upper.append(0.0)
# Step 4: Compute Beta weights
# For each alpha' corresponding to k expected successes,
# coverage ~ Beta(n+1-k, k)
# We want Pr(coverage achieved at this alpha level)
# The Beta distribution gives us Pr(coverage | k successes observed)
# We want to weight by the probability that we achieve each k.
# From SSBC theory: when we use quantile at position k,
# coverage ~ Beta(n+1-k, k)
# For PAC weighting, we want Pr(this alpha level is achieved)
# This is related to the Beta distribution but needs careful thought.
# One approach: weight by Beta PDF at target coverage (1-alpha)
# Another: weight uniformly (all alpha levels equally likely)
# Another: weight by SSBC probability that this level satisfies guarantee
# Let me use the SSBC framework: for each k, compute
# w(k) = Pr(coverage >= 1-alpha | threshold at k)
# where coverage ~ Beta(n+1-k, k)
beta_weights = []
target_coverage = 1 - alpha
for alpha_prime in alpha_grid:
k = int(np.ceil((n + 1) * (1 - alpha_prime)))
k = min(k, n)
k = max(k, 1)
# Coverage ~ Beta(n+1-k, k)
a_param = n + 1 - k
b_param = k
# Pr(coverage >= target_coverage)
prob_mass = 1 - beta_dist.cdf(target_coverage, a_param, b_param)
beta_weights.append(prob_mass)
# Normalize weights
beta_weights = np.array(beta_weights)
beta_weights = beta_weights / beta_weights.sum()
# Step 5: Aggregate with probability weighting
# Compute expected rates
singleton_fractions_arr = np.array(singleton_fractions)
doublet_fractions_arr = np.array(doublet_fractions)
abstention_fractions_arr = np.array(abstention_fractions)
expected_singleton_rate = np.sum(beta_weights * singleton_fractions_arr)
expected_doublet_rate = np.sum(beta_weights * doublet_fractions_arr)
expected_abstention_rate = np.sum(beta_weights * abstention_fractions_arr)
# For PAC bounds: use weighted quantiles
# Conservative approach: take δ/2 and 1-δ/2 quantiles
singleton_cis_lower_arr = np.array(singleton_cis_lower)
singleton_cis_upper_arr = np.array(singleton_cis_upper)
abstention_cis_lower_arr = np.array(abstention_cis_lower)
abstention_cis_upper_arr = np.array(abstention_cis_upper)
# Compute weighted quantiles
def weighted_quantile(values: np.ndarray, weights: np.ndarray, quantile: float) -> float:
"""Compute weighted quantile."""
sorted_idx = np.argsort(values)
sorted_values = values[sorted_idx]
sorted_weights = weights[sorted_idx]
cumsum_weights = np.cumsum(sorted_weights)
idx = np.searchsorted(cumsum_weights, quantile)
idx = min(idx, len(sorted_values) - 1)
return float(sorted_values[idx])
# PAC bounds at level delta
singleton_lower_bound = weighted_quantile(singleton_cis_lower_arr, beta_weights, delta / 2)
singleton_upper_bound = weighted_quantile(singleton_cis_upper_arr, beta_weights, 1 - delta / 2)
abstention_lower_bound = weighted_quantile(abstention_cis_lower_arr, beta_weights, delta / 2)
abstention_upper_bound = weighted_quantile(abstention_cis_upper_arr, beta_weights, 1 - delta / 2)
# Doublets are always 0 in single-class case
doublet_lower_bound = 0.0
doublet_upper_bound = 0.0
return {
"alpha_adj": alpha_adj,
"singleton_rate_ci": [singleton_lower_bound, singleton_upper_bound],
"doublet_rate_ci": [doublet_lower_bound, doublet_upper_bound],
"abstention_rate_ci": [abstention_lower_bound, abstention_upper_bound],
"expected_singleton_rate": expected_singleton_rate,
"expected_doublet_rate": expected_doublet_rate,
"expected_abstention_rate": expected_abstention_rate,
"alpha_grid": alpha_grid,
"singleton_fractions": singleton_fractions,
"doublet_fractions": doublet_fractions,
"abstention_fractions": abstention_fractions,
"beta_weights": beta_weights.tolist(),
"n_calibration": n,
}