Source code for ssbc.simulation

"""Simulation utilities for testing conformal prediction."""

import numpy as np


[docs] class BinaryClassifierSimulator: """Simulate binary classification data with probabilities from Beta distributions. This simulator generates realistic classification scenarios where the predicted probabilities for each class follow Beta distributions. Useful for testing and benchmarking conformal prediction methods. Parameters ---------- p_class1 : float Probability of drawing class 1 (class imbalance parameter) Must be in [0, 1] beta_params_class0 : tuple of (a, b) Beta distribution parameters for p(class=1) when true label is 0 Typically use parameters that give low probabilities (e.g., (2, 8)) beta_params_class1 : tuple of (a, b) Beta distribution parameters for p(class=1) when true label is 1 Typically use parameters that give high probabilities (e.g., (8, 2)) seed : int, optional Random seed for reproducibility Attributes ---------- p_class1 : float Probability of class 1 p_class0 : float Probability of class 0 (= 1 - p_class1) a0, b0 : float Beta parameters for class 0 a1, b1 : float Beta parameters for class 1 rng : numpy.random.Generator Random number generator Examples -------- >>> # Simulate imbalanced data: 10% positive class >>> # Class 0: Beta(2, 8) → mean p(class=1) = 0.2 (low scores, correct) >>> # Class 1: Beta(8, 2) → mean p(class=1) = 0.8 (high scores, correct) >>> sim = BinaryClassifierSimulator( ... p_class1=0.10, ... beta_params_class0=(2, 8), ... beta_params_class1=(8, 2), ... seed=42 ... ) >>> labels, probs = sim.generate(n_samples=100) >>> print(labels.shape) (100,) >>> print(probs.shape) (100, 2) Notes ----- The Beta distribution parameters (a, b) control the shape: - Mean = a / (a + b) - For a classifier that works well: - Class 0 should have low p(class=1): use (a, b) with a < b - Class 1 should have high p(class=1): use (a, b) with a > b """
[docs] def __init__( self, p_class1: float, beta_params_class0: tuple[float, float], beta_params_class1: tuple[float, float], seed: int | None = None, ): """Initialize the binary classifier simulator.""" if not 0 <= p_class1 <= 1: raise ValueError("p_class1 must be in [0, 1]") self.p_class1 = p_class1 self.p_class0 = 1.0 - p_class1 self.a0, self.b0 = beta_params_class0 self.a1, self.b1 = beta_params_class1 self.rng = np.random.default_rng(seed) # Validate beta parameters if self.a0 <= 0 or self.b0 <= 0: raise ValueError("Beta parameters for class 0 must be positive") if self.a1 <= 0 or self.b1 <= 0: raise ValueError("Beta parameters for class 1 must be positive")
[docs] def generate(self, n_samples: int) -> tuple[np.ndarray, np.ndarray]: """Generate n_samples of (label, p(class=0), p(class=1)). Parameters ---------- n_samples : int Number of samples to generate Returns ------- labels : np.ndarray, shape (n_samples,) True binary labels (0 or 1) probs : np.ndarray, shape (n_samples, 2) Classification probabilities [p(class=0), p(class=1)] Each row sums to 1.0 Examples -------- >>> sim = BinaryClassifierSimulator( ... p_class1=0.5, ... beta_params_class0=(2, 8), ... beta_params_class1=(8, 2), ... seed=42 ... ) >>> labels, probs = sim.generate(n_samples=5) >>> print(f"Generated {len(labels)} samples") Generated 5 samples >>> print(f"Class balance: {np.bincount(labels)}") Class balance: [2 3] """ if n_samples <= 0: raise ValueError("n_samples must be positive") # Draw true labels according to class distribution labels = self.rng.choice([0, 1], size=n_samples, p=[self.p_class0, self.p_class1]) # Initialize probability array probs = np.zeros((n_samples, 2)) # For each label, draw classification probability from appropriate Beta for i, label in enumerate(labels): if label == 0: # True label is 0: sample p(class=1) from Beta(a0, b0) p_class1 = self.rng.beta(self.a0, self.b0) else: # True label is 1: sample p(class=1) from Beta(a1, b1) p_class1 = self.rng.beta(self.a1, self.b1) probs[i, 1] = p_class1 # p(class=1) probs[i, 0] = 1.0 - p_class1 # p(class=0) return labels, probs
[docs] def __repr__(self) -> str: """String representation of the simulator.""" return ( f"BinaryClassifierSimulator(p_class1={self.p_class1:.3f}, " f"beta_class0=({self.a0}, {self.b0}), " f"beta_class1=({self.a1}, {self.b1}))" )