Source code for ssbc.bounds.statistical

"""Statistical utility functions for SSBC."""

from typing import Any

import numpy as np
from scipy import stats
from scipy.special import betaln, gammaln

# Exported public API
__all__ = [
    "clopper_pearson_lower",
    "clopper_pearson_upper",
    "clopper_pearson_intervals",
    "cp_interval",
    "prediction_bounds",
    "prediction_bounds_beta_binomial",
]



[docs]
def clopper_pearson_lower(k: int, n: int, confidence: float = 0.95) -> float:
    """Compute lower Clopper-Pearson (one-sided) confidence bound.

    Parameters
    ----------
    k : int
        Number of successes
    n : int
        Total number of trials
    confidence : float, default=0.95
        Confidence level (e.g., 0.95 for 95% confidence)

    Returns
    -------
    float
        Lower confidence bound for the true proportion

    Examples
    --------
    >>> lower = clopper_pearson_lower(k=5, n=10, confidence=0.95)
    >>> print(f"Lower bound: {lower:.3f}")

    Notes
    -----
    Uses Beta distribution quantiles for exact binomial confidence bounds.
    For PAC-style guarantees, you may want to use delta = 1 - confidence.
    """
    if k == 0:
        return 0.0
    # L = Beta^{-1}(1-confidence; k, n-k+1)
    # Note: Using (1-confidence) as the lower tail probability
    alpha = 1 - confidence
    return float(stats.beta.ppf(alpha, k, n - k + 1))




[docs]
def clopper_pearson_upper(k: int, n: int, confidence: float = 0.95) -> float:
    """Compute upper Clopper-Pearson (one-sided) confidence bound.

    Parameters
    ----------
    k : int
        Number of successes
    n : int
        Total number of trials
    confidence : float, default=0.95
        Confidence level (e.g., 0.95 for 95% confidence)

    Returns
    -------
    float
        Upper confidence bound for the true proportion

    Examples
    --------
    >>> upper = clopper_pearson_upper(k=5, n=10, confidence=0.95)
    >>> print(f"Upper bound: {upper:.3f}")

    Notes
    -----
    Uses Beta distribution quantiles for exact binomial confidence bounds.
    For PAC-style guarantees, you may want to use delta = 1 - confidence.
    """
    if k == n:
        return 1.0
    # U = Beta^{-1}(confidence; k+1, n-k)
    # Note: Using confidence directly for upper tail
    return float(stats.beta.ppf(confidence, k + 1, n - k))




[docs]
def clopper_pearson_intervals(labels: np.ndarray, confidence: float = 0.95) -> dict[int, dict[str, Any]]:
    """Compute Clopper-Pearson (exact binomial) confidence intervals for class prevalences.

    Parameters
    ----------
    labels : np.ndarray
        Binary labels (0 or 1)
    confidence : float, default=0.95
        Confidence level (e.g., 0.95 for 95% CI)

    Returns
    -------
    dict
        Dictionary with keys 0 and 1, each containing:
        - 'count': number of samples in this class
        - 'proportion': observed proportion
        - 'lower': lower bound of CI
        - 'upper': upper bound of CI

    Examples
    --------
    >>> labels = np.array([0, 0, 1, 1, 0])
    >>> intervals = clopper_pearson_intervals(labels, confidence=0.95)
    >>> print(intervals[0]['proportion'])
    0.6

    Notes
    -----
    The Clopper-Pearson interval is an exact binomial confidence interval
    based on Beta distribution quantiles. It provides conservative coverage
    guarantees.
    """
    alpha = 1 - confidence
    n_total = len(labels)

    intervals = {}

    for label in [0, 1]:
        count = np.sum(labels == label)
        proportion = count / n_total if n_total > 0 else float("nan")

        # Clopper-Pearson uses Beta distribution quantiles
        # Lower bound: Beta(count, n-count+1) at alpha/2
        # Upper bound: Beta(count+1, n-count) at 1-alpha/2

        if count == 0:
            lower = 0.0
            upper = stats.beta.ppf(1 - alpha / 2, count + 1, n_total - count)
        elif count == n_total:
            lower = stats.beta.ppf(alpha / 2, count, n_total - count + 1)
            upper = 1.0
        else:
            lower = stats.beta.ppf(alpha / 2, count, n_total - count + 1)
            upper = stats.beta.ppf(1 - alpha / 2, count + 1, n_total - count)

        intervals[label] = {"count": count, "proportion": proportion, "lower": lower, "upper": upper}

    return intervals




[docs]
def cp_interval(count: int, total: int, confidence: float = 0.95) -> dict[str, float]:
    """Compute Clopper-Pearson exact confidence interval.

    Helper function for computing a single CI from count and total.

    Parameters
    ----------
    count : int
        Number of successes
    total : int
        Total number of trials
    confidence : float, default=0.95
        Confidence level

    Returns
    -------
    dict
        Dictionary with keys:
        - 'count': original count
        - 'proportion': count/total
        - 'lower': lower CI bound
        - 'upper': upper CI bound
    """
    alpha = 1 - confidence
    count = int(count)
    total = int(total)

    if total <= 0:
        return {
            "count": count,
            "proportion": float("nan"),
            "lower": 0.0,
            "upper": 1.0,
        }

    p = count / total

    if count == 0:
        lower = 0.0
        upper = stats.beta.ppf(1 - alpha / 2, 1, total)
    elif count == total:
        lower = stats.beta.ppf(alpha / 2, total, 1)
        upper = 1.0
    else:
        lower = stats.beta.ppf(alpha / 2, count, total - count + 1)
        upper = stats.beta.ppf(1 - alpha / 2, count + 1, total - count)

    return {"count": count, "proportion": float(p), "lower": float(lower), "upper": float(upper)}



def prediction_bounds_lower(k_cal: int, n_cal: int, n_test: int, confidence: float = 0.95) -> float:
    """Compute lower prediction bound accounting for both calibration and test set sampling uncertainty.

    This function computes prediction bounds for operational rates on future test sets,
    accounting for both calibration uncertainty and test set sampling variability.

    Parameters
    ----------
    k_cal : int
        Number of successes in calibration data
    n_cal : int
        Total number of samples in calibration data
    n_test : int
        Expected size of future test sets
    confidence : float, default=0.95
        Confidence level (e.g., 0.95 for 95% prediction bounds)

    Returns
    -------
    float
        Lower prediction bound for operational rates on future test sets

    Notes
    -----
    The prediction bounds account for both:
    1. Calibration uncertainty: uncertainty in the true rate p from calibration data
    2. Test set sampling uncertainty: variability when sampling n_test points from the true distribution

    Mathematical formula:
    SE = sqrt(p̂(1-p̂) * (1/n_cal + 1/n_test))
    where p̂ = k_cal/n_cal is the estimated rate from calibration data.

    For large n_test, bounds approach calibration-only bounds.
    For small n_test, bounds are wider due to additional test set sampling uncertainty.
    """
    if n_cal <= 0 or n_test <= 0:
        return 0.0

    # Estimated rate from calibration
    p_hat = k_cal / n_cal

    # Standard error accounting for both calibration and test set sampling
    # SE = sqrt(p̂(1-p̂) * (1/n_cal + 1/n_test))
    se = np.sqrt(np.clip(p_hat * (1 - p_hat) * (1 / n_cal + 1 / n_test), 0.0, None))

    # Z-score for confidence level
    alpha = 1 - confidence
    z_score = stats.norm.ppf(alpha / 2)

    # Lower prediction bound
    lower_bound = p_hat + z_score * se

    # Ensure bounds are in [0, 1]
    return max(0.0, min(1.0, lower_bound))


def prediction_bounds_upper(k_cal: int, n_cal: int, n_test: int, confidence: float = 0.95) -> float:
    """Compute upper prediction bound accounting for both calibration and test set sampling uncertainty.

    This function computes prediction bounds for operational rates on future test sets,
    accounting for both calibration uncertainty and test set sampling variability.

    Parameters
    ----------
    k_cal : int
        Number of successes in calibration data
    n_cal : int
        Total number of samples in calibration data
    n_test : int
        Expected size of future test sets
    confidence : float, default=0.95
        Confidence level (e.g., 0.95 for 95% prediction bounds)

    Returns
    -------
    float
        Upper prediction bound for operational rates on future test sets

    Notes
    -----
    The prediction bounds account for both:
    1. Calibration uncertainty: uncertainty in the true rate p from calibration data
    2. Test set sampling uncertainty: variability when sampling n_test points from the true distribution

    Mathematical formula:
    SE = sqrt(p̂(1-p̂) * (1/n_cal + 1/n_test))
    where p̂ = k_cal/n_cal is the estimated rate from calibration data.

    For large n_test, bounds approach calibration-only bounds.
    For small n_test, bounds are wider due to additional test set sampling uncertainty.
    """
    if n_cal <= 0 or n_test <= 0:
        return 1.0

    # Estimated rate from calibration
    p_hat = k_cal / n_cal

    # Standard error accounting for both calibration and test set sampling
    # SE = sqrt(p̂(1-p̂) * (1/n_cal + 1/n_test))
    se = np.sqrt(np.clip(p_hat * (1 - p_hat) * (1 / n_cal + 1 / n_test), 0.0, None))

    # Z-score for confidence level
    alpha = 1 - confidence
    z_score = stats.norm.ppf(1 - alpha / 2)

    # Upper prediction bound
    upper_bound = p_hat + z_score * se

    # Ensure bounds are in [0, 1]
    return max(0.0, min(1.0, upper_bound))


def _log_binomial_coef(n: int, k: int) -> float:
    """Compute log(n choose k) = log(n!) - log(k!) - log((n-k)!)."""
    return gammaln(n + 1) - gammaln(k + 1) - gammaln(n - k + 1)


def _beta_binomial_pmf(k: int, n: int, alpha: float, beta: float) -> float:
    """Beta-binomial PMF: pmf(k; n, alpha, beta) = C(n,k) * Beta(k+alpha, n-k+beta) / Beta(alpha, beta).

    Parameters
    ----------
    k : int
        Number of successes
    n : int
        Total number of trials
    alpha : float
        First Beta parameter
    beta : float
        Second Beta parameter

    Returns
    -------
    float
        Probability mass at k
    """
    return np.exp(_log_binomial_coef(n, k) + betaln(k + alpha, n - k + beta) - betaln(alpha, beta))



[docs]
def prediction_bounds_beta_binomial(
    k_cal: int,
    n_cal: int,
    n_test: int,
    confidence: float = 0.95,
    use_jeffreys: bool = False,
    tail: str = "equal_tailed",
) -> tuple[float, float]:
    """Beta-Binomial predictive interval for future empirical rate.

    This function computes exact prediction bounds using the Beta-Binomial distribution,
    which properly accounts for both calibration uncertainty and test set sampling variability.

    Parameters
    ----------
    k_cal : int
        Number of successes in calibration data
    n_cal : int
        Total number of samples in calibration data
    n_test : int
        Expected size of future test sets
    confidence : float, default=0.95
        Desired predictive mass (confidence level)
    use_jeffreys : bool, default=False
        If False, use uniform prior Beta(1,1), giving posterior Beta(k_cal+1, n_cal-k_cal+1).
        If True, use Jeffreys prior Beta(1/2,1/2), giving posterior Beta(k_cal+0.5, n_cal-k_cal+0.5).
    tail : str, default="equal_tailed"
        Interval type:
        - "equal_tailed": Invert predictive CDF (α/2 each side)
        - "hpd": Shortest high posterior density predictive interval

    Returns
    -------
    tuple[float, float]
        (lower_rate, upper_rate) for operational rates on future test sets

    Notes
    -----
    This method models:
    1. True rate p ~ Beta(alpha, beta) (posterior from calibration data)
    2. Future successes k_test | p ~ Binomial(n_test, p)
    3. Marginal predictive distribution: k_test ~ BetaBinomial(n_test, alpha, beta)
    4. Return bounds on the rate k_test / n_test

    This provides exact finite-sample prediction bounds that account for both sources
    of uncertainty without normal approximations.
    """
    if n_cal <= 0 or n_test <= 0:
        return (0.0, 1.0)

    offset = 0.5 if use_jeffreys else 1.0
    alpha = k_cal + offset
    beta = (n_cal - k_cal) + offset

    # Predictive PMF over k_test = 0..n_test
    ks = np.arange(n_test + 1)
    pmf = np.array([_beta_binomial_pmf(k, n_test, alpha, beta) for k in ks])
    pmf = pmf / pmf.sum()  # Numerical safety (normalize to ensure sum = 1)

    if tail == "equal_tailed":
        cdf = np.cumsum(pmf)
        alpha_tail = (1.0 - confidence) / 2.0

        # Lower index = smallest k with CDF >= alpha_tail
        k_lo = int(np.searchsorted(cdf, alpha_tail))
        # Upper index = largest k with CDF <= 1 - alpha_tail
        k_hi = int(np.searchsorted(cdf, 1.0 - alpha_tail, side="right") - 1)

    elif tail == "hpd":
        # Sort ks by PMF descending, then add mass until >= confidence,
        # then take min/max k in that included set. This gives a highest-density
        # predictive set, then we report its hull.
        order = np.argsort(-pmf)
        keep = []
        mass = 0.0
        for idx in order:
            keep.append(idx)
            mass += pmf[idx]
            if mass >= confidence:
                break
        k_lo = ks[min(keep)]
        k_hi = ks[max(keep)]

    else:
        raise ValueError("tail must be 'equal_tailed' or 'hpd'")

    return (k_lo / n_test, k_hi / n_test)




[docs]
def prediction_bounds(
    k_cal: int, n_cal: int, n_test: int, confidence: float = 0.95, method: str = "simple"
) -> tuple[float, float]:
    """Compute prediction bounds accounting for both calibration and test set sampling uncertainty.

    This function provides two methods for computing prediction bounds:
    1. "simple": Uses standard error formula (faster, good for large samples)
    2. "beta_binomial": Uses Beta-Binomial distribution (more accurate for small samples)

    Parameters
    ----------
    k_cal : int
        Number of successes in calibration data for a **single well-defined Bernoulli event**.
        Must be the count of a binary indicator (e.g., Z_i = 1{event}) across all n_cal trials.
    n_cal : int
        Total number of trials in calibration data for the **same Bernoulli event**.
        This is the fixed denominator (total sample size or conditional subpopulation size).
    n_test : int
        Expected number of future trials for the **same Bernoulli event**.
        For joint rates, this is the planned test size (fixed).
        For conditional rates, this is an estimated future conditional subpopulation size.
    confidence : float, default=0.95
        Confidence level (e.g., 0.95 for 95% prediction bounds)
    method : str, default="simple"
        Method to use: "simple" or "beta_binomial"

    Returns
    -------
    tuple[float, float]
        (lower_bound, upper_bound) for operational rates on future test sets

    Examples
    --------
    >>> # Simple method (default)
    >>> lower, upper = prediction_bounds(k_cal=50, n_cal=100, n_test=1000, confidence=0.95)
    >>> print(f"Simple bounds: [{lower:.3f}, {upper:.3f}]")

    >>> # Beta-Binomial method (more accurate for small samples)
    >>> lower, upper = prediction_bounds(k_cal=50, n_cal=100, n_test=1000, confidence=0.95, method="beta_binomial")
    >>> print(f"Beta-Binomial bounds: [{lower:.3f}, {upper:.3f}]")

    Notes
    -----
    The prediction bounds account for both:
    1. Calibration uncertainty: uncertainty in the true rate p from calibration data
    2. Test set sampling uncertainty: variability when sampling n_test points from the true distribution

    **Simple method** (default):
    - Mathematical formula: SE = sqrt(p̂(1-p̂) * (1/n_cal + 1/n_test))
    - Good for large sample sizes
    - Faster computation

    **Beta-Binomial method**:
    - Uses Beta-Binomial distribution for exact finite-sample modeling
    - More accurate for small sample sizes
    - Slower computation
    - Uses uniform prior Beta(1,1) and equal-tailed intervals by default
    - For advanced use (Jeffreys prior or HPD intervals), call
      prediction_bounds_beta_binomial() directly

    For large n_test, bounds approach calibration-only bounds.
    For small n_test, bounds are wider due to additional test set sampling uncertainty.

    This is the recommended function for computing operational rate bounds when
    applying fixed thresholds to future test sets.
    """
    if method == "simple":
        raw_lower = prediction_bounds_lower(k_cal, n_cal, n_test, confidence)
        raw_upper = prediction_bounds_upper(k_cal, n_cal, n_test, confidence)
        # Ensure monotonicity (clip in case of numerical noise)
        lower = min(raw_lower, raw_upper)
        upper = max(raw_lower, raw_upper)
        return (lower, upper)
    elif method == "beta_binomial":
        return prediction_bounds_beta_binomial(k_cal, n_cal, n_test, confidence)
    else:
        raise ValueError(f"Unknown method: {method}. Use 'simple' or 'beta_binomial'.")




[docs]
def ensure_ci(d: dict[str, Any] | Any, count: int, total: int, confidence: float = 0.95) -> tuple[float, float, float]:
    """Extract or compute rate and confidence interval from a dictionary.

    If the dictionary already contains rate/CI information, use it.
    Otherwise, compute Clopper-Pearson CI from count/total.

    This function re-normalizes to the requested confidence level if the
    provided dictionary is missing bounds or if the provided bounds look
    degenerate (NaN values).

    Parameters
    ----------
    d : dict
        Dictionary that may contain 'rate'/'proportion' and 'lower'/'upper'
    count : int
        Count for CI computation (if needed)
    total : int
        Total for CI computation (if needed)
    confidence : float, default=0.95
        Confidence level

    Returns
    -------
    tuple of (rate, lower, upper)
        Rate and confidence interval bounds
    """
    # Initialize with NaN to detect missing values
    r = np.nan
    lo = np.nan
    hi = np.nan

    if isinstance(d, dict):
        if "rate" in d:
            r = float(d["rate"])
        elif "proportion" in d:
            r = float(d["proportion"])

        if "ci_95" in d and isinstance(d["ci_95"], tuple | list) and len(d["ci_95"]) == 2:
            lo, hi = map(float, d["ci_95"])

        else:
            lo = float(d.get("lower", np.nan))
            hi = float(d.get("upper", np.nan))

    # If missing or invalid (NaN), compute CP interval
    need_ci = np.isnan(r) or np.isnan(lo) or np.isnan(hi)
    if need_ci:
        ci = cp_interval(count, total, confidence)
        return ci["proportion"], ci["lower"], ci["upper"]

    return r, lo, hi