Skip to content

API Reference

Main Entry Point

bootstrapx.api.bootstrap(data, statistic, *, method='bca', n_resamples=9999, batch_size=None, confidence_level=0.95, backend='auto', random_state=None, n_jobs=1, **kwargs)

bootstrapx.api.BootstrapResult dataclass

Source code in bootstrapx/api.py
@dataclass
class BootstrapResult:
    """Outcome of a single bootstrap run.

    Bundles the point estimate, its bootstrap standard error, the full
    bootstrap distribution, the confidence interval derived from it, and
    any method-specific extras.
    """

    confidence_interval: ConfidenceInterval
    bootstrap_distribution: np.ndarray
    theta_hat: float
    standard_error: float
    n_resamples: int
    method: str
    extra: dict[str, Any] = field(default_factory=dict)

    def __repr__(self) -> str:
        # Compact one-line summary; the full bootstrap distribution is
        # deliberately left out of the repr.
        interval = self.confidence_interval
        fields_repr = ", ".join(
            [
                f"method={self.method!r}",
                f"theta_hat={self.theta_hat:.6g}",
                f"se={self.standard_error:.6g}",
                f"CI=[{interval.low:.6g}, {interval.high:.6g}]",
            ]
        )
        return f"BootstrapResult({fields_repr})"

Confidence Intervals

bootstrapx.stats.confidence.percentile_interval(boot_stats, confidence_level=0.95)

Source code in bootstrapx/stats/confidence.py
def percentile_interval(boot_stats, confidence_level=0.95):
    """Plain percentile CI.

    Takes the alpha/2 and 1-alpha/2 empirical quantiles of the bootstrap
    distribution directly as the interval endpoints.
    """
    tail = (1.0 - confidence_level) / 2.0
    lo, hi = np.percentile(boot_stats, [100.0 * tail, 100.0 * (1.0 - tail)])
    return ConfidenceInterval(low=float(lo), high=float(hi), method="percentile")

bootstrapx.stats.confidence.basic_interval(boot_stats, theta_hat, confidence_level=0.95)

Source code in bootstrapx/stats/confidence.py
def basic_interval(boot_stats, theta_hat, confidence_level=0.95):
    """Basic (reverse-percentile) CI.

    Reflects the bootstrap quantiles around the point estimate: the
    mapping q -> 2*theta_hat - q swaps which quantile bounds which side.
    """
    tail = (1.0 - confidence_level) / 2.0
    lower_q, upper_q = np.percentile(
        boot_stats, [100.0 * tail, 100.0 * (1.0 - tail)]
    )
    return ConfidenceInterval(
        low=float(2.0 * theta_hat - upper_q),
        high=float(2.0 * theta_hat - lower_q),
        method="basic",
    )

bootstrapx.stats.confidence.bca_interval(boot_stats, data, statistic, theta_hat, confidence_level=0.95)

Source code in bootstrapx/stats/confidence.py
def bca_interval(boot_stats, data, statistic, theta_hat, confidence_level=0.95):
    """Bias-corrected and accelerated (BCa) CI.

    Adjusts the percentile endpoints with a bias-correction term z0
    (fraction of bootstrap replicates below theta_hat, normal-inverted)
    and an acceleration term a_hat (skewness of jackknife deviations).
    """
    alpha = 1.0 - confidence_level

    # z0: how much of the bootstrap mass sits strictly below theta_hat.
    frac_below = np.mean(boot_stats < theta_hat)
    # Keep strictly inside (0, 1) so the normal inverse stays finite.
    frac_below = np.clip(frac_below, 1e-10, 1 - 1e-10)
    z0 = float(sp_stats.norm.ppf(frac_below))

    # a_hat: skewness of the jackknife influence values.
    jack = _jackknife(data, statistic)
    dev = jack.mean() - jack
    denom = ((dev**2).sum()) ** 1.5
    a_hat = ((dev**3).sum() / (6 * denom)) if denom != 0 else 0.0

    def _endpoint(z_alpha):
        # Standard BCa adjustment:
        #   Phi(z0 + (z0 + z_alpha) / (1 - a_hat * (z0 + z_alpha)))
        shifted = z0 + z_alpha
        return float(sp_stats.norm.cdf(z0 + shifted / (1 - a_hat * shifted)))

    p_low = _endpoint(sp_stats.norm.ppf(alpha / 2))
    p_high = _endpoint(sp_stats.norm.ppf(1 - alpha / 2))

    return ConfidenceInterval(
        low=float(np.percentile(boot_stats, 100 * p_low)),
        high=float(np.percentile(boot_stats, 100 * p_high)),
        method="bca",
    )

bootstrapx.stats.confidence.studentized_interval(data, statistic, theta_hat, boot_stats, boot_se, confidence_level=0.95)

Source code in bootstrapx/stats/confidence.py
def studentized_interval(
    data, statistic, theta_hat, boot_stats, boot_se, confidence_level=0.95
):
    """Studentized (bootstrap-t) CI.

    Pivots on t* = (theta* - theta_hat) / se*, then inverts:
    [theta_hat - t*_{1-a/2} * se_hat, theta_hat - t*_{a/2} * se_hat].
    Note the upper t-quantile bounds the LOW endpoint and vice versa.
    """
    alpha = 1.0 - confidence_level

    # Drop replicates with non-positive inner SE to avoid division blowups.
    valid = boot_se > 0
    pivots = (boot_stats[valid] - theta_hat) / boot_se[valid]

    t_upper = np.percentile(pivots, 100 * (1 - alpha / 2))
    t_lower = np.percentile(pivots, 100 * alpha / 2)

    # Outer standard error estimated from the bootstrap distribution itself.
    se_hat = np.std(boot_stats, ddof=1)

    return ConfidenceInterval(
        low=float(theta_hat - t_upper * se_hat),
        high=float(theta_hat - t_lower * se_hat),
        method="studentized",
    )

Utilities

bootstrapx.utils.validate_data(data, *, allow_2d=False)

Source code in bootstrapx/utils.py
def validate_data(data, *, allow_2d: bool = False) -> np.ndarray:
    """Coerce *data* to an ndarray and validate it for bootstrapping.

    Parameters
    ----------
    data : array-like
        Observations; 1-D, or 2-D (rows = observations) when
        ``allow_2d=True``.
    allow_2d : bool, keyword-only
        Permit matrix-shaped data.

    Returns
    -------
    np.ndarray
        The validated array (no copy when already an ndarray).

    Raises
    ------
    ValueError
        For scalar input, wrong dimensionality, NaN values, or fewer
        than 2 observations.
    """
    arr = np.asarray(data)
    if arr.ndim == 0:
        raise ValueError("Scalar data is not supported.")
    if arr.ndim > 2 or (arr.ndim == 2 and not allow_2d):
        raise ValueError(
            f"Expected 1-D array, got shape {arr.shape}. "
            "Pass allow_2d=True for matrix data."
        )
    # Bug fix: np.isnan raises TypeError on non-numeric dtypes
    # (object, str, datetime). Only float/complex arrays can hold NaN,
    # so guard the check on the dtype being inexact.
    if np.issubdtype(arr.dtype, np.inexact) and np.any(np.isnan(arr)):
        raise ValueError("Data contains NaN values. Remove or impute them first.")
    if arr.shape[0] < 2:
        raise ValueError("Data must have at least 2 observations.")
    return arr

bootstrapx.utils.auto_batch_size(n, n_resamples)

Source code in bootstrapx/utils.py
def auto_batch_size(n: int, n_resamples: int) -> int:
    """Pick a resample batch size for vectorized generation.

    Targets roughly 32k elements (n * batch) per chunk — small enough to
    stay cache-friendly, large enough to amortize numpy call overhead —
    capped at the total number of resamples.
    """
    target_elements = 32_768  # elements per chunk; empirical numpy sweet spot
    per_batch = target_elements // n
    if per_batch < 1:
        per_batch = 1
    return per_batch if per_batch < n_resamples else n_resamples

Backend

bootstrapx.engine.backend.resolve_backend(requested='auto')

Source code in bootstrapx/engine/backend.py
def resolve_backend(requested: str = "auto") -> BackendKind:
    """Map a user-supplied backend name to a ``BackendKind``.

    ``"auto"`` prefers CUDA, then Numba CPU, then plain numpy. An
    explicit ``"numba_cuda"`` request fails loudly when no GPU is
    present. NOTE(review): an explicit ``"numba_cpu"`` request is NOT
    checked against numba availability — confirm that is intentional.
    """
    name = requested.lower().strip()

    if name == "auto":
        # Best available backend wins.
        if _cuda_available():
            return BackendKind.NUMBA_CUDA
        if _numba_available():
            return BackendKind.NUMBA_CPU
        return BackendKind.VANILLA

    known = {
        "numba_cpu": BackendKind.NUMBA_CPU,
        "numba_cuda": BackendKind.NUMBA_CUDA,
        "vanilla": BackendKind.VANILLA,
    }
    try:
        kind = known[name]
    except KeyError:
        raise ValueError(
            f"Unknown backend {name!r}. Choose from {list(known.keys())}."
        ) from None

    if kind is BackendKind.NUMBA_CUDA and not _cuda_available():
        raise RuntimeError("CUDA backend requested but no GPU found.")
    return kind

bootstrapx.engine.backend.BackendKind

Bases: Enum

Source code in bootstrapx/engine/backend.py
class BackendKind(enum.Enum):
    """Execution backends the engine can dispatch to.

    The string values are the user-facing names accepted by
    ``resolve_backend``.
    """
    NUMBA_CPU = "numba_cpu"
    NUMBA_CUDA = "numba_cuda"
    VANILLA = "vanilla"