Source code for pyvbmc.stats.get_hpd

import numpy as np


def get_hpd(X: np.ndarray, y: np.ndarray, hpd_frac: float = 0.8):
    """
    Get high-posterior density dataset.

    The points are ranked by their target value ``y`` (largest first) and
    the top ``round(hpd_frac * N)`` of them are returned.

    Parameters
    ==========
    X : ndarray, shape (N, D)
        The training points.
    y : ndarray, shape (N, 1)
        The training targets.
    hpd_frac : float
        The portion of the training set to consider, by default 0.8.
        The resulting number of points is clamped to ``[0, N]``, so a
        negative fraction selects no points and a fraction above 1
        selects all of them.

    Returns
    =======
    hpd_X : ndarray
        High-posterior density training points.
    hpd_y : ndarray
        High-posterior density training targets.
    hpd_range : ndarray, shape (D,)
        The range of values of hpd_X in each dimension. Filled with NaN
        when no points are selected.
    indices : ndarray
        The indices of the points returned with respect to the original
        data being passed to the function, ordered by decreasing ``y``.
    """
    N, D = X.shape

    # Subsample high posterior density dataset.
    # Sort by descending order, not ascending.
    order = np.argsort(y, axis=None)[::-1]

    # Clamp so that a negative fraction cannot turn the slice below into
    # "everything except the last k points".
    hpd_N = min(max(round(hpd_frac * N), 0), N)
    indices = order[:hpd_N]
    hpd_X = X[indices]
    hpd_y = y[indices]

    if hpd_N > 0:
        hpd_range = np.max(hpd_X, axis=0) - np.min(hpd_X, axis=0)
    else:
        # np.nan rather than np.NaN: the capitalised alias was removed in
        # NumPy 2.0 and raises AttributeError there.
        hpd_range = np.full(D, np.nan)

    return hpd_X, hpd_y, hpd_range, indices