VBMC options

The options are divided into two groups:
  • Basic options: options that many users may want to change.

  • Advanced options: options for advanced users of VBMC. Do not modify them unless you know what you are doing.

You can find the default options for both groups below.
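
Options are passed to VBMC as a dictionary when constructing the inference object. Below is a minimal sketch, assuming the standard PyVBMC interface (a VBMC constructor that accepts an options dictionary and an optimize() method); the toy target and bounds are purely illustrative:

import numpy as np
from pyvbmc import VBMC

D = 2  # problem dimension

def log_density(theta):
    # Toy target: standard D-dimensional normal log density
    return -0.5 * np.sum(theta**2) - 0.5 * D * np.log(2 * np.pi)

x0 = np.zeros((1, D))        # starting point
plb = np.full((1, D), -3.0)  # plausible lower bounds
pub = np.full((1, D), 3.0)   # plausible upper bounds

# Override any option by name; unspecified options keep their defaults
# (expressions such as 50 * (2 + D) are resolved internally).
options = {"display": "final", "max_fun_evals": 40 * (2 + D)}
vbmc = VBMC(
    log_density,
    x0,
    plausible_lower_bounds=plb,
    plausible_upper_bounds=pub,
    options=options,
)
vp, results = vbmc.optimize()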

Basic options

These are options that many users may want to change.

[BasicOptions]
# Level of display ("iter", "notify", "final", or "off")
display = "iter"
# Plot marginals of variational posterior at each iteration
plot = False
# Max number of iterations
max_iter = 50 * (2 + D)
# Max number of target fcn evals
max_fun_evals = 50 * (2 + D)
# Number of target fcn evals per iteration
fun_evals_per_iter = 5
# Required stable fcn evals for termination
tol_stable_count = 60
# Number of variational components to refine posterior at termination
min_final_components = 50
# Target log joint function returns noise estimate (SD) as second output
specify_target_noise = False
# Name of file to write logs to (if None, no log file will be written)
log_file_name = None
# Logging level for log file, one of ("iter", "notify", "final", or "off"), or a level from logging module (e.g. logging.DEBUG)
log_file_level = "iter"
# Logging write mode, one of ("w", "w+", or "a")
log_file_mode = "a"
# Whether to re-print column headers for optimization statistics at each iteration. If None, PyVBMC tries to guess based on plotting options.
print_iteration_header = None
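
For example, setting specify_target_noise changes the expected signature of the target: the log joint returns its noise estimate (standard deviation) as a second output, as described above. A hedged sketch with an artificial noisy target for illustration:

import numpy as np

def noisy_log_density(theta):
    noise_sd = 0.5  # known SD of the log-density estimator
    log_p = -0.5 * np.sum(theta**2) + noise_sd * np.random.randn()
    return log_p, noise_sd  # (noisy estimate, noise SD)

options = {
    "specify_target_noise": True,  # target returns noise SD as second output
    "display": "final",
}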

Advanced options

These options are for advanced users of VBMC. Do not modify them unless you know what you are doing.
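
Note that many defaults below are expressions in the problem dimension D (with np denoting NumPy), or lambdas of the number of variational components K or the number of training points N; these are evaluated once the problem is set up. A few examples, using values from the listing below:

import numpy as np

D = 4  # example problem dimension
fun_eval_start = np.maximum(D, 10)       # -> 10 initial target fcn evals
warmup_no_impro_threshold = 20 + 5 * D   # -> 40 evals without improvement

ns_ent = lambda K: 100 * K ** (2 / 3)    # entropy samples for K components
print(ns_ent(8))                         # -> approximately 400 samples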

[AdvancedOptions]
# Explicit noise handling (0: none; 1: unknown noise level; 2: user-provided noise)
uncertainty_handling = []
# Array with indices of integer variables
integer_vars = []
# Base observation noise magnitude (standard deviation)
noise_size = []
# Max number of consecutive repeated measurements for noisy inputs
max_repeated_observations = 0
# Number of initial target fcn evals
fun_eval_start = np.maximum(D, 10)
# Base step size for stochastic gradient descent
sgd_step_size = 0.005
# Skip active sampling the first iteration after warmup
skip_active_sampling_after_warmup = False
# Use ranking criterion to pick best non-converged solution
rank_criterion = True
# Required stable iterations to switch entropy approximation
tol_stable_entropy_iters = 6
# Use variable component means for variational posterior
variable_means = True
# Use variable mixture weight for variational posterior
variable_weights = True
# Penalty multiplier for small mixture weights
weight_penalty = 0.1
# Run in diagnostics mode to get additional info
diagnostics = False
# Output function
output_fcn = []
# Fraction of allowed exceptions when computing iteration stability
tol_stable_excpt_frac = 0.2
# Evaluated fcn values at X0
f_vals = []
# Weighted proposal fcn for uncertainty search
proposal_fcn = None
# Automatic nonlinear rescaling of variables
nonlinear_scaling = True
# Search acquisition fcn(s), from pyvbmc.acquisition_functions. If length is greater than 1, a function is randomly selected for each new point.
search_acq_fcn = [AcqFcnLog()]
# Samples for fast acquisition fcn eval per new point
ns_search = 2 ** 13
# Total samples for Monte Carlo approx. of the entropy
ns_ent = lambda K : 100 * K ** (2 / 3)
# Total samples for preliminary Monte Carlo approx. of the entropy
ns_ent_fast = 0
# Total samples for refined Monte Carlo approx. of the entropy
ns_ent_fine = lambda K : 2 ** 12 * K
# Whether to do a final boost
do_final_boost = True
# Total samples for Monte Carlo approx. of the entropy (final boost)
ns_ent_boost = lambda K : 200 * K ** (2 / 3)
# Total samples for preliminary Monte Carlo approx. of the entropy (final boost)
ns_ent_fast_boost = []
# Total samples for refined Monte Carlo approx. of the entropy (final boost)
ns_ent_fine_boost = []
# Total samples for Monte Carlo approx. of the entropy (active sampling)
ns_ent_active = lambda K : 20 * K ** (2 / 3)
# Total samples for preliminary Monte Carlo approx. of the entropy (active sampling)
ns_ent_fast_active = 0
# Total samples for refined Monte Carlo approx. of the entropy (active sampling)
ns_ent_fine_active = lambda K : 200 * K
# Samples for fast approximation of the ELBO
ns_elbo = lambda K : 50 * K
# Multiplier to samples for fast approx. of ELBO for incremental iterations
ns_elbo_incr = 0.1
# Starting points to refine optimization of the ELBO
elbo_starts = 2
# Max GP hyperparameter samples (decreases with training points)
ns_gp_max = 80
# Max GP hyperparameter samples during warmup
ns_gp_max_warmup = 8
# Max GP hyperparameter samples during main algorithm
ns_gp_max_main = np.inf
# Fcn evals without improvement before stopping warmup
warmup_no_impro_threshold = 20 + 5 * D
# Also check for max fcn value improvement before stopping warmup
warmup_check_max = True
# Force stable GP hyperparameter sampling (reduce samples or start optimizing)
stable_gp_sampling = 200 + 10 * D
# Force stable GP hyperparameter sampling after reaching this number of components
stable_gp_vp_k = np.inf
# Number of GP samples when GP is stable (0 = optimize)
stable_gp_samples = 0
# Thinning for GP hyperparameter sampling
gp_sample_thin = 5
# Initial design points for GP hyperparameter training
gp_train_n_init = 1024
# Final design points for GP hyperparameter training
gp_train_n_init_final = 64
# Initial design method for GP hyperparameter training
gp_train_init_method = "rand"
# Tolerance for optimization of GP hyperparameters
gp_tol_opt = 1e-5
# Tolerance for optimization of GP hyperparameters preliminary to MCMC
gp_tol_opt_mcmc = 1e-2
# Tolerance for optimization of GP hyperparameters during active sampling
gp_tol_opt_active = 1e-4
# Tolerance for optimization of GP hyperparameters preliminary to MCMC during active sampling
gp_tol_opt_mcmc_active = 1e-2
# Threshold on GP variance used by regularized acquisition fcns
tol_gp_var = 1e-4
# Threshold on GP variance used to stabilize sampling
tol_gp_var_mcmc = 1e-4
# GP mean function
gp_mean_fun = "negquad"
# GP integrated mean function
gp_int_mean_fun = 0
# Max variational components as a function of training points
k_fun_max = lambda N : N ** (2 / 3)
# Variational components during warmup
k_warmup = 2
# Added variational components for stable solution
adaptive_k = 2
# High Posterior Density region (fraction of training inputs)
hpd_frac = 0.8
# Uncertainty weight on ELCBO for computing lower bound improvement
elcbo_impro_weight = 3
# Minimum fractional length scale
tol_length = 1e-6
# Size of cache for storing fcn evaluations
cache_size = 500
# Fraction of search points from starting cache (if nonempty)
cache_frac = 0.5
# Stochastic optimizer for variational parameters
stochastic_optimizer = "adam"
# Stopping threshold for stochastic optimization
tol_fun_stochastic = 1e-3
# Max iterations for stochastic optimization
max_iter_stochastic = 100 * (2 + D)
# Set stochastic optimization stepsize via GP hyperparameters
gp_stochastic_step_size = False
# Tolerance on ELBO uncertainty for stopping (if variational posterior is stable)
tol_sd = 0.1
# Stopping threshold on change of variational posterior per training point
tol_skl = 0.01 * np.sqrt(D)
# Number of stable fcn evals for stopping warmup
tol_stable_warmup = 15
# MCMC sampler for variational posteriors
variational_sampler = "malasample"
# Required ELCBO improvement per fcn eval before termination
tol_improvement = 0.01
# Use Gaussian approximation for symmetrized KL-divergence between iterations
kl_gauss = True
# True mean of the target density (for debugging)
true_mean = []
# True covariance of the target density (for debugging)
true_cov = []
# Min number of fcn evals
min_fun_evals = 5 * D
# Min number of iterations
min_iter = D
# Fraction of search points from heavy-tailed variational posterior
heavy_tail_search_frac = 0.25
# Fraction of search points from multivariate normal
mvn_search_frac = 0.25
# Fraction of search points from multivariate normal fitted to HPD points
hpd_search_frac = 0
# Fraction of search points from uniform random box based on training inputs
box_search_frac = 0.25
# Fraction of search points from previous iterations
search_cache_frac = 0
# Always fully refit variational posterior
always_refit_vp = False
# Perform warm-up stage
warmup = True
# Special options dict for the warmup stage
warmup_options = []
# Stop warm-up when ELCBO increase below threshold (per fcn eval)
stop_warmup_thresh = 0.2
# Max log-likelihood difference for points kept after warmup
warmup_keep_threshold = 10 * D
# Max log-likelihood difference for points kept after a false-alarm warmup stop
warmup_keep_threshold_false_alarm = 100 * (D + 2)
# Reliability index required to stop warmup
stop_warmup_reliability = 100
# Optimization method for active sampling
search_optimizer = "cmaes"
# Initialize CMA-ES search SIGMA from variational posterior
search_cmaes_vp_init = True
# Take best-ever solution from CMA-ES search
search_cmaes_best = False
# Max number of acquisition fcn evaluations during search
search_max_fun_evals = 500 * (D + 2)
# Weight of previous trials (per trial) for running avg of variational posterior moments
moments_run_weight = 0.9
# Upper threshold on reliability index for full retraining of GP hyperparameters
gp_retrain_threshold = 1
# Compute full ELCBO also at best midpoint
elcbo_midpoint = True
# Multiplier to widths from previous posterior for GP sampling (Inf = do not use previous widths)
gp_sample_widths = 5
# Weight of previous trials (per trial) for running avg of GP hyperparameter covariance
hyp_run_weight = 0.9
# Use weighted hyperparameter posterior covariance
weighted_hyp_cov = True
# Minimum weight for weighted hyperparameter posterior covariance
tol_cov_weight = 0
# MCMC sampler for GP hyperparameters
gp_hyp_sampler = "slicesample"
# Switch to covariance sampling below this threshold of stability index
cov_sample_thresh = 10
# Optimality tolerance for optimization of deterministic entropy
det_entropy_tol_opt = 1e-3
# Switch from deterministic entropy to stochastic entropy when reaching stability
entropy_switch = False
# Force switch to stochastic entropy at this fraction of total fcn evals
entropy_force_switch = 0.8
# Alpha value for lower/upper deterministic entropy interpolation
det_entropy_alpha = 0
# Randomize deterministic entropy alpha during active sample updates
update_random_alpha = False
# Online adaptation of alpha value for lower/upper deterministic entropy interpolation
adaptive_entropy_alpha = False
# Start with deterministic entropy only with this number of vars or more
det_entropy_min_d = 5
# Fractional tolerance for constraint violation of variational parameters
tol_con_loss = 0.01
# SD multiplier of ELCBO for computing best variational solution
best_safe_sd = 5
# When computing best solution lacking stability, go back up to this fraction of iterations
best_frac_back = 0.25
# Threshold mixture component weight for pruning
tol_weight = 1e-2
# Multiplier to threshold for pruning mixture weights
pruning_threshold_multiplier = lambda K : 1 / np.sqrt(K)
# Annealing for hyperprior width of GP negative quadratic mean
annealed_gp_mean = lambda N, NMAX: 0
# Strict hyperprior for GP negative quadratic mean
constrained_gp_mean = False
# Empirical Bayes prior over some GP hyperparameters
empirical_gp_prior = False
# Minimum GP observation noise
tol_gp_noise = np.sqrt(1e-5)
# Prior mean over GP input length scale (in plausible units)
gp_length_prior_mean = np.sqrt(D / 6)
# Prior std over GP input length scale (in plausible units)
gp_length_prior_std = 0.5 * np.log(1e3)
# Upper bound on GP input lengths based on plausible box (0 = ignore)
upper_gp_length_factor = 0
# Initial samples ("plausible" = uniform in the plausible box)
init_design = "plausible"
# Stricter upper bound on GP negative quadratic mean function
gp_quadratic_mean_bound = True
# Heuristic output warping (fitness shaping)
fitness_shaping = False
# Output warping starting threshold
out_warp_thresh_base = 10 * D
# Output warping threshold multiplier when failed sub-threshold check
out_warp_thresh_mult = 1.25
# Output warping base threshold tolerance (fraction of current threshold)
out_warp_thresh_tol = 0.8
# Temperature for posterior tempering (allowed values T = 1, 2, 3, 4)
temperature = 1
# Use separate GP with constant mean for active search
separate_search_gp = False
# Discount observations from extremely low-density regions
noise_shaping = False
# Threshold from max observed value to start discounting
noise_shaping_threshold = 10 * D
# Proportionality factor of added noise with respect to distance from threshold
noise_shaping_factor = 0.05
# Hedge on multiple acquisition functions
acq_hedge = False
# Past iterations window to judge acquisition fcn improvement
acq_hedge_iter_window = 4
# Portfolio value decay per function evaluation
acq_hedge_decay = 0.9
# MCMC variational steps before each active sampling
active_variational_samples = 0
# Apply lower bound on variational components scale during variational sampling
scale_lower_bound = True
# Perform variational optimization after each active sample
active_sample_vp_update = False
# Perform GP training after each active sample
active_sample_gp_update = False
# Number of iterations past warmup to continue update after each active sample
active_sample_full_update_past_warmup = 2
# Perform full update during active sampling if stability above threshold
active_sample_full_update_threshold = 3
# Use previous variational posteriors to initialize optimization
variational_init_repo = False
# Extra variational components sampled from GP profile
sample_extra_vp_means = 0
# Uncertainty weight on ELCBO during active sampling
optimistic_variational_bound = 0
# Number of importance samples from smoothed variational posterior
active_importance_sampling_vp_samples = 100
# Number of importance samples from box-uniform centered on training inputs
active_importance_sampling_box_samples = 100
# Number of importance samples through MCMC
active_importance_sampling_mcmc_samples = 100
# Thinning for importance sampling MCMC
active_importance_sampling_mcmc_thin = 1
# Fractional ESS threshold to update GP and VP
active_sample_fess_thresh = 1
# Fractional ESS threshold to do MCMC during active importance sampling
active_importance_sampling_fess_thresh = 0.9
# Active search bound multiplier
active_search_bound = 2
# Try integrating GP mean function
integrate_gp_mean = False
# Tolerance on closeness to bound constraints (fraction of total range)
tol_bound_x = 1e-5
# Recompute LCB max for each iteration based on current GP estimate
recompute_lcb_max = True
# Input transform for bounded variables
bounded_transform = "probit"
# Use double GP
double_gp = False
# Perform input warping every this many iterations
warp_every_iters = 5
# Increase delay between warpings
incremental_warp_delay = True
# Threshold on reliability index to perform warp
warp_tol_reliability = 3
# Rotate and scale input
warp_rotoscaling = True
# Regularization weight towards diagonal covariance matrix for N training inputs
warp_cov_reg = 0
# Threshold on correlation matrix for roto-scaling
warp_roto_corr_thresh = 0.05
# Min number of variational components to perform warp
warp_min_k = 5
# Immediately undo warp if not improving ELBO
warp_undo_check = True
# Improvement of ELBO required to keep a warp proposal
warp_tol_improvement = 0.1
# Multiplier tolerance of ELBO SD after warp proposal
warp_tol_sd_multiplier = 2
# Base tolerance on ELBO SD after warp proposal
warp_tol_sd_base = 1
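
Once a VBMC object is constructed, the resolved option values (with all D-dependent expressions evaluated) can be inspected. A sketch, assuming the instance exposes its options as a mapping attribute named options (an assumption about the interface, not a documented guarantee):

vbmc = VBMC(
    log_density,
    x0,
    plausible_lower_bounds=plb,
    plausible_upper_bounds=pub,
    options={"warp_rotoscaling": False},  # override one advanced option
)
print(vbmc.options["max_iter"])  # D-dependent default, now a concrete number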