BADS options
The options can be divided into two types:
- Basic options: These options are of interest to most users, and cover all regular usage needs.
- Advanced options: These options should rarely, if ever, be touched and are reserved for advanced users / developers of BADS. Please do not modify them unless you know what you are doing.
You can find the default options for both groups below.
Basic options
We expect these options to be routinely changed by many users.
[BasicOptions]
# Options that users are expected to change routinely.
# NOTE(review): D appears in several defaults below; presumably the problem dimension (number of optimized variables) - confirm.
# Level of display ("iter", "notify", "final", or "off")
display = "iter"
# Max number of iterations
max_iter = 200*D
# Max number of target function evaluations
max_fun_evals = 500 * D
# Automatic nonlinear rescaling of variables
nonlinear_scaling = True
# Complete polling around the current iterate
complete_poll = False
# Accelerate mesh contraction
accelerate_mesh = True
# Explicit noise handling (if None, determine at runtime)
uncertainty_handling = None
# Base observation noise magnitude (SD), e.g. noise_size = 1.0
noise_size = None
# Target function returns noise estimate (SD) as second output
specify_target_noise = False
# Samples to estimate FVAL at the end (for noisy objectives)
noise_final_samples = 10
# Random seed used by the optimizer. If None, no seed is set.
random_seed = None
Advanced Options
These options are reserved for advanced users / developers of BADS. We strongly advise against changing these options unless you know what you are doing, since unexpected/untested behavior might follow.
[AdvancedOptions]
# Show optimization plots ("profile", "scatter", or False)
plot = False
# Tolerance and termination conditions
# Tolerance on mesh size
tol_mesh = 1e-6
# Min significant change of objective function
tol_fun = 1e-3
# Max iterations with no significant change (doubled under uncertainty)
tol_stall_iters = int(4 + np.floor(D/2))
# Min variability for a function to be considered noisy
# (default is np.spacing(1.0) times the tol_fun option)
tol_noise = np.spacing(1.0) * self.get("tol_fun")
# Initialization
# Initialization function
init_fun = "init_sobol"
# Number of restart attempts
restarts = 0
# Size of cache for storing function evaluations
cache_size = 500
# Number of initial objective function evaluations
fun_eval_start = D
# Previous function evaluations with Y and X fields
# NOTE(review): the original comment read "Pregress fcn evaluation"; presumably pre-computed evaluations supplied by the caller - confirm
fun_values = {}
# Array with indices of periodic variables, like periodic_vars = [1, 2]
periodic_vars = None
# Poll options
# Poll function
poll_method = 'poll_mads_2n'
# NOTE(review): n_basis has no description in this file; presumably the number of poll basis vectors - confirm
n_basis = 200 * D
# Mesh multiplicative factor between iterations
poll_mesh_multiplier = 2.0
# Force poll vectors to be on mesh
force_poll_mesh = False
# Maximum poll integer
max_poll_grid_number = 0
# Use alternative incumbent offset
alternative_incumbent = False
# Adaptive multiplier to incumbent uncertainty
adaptive_incumbent_shift = False
# GP-based geometric scaling factor of poll vectors
gp_rescale_poll = 1.0
# Threshold probability of improvement (PoI); set to 0 to always complete polling
tol_poi = 1e-6/D
# Skip polling if PoI below threshold, even with no success
skip_poll = True
# Allow consecutive incomplete polls
consecutive_skipping = True
# Skip polling after successful search
skip_poll_after_search = True
# Number of failed function evaluations before skipping is allowed
min_failed_poll_steps = np.inf
# Accelerate mesh after this number of stalled iterations
accelerate_mesh_steps = 3
# Move incumbent even after insufficient improvement
sloppy_improvement = True
# Move incumbent even for an uncertain unsuccess when Sto-BADS is configured
opp_stobads = True
# Power value of the Sto-BADS incumbent decision rule: \gamma * \epsilon * frame_size**(power_value)
stobads_frame_size_scaling_power = 2
# Threshold number of mesh overflows for warning
mesh_overflow_warning = 2 + D/2
# Initial mesh size (power value)
init_mesh_size_integer = 0
# StoBADS option, if True switch to stochastic optimization and uncertain incumbent
stobads = False
# Improvement parameters
# Minimum significant improvement at unit mesh size
tol_improvement = 1
# Exponent of forcing function
forcing_exponent = 3/2
# Multiplier to incumbent uncertainty for acquisition functions
incumbent_sigma_multiplier = 0.1
# Quantile when computing improvement (<0.5 for conservative improvement)
improvement_quantile = 0.5
# Top quantile when choosing final iteration
final_quantile = 1e-3
# Search properties
# Number of candidate search points
n_search = 2**12
# Number of optimization iterations for search
n_search_iter = 2
# Multiplier in ES
es_beta = 1
# Starting scale value in ES
es_start = 0.25
# Fraction of candidate search points with (slower) improved estimate
search_improve_frac = 0
# Search radius expansion factor for successful search
search_scale_success = np.sqrt(2)
# Search radius expansion factor for incremental search
search_scale_incremental = 2
# Search radius contraction factor for failed search
search_scale_failure = np.sqrt(0.5)
# NOTE(review): search_factor_min has no description in this file; presumably a lower bound on the search scale factor - confirm
search_factor_min = 0.5
# Search function(s) (list of tuples with function name and sumrule flag)
search_method = [('ES-wcm',1), ('ES-ell',1)]
# Iteration scale factor between poll and search
search_grid_number = 10
# Multiplier integer scale factor between poll and search
search_grid_multiplier = 2
# Relative search scale factor locked to poll scale factor
search_size_locked = True
# Number of searches per iteration
search_n_try = np.maximum(D, np.floor(3 + D/2))
# Search-triggered mesh expansion after this number of successful search rounds
search_mesh_expand = 0
# Mesh size increment after search-triggered mesh expansion
search_mesh_increment = 1
# Further optimize acquisition function
search_optimize = False
# Noise parameters
# Treat incumbent as if uncertain regardless of uncertainty handling
uncertain_incumbent = True
# Contribution to log noise magnitude from log mesh size (0 for noisy functions)
mesh_noise_multiplier = 0.5
# Gaussian process properties
# Number of training data (minimum 200 under uncertainty)
n_train_max = 50 + 10*D
# Minimum number of training data (doubled under uncertainty)
n_train_min = 50
# Max number of training data removed if too far from current point
buffer_ntrain = 100
# Hyperparameter samples (0 = optimize)
gp_samples = 0
# MCMC sampler for GP hyperparameters
gp_hyp_sampler = "slicesample"
# High Posterior Density region (fraction of training inputs)
hpd_frac = 0.8
# Switch to covariance sampling below this threshold of stability index
cov_sample_thresh = 10
# Multiplier to widths from previous posterior for GP sampling (Inf = do not use previous widths)
gp_sample_widths = 0
# Use weighted hyperparameter posterior covariance
weighted_hyp_cov=True
# Minimum weight for weighted hyperparameter posterior covariance
tol_cov_weight = 0
# Weight of previous trials (per trial) for running average of GP hyperparameter covariance
hyp_run_weight = 1
# Minimum function evaluations before refitting the GP
min_refit_time = 2*D
# Train GP also during poll stage
poll_training = True
# Always try a second GP fit
double_refit = False
# GP mean function
gp_mean_fun = 'const'
# NOTE(review): gp_cov_fun has no description in this file; presumably selects the GP covariance function - confirm
gp_cov_fun = 1
# Percentile of empirical GP mean
gp_mean_percentile = 90
# Empirical range of hyperprior over the mean
# (distance of ym from the median of y, divided by 5 and multiplied by 2)
gp_mean_range_fun = lambda ym,y: (ym - np.median(y))/5*2
# GP definition function
gp_def_fcn = ('gp_def_bads','rq',[1,1])
# GP training set selection method
gp_method = 'nearest'
# Cluster additional points during training
gp_cluster = False
# Rotate GP basis
rotate_gp = False
# Radius of training set
gp_radius = 3
# NOTE(review): use_effective_radius has no description in this file; presumably modifies how gp_radius is applied - confirm
use_effective_radius = True
# GP hyper-prior over covariance
gp_cov_prior = 'iso'
# NOTE(review): gp_fixed_mean has no description in this file; presumably keeps the GP mean fixed during fitting - confirm
gp_fixed_mean = False
# Fit the likelihood term
fit_lik = True
# Acquisition functions, GP fitting robustness and GP hyperparameter sampling
# Acquisition function for poll stage
poll_acq_fcn = ('acq_LCB', None)
# Acquisition function for search stage
search_acq_fcn = ('acq_LCB', None)
# Hedge acquisition function
acq_hedge = False
# Attempts at performing the Cholesky decomposition
chol_attempts = 0
# Increase nudge to noise in case of Cholesky failure
noise_nudge = np.array([1, 0])
# Start removing training points after this number of failures
remove_points_after_tries = 1
# SVGD iterations for GP training
gp_svd_iters = 200
# Issue warning if GP hyperparameters fit fails
gp_warnings = False
# Alpha level for normality test of GP predictions
normalpha_level = 1e-6
# Number of target function evaluations per iteration
fun_evals_per_iter = 1
# Thinning for GP hyperparameter sampling
gp_sample_thin = 5
# Force stable GP hyperparameter sampling (reduce samples or start optimizing)
stable_gp_sampling = 200 + 10 * D
# Initial design points for GP hyperparameter training
gp_train_n_init = 128
# Final design points for GP hyperparameter training
gp_train_n_init_final = 8
# Initial design method for GP hyperparameter training
gp_train_init_method = "rand"
# Tolerance for optimization of GP hyperparameters
gp_tol_opt = 1e-5
# Tolerance for optimization of GP hyperparameters preliminary to MCMC
gp_tol_optmcmc = 1e-2
# Max GP hyperparameter samples (decreases with training points)
nsgp_max = 0
# Max GP hyperparameter samples during warmup
nsgp_maxwarmup = 8
# Max GP hyperparameter samples during main algorithm
nsgp_maxmain = np.inf
# Number of GP samples when GP is stable (0 = optimize)
stable_gp_samples = 0
# Tolerance for optimization of GP hyperparameters during active sampling
gp_tol_optactive = 1e-4
# Tolerance for optimization of GP hyperparameters preliminary to MCMC during active sampling
gp_tol_optmcmcactive = 1e-2
# Threshold on GP variance used by regularized acquisition functions
tol_gp_var = 1e-4
# Threshold on GP variance used to stabilize sampling
tol_gp_varmcmc = 1e-4
# Perform GP training after each active sample
active_sample_gp_update = False
# Extra variational components sampled from GP profile
sample_extra_vp_means = 0
# Try integrating GP mean function
integrate_gp_mean = False
# Tolerance on ELBO uncertainty for stopping (if variational posterior is stable)
tol_sd = 0.1
# Stopping threshold on change of variational posterior per training point
tol_skl = 0.01 * np.sqrt(D)
# Number of stable function evaluations for stopping warmup
tol_stable_warmup = 15
# MCMC sampler for variational posteriors
variational_sampler = "malasample"
# Use Gaussian approximation for symmetrized KL-divergence between iterations
kl_gauss = True
# Variational components during warmup
k_warmup = 2
# Force stable GP hyperparameter sampling after reaching this number of components
stable_gp_vpk = np.inf
# GP warping function type
warp_func = 0
# Slice sampler option for prior hyper-parameter sampling method
use_slice_sampler = False
# Adaptive basis (unsupported)
# NOTE(review): the three hessian_* lines carry their description as trailing "# ..." text on the value line;
# whether that text is stripped or kept as part of the value depends on the options loader - confirm.
hessian_update = False # Update Hessian as you go
hessian_method = 'bfgs' # Hessian update method
hessian_alternate = False # Alternate Hessian iterations
# Hedge heuristic parameters (currently used during the search stage)
hedge_gamma = 0.125
# hedge_beta is derived from the tol_fun option (1e-3 divided by tol_fun)
hedge_beta = 1e-3/self.get('tol_fun')
hedge_decay = 0.1**(1/(2*D))
# Max number of consecutive repeated measurements for noisy inputs
max_repeated_observations = 0
# Multiplicative discount on acquisition function to repeat measurement at the same location
repeated_acq_discount = 1
# Base step size for stochastic gradient descent
sgd_step_size = 0.005
# Use ranking criterion to pick best non-converged solution
rank_criterion = True
# Run in diagnostics mode, get additional info
diagnostics = False
# Output function
output_fcn = None
# Evaluated function values at X0
f_vals = None
# Samples for fast acquisition function evaluation per new point
ns_search = 2 ** 13
# Set stochastic optimization stepsize via GP hyperparameters
gp_stochastic_step_size = False
# Min number of function evaluations
min_fun_evals = 5 * D
# Min number of iterations
min_iter = D
# Fraction of search points from heavy-tailed variational posterior
heavy_tail_search_frac = 0.25
# Fraction of search points from multivariate normal
mvn_search_frac = 0.25
# Fraction of search points from multivariate normal fitted to HPD points
hpd_search_frac = 0
# Fraction of search points from uniform random box based on training inputs
box_search_frac = 0.25
# Fraction of search points from previous iterations
search_cache_frac = 0
# Empirical Bayes prior over some GP hyperparameters
empirical_gp_prior = False
# Minimum GP observation noise
tol_gp_noise = np.sqrt(1e-5)
# Prior mean over GP input length scale (in plausible units)
gp_length_prior_mean = np.sqrt(D / 6)
# Prior std over GP input length scale (in plausible units)
gp_length_prior_std = 0.5 * np.log(1e3)
# Upper bound on GP input lengths based on plausible box (0 = ignore)
upper_gp_length_factor = 0
# Initial samples (plausible is uniform in the plausible box)
init_design = "plausible"
# Stricter upper bound on GP negative quadratic mean function
gp_quadratic_mean_bound = True
# Bandwidth parameter for GP smoothing (in units of plausible box)
bandwidth = 0
# Heuristic output warping (fitness shaping)
fitness_shaping = False
# Output warping starting threshold
out_warp_thresh_base = 10 * D
# Output warping threshold multiplier when failed sub-threshold check
out_warp_thresh_mult = 1.25
# Output warping base threshold tolerance (fraction of current threshold)
out_warp_thresh_tol = 0.8
# Temperature for posterior tempering (allowed values T = 1, 2, 3, 4)
# NOTE(review): the original comment read "T = 1234", presumably a list that lost its separators - confirm
temperature = 1
# Use separate GP with constant mean for active search
separate_search_gp = False
# Discount observations from extremely low-density regions
noise_shaping = False
# Threshold from max observed value to start discounting
noise_shaping_threshold = 10 * D
# Proportionality factor of added noise with respect to distance from threshold
noise_shaping_factor = 0.05
# Past iterations window to judge acquisition function improvement
acq_hedge_iter_window = 4
# Portfolio value decay per function evaluation
acqhedge_decay = 0.9
# Active search bound multiplier
active_search_bound = 2
# Tolerance on closeness to bound constraints (fraction of total range)
tol_bound_x = 1e-5
# Recompute LCB max for each iteration based on current GP estimate
recompute_lcb_max = True
# Use double GP
double_gp = False
# Warp every this number of iterations
warp_every_iters = 5
# Increase delay between warpings
incremental_warp_delay = True
# Threshold on reliability index to perform warp
warp_tol_reliability = 3
# Rotate and scale input
warp_proto_scaling = True
# Regularization weight towards diagonal covariance matrix for N training inputs
warp_cov_reg = 0
# Threshold on correlation matrix for roto-scaling
warp_proto_corr_thresh = 0.05