BADS options

The options can be divided into two types:
  • Basic options: These options are of interest to most users, and cover all regular usage needs.

  • Advanced options: These options should rarely, if ever, be touched and are reserved for advanced users / developers of BADS. Please do not modify them unless you know what you are doing.

You can find the default options for both groups below.

Basic options

We expect these options to be routinely changed by many users.

[BasicOptions]
# Level of display ("iter", "notify", "final", or "off")
display = "iter"
# Max number of iterations
max_iter = 200 * D
# Max number of target fcn evals
max_fun_evals = 500 * D
# Automatic nonlinear rescaling of variables
nonlinear_scaling = True
# Complete polling around the current iterate
complete_poll = False
# Accelerate mesh contraction
accelerate_mesh = True
# Explicit noise handling (if None, determine at runtime)
uncertainty_handling = None
# Base observation noise magnitude (SD), e.g. noise_size = 1.0
noise_size = None
# Target function returns noise estimate (SD) as second output
specify_target_noise = False
# Samples to estimate FVAL at the end (for noisy objectives)
noise_final_samples = 10
# Random seed used by the optimizer. If None, no seed is set.
random_seed = None
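
For instance, assuming the PyBADS interface (a BADS object constructed with the target function, starting point, bounds, and an options dict, then run via optimize()), a run overriding a few basic options might look like the minimal sketch below; check the PyBADS documentation for the exact signature.

import numpy as np
from pybads import BADS

def rosenbrock(x):
    # Standard 2D Rosenbrock test function (deterministic target)
    x = np.atleast_1d(np.squeeze(x))
    return 100.0 * (x[1] - x[0] ** 2) ** 2 + (x[0] - 1.0) ** 2

options = {
    "display": "final",      # only print the final result
    "max_fun_evals": 200,    # override the 500 * D default
    "random_seed": 42,       # reproducible run
}

# Argument order assumed from the PyBADS docs: fun, x0, bounds, plausible bounds
bads = BADS(rosenbrock, np.zeros(2),
            np.full(2, -5.0), np.full(2, 5.0),   # hard bounds
            np.full(2, -2.0), np.full(2, 2.0),   # plausible bounds
            options=options)
result = bads.optimize()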

Advanced options

These options are reserved for advanced users / developers of BADS. We strongly advise against changing them unless you know what you are doing, since unexpected or untested behavior might follow.

[AdvancedOptions]
# Show optimization plots ("profile", "scatter", or False)
plot = False

# Tolerance and termination conditions
# Tolerance on mesh size
tol_mesh = 1e-6
# Min significant change of objective fcn
tol_fun = 1e-3
# Max iterations with no significant change (doubled under uncertainty)
tol_stall_iters = int(4 + np.floor(D/2))
# Min variability for a fcn to be considered noisy
tol_noise = np.spacing(1.0) * self.get("tol_fun")
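
As a concrete reading of the formulas above, for a D = 2 problem the default termination thresholds work out as follows (plain arithmetic on the defaults listed, no further assumptions):

import numpy as np

D = 2
max_iter = 200 * D                           # 400 iterations
max_fun_evals = 500 * D                      # 1000 evaluations
tol_stall_iters = int(4 + np.floor(D / 2))   # 5 iterations without progress
tol_noise = np.spacing(1.0) * 1e-3           # ~2.2e-19 with tol_fun = 1e-3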

# Initialization
# Initialization function
init_fun = "init_sobol"
# Number of restart attempts
restarts = 0
# Size of cache for storing fcn evaluations
cache_size = 500
# Number of initial objective fcn evaluations
fun_eval_start = D
# Previous fcn evaluations (dict with X and Y fields)
fun_values = {}
# Array with indices of periodic variables, e.g. periodic_vars = [1, 2]
periodic_vars = None
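
For example, a warm start that reuses previous evaluations together with a periodic variable could be configured as below. This is a sketch: the "X"/"Y" key names follow the comment above, and the 0-based indexing of periodic_vars is an assumption to verify against your PyBADS version.

import numpy as np

X_prev = np.array([[0.0, 0.5], [1.0, 0.2]])   # previously evaluated points
Y_prev = np.array([3.1, 1.4])                 # their objective values

options = {
    "fun_values": {"X": X_prev, "Y": Y_prev},  # key names assumed
    "periodic_vars": [1],                      # second coordinate is periodic
    "fun_eval_start": 4,                       # a few fresh initial evals
}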

# Poll options
# Poll function
poll_method = 'poll_mads_2n'
n_basis = 200 * D
# Mesh multiplicative factor between iterations
poll_mesh_multiplier = 2.0
# Force poll vectors to be on mesh
force_poll_mesh = False
# Maximum poll integer
max_poll_grid_number = 0
# Use alternative incumbent offset
alternative_incumbent = False
# Adaptive multiplier to incumbent uncertainty
adaptive_incumbent_shift = False
# GP-based geometric scaling factor of poll vectors
gp_rescale_poll = 1.0
# Threshold probability of improvement (PoI); set to 0 to always complete polling
tol_poi = 1e-6 / D
# Skip polling if PoI below threshold, even with no success
skip_poll = True
# Allow consecutive incomplete polls
consecutive_skipping = True
# Skip polling after successful search
skip_poll_after_search = True
# Number of failed fcn evaluations before skipping is allowed
min_failed_poll_steps = np.inf
# Accelerate mesh after this number of stalled iterations
accelerate_mesh_steps = 3
# Move incumbent even after insufficient improvement
sloppy_improvement = True
# Move incumbent even on uncertain unsuccessful steps when Sto-BADS is enabled
opp_stobads = True
# Power value of the Sto-BADS incumbent decision rule: gamma * epsilon * frame_size**power_value
stobads_frame_size_scaling_power = 2
# Threshold number of mesh overflows before issuing a warning
mesh_overflow_warning = 2 + D/2
# Initial mesh size (power value)
init_mesh_size_integer = 0
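
The mesh options above follow the usual MADS convention of a mesh size defined as a power of the multiplier; the sketch below shows that schematic relation (illustrative only, not the exact BADS bookkeeping):

poll_mesh_multiplier = 2.0
init_mesh_size_integer = 0

def mesh_size(k, multiplier=poll_mesh_multiplier):
    # Mesh size as multiplier**integer, per the "power value" comments above
    return multiplier ** k

k = init_mesh_size_integer    # mesh_size(k) == 1.0 initially
k_after_success = k + 1       # mesh expands:   mesh_size == 2.0
k_after_failure = k - 1       # mesh contracts: mesh_size == 0.5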

# Sto-BADS: if True, switch to stochastic optimization with an uncertain incumbent
stobads = False

# Improvement parameters
# Minimum significant improvement at unit mesh size
tol_improvement = 1
# Exponent of the forcing function
forcing_exponent = 3/2
# Multiplier to incumbent uncertainty for acquisition functions
incumbent_sigma_multiplier = 0.1
# Quantile when computing improvement (<0.5 for conservative improvement)
improvement_quantile = 0.5
# Top quantile when choosing the final iteration
final_quantile = 1e-3
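
tol_improvement and forcing_exponent combine into a sufficient-improvement threshold of roughly tol_improvement * mesh_size**forcing_exponent, the standard MADS-style forcing function (schematic form; the exact scaling used internally may differ):

def improvement_threshold(mesh_size, tol_improvement=1.0, forcing_exponent=1.5):
    # rho(delta) = c * delta**(3/2) with the defaults listed above
    return tol_improvement * mesh_size ** forcing_exponent

# At mesh size 0.1, improvements below ~0.0316 do not count as significant
print(improvement_threshold(0.1))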

# Search properties
# Number of candidate search points
n_search = 2**12
# Number of optimization iterations for search
n_search_iter = 2
# Multiplier in ES
es_beta = 1
# Starting scale value in ES
es_start = 0.25
# Fraction of candidate search points with (slower) improved estimate
search_improve_frac = 0
# Search radius expansion factor for successful search
search_scale_success = np.sqrt(2)
# Search radius expansion factor for incremental search
search_scale_incremental = 2
# Search radius contraction factor for failed search
search_scale_failure = np.sqrt(0.5)
# Minimum search radius scale factor
search_factor_min = 0.5
# Search function(s) (list of tuples with function name and sum-rule flag)
search_method = [('ES-wcm', 1), ('ES-ell', 1)]
# Iteration scale factor between poll and search
search_grid_number = 10
# Multiplier integer scale factor between poll and search
search_grid_multiplier = 2
# Relative search scale factor locked to poll scale factor
search_size_locked = True
# Number of searches per iteration
search_n_try = np.maximum(D, np.floor(3 + D/2))
# Search-triggered mesh expansion after this number of successful search rounds
search_mesh_expand = 0
# Mesh size increment after search-triggered mesh expansion
search_mesh_increment = 1
# Further optimize the acquisition function
search_optimize = False
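
The three search_scale_* factors rescale the search radius depending on the outcome of the previous search. A schematic update might read as below; this is not the exact BADS rule, and the floor at search_factor_min is an assumption based on the option name.

import numpy as np

SCALE_FACTORS = {
    "success": np.sqrt(2),    # search_scale_success
    "incremental": 2.0,       # search_scale_incremental
    "failure": np.sqrt(0.5),  # search_scale_failure
}

def update_search_scale(scale, outcome, factor_min=0.5):
    # Expand on success, contract on failure; clip at search_factor_min
    # (the clipping is an assumption, not verified against the implementation)
    return max(scale * SCALE_FACTORS[outcome], factor_min)

scale = update_search_scale(1.0, "failure")   # ~0.707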

# Noise parameters
# Treat incumbent as if uncertain regardless of uncertainty handling
uncertain_incumbent = True
# Contribution to log noise magnitude from log mesh size (0 for noisy functions)
mesh_noise_multiplier = 0.5
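
Read literally, mesh_noise_multiplier adds that fraction of the log mesh size to the log noise magnitude, so the effective observation noise scales as mesh_size**0.5 for deterministic targets (one plausible reading of the comment above, not checked against the implementation):

def effective_noise_sd(base_sd, mesh_size, mesh_noise_multiplier=0.5):
    # log(sd_eff) = log(base_sd) + multiplier * log(mesh_size)
    return base_sd * mesh_size ** mesh_noise_multiplier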

# Gaussian process properties
# Number of training data (minimum 200 under uncertainty)
n_train_max = 50 + 10*D
# Minimum number of training data (doubled under uncertainty)
n_train_min = 50
# Max number of training data removed if too far from current point
buffer_ntrain = 100
# Hyperparameter samples (0 = optimize)
gp_samples = 0
# MCMC sampler for GP hyperparameters
gp_hyp_sampler = "slicesample"
# High Posterior Density region (fraction of training inputs)
hpd_frac = 0.8
# Switch to covariance sampling below this threshold of stability index
cov_sample_thresh = 10
# Multiplier to widths from previous posterior for GP sampling (Inf = do not use previous widths)
gp_sample_widths = 0
# Use weighted hyperparameter posterior covariance
weighted_hyp_cov = True
# Minimum weight for weighted hyperparameter posterior covariance
tol_cov_weight = 0
# Weight of previous trials (per trial) for running avg of GP hyperparameter covariance
hyp_run_weight = 1
# Minimum fcn evals before refitting the GP
min_refit_time = 2*D
# Train GP also during poll stage
poll_training = True
# Always try a second GP fit
double_refit = False
# GP mean function
gp_mean_fun = 'const'
# GP covariance function
gp_cov_fun = 1
# Percentile of empirical GP mean
gp_mean_percentile = 90
# Empirical range of hyperprior over the mean
gp_mean_range_fun = lambda ym, y: (ym - np.median(y)) / 5 * 2
# GP definition fcn
gp_def_fcn = ('gp_def_bads', 'rq', [1, 1])
# GP training set selection method
gp_method = 'nearest'
# Cluster additional points during training
gp_cluster = False
# Rotate GP basis
rotate_gp = False
# Radius of training set
gp_radius = 3
# Use effective radius for training set selection
use_effective_radius = True
# GP hyper-prior over covariance
gp_cov_prior = 'iso'
# Fix the GP mean function
gp_fixed_mean = False
# Fit the likelihood term
fit_lik = True
# Acquisition fcn for poll stage (LCB; see the sketch after this block)
poll_acq_fcn = ('acq_LCB', None)
# Acquisition fcn for search stage (LCB; see the sketch after this block)
search_acq_fcn = ('acq_LCB', None)
# Hedge acquisition function
acq_hedge = False
# Attempts at performing the Cholesky decomposition
chol_attempts = 0
# Increase nudge to noise in case of Cholesky failure
noise_nudge = np.array([1, 0])
# Start removing training points after this number of failures
remove_points_after_tries = 1
# SVGD iterations for GP training
gp_svd_iters = 200
# Issue warning if GP hyperparameter fit fails
gp_warnings = False
# Alpha level for normality test of GP predictions
normalpha_level          = 1e-6                 
# Number of target fcn evals per iteration
fun_evals_per_iter = 1                      
# Thinning for GP hyperparameter sampling
gp_sample_thin = 5                            
# Force stable GP hyperparameter sampling (reduce samples or start optimizing)
stable_gp_sampling = 200 + 10 * D             
# Initial design points for GP hyperparameter training
gp_train_n_init = 128                         
# Final design points for GP hyperparameter training
gp_train_n_init_final = 8                      
# Initial design method for GP hyperparameter training
gp_train_init_method = "rand"                  
# Tolerance for optimization of GP hyperparameters
gp_tol_opt = 1e-5                             
# Tolerance for optimization of GP hyperparameters preliminary to MCMC
gp_tol_optmcmc = 1e-2                         
# Max GP hyperparameter samples (decreases with training points)
nsgp_max = 0                                 
# Max GP hyperparameter samples during warmup
nsgp_maxwarmup = 8                           
# Max GP hyperparameter samples during main algorithm
nsgp_maxmain = np.inf
# Number of GP samples when GP is stable (0 = optimize)
stable_gp_samples = 0                         
# Tolerance for optimization of GP hyperparameters during active sampling
gp_tol_optactive = 1e-4                       
# Tolerance for optimization of GP hyperparameters preliminary to MCMC during active sampling
gp_tol_optmcmcactive = 1e-2                   
# Threshold on GP variance used by regularized acquisition fcns
tol_gp_var = 1e-4
# Threshold on GP variance used to stabilize sampling
tol_gp_varmcmc = 1e-4
# Perform GP training after each active sample
active_sample_gp_update = False                
# Extra variational components sampled from GP profile
sample_extra_vp_means = 0                      
# Try integrating GP mean function
integrate_gp_mean = False                     
# Tolerance on ELBO uncertainty for stopping (if variational posterior is stable)
tol_sd = 0.1
# Stopping threshold on change of variational posterior per training point
tol_skl = 0.01 * np.sqrt(D)
# Number of stable fcn evals for stopping warmup
tol_stable_warmup = 15                        
# MCMC sampler for variational posteriors
variational_sampler = "malasample"           
# Use Gaussian approximation for symmetrized KL divergence between iterations
kl_gauss = True                              
# Variational components during warmup
k_warmup = 2                                 
# Force stable GP hyperparameter sampling after reaching this number of components
stable_gp_vpk = np.inf
# GP warping function type
warp_func = 0                                
# Use the slice sampler for prior hyperparameter sampling
use_slice_sampler = False                   
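
Both the poll and search stages above default to a lower confidence bound (LCB) acquisition ('acq_LCB'). A generic LCB for minimization has the form a(x) = mu(x) - kappa * sigma(x); the sketch below uses an illustrative kappa, since BADS derives its own coefficient internally:

import numpy as np

def acq_lcb(mu, sigma, kappa=2.0):
    # Lower confidence bound: prefer points predicted to be good (low mu)
    # or highly uncertain (high sigma). kappa = 2.0 is illustrative only.
    return mu - kappa * sigma

mu = np.array([0.2, 0.5])       # GP posterior means at two candidates
sigma = np.array([0.3, 0.05])   # GP posterior SDs
best = int(np.argmin(acq_lcb(mu, sigma)))   # picks the uncertain candidate 0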

# Adaptive basis (unsupported)
# Update Hessian as you go
hessian_update = False
# Hessian update method
hessian_method = 'bfgs'
# Alternate Hessian iterations
hessian_alternate = False

# Hedge heuristic parameters (currently used during the search stage)
hedge_gamma              = 0.125
hedge_beta               = 1e-3/self.get('tol_fun')
hedge_decay              = 0.1**(1/(2*D))
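
These parameters belong to a Hedge/Exp3-style portfolio over the search methods: accumulated gains decay by hedge_decay, are turned into probabilities via a softmax with inverse temperature hedge_beta, and are mixed with a uniform floor hedge_gamma. The sketch below shows that generic family (schematic; the exact gains and normalization are internal to BADS):

import numpy as np

def hedge_probabilities(gains, gamma=0.125, beta=1.0):
    # Softmax of accumulated gains, mixed with a uniform floor gamma.
    # beta = 1.0 matches hedge_beta = 1e-3 / tol_fun with tol_fun = 1e-3.
    w = np.exp(beta * (gains - np.max(gains)))
    return (1 - gamma) * w / w.sum() + gamma / len(gains)

D = 2
gains = np.array([0.0, 0.4]) * 0.1 ** (1 / (2 * D))   # apply hedge_decay
print(hedge_probabilities(gains))   # ~[0.45, 0.55]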

# Max number of consecutive repeated measurements for noisy inputs
max_repeated_observations = 0
# Multiplicative discount on acquisition fcn to repeat a measurement at the same location
repeated_acq_discount = 1
# Base step size for stochastic gradient descent
sgd_step_size = 0.005
# Use ranking criterion to pick best non-converged solution
rank_criterion = True
# Run in diagnostics mode to get additional info
diagnostics = False
# Output function
output_fcn = None

# Evaluated function values at X0
f_vals = None
# Samples for fast acquisition fcn eval per new point
ns_search = 2 ** 13
# Set stochastic optimization stepsize via GP hyperparameters
gp_stochastic_step_size = False
# Min number of fcn evals
min_fun_evals = 5 * D
# Min number of iterations
min_iter = D
# Fraction of search points from heavy-tailed variational posterior
heavy_tail_search_frac = 0.25
# Fraction of search points from multivariate normal
mvn_search_frac = 0.25
# Fraction of search points from multivariate normal fitted to HPD points
hpd_search_frac = 0
# Fraction of search points from uniform random box based on training inputs
box_search_frac = 0.25
# Fraction of search points from previous iterations
search_cache_frac = 0
# Empirical Bayes prior over some GP hyperparameters
empirical_gp_prior = False
# Minimum GP observation noise
tol_gp_noise = np.sqrt(1e-5)
# Prior mean over GP input length scale (in plausible units)
gp_length_prior_mean = np.sqrt(D / 6)
# Prior std over GP input length scale (in plausible units)
gp_length_prior_std = 0.5 * np.log(1e3)
# Upper bound on GP input lengths based on plausible box (0 = ignore)
upper_gp_length_factor = 0
# Initial samples (plausible is uniform in the plausible box)
init_design = "plausible"
# Stricter upper bound on GP negative quadratic mean function
gp_quadratic_mean_bound = True
# Bandwidth parameter for GP smoothing (in units of plausible box)
bandwidth = 0
# Heuristic output warping (fitness shaping)
fitness_shaping = False
# Output warping starting threshold
out_warp_thresh_base = 10 * D
# Output warping threshold multiplier when failed sub-threshold check
out_warp_thresh_mult = 1.25
# Output warping base threshold tolerance (fraction of current threshold)
out_warp_thresh_tol = 0.8
# Temperature for posterior tempering (allowed values T = 1, 2, 3, 4)
temperature = 1
# Use separate GP with constant mean for active search
separate_search_gp = False
# Discount observations from extremely low-density regions
noise_shaping = False
# Threshold from max observed value to start discounting
noise_shaping_threshold = 10 * D
# Proportionality factor of added noise wrt distance from threshold
noise_shaping_factor = 0.05
# Past iterations window to judge acquisition fcn improvement
acq_hedge_iter_window = 4
# Portfolio value decay per function evaluation
acqhedge_decay = 0.9

# Active search bound multiplier
active_search_bound = 2
# Tolerance on closeness to bound constraints (fraction of total range)
tol_bound_x = 1e-5
# Recompute LCB max each iteration based on the current GP estimate
recompute_lcb_max = True
# Use double GP
double_gp = False
# Warp every this number of iterations
warp_every_iters = 5
# Increase delay between warpings
incremental_warp_delay = True
# Threshold on reliability index to perform warp
warp_tol_reliability = 3
# Rotate and scale input
warp_proto_scaling = True
# Regularization weight towards diagonal covariance matrix for N training inputs
warp_cov_reg = 0
# Threshold on correlation matrix for roto-scaling
warp_proto_corr_thresh = 0.05