Source code for smt.utils.design_space

"""
Author: Jasper Bussemaker <jasper.bussemaker@dlr.de>

This package is distributed under New BSD license.
"""

from typing import List, Optional, Sequence, Tuple, Union

import numpy as np

from smt.sampling_methods import LHS

try:
    from ConfigSpace import (
        CategoricalHyperparameter,
        Configuration,
        ConfigurationSpace,
        EqualsCondition,
        ForbiddenAndConjunction,
        ForbiddenEqualsClause,
        ForbiddenInClause,
        InCondition,
        OrdinalHyperparameter,
        UniformFloatHyperparameter,
        UniformIntegerHyperparameter,
        ForbiddenLessThanRelation,
    )
    from ConfigSpace.exceptions import ForbiddenValueError
    from ConfigSpace.util import get_random_neighbor

    HAS_CONFIG_SPACE = True

except ImportError:
    HAS_CONFIG_SPACE = False

    class Configuration:
        pass

    class ConfigurationSpace:
        pass

    class UniformIntegerHyperparameter:
        pass


def ensure_design_space(xt=None, xlimits=None, design_space=None) -> "BaseDesignSpace":
    """Interface to turn legacy input formats into a DesignSpace"""

    if design_space is not None and isinstance(design_space, BaseDesignSpace):
        return design_space

    if xlimits is not None:
        return DesignSpace(xlimits)

    if xt is not None:
        return DesignSpace([[np.min(xt) - 0.99, np.max(xt) + 1e-4]] * xt.shape[1])

    raise ValueError("Nothing defined that could be interpreted as a design space!")


class DesignVariable:
    """Base class for defining a design variable"""

    upper: Union[float, int]
    lower: Union[float, int]

    def get_typename(self):
        return self.__class__.__name__

    def get_limits(self) -> Union[list, tuple]:
        raise NotImplementedError

    def __str__(self):
        raise NotImplementedError

    def __repr__(self):
        raise NotImplementedError


[docs] class FloatVariable(DesignVariable): """A continuous design variable, varying between its lower and upper bounds""" def __init__(self, lower: float, upper: float): if upper <= lower: raise ValueError( f"Upper bound should be higher than lower bound: {upper} <= {lower}" ) self.lower = lower self.upper = upper def get_limits(self) -> Tuple[float, float]: return self.lower, self.upper def __str__(self): return f"Float ({self.lower}, {self.upper})" def __repr__(self): return f"{self.get_typename()}({self.lower}, {self.upper})"
[docs] class IntegerVariable(DesignVariable): """An integer variable that can take any integer value between the bounds (inclusive)""" def __init__(self, lower: int, upper: int): if upper <= lower: raise ValueError( f"Upper bound should be higher than lower bound: {upper} <= {lower}" ) self.lower = lower self.upper = upper def get_limits(self) -> Tuple[int, int]: return self.lower, self.upper def __str__(self): return f"Int ({self.lower}, {self.upper})" def __repr__(self): return f"{self.get_typename()}({self.lower}, {self.upper})"
[docs] class OrdinalVariable(DesignVariable): """An ordinal variable that can take any of the given value, and where order between the values matters""" def __init__(self, values: List[Union[str, int, float]]): if len(values) < 2: raise ValueError(f"There should at least be 2 values: {values}") self.values = values @property def lower(self) -> int: return 0 @property def upper(self) -> int: return len(self.values) - 1 def get_limits(self) -> List[str]: # We convert to integer strings for compatibility reasons return [str(i) for i in range(len(self.values))] def __str__(self): return f"Ord {self.values}" def __repr__(self): return f"{self.get_typename()}({self.values})"
[docs] class CategoricalVariable(DesignVariable): """A categorical variable that can take any of the given values, and where order does not matter""" def __init__(self, values: List[Union[str, int, float]]): if len(values) < 2: raise ValueError(f"There should at least be 2 values: {values}") self.values = values @property def lower(self) -> int: return 0 @property def upper(self) -> int: return len(self.values) - 1 @property def n_values(self): return len(self.values) def get_limits(self) -> List[Union[str, int, float]]: # We convert to strings for compatibility reasons return [str(value) for value in self.values] def __str__(self): return f"Cat {self.values}" def __repr__(self): return f"{self.get_typename()}({self.values})"
[docs] class BaseDesignSpace: """ Interface for specifying (hierarchical) design spaces. This class itself only specifies the functionality that any design space definition should implement: - a way to specify the design variables, their types, and their bounds or options - a way to correct a set of design vectors such that they satisfy all design space hierarchy constraints - a way to query which design variables are acting for a set of design vectors - a way to impute a set of design vectors such that non-acting design variables are assigned some default value - a way to sample n valid design vectors from the design space If you want to actually define a design space, use the `DesignSpace` class! Note that the correction, querying, and imputation mechanisms should all be implemented in one function (`correct_get_acting`), as usually these operations are tightly related. """ def __init__(self, design_variables: List[DesignVariable] = None): self._design_variables = design_variables self._is_cat_mask = None self._is_conditionally_acting_mask = None self.seed = None self.has_valcons_ord_int = False @property def design_variables(self) -> List[DesignVariable]: if self._design_variables is None: self._design_variables = dvs = self._get_design_variables() if dvs is None: raise RuntimeError( "Design space should either specify the design variables upon initialization " "or as output from _get_design_variables!" ) return self._design_variables @property def is_cat_mask(self) -> np.ndarray: """Boolean mask specifying for each design variable whether it is a categorical variable""" if self._is_cat_mask is None: self._is_cat_mask = np.array( [isinstance(dv, CategoricalVariable) for dv in self.design_variables] ) return self._is_cat_mask @property def is_all_cont(self) -> bool: """Whether or not the space is continuous""" is_continuous = all( isinstance(dv, FloatVariable) for dv in self.design_variables ) return is_continuous @property def is_conditionally_acting(self) -> np.ndarray: """Boolean mask specifying for each design variable whether it is conditionally acting (can be non-acting)""" if self._is_conditionally_acting_mask is None: self._is_conditionally_acting_mask = self._is_conditionally_acting() return self._is_conditionally_acting_mask @property def n_dv(self) -> int: """Get the number of design variables""" return len(self.design_variables) def correct_get_acting(self, x: np.ndarray) -> Tuple[np.ndarray, np.ndarray]: """ Correct the given matrix of design vectors and return the corrected vectors and the is_acting matrix. It is automatically detected whether input is provided in unfolded space or not. Parameters ---------- x: np.ndarray [n_obs, dim] - Input variables Returns ------- x_corrected: np.ndarray [n_obs, dim] - Corrected and imputed input variables is_acting: np.ndarray [n_obs, dim] - Boolean matrix specifying for each variable whether it is acting or non-acting """ # Detect whether input is provided in unfolded space x = np.atleast_2d(x) if x.shape[1] == self.n_dv: x_is_unfolded = False elif x.shape[1] == self._get_n_dim_unfolded(): x_is_unfolded = True else: raise ValueError(f"Incorrect shape, expecting {self.n_dv} columns!") # If needed, fold before correcting if x_is_unfolded: x, _ = self.fold_x(x) indi = 0 for i in self.design_variables: if not (isinstance(i, FloatVariable)): x[:, indi] = np.int64(np.round(x[:, indi], 0)) indi += 1 # Correct and get the is_acting matrix x_corrected, is_acting = self._correct_get_acting(x) # Check conditionally-acting status if np.any(~is_acting[:, ~self.is_conditionally_acting]): raise RuntimeError("Unconditionally acting variables cannot be non-acting!") # Unfold if needed if x_is_unfolded: x_corrected, is_acting = self.unfold_x(x_corrected, is_acting) return x_corrected, is_acting def decode_values( self, x: np.ndarray, i_dv: int = None ) -> List[Union[str, int, float, list]]: """ Return decoded values: converts ordinal and categorical back to their original values. If i_dv is given, decoding is done for one specific design variable only. If i_dv=None, decoding will be done for all design variables: 1d input is interpreted as a design vector, 2d input is interpreted as a set of design vectors. """ def _decode_dv(x_encoded: np.ndarray, i_dv_decode): dv = self.design_variables[i_dv_decode] if isinstance(dv, (OrdinalVariable, CategoricalVariable)): values = dv.values decoded_values = [values[int(x_ij)] for x_ij in x_encoded] return decoded_values # No need to decode integer or float variables return list(x_encoded) # Decode one design variable if i_dv is not None: if len(x.shape) == 2: x_i = x[:, i_dv] elif len(x.shape) == 1: x_i = x else: raise ValueError("Expected either 1 or 2-dimensional matrix!") # No need to decode for integer or float variable return _decode_dv(x_i, i_dv_decode=i_dv) # Decode design vectors n_dv = self.n_dv is_1d = len(x.shape) == 1 x_mat = np.atleast_2d(x) if x_mat.shape[1] != n_dv: raise ValueError( f"Incorrect number of inputs, expected {n_dv} design variables, received {x_mat.shape[1]}" ) decoded_des_vars = [_decode_dv(x_mat[:, i], i_dv_decode=i) for i in range(n_dv)] decoded_des_vectors = [ [decoded_des_vars[i][ix] for i in range(n_dv)] for ix in range(x_mat.shape[0]) ] return decoded_des_vectors[0] if is_1d else decoded_des_vectors def sample_valid_x( self, n: int, unfolded=False, random_state=None ) -> Tuple[np.ndarray, np.ndarray]: """ Sample n design vectors and additionally return the is_acting matrix. Parameters ---------- n: int - Number of samples to generate unfolded: bool - Whether to return the samples in unfolded space (each categorical level gets its own dimension) Returns ------- x: np.ndarray [n, dim] - Valid design vectors is_acting: np.ndarray [n, dim] - Boolean matrix specifying for each variable whether it is acting or non-acting """ # Sample from the design space x, is_acting = self._sample_valid_x(n, random_state=random_state) # Check conditionally-acting status if np.any(~is_acting[:, ~self.is_conditionally_acting]): raise RuntimeError("Unconditionally acting variables cannot be non-acting!") # Unfold if needed if unfolded: x, is_acting = self.unfold_x(x, is_acting) return x, is_acting def get_x_limits(self) -> list: """Returns the variable limit definitions in SMT < 2.0 style""" return [dv.get_limits() for dv in self.design_variables] def get_num_bounds(self): """ Get bounds for the design space. Returns ------- np.ndarray [nx, 2] - Bounds of each dimension """ return np.array([(dv.lower, dv.upper) for dv in self.design_variables]) def get_unfolded_num_bounds(self): """ Get bounds for the unfolded continuous space. Returns ------- np.ndarray [nx cont, 2] - Bounds of each dimension where limits for categorical variables are expanded to [0, 1] """ unfolded_x_limits = [] for dv in self.design_variables: if isinstance(dv, CategoricalVariable): unfolded_x_limits += [[0, 1]] * dv.n_values elif isinstance(dv, OrdinalVariable): # Note that this interpretation is slightly different from the original mixed_integer implementation in # smt: we simply map ordinal values to integers, instead of converting them to integer literals # This ensures that each ordinal value gets sampled evenly, also if the values themselves represent # unevenly spaced (e.g. log-spaced) values unfolded_x_limits.append([dv.lower, dv.upper]) else: unfolded_x_limits.append(dv.get_limits()) return np.array(unfolded_x_limits).astype(float) def fold_x( self, x: np.ndarray, is_acting: np.ndarray = None, fold_mask: np.ndarray = None, ) -> Tuple[np.ndarray, Optional[np.ndarray]]: """ Fold x and optionally is_acting. Folding reverses the one-hot encoding of categorical variables applied by unfolding. Parameters ---------- x: np.ndarray [n, dim_unfolded] - Unfolded samples is_acting: np.ndarray [n, dim_unfolded] - Boolean matrix specifying for each unfolded variable whether it is acting or non-acting fold_mask: np.ndarray [dim_folded] - Mask specifying which design variables to apply folding for Returns ------- x_folded: np.ndarray [n, dim] - Folded samples is_acting_folded: np.ndarray [n, dim] - (Optional) boolean matrix specifying for each folded variable whether it is acting or non-acting """ # Get number of unfolded dimension x = np.atleast_2d(x) x_folded = np.zeros((x.shape[0], len(self.design_variables))) is_acting_folded = ( np.ones(x_folded.shape, dtype=bool) if is_acting is not None else None ) i_x_unfold = 0 for i, dv in enumerate(self.design_variables): if (isinstance(dv, CategoricalVariable)) and ( fold_mask is None or fold_mask[i] ): n_dim_cat = dv.n_values # Categorical values are folded by reversed one-hot encoding: # [[1, 0, 0], [0, 1, 0], [0, 0, 1]] --> [0, 1, 2].T x_cat_unfolded = x[:, i_x_unfold : i_x_unfold + n_dim_cat] value_index = np.argmax(x_cat_unfolded, axis=1) x_folded[:, i] = value_index # The is_acting matrix is repeated column-wise, so we can just take the first column if is_acting is not None: is_acting_folded[:, i] = is_acting[:, i_x_unfold] i_x_unfold += n_dim_cat else: x_folded[:, i] = x[:, i_x_unfold] if is_acting is not None: is_acting_folded[:, i] = is_acting[:, i_x_unfold] i_x_unfold += 1 return x_folded, is_acting_folded def unfold_x( self, x: np.ndarray, is_acting: np.ndarray = None, fold_mask: np.ndarray = None ) -> Tuple[np.ndarray, Optional[np.ndarray]]: """ Unfold x and optionally is_acting. Unfolding creates one extra dimension for each categorical variable using one-hot encoding. Parameters ---------- x: np.ndarray [n, dim] - Folded samples is_acting: np.ndarray [n, dim] - Boolean matrix specifying for each variable whether it is acting or non-acting fold_mask: np.ndarray [dim_folded] - Mask specifying which design variables to apply folding for Returns ------- x_unfolded: np.ndarray [n, dim_unfolded] - Unfolded samples is_acting_unfolded: np.ndarray [n, dim_unfolded] - (Optional) boolean matrix specifying for each unfolded variable whether it is acting or non-acting """ # Get number of unfolded dimension n_dim_unfolded = self._get_n_dim_unfolded() x = np.atleast_2d(x) x_unfolded = np.zeros((x.shape[0], n_dim_unfolded)) is_acting_unfolded = ( np.ones(x_unfolded.shape, dtype=bool) if is_acting is not None else None ) i_x_unfold = 0 for i, dv in enumerate(self.design_variables): if isinstance(dv, CategoricalVariable) and ( fold_mask is None or fold_mask[i] ): n_dim_cat = dv.n_values x_cat = x_unfolded[:, i_x_unfold : i_x_unfold + n_dim_cat] # Categorical values are unfolded by one-hot encoding: # [0, 1, 2].T --> [[1, 0, 0], [0, 1, 0], [0, 0, 1]] x_i_int = x[:, i].astype(int) for i_level in range(n_dim_cat): has_value_mask = x_i_int == i_level x_cat[has_value_mask, i_level] = 1 # The is_acting matrix is simply repeated column-wise if is_acting is not None: is_acting_unfolded[:, i_x_unfold : i_x_unfold + n_dim_cat] = ( np.tile(is_acting[:, [i]], (1, n_dim_cat)) ) i_x_unfold += n_dim_cat else: x_unfolded[:, i_x_unfold] = x[:, i] if is_acting is not None: is_acting_unfolded[:, i_x_unfold] = is_acting[:, i] i_x_unfold += 1 x_unfolded = x_unfolded[:, :i_x_unfold] if is_acting is not None: is_acting_unfolded = is_acting_unfolded[:, :i_x_unfold] return x_unfolded, is_acting_unfolded def _get_n_dim_unfolded(self) -> int: return sum( [ dv.n_values if isinstance(dv, CategoricalVariable) else 1 for dv in self.design_variables ] ) @staticmethod def _round_equally_distributed(x_cont, lower: int, upper: int): """ To ensure equal distribution of continuous values to discrete values, we first stretch-out the continuous values to extend to 0.5 beyond the integer limits and then round. This ensures that the values at the limits get a large-enough share of the continuous values. """ x_cont[x_cont < lower] = lower x_cont[x_cont > upper] = upper diff = upper - lower x_stretched = (x_cont - lower) * ((diff + 0.9999) / (diff + 1e-16)) - 0.5 return np.round(x_stretched) + lower """IMPLEMENT FUNCTIONS BELOW"""
[docs] def _get_design_variables(self) -> List[DesignVariable]: """Return the design variables defined in this design space if not provided upon initialization of the class"""
[docs] def _is_conditionally_acting(self) -> np.ndarray: """ Return for each design variable whether it is conditionally acting or not. A design variable is conditionally acting if it MAY be non-acting. Returns ------- is_conditionally_acting: np.ndarray [dim] - Boolean vector specifying for each design variable whether it is conditionally acting """ raise NotImplementedError
[docs] def _correct_get_acting(self, x: np.ndarray) -> Tuple[np.ndarray, np.ndarray]: """ Correct the given matrix of design vectors and return the corrected vectors and the is_acting matrix. Parameters ---------- x: np.ndarray [n_obs, dim] - Input variables Returns ------- x_corrected: np.ndarray [n_obs, dim] - Corrected and imputed input variables is_acting: np.ndarray [n_obs, dim] - Boolean matrix specifying for each variable whether it is acting or non-acting """ raise NotImplementedError
[docs] def _sample_valid_x( self, n: int, random_state=None, ) -> Tuple[np.ndarray, np.ndarray]: """ Sample n design vectors and additionally return the is_acting matrix. Returns ------- x: np.ndarray [n, dim] - Valid design vectors is_acting: np.ndarray [n, dim] - Boolean matrix specifying for each variable whether it is acting or non-acting """ raise NotImplementedError
def __str__(self): raise NotImplementedError def __repr__(self): raise NotImplementedError
VarValueType = Union[int, str, List[Union[int, str]]] def raise_config_space(): raise RuntimeError("Dependencies are not installed, run: pip install smt[cs]")
[docs] class DesignSpace(BaseDesignSpace): """ Class for defining a (hierarchical) design space by defining design variables, defining decreed variables (optional), and adding value constraints (optional). Numerical bounds can be requested using `get_num_bounds()`. If needed, it is possible to get the legacy SMT < 2.0 `xlimits` format using `get_x_limits()`. Parameters ---------- design_variables: list[DesignVariable] - The list of design variables: FloatVariable, IntegerVariable, OrdinalVariable, or CategoricalVariable Examples -------- Instantiate the design space with all its design variables: >>> from smt.utils.design_space import * >>> ds = DesignSpace([ >>> CategoricalVariable(['A', 'B']), # x0 categorical: A or B; order is not relevant >>> OrdinalVariable(['C', 'D', 'E']), # x1 ordinal: C, D or E; order is relevant >>> IntegerVariable(0, 2), # x2 integer between 0 and 2 (inclusive): 0, 1, 2 >>> FloatVariable(0, 1), # c3 continuous between 0 and 1 >>> ]) >>> assert len(ds.design_variables) == 4 You can define decreed variables (conditional activation): >>> ds.declare_decreed_var(decreed_var=1, meta_var=0, meta_value='A') # Activate x1 if x0 == A Decreed variables can be chained (however no cycles and no "diamonds" are supported): Note: only if ConfigSpace is installed! pip install smt[cs] >>> ds.declare_decreed_var(decreed_var=2, meta_var=1, meta_value=['C', 'D']) # Activate x2 if x1 == C or D If combinations of values between two variables are not allowed, this can be done using a value constraint: Note: only if ConfigSpace is installed! pip install smt[cs] >>> ds.add_value_constraint(var1=0, value1='A', var2=2, value2=[0, 1]) # Forbid x0 == A && x2 == 0 or 1 After defining everything correctly, you can then use the design space object to correct design vectors and get information about which design variables are acting: >>> x_corr, is_acting = ds.correct_get_acting(np.array([ >>> [0, 0, 2, .25], >>> [0, 2, 1, .75], >>> ])) >>> assert np.all(x_corr == np.array([ >>> [0, 0, 2, .25], >>> [0, 2, 0, .75], >>> ])) >>> assert np.all(is_acting == np.array([ >>> [True, True, True, True], >>> [True, True, False, True], # x2 is not acting if x1 != C or D (0 or 1) >>> ])) It is also possible to randomly sample design vectors conforming to the constraints: >>> x_sampled, is_acting_sampled = ds.sample_valid_x(100) You can also instantiate a purely-continuous design space from bounds directly: >>> continuous_design_space = DesignSpace([(0, 1), (0, 2), (.5, 5.5)]) >>> assert continuous_design_space.n_dv == 3 If needed, it is possible to get the legacy design space definition format: >>> xlimits = ds.get_x_limits() >>> cont_bounds = ds.get_num_bounds() >>> unfolded_cont_bounds = ds.get_unfolded_num_bounds() """ def __init__( self, design_variables: Union[List[DesignVariable], list, np.ndarray], random_state=None, ): self.sampler = None # Assume float variable bounds as inputs def _is_num(val): try: float(val) return True except ValueError: return False if len(design_variables) > 0 and not isinstance( design_variables[0], DesignVariable ): converted_dvs = [] for bounds in design_variables: if len(bounds) != 2 or not _is_num(bounds[0]) or not _is_num(bounds[1]): raise RuntimeError( f"Expecting either a list of DesignVariable objects or float variable " f"bounds! Unrecognized: {bounds!r}" ) converted_dvs.append(FloatVariable(bounds[0], bounds[1])) design_variables = converted_dvs self.random_state = random_state # For testing self._cs = None self._cs_cate = None if HAS_CONFIG_SPACE: cs_vars = {} cs_vars_cate = {} self.isinteger = False for i, dv in enumerate(design_variables): name = f"x{i}" if isinstance(dv, FloatVariable): cs_vars[name] = UniformFloatHyperparameter( name, lower=dv.lower, upper=dv.upper ) cs_vars_cate[name] = UniformFloatHyperparameter( name, lower=dv.lower, upper=dv.upper ) elif isinstance(dv, IntegerVariable): cs_vars[name] = FixedIntegerParam( name, lower=dv.lower, upper=dv.upper ) listvalues = [] for i in range(int(dv.upper - dv.lower + 1)): listvalues.append(str(int(i + dv.lower))) cs_vars_cate[name] = CategoricalHyperparameter( name, choices=listvalues ) self.isinteger = True elif isinstance(dv, OrdinalVariable): cs_vars[name] = OrdinalHyperparameter(name, sequence=dv.values) cs_vars_cate[name] = CategoricalHyperparameter( name, choices=dv.values ) elif isinstance(dv, CategoricalVariable): cs_vars[name] = CategoricalHyperparameter(name, choices=dv.values) cs_vars_cate[name] = CategoricalHyperparameter( name, choices=dv.values ) else: raise ValueError(f"Unknown variable type: {dv!r}") seed = self._to_seed(random_state) self._cs = NoDefaultConfigurationSpace(space=cs_vars, seed=seed) ## Fix to make constraints work correctly with either IntegerVariable or OrdinalVariable ## ConfigSpace is malfunctioning self._cs_cate = NoDefaultConfigurationSpace(space=cs_vars_cate, seed=seed) # dict[int, dict[any, list[int]]]: {meta_var_idx: {value: [decreed_var_idx, ...], ...}, ...} self._meta_vars = {} self._is_decreed = np.zeros((len(design_variables),), dtype=bool) super().__init__(design_variables)
[docs] def declare_decreed_var( self, decreed_var: int, meta_var: int, meta_value: VarValueType ): """ Define a conditional (decreed) variable to be active when the meta variable has (one of) the provided values. Parameters ---------- decreed_var: int - Index of the conditional variable (the variable that is conditionally active) meta_var: int - Index of the meta variable (the variable that determines whether the conditional var is active) meta_value: int | str | list[int|str] - The value or list of values that the meta variable can have to activate the decreed var """ # ConfigSpace implementation if self._cs is not None: # Get associated parameters decreed_param = self._get_param(decreed_var) meta_param = self._get_param(meta_var) # Add a condition that checks for equality (if single value given) or in-collection (if sequence given) if isinstance(meta_value, Sequence): condition = InCondition(decreed_param, meta_param, meta_value) else: condition = EqualsCondition(decreed_param, meta_param, meta_value) ## Fix to make constraints work correctly with either IntegerVariable or OrdinalVariable ## ConfigSpace is malfunctioning self._cs.add_condition(condition) decreed_param = self._get_param2(decreed_var) meta_param = self._get_param2(meta_var) # Add a condition that checks for equality (if single value given) or in-collection (if sequence given) if isinstance(meta_value, Sequence): try: condition = InCondition( decreed_param, meta_param, list(np.atleast_1d(np.array(meta_value, dtype=str))), ) except ValueError: condition = InCondition( decreed_param, meta_param, list(np.atleast_1d(np.array(meta_value, dtype=float))), ) else: try: condition = EqualsCondition( decreed_param, meta_param, str(meta_value) ) except ValueError: condition = EqualsCondition(decreed_param, meta_param, meta_value) self._cs_cate.add_condition(condition) # Simplified implementation else: # Variables cannot be both meta and decreed at the same time if self._is_decreed[meta_var]: raise RuntimeError( f"Variable cannot be both meta and decreed ({meta_var})!" ) # Variables can only be decreed by one meta var if self._is_decreed[decreed_var]: raise RuntimeError(f"Variable is already decreed: {decreed_var}") # Define meta-decreed relationship if meta_var not in self._meta_vars: self._meta_vars[meta_var] = {} meta_var_obj = self.design_variables[meta_var] for value in ( meta_value if isinstance(meta_value, Sequence) else [meta_value] ): encoded_value = value if isinstance(meta_var_obj, (OrdinalVariable, CategoricalVariable)): if value in meta_var_obj.values: encoded_value = meta_var_obj.values.index(value) if encoded_value not in self._meta_vars[meta_var]: self._meta_vars[meta_var][encoded_value] = [] self._meta_vars[meta_var][encoded_value].append(decreed_var) # Mark as decreed (conditionally acting) self._is_decreed[decreed_var] = True
[docs] def add_value_constraint( self, var1: int, value1: VarValueType, var2: int, value2: VarValueType ): """ Define a constraint where two variables cannot have the given values at the same time. Parameters ---------- var1: int - Index of the first variable value1: int | str | list[int|str] - Value or values that the first variable is checked against var2: int - Index of the second variable value2: int | str | list[int|str] - Value or values that the second variable is checked against """ if self._cs is None: raise_config_space() # Get parameters param1 = self._get_param(var1) param2 = self._get_param(var2) mixint_types = (UniformIntegerHyperparameter, OrdinalHyperparameter) self.has_valcons_ord_int = isinstance(param1, mixint_types) or isinstance( param2, mixint_types ) if not (isinstance(param1, UniformFloatHyperparameter)) and not ( isinstance(param2, UniformFloatHyperparameter) ): # Add forbidden clauses if isinstance(value1, Sequence): clause1 = ForbiddenInClause(param1, value1) else: clause1 = ForbiddenEqualsClause(param1, value1) if isinstance(value2, Sequence): clause2 = ForbiddenInClause(param2, value2) else: clause2 = ForbiddenEqualsClause(param2, value2) constraint_clause = ForbiddenAndConjunction(clause1, clause2) self._cs.add_forbidden_clause(constraint_clause) else: if value1 in [">", "<"] and value2 in [">", "<"] and value1 != value2: if value1 == "<": constraint_clause = ForbiddenLessThanRelation(param1, param2) self._cs.add_forbidden_clause(constraint_clause) else: constraint_clause = ForbiddenLessThanRelation(param2, param1) self._cs.add_forbidden_clause(constraint_clause) else: raise ValueError("Bad definition of DesignSpace.") ## Fix to make constraints work correctly with either IntegerVariable or OrdinalVariable ## ConfigSpace is malfunctioning # Get parameters param1 = self._get_param2(var1) param2 = self._get_param2(var2) # Add forbidden clauses if not (isinstance(param1, UniformFloatHyperparameter)) and not ( isinstance(param2, UniformFloatHyperparameter) ): if isinstance(value1, Sequence): clause1 = ForbiddenInClause(param1, str(value1)) else: clause1 = ForbiddenEqualsClause(param1, str(value1)) if isinstance(value2, Sequence): try: clause2 = ForbiddenInClause( param2, list(np.atleast_1d(np.array(value2, dtype=str))) ) except ValueError: clause2 = ForbiddenInClause( param2, list(np.atleast_1d(np.array(value2, dtype=float))) ) else: try: clause2 = ForbiddenEqualsClause(param2, str(value2)) except ValueError: clause2 = ForbiddenEqualsClause(param2, value2) constraint_clause = ForbiddenAndConjunction(clause1, clause2) self._cs_cate.add_forbidden_clause(constraint_clause)
def _get_param(self, idx): try: return self._cs.get_hyperparameter(f"x{idx}") except KeyError: raise KeyError(f"Variable not found: {idx}") def _get_param2(self, idx): try: return self._cs_cate.get_hyperparameter(f"x{idx}") except KeyError: raise KeyError(f"Variable not found: {idx}") @property def _cs_var_idx(self): """ ConfigurationSpace applies topological sort when adding conditions, so compared to what we expect the order of parameters might have changed. This property contains the indices of the params in the ConfigurationSpace. """ names = self._cs.get_hyperparameter_names() return np.array( [names.index(f"x{ix}") for ix in range(len(self.design_variables))] ) @property def _inv_cs_var_idx(self): """ See _cs_var_idx. This function returns the opposite mapping: the positions of our design variables for each param. """ return np.array( [int(param[1:]) for param in self._cs.get_hyperparameter_names()] ) def _is_conditionally_acting(self) -> np.ndarray: # Decreed variables are the conditionally acting variables return self._is_decreed def _correct_get_acting(self, x: np.ndarray) -> Tuple[np.ndarray, np.ndarray]: """Correct and impute design vectors""" x = x.astype(float) if self._cs is not None: # Normalize value according to what ConfigSpace expects self._normalize_x(x) # Get corrected Configuration objects by mapping our design vectors # to the ordering of the ConfigurationSpace inv_cs_var_idx = self._inv_cs_var_idx configs = [] for xi in x: configs.append(self._get_correct_config(xi[inv_cs_var_idx])) # Convert Configuration objects to design vectors and get the is_active matrix return self._configs_to_x(configs) # Simplified implementation # Correct discrete variables x_corr = x.copy() self._normalize_x(x_corr, cs_normalize=False) # Determine which variables are acting is_acting = np.ones(x_corr.shape, dtype=bool) is_acting[:, self._is_decreed] = False for i, xi in enumerate(x_corr): for i_meta, decrees in self._meta_vars.items(): meta_var_value = xi[i_meta] if meta_var_value in decrees: i_decreed_vars = decrees[meta_var_value] is_acting[i, i_decreed_vars] = True # Impute non-acting variables self._impute_non_acting(x_corr, is_acting) return x_corr, is_acting def _to_seed(self, random_state=None): seed = None if isinstance(random_state, int): seed = random_state elif isinstance(random_state, np.random.RandomState): seed = random_state.get_state()[1][0] return seed def _sample_valid_x( self, n: int, random_state=None ) -> Tuple[np.ndarray, np.ndarray]: """Sample design vectors""" # Simplified implementation: sample design vectors in unfolded space x_limits_unfolded = self.get_unfolded_num_bounds() if self.random_state is None: self.random_state = random_state if self._cs is not None: # Sample Configuration objects if self.seed is None: seed = self._to_seed(random_state) self.seed = seed self._cs.seed(self.seed) if self.seed is not None: self.seed += 1 configs = self._cs.sample_configuration(n) if n == 1: configs = [configs] # Convert Configuration objects to design vectors and get the is_active matrix return self._configs_to_x(configs) else: if self.sampler is None: self.sampler = LHS( xlimits=x_limits_unfolded, random_state=random_state, criterion="ese", ) x = self.sampler(n) # Fold and cast to discrete x, _ = self.fold_x(x) self._normalize_x(x, cs_normalize=False) # Get acting information and impute return self.correct_get_acting(x) def _get_correct_config(self, vector: np.ndarray) -> Configuration: config = Configuration(self._cs, vector=vector) # Unfortunately we cannot directly ask which parameters SHOULD be active # https://github.com/automl/ConfigSpace/issues/253#issuecomment-1513216665 # Therefore, we temporarily fix it with a very dirty workaround: catch the error raised in check_configuration # to find out which parameters should be inactive while True: try: ## Fix to make constraints work correctly with either IntegerVariable or OrdinalVariable ## ConfigSpace is malfunctioning if self.isinteger and self.has_valcons_ord_int: vector2 = np.copy(vector) self._cs_denormalize_x_ordered(np.atleast_2d(vector2)) indvec = 0 for hp in self._cs_cate: if ( (str(self._cs.get_hyperparameter(hp)).split()[2]) == "UniformInteger," and ( str(self._cs_cate.get_hyperparameter(hp)).split()[2][:3] ) == "Cat" and not (np.isnan(vector2[indvec])) ): vector2[indvec] = int(vector2[indvec]) - int( str(self._cs_cate.get_hyperparameter(hp)).split()[4][ 1:-1 ] ) indvec += 1 self._normalize_x_no_integer(np.atleast_2d(vector2)) config2 = Configuration(self._cs_cate, vector=vector2) config2.is_valid_configuration() config.is_valid_configuration() return config except ValueError as e: error_str = str(e) if "Inactive hyperparameter" in error_str: # Deduce which parameter is inactive inactive_param_name = error_str.split("'")[1] param_idx = self._cs.get_idx_by_hyperparameter_name( inactive_param_name ) # Modify the vector and create a new Configuration vector = config.get_array().copy() vector[param_idx] = np.nan config = Configuration(self._cs, vector=vector) # At this point, the parameter active statuses are set correctly, so we only need to correct the # configuration to one that does not violate the forbidden clauses elif isinstance(e, ForbiddenValueError): if self.seed is None: seed = self._to_seed(self.random_state) self.seed = seed if not (self.has_valcons_ord_int): return get_random_neighbor(config, seed=self.seed) else: vector = config.get_array().copy() indvec = 0 vector2 = np.copy(vector) ## Fix to make constraints work correctly with either IntegerVariable or OrdinalVariable ## ConfigSpace is malfunctioning for hp in self._cs_cate: if ( str(self._cs_cate.get_hyperparameter(hp)).split()[2][:3] ) == "Cat" and not (np.isnan(vector2[indvec])): vector2[indvec] = int(vector2[indvec]) indvec += 1 config2 = Configuration(self._cs_cate, vector=vector2) config3 = get_random_neighbor(config2, seed=self.seed) vector3 = config3.get_array().copy() config4 = Configuration(self._cs, vector=vector3) return config4 else: raise def _configs_to_x( self, configs: List["Configuration"] ) -> Tuple[np.ndarray, np.ndarray]: x = np.zeros((len(configs), len(self.design_variables))) is_acting = np.zeros(x.shape, dtype=bool) if len(configs) == 0: return x, is_acting cs_var_idx = self._cs_var_idx for i, config in enumerate(configs): x[i, :] = config.get_array()[cs_var_idx] # De-normalize continuous and integer variables self._cs_denormalize_x(x) # Set is_active flags and impute x is_acting = np.isfinite(x) self._impute_non_acting(x, is_acting) return x, is_acting def _impute_non_acting(self, x: np.ndarray, is_acting: np.ndarray): for i, dv in enumerate(self.design_variables): if isinstance(dv, FloatVariable): # Impute continuous variables to the mid of their bounds x[~is_acting[:, i], i] = 0.5 * (dv.upper - dv.lower) else: # Impute discrete variables to their lower bounds lower = 0 if isinstance(dv, (IntegerVariable, OrdinalVariable)): lower = dv.lower x[~is_acting[:, i], i] = lower def _normalize_x(self, x: np.ndarray, cs_normalize=True): for i, dv in enumerate(self.design_variables): if isinstance(dv, FloatVariable): if cs_normalize: dv.lower = min(np.min(x[:, i]), dv.lower) dv.upper = max(np.max(x[:, i]), dv.upper) x[:, i] = np.clip( (x[:, i] - dv.lower) / (dv.upper - dv.lower + 1e-16), 0, 1 ) elif isinstance(dv, IntegerVariable): x[:, i] = self._round_equally_distributed(x[:, i], dv.lower, dv.upper) if cs_normalize: # After rounding, normalize between 0 and 1, where 0 and 1 represent the stretched bounds x[:, i] = (x[:, i] - dv.lower + 0.49999) / ( dv.upper - dv.lower + 0.9999 ) def _normalize_x_no_integer(self, x: np.ndarray, cs_normalize=True): ordereddesign_variables = [ self.design_variables[i] for i in self._inv_cs_var_idx ] for i, dv in enumerate(ordereddesign_variables): if isinstance(dv, FloatVariable): if cs_normalize: x[:, i] = np.clip( (x[:, i] - dv.lower) / (dv.upper - dv.lower + 1e-16), 0, 1 ) elif isinstance(dv, (OrdinalVariable, CategoricalVariable)): # To ensure equal distribution of continuous values to discrete values, we first stretch-out the # continuous values to extend to 0.5 beyond the integer limits and then round. This ensures that the # values at the limits get a large-enough share of the continuous values x[:, i] = self._round_equally_distributed(x[:, i], dv.lower, dv.upper) def _cs_denormalize_x(self, x: np.ndarray): for i, dv in enumerate(self.design_variables): if isinstance(dv, FloatVariable): x[:, i] = x[:, i] * (dv.upper - dv.lower) + dv.lower elif isinstance(dv, IntegerVariable): # Integer values are normalized similarly to what is done in _round_equally_distributed x[:, i] = np.round( x[:, i] * (dv.upper - dv.lower + 0.9999) + dv.lower - 0.49999 ) def _cs_denormalize_x_ordered(self, x: np.ndarray): ordereddesign_variables = [ self.design_variables[i] for i in self._inv_cs_var_idx ] for i, dv in enumerate(ordereddesign_variables): if isinstance(dv, FloatVariable): x[:, i] = x[:, i] * (dv.upper - dv.lower) + dv.lower elif isinstance(dv, IntegerVariable): # Integer values are normalized similarly to what is done in _round_equally_distributed x[:, i] = np.round( x[:, i] * (dv.upper - dv.lower + 0.9999) + dv.lower - 0.49999 ) def __str__(self): dvs = "\n".join([f"x{i}: {dv!s}" for i, dv in enumerate(self.design_variables)]) return f"Design space:\n{dvs}" def __repr__(self): return f"{self.__class__.__name__}({self.design_variables!r})"
class NoDefaultConfigurationSpace(ConfigurationSpace): """ConfigurationSpace that supports no default configuration""" def get_default_configuration(self, *args, **kwargs): raise NotImplementedError def _check_default_configuration(self, *args, **kwargs): pass class FixedIntegerParam(UniformIntegerHyperparameter): def get_neighbors( self, value: float, rs: np.random.RandomState, number: int = 4, transform: bool = False, std: float = 0.2, ) -> List[int]: # Temporary fix until https://github.com/automl/ConfigSpace/pull/313 is released center = self._transform(value) lower, upper = self.lower, self.upper if upper - lower - 1 < number: neighbors = sorted(set(range(lower, upper + 1)) - {center}) if transform: return neighbors return self._inverse_transform(np.asarray(neighbors)).tolist() return super().get_neighbors( value, rs, number=number, transform=transform, std=std )