Typed Optimization (#531)
* WIP

* Add ML example

* Save for merge

* Update

* Parameter types more (#13)

* fix: import error from exception module (#525)

* fix: replace list with sequence (#524)

* Fix min window type check (#523)

* fix: replace dict with Mapping

* fix: replace list with Sequence

* fix: add type hint

* fix: does not accept None

* Change docs badge (#527)

* fix: parameter, target_space

* fix: constraint, bayesian_optimization

* fix: ParamsType

---------

Co-authored-by: till-m <36440677+till-m@users.noreply.github.com>

* Use `.masks` not `._masks`

* Use `super` to call kernel

* Update logging for parameters

* Disable SDR when non-float parameters are present

* Add demo script for typed optimization

* Update parameters, testing

* Remove sorting, gradient optimize only continuous params

* Go back to `wrap_kernel`

* Update code

* Remove `tqdm` dependency, use EI acq

* Add more text to typed optimization notebook.

* Save files while moving device

* Update with custom parameter type example

* Mention that parameters are not sorted

* Change array reg warning

* Update Citations, parameter notebook

---------

Co-authored-by: phi-friday <phi.friday@gmail.com>
till-m and phi-friday authored Dec 27, 2024
1 parent e487e5f commit 0ef608f
Showing 24 changed files with 2,082 additions and 425 deletions.
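The headline change: `pbounds` entries can now describe typed parameters, not just float intervals. A minimal sketch of the new usage, based on the demo notebook added in this PR — the exact bound specs (`(low, high, int)` for integers, a tuple of strings for categoricals) are my reading of that notebook and should be treated as assumptions, not guaranteed API:

```python
# Sketch of typed optimization as introduced by this PR. The bound specs
# below ((low, high, int) for integers, a string tuple for categoricals)
# are assumptions drawn from the new parameter-types notebook.
from bayes_opt import BayesianOptimization

def objective(x, n_layers, activation):
    # n_layers arrives as an int, activation as one of the given strings
    score = -(x - 2) ** 2 - (n_layers - 3) ** 2
    return score + (1.0 if activation == "relu" else 0.0)

optimizer = BayesianOptimization(
    f=objective,
    pbounds={
        "x": (-10.0, 10.0),              # ordinary float parameter
        "n_layers": (1, 8, int),         # integer parameter
        "activation": ("relu", "tanh"),  # categorical parameter
    },
    random_state=1,
)
optimizer.maximize(init_points=5, n_iter=15)
```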
13 changes: 13 additions & 0 deletions README.md
@@ -185,3 +185,16 @@ For constrained optimization:
year={2014}
}
```

For optimization over non-float parameters:
```
@article{garrido2020dealing,
title={Dealing with categorical and integer-valued variables in {B}ayesian optimization with {G}aussian processes},
author={Garrido-Merch{\'a}n, Eduardo C and Hern{\'a}ndez-Lobato, Daniel},
journal={Neurocomputing},
volume={380},
pages={20--35},
year={2020},
publisher={Elsevier}
}
```
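The new citation is the method behind this PR: Garrido-Merchán and Hernández-Lobato handle integer and categorical inputs by transforming points inside the kernel, i.e. evaluating k_base(T(x), T(x')), so the GP covariance is constant across inputs that map to the same discrete value. A rough sketch of such a transform T — illustrative only; the library's actual version is the `kernel_transform` wired up via `wrap_kernel` later in this diff:

```python
# Illustrative sketch of the input transform from the cited paper, not the
# library's actual kernel_transform: integer dimensions are rounded and each
# one-hot categorical block is snapped to its argmax before the base kernel
# is evaluated, making the GP piecewise constant over non-float inputs.
import numpy as np

def transform(X, int_dims, cat_blocks):
    """X: (n_points, n_dims); int_dims: e.g. [1]; cat_blocks: e.g. [range(2, 5)]."""
    X = np.array(X, dtype=float, copy=True)
    X[:, int_dims] = np.round(X[:, int_dims])
    for block in cat_blocks:
        cols = list(block)
        onehot = np.zeros((X.shape[0], len(cols)))
        onehot[np.arange(X.shape[0]), X[:, cols].argmax(axis=1)] = 1.0
        X[:, cols] = onehot
    return X
```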
90 changes: 56 additions & 34 deletions bayes_opt/acquisition.py
@@ -127,7 +127,7 @@ def suggest(
self._fit_gp(gp=gp, target_space=target_space)

acq = self._get_acq(gp=gp, constraint=target_space.constraint)
return self._acq_min(acq, target_space.bounds, n_random=n_random, n_l_bfgs_b=n_l_bfgs_b)
return self._acq_min(acq, target_space, n_random=n_random, n_l_bfgs_b=n_l_bfgs_b)

def _get_acq(
self, gp: GaussianProcessRegressor, constraint: ConstraintModel | None = None
@@ -182,7 +182,7 @@ def acq(x: NDArray[Float]) -> NDArray[Float]:
def _acq_min(
self,
acq: Callable[[NDArray[Float]], NDArray[Float]],
bounds: NDArray[Float],
space: TargetSpace,
n_random: int = 10_000,
n_l_bfgs_b: int = 10,
) -> NDArray[Float]:
@@ -197,10 +197,8 @@ def _acq_min(
acq : Callable
Acquisition function to use. Should accept an array of parameters `x`.
bounds : np.ndarray
Bounds of the search space. For `N` parameters this has shape
`(N, 2)` with `[i, 0]` the lower bound of parameter `i` and
`[i, 1]` the upper bound.
space : TargetSpace
The target space over which to optimize.
n_random : int
Number of random samples to use.
@@ -217,15 +215,22 @@
if n_random == 0 and n_l_bfgs_b == 0:
error_msg = "Either n_random or n_l_bfgs_b needs to be greater than 0."
raise ValueError(error_msg)
x_min_r, min_acq_r = self._random_sample_minimize(acq, bounds, n_random=n_random)
x_min_l, min_acq_l = self._l_bfgs_b_minimize(acq, bounds, n_x_seeds=n_l_bfgs_b)
# Either n_random or n_l_bfgs_b is not 0 => at least one of x_min_r and x_min_l is not None
if min_acq_r < min_acq_l:
return x_min_r
return x_min_l
x_min_r, min_acq_r, x_seeds = self._random_sample_minimize(
acq, space, n_random=max(n_random, n_l_bfgs_b), n_x_seeds=n_l_bfgs_b
)
if n_l_bfgs_b:
x_min_l, min_acq_l = self._l_bfgs_b_minimize(acq, space, x_seeds=x_seeds)
# Either n_random or n_l_bfgs_b is not 0 => at least one of x_min_r and x_min_l is not None
if min_acq_r > min_acq_l:
return x_min_l
return x_min_r
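The control flow above, in isolation: random search over the whole space produces both a best point and the seeds for gradient refinement, and the smaller of the two acquisition values wins. A standalone sketch with a toy acquisition function (not the library's internals):

```python
# Standalone sketch of the random-search-then-L-BFGS-B flow above, with a
# toy acquisition function and plain box bounds (not the library's internals).
import numpy as np
from scipy.optimize import minimize

def acq(x):
    return np.sum(np.atleast_2d(x) ** 2, axis=1)  # values to *minimize*

rng = np.random.default_rng(0)
bounds = np.array([[-2.0, 2.0], [-2.0, 2.0]])  # shape (n_dims, 2)

# Stage 1: random search; keep the best point and the top-10 seeds.
x_tries = rng.uniform(bounds[:, 0], bounds[:, 1], size=(10_000, 2))
ys = acq(x_tries)
x_min, min_acq = x_tries[ys.argmin()], ys.min()
seeds = x_tries[np.argsort(ys)[:10]]  # lowest acquisition values first

# Stage 2: refine each seed with L-BFGS-B; keep the overall minimum.
for seed in seeds:
    res = minimize(lambda x: acq(x)[0], seed, bounds=bounds, method="L-BFGS-B")
    if res.success and np.squeeze(res.fun) < min_acq:
        x_min, min_acq = res.x, np.squeeze(res.fun)
```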

def _random_sample_minimize(
self, acq: Callable[[NDArray[Float]], NDArray[Float]], bounds: NDArray[Float], n_random: int
self,
acq: Callable[[NDArray[Float]], NDArray[Float]],
space: TargetSpace,
n_random: int,
n_x_seeds: int = 0,
) -> tuple[NDArray[Float] | None, float, NDArray[Float]]:
"""Random search to find the minimum of `acq` function.
@@ -234,14 +239,14 @@
acq : Callable
Acquisition function to use. Should accept an array of parameters `x`.
bounds : np.ndarray
Bounds of the search space. For `N` parameters this has shape
`(N, 2)` with `[i, 0]` the lower bound of parameter `i` and
`[i, 1]` the upper bound.
space : TargetSpace
The target space over which to optimize.
n_random : int
Number of random samples to use.
n_x_seeds : int
Number of top points to return, for use as starting points for L-BFGS-B.
Returns
-------
x_min : np.ndarray
@@ -252,14 +257,22 @@
"""
if n_random == 0:
return None, np.inf, []
x_tries = self.random_state.uniform(bounds[:, 0], bounds[:, 1], size=(n_random, bounds.shape[0]))
x_tries = space.random_sample(n_random, random_state=self.random_state)
ys = acq(x_tries)
x_min = x_tries[ys.argmin()]
min_acq = ys.min()
return x_min, min_acq
if n_x_seeds != 0:
idxs = np.argsort(ys)[:n_x_seeds]
x_seeds = x_tries[idxs]
else:
x_seeds = []
return x_min, min_acq, x_seeds

def _l_bfgs_b_minimize(
self, acq: Callable[[NDArray[Float]], NDArray[Float]], bounds: NDArray[Float], n_x_seeds: int = 10
self,
acq: Callable[[NDArray[Float]], NDArray[Float]],
space: TargetSpace,
x_seeds: NDArray[Float] | None = None,
) -> tuple[NDArray[Float] | None, float]:
"""Random search to find the minimum of `acq` function.
@@ -268,13 +281,11 @@
acq : Callable
Acquisition function to use. Should accept an array of parameters `x`.
bounds : np.ndarray
Bounds of the search space. For `N` parameters this has shape
`(N, 2)` with `[i, 0]` the lower bound of parameter `i` and
`[i, 1]` the upper bound.
space : TargetSpace
The target space over which to optimize.
n_x_seeds : int
Number of starting points for the L-BFGS-B optimizer.
x_seeds : np.ndarray
Starting points for the L-BFGS-B optimizer.
Returns
-------
@@ -284,33 +295,44 @@
min_acq : float
Acquisition function value at `x_min`
"""
if n_x_seeds == 0:
return None, np.inf
x_seeds = self.random_state.uniform(bounds[:, 0], bounds[:, 1], size=(n_x_seeds, bounds.shape[0]))
continuous_dimensions = space.continuous_dimensions
continuous_bounds = space.bounds[continuous_dimensions]

if not continuous_dimensions.any():
min_acq = np.inf
x_min = np.array([np.nan] * space.bounds.shape[0])
return x_min, min_acq

min_acq: float | None = None
x_try: NDArray[Float]
x_min: NDArray[Float]
for x_try in x_seeds:
# Find the minimum of minus the acquisition function
res: OptimizeResult = minimize(acq, x_try, bounds=bounds, method="L-BFGS-B")

def continuous_acq(x: NDArray[Float], x_try=x_try) -> NDArray[Float]:
x_try[continuous_dimensions] = x
return acq(x_try)

# Find the minimum of minus the acquisition function
res: OptimizeResult = minimize(
continuous_acq, x_try[continuous_dimensions], bounds=continuous_bounds, method="L-BFGS-B"
)
# See if success
if not res.success:
continue

# Store it if better than the previous minimum.
if min_acq is None or np.squeeze(res.fun) <= min_acq:
x_min = res.x
x_try[continuous_dimensions] = res.x
x_min = x_try
min_acq = np.squeeze(res.fun)

if min_acq is None:
min_acq = np.inf
x_min = np.array([np.nan] * bounds.shape[0])
x_min = np.array([np.nan] * space.bounds.shape[0])

# Clip output to make sure it lies within the bounds. Due to floating
# point technicalities this is not always the case.
return np.clip(x_min, bounds[:, 0], bounds[:, 1]), min_acq
return np.clip(x_min, space.bounds[:, 0], space.bounds[:, 1]), min_acq
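The closure trick above, shown standalone: non-continuous dimensions of each seed stay frozen while L-BFGS-B moves only the continuous ones, and the result is recombined afterwards. The mask and bounds here are hypothetical stand-ins for `space.continuous_dimensions` and `space.bounds`:

```python
# Standalone sketch of freezing discrete dimensions during gradient descent.
# The mask and bounds are hypothetical stand-ins for the TargetSpace fields.
import numpy as np
from scipy.optimize import minimize

continuous = np.array([True, False, True])       # dim 1 is e.g. an integer
bounds = np.array([[-5.0, 5.0], [0.0, 3.0], [-5.0, 5.0]])

def full_acq(x):
    return float(np.sum(x ** 2))

x_try = np.array([2.5, 1.0, -3.0])               # seed; dim 1 stays at 1.0

def continuous_acq(x_cont, x_full=x_try.copy()):
    x_full[continuous] = x_cont                  # overwrite continuous dims only
    return full_acq(x_full)

res = minimize(continuous_acq, x_try[continuous],
               bounds=bounds[continuous], method="L-BFGS-B")
x_min = x_try.copy()
x_min[continuous] = res.x                        # recombine with frozen dims
```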


class UpperConfidenceBound(AcquisitionFunction):
69 changes: 34 additions & 35 deletions bayes_opt/bayesian_optimization.py
@@ -16,13 +16,15 @@

from bayes_opt import acquisition
from bayes_opt.constraint import ConstraintModel
from bayes_opt.domain_reduction import DomainTransformer
from bayes_opt.event import DEFAULT_EVENTS, Events
from bayes_opt.logger import _get_default_logger
from bayes_opt.parameter import wrap_kernel
from bayes_opt.target_space import TargetSpace
from bayes_opt.util import ensure_rng

if TYPE_CHECKING:
from collections.abc import Callable, Iterable, Mapping, Sequence
from collections.abc import Callable, Iterable, Mapping

from numpy.random import RandomState
from numpy.typing import NDArray
@@ -31,6 +33,7 @@
from bayes_opt.acquisition import AcquisitionFunction
from bayes_opt.constraint import ConstraintModel
from bayes_opt.domain_reduction import DomainTransformer
from bayes_opt.parameter import BoundsMapping, ParamsType

Float = np.floating[Any]

@@ -114,7 +117,7 @@ def __init__(
):
self._random_state = ensure_rng(random_state)
self._allow_duplicate_points = allow_duplicate_points
self._queue: deque[Mapping[str, float] | Sequence[float] | NDArray[Float]] = deque()
self._queue: deque[ParamsType] = deque()

if acquisition_function is None:
if constraint is None:
Expand All @@ -128,15 +131,6 @@ def __init__(
else:
self._acquisition_function = acquisition_function

# Internal GP regressor
self._gp = GaussianProcessRegressor(
kernel=Matern(nu=2.5),
alpha=1e-6,
normalize_y=True,
n_restarts_optimizer=5,
random_state=self._random_state,
)

if constraint is None:
# Data structure containing the function to be optimized, the
# bounds of its domain, and a record of the evaluations we have
@@ -158,14 +152,22 @@
)
self.is_constrained = True

# Internal GP regressor
self._gp = GaussianProcessRegressor(
kernel=wrap_kernel(Matern(nu=2.5), transform=self._space.kernel_transform),
alpha=1e-6,
normalize_y=True,
n_restarts_optimizer=5,
random_state=self._random_state,
)

self._verbose = verbose
self._bounds_transformer = bounds_transformer
if self._bounds_transformer:
try:
self._bounds_transformer.initialize(self._space)
except (AttributeError, TypeError) as exc:
error_msg = "The transformer must be an instance of DomainTransformer"
raise TypeError(error_msg) from exc
if not isinstance(self._bounds_transformer, DomainTransformer):
msg = "The transformer must be an instance of DomainTransformer"
raise TypeError(msg)
self._bounds_transformer.initialize(self._space)

self._sorting_warning_already_shown = False # TODO: remove in future version
super().__init__(events=DEFAULT_EVENTS)
@@ -204,10 +206,7 @@ def res(self) -> list[dict[str, Any]]:
return self._space.res()

def register(
self,
params: Mapping[str, float] | Sequence[float] | NDArray[Float],
target: float,
constraint_value: float | NDArray[Float] | None = None,
self, params: ParamsType, target: float, constraint_value: float | NDArray[Float] | None = None
) -> None:
"""Register an observation with known target.
@@ -225,20 +224,18 @@ def register(
# TODO: remove in future version
if isinstance(params, np.ndarray) and not self._sorting_warning_already_shown:
msg = (
"You're attempting to register an np.ndarray. Currently, the optimizer internally sorts"
" parameters by key and expects any registered array to respect this order. In future"
" versions this behaviour will change and the order as given by the pbounds dictionary"
" will be used. If you wish to retain sorted parameters, please manually sort your pbounds"
"You're attempting to register an np.ndarray. In previous versions, the optimizer internally"
" sorted parameters by key and expected any registered array to respect this order."
" In the current and any future version the order as given by the pbounds dictionary will be"
" used. If you wish to retain sorted parameters, please manually sort your pbounds"
" dictionary before constructing the optimizer."
)
warn(msg, stacklevel=1)
self._sorting_warning_already_shown = True
self._space.register(params, target, constraint_value)
self.dispatch(Events.OPTIMIZATION_STEP)
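Given the new ordering semantics described in the warning above, the unambiguous way to register externally evaluated points is with a dict keyed by parameter name. A small sketch (bounds and values are made up for illustration):

```python
# Sketch: registering observations by name sidesteps array ordering entirely.
# Bounds and values here are made up.
from bayes_opt import BayesianOptimization

optimizer = BayesianOptimization(
    f=None,  # no objective needed when only registering known observations
    pbounds={"x": (-5.0, 5.0), "y": (-5.0, 5.0)},
    random_state=1,
)
optimizer.register(params={"x": 2.0, "y": -1.5}, target=3.2)
next_point = optimizer.suggest()  # dict keyed by parameter name
```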

def probe(
self, params: Mapping[str, float] | Sequence[float] | NDArray[Float], lazy: bool = True
) -> None:
def probe(self, params: ParamsType, lazy: bool = True) -> None:
"""Evaluate the function at the given points.
Useful to guide the optimizer.
@@ -255,10 +252,10 @@ def probe(
# TODO: remove in future version
if isinstance(params, np.ndarray) and not self._sorting_warning_already_shown:
msg = (
"You're attempting to register an np.ndarray. Currently, the optimizer internally sorts"
" parameters by key and expects any registered array to respect this order. In future"
" versions this behaviour will change and the order as given by the pbounds dictionary"
" will be used. If you wish to retain sorted parameters, please manually sort your pbounds"
"You're attempting to register an np.ndarray. In previous versions, the optimizer internally"
" sorted parameters by key and expected any registered array to respect this order."
" In the current and any future version the order as given by the pbounds dictionary will be"
" used. If you wish to retain sorted parameters, please manually sort your pbounds"
" dictionary before constructing the optimizer."
)
warn(msg, stacklevel=1)
@@ -270,10 +267,10 @@
self._space.probe(params)
self.dispatch(Events.OPTIMIZATION_STEP)

def suggest(self) -> dict[str, float]:
def suggest(self) -> dict[str, float | NDArray[Float]]:
"""Suggest a promising point to probe next."""
if len(self._space) == 0:
return self._space.array_to_params(self._space.random_sample())
return self._space.array_to_params(self._space.random_sample(random_state=self._random_state))

# Finding argmax of the acquisition function.
suggestion = self._acquisition_function.suggest(gp=self._gp, target_space=self._space, fit_gp=True)
@@ -292,7 +289,7 @@ def _prime_queue(self, init_points: int) -> None:
init_points = max(init_points, 1)

for _ in range(init_points):
sample = self._space.random_sample()
sample = self._space.random_sample(random_state=self._random_state)
self._queue.append(self._space.array_to_params(sample))

def _prime_subscriptions(self) -> None:
@@ -344,7 +341,7 @@ def maximize(self, init_points: int = 5, n_iter: int = 25) -> None:

self.dispatch(Events.OPTIMIZATION_END)

def set_bounds(self, new_bounds: Mapping[str, NDArray[Float] | Sequence[float]]) -> None:
def set_bounds(self, new_bounds: BoundsMapping) -> None:
"""Modify the bounds of the search space.
Parameters
@@ -356,4 +353,6 @@ def set_bounds(self, new_bounds: Mapping[str, NDArray[Float] | Sequence[float]]) -> None:

def set_gp_params(self, **params: Any) -> None:
"""Set parameters of the internal Gaussian Process Regressor."""
if "kernel" in params:
params["kernel"] = wrap_kernel(kernel=params["kernel"], transform=self._space.kernel_transform)
self._gp.set_params(**params)
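With this change, a kernel supplied through `set_gp_params` is wrapped the same way as the default Matern, so the parameter-type transform still applies. A usage sketch — the RBF choice and the bounds are illustrative, not prescribed:

```python
# Sketch: a custom kernel set after construction is now wrapped with the
# space's kernel transform automatically. RBF and bounds are illustrative.
from sklearn.gaussian_process.kernels import RBF
from bayes_opt import BayesianOptimization

optimizer = BayesianOptimization(f=None, pbounds={"x": (-5.0, 5.0)}, random_state=1)
optimizer.set_gp_params(kernel=RBF(length_scale=1.0))
```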
5 changes: 4 additions & 1 deletion bayes_opt/constraint.py
@@ -9,6 +9,8 @@
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import Matern

from bayes_opt.parameter import wrap_kernel

if TYPE_CHECKING:
from collections.abc import Callable

@@ -55,6 +57,7 @@ def __init__(
fun: Callable[..., float] | Callable[..., NDArray[Float]] | None,
lb: float | NDArray[Float],
ub: float | NDArray[Float],
transform: Callable[[Any], Any] | None = None,
random_state: int | RandomState | None = None,
) -> None:
self.fun = fun
@@ -68,7 +71,7 @@

self._model = [
GaussianProcessRegressor(
kernel=Matern(nu=2.5),
kernel=wrap_kernel(Matern(nu=2.5), transform) if transform is not None else Matern(nu=2.5),
alpha=1e-6,
normalize_y=True,
n_restarts_optimizer=5,
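`ConstraintModel`'s GP now gets the same optional kernel wrapping. From the user's side nothing changes: constrained optimization is still driven by a scipy `NonlinearConstraint`, as in this sketch with toy target and constraint functions:

```python
# Sketch of the user-facing constrained setup that exercises ConstraintModel;
# the target and constraint functions are toys for illustration.
import numpy as np
from scipy.optimize import NonlinearConstraint
from bayes_opt import BayesianOptimization

def target(x, y):
    return -(x - 1) ** 2 - (y - 1) ** 2

def constraint_fun(x, y):
    return x + y  # modeled by the constraint GP; require x + y <= 1

constraint = NonlinearConstraint(constraint_fun, lb=-np.inf, ub=1.0)
optimizer = BayesianOptimization(
    f=target,
    pbounds={"x": (-5.0, 5.0), "y": (-5.0, 5.0)},
    constraint=constraint,
    random_state=1,
)
optimizer.maximize(init_points=3, n_iter=10)
```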