diff --git a/CHANGELOG.md b/CHANGELOG.md
index d95abc5b1..c12a89f37 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -21,6 +21,7 @@
 - `Parameter` classes have now a layer structure [#1045](https://github.com/facebookresearch/nevergrad/pull/1045)
   which simplifies changing their behavior. In future PRs this system will take charge of bounds, other constraints, sampling etc.
+- `DE` initial sampling has been updated to take bounds into account [#1058](https://github.com/facebookresearch/nevergrad/pull/1058)
 ### Other changes
diff --git a/nevergrad/benchmark/test_xpbase.py b/nevergrad/benchmark/test_xpbase.py
index d4c99ac52..dbf35ed45 100644
--- a/nevergrad/benchmark/test_xpbase.py
+++ b/nevergrad/benchmark/test_xpbase.py
@@ -48,9 +48,7 @@ def test_run_packed_artificial_function() -> None:
     )
     xp = xpbase.Experiment(func, optimizer="OnePlusOne", budget=24, num_workers=2, batch_mode=True, seed=14)
     summary = xp.run()
-    np.testing.assert_almost_equal(
-        summary["loss"], -9784.829729792353, decimal=1
-    )  # makes sure seeding works!
+    np.testing.assert_almost_equal(summary["loss"], -9784.8, decimal=1)  # makes sure seeding works!
 def test_noisy_artificial_function_loss() -> None:
diff --git a/nevergrad/functions/images/core.py b/nevergrad/functions/images/core.py
index 4c4a4430f..16d1b87b7 100644
--- a/nevergrad/functions/images/core.py
+++ b/nevergrad/functions/images/core.py
@@ -3,10 +3,11 @@
 # This source code is licensed under the MIT license found in the
 # LICENSE file in the root directory of this source tree.
-import cv2
-from pathlib import Path
+import os
 import itertools
+from pathlib import Path
+import cv2
 import numpy as np
 import PIL.Image
 import torch.nn as nn
@@ -17,6 +18,7 @@
 import nevergrad as ng
 import nevergrad.common.typing as tp
+from nevergrad.common import errors
 from .. import base
 from . import imagelosses
@@ -259,6 +261,8 @@ def __init__(
         if not torch.cuda.is_available():
             use_gpu = False
         # Storing high level information..
+        if os.environ.get("CIRCLECI", False):
+            raise errors.UnsupportedExperiment("ImageFromPGAN is not well supported in CircleCI")
         self.pgan_model = torch.hub.load(
             "facebookresearch/pytorch_GAN_zoo:hub",
             "PGAN",
diff --git a/nevergrad/optimization/base.py b/nevergrad/optimization/base.py
index f56fb95a4..5bc07ba57 100644
--- a/nevergrad/optimization/base.py
+++ b/nevergrad/optimization/base.py
@@ -213,11 +213,14 @@ def pareto_front(
         ----
         During non-multiobjective optimization, this returns the current pessimistic best
         """
-        if self._hypervolume_pareto is None:
-            return [self.provide_recommendation()]
-        return self._hypervolume_pareto.pareto_front(
-            size=size, subset=subset, subset_tentatives=subset_tentatives
+        pareto = (
+            []
+            if self._hypervolume_pareto is None
+            else self._hypervolume_pareto.pareto_front(
+                size=size, subset=subset, subset_tentatives=subset_tentatives
+            )
         )
+        return pareto if pareto else [self.provide_recommendation()]
     def dump(self, filepath: tp.Union[str, Path]) -> None:
         """Pickles the optimizer into a file."""
diff --git a/nevergrad/optimization/differentialevolution.py b/nevergrad/optimization/differentialevolution.py
index 4baf975d7..abdfc2798 100644
--- a/nevergrad/optimization/differentialevolution.py
+++ b/nevergrad/optimization/differentialevolution.py
@@ -5,11 +5,10 @@
 import warnings
 import numpy as np
-from scipy import stats
 import nevergrad.common.typing as tp
 from nevergrad.parametrization import parameter as p
 from . import base
-from . import sequences
+from . import oneshot
 class Crossover:
@@ -100,7 +99,7 @@ def __init__(
         self._penalize_cheap_violations = True
         self._uid_queue = base.utils.UidQueue()
         self.population: tp.Dict[str, p.Parameter] = {}
-        self.sampler: tp.Optional[sequences.Sampler] = None
+        self.sampler: tp.Optional[base.Optimizer] = None
     def recommend(self) -> p.Parameter:  # This is NOT the naive version. We deal with noise.
         if self._config.recommendation != "noisy":
@@ -117,18 +116,21 @@ def recommend(self) -> p.Parameter:  # This is NOT the naive version. We deal wi
     def _internal_ask_candidate(self) -> p.Parameter:
         if len(self.population) < self.llambda:  # initialization phase
             init = self._config.initialization
-            if self.sampler is None and init != "gaussian":
+            if self.sampler is None and init not in ["gaussian", "parametrization"]:
                 assert init in ["LHS", "QR"]
-                sampler_cls = sequences.LHSSampler if init == "LHS" else sequences.HammersleySampler
-                self.sampler = sampler_cls(
-                    self.dimension, budget=self.llambda, scrambling=init == "QR", random_state=self._rng
+                self.sampler = oneshot.SamplingSearch(
+                    sampler=init if init == "LHS" else "Hammersley", scrambled=init == "QR", scale=self.scale
+                )(
+                    self.parametrization,
+                    budget=self.llambda,
                 )
-            new_guy = self.scale * (
-                self._rng.normal(0, 1, self.dimension)
-                if self.sampler is None
-                else stats.norm.ppf(self.sampler())
-            )
-            candidate = self.parametrization.spawn_child().set_standardized_data(new_guy)
+            if init == "parametrization":
+                candidate = self.parametrization.sample()
+            elif self.sampler is not None:
+                candidate = self.sampler.ask()
+            else:
+                new_guy = self.scale * self._rng.normal(0, 1, self.dimension)
+                candidate = self.parametrization.spawn_child().set_standardized_data(new_guy)
             candidate.heritage["lineage"] = candidate.uid  # new lineage
             self.population[candidate.uid] = candidate
             self._uid_queue.asked.add(candidate.uid)
@@ -146,10 +148,11 @@ def _internal_ask_candidate(self) -> p.Parameter:
         # redefine the different parents in case of multiobjective optimization
         if self._config.multiobjective_adaptation and self.num_objectives > 1:
             pareto = self.pareto_front()
+            # can't use choice directly on pareto, because parametrization can be iterable
            if pareto:
-                best = parent if parent in pareto else self._rng.choice(pareto)
+                best = parent if parent in pareto else pareto[self._rng.choice(len(pareto))]
                 if len(pareto) > 2:  # otherwise, not enough diversity
-                    a, b = self._rng.choice(pareto, size=2, replace=False)
+                    a, b = (pareto[idx] for idx in self._rng.choice(len(pareto), size=2, replace=False))
         # define donor
         data_a, data_b, data_best = (
             indiv.get_standardized_data(reference=self.parametrization) for indiv in (a, b, best)
@@ -228,8 +231,9 @@ class DifferentialEvolution(base.ConfiguredOptimizer):
     Parameters
     ----------
-    initialization: "LHS", "QR" or "gaussian"
-        algorithm/distribution used for the initialization phase
+    initialization: "parametrization", "LHS" or "QR"
+        algorithm/distribution used for the initialization phase. If "parametrization", this uses the
+        sample method of the parametrization.
     scale: float or str
         scale of random component of the updates
     recommendation: "pessimistic", "optimistic", "mean" or "noisy"
@@ -256,7 +260,7 @@ class DifferentialEvolution(base.ConfiguredOptimizer):
     def __init__(
         self,
         *,
-        initialization: str = "gaussian",
+        initialization: str = "parametrization",
         scale: tp.Union[str, float] = 1.0,
         recommendation: str = "optimistic",
         crossover: tp.Union[str, float] = 0.5,
@@ -268,7 +272,7 @@ def __init__(
     ) -> None:
         super().__init__(_DE, locals(), as_config=True)
         assert recommendation in ["optimistic", "pessimistic", "noisy", "mean"]
-        assert initialization in ["gaussian", "LHS", "QR"]
+        assert initialization in ["gaussian", "LHS", "QR", "parametrization"]
         assert isinstance(scale, float) or scale == "mini"
         if not isinstance(popsize, int):
             assert popsize in ["large", "dimension", "standard"]
diff --git a/nevergrad/optimization/experimentalvariants.py b/nevergrad/optimization/experimentalvariants.py
index 72a31d75f..14f13009c 100644
--- a/nevergrad/optimization/experimentalvariants.py
+++ b/nevergrad/optimization/experimentalvariants.py
@@ -22,7 +22,7 @@
 ParametrizationDE = DifferentialEvolution(crossover="parametrization").set_name(
     "ParametrizationDE", register=True
 )
-MiniDE = DifferentialEvolution(scale="mini").set_name("MiniDE", register=True)
+MiniDE = DifferentialEvolution(initialization="gaussian", scale="mini").set_name("MiniDE", register=True)
 MiniLhsDE = DifferentialEvolution(initialization="LHS", scale="mini").set_name("MiniLhsDE", register=True)
 MiniQrDE = DifferentialEvolution(initialization="QR", scale="mini").set_name("MiniQrDE", register=True)
 AlmostRotationInvariantDEAndBigPop = DifferentialEvolution(crossover=0.9, popsize="dimension").set_name(
diff --git a/nevergrad/optimization/multiobjective/core.py b/nevergrad/optimization/multiobjective/core.py
index 88ffc4d55..ba4ef4c27 100644
--- a/nevergrad/optimization/multiobjective/core.py
+++ b/nevergrad/optimization/multiobjective/core.py
@@ -166,8 +166,8 @@ def pareto_front(
         if size is None or size >= len(self._pareto):  # No limit: we return the full set.
             return self._pareto
         if subset == "random":
-            return self._rng.choice(self._pareto, size)  # type: ignore
-        tentatives = [self._rng.choice(self._pareto, size) for _ in range(subset_tentatives)]
+            return self._rng.choice(self._pareto, size).tolist()  # type: ignore
+        tentatives = [self._rng.choice(self._pareto, size).tolist() for _ in range(subset_tentatives)]
         if self._hypervolume is None:
             raise RuntimeError("Hypervolume not initialized, not supported")  # TODO fix
         hypervolume = self._hypervolume
@@ -192,4 +192,4 @@ def pareto_front(
                     raise ValueError(f'Unknown subset for Pareto-Set subsampling: "{subset}"')
                 score += best_score ** 2 if subset != "EPS" else max(score, best_score)
             scores += [score]
-        return tentatives[scores.index(min(scores))]
+        return tentatives[scores.index(min(scores))]  # type: ignore
diff --git a/nevergrad/optimization/oneshot.py b/nevergrad/optimization/oneshot.py
index eb4892ebc..8696386b3 100644
--- a/nevergrad/optimization/oneshot.py
+++ b/nevergrad/optimization/oneshot.py
@@ -75,14 +75,12 @@ def avg_of_k_best(archive: utils.Archive[utils.MultiValue], method: str = "dimfo
         raise ValueError(f"{method} not implemented as a method for choosing k in avg_of_k_best.")
     k = 1 if k < 1 else int(k)  # Wasted time.
-    first_k_individuals = [
-        k for k in sorted(items, key=lambda indiv: archive[indiv[0]].get_estimation("pessimistic"))[:k]
-    ]
+    first_k_individuals = sorted(items, key=lambda indiv: archive[indiv[0]].get_estimation("pessimistic"))[:k]
     assert len(first_k_individuals) == k
     return np.array(sum(p[0] for p in first_k_individuals) / k)
-# # # # # classes of optimizers # # # # #
+# # # # # classes of optimizers # # # # #
 class OneShotOptimizer(base.Optimizer):
@@ -99,7 +97,7 @@ class OneShotOptimizer(base.Optimizer):
 # - Some variants use a rescaling depending on the budget and the dimension.
-# # # # # One-shot optimizers: all fitness evaluations are in parallel. # # # # #
+# # # # # One-shot optimizers: all fitness evaluations are in parallel. # # # # #
 # pylint: disable=too-many-arguments,too-many-instance-attributes
diff --git a/nevergrad/optimization/optimizerlib.py b/nevergrad/optimization/optimizerlib.py
index 0c6835561..cec42244c 100644
--- a/nevergrad/optimization/optimizerlib.py
+++ b/nevergrad/optimization/optimizerlib.py
@@ -69,9 +69,11 @@ def __init__(
         noise_handling: tp.Optional[tp.Union[str, tp.Tuple[str, float]]] = None,
         mutation: str = "gaussian",
         crossover: bool = False,
+        use_pareto: bool = False,
     ) -> None:
         super().__init__(parametrization, budget=budget, num_workers=num_workers)
         self._sigma: float = 1
+        self.use_pareto = use_pareto
         all_params = paramhelpers.flatten_parameter(self.parametrization)
         arity = max(
             len(param.choices) if isinstance(param, p.TransitionChoice) else 500
@@ -153,6 +155,12 @@ def _internal_ask_candidate(self) -> p.Parameter:
         # crossover
         mutator = mutations.Mutator(self._rng)
         pessimistic = self.current_bests["pessimistic"].parameter.spawn_child()
+        if self.num_objectives > 1 and self.use_pareto:  # multiobjective
+            # revert to using a sample of the pareto front (not "pessimistic" though)
+            pareto = (
+                self.pareto_front()
+            )  # we can't use choice directly, because numpy does not like iterables
+            pessimistic = pareto[self._rng.choice(len(pareto))].spawn_child()
         ref = self.parametrization
         if self.crossover and self._num_ask % 2 == 1 and len(self.archive) > 2:
             data = mutator.crossover(
@@ -293,6 +301,8 @@ class ParametrizedOnePlusOne(base.ConfiguredOptimizer):
         - `"lengler"`: specific mutation rate chosen as a function of the dimension and iteration index.
     crossover: bool
         whether to add a genetic crossover step every other iteration.
+    use_pareto: bool
+        whether to restart from a random pareto element in multiobjective mode, instead of the last one added
     Notes
     -----
@@ -310,6 +320,7 @@ def __init__(
         noise_handling: tp.Optional[tp.Union[str, tp.Tuple[str, float]]] = None,
         mutation: str = "gaussian",
         crossover: bool = False,
+        use_pareto: bool = False,
     ) -> None:
         super().__init__(_OnePlusOne, locals())
diff --git a/nevergrad/optimization/test_callbacks.py b/nevergrad/optimization/test_callbacks.py
index 33f1b0ab6..178118795 100644
--- a/nevergrad/optimization/test_callbacks.py
+++ b/nevergrad/optimization/test_callbacks.py
@@ -33,9 +33,9 @@ def test_log_parameters(tmp_path: Path) -> None:
     logs = logger.load_flattened()
     assert len(logs) == 32
     assert isinstance(logs[-1]["1"], float)
-    assert len(logs[-1]) == 35
+    assert len(logs[-1]) == 36
     logs = logger.load_flattened(max_list_elements=2)
-    assert len(logs[-1]) == 27
+    assert len(logs[-1]) == 28
     # deletion
     logger = callbacks.ParametersLogger(filepath, append=False)
     assert not logger.load()
diff --git a/nevergrad/optimization/test_optimizerlib.py b/nevergrad/optimization/test_optimizerlib.py
index 9f6e15fa2..647837bd0 100644
--- a/nevergrad/optimization/test_optimizerlib.py
+++ b/nevergrad/optimization/test_optimizerlib.py
@@ -715,7 +715,7 @@ def _multiobjective(z: np.ndarray) -> tp.Tuple[float, float, float]:
     return (abs(x - 1), abs(y + 1), abs(x - y))
-@pytest.mark.parametrize("name", ["DE", "ES"])  # type: ignore
+@pytest.mark.parametrize("name", ["DE", "ES", "OnePlusOne"])  # type: ignore
 @testing.suppress_nevergrad_warnings()  # hides bad loss
 def test_mo_constrained(name: str) -> None:
     optimizer = optlib.registry[name](2, budget=60)
@@ -733,6 +733,26 @@ def constraint(arg: tp.Any) -> bool:  # pylint: disable=unused-argument
     assert optimizer._rank_method is not None  # make sure the nsga2 ranker is used
+@pytest.mark.parametrize("name", ["DE", "ES", "OnePlusOne"])  # type: ignore
+@testing.suppress_nevergrad_warnings()  # hides bad loss
+def test_mo_with_nan(name: str) -> None:
+    param = ng.p.Instrumentation(x=ng.p.Scalar(lower=0, upper=5), y=ng.p.Scalar(lower=0, upper=3))
+    optimizer = optlib.registry[name](param, budget=60)
+    optimizer.tell(ng.p.MultiobjectiveReference(), [10, 10, 10])
+    for _ in range(50):
+        cand = optimizer.ask()
+        optimizer.tell(cand, [-38, 0, np.nan])
+
+
+def test_de_sampling() -> None:
+    param = ng.p.Scalar(lower=-100, upper=100).set_mutation(sigma=1)
+    opt = optlib.LhsDE(param, budget=600, num_workers=100)
+    above_50 = 0
+    for _ in range(100):
+        above_50 += abs(opt.ask().value) > 50
+    assert above_50 > 20  # should be around 50
+
+
 def test_paraportfolio_de() -> None:
     workers = 40
     opt = optlib.ParaPortfolio(12, budget=100 * workers, num_workers=workers)
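Below is a short illustrative usage sketch, appended for context and not part of the patch: it shows how the behaviors introduced above would be exercised, namely DE's new initialization="parametrization" default (initial candidates drawn via the parametrization's sample() so bounds are taken into account) and the new use_pareto flag of ParametrizedOnePlusOne. The toy loss functions are made up for the example.

# Illustrative sketch only -- not part of the diff above.
import nevergrad as ng
from nevergrad.optimization.differentialevolution import DifferentialEvolution
from nevergrad.optimization.optimizerlib import ParametrizedOnePlusOne

# With initialization="parametrization" (the new default), DE's initial
# population is drawn with parametrization.sample(), so candidates respect
# the [-100, 100] bounds instead of clustering around 0.
bounded = ng.p.Scalar(lower=-100, upper=100)
de_opt = DifferentialEvolution()(bounded, budget=200)
best = de_opt.minimize(lambda x: (x - 3.0) ** 2)  # toy single-objective loss

# use_pareto=True (added in this diff) makes (1+1) restart from a random
# Pareto-front element in multiobjective mode instead of the pessimistic best.
scalar = ng.p.Scalar(lower=-5, upper=5)
opt = ParametrizedOnePlusOne(use_pareto=True)(scalar, budget=60)
opt.tell(ng.p.MultiobjectiveReference(), [10.0, 10.0])  # upper bounds for the two losses
for _ in range(60):
    cand = opt.ask()
    x = cand.value
    opt.tell(cand, [abs(x - 1.0), abs(x + 1.0)])  # two conflicting toy objectives
pareto_set = opt.pareto_front()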