From eff62c2f5156835f814e57bfdfe68e56c93b05f8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A9r=C3=A9my=20Rapin?=
Date: Thu, 4 Mar 2021 12:34:32 +0100
Subject: [PATCH] Add sampling layers to structure choice parameters (#1065)

---
 CHANGELOG.md                                   |   3 +
 nevergrad/optimization/optimizerlib.py         |  18 ++-
 nevergrad/optimization/test_callbacks.py       |   4 +-
 nevergrad/optimization/test_optimizerlib.py    |   8 +-
 nevergrad/parametrization/_datalayers.py       |  37 ++++-
 nevergrad/parametrization/_layering.py         |  42 ++++-
 nevergrad/parametrization/choice.py            | 146 ++++-------------
 nevergrad/parametrization/data.py              |  10 +-
 nevergrad/parametrization/mutation.py          |   4 +-
 nevergrad/parametrization/test_layers.py       |  32 +++-
 nevergrad/parametrization/test_mutation.py     |   2 +-
 nevergrad/parametrization/test_parameter.py    |  12 +-
 .../parametrization/test_parameters_legacy.py  |  17 +-
 nevergrad/parametrization/test_utils.py        |  33 ++--
 14 files changed, 188 insertions(+), 180 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9a95ce0c7..9c353a208 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,9 @@
 - `copy()` method of a `Parameter` does not change the parameter's random state anymore (it used to reset it to `None`) [#1048](https://github.com/facebookresearch/nevergrad/pull/1048)
 - `MultiobjectiveFunction` does not exist anymore [#1034](https://github.com/facebookresearch/nevergrad/pull/1034).
+- `Choice` and `TransitionChoice` have part of their API changed for consistency. In particular, `indices` is now an
+  `ng.p.Array` (and not an `np.ndarray`) which contains the selected indices (or index) of the `Choice`. The sampling is
+  performed by specific "layers" that are applied to `Data` parameters [#1065](https://github.com/facebookresearch/nevergrad/pull/1065).

 ### Important changes
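For illustration, a minimal sketch of the API change described in this changelog entry (assuming a nevergrad build that includes this patch; the printed values are indicative):

```python
import nevergrad as ng

choice = ng.p.Choice(["a", "b", "c"])
choice.value = "b"
# "indices" is now a full parameter (ng.p.Array), not a raw np.ndarray
print(type(choice.indices))   # an Array parameter carrying the sampling layer
print(choice.indices.value)   # [1] -> the index of the option selected above
```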
diff --git a/nevergrad/optimization/optimizerlib.py b/nevergrad/optimization/optimizerlib.py
index f5e5f2c5b..5da25570f 100644
--- a/nevergrad/optimization/optimizerlib.py
+++ b/nevergrad/optimization/optimizerlib.py
@@ -4,6 +4,7 @@
 # LICENSE file in the root directory of this source tree.
 import os
 import logging
+import itertools
 from collections import deque
 import warnings
 import cma
@@ -16,6 +17,8 @@
 from nevergrad.parametrization import transforms
 from nevergrad.parametrization import discretization
 from nevergrad.parametrization import helpers as paramhelpers
+from nevergrad.parametrization import _layering
+from nevergrad.parametrization import _datalayers
 from . import base
 from . import mutations
 from .base import registry as registry
@@ -2170,10 +2174,16 @@ def __init__(
         self.noise_from_instrumentation = self.has_noise and descr.deterministic_function
         self.fully_continuous = descr.continuous
         all_params = paramhelpers.flatten_parameter(self.parametrization)
-        choicetags = [p.BaseChoice.ChoiceTag.as_tag(x) for x in all_params.values()]
-        self.has_discrete_not_softmax = any(issubclass(ct.cls, p.TransitionChoice) for ct in choicetags)
-        self._has_discrete = any(issubclass(ct.cls, p.BaseChoice) for ct in choicetags)
-        self._arity = max(ct.arity for ct in choicetags)
+        # figure out if there are any discretization layers
+        int_layers = list(
+            itertools.chain.from_iterable([_layering.Int.filter_from(x) for x in all_params.values()])
+        )
+        int_layers = [x for x in int_layers if x.arity is not None]  # only "Choice" instances for now
+        self.has_discrete_not_softmax = any(
+            not isinstance(lay, _datalayers.SoftmaxSampling) for lay in int_layers
+        )
+        self._has_discrete = bool(int_layers)
+        self._arity: int = max((lay.arity for lay in int_layers), default=-1)  # type: ignore
         if self.fully_continuous:
             self._arity = -1
         self._optim: tp.Optional[base.Optimizer] = None
diff --git a/nevergrad/optimization/test_callbacks.py b/nevergrad/optimization/test_callbacks.py
index 178118795..180dce774 100644
--- a/nevergrad/optimization/test_callbacks.py
+++ b/nevergrad/optimization/test_callbacks.py
@@ -33,9 +33,9 @@ def test_log_parameters(tmp_path: Path) -> None:
     logs = logger.load_flattened()
     assert len(logs) == 32
     assert isinstance(logs[-1]["1"], float)
-    assert len(logs[-1]) == 36
+    assert len(logs[-1]) == 31
     logs = logger.load_flattened(max_list_elements=2)
-    assert len(logs[-1]) == 28
+    assert len(logs[-1]) == 27
     # deletion
     logger = callbacks.ParametersLogger(filepath, append=False)
     assert not logger.load()
diff --git a/nevergrad/optimization/test_optimizerlib.py b/nevergrad/optimization/test_optimizerlib.py
index 83119f0af..723b938fd 100644
--- a/nevergrad/optimization/test_optimizerlib.py
+++ b/nevergrad/optimization/test_optimizerlib.py
@@ -611,7 +611,7 @@ def test_shiwa_dim1() -> None:
     ],  # pylint: disable=too-many-arguments
 )
 @testing.suppress_nevergrad_warnings()
-def test_shiwa_selection(
+def test_ngopt_selection(
     name: str, param: tp.Any, budget: int, num_workers: int, expected: str, caplog: tp.Any
 ) -> None:
     with caplog.at_level(logging.DEBUG, logger="nevergrad.optimization.optimizerlib"):
@@ -640,8 +640,8 @@ def test_bo_ordering() -> None:
         ("NGOpt8", 3, 1, False, 100, ["OnePlusOne", "OnePlusOne"]),
         ("NGOpt8", 3, 1, False, 200, ["SQP", "SQP"]),
         ("NGOpt8", 3, 1, True, 1000, ["SQP", "monovariate", "monovariate"]),
-        (None, 3, 1, False, 1000, ["CMA", "CMA"]),
-        (None, 3, 20, False, 1000, ["MetaModel", "MetaModel"]),
+        (None, 3, 1, False, 1000, ["CMA", "OnePlusOne"]),
+        (None, 3, 20, False, 1000, ["MetaModel", "OnePlusOne"]),
     ],
 )
 def test_ngo_split_optimizer(
@@ -664,7 +664,7 @@ def test_ngo_split_optimizer(
         if fake_learning
         else ng.p.Choice(["const", ng.p.Array(init=list(range(dimension)))])
     )
-    opt: tp.Union[base.ConfiguredOptimizer, tp.Type[base.Optimizer]] = (
+    opt: base.OptCls = (
         xpvariants.MetaNGOpt10
        if name is None
        else (optlib.ConfSplitOptimizer(multivariate_optimizer=optlib.registry[name]))
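The optimizerlib hunk above replaces the old `ChoiceTag` introspection with layer filtering. A hypothetical stand-alone sketch of the same detection logic (`_layering` and `_datalayers` are internal modules, so treat these imports and attributes as assumptions):

```python
import itertools
import nevergrad as ng
from nevergrad.parametrization import helpers, _layering, _datalayers

param = ng.p.Instrumentation(ng.p.Scalar(), pick=ng.p.Choice(["a", "b", "c"]))
flat = helpers.flatten_parameter(param)
# collect the Int-casting layers attached anywhere in the parametrization
int_layers = list(
    itertools.chain.from_iterable(_layering.Int.filter_from(x) for x in flat.values())
)
arity = max((lay.arity for lay in int_layers if lay.arity is not None), default=-1)
print(arity)  # 3: the Choice contributes a SoftmaxSampling layer with arity 3
```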
diff --git a/nevergrad/parametrization/_datalayers.py b/nevergrad/parametrization/_datalayers.py
index 2dcdb6629..d1069da83 100644
--- a/nevergrad/parametrization/_datalayers.py
+++ b/nevergrad/parametrization/_datalayers.py
@@ -9,8 +9,10 @@
 import nevergrad.common.typing as tp
 from nevergrad.common import errors
 from . import _layering
+from ._layering import Int as Int
 from .data import Data
 from .core import Parameter
+from . import discretization
 from . import transforms as trans
 from . import utils
@@ -20,7 +22,7 @@
 BL = tp.TypeVar("BL", bound="BoundLayer")


-class Operation(_layering.Layered):
+class Operation(_layering.Layered, _layering.Filterable):

     _LAYER_LEVEL = _layering.Level.OPERATION
     _LEGACY = False
@@ -30,10 +32,6 @@ def __init__(self, *args: tp.Any, **kwargs: tp.Any) -> None:
         if any(isinstance(x, Parameter) for x in args + tuple(kwargs.values())):
             raise errors.NevergradTypeError("Operation with Parameter instances are not supported")

-    @classmethod
-    def filter_from(cls: tp.Type[Op], parameter: Parameter) -> tp.List[Op]:
-        return [x for x in parameter._layers if isinstance(x, cls)]
-

 class BoundLayer(Operation):
@@ -116,7 +114,7 @@ def _layered_sample(self) -> "Data":
         shape = super()._layered_get_value().shape
         child = root.spawn_child()
         # send new val to the layer under this one for the child
-        new_val = root.random_state.uniform(size=shape)
+        new_val = self.random_state.uniform(size=shape)
         child._layers[self._layer_index].set_normalized_value(new_val)  # type: ignore
         return child
@@ -267,3 +265,30 @@ def _layered_get_value(self) -> np.ndarray:

     def _layered_set_value(self, value: np.ndarray) -> None:
         super()._layered_set_value(self._transform.backward(value))
+
+
+class SoftmaxSampling(Int):
+    def __init__(self, arity: int, deterministic: bool = False) -> None:
+        super().__init__()
+        self.arity = arity
+        self.deterministic = deterministic
+
+    def _layered_get_value(self) -> tp.Any:
+        if self._cache is None:
+            value = _layering.Layered._layered_get_value(self)
+            if value.ndim != 2 or value.shape[1] != self.arity:
+                raise ValueError(f"Dimension 1 should be the arity {self.arity}")
+            encoder = discretization.Encoder(value, rng=self.random_state)
+            self._cache = encoder.encode(deterministic=self.deterministic)
+        return self._cache
+
+    def _layered_set_value(self, value: tp.Any) -> None:
+        if not isinstance(value, np.ndarray) or not value.dtype == int:
+            raise TypeError(f"Expected an integer array, got {value}")
+        if self.arity is None:
+            raise RuntimeError("Arity is not initialized")
+        self._cache = value
+        out = np.zeros((value.size, self.arity), dtype=float)
+        coeff = discretization.weight_for_reset(self.arity)
+        out[np.arange(value.size, dtype=int), value] = coeff
+        super()._layered_set_value(out)
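A sketch of how the new `SoftmaxSampling` layer behaves when attached to an `Array` (it mirrors `test_softmax_layer` further down; the drawn indices depend on the random state, and `_datalayers` is an internal module):

```python
import nevergrad as ng
from nevergrad.parametrization import _datalayers  # internal, subject to change

logits = ng.p.Array(shape=(1, 3))  # one row of weights per drawn index
logits.add_layer(_datalayers.SoftmaxSampling(arity=3))
index = logits.value   # softmax over the row, then one draw; cached until reset
del logits.value       # clears the cache so the next access resamples
logits.value = [2]     # writes quasi-one-hot weights via weight_for_reset
```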
diff --git a/nevergrad/parametrization/_layering.py b/nevergrad/parametrization/_layering.py
index 4cdbee8e7..8a68d4ae2 100644
--- a/nevergrad/parametrization/_layering.py
+++ b/nevergrad/parametrization/_layering.py
@@ -12,6 +12,7 @@


 L = tp.TypeVar("L", bound="Layered")
+F = tp.TypeVar("F", bound="Filterable")
 X = tp.TypeVar("X")


@@ -22,8 +23,9 @@ class Level(Enum):
     OPERATION = 10

     # final
-    ARRAY_CASTING = 800
-    INTEGER_CASTING = 900
+    INTEGER_CASTING = 800
+    ARRAY_CASTING = 900
+    SCALAR_CASTING = 950
     CONSTRAINT = 1000  # must be the last layer


@@ -34,7 +36,7 @@ class Layered:
     Layers can be added and will be ordered depending on their level
     """

-    _LAYER_LEVEL = Level.OPERATION
+    _LAYER_LEVEL = Level.OPERATION  # this provides an order for the layers

     def __init__(self) -> None:
         self._layers = [self]
@@ -92,6 +94,10 @@ def _layered_del_value(self) -> None:
     def _layered_sample(self) -> "Layered":
         return self._call_deeper("_layered_sample")  # type: ignore

+    @property
+    def random_state(self) -> np.random.RandomState:
+        return self._layers[0].random_state  # use the root random state
+
     def copy(self: L) -> L:
         """Creates a new unattached layer with the same behavior"""
         new = copy.copy(self)
@@ -177,7 +183,7 @@ def __delete__(self, obj: Layered) -> None:
 class _ScalarCasting(Layered):
     """Cast Array as a scalar"""

-    _LAYER_LEVEL = Level.INTEGER_CASTING
+    _LAYER_LEVEL = Level.SCALAR_CASTING

     def _layered_get_value(self) -> float:
         out = super()._layered_get_value()  # pulls from previous layer
@@ -204,10 +210,32 @@ def _layered_set_value(self, value: tp.ArrayLike) -> None:
         super()._layered_set_value(np.asarray(value))


-class Int(Layered):
+class Filterable:
+    @classmethod
+    def filter_from(cls: tp.Type[F], parameter: Layered) -> tp.List[F]:
+        return [x for x in parameter._layers if isinstance(x, cls)]  # type: ignore
+
+
+class Int(Layered, Filterable):
     """Cast Data as integer (or integer array)"""

-    _LAYER_LEVEL = Level.OPERATION
+    _LAYER_LEVEL = Level.INTEGER_CASTING
+
+    def __init__(self) -> None:
+        super().__init__()
+        self.arity: tp.Optional[int] = None
+        self.deterministic = True
+        self._cache: tp.Optional[np.ndarray] = None

     def _layered_get_value(self) -> np.ndarray:
-        return np.round(super()._layered_get_value()).astype(int)  # type: ignore
+        bounds = self._layers[0].bounds  # type: ignore
+        out = np.round(super()._layered_get_value()).astype(int)
+        # make sure rounding does not reach beyond the bounds
+        if bounds[0] is not None:
+            out = np.maximum(int(np.round(bounds[0] + 0.5)), out)
+        if bounds[1] is not None:
+            out = np.minimum(int(np.round(bounds[1] - 0.5)), out)
+        return out  # type: ignore
+
+    def _layered_del_value(self) -> None:
+        self._cache = None  # clear cache!
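The `Int` layer above now clamps rounded values so they cannot escape the underlying float bounds; the `±0.5` arithmetic picks the closest admissible integers. A small worked example of that arithmetic (plain numpy, matching `test_bounded_int_casting` further down):

```python
import numpy as np

lower, upper = -10.9, 10.9       # float bounds of the underlying Data
lo = int(np.round(lower + 0.5))  # -10: smallest integer the layer may return
hi = int(np.round(upper - 0.5))  # 10: largest integer the layer may return
raw = np.round(np.array([-42.0, 2.4, 42.0])).astype(int)
print(np.minimum(hi, np.maximum(lo, raw)))  # [-10   2  10]
```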
diff --git a/nevergrad/parametrization/choice.py b/nevergrad/parametrization/choice.py
index dd400b281..00665c3d0 100644
--- a/nevergrad/parametrization/choice.py
+++ b/nevergrad/parametrization/choice.py
@@ -2,13 +2,12 @@
 #
 # This source code is licensed under the MIT license found in the
 # LICENSE file in the root directory of this source tree.
-import warnings
 import numpy as np
 import nevergrad.common.typing as tp
 from . import discretization
-from . import utils
 from . import core
 from . import container
+from . import _datalayers
 from .data import Array

 # weird pylint issue on "Descriptors"
@@ -19,25 +18,7 @@
 T = tp.TypeVar("T", bound="TransitionChoice")


-class ChoiceTag(tp.NamedTuple):
-    cls: tp.Type[core.Parameter]
-    arity: int
-
-    @classmethod
-    def as_tag(cls, param: core.Parameter) -> "ChoiceTag":
-        # arrays inherit tags to identify them as bound to a choice
-        if cls in param.heritage:  # type: ignore
-            output = param.heritage[cls]  # type: ignore
-            assert isinstance(output, cls)
-            return output
-        arity = len(param.choices) if isinstance(param, BaseChoice) else -1
-        return cls(type(param), arity)
-
-
 class BaseChoice(container.Container):
-
-    ChoiceTag = ChoiceTag
-
     def __init__(
         self, *, choices: tp.Iterable[tp.Any], repetitions: tp.Optional[int] = None, **kwargs: tp.Any
     ) -> None:
@@ -48,14 +29,6 @@ def __init__(
             raise ValueError(f"{self.__class__.__name__} received an empty list of options.")
         super().__init__(choices=container.Tuple(*lchoices), **kwargs)

-    def _compute_descriptors(self) -> utils.Descriptors:
-        deterministic = getattr(self, "_deterministic", True)
-        ordered = not hasattr(self, "_deterministic")
-        internal = utils.Descriptors(
-            deterministic=deterministic, continuous=not deterministic, ordered=ordered
-        )
-        return self.choices.descriptors & internal
-
     def __len__(self) -> int:
         """Number of choices"""
         return len(self.choices)
@@ -71,13 +44,14 @@ def _get_parameters_str(self) -> str:
     @property
     def index(self) -> int:  # delayed choice
         """Index of the chosen option"""
-        assert self.indices.size == 1
-        return int(self.indices[0])
+        inds = self.indices.value
+        assert inds.size == 1
+        return int(inds[0])

     @property
-    def indices(self) -> np.ndarray:
-        """Indices of the chosen options"""
-        raise NotImplementedError  # TODO remove index?
+    def indices(self) -> Array:
+        """Array of indices of the chosen option"""
+        return self["indices"]  # type: ignore

     @property
     def choices(self) -> container.Tuple:
@@ -87,9 +61,9 @@ def choices(self) -> container.Tuple:
     def _layered_get_value(self) -> tp.Any:
         if self._repetitions is None:
             return core.as_parameter(self.choices[self.index]).value
-        return tuple(core.as_parameter(self.choices[ind]).value for ind in self.indices)
+        return tuple(core.as_parameter(self.choices[ind]).value for ind in self.indices.value)

-    def _layered_set_value(self, value: tp.List[tp.Any]) -> np.ndarray:
+    def _layered_set_value(self, value: tp.List[tp.Any]) -> None:
         """Must be adapted to each class
         This handles a list of values, not just one
         """  # TODO this is currently very messy, may need some improvement
                 pass
             if indices[i] == -1:
                 raise ValueError(f"Could not figure out where to put value {value}")
-        return indices
+        self.indices.value = indices

     def get_value_hash(self) -> tp.Hashable:
         hashes: tp.List[tp.Hashable] = []
-        for ind in self.indices:
+        for ind in self.indices.value:
             c = self.choices[int(ind)]
             const = isinstance(c, core.Constant) or not isinstance(c, core.Parameter)
             hashes.append(int(ind) if const else (int(ind), c.get_value_hash()))
@@ -161,12 +135,13 @@ def __init__(
     ) -> None:
         lchoices = list(choices)
         rep = 1 if repetitions is None else repetitions
+        indices = Array(shape=(rep, len(lchoices)), mutable_sigma=False)
+        indices.add_layer(_datalayers.SoftmaxSampling(len(lchoices), deterministic=deterministic))
         super().__init__(
             choices=lchoices,
             repetitions=repetitions,
-            weights=Array(shape=(rep, len(lchoices)), mutable_sigma=False),
+            indices=indices,
         )
-        self.weights.heritage[BaseChoice.ChoiceTag] = BaseChoice.ChoiceTag(self.__class__, len(lchoices))
         self._deterministic = deterministic
         self._indices: tp.Optional[np.ndarray] = None

@@ -178,54 +153,22 @@ def _get_name(self) -> str:
             name = cls + "{det}" + name[len(cls) :]
         return name

-    @property
-    def indices(self) -> np.ndarray:  # delayed choice
-        """Index of the chosen option"""
-        if self._indices is None:
-            self._draw(deterministic=self._deterministic)
-        assert self._indices is not None
-        return self._indices
-
-    @property
-    def weights(self) -> Array:
-        """The weights used to draw the value"""
-        return self["weights"]  # type: ignore
-
-    @property
-    def probabilities(self) -> np.ndarray:
-        """The probabilities used to draw the value"""
-        exp = np.exp(self.weights.value)
-        return exp / np.sum(exp)  # type: ignore
-
-    def _layered_set_value(self, value: tp.Any) -> np.ndarray:
-        indices = super()._layered_set_value(value)
-        self._indices = indices
-        # force new probabilities
-        arity = self.weights.value.shape[1]
-        coeff = discretization.weight_for_reset(arity)
-        self.weights._value.fill(0.0)  # reset since there is no reference
-        out = np.array(self.weights._value, copy=True)  # just a zero matrix
-        out[np.arange(indices.size), indices] = coeff
-        self.weights.set_standardized_data(out.ravel(), deterministic=True)
-        return indices
-
-    def _draw(self, deterministic: bool = True) -> None:
-        encoder = discretization.Encoder(self.weights.value, rng=self.random_state)
-        self._indices = encoder.encode(deterministic=deterministic or self._deterministic)
-
     def _internal_set_standardized_data(
         self: C, data: np.ndarray, reference: C, deterministic: bool = False
     ) -> None:
+        softmax = self.indices._layers[-2]
+        assert isinstance(softmax, _datalayers.SoftmaxSampling)
+        softmax.deterministic = deterministic or self._deterministic
         super()._internal_set_standardized_data(data, reference=reference, deterministic=deterministic)
-        self._draw(deterministic=deterministic)
+        # pylint: disable=pointless-statement
+        self.indices  # make sure to draw
+        softmax.deterministic = self._deterministic

     def mutate(self) -> None:
         # force random_state sync
         self.random_state  # pylint: disable=pointless-statement
-        self.weights.mutate()
-        self._draw(deterministic=self._deterministic)
-        indices = set(self.indices)
-        for ind in indices:
+        self.indices.mutate()
+        for ind in self.indices.value:
             self.choices[ind].mutate()

@@ -259,47 +202,24 @@ def __init__(
         repetitions: tp.Optional[int] = None,
     ) -> None:
         choices = list(choices)
-        positions = Array(init=len(choices) / 2.0 * np.ones((repetitions if repetitions is not None else 1,)))
-        positions.set_bounds(0, len(choices), method="gaussian")
-        positions.heritage[BaseChoice.ChoiceTag] = BaseChoice.ChoiceTag(self.__class__, len(choices))
+        indices = Array(init=len(choices) / 2.0 * np.ones((repetitions if repetitions is not None else 1,)))
+        indices.set_bounds(0, len(choices), method="gaussian")
+        indices = indices - 0.5
+        intcasting = _datalayers.Int()
+        intcasting.arity = len(choices)
+        indices.add_layer(intcasting)
         super().__init__(
             choices=choices,
             repetitions=repetitions,
-            positions=positions,
+            indices=indices,
             transitions=transitions if isinstance(transitions, Array) else np.array(transitions, copy=False),
         )
         assert self.transitions.value.ndim == 1

-    @property
-    def indices(self) -> np.ndarray:
-        return np.minimum(len(self) - 1e-9, self.positions.value).astype(int)  # type: ignore
-
-    def _layered_set_value(self, value: tp.Any) -> np.ndarray:
-        indices = super()._layered_set_value(value)  # only one value for this class
-        self._set_index(indices)
-        return indices
-
-    def _set_index(self, indices: np.ndarray) -> None:
-        self.positions.value = indices + 0.5
-
     @property
     def transitions(self) -> Array:
-        """The weights used to draw the step to the next value"""
         return self["transitions"]  # type: ignore

-    @property
-    def position(self) -> Array:
-        """The continuous version of the index (used when working with standardized space)"""
-        warnings.warn(
-            "position is replaced by positions in order to allow for repetitions", DeprecationWarning
-        )
-        return self.positions
-
-    @property
-    def positions(self) -> Array:
-        """The continuous version of the index (used when working with standardized space)"""
-        return self["positions"]  # type: ignore
-
     def mutate(self) -> None:
         # force random_state sync
         self.random_state  # pylint: disable=pointless-statement
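To summarize the new `TransitionChoice` encoding above: positions live in `[0, len(choices)]`, the `- 0.5` shift (an `Add` layer) centers each integer `i` on the interval `[i, i + 1)`, and the `Int` layer then rounds and clamps. A hedged usage sketch (assuming this patch; values follow from the arithmetic above):

```python
import nevergrad as ng

tc = ng.p.TransitionChoice(["a", "b", "c"])
print(tc.indices.value)  # [1]: positions start at len(choices) / 2, shifted and rounded
tc.value = "c"
print(tc.indices.value)  # [2]: indices is a bounded, shifted, Int-casted Array
```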
@@ -307,12 +227,12 @@ def mutate(self) -> None:
         transitions.mutate()
         rep = 1 if self._repetitions is None else self._repetitions
         #
-        enc = discretization.Encoder(np.ones((rep, 1)) * np.log(self.transitions.value), self.random_state)
+        enc = discretization.Encoder(np.ones((rep, 1)) * np.log(self["transitions"].value), self.random_state)
         moves = enc.encode()
         signs = self.random_state.choice([-1, 1], size=rep)
-        new_index = np.clip(self.indices + signs * moves, 0, len(self) - 1)
-        self._set_index(new_index.ravel())
+        new_index = np.clip(self.indices.value + signs * moves, 0, len(self) - 1)
+        self.indices.value = new_index
         # mutate corresponding parameter
-        indices = set(self.indices)
+        indices = set(self.indices.value)
         for ind in indices:
             self.choices[ind].mutate()
diff --git a/nevergrad/parametrization/data.py b/nevergrad/parametrization/data.py
index 70f62d489..f859f27e1 100644
--- a/nevergrad/parametrization/data.py
+++ b/nevergrad/parametrization/data.py
@@ -143,7 +143,15 @@ def dimension(self) -> int:
         return int(np.prod(self._value.shape))

     def _compute_descriptors(self) -> utils.Descriptors:
-        return utils.Descriptors(continuous=not self.integer)
+        from . import _datalayers
+
+        intlayers = _layering.Int.filter_from(self)
+        deterministic = all(lay.deterministic for lay in intlayers)
+        return utils.Descriptors(
+            deterministic=deterministic,
+            continuous=not (deterministic and bool(intlayers)),
+            ordered=not any(isinstance(lay, _datalayers.SoftmaxSampling) for lay in intlayers),
+        )

     def _get_name(self) -> str:
         cls = self.__class__.__name__
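With `_compute_descriptors` now derived from the attached layers (above), a stochastic softmax `Choice` should report itself as non-deterministic and unordered, while a `TransitionChoice` keeps a deterministic description. A sketch of the expected outcome (assuming this patch; exact aggregation across containers may differ):

```python
import nevergrad as ng

choice = ng.p.Choice([1, 2, 3])             # softmax sampling: stochastic, unordered
print(choice.descriptors.deterministic)     # False
print(choice.descriptors.ordered)           # False
tchoice = ng.p.TransitionChoice([1, 2, 3])  # plain Int casting: deterministic
print(tchoice.descriptors.deterministic)    # True
```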
diff --git a/nevergrad/parametrization/mutation.py b/nevergrad/parametrization/mutation.py
index ae9308d23..3e583ef4a 100644
--- a/nevergrad/parametrization/mutation.py
+++ b/nevergrad/parametrization/mutation.py
@@ -271,6 +271,6 @@ def _apply_array(self, arrays: tp.Sequence[np.ndarray]) -> np.ndarray:
         assert data.shape == self.shape
         shift = self.shift.value
         # update shift array
-        shifts = self.shift.weights.value
-        self.shift.weights.value = np.roll(shifts, shift)  # update probas
+        shifts = self.shift.indices._value
+        self.shift.indices._value = np.roll(shifts, shift)  # update probas
         return np.roll(data, shift, axis=self.axis)  # type: ignore
diff --git a/nevergrad/parametrization/test_layers.py b/nevergrad/parametrization/test_layers.py
index b6b582c0d..e0c5f7e0e 100644
--- a/nevergrad/parametrization/test_layers.py
+++ b/nevergrad/parametrization/test_layers.py
@@ -90,4 +90,34 @@ def test_clipping_standardized_data() -> None:

 def test_bound_estimation() -> None:
     param = (_datalayers.Bound(-10, 10)(ng.p.Scalar()) + 3) * 5
-    assert param.bounds == (-35, 65)
+    assert param.bounds == (-35, 65)  # type: ignore
+
+
+def test_softmax_layer() -> None:
+    param = ng.p.Array(shape=(4, 3))
+    param.random_state.seed(12)
+    param.add_layer(_datalayers.SoftmaxSampling(arity=3))
+    assert param.value.tolist() == [0, 2, 0, 1]
+    assert param.value.tolist() == [0, 2, 0, 1], "Different indices at the second call"
+    del param.value
+    assert param.value.tolist() == [0, 2, 2, 0], "Same indices after resampling"
+    param.value = [0, 1, 2, 0]  # type: ignore
+    assert param.value.tolist() == [0, 1, 2, 0]
+    expected = np.zeros((4, 3))
+    expected[[0, 1, 2, 3], [0, 1, 2, 0]] = 0.6931
+    np.testing.assert_array_almost_equal(param._value, expected, decimal=4)
+
+
+def test_deterministic_softmax_layer() -> None:
+    param = ng.p.Array(shape=(1, 100))
+    param.add_layer(_datalayers.SoftmaxSampling(arity=100, deterministic=True))
+    param._value[0, 12] = 1
+    assert param.value.tolist() == [12]
+
+
+def test_bounded_int_casting() -> None:
+    param = _datalayers.Bound(-10.9, 10.9, method="clipping")(ng.p.Scalar())
+    param.add_layer(_datalayers.Int())
+    for move, val in [(2.4, 2), (0.2, 3), (42, 10), (-42, -10)]:
+        param.set_standardized_data([move])
+        assert param.value == val, f"Wrong value after move {move}"
diff --git a/nevergrad/parametrization/test_mutation.py b/nevergrad/parametrization/test_mutation.py
index 65ee07ddd..209fbe726 100644
--- a/nevergrad/parametrization/test_mutation.py
+++ b/nevergrad/parametrization/test_mutation.py
@@ -84,7 +84,7 @@ def test_tuned_translation() -> None:
     expected = np.array([3, 0, 1, 2])[:, None].dot(np.ones((1, 2)))
     np.testing.assert_array_equal(out, expected)
     roll.mutate()
-    assert np.sum(np.abs(roll.shift.weights.value)) > 0
+    assert np.sum(np.abs(roll.shift.indices._value)) > 0


 @testing.parametrized(
diff --git a/nevergrad/parametrization/test_parameter.py b/nevergrad/parametrization/test_parameter.py
index 9f7295b8d..02aaa3f1d 100644
--- a/nevergrad/parametrization/test_parameter.py
+++ b/nevergrad/parametrization/test_parameter.py
@@ -181,14 +181,14 @@ def check_parameter_freezable(param: par.Parameter) -> None:
         par.Instrumentation(par.Array(shape=(2,)), string="blublu", truc="plop"),
         "Instrumentation(Tuple(Array{(2,)}),Dict(string=blublu,truc=plop))",
     ),
-    (par.Choice([1, 12]), "Choice(choices=Tuple(1,12),weights=Array{(1,2)})"),
+    (par.Choice([1, 12]), "Choice(choices=Tuple(1,12),indices=Array{(1,2),SoftmaxSampling})"),
     (
         par.Choice([1, 12], deterministic=True),
-        "Choice{det}(choices=Tuple(1,12),weights=Array{(1,2)})",
+        "Choice{det}(choices=Tuple(1,12),indices=Array{(1,2),SoftmaxSampling})",
     ),
     (
         par.TransitionChoice([1, 12]),
-        "TransitionChoice(choices=Tuple(1,12),positions=Array{Cd(0,2)},transitions=[1. 1.])",
+        "TransitionChoice(choices=Tuple(1,12),indices=Array{Cd(0,2),Add,Int},transitions=[1. 1.])",
     ),
 ],
 )
@@ -360,9 +360,7 @@ def test_choice_repetitions() -> None:
     assert len(choice) == 4
     assert choice.value == (0, 2)
     choice.value = (3, 1)
-    expected = np.zeros((2, 4))
-    expected[[0, 1], [3, 1]] = 0.588
-    np.testing.assert_almost_equal(choice.weights.value, expected, decimal=3)
+    assert choice.indices.value.tolist() == [3, 1]
     choice.mutate()


@@ -372,7 +370,7 @@ def test_transition_choice_repetitions() -> None:
     assert len(choice) == 4
     assert choice.value == (2, 2)
     choice.value = (3, 1)
-    np.testing.assert_almost_equal(choice.positions.value, [3.5, 1.5], decimal=3)
+    np.testing.assert_almost_equal(choice.indices.value, [3, 1], decimal=3)
     choice.mutate()
     assert choice.value == (3, 0)
diff --git a/nevergrad/parametrization/test_parameters_legacy.py b/nevergrad/parametrization/test_parameters_legacy.py
index 8aac648cd..54970ea06 100644
--- a/nevergrad/parametrization/test_parameters_legacy.py
+++ b/nevergrad/parametrization/test_parameters_legacy.py
@@ -10,7 +10,6 @@
 import typing as tp
 import numpy as np
 import pytest
-from nevergrad.common import testing
 from . import parameter as p


@@ -32,7 +31,7 @@ def test_instrumentation() -> None:
     data = instru2.spawn_child(new_value=((4, 3), dict(a=0, b=3))).get_standardized_data(reference=instru2)
     np.testing.assert_array_almost_equal(data, [4, -1.1503, 0, 0, 0, 0.5878], decimal=4)
     args, kwargs = instru.spawn_child().set_standardized_data(data, deterministic=True).value
-    testing.printed_assert_equal((args, kwargs), ((4.0, 3), {"a": 0, "b": 3}))
+    assert (args, kwargs) == ((4.0, 3), {"a": 0, "b": 3})
     assert "3),Dict(a=TransitionChoice(choices=Tuple(0,1,2,3)," in repr(
         instru
     ), f"Erroneous representation {instru}"
@@ -50,19 +49,19 @@ def test_instrumentation() -> None:
     # instru2 = mvar.Instrumentation(*instru.args, **instru.kwargs)  # TODO: OUCH SILENT FAIL
     instru2.copy()
     data = np.random.normal(0, 1, size=6)
-    testing.printed_assert_equal(
-        instru2.spawn_child().set_standardized_data(data, deterministic=True).value,
-        instru.spawn_child().set_standardized_data(data, deterministic=True).value,
+    assert (
+        instru2.spawn_child().set_standardized_data(data, deterministic=True).value
+        == instru.spawn_child().set_standardized_data(data, deterministic=True).value
     )
     # check naming
     instru_str = (
         "Instrumentation(Tuple(Scalar[sigma=Log{exp=2.0}],3),"
         "Dict(a=TransitionChoice(choices=Tuple(0,1,2,3),"
-        "positions=Array{Cd(0,4)},transitions=[1. 1.]),"
-        "b=Choice(choices=Tuple(0,1,2,3),weights=Array{(1,4)})))"
+        "indices=Array{Cd(0,4),Add,Int},transitions=[1. 
1.])," + "b=Choice(choices=Tuple(0,1,2,3),indices=Array{(1,4),SoftmaxSampling})))" ) - testing.printed_assert_equal(instru.name, instru_str) - testing.printed_assert_equal("blublu", instru.set_name("blublu").name) + assert instru.name == instru_str + assert instru.set_name("blublu").name == "blublu" def _false(value: tp.Any) -> bool: # pylint: disable=unused-argument diff --git a/nevergrad/parametrization/test_utils.py b/nevergrad/parametrization/test_utils.py index 1725da293..0f1c39b3c 100644 --- a/nevergrad/parametrization/test_utils.py +++ b/nevergrad/parametrization/test_utils.py @@ -49,17 +49,17 @@ def test_command_function() -> None: v_tuple_=(True, p.Tuple(p.Scalar(), p.Array(shape=(2,))), ("0", "1")), instrumentation=(False, p.Instrumentation(p.Scalar(), y=p.Scalar()), ("", "0", "y")), instrumentation_v=(True, p.Instrumentation(p.Scalar(), y=p.Scalar()), ("0", "y")), - choice=(False, p.Choice([p.Scalar(), "blublu"]), ("", "choices", "choices.0", "choices.1", "weights")), - v_choice=(True, p.Choice([p.Scalar(), "blublu"]), ("", "choices.0", "weights")), + choice=(False, p.Choice([p.Scalar(), "blublu"]), ("", "choices", "choices.0", "choices.1", "indices")), + v_choice=(True, p.Choice([p.Scalar(), "blublu"]), ("", "choices.0", "indices")), tuple_choice_dict=( False, p.Tuple(p.Choice([p.Dict(x=p.Scalar(), y=12), p.Scalar()])), - ("", "0", "0.choices", "0.choices.0", "0.choices.0.x", "0.choices.0.y", "0.choices.1", "0.weights"), + ("", "0", "0.choices", "0.choices.0", "0.choices.0.x", "0.choices.0.y", "0.choices.1", "0.indices"), ), v_tuple_choice_dict=( True, p.Tuple(p.Choice([p.Dict(x=p.Scalar(), y=12), p.Scalar()])), - ("0", "0.choices.0.x", "0.choices.1", "0.weights"), + ("0", "0.choices.0.x", "0.choices.1", "0.indices"), ), ) def test_flatten_parameter(no_container: bool, param: p.Parameter, keys: tp.Iterable[str]) -> None: @@ -72,7 +72,7 @@ def test_flatten_parameter(no_container: bool, param: p.Parameter, keys: tp.Iter # that everything works as intended v_tuple_choice_dict=( p.Tuple(p.Choice([p.Dict(x=p.Scalar(), y=12), p.Scalar()])), - ["0.choices.0.x", "0.choices.1", "0.weights"], + ["0.choices.0.x", "0.choices.1", "0.indices"], ), multiple=( p.Instrumentation( @@ -81,7 +81,7 @@ def test_flatten_parameter(no_container: bool, param: p.Parameter, keys: tp.Iter z=p.Array(init=[12, 12]).set_bounds(lower=12, upper=15), y=p.Array(init=[1, 1]), ), - ["0", "x.choices.1", "x.weights", "y", "z"], + ["0", "x.choices.1", "x.indices", "y", "z"], ), ) def test_split_as_data_parameters(param: p.Parameter, names: tp.List[str]) -> None: @@ -90,15 +90,15 @@ def test_split_as_data_parameters(param: p.Parameter, names: tp.List[str]) -> No @testing.parametrized( - order_0=(0, ("", "choices.0.x", "choices.1", "weights")), - order_1=(1, ("", "choices.0.x", "choices.1", "weights", "choices.1#sigma", "choices.0.x#sigma")), + order_0=(0, ("", "choices.0.x", "choices.1", "indices")), + order_1=(1, ("", "choices.0.x", "choices.1", "indices", "choices.1#sigma", "choices.0.x#sigma")), order_2=( 2, ( "", "choices.0.x", "choices.1", - "weights", + "indices", "choices.1#sigma", "choices.0.x#sigma", "choices.1#sigma#sigma", @@ -110,7 +110,7 @@ def test_split_as_data_parameters(param: p.Parameter, names: tp.List[str]) -> No "", "choices.0.x", "choices.1", - "weights", + "indices", "choices.1#sigma", "choices.0.x#sigma", "choices.1#sigma#sigma", @@ -128,19 +128,6 @@ def test_descriptors() -> None: assert repr(desc) == "Descriptors(ordered=False)" -@testing.parametrized( - dict_param=(p.Dict(x=p.Scalar(), y=12), 
p.Dict, -1), - scalar=(p.Scalar(), p.Scalar, -1), - array=(p.Array(shape=(3, 2)), p.Array, -1), - choice=(p.Choice([1, 2, 3]), p.Choice, 3), - choice_weight=(p.Choice([1, 2, 3]).weights, p.Choice, 3), -) -def test_parameter_as_choice_tag(param: p.Parameter, cls: tp.Type[p.Parameter], arity: int) -> None: - tag = p.BaseChoice.ChoiceTag.as_tag(param) - assert tag.cls == cls - assert tag.arity == arity - - @testing.parametrized( true=(True, 0.0), false=(False, 1.0),