diff --git a/keras/losses.py b/keras/losses.py
index 21841e2f5e7..1d8a06f6f18 100644
--- a/keras/losses.py
+++ b/keras/losses.py
@@ -44,7 +44,7 @@ class Loss:
     To be implemented by subclasses:
     * `call()`: Contains the logic for loss calculation using `y_true`,
-    `y_pred`.
+      `y_pred`.
 
     Example subclass implementation:
 
@@ -52,7 +52,7 @@ class Loss:
     class MeanSquaredError(Loss):
 
       def call(self, y_true, y_pred):
-        return tf.reduce_mean(tf.math.square(y_pred - y_true), axis=-1)
+          return tf.reduce_mean(tf.math.square(y_pred - y_true), axis=-1)
     ```
 
     When using a Loss under a `tf.distribute.Strategy`, except passing it
@@ -69,16 +69,17 @@ def __init__(self, reduction=losses_utils.ReductionV2.AUTO, name=None):
        """Initializes `Loss` class.
 
        Args:
-          reduction: Type of `tf.keras.losses.Reduction` to apply to
-            loss. Default value is `AUTO`. `AUTO` indicates that the reduction
-            option will be determined by the usage context. For almost all cases
-            this defaults to `SUM_OVER_BATCH_SIZE`. When used under a
-            `tf.distribute.Strategy`, except via `Model.compile()` and
-            `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
-            will raise an error. Please see this custom training [tutorial](
-            https://www.tensorflow.org/tutorials/distribute/custom_training)
-            for more details.
-          name: Optional name for the instance.
+            reduction: Type of `tf.keras.losses.Reduction` to apply to
+                loss. Default value is `AUTO`. `AUTO` indicates that the
+                reduction option will be determined by the usage context. For
+                almost all cases this defaults to `SUM_OVER_BATCH_SIZE`. When
+                used under a `tf.distribute.Strategy`, except via
+                `Model.compile()` and `Model.fit()`, using `AUTO` or
+                `SUM_OVER_BATCH_SIZE` will raise an error. Please see this
+                custom training [tutorial](
+                https://www.tensorflow.org/tutorials/distribute/custom_training)
+                for more details.
+            name: Optional name for the instance.
        """
        losses_utils.ReductionV2.validate(reduction)
        self.reduction = reduction
@@ -102,26 +103,26 @@ def __call__(self, y_true, y_pred, sample_weight=None):
        """Invokes the `Loss` instance.
 
        Args:
-          y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`, except
-            sparse loss functions such as sparse categorical crossentropy where
-            shape = `[batch_size, d0, .. dN-1]`
-          y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`
-          sample_weight: Optional `sample_weight` acts as a coefficient for the
-            loss. If a scalar is provided, then the loss is simply scaled by the
-            given value. If `sample_weight` is a tensor of size `[batch_size]`,
-            then the total loss for each sample of the batch is rescaled by the
-            corresponding element in the `sample_weight` vector. If the shape of
-            `sample_weight` is `[batch_size, d0, .. dN-1]` (or can be
-            broadcasted to this shape), then each loss element of `y_pred` is
-            scaled by the corresponding value of `sample_weight`. (Note
-            on`dN-1`: all loss functions reduce by 1 dimension, usually
-            axis=-1.)
+            y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`,
+                except sparse loss functions such as sparse categorical
+                crossentropy where shape = `[batch_size, d0, .. dN-1]`
+            y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`
+            sample_weight: Optional `sample_weight` acts as a coefficient for
+                the loss. If a scalar is provided, then the loss is simply
+                scaled by the given value. If `sample_weight` is a tensor of
+                size `[batch_size]`, then the total loss for each sample of the
+                batch is rescaled by the corresponding element in the
+                `sample_weight` vector.
+                If the shape of `sample_weight` is
+                `[batch_size, d0, .. dN-1]` (or can be broadcasted to this
+                shape), then each loss element of `y_pred` is scaled by the
+                corresponding value of `sample_weight`. (Note on `dN-1`: all
+                loss functions reduce by 1 dimension, usually axis=-1.)
 
        Returns:
-          Weighted loss float `Tensor`. If `reduction` is `NONE`, this has
-            shape `[batch_size, d0, .. dN-1]`; otherwise, it is scalar. (Note
-            `dN-1` because all loss functions reduce by 1 dimension, usually
-            axis=-1.)
+            Weighted loss float `Tensor`. If `reduction` is `NONE`, this has
+            shape `[batch_size, d0, .. dN-1]`; otherwise, it is scalar.
+            (Note `dN-1` because all loss functions reduce by 1 dimension,
+            usually axis=-1.)
 
        Raises:
            ValueError: If the shape of `sample_weight` is invalid.
@@ -183,13 +184,13 @@ def call(self, y_true, y_pred):
        """Invokes the `Loss` instance.
 
        Args:
-          y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`, except
-            sparse loss functions such as sparse categorical crossentropy where
-            shape = `[batch_size, d0, .. dN-1]`
-          y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`
+            y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`,
+                except sparse loss functions such as sparse categorical
+                crossentropy where shape = `[batch_size, d0, .. dN-1]`
+            y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`
 
        Returns:
-          Loss values with the shape `[batch_size, d0, .. dN-1]`.
+            Loss values with the shape `[batch_size, d0, .. dN-1]`.
        """
        raise NotImplementedError("Must be implemented in subclasses.")
 
@@ -229,19 +230,20 @@ def __init__(
        """Initializes `LossFunctionWrapper` class.
 
        Args:
-          fn: The loss function to wrap, with signature `fn(y_true, y_pred,
-            **kwargs)`.
-          reduction: Type of `tf.keras.losses.Reduction` to apply to
-            loss. Default value is `AUTO`. `AUTO` indicates that the reduction
-            option will be determined by the usage context. For almost all cases
-            this defaults to `SUM_OVER_BATCH_SIZE`. When used under a
-            `tf.distribute.Strategy`, except via `Model.compile()` and
-            `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
-            will raise an error. Please see this custom training [tutorial](
-            https://www.tensorflow.org/tutorials/distribute/custom_training)
-            for more details.
-          name: Optional name for the instance.
-          **kwargs: The keyword arguments that are passed on to `fn`.
+            fn: The loss function to wrap, with signature `fn(y_true, y_pred,
+                **kwargs)`.
+            reduction: Type of `tf.keras.losses.Reduction` to apply to
+                loss. Default value is `AUTO`. `AUTO` indicates that the
+                reduction option will be determined by the usage context. For
+                almost all cases this defaults to `SUM_OVER_BATCH_SIZE`. When
+                used under a `tf.distribute.Strategy`, except via
+                `Model.compile()` and `Model.fit()`, using `AUTO` or
+                `SUM_OVER_BATCH_SIZE` will raise an error. Please see this
+                custom training [tutorial](
+                https://www.tensorflow.org/tutorials/distribute/custom_training)
+                for more details.
+            name: Optional name for the instance.
+            **kwargs: The keyword arguments that are passed on to `fn`.
        """
        super().__init__(reduction=reduction, name=name)
        self.fn = fn
@@ -251,11 +253,11 @@ def call(self, y_true, y_pred):
        """Invokes the `LossFunctionWrapper` instance.
 
        Args:
-          y_true: Ground truth values.
-          y_pred: The predicted values.
+            y_true: Ground truth values.
+            y_pred: The predicted values.
 
        Returns:
-          Loss values per sample.
+            Loss values per sample.
        """
        if tf.is_tensor(y_pred) and tf.is_tensor(y_true):
            y_pred, y_true = losses_utils.squeeze_or_expand_dimensions(
                y_pred, y_true
            )
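Editor's note: the subclassing contract documented in the two hunks above (implement `call()` returning per-sample values, let `__call__` handle weighting and reduction) is easy to demonstrate. A minimal sketch follows; the class name and threshold are illustrative only, not part of this diff:

```python
import tensorflow as tf

class ThresholdedMAE(tf.keras.losses.Loss):
    """Toy loss: mean absolute error, zeroed below a small threshold."""

    def __init__(self, threshold=0.1, name="thresholded_mae"):
        super().__init__(name=name)  # default reduction stays AUTO
        self.threshold = threshold

    def call(self, y_true, y_pred):
        # Mirror the file's own pattern: convert, cast, reduce over axis=-1.
        y_pred = tf.convert_to_tensor(y_pred)
        y_true = tf.cast(y_true, y_pred.dtype)
        err = tf.abs(y_true - y_pred)
        err = tf.where(err < self.threshold, tf.zeros_like(err), err)
        return tf.reduce_mean(err, axis=-1)  # shape [batch_size, d0, .. dN-1]

# __call__ applies the AUTO reduction, so this prints a scalar (~0.1).
print(ThresholdedMAE()([[0.0, 1.0]], [[0.05, 0.8]]).numpy())
```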
""" if tf.is_tensor(y_pred) and tf.is_tensor(y_true): y_pred, y_true = losses_utils.squeeze_or_expand_dimensions( @@ -343,17 +345,18 @@ def __init__( """Initializes `MeanSquaredError` instance. Args: - reduction: Type of `tf.keras.losses.Reduction` to apply to - loss. Default value is `AUTO`. `AUTO` indicates that the reduction - option will be determined by the usage context. For almost all cases - this defaults to `SUM_OVER_BATCH_SIZE`. When used under a - `tf.distribute.Strategy`, except via `Model.compile()` and - `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE` - will raise an error. Please see this custom training [tutorial]( - https://www.tensorflow.org/tutorials/distribute/custom_training) - for more details. - name: Optional name for the instance. Defaults to - 'mean_squared_error'. + reduction: Type of `tf.keras.losses.Reduction` to apply to + loss. Default value is `AUTO`. `AUTO` indicates that the + reduction option will be determined by the usage context. For + almost all cases this defaults to `SUM_OVER_BATCH_SIZE`. When + used under a `tf.distribute.Strategy`, except via + `Model.compile()` and `Model.fit()`, using `AUTO` or + `SUM_OVER_BATCH_SIZE` will raise an error. Please see this + custom training [tutorial]( + https://www.tensorflow.org/tutorials/distribute/custom_training) + for more details. + name: Optional name for the instance. Defaults to + 'mean_squared_error'. """ super().__init__(mean_squared_error, name=name, reduction=reduction) @@ -404,17 +407,18 @@ def __init__( """Initializes `MeanAbsoluteError` instance. Args: - reduction: Type of `tf.keras.losses.Reduction` to apply to - loss. Default value is `AUTO`. `AUTO` indicates that the reduction - option will be determined by the usage context. For almost all cases - this defaults to `SUM_OVER_BATCH_SIZE`. When used under a - `tf.distribute.Strategy`, except via `Model.compile()` and - `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE` - will raise an error. Please see this custom training [tutorial]( - https://www.tensorflow.org/tutorials/distribute/custom_training) - for more details. - name: Optional name for the instance. Defaults to - 'mean_absolute_error'. + reduction: Type of `tf.keras.losses.Reduction` to apply to + loss. Default value is `AUTO`. `AUTO` indicates that the + reduction option will be determined by the usage context. For + almost all cases this defaults to `SUM_OVER_BATCH_SIZE`. When + used under a `tf.distribute.Strategy`, except via + `Model.compile()` and `Model.fit()`, using `AUTO` or + `SUM_OVER_BATCH_SIZE` will raise an error. Please see this + custom training [tutorial]( + https://www.tensorflow.org/tutorials/distribute/custom_training) + for more details. + name: Optional name for the instance. Defaults to + 'mean_absolute_error'. """ super().__init__(mean_absolute_error, name=name, reduction=reduction) @@ -471,17 +475,18 @@ def __init__( """Initializes `MeanAbsolutePercentageError` instance. Args: - reduction: Type of `tf.keras.losses.Reduction` to apply to - loss. Default value is `AUTO`. `AUTO` indicates that the reduction - option will be determined by the usage context. For almost all cases - this defaults to `SUM_OVER_BATCH_SIZE`. When used under a - `tf.distribute.Strategy`, except via `Model.compile()` and - `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE` - will raise an error. Please see this custom training [tutorial]( - https://www.tensorflow.org/tutorials/distribute/custom_training) - for more details. - name: Optional name for the instance. 
-            'mean_absolute_percentage_error'.
+            reduction: Type of `tf.keras.losses.Reduction` to apply to
+                loss. Default value is `AUTO`. `AUTO` indicates that the
+                reduction option will be determined by the usage context. For
+                almost all cases this defaults to `SUM_OVER_BATCH_SIZE`. When
+                used under a `tf.distribute.Strategy`, except via
+                `Model.compile()` and `Model.fit()`, using `AUTO` or
+                `SUM_OVER_BATCH_SIZE` will raise an error. Please see this
+                custom training [tutorial](
+                https://www.tensorflow.org/tutorials/distribute/custom_training)
+                for more details.
+            name: Optional name for the instance. Defaults to
+                'mean_absolute_percentage_error'.
        """
        super().__init__(
            mean_absolute_percentage_error, name=name, reduction=reduction
        )
@@ -535,17 +540,18 @@ def __init__(
        """Initializes `MeanSquaredLogarithmicError` instance.
 
        Args:
-          reduction: Type of `tf.keras.losses.Reduction` to apply to
-            loss. Default value is `AUTO`. `AUTO` indicates that the reduction
-            option will be determined by the usage context. For almost all cases
-            this defaults to `SUM_OVER_BATCH_SIZE`. When used under a
-            `tf.distribute.Strategy`, except via `Model.compile()` and
-            `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
-            will raise an error. Please see this custom training [tutorial](
-            https://www.tensorflow.org/tutorials/distribute/custom_training)
-            for more details.
-          name: Optional name for the instance. Defaults to
-            'mean_squared_logarithmic_error'.
+            reduction: Type of `tf.keras.losses.Reduction` to apply to
+                loss. Default value is `AUTO`. `AUTO` indicates that the
+                reduction option will be determined by the usage context. For
+                almost all cases this defaults to `SUM_OVER_BATCH_SIZE`. When
+                used under a `tf.distribute.Strategy`, except via
+                `Model.compile()` and `Model.fit()`, using `AUTO` or
+                `SUM_OVER_BATCH_SIZE` will raise an error. Please see this
+                custom training [tutorial](
+                https://www.tensorflow.org/tutorials/distribute/custom_training)
+                for more details.
+            name: Optional name for the instance. Defaults to
+                'mean_squared_logarithmic_error'.
        """
        super().__init__(
            mean_squared_logarithmic_error, name=name, reduction=reduction
        )
@@ -561,10 +567,10 @@ class BinaryCrossentropy(LossFunctionWrapper):
 
     - `y_true` (true label): This is either 0 or 1.
     - `y_pred` (predicted value): This is the model's prediction, i.e, a single
-    floating-point value which either represents a
-    [logit](https://en.wikipedia.org/wiki/Logit), (i.e, value in [-inf, inf]
-    when `from_logits=True`) or a probability (i.e, value in [0., 1.] when
-    `from_logits=False`).
+      floating-point value which either represents a
+      [logit](https://en.wikipedia.org/wiki/Logit), (i.e, value in [-inf, inf]
+      when `from_logits=True`) or a probability (i.e, value in [0., 1.] when
+      `from_logits=False`).
 
     **Recommended Usage:** (set `from_logits=True`)
 
@@ -572,8 +578,8 @@ class BinaryCrossentropy(LossFunctionWrapper):
     ```python
     model.compile(
-      loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
-      ....
+        loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
+        ....
     )
     ```
 
@@ -627,27 +633,28 @@ def __init__(
        """Initializes `BinaryCrossentropy` instance.
 
        Args:
-          from_logits: Whether to interpret `y_pred` as a tensor of
-            [logit](https://en.wikipedia.org/wiki/Logit) values. By default, we
-            assume that `y_pred` contains probabilities (i.e., values in [0,
-            1]).
-          label_smoothing: Float in [0, 1]. When 0, no smoothing occurs. When >
-            0, we compute the loss between the predicted labels and a smoothed
-            version of the true labels, where the smoothing squeezes the labels
-            towards 0.5. Larger values of `label_smoothing` correspond to
-            heavier smoothing.
-          axis: The axis along which to compute crossentropy (the features
-            axis). Defaults to -1.
-          reduction: Type of `tf.keras.losses.Reduction` to apply to
-            loss. Default value is `AUTO`. `AUTO` indicates that the reduction
-            option will be determined by the usage context. For almost all cases
-            this defaults to `SUM_OVER_BATCH_SIZE`. When used under a
-            `tf.distribute.Strategy`, except via `Model.compile()` and
-            `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
-            will raise an error. Please see this custom training [tutorial](
-            https://www.tensorflow.org/tutorials/distribute/custom_training)
-            for more details.
-          name: Name for the op. Defaults to 'binary_crossentropy'.
+            from_logits: Whether to interpret `y_pred` as a tensor of
+                [logit](https://en.wikipedia.org/wiki/Logit) values. By default,
+                we assume that `y_pred` contains probabilities (i.e., values in
+                [0, 1]).
+            label_smoothing: Float in [0, 1]. When 0, no smoothing occurs.
+                When > 0, we compute the loss between the predicted labels and a
+                smoothed version of the true labels, where the smoothing
+                squeezes the labels towards 0.5. Larger values of
+                `label_smoothing` correspond to heavier smoothing.
+            axis: The axis along which to compute crossentropy (the features
+                axis). Defaults to -1.
+            reduction: Type of `tf.keras.losses.Reduction` to apply to
+                loss. Default value is `AUTO`. `AUTO` indicates that the
+                reduction option will be determined by the usage context. For
+                almost all cases this defaults to `SUM_OVER_BATCH_SIZE`. When
+                used under a `tf.distribute.Strategy`, except via
+                `Model.compile()` and `Model.fit()`, using `AUTO` or
+                `SUM_OVER_BATCH_SIZE` will raise an error. Please see this
+                custom training [tutorial](
+                https://www.tensorflow.org/tutorials/distribute/custom_training)
+                for more details.
+            name: Name for the op. Defaults to 'binary_crossentropy'.
        """
        super().__init__(
            binary_crossentropy,
@@ -669,10 +676,10 @@ class BinaryFocalCrossentropy(LossFunctionWrapper):
 
     - `y_true` (true label): This is either 0 or 1.
     - `y_pred` (predicted value): This is the model's prediction, i.e, a single
-    floating-point value which either represents a
-    [logit](https://en.wikipedia.org/wiki/Logit), (i.e, value in [-inf, inf]
-    when `from_logits=True`) or a probability (i.e, value in `[0., 1.]` when
-    `from_logits=False`).
+      floating-point value which either represents a
+      [logit](https://en.wikipedia.org/wiki/Logit), (i.e, value in [-inf, inf]
+      when `from_logits=True`) or a probability (i.e, value in [0., 1.] when
+      `from_logits=False`).
 
     According to [Lin et al., 2018](https://arxiv.org/pdf/1708.02002.pdf), it
     helps to apply a "focal factor" to down-weight easy examples and focus more
 
@@ -765,35 +772,35 @@ class BinaryFocalCrossentropy(LossFunctionWrapper):
 
     Args:
-      apply_class_balancing: A bool, whether to apply weight balancing on the
-        binary classes 0 and 1.
-      alpha: A weight balancing factor for class 1, default is `0.25` as
-        mentioned in reference [Lin et al., 2018](
-        https://arxiv.org/pdf/1708.02002.pdf). The weight for class 0 is
-        `1.0 - alpha`.
-      gamma: A focusing parameter used to compute the focal factor, default is
-        `2.0` as mentioned in the reference
-        [Lin et al., 2018](https://arxiv.org/pdf/1708.02002.pdf).
-      from_logits: Whether to interpret `y_pred` as a tensor of
-        [logit](https://en.wikipedia.org/wiki/Logit) values. By default, we
-        assume that `y_pred` are probabilities (i.e., values in `[0, 1]`).
-      label_smoothing: Float in `[0, 1]`. When `0`, no smoothing occurs. When >
-        `0`, we compute the loss between the predicted labels and a smoothed
-        version of the true labels, where the smoothing squeezes the labels
-        towards `0.5`. Larger values of `label_smoothing` correspond to heavier
-        smoothing.
-      axis: The axis along which to compute crossentropy (the features axis).
-        Defaults to `-1`.
-      reduction: Type of `tf.keras.losses.Reduction` to apply to
-        loss. Default value is `AUTO`. `AUTO` indicates that the reduction
-        option will be determined by the usage context. For almost all cases
-        this defaults to `SUM_OVER_BATCH_SIZE`. When used under a
-        `tf.distribute.Strategy`, except via `Model.compile()` and
-        `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
-        will raise an error. Please see this custom training [tutorial](
-        https://www.tensorflow.org/tutorials/distribute/custom_training)
-        for more details.
-      name: Name for the op. Defaults to 'binary_focal_crossentropy'.
+        apply_class_balancing: A bool, whether to apply weight balancing on the
+            binary classes 0 and 1.
+        alpha: A weight balancing factor for class 1, default is `0.25` as
+            mentioned in reference [Lin et al., 2018](
+            https://arxiv.org/pdf/1708.02002.pdf). The weight for class 0 is
+            `1.0 - alpha`.
+        gamma: A focusing parameter used to compute the focal factor, default is
+            `2.0` as mentioned in the reference
+            [Lin et al., 2018](https://arxiv.org/pdf/1708.02002.pdf).
+        from_logits: Whether to interpret `y_pred` as a tensor of
+            [logit](https://en.wikipedia.org/wiki/Logit) values. By default, we
+            assume that `y_pred` are probabilities (i.e., values in `[0, 1]`).
+        label_smoothing: Float in `[0, 1]`. When `0`, no smoothing occurs.
+            When > `0`, we compute the loss between the predicted labels and a
+            smoothed version of the true labels, where the smoothing squeezes
+            the labels towards `0.5`. Larger values of `label_smoothing`
+            correspond to heavier smoothing.
+        axis: The axis along which to compute crossentropy (the features axis).
+            Defaults to `-1`.
+        reduction: Type of `tf.keras.losses.Reduction` to apply to
+            loss. Default value is `AUTO`. `AUTO` indicates that the reduction
+            option will be determined by the usage context. For almost all cases
+            this defaults to `SUM_OVER_BATCH_SIZE`. When used under a
+            `tf.distribute.Strategy`, except via `Model.compile()` and
+            `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
+            will raise an error. Please see this custom training [tutorial](
+            https://www.tensorflow.org/tutorials/distribute/custom_training)
+            for more details.
+        name: Name for the op. Defaults to 'binary_focal_crossentropy'.
    """
 
    def __init__(
@@ -892,25 +899,26 @@ def __init__(
        """Initializes `CategoricalCrossentropy` instance.
 
        Args:
-          from_logits: Whether `y_pred` is expected to be a logits tensor. By
-            default, we assume that `y_pred` encodes a probability distribution.
-          label_smoothing: Float in [0, 1]. When > 0, label values are smoothed,
-            meaning the confidence on label values are relaxed. For example, if
-            `0.1`, use `0.1 / num_classes` for non-target labels and
-            `0.9 + 0.1 / num_classes` for target labels.
-          axis: The axis along which to compute crossentropy (the features
-            axis). Defaults to -1.
-          reduction: Type of `tf.keras.losses.Reduction` to apply to
-            loss. Default value is `AUTO`. `AUTO` indicates that the reduction
-            option will be determined by the usage context. For almost all cases
-            this defaults to `SUM_OVER_BATCH_SIZE`. When used under a
-            `tf.distribute.Strategy`, except via `Model.compile()` and
-            `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
-            will raise an error. Please see this custom training [tutorial](
-            https://www.tensorflow.org/tutorials/distribute/custom_training)
-            for more details.
-          name: Optional name for the instance.
-            Defaults to 'categorical_crossentropy'.
+            from_logits: Whether `y_pred` is expected to be a logits tensor. By
+                default, we assume that `y_pred` encodes a probability
+                distribution.
+            label_smoothing: Float in [0, 1]. When > 0, label values are
+                smoothed, meaning the confidence on label values is relaxed.
+                For example, if `0.1`, use `0.1 / num_classes` for non-target
+                labels and `0.9 + 0.1 / num_classes` for target labels.
+            axis: The axis along which to compute crossentropy (the features
+                axis). Defaults to -1.
+            reduction: Type of `tf.keras.losses.Reduction` to apply to loss.
+                Default value is `AUTO`. `AUTO` indicates that the reduction
+                option will be determined by the usage context. For almost all
+                cases this defaults to `SUM_OVER_BATCH_SIZE`. When used under a
+                `tf.distribute.Strategy`, except via `Model.compile()` and
+                `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
+                will raise an error. Please see this custom training [tutorial](
+                https://www.tensorflow.org/tutorials/distribute/custom_training)
+                for more details.
+            name: Optional name for the instance.
+                Defaults to 'categorical_crossentropy'.
        """
        super().__init__(
            categorical_crossentropy,
@@ -1119,24 +1127,26 @@ def __init__(
        """Initializes `SparseCategoricalCrossentropy` instance.
 
        Args:
-          from_logits: Whether `y_pred` is expected to be a logits tensor. By
-            default, we assume that `y_pred` encodes a probability distribution.
-          ignore_class: Optional integer. The ID of a class to be ignored during
-            loss computation. This is useful, for example, in segmentation
-            problems featuring a "void" class (commonly -1 or 255) in
-            segmentation maps.
-            By default (`ignore_class=None`), all classes are considered.
-          reduction: Type of `tf.keras.losses.Reduction` to apply to
-            loss. Default value is `AUTO`. `AUTO` indicates that the reduction
-            option will be determined by the usage context. For almost all cases
-            this defaults to `SUM_OVER_BATCH_SIZE`. When used under a
-            `tf.distribute.Strategy`, except via `Model.compile()` and
-            `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
-            will raise an error. Please see this custom training [tutorial](
-            https://www.tensorflow.org/tutorials/distribute/custom_training)
-            for more details.
-          name: Optional name for the instance. Defaults to
-            'sparse_categorical_crossentropy'.
+            from_logits: Whether `y_pred` is expected to be a logits tensor. By
+                default, we assume that `y_pred` encodes a probability
+                distribution.
+            ignore_class: Optional integer. The ID of a class to be ignored
+                during loss computation. This is useful, for example, in
+                segmentation problems featuring a "void" class (commonly -1 or
+                255) in segmentation maps.
+                By default (`ignore_class=None`), all classes are considered.
+            reduction: Type of `tf.keras.losses.Reduction` to apply to
+                loss. Default value is `AUTO`. `AUTO` indicates that the
+                reduction option will be determined by the usage context. For
+                almost all cases this defaults to `SUM_OVER_BATCH_SIZE`. When
+                used under a `tf.distribute.Strategy`, except via
+                `Model.compile()` and `Model.fit()`, using `AUTO` or
+                `SUM_OVER_BATCH_SIZE` will raise an error. Please see this
+                custom training [tutorial](
+                https://www.tensorflow.org/tutorials/distribute/custom_training)
+                for more details.
+            name: Optional name for the instance.
+                Defaults to 'sparse_categorical_crossentropy'.
        """
        super().__init__(
            sparse_categorical_crossentropy,
            name=name,
            reduction=reduction,
            ignore_class=ignore_class,
        )
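Editor's note: the label-smoothing arithmetic documented in the `CategoricalCrossentropy` hunk above is easy to check by hand. A quick sketch with illustrative values (not part of the diff):

```python
import tensorflow as tf

y_true = [[0., 1., 0.]]            # one-hot target, num_classes = 3
y_pred = [[0.05, 0.90, 0.05]]

# label_smoothing=0.1 relaxes the targets exactly as described above:
# non-target classes get 0.1 / 3 ~= 0.0333 and the target class gets
# 0.9 + 0.1 / 3 ~= 0.9333.
cce = tf.keras.losses.CategoricalCrossentropy(label_smoothing=0.1)
print(cce(y_true, y_pred).numpy())
```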
@@ -1147,6 +1157,83 @@ def __init__(
 
 
+@keras_export("keras.losses.CosineSimilarity")
+class CosineSimilarity(LossFunctionWrapper):
+    """Computes the cosine similarity between labels and predictions.
+
+    Note that it is a number between -1 and 1. When it is a negative number
+    between -1 and 0, 0 indicates orthogonality and values closer to -1
+    indicate greater similarity. The values closer to 1 indicate greater
+    dissimilarity. This makes it usable as a loss function in a setting
+    where you try to maximize the proximity between predictions and targets.
+    If either `y_true` or `y_pred` is a zero vector, cosine similarity will be 0
+    regardless of the proximity between predictions and targets.
+
+    `loss = -sum(l2_norm(y_true) * l2_norm(y_pred))`
+
+    Standalone usage:
+
+    >>> y_true = [[0., 1.], [1., 1.]]
+    >>> y_pred = [[1., 0.], [1., 1.]]
+    >>> # Using 'auto'/'sum_over_batch_size' reduction type.
+    >>> cosine_loss = tf.keras.losses.CosineSimilarity(axis=1)
+    >>> # l2_norm(y_true) = [[0., 1.], [1./1.414, 1./1.414]]
+    >>> # l2_norm(y_pred) = [[1., 0.], [1./1.414, 1./1.414]]
+    >>> # l2_norm(y_true) . l2_norm(y_pred) = [[0., 0.], [0.5, 0.5]]
+    >>> # loss = mean(sum(l2_norm(y_true) . l2_norm(y_pred), axis=1))
+    >>> #      = -((0. + 0.) + (0.5 + 0.5)) / 2
+    >>> cosine_loss(y_true, y_pred).numpy()
+    -0.5
+
+    >>> # Calling with 'sample_weight'.
+    >>> cosine_loss(y_true, y_pred, sample_weight=[0.8, 0.2]).numpy()
+    -0.0999
+
+    >>> # Using 'sum' reduction type.
+    >>> cosine_loss = tf.keras.losses.CosineSimilarity(axis=1,
+    ...     reduction=tf.keras.losses.Reduction.SUM)
+    >>> cosine_loss(y_true, y_pred).numpy()
+    -0.999
+
+    >>> # Using 'none' reduction type.
+    >>> cosine_loss = tf.keras.losses.CosineSimilarity(axis=1,
+    ...     reduction=tf.keras.losses.Reduction.NONE)
+    >>> cosine_loss(y_true, y_pred).numpy()
+    array([-0., -0.999], dtype=float32)
+
+    Usage with the `compile()` API:
+
+    ```python
+    model.compile(optimizer='sgd',
+                  loss=tf.keras.losses.CosineSimilarity(axis=1))
+    ```
+
+    Args:
+        axis: The axis along which the cosine similarity is computed
+            (the features axis). Defaults to -1.
+        reduction: Type of `tf.keras.losses.Reduction` to apply to loss.
+            Default value is `AUTO`. `AUTO` indicates that the reduction option
+            will be determined by the usage context. For almost all cases this
+            defaults to `SUM_OVER_BATCH_SIZE`. When used under a
+            `tf.distribute.Strategy`, except via `Model.compile()` and
+            `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE` will raise an
+            error. Please see this custom training [tutorial](
+            https://www.tensorflow.org/tutorials/distribute/custom_training)
+            for more details.
+        name: Optional name for the instance. Defaults to 'cosine_similarity'.
+    """
+
+    def __init__(
+        self,
+        axis=-1,
+        reduction=losses_utils.ReductionV2.AUTO,
+        name="cosine_similarity",
+    ):
+        super().__init__(
+            cosine_similarity, reduction=reduction, name=name, axis=axis
+        )
+
+
 @keras_export("keras.losses.Hinge")
 class Hinge(LossFunctionWrapper):
     """Computes the hinge loss between `y_true` & `y_pred`.
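Editor's note: the doctest values in the relocated `CosineSimilarity` class above follow directly from the quoted formula. A NumPy sketch verifying them (illustrative, not part of the diff):

```python
import numpy as np

y_true = np.array([[0., 1.], [1., 1.]])
y_pred = np.array([[1., 0.], [1., 1.]])

def l2_norm(x, axis=-1):
    # Normalize each row to unit length, as tf.linalg.l2_normalize does.
    return x / np.linalg.norm(x, axis=axis, keepdims=True)

# loss = -sum(l2_norm(y_true) * l2_norm(y_pred), axis=1)
per_sample = -np.sum(l2_norm(y_true, 1) * l2_norm(y_pred, 1), axis=1)
print(per_sample)         # ~= [-0., -1.]
print(per_sample.mean())  # -0.5, the doctest's 'auto' reduction result
```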
@@ -1192,16 +1279,17 @@ def __init__(self, reduction=losses_utils.ReductionV2.AUTO, name="hinge"):
        """Initializes `Hinge` instance.
 
        Args:
-          reduction: Type of `tf.keras.losses.Reduction` to apply to
-            loss. Default value is `AUTO`. `AUTO` indicates that the reduction
-            option will be determined by the usage context. For almost all cases
-            this defaults to `SUM_OVER_BATCH_SIZE`. When used under a
-            `tf.distribute.Strategy`, except via `Model.compile()` and
-            `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
-            will raise an error. Please see this custom training [tutorial](
-            https://www.tensorflow.org/tutorials/distribute/custom_training)
-            for more details.
-          name: Optional name for the instance. Defaults to 'hinge'.
+            reduction: Type of `tf.keras.losses.Reduction` to apply to
+                loss. Default value is `AUTO`. `AUTO` indicates that the
+                reduction option will be determined by the usage context. For
+                almost all cases this defaults to `SUM_OVER_BATCH_SIZE`. When
+                used under a `tf.distribute.Strategy`, except via
+                `Model.compile()` and `Model.fit()`, using `AUTO` or
+                `SUM_OVER_BATCH_SIZE` will raise an error. Please see this
+                custom training [tutorial](
+                https://www.tensorflow.org/tutorials/distribute/custom_training)
+                for more details.
+            name: Optional name for the instance. Defaults to 'hinge'.
        """
        super().__init__(hinge, name=name, reduction=reduction)
 
@@ -1253,16 +1341,17 @@ def __init__(
        """Initializes `SquaredHinge` instance.
 
        Args:
-          reduction: Type of `tf.keras.losses.Reduction` to apply to
-            loss. Default value is `AUTO`. `AUTO` indicates that the reduction
-            option will be determined by the usage context. For almost all cases
-            this defaults to `SUM_OVER_BATCH_SIZE`. When used under a
-            `tf.distribute.Strategy`, except via `Model.compile()` and
-            `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
-            will raise an error. Please see this custom training [tutorial](
-            https://www.tensorflow.org/tutorials/distribute/custom_training)
-            for more details.
-          name: Optional name for the instance. Defaults to 'squared_hinge'.
+            reduction: Type of `tf.keras.losses.Reduction` to apply to
+                loss. Default value is `AUTO`. `AUTO` indicates that the
+                reduction option will be determined by the usage context. For
+                almost all cases this defaults to `SUM_OVER_BATCH_SIZE`. When
+                used under a `tf.distribute.Strategy`, except via
+                `Model.compile()` and `Model.fit()`, using `AUTO` or
+                `SUM_OVER_BATCH_SIZE` will raise an error. Please see this
+                custom training [tutorial](
+                https://www.tensorflow.org/tutorials/distribute/custom_training)
+                for more details.
+            name: Optional name for the instance. Defaults to 'squared_hinge'.
        """
        super().__init__(squared_hinge, name=name, reduction=reduction)
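Editor's note: the hinge rule referenced by the two hunks above is `mean(maximum(1 - y_true * y_pred, 0))`, with binary `{0, 1}` labels first mapped to `{-1, 1}`. A NumPy check (illustrative values only):

```python
import numpy as np

y_true = np.array([[0., 1.], [0., 0.]])
y_pred = np.array([[0.6, 0.4], [0.4, 0.6]])

y_signed = 2. * y_true - 1.   # {0, 1} -> {-1, 1}, as the docstrings describe
per_sample = np.mean(np.maximum(1. - y_signed * y_pred, 0.), axis=-1)
print(per_sample)             # [1.1, 1.5]
print(per_sample.mean())      # 1.3, matching tf.keras.losses.Hinge() here
```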
@@ -1312,16 +1401,18 @@ def __init__(
        """Initializes `CategoricalHinge` instance.
 
        Args:
-          reduction: Type of `tf.keras.losses.Reduction` to apply to
-            loss. Default value is `AUTO`. `AUTO` indicates that the reduction
-            option will be determined by the usage context. For almost all cases
-            this defaults to `SUM_OVER_BATCH_SIZE`. When used under a
-            `tf.distribute.Strategy`, except via `Model.compile()` and
-            `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
-            will raise an error. Please see this custom training [tutorial](
-            https://www.tensorflow.org/tutorials/distribute/custom_training)
-            for more details.
-          name: Optional name for the instance. Defaults to 'categorical_hinge'.
+            reduction: Type of `tf.keras.losses.Reduction` to apply to
+                loss. Default value is `AUTO`. `AUTO` indicates that the
+                reduction option will be determined by the usage context. For
+                almost all cases this defaults to `SUM_OVER_BATCH_SIZE`. When
+                used under a `tf.distribute.Strategy`, except via
+                `Model.compile()` and `Model.fit()`, using `AUTO` or
+                `SUM_OVER_BATCH_SIZE` will raise an error. Please see this
+                custom training [tutorial](
+                https://www.tensorflow.org/tutorials/distribute/custom_training)
+                for more details.
+            name: Optional name for the instance.
+                Defaults to 'categorical_hinge'.
        """
        super().__init__(categorical_hinge, name=name, reduction=reduction)
 
@@ -1368,16 +1459,17 @@ def __init__(self, reduction=losses_utils.ReductionV2.AUTO, name="poisson"):
        """Initializes `Poisson` instance.
 
        Args:
-          reduction: Type of `tf.keras.losses.Reduction` to apply to
-            loss. Default value is `AUTO`. `AUTO` indicates that the reduction
-            option will be determined by the usage context. For almost all cases
-            this defaults to `SUM_OVER_BATCH_SIZE`. When used under a
-            `tf.distribute.Strategy`, except via `Model.compile()` and
-            `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
-            will raise an error. Please see this custom training [tutorial](
-            https://www.tensorflow.org/tutorials/distribute/custom_training)
-            for more details.
-          name: Optional name for the instance. Defaults to 'poisson'.
+            reduction: Type of `tf.keras.losses.Reduction` to apply to
+                loss. Default value is `AUTO`. `AUTO` indicates that the
+                reduction option will be determined by the usage context. For
+                almost all cases this defaults to `SUM_OVER_BATCH_SIZE`. When
+                used under a `tf.distribute.Strategy`, except via
+                `Model.compile()` and `Model.fit()`, using `AUTO` or
+                `SUM_OVER_BATCH_SIZE` will raise an error. Please see this
+                custom training [tutorial](
+                https://www.tensorflow.org/tutorials/distribute/custom_training)
+                for more details.
+            name: Optional name for the instance. Defaults to 'poisson'.
        """
        super().__init__(poisson, name=name, reduction=reduction)
 
@@ -1427,16 +1519,17 @@ def __init__(
        """Initializes `LogCosh` instance.
 
        Args:
-          reduction: Type of `tf.keras.losses.Reduction` to apply to
-            loss. Default value is `AUTO`. `AUTO` indicates that the reduction
-            option will be determined by the usage context. For almost all cases
-            this defaults to `SUM_OVER_BATCH_SIZE`. When used under a
-            `tf.distribute.Strategy`, except via `Model.compile()` and
-            `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
-            will raise an error. Please see this custom training [tutorial](
-            https://www.tensorflow.org/tutorials/distribute/custom_training)
-            for more details.
-          name: Optional name for the instance. Defaults to 'log_cosh'.
+            reduction: Type of `tf.keras.losses.Reduction` to apply to
+                loss. Default value is `AUTO`. `AUTO` indicates that the
+                reduction option will be determined by the usage context. For
+                almost all cases this defaults to `SUM_OVER_BATCH_SIZE`. When
+                used under a `tf.distribute.Strategy`, except via
+                `Model.compile()` and `Model.fit()`, using `AUTO` or
+                `SUM_OVER_BATCH_SIZE` will raise an error. Please see this
+                custom training [tutorial](
+                https://www.tensorflow.org/tutorials/distribute/custom_training)
+                for more details.
+            name: Optional name for the instance. Defaults to 'log_cosh'.
        """
        super().__init__(log_cosh, name=name, reduction=reduction)
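Editor's note: the log-cosh quantity used by the `LogCosh` hunk above is `log((exp(x) + exp(-x)) / 2)` for `x = y_pred - y_true`. A short NumPy sketch (illustrative values only):

```python
import numpy as np

y_true = np.array([[0., 1.], [0., 0.]])
y_pred = np.array([[1., 1.], [0., 0.]])

x = y_pred - y_true
per_sample = np.mean(np.log((np.exp(x) + np.exp(-x)) / 2.), axis=-1)
print(per_sample)  # ~= [0.217, 0.]; near-quadratic for small x, linear for large
```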
@@ -1487,16 +1580,18 @@ def __init__(
        """Initializes `KLDivergence` instance.
 
        Args:
-          reduction: Type of `tf.keras.losses.Reduction` to apply to
-            loss. Default value is `AUTO`. `AUTO` indicates that the reduction
-            option will be determined by the usage context. For almost all cases
-            this defaults to `SUM_OVER_BATCH_SIZE`. When used under a
-            `tf.distribute.Strategy`, except via `Model.compile()` and
-            `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
-            will raise an error. Please see this custom training [tutorial](
-            https://www.tensorflow.org/tutorials/distribute/custom_training)
-            for more details.
-          name: Optional name for the instance. Defaults to 'kl_divergence'.
+            reduction: Type of `tf.keras.losses.Reduction` to apply to
+                loss. Default value is `AUTO`. `AUTO` indicates that the
+                reduction option will be determined by the usage context. For
+                almost all cases this defaults to `SUM_OVER_BATCH_SIZE`. When
+                used under a `tf.distribute.Strategy`, except via
+                `Model.compile()` and `Model.fit()`, using `AUTO` or
+                `SUM_OVER_BATCH_SIZE` will raise an error. Please see this
+                custom training [tutorial](
+                https://www.tensorflow.org/tutorials/distribute/custom_training)
+                for more details.
+            name: Optional name for the instance.
+                Defaults to 'kl_divergence'.
        """
        super().__init__(kl_divergence, name=name, reduction=reduction)
 
@@ -1554,18 +1649,19 @@ def __init__(
        """Initializes `Huber` instance.
 
        Args:
-          delta: A float, the point where the Huber loss function changes from a
-            quadratic to linear.
-          reduction: Type of `tf.keras.losses.Reduction` to apply to
-            loss. Default value is `AUTO`. `AUTO` indicates that the reduction
-            option will be determined by the usage context. For almost all cases
-            this defaults to `SUM_OVER_BATCH_SIZE`. When used under a
-            `tf.distribute.Strategy`, except via `Model.compile()` and
-            `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
-            will raise an error. Please see this custom training [tutorial](
-            https://www.tensorflow.org/tutorials/distribute/custom_training)
-            for more details.
-          name: Optional name for the instance. Defaults to 'huber_loss'.
+            delta: A float, the point where the Huber loss function changes from
+                a quadratic to linear.
+            reduction: Type of `tf.keras.losses.Reduction` to apply to
+                loss. Default value is `AUTO`. `AUTO` indicates that the
+                reduction option will be determined by the usage context. For
+                almost all cases this defaults to `SUM_OVER_BATCH_SIZE`. When
+                used under a `tf.distribute.Strategy`, except via
+                `Model.compile()` and `Model.fit()`, using `AUTO` or
+                `SUM_OVER_BATCH_SIZE` will raise an error. Please see this
+                custom training [tutorial](
+                https://www.tensorflow.org/tutorials/distribute/custom_training)
+                for more details.
+            name: Optional name for the instance. Defaults to 'huber_loss'.
        """
        super().__init__(huber, name=name, reduction=reduction, delta=delta)
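Editor's note: the `delta` parameter in the `Huber` hunk above is where the loss switches from quadratic (`0.5 * x^2`) to linear (`0.5 * d^2 + d * (|x| - d)`). A minimal NumPy rendering of that piecewise rule (a sketch, not the library implementation):

```python
import numpy as np

def huber_np(y_true, y_pred, delta=1.0):
    x = np.abs(y_true - y_pred)
    quad = 0.5 * np.square(x)                       # |x| <= delta branch
    lin = 0.5 * delta**2 + delta * (x - delta)      # |x| > delta branch
    return np.mean(np.where(x <= delta, quad, lin), axis=-1)

print(huber_np(np.array([[0., 1.]]), np.array([[0.6, 0.4]])))  # [0.18]
```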
@@ -1597,11 +1693,11 @@ def mean_squared_error(y_true, y_pred):
     ...     loss.numpy(), np.mean(np.square(y_true - y_pred), axis=-1))
 
     Args:
-      y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`.
-      y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.
+        y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`.
+        y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.
 
     Returns:
-      Mean squared error values. shape = `[batch_size, d0, .. dN-1]`.
+        Mean squared error values. shape = `[batch_size, d0, .. dN-1]`.
     """
     y_pred = tf.convert_to_tensor(y_pred)
     y_true = tf.cast(y_true, y_pred.dtype)
 
@@ -1612,15 +1708,15 @@ def _ragged_tensor_apply_loss(loss_fn, y_true, y_pred, y_pred_extra_dim=False):
     """Apply a loss function on a per batch basis.
 
     Args:
-      loss_fn: The loss function
-      y_true: truth values (RaggedTensor)
-      y_pred: predicted values (RaggedTensor)
-      y_pred_extra_dim: whether y_pred has an additional dimension compared to
+        loss_fn: The loss function
+        y_true: truth values (RaggedTensor)
+        y_pred: predicted values (RaggedTensor)
+        y_pred_extra_dim: whether y_pred has an additional dimension compared to
        y_true
 
     Returns:
-      Loss-function result. A dense tensor if the output has a single dimension
-      (per-batch loss value); a ragged tensor otherwise.
+        Loss-function result. A dense tensor if the output has a single
+        dimension (per-batch loss value); a ragged tensor otherwise.
     """
 
     def rt_is_equiv_dense(rt):
        """Returns true if this RaggedTensor has the same row_lengths across
@@ -1630,7 +1726,7 @@ def rt_is_equiv_dense(rt):
        all ragged dimensions and thus can be converted to a dense tensor
        without loss of information.
 
        Args:
-          rt: RaggedTensor.
+            rt: RaggedTensor.
        """
        return tf.reduce_all(
            [
 
@@ -1702,14 +1798,15 @@ def _ragged_tensor_mse(y_true, y_pred):
     """Implements support for handling RaggedTensors.
 
     Args:
-      y_true: RaggedTensor truth values. shape = `[batch_size, d0, .. dN]`.
-      y_pred: RaggedTensor predicted values. shape = `[batch_size, d0, .. dN]`.
+        y_true: RaggedTensor truth values. shape = `[batch_size, d0, .. dN]`.
+        y_pred: RaggedTensor predicted values.
+            shape = `[batch_size, d0, .. dN]`.
 
     Returns:
-      Mean squared error values. shape = `[batch_size, d0, .. dN-1]`.
-      When the number of dimensions of the batch feature vector [d0, .. dN] is
-      greater than one the return value is a RaggedTensor. Otherwise a Dense
-      tensor with dimensions [batch_size] is returned.
+        Mean squared error values. shape = `[batch_size, d0, .. dN-1]`.
+        When the number of dimensions of the batch feature vector [d0, .. dN] is
+        greater than one the return value is a RaggedTensor. Otherwise, a Dense
+        tensor with dimensions [batch_size] is returned.
     """
     return _ragged_tensor_apply_loss(mean_squared_error, y_true, y_pred)
 
@@ -1738,11 +1835,11 @@ def mean_absolute_error(y_true, y_pred):
     ...     loss.numpy(), np.mean(np.abs(y_true - y_pred), axis=-1))
 
     Args:
-      y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`.
-      y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.
+        y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`.
+        y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.
 
     Returns:
-      Mean absolute error values. shape = `[batch_size, d0, .. dN-1]`.
+        Mean absolute error values. shape = `[batch_size, d0, .. dN-1]`.
     """
     y_pred = tf.convert_to_tensor(y_pred)
     y_true = tf.cast(y_true, y_pred.dtype)
 
@@ -1781,12 +1878,12 @@ def mean_absolute_percentage_error(y_true, y_pred):
     ...     100. * np.mean(np.abs((y_true - y_pred) / y_true), axis=-1))
 
     Args:
-      y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`.
-      y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.
+        y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`.
+        y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.
 
     Returns:
-      Mean absolute percentage error values. shape = `[batch_size, d0, ..
-      dN-1]`.
+        Mean absolute percentage error values. shape = `[batch_size, d0, ..
+        dN-1]`.
     """
     y_pred = tf.convert_to_tensor(y_pred)
     y_true = tf.cast(y_true, y_pred.dtype)
 
@@ -1832,12 +1929,12 @@ def mean_squared_logarithmic_error(y_true, y_pred):
     ...     np.square(np.log(y_true + 1.) - np.log(y_pred + 1.)), axis=-1))
 
     Args:
-      y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`.
-      y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.
+        y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`.
+        y_pred: The predicted values.
+            shape = `[batch_size, d0, .. dN]`.
 
     Returns:
-      Mean squared logarithmic error values. shape = `[batch_size, d0, ..
-      dN-1]`.
+        Mean squared logarithmic error values. shape = `[batch_size, d0, ..
+        dN-1]`.
     """
     y_pred = tf.convert_to_tensor(y_pred)
     y_true = tf.cast(y_true, y_pred.dtype)
 
@@ -1890,13 +1987,13 @@ def squared_hinge(y_true, y_pred):
     ...     np.mean(np.square(np.maximum(1. - y_true * y_pred, 0.)), axis=-1))
 
     Args:
-      y_true: The ground truth values. `y_true` values are expected to be -1 or
-        1. If binary (0 or 1) labels are provided we will convert them to -1 or
-        1. shape = `[batch_size, d0, .. dN]`.
-      y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.
+        y_true: The ground truth values. `y_true` values are expected to be -1
+            or 1. If binary (0 or 1) labels are provided we will convert them to
+            -1 or 1. shape = `[batch_size, d0, .. dN]`.
+        y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.
 
     Returns:
-      Squared hinge loss values. shape = `[batch_size, d0, .. dN-1]`.
+        Squared hinge loss values. shape = `[batch_size, d0, .. dN-1]`.
     """
     y_pred = tf.convert_to_tensor(y_pred)
     y_true = tf.cast(y_true, y_pred.dtype)
 
@@ -1924,13 +2021,13 @@ def hinge(y_true, y_pred):
     ...     np.mean(np.maximum(1. - y_true * y_pred, 0.), axis=-1))
 
     Args:
-      y_true: The ground truth values. `y_true` values are expected to be -1 or
-        1. If binary (0 or 1) labels are provided they will be converted to -1
-        or 1. shape = `[batch_size, d0, .. dN]`.
-      y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.
+        y_true: The ground truth values. `y_true` values are expected to be -1
+            or 1. If binary (0 or 1) labels are provided we will convert them to
+            -1 or 1. shape = `[batch_size, d0, .. dN]`.
+        y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.
 
     Returns:
-      Hinge loss values. shape = `[batch_size, d0, .. dN-1]`.
+        Hinge loss values. shape = `[batch_size, d0, .. dN-1]`.
     """
     y_pred = tf.convert_to_tensor(y_pred)
     y_true = tf.cast(y_true, y_pred.dtype)
 
@@ -1958,12 +2055,12 @@ def categorical_hinge(y_true, y_pred):
     >>> assert np.array_equal(loss.numpy(), np.maximum(0., neg - pos + 1.))
 
     Args:
-      y_true: The ground truth values. `y_true` values are expected to be
-        either `{-1, +1}` or `{0, 1}` (i.e. a one-hot-encoded tensor).
-      y_pred: The predicted values.
+        y_true: The ground truth values. `y_true` values are expected to be
+            either `{-1, +1}` or `{0, 1}` (i.e. a one-hot-encoded tensor).
+        y_pred: The predicted values.
 
     Returns:
-      Categorical hinge loss values.
+        Categorical hinge loss values.
     """
     y_pred = tf.convert_to_tensor(y_pred)
     y_true = tf.cast(y_true, y_pred.dtype)
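Editor's note: the `categorical_hinge` doctest above relies on `loss = maximum(0, neg - pos + 1)`, where `pos` is the true-class score and `neg` the best wrong-class score. A tiny NumPy check (illustrative values):

```python
import numpy as np

y_true = np.array([[0., 1., 0.]])              # one-hot target
y_pred = np.array([[0.2, 0.7, 0.1]])

pos = np.sum(y_true * y_pred, axis=-1)         # score of the true class: 0.7
neg = np.max((1. - y_true) * y_pred, axis=-1)  # best wrong-class score: 0.2
print(np.maximum(0., neg - pos + 1.))          # [0.5]
```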
@@ -1987,13 +2084,13 @@ def huber(y_true, y_pred, delta=1.0):
     where d is `delta`. See: https://en.wikipedia.org/wiki/Huber_loss
 
     Args:
-      y_true: tensor of true targets.
-      y_pred: tensor of predicted targets.
-      delta: A float, the point where the Huber loss function changes from a
-        quadratic to linear.
+        y_true: tensor of true targets.
+        y_pred: tensor of predicted targets.
+        delta: A float, the point where the Huber loss function changes from a
+            quadratic to linear.
 
     Returns:
-      Tensor with one scalar loss entry per sample.
+        Tensor with one scalar loss entry per sample.
     """
     y_pred = tf.cast(y_pred, dtype=backend.floatx())
     y_true = tf.cast(y_true, dtype=backend.floatx())
 
@@ -2040,11 +2137,11 @@ def log_cosh(y_true, y_pred):
     ...     atol=1e-5)
 
     Args:
-      y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`.
-      y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.
+        y_true: Ground truth values.
+            shape = `[batch_size, d0, .. dN]`.
+        y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.
 
     Returns:
-      Logcosh error values. shape = `[batch_size, d0, .. dN-1]`.
+        Logcosh error values. shape = `[batch_size, d0, .. dN-1]`.
     """
     y_pred = tf.convert_to_tensor(y_pred)
     y_true = tf.cast(y_true, y_pred.dtype)
 
@@ -2077,18 +2174,18 @@ def categorical_crossentropy(
     array([0.0513, 2.303], dtype=float32)
 
     Args:
-      y_true: Tensor of one-hot true targets.
-      y_pred: Tensor of predicted targets.
-      from_logits: Whether `y_pred` is expected to be a logits tensor. By
-        default, we assume that `y_pred` encodes a probability distribution.
-      label_smoothing: Float in [0, 1]. If > `0` then smooth the labels. For
-        example, if `0.1`, use `0.1 / num_classes` for non-target labels
-        and `0.9 + 0.1 / num_classes` for target labels.
-      axis: Defaults to -1. The dimension along which the entropy is
-        computed.
+        y_true: Tensor of one-hot true targets.
+        y_pred: Tensor of predicted targets.
+        from_logits: Whether `y_pred` is expected to be a logits tensor. By
+            default, we assume that `y_pred` encodes a probability distribution.
+        label_smoothing: Float in [0, 1]. If > `0` then smooth the labels. For
+            example, if `0.1`, use `0.1 / num_classes` for non-target labels
+            and `0.9 + 0.1 / num_classes` for target labels.
+        axis: Defaults to -1. The dimension along which the entropy is
+            computed.
 
     Returns:
-      Categorical crossentropy loss value.
+        Categorical crossentropy loss value.
     """
     if isinstance(axis, bool):
        raise ValueError(
 
@@ -2131,18 +2228,18 @@ def _ragged_tensor_categorical_crossentropy(
     """Implements support for handling RaggedTensors.
 
     Args:
-      y_true: Tensor of one-hot true targets.
-      y_pred: Tensor of predicted targets.
-      from_logits: Whether `y_pred` is expected to be a logits tensor. By
-        default, we assume that `y_pred` encodes a probability distribution.
-      label_smoothing: Float in [0, 1]. If > `0` then smooth the labels. For
-        example, if `0.1`, use `0.1 / num_classes` for non-target labels
-        and `0.9 + 0.1 / num_classes` for target labels.
-      axis: The axis along which to compute crossentropy (the features axis).
-        Defaults to -1.
+        y_true: Tensor of one-hot true targets.
+        y_pred: Tensor of predicted targets.
+        from_logits: Whether `y_pred` is expected to be a logits tensor. By
+            default, we assume that `y_pred` encodes a probability distribution.
+        label_smoothing: Float in [0, 1]. If > `0` then smooth the labels. For
+            example, if `0.1`, use `0.1 / num_classes` for non-target labels
+            and `0.9 + 0.1 / num_classes` for target labels.
+        axis: The axis along which to compute crossentropy (the features axis).
+            Defaults to -1.
 
     Returns:
-      Categorical crossentropy loss value.
+        Categorical crossentropy loss value.
 
     Expected shape: (batch, sequence_len, n_classes) with sequence_len
     being variable per batch.
 
@@ -2337,19 +2434,20 @@ def sparse_categorical_crossentropy(
     [0.0000000e+00, 0.0000000e+00]]], dtype=float32)
 
     Args:
-      y_true: Ground truth values.
-      y_pred: The predicted values.
-      from_logits: Whether `y_pred` is expected to be a logits tensor. By
-        default, we assume that `y_pred` encodes a probability distribution.
-      axis: Defaults to -1. The dimension along which the entropy is
-        computed.
-      ignore_class: Optional integer. The ID of a class to be ignored during
-        loss computation. This is useful, for example, in segmentation
-        problems featuring a "void" class (commonly -1 or 255) in segmentation
-        maps. By default (`ignore_class=None`), all classes are considered.
+        y_true: Ground truth values.
+        y_pred: The predicted values.
+        from_logits: Whether `y_pred` is expected to be a logits tensor. By
+            default, we assume that `y_pred` encodes a probability distribution.
+        axis: Defaults to -1. The dimension along which the entropy is
+            computed.
+        ignore_class: Optional integer. The ID of a class to be ignored during
+            loss computation. This is useful, for example, in segmentation
+            problems featuring a "void" class (commonly -1 or 255) in
+            segmentation maps. By default (`ignore_class=None`), all classes are
+            considered.
 
     Returns:
-      Sparse categorical crossentropy loss value.
+        Sparse categorical crossentropy loss value.
     """
     return backend.sparse_categorical_crossentropy(
        y_true,
        y_pred,
 
@@ -2404,18 +2502,18 @@ def binary_crossentropy(
     array([0.916 , 0.714], dtype=float32)
 
     Args:
-      y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`.
-      y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.
-      from_logits: Whether `y_pred` is expected to be a logits tensor. By
-        default, we assume that `y_pred` encodes a probability distribution.
-      label_smoothing: Float in [0, 1]. If > `0` then smooth the labels by
-        squeezing them towards 0.5 That is, using `1. - 0.5 * label_smoothing`
-        for the target class and `0.5 * label_smoothing` for the non-target
-        class.
-      axis: The axis along which the mean is computed. Defaults to -1.
+        y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`.
+        y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.
+        from_logits: Whether `y_pred` is expected to be a logits tensor. By
+            default, we assume that `y_pred` encodes a probability distribution.
+        label_smoothing: Float in [0, 1]. If > `0` then smooth the labels by
+            squeezing them towards 0.5. That is, using
+            `1. - 0.5 * label_smoothing` for the target class and
+            `0.5 * label_smoothing` for the non-target class.
+        axis: The axis along which the mean is computed. Defaults to -1.
 
     Returns:
-      Binary crossentropy loss value. shape = `[batch_size, d0, .. dN-1]`.
+        Binary crossentropy loss value. shape = `[batch_size, d0, .. dN-1]`.
     """
     y_pred = tf.convert_to_tensor(y_pred)
     y_true = tf.cast(y_true, y_pred.dtype)
 
@@ -2441,17 +2539,17 @@ def _ragged_tensor_binary_crossentropy(
     """Implements support for handling RaggedTensors.
 
     Args:
-      y_true: Tensor of one-hot true targets.
-      y_pred: Tensor of predicted targets.
-      from_logits: Whether `y_pred` is expected to be a logits tensor. By
-        default, we assume that `y_pred` encodes a probability distribution.
-      label_smoothing: Float in [0, 1]. If > `0` then smooth the labels. For
-        example, if `0.1`, use `0.1 / num_classes` for non-target labels
-        and `0.9 + 0.1 / num_classes` for target labels.
-      axis: Axis along which to compute crossentropy.
+        y_true: Tensor of one-hot true targets.
+        y_pred: Tensor of predicted targets.
+        from_logits: Whether `y_pred` is expected to be a logits tensor. By
+            default, we assume that `y_pred` encodes a probability distribution.
+        label_smoothing: Float in [0, 1]. If > `0` then smooth the labels. For
+            example, if `0.1`, use `0.1 / num_classes` for non-target labels
+            and `0.9 + 0.1 / num_classes` for target labels.
+        axis: Axis along which to compute crossentropy.
 
     Returns:
-      Binary crossentropy loss value.
+        Binary crossentropy loss value.
 
     Expected shape: (batch, sequence_len) with sequence_len
     being variable per batch.
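Editor's note: the binary `label_smoothing` behavior documented above (targets squeezed toward 0.5) can be verified directly. A small sketch with illustrative values (not part of the diff):

```python
import tensorflow as tf

y_true = [[0., 1.]]
y_pred = [[0.1, 0.9]]

# With label_smoothing=0.2 the effective targets become
# 0.5 * 0.2 = 0.1 for class 0 and 1. - 0.5 * 0.2 = 0.9 for class 1.
plain = tf.keras.losses.binary_crossentropy(y_true, y_pred)
smoothed = tf.keras.losses.binary_crossentropy(
    y_true, y_pred, label_smoothing=0.2
)
print(plain.numpy(), smoothed.numpy())  # ~0.105 vs ~0.325
```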
@@ -2514,24 +2612,25 @@ def binary_focal_crossentropy(
     array([0.330, 0.206], dtype=float32)
 
     Args:
-      y_true: Ground truth values, of shape `(batch_size, d0, .. dN)`.
-      y_pred: The predicted values, of shape `(batch_size, d0, .. dN)`.
-      apply_class_balancing: A bool, whether to apply weight balancing on the
-        binary classes 0 and 1.
-      alpha: A weight balancing factor for class 1, default is `0.25` as
-        mentioned in the reference. The weight for class 0 is `1.0 - alpha`.
-      gamma: A focusing parameter, default is `2.0` as mentioned in the
-        reference.
-      from_logits: Whether `y_pred` is expected to be a logits tensor. By
-        default, we assume that `y_pred` encodes a probability distribution.
-      label_smoothing: Float in `[0, 1]`. If higher than 0 then smooth the
-        labels by squeezing them towards `0.5`, i.e., using `1. - 0.5 *
-        label_smoothing` for the target class and `0.5 * label_smoothing` for
-        the non-target class.
-      axis: The axis along which the mean is computed. Defaults to `-1`.
+        y_true: Ground truth values, of shape `(batch_size, d0, .. dN)`.
+        y_pred: The predicted values, of shape `(batch_size, d0, .. dN)`.
+        apply_class_balancing: A bool, whether to apply weight balancing on the
+            binary classes 0 and 1.
+        alpha: A weight balancing factor for class 1, default is `0.25` as
+            mentioned in the reference. The weight for class 0 is `1.0 - alpha`.
+        gamma: A focusing parameter, default is `2.0` as mentioned in the
+            reference.
+        from_logits: Whether `y_pred` is expected to be a logits tensor. By
+            default, we assume that `y_pred` encodes a probability distribution.
+        label_smoothing: Float in `[0, 1]`. If higher than 0 then smooth the
+            labels by squeezing them towards `0.5`, i.e., using `1. - 0.5 *
+            label_smoothing` for the target class and `0.5 * label_smoothing`
+            for the non-target class.
+        axis: The axis along which the mean is computed. Defaults to `-1`.
 
     Returns:
-      Binary focal crossentropy loss value. shape = `[batch_size, d0, .. dN-1]`.
+        Binary focal crossentropy loss value.
+        shape = `[batch_size, d0, .. dN-1]`.
     """
     y_pred = tf.convert_to_tensor(y_pred)
     y_true = tf.cast(y_true, y_pred.dtype)
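Editor's note: the focal factor that `binary_focal_crossentropy` applies is `(1 - p_t) ** gamma`, which down-weights well-classified examples. A two-line check (illustrative values):

```python
import numpy as np

p_t = np.array([0.9, 0.6])   # predicted probability of the true class
gamma = 2.0
print((1. - p_t) ** gamma)   # [0.01 0.16]: the confident example is
                             # weighted 16x less than the uncertain one
```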
@@ -2579,25 +2678,25 @@
     the number of batches.
 
     Args:
-      y_true: Tensor of one-hot true targets.
-      y_pred: Tensor of predicted targets.
-      apply_class_balancing: A bool, whether to apply weight balancing on the
-        binary classes 0 and 1.
-      alpha: A weight balancing factor for class 1, default is `0.25` as
-        mentioned in the reference [Lin et al., 2018](
-        https://arxiv.org/pdf/1708.02002.pdf). The weight for class 0 is
-        `1.0 - alpha`.
-      gamma: A focusing parameter, default is `2.0` as mentioned in the
-        reference.
-      from_logits: Whether `y_pred` is expected to be a logits tensor. By
-        default, we assume that `y_pred` encodes a probability distribution.
-      label_smoothing: Float in `[0, 1]`. If > `0` then smooth the labels. For
-        example, if `0.1`, use `0.1 / num_classes` for non-target labels
-        and `0.9 + 0.1 / num_classes` for target labels.
-      axis: Axis along which to compute crossentropy.
+        y_true: Tensor of one-hot true targets.
+        y_pred: Tensor of predicted targets.
+        apply_class_balancing: A bool, whether to apply weight balancing on the
+            binary classes 0 and 1.
+        alpha: A weight balancing factor for class 1, default is `0.25` as
+            mentioned in the reference [Lin et al., 2018](
+            https://arxiv.org/pdf/1708.02002.pdf). The weight for class 0 is
+            `1.0 - alpha`.
+        gamma: A focusing parameter, default is `2.0` as mentioned in the
+            reference.
+        from_logits: Whether `y_pred` is expected to be a logits tensor. By
+            default, we assume that `y_pred` encodes a probability distribution.
+        label_smoothing: Float in `[0, 1]`. If > `0` then smooth the labels. For
+            example, if `0.1`, use `0.1 / num_classes` for non-target labels
+            and `0.9 + 0.1 / num_classes` for target labels.
+        axis: Axis along which to compute crossentropy.
 
     Returns:
-      Binary focal crossentropy loss value.
+        Binary focal crossentropy loss value.
     """
     fn = functools.partial(
        binary_focal_crossentropy,
 
@@ -2641,14 +2740,14 @@ def kl_divergence(y_true, y_pred):
     ...     loss.numpy(), np.sum(y_true * np.log(y_true / y_pred), axis=-1))
 
     Args:
-      y_true: Tensor of true targets.
-      y_pred: Tensor of predicted targets.
+        y_true: Tensor of true targets.
+        y_pred: Tensor of predicted targets.
 
     Returns:
-      A `Tensor` with loss.
+        A `Tensor` with loss.
 
     Raises:
-      TypeError: If `y_true` cannot be cast to the `y_pred.dtype`.
+        TypeError: If `y_true` cannot be cast to the `y_pred.dtype`.
     """
     y_pred = tf.convert_to_tensor(y_pred)
     y_true = tf.cast(y_true, y_pred.dtype)
 
@@ -2677,14 +2776,14 @@ def poisson(y_true, y_pred):
     ...     atol=1e-5)
 
     Args:
-      y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`.
-      y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.
+        y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`.
+        y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.
 
     Returns:
-      Poisson loss value. shape = `[batch_size, d0, .. dN-1]`.
+        Poisson loss value. shape = `[batch_size, d0, .. dN-1]`.
 
     Raises:
-      InvalidArgumentError: If `y_true` and `y_pred` have incompatible shapes.
+        InvalidArgumentError: If `y_true` and `y_pred` have incompatible shapes.
     """
     y_pred = tf.convert_to_tensor(y_pred)
     y_true = tf.cast(y_true, y_pred.dtype)
 
@@ -2727,95 +2826,18 @@ def cosine_similarity(y_true, y_pred, axis=-1):
     array([-0., -0.999, 0.999], dtype=float32)
 
     Args:
-      y_true: Tensor of true targets.
-      y_pred: Tensor of predicted targets.
-      axis: Axis along which to determine similarity.
+        y_true: Tensor of true targets.
+        y_pred: Tensor of predicted targets.
+        axis: Axis along which to determine similarity.
 
     Returns:
-      Cosine similarity tensor.
+        Cosine similarity tensor.
     """
     y_true = tf.linalg.l2_normalize(y_true, axis=axis)
     y_pred = tf.linalg.l2_normalize(y_pred, axis=axis)
     return -tf.reduce_sum(y_true * y_pred, axis=axis)
 
 
-@keras_export("keras.losses.CosineSimilarity")
-class CosineSimilarity(LossFunctionWrapper):
-    """Computes the cosine similarity between labels and predictions.
-
-    Note that it is a number between -1 and 1. When it is a negative number
-    between -1 and 0, 0 indicates orthogonality and values closer to -1
-    indicate greater similarity. The values closer to 1 indicate greater
-    dissimilarity. This makes it usable as a loss function in a setting
-    where you try to maximize the proximity between predictions and targets.
-    If either `y_true` or `y_pred` is a zero vector, cosine similarity will be 0
-    regardless of the proximity between predictions and targets.
-
-    `loss = -sum(l2_norm(y_true) * l2_norm(y_pred))`
-
-    Standalone usage:
-
-    >>> y_true = [[0., 1.], [1., 1.]]
-    >>> y_pred = [[1., 0.], [1., 1.]]
-    >>> # Using 'auto'/'sum_over_batch_size' reduction type.
-    >>> cosine_loss = tf.keras.losses.CosineSimilarity(axis=1)
-    >>> # l2_norm(y_true) = [[0., 1.], [1./1.414, 1./1.414]]
-    >>> # l2_norm(y_pred) = [[1., 0.], [1./1.414, 1./1.414]]
-    >>> # l2_norm(y_true) . l2_norm(y_pred) = [[0., 0.], [0.5, 0.5]]
-    >>> # loss = mean(sum(l2_norm(y_true) . l2_norm(y_pred), axis=1))
-    >>> #      = -((0. + 0.) + (0.5 + 0.5)) / 2
-    >>> cosine_loss(y_true, y_pred).numpy()
-    -0.5
-
-    >>> # Calling with 'sample_weight'.
-    >>> cosine_loss(y_true, y_pred, sample_weight=[0.8, 0.2]).numpy()
-    -0.0999
-
-    >>> # Using 'sum' reduction type.
-    >>> cosine_loss = tf.keras.losses.CosineSimilarity(axis=1,
-    ...     reduction=tf.keras.losses.Reduction.SUM)
-    >>> cosine_loss(y_true, y_pred).numpy()
-    -0.999
-
-    >>> # Using 'none' reduction type.
-    >>> cosine_loss = tf.keras.losses.CosineSimilarity(axis=1,
-    ...     reduction=tf.keras.losses.Reduction.NONE)
-    >>> cosine_loss(y_true, y_pred).numpy()
-    array([-0., -0.999], dtype=float32)
-
-    Usage with the `compile()` API:
-
-    ```python
-    model.compile(optimizer='sgd',
-                  loss=tf.keras.losses.CosineSimilarity(axis=1))
-    ```
-
-    Args:
-      axis: The axis along which the cosine similarity is computed
-        (the features axis). Defaults to -1.
-      reduction: Type of `tf.keras.losses.Reduction` to apply to loss.
-        Default value is `AUTO`. `AUTO` indicates that the reduction option will
-        be determined by the usage context. For almost all cases this defaults
-        to `SUM_OVER_BATCH_SIZE`. When used under a
-        `tf.distribute.Strategy`, except via `Model.compile()` and
-        `Model.fit()`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
-        will raise an error. Please see this custom training [tutorial](
-        https://www.tensorflow.org/tutorials/distribute/custom_training)
-        for more details.
-      name: Optional name for the instance.
-    """
-
-    def __init__(
-        self,
-        axis=-1,
-        reduction=losses_utils.ReductionV2.AUTO,
-        name="cosine_similarity",
-    ):
-        super().__init__(
-            cosine_similarity, reduction=reduction, name=name, axis=axis
-        )
-
-
 # Aliases.
 
 bce = BCE = binary_crossentropy
 
@@ -2849,10 +2871,12 @@ def serialize(loss, use_legacy_format=False):
     """Serializes loss function or `Loss` instance.
 
     Args:
-      loss: A Keras `Loss` instance or a loss function.
+        loss: A Keras `Loss` instance or a loss function.
+        use_legacy_format: Boolean, whether to use the legacy serialization
+            format. Defaults to `False`.
 
     Returns:
-      Loss configuration dictionary.
+        Loss configuration dictionary.
     """
     if not isinstance(loss, Loss):
        warnings.warn(
 
@@ -2872,8 +2896,10 @@ def deserialize(name, custom_objects=None, use_legacy_format=False):
     Args:
        name: Loss configuration.
        custom_objects: Optional dictionary mapping names (strings) to custom
-          objects (classes and functions) to be considered during
-          deserialization.
+            objects (classes and functions) to be considered during
+            deserialization.
+        use_legacy_format: Boolean, whether to use the legacy serialization
+            format. Defaults to `False`.
 
     Returns:
        A Keras `Loss` instance or a loss function.
 
@@ -2917,15 +2943,15 @@ def get(identifier):
 
     Args:
-      identifier: A loss identifier. One of None or string name of a loss
-        function/class or loss configuration dictionary or a loss function or a
-        loss class instance.
+        identifier: A loss identifier. One of None or string name of a loss
+            function/class or loss configuration dictionary or a loss function
+            or a loss class instance.
 
     Returns:
-      A Keras loss as a `function`/ `Loss` class instance.
+        A Keras loss as a `function`/ `Loss` class instance.
 
     Raises:
-      ValueError: If `identifier` cannot be interpreted.
+        ValueError: If `identifier` cannot be interpreted.
     """
     if identifier is None:
        return None
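Editor's note: the `serialize` / `deserialize` / `get` helpers touched in the final hunks round-trip as shown below. A small usage sketch, assuming a standard `tf.keras` install (not part of the diff):

```python
import tensorflow as tf

loss_fn = tf.keras.losses.get("mse")  # resolves the alias to a function
config = tf.keras.losses.serialize(tf.keras.losses.MeanSquaredError())
restored = tf.keras.losses.deserialize(config)  # back to a Loss instance
print(loss_fn.__name__, type(restored).__name__)
```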