Commit 640168f

cleanup activation
Fix #62.
Fix #63.
albertz committed Nov 5, 2021
1 parent 162208e commit 640168f
Showing 3 changed files with 22 additions and 100 deletions.
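In short: the generated `activation` wrapper and the `activation=` option of the generated layer wrappers (e.g. Linear, Conv, _Combine, eval) are removed; activations are now applied explicitly via the functions in nn/math_.py (#63, #62). A rough before/after sketch of user code, under the assumption that the package is imported as `nn` and that a generated `linear` wrapper with an `n_out` argument exists:

# Hypothetical sketch, not code from this commit; assumes `linear` is one of
# the generated layer wrappers and takes an `n_out` argument.
from returnn_common import nn

def block(x):
  # Before this commit, the activation could be passed as a layer option:
  #   return nn.linear(x, n_out=512, activation="relu")
  # After this commit, the layer and the activation are composed explicitly:
  return nn.relu(nn.linear(x, n_out=512))
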
2 changes: 2 additions & 0 deletions nn/_generate_layers.py
@@ -44,6 +44,7 @@
"cond", "masked_computation", "subnetwork",

"source", # we have get_extern_data instead
"activation", # will be explicit. https://github.com/rwth-i6/returnn_common/issues/63
"swap_axes",
"gather_nd", # -> gather
"softmax", # misleading (because not just activation), also we will have a separate softmax activation
@@ -507,6 +508,7 @@ def has_recurrent_state(self) -> bool:
_IgnoreParamNames = {
"self", "name", "network", "output",
"n_out", "out_type", "sources", "target", "loss", "loss_", "size_target",
"activation", # more explicitly decoupled. https://github.com/rwth-i6/returnn_common/issues/62
"name_scope", "reuse_params",
"rec_previous_layer", "control_dependencies_on_output",
"state", "initial_state", "initial_output",
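For context, the two name sets touched above play different roles in the generator: the first blacklist skips generating a wrapper for that RETURNN layer class altogether (so there is no generated `activation` wrapper any more, #63), while _IgnoreParamNames drops the named argument from every generated wrapper's constructor (so Linear, Conv, etc. lose their `activation=` option, #62). A minimal illustration of how such sets are typically consulted; a sketch only, not the actual _generate_layers.py logic, with made-up helper names:

# Illustrative sketch; not the real generator code.
layer_blacklist = {"source", "activation", "swap_axes", "gather_nd", "softmax"}
ignore_param_names = {"self", "name", "network", "output", "activation"}

def wrapper_args(layer_name, init_arg_names):
  """Which constructor args end up in the generated wrapper for this layer."""
  if layer_name in layer_blacklist:
    return []  # no wrapper is generated for this layer at all
  return [a for a in init_arg_names if a not in ignore_param_names]
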
90 changes: 1 addition & 89 deletions nn/_generated_layers.py
@@ -1,6 +1,6 @@
"""
This file is auto-generated by _generate_layers.py.
RETURNN: 1.20211027.132338+git.a773137
RETURNN: 1.20211104.153922+git.bc19cba
These are the RETURNN layers directly wrapped.
Note that we intentionally exclude some layers or options for more consistency.
@@ -222,72 +222,6 @@ def scaled_gradient(
return mod(source, name=name)


class _Activation(_Base):
"""
This layer just applies an activation function.
See :func:`returnn.tf.util.basic.get_activation_function` about supported functions.
Also see :class:`EvalLayer` and :class:`CombineLayer` for similar layers.
"""
returnn_layer_class = 'activation'
has_recurrent_state = False
has_variables = False

# noinspection PyShadowingBuiltins,PyShadowingNames
def __init__(self,
*,
activation: str,
**kwargs):
"""
:param str activation: e.g. "relu", "tanh", etc
"""
super().__init__(**kwargs)
self.activation = activation

def get_opts(self):
"""
Return all options
"""
opts = {
'activation': self.activation,
}
opts = {key: value for (key, value) in opts.items() if value is not NotSpecified}
return {**opts, **super().get_opts()}

# noinspection PyShadowingBuiltins,PyShadowingNames
def make_layer_dict(self,
source: LayerRef,
) -> LayerDictRaw:
"""
Make layer dict
"""
assert isinstance(source, LayerRef)
return {
'class': 'activation',
'from': source,
**self.get_opts()}


# noinspection PyShadowingBuiltins,PyShadowingNames
def activation(
source: LayerRef,
*,
activation: str,
name: Optional[Union[str, NameCtx]] = None) -> Layer:
"""
This layer just applies an activation function.
See :func:`returnn.tf.util.basic.get_activation_function` about supported functions.
Also see :class:`EvalLayer` and :class:`CombineLayer` for similar layers.
:param LayerRef source:
:param str activation: e.g. "relu", "tanh", etc
:param str|None name:
"""
mod = _Activation(
activation=activation,
)
return mod(source, name=name)


class BatchNorm(_Copy):
"""
Implements batch-normalization (https://arxiv.org/abs/1502.03167) as a separate layer.
@@ -1143,7 +1077,6 @@ class Linear(_Base):
def __init__(self,
n_out: int,
*,
activation: Optional[str] = NotSpecified,
with_bias: bool = NotSpecified,
grad_filter: Optional[float] = NotSpecified,
forward_weights_init: str = NotSpecified,
@@ -1152,7 +1085,6 @@ def __init__(self,
**kwargs):
"""
:param int n_out: output dimension
:param str|None activation: e.g. "relu", or None
:param bool with_bias:
:param float|None grad_filter: if grad norm is higher than this threshold (before activation), the grad is removed
:param str forward_weights_init: see :func:`returnn.tf.util.basic.get_initializer`
@@ -1161,7 +1093,6 @@
"""
super().__init__(**kwargs)
self.n_out = n_out
self.activation = activation
self.with_bias = with_bias
self.grad_filter = grad_filter
self.forward_weights_init = forward_weights_init
@@ -1174,7 +1105,6 @@ def get_opts(self):
"""
opts = {
'n_out': self.n_out,
'activation': self.activation,
'with_bias': self.with_bias,
'grad_filter': self.grad_filter,
'forward_weights_init': self.forward_weights_init,
@@ -3170,7 +3100,6 @@ def __init__(self,
input_split_feature_dim: Optional[int] = NotSpecified,
auto_use_channel_first: bool = NotSpecified,
with_bias: Union[bool, NotSpecified] = NotSpecified,
activation: Optional[str] = NotSpecified,
forward_weights_init: Any = NotSpecified,
bias_init: Any = NotSpecified,
filter_perm: Optional[Dict[str, str]] = NotSpecified,
@@ -3193,7 +3122,6 @@ def __init__(self,
will be divided by input_split_feature_dim, thus it must be a multiple of that value.
:param bool auto_use_channel_first: convert the input to NCHW or not
:param bool|NotSpecified with_bias: if True, will add a bias to the output features. False by default
:param None|str activation: if set, will apply this function at the end
:param forward_weights_init:
:param bias_init:
:param dict[str,str]|None filter_perm: transposes the filter (input filter as layer)
@@ -3210,7 +3138,6 @@ def __init__(self,
self.input_split_feature_dim = input_split_feature_dim
self.auto_use_channel_first = auto_use_channel_first
self.with_bias = with_bias
self.activation = activation
self.forward_weights_init = forward_weights_init
self.bias_init = bias_init
self.filter_perm = filter_perm
@@ -3231,7 +3158,6 @@ def get_opts(self):
'input_split_feature_dim': self.input_split_feature_dim,
'auto_use_channel_first': self.auto_use_channel_first,
'with_bias': self.with_bias,
'activation': self.activation,
'forward_weights_init': self.forward_weights_init,
'bias_init': self.bias_init,
'filter_perm': self.filter_perm,
@@ -3455,7 +3381,6 @@ def __init__(self,
n_out: int,
*,
filter_size: List[int],
activation: Optional[str],
strides: Optional[List[int]] = NotSpecified,
padding: str = NotSpecified,
remove_padding: Any = NotSpecified,
@@ -3468,7 +3393,6 @@
"""
:param int n_out: output dimension
:param list[int] filter_size:
:param str|None activation:
:param list[int]|None strides: specifies the upscaling. by default, same as filter_size
:param str padding: "same" or "valid"
:param list[int]|int remove_padding:
@@ -3482,7 +3406,6 @@
super().__init__(**kwargs)
self.n_out = n_out
self.filter_size = filter_size
self.activation = activation
self.strides = strides
self.padding = padding
self.remove_padding = remove_padding
@@ -3499,7 +3422,6 @@ def get_opts(self):
opts = {
'n_out': self.n_out,
'filter_size': self.filter_size,
'activation': self.activation,
'strides': self.strides,
'padding': self.padding,
'remove_padding': self.remove_padding,
@@ -4523,7 +4445,6 @@ class _Combine(_Base):
def __init__(self,
*,
kind: str,
activation: Optional[str] = NotSpecified,
with_bias: bool = NotSpecified,
eval: Union[str, callable] = NotSpecified,
eval_locals: Optional[Dict[str]] = NotSpecified,
@@ -4532,15 +4453,13 @@ def __init__(self,
"""
:param str kind:
currently accepted values are `average`, `add`, `sub`, `mul`, `truediv`, `logical_and`, `logical_or`, or `eval`
:param str|None activation: if provided, activation function to apply, e.g. "tanh" or "relu"
:param bool with_bias: if given, will add a trainable bias tensor
:param str|callable eval: for kind="eval", will eval this string. or function. see :func:`_op_kind_eval`
:param dict[str]|None eval_locals: locals for eval
:param bool eval_for_output_loss: will do the same eval on layer.output_loss
"""
super().__init__(**kwargs)
self.kind = kind
self.activation = activation
self.with_bias = with_bias
self.eval = eval
self.eval_locals = eval_locals
@@ -4552,7 +4471,6 @@ def get_opts(self):
"""
opts = {
'kind': self.kind,
'activation': self.activation,
'with_bias': self.with_bias,
'eval': self.eval,
'eval_locals': self.eval_locals,
@@ -4580,7 +4498,6 @@ def _combine(
source: Union[List[LayerRef], Tuple[LayerRef]],
*,
kind: str,
activation: Optional[str] = NotSpecified,
with_bias: bool = NotSpecified,
eval: Union[str, callable] = NotSpecified,
eval_locals: Optional[Dict[str]] = NotSpecified,
@@ -4597,7 +4514,6 @@ def _combine(
:param list[LayerRef]|tuple[LayerRef] source:
:param str kind:
currently accepted values are `average`, `add`, `sub`, `mul`, `truediv`, `logical_and`, `logical_or`, or `eval`
:param str|None activation: if provided, activation function to apply, e.g. "tanh" or "relu"
:param bool with_bias: if given, will add a trainable bias tensor
:param str|callable eval: for kind="eval", will eval this string. or function. see :func:`_op_kind_eval`
:param dict[str]|None eval_locals: locals for eval
@@ -4606,7 +4522,6 @@
"""
mod = _Combine(
kind=kind,
activation=activation,
with_bias=with_bias,
eval=eval,
eval_locals=eval_locals,
@@ -4673,7 +4588,6 @@ def eval(
source: Union[LayerRef, List[LayerRef], Tuple[LayerRef]],
*,
eval: str,
activation: Optional[str] = NotSpecified,
with_bias: bool = NotSpecified,
eval_locals: Optional[Dict[str]] = NotSpecified,
eval_for_output_loss: bool = NotSpecified,
@@ -4689,15 +4603,13 @@
:param LayerRef|list[LayerRef]|tuple[LayerRef] source:
:param str eval: will eval this string. see :func:`_op_kind_eval`
:param str|None activation: if provided, activation function to apply, e.g. "tanh" or "relu"
:param bool with_bias: if given, will add a trainable bias tensor
:param dict[str]|None eval_locals: locals for eval
:param bool eval_for_output_loss: will do the same eval on layer.output_loss
:param str|None name:
"""
mod = _Eval(
eval=eval,
activation=activation,
with_bias=with_bias,
eval_locals=eval_locals,
eval_for_output_loss=eval_for_output_loss,
30 changes: 19 additions & 11 deletions nn/math_.py
@@ -3,50 +3,58 @@
(potential activation functions).
"""

from .base import LayerRef, Layer
from ._generated_layers import activation
from .base import LayerRef, Layer, make_layer


def relu(x: LayerRef) -> Layer:
"""ReLU"""
return activation(x, activation="relu")
return _activation(x, activation="relu")


def elu(x: LayerRef) -> Layer:
"""ELU https://arxiv.org/abs/1511.07289"""
return activation(x, activation="elu")
return _activation(x, activation="elu")


def selu(x: LayerRef) -> Layer:
"""SELU https://arxiv.org/abs/1706.02515"""
return activation(x, activation="selu")
return _activation(x, activation="selu")


def gelu(x: LayerRef) -> Layer:
"""GELU https://arxiv.org/abs/1606.08415"""
return activation(x, activation="gelu")
return _activation(x, activation="gelu")


def exp(x: LayerRef) -> Layer:
"""exp"""
return activation(x, activation="exp")
return _activation(x, activation="exp")


def log(x: LayerRef) -> Layer:
"""log"""
return activation(x, activation="log")
return _activation(x, activation="log")


def tanh(x: LayerRef) -> Layer:
"""tanh"""
return activation(x, activation="tanh")
return _activation(x, activation="tanh")


def sigmoid(x: LayerRef) -> Layer:
"""sigmoid"""
return activation(x, activation="sigmoid")
return _activation(x, activation="sigmoid")


def swish(x: LayerRef) -> Layer:
"""swish"""
return activation(x, activation="swish")
return _activation(x, activation="swish")


def _activation(x: LayerRef, activation: str) -> Layer:
"""
RETURNN ActivationLayer.
Only for internal use.
If anything is missing here in this module, please just add it.
"""
return make_layer({"class": "activation", "from": x, "activation": activation}, name=activation)
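As a usage note: each helper above now builds the RETURNN ActivationLayer dict directly via `make_layer`, so e.g. `relu(x)` is roughly equivalent to `make_layer({"class": "activation", "from": x, "activation": "relu"}, name="relu")`. Adding a missing activation follows the same one-line pattern; a sketch, assuming "softplus" is among the names supported by :func:`returnn.tf.util.basic.get_activation_function`:

# Sketch following the pattern of nn/math_.py above; "softplus" is assumed to
# be a valid RETURNN activation name.
def softplus(x: LayerRef) -> Layer:
  """softplus"""
  return _activation(x, activation="softplus")
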
