Remove tensorflow-addons #266

Open · wants to merge 3 commits into master
README.md (4 changes: 1 addition & 3 deletions)
@@ -43,7 +43,6 @@ TCN(
kernel_initializer='he_normal',
use_batch_norm=False,
use_layer_norm=False,
use_weight_norm=False,
go_backwards=False,
return_state=False,
**kwargs
@@ -64,7 +63,6 @@ TCN(
- `kernel_initializer`: Initializer for the kernel weights matrix (Conv1D).
- `use_batch_norm`: Whether to use batch normalization in the residual layers or not.
- `use_layer_norm`: Whether to use layer normalization in the residual layers or not.
- `use_weight_norm`: Whether to use weight normalization in the residual layers or not.
- `go_backwards`: Boolean (default False). If True, process the input sequence backwards and return the reversed sequence.
- `return_state`: Boolean. Whether to return the last state in addition to the output. Default: False.
- `kwargs`: Any other set of arguments for configuring the parent class Layer. For example "name=str", Name of the model. Use unique names when using multiple TCN.
@@ -96,7 +94,7 @@ Here are some of my notes regarding my experience using TCN:
- `activation`: Leave it to default. I have never changed it.
- `kernel_initializer`: If the training of the TCN gets stuck, it might be worth changing this parameter. For example: `glorot_uniform`.

- `use_batch_norm`, `use_weight_norm`, `use_layer_norm`: Use normalization if your network is big enough and the task contains enough data. I usually prefer using `use_layer_norm`, but you can try them all and see which one works the best.
- `use_batch_norm`, `use_layer_norm`: Use normalization if your network is big enough and the task contains enough data. I usually prefer using `use_layer_norm`, but you can try them both and see which one works the best.


### Receptive field
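For context on how the layer is called once this change lands, here is a minimal usage sketch based on the README arguments shown above. The concrete values (filters, kernel size, input shape, optimizer) are illustrative assumptions, not part of this PR:

```python
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense
from tcn import TCN

# Illustrative only: with use_weight_norm removed, enable at most one of the
# two remaining normalization flags (use_batch_norm / use_layer_norm).
model = Sequential([
    TCN(nb_filters=64, kernel_size=3, use_layer_norm=True, input_shape=(None, 1)),
    Dense(1, activation='linear'),
])
model.compile(optimizer='adam', loss='mse')
```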
setup.py (2 changes: 1 addition & 1 deletion)
@@ -10,7 +10,7 @@
os.environ['GRPC_PYTHON_BUILD_SYSTEM_OPENSSL'] = '1'
os.environ['GRPC_PYTHON_BUILD_SYSTEM_ZLIB'] = '1'

install_requires = ['numpy', tensorflow, 'tensorflow_addons']
install_requires = ['numpy', tensorflow]

setup(
name='keras-tcn',
tasks/adding_problem/main.py (1 change: 0 additions & 1 deletion)
@@ -26,7 +26,6 @@ def run_task():
nb_stacks=1,
max_len=x_train.shape[1],
use_skip_connections=False,
use_weight_norm=True,
regression=True,
dropout_rate=0
)
tasks/copy_memory/main.py (1 change: 0 additions & 1 deletion)
@@ -30,7 +30,6 @@ def run_task():
use_skip_connections=True,
opt='rmsprop',
lr=5e-4,
use_weight_norm=True,
return_sequences=True)

print(f'x_train.shape = {x_train.shape}')
tasks/mnist_pixel/main.py (1 change: 0 additions & 1 deletion)
@@ -14,7 +14,6 @@ def run_task():
dilations=[2 ** i for i in range(9)],
nb_stacks=1,
max_len=x_train[0:1].shape[1],
use_weight_norm=True,
use_skip_connections=True)

print(f'x_train.shape = {x_train.shape}')
tasks/time_series_forecasting.py (1 change: 0 additions & 1 deletion)
@@ -38,7 +38,6 @@
kernel_size=2,
use_skip_connections=False,
use_batch_norm=False,
use_weight_norm=False,
use_layer_norm=False
),
Dense(1, activation='linear')
tcn/tcn.py (24 changes: 3 additions & 21 deletions)
@@ -36,7 +36,6 @@ def __init__(self,
kernel_initializer: str = 'he_normal',
use_batch_norm: bool = False,
use_layer_norm: bool = False,
use_weight_norm: bool = False,
**kwargs):
"""Defines the residual block for the WaveNet TCN
Args:
@@ -51,7 +50,6 @@
kernel_initializer: Initializer for the kernel weights matrix (Conv1D).
use_batch_norm: Whether to use batch normalization in the residual layers or not.
use_layer_norm: Whether to use layer normalization in the residual layers or not.
use_weight_norm: Whether to use weight normalization in the residual layers or not.
kwargs: Any initializers for Layer class.
"""

@@ -63,7 +61,6 @@
self.dropout_rate = dropout_rate
self.use_batch_norm = use_batch_norm
self.use_layer_norm = use_layer_norm
self.use_weight_norm = use_weight_norm
self.kernel_initializer = kernel_initializer
self.layers = []
self.shape_match_conv = None
@@ -99,20 +96,13 @@ def build(self, input_shape):
name=name,
kernel_initializer=self.kernel_initializer
)
if self.use_weight_norm:
from tensorflow_addons.layers import WeightNormalization
# wrap it. WeightNormalization API is different than BatchNormalization or LayerNormalization.
with K.name_scope('norm_{}'.format(k)):
conv = WeightNormalization(conv)
self._build_layer(conv)

with K.name_scope('norm_{}'.format(k)):
if self.use_batch_norm:
self._build_layer(BatchNormalization())
elif self.use_layer_norm:
self._build_layer(LayerNormalization())
elif self.use_weight_norm:
pass # done above.

with K.name_scope('act_and_dropout_{}'.format(k)):
self._build_layer(Activation(self.activation, name='Act_Conv1D_{}'.format(k)))
@@ -200,7 +190,6 @@ class TCN(Layer):
kernel_initializer: Initializer for the kernel weights matrix (Conv1D).
use_batch_norm: Whether to use batch normalization in the residual layers or not.
use_layer_norm: Whether to use layer normalization in the residual layers or not.
use_weight_norm: Whether to use weight normalization in the residual layers or not.
go_backwards: Boolean (default False). If True, process the input sequence backwards and
return the reversed sequence.
return_state: Boolean. Whether to return the last state in addition to the output. Default: False.
@@ -223,7 +212,6 @@ def __init__(self,
kernel_initializer='he_normal',
use_batch_norm=False,
use_layer_norm=False,
use_weight_norm=False,
go_backwards=False,
return_state=False,
**kwargs):
@@ -240,7 +228,6 @@ def __init__(self,
self.kernel_initializer = kernel_initializer
self.use_batch_norm = use_batch_norm
self.use_layer_norm = use_layer_norm
self.use_weight_norm = use_weight_norm
self.go_backwards = go_backwards
self.return_state = return_state
self.skip_connections = []
@@ -251,7 +238,7 @@ def __init__(self,
self.output_slice_index = None # in case return_sequence=False
self.padding_same_and_time_dim_unknown = False # edge case if padding='same' and time_dim = None

if self.use_batch_norm + self.use_layer_norm + self.use_weight_norm > 1:
if self.use_batch_norm + self.use_layer_norm > 1:
Author comment: I thought to keep my changes minimal, but do we want to turn this into an `or` statement instead, since there are only two variables to check now?
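A minimal sketch of what the simplified check could look like if that suggestion were adopted (illustrative only, not part of this diff):

```python
# With only two flags left, "more than one" is equivalent to "both are set".
if self.use_batch_norm and self.use_layer_norm:
    raise ValueError('Only one normalization can be specified at once.')
```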

raise ValueError('Only one normalization can be specified at once.')

if isinstance(self.nb_filters, list):
@@ -298,7 +285,6 @@ def build(self, input_shape):
dropout_rate=self.dropout_rate,
use_batch_norm=self.use_batch_norm,
use_layer_norm=self.use_layer_norm,
use_weight_norm=self.use_weight_norm,
kernel_initializer=self.kernel_initializer,
name='residual_block_{}'.format(len(self.residual_blocks))))
# build newest residual block
@@ -388,7 +374,6 @@ def get_config(self):
config['activation'] = self.activation_name
config['use_batch_norm'] = self.use_batch_norm
config['use_layer_norm'] = self.use_layer_norm
config['use_weight_norm'] = self.use_weight_norm
config['kernel_initializer'] = self.kernel_initializer
config['go_backwards'] = self.go_backwards
config['return_state'] = self.return_state
@@ -414,8 +399,7 @@ def compiled_tcn(num_feat, # type: int
opt='adam',
lr=0.002,
use_batch_norm=False,
use_layer_norm=False,
use_weight_norm=False):
use_layer_norm=False,):
# type: (...) -> Model
"""Creates a compiled TCN model for a given task (i.e. regression or classification).
Classification uses a sparse categorical loss. Please input class ids and not one-hot encodings.
@@ -440,7 +424,6 @@ def compiled_tcn(num_feat, # type: int
lr: Learning rate.
use_batch_norm: Whether to use batch normalization in the residual layers or not.
use_layer_norm: Whether to use layer normalization in the residual layers or not.
use_weight_norm: Whether to use weight normalization in the residual layers or not.
Returns:
A compiled keras TCN.
"""
@@ -451,8 +434,7 @@

x = TCN(nb_filters, kernel_size, nb_stacks, dilations, padding,
use_skip_connections, dropout_rate, return_sequences,
activation, kernel_initializer, use_batch_norm, use_layer_norm,
use_weight_norm, name=name)(input_layer)
activation, kernel_initializer, use_batch_norm, use_layer_norm, name=name)(input_layer)

print('x.shape=', x.shape)

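One side effect worth noting: `get_config()` no longer emits `use_weight_norm`, so a config saved with an older keras-tcn release will carry a key that `TCN.__init__` no longer accepts. A hedged sketch of a small migration helper one could use when reloading such configs (the helper name and usage are assumptions, not part of this PR):

```python
def migrate_tcn_config(config):
    """Drop the argument removed by this PR from a TCN config that was
    saved with an older keras-tcn release."""
    config = dict(config)  # avoid mutating the caller's dict
    config.pop('use_weight_norm', None)  # no longer a TCN.__init__ argument
    return config

# Hypothetical usage when re-instantiating a layer from an old config:
# tcn_layer = TCN(**migrate_tcn_config(old_config))
```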