diff --git a/README.md b/README.md
index bd121391..30d57fae 100644
--- a/README.md
+++ b/README.md
@@ -43,7 +43,7 @@ Would you like to build/train a model using Keras/Python? And would you like to
 Layer types typically used in image recognition/generation are supported, making many popular model architectures possible (see [Performance section](#performance)).
 
 * `Add`, `Concatenate`, `Subtract`, `Multiply`, `Average`, `Maximum`, `Minimum`, `Dot`
-* `AveragePooling1D/2D`, `GlobalAveragePooling1D/2D`
+* `AveragePooling1D/2D/3D`, `GlobalAveragePooling1D/2D/3D`
 * `Bidirectional`, `TimeDistributed`, `GRU`, `LSTM`, `CuDNNGRU`, `CuDNNLSTM`
 * `Conv1D/2D`, `SeparableConv2D`, `DepthwiseConv2D`
 * `Cropping1D/2D/3D`, `ZeroPadding1D/2D/3D`
@@ -52,7 +52,7 @@ Layer types typically used in image recognition/generation are supported, making
 * `SpatialDropout1D`, `SpatialDropout2D`, `SpatialDropout3D`
 * `RandomContrast`, `RandomFlip`, `RandomHeight`
 * `RandomRotation`, `RandomTranslation`, `RandomWidth`, `RandomZoom`
-* `MaxPooling1D/2D`, `GlobalMaxPooling1D/2D`
+* `MaxPooling1D/2D/3D`, `GlobalMaxPooling1D/2D/3D`
 * `ELU`, `LeakyReLU`, `ReLU`, `SeLU`, `PReLU`
 * `Sigmoid`, `Softmax`, `Softplus`, `Tanh`
 * `Exponential`, `GELU`, `Softsign`, `Rescaling`
@@ -73,13 +73,13 @@ Layer types typically used in image recognition/generation are supported, making
 
 ### Currently not supported are the following:
 
-`ActivityRegularization`, `AdditiveAttention`, `Attention`, `AveragePooling3D`,
+`ActivityRegularization`, `AdditiveAttention`, `Attention`,
 `CategoryEncoding`, `CenterCrop`,
 `Conv2DTranspose` ([why](FAQ.md#why-are-conv2dtranspose-layers-not-supported)),
 `Conv3D`, `ConvLSTM1D`, `ConvLSTM2D`, `Discretization`,
 `GRUCell`, `Hashing`, `IntegerLookup`,
 `Lambda` ([why](FAQ.md#why-are-lambda-layers-not-supported)),
 `LayerNormalization`, `LocallyConnected1D`, `LocallyConnected2D`,
-`LSTMCell`, `Masking`, `MaxPooling3D`, `MultiHeadAttention`,
+`LSTMCell`, `Masking`, `MultiHeadAttention`,
 `RepeatVector`, `Resizing`, `RNN`, `SimpleRNN`, `SimpleRNNCell`,
 `StackedRNNCells`, `StringLookup`, `TextVectorization`,
 `ThresholdedReLU`, `UnitNormalization`, `Upsampling3D`, `temporal` models
diff --git a/include/fdeep/common.hpp b/include/fdeep/common.hpp
index 45b4ab2f..694f25be 100644
--- a/include/fdeep/common.hpp
+++ b/include/fdeep/common.hpp
@@ -36,14 +36,6 @@
 #include <string>
 #include <vector>
 
-#if defined(__GNUC__) || defined(__GNUG__)
-#define FDEEP_FORCE_INLINE __attribute__((always_inline)) inline
-#elif defined(_MSC_VER)
-#define FDEEP_FORCE_INLINE __forceinline
-#else
-#define FDEEP_FORCE_INLINE inline
-#endif
-
 namespace fdeep { namespace internal
 {
diff --git a/include/fdeep/convolution3d.hpp b/include/fdeep/convolution3d.hpp
new file mode 100644
index 00000000..e2d3fa30
--- /dev/null
+++ b/include/fdeep/convolution3d.hpp
@@ -0,0 +1,125 @@
+// Copyright 2016, Tobias Hermann.
+// https://github.com/Dobiasd/frugally-deep
+// Distributed under the MIT License.
+// (See accompanying LICENSE file or at
+// https://opensource.org/licenses/MIT)
+
+#pragma once
+
+#include "fdeep/common.hpp"
+
+#include "fdeep/filter.hpp"
+
+#include <algorithm>
+#include <cmath>
+#include <cstddef>
+#include <vector>
+
+namespace fdeep { namespace internal
+{
+
+struct convolution3d_config
+{
+    std::size_t pad_front_;
+    std::size_t pad_back_;
+    std::size_t pad_top_;
+    std::size_t pad_bottom_;
+    std::size_t pad_left_;
+    std::size_t pad_right_;
+    std::size_t out_size_d4_;
+    std::size_t out_height_;
+    std::size_t out_width_;
+};
+
+inline convolution3d_config preprocess_convolution_3d(
+    const shape3& filter_shape,
+    const shape3& strides,
+    padding pad_type,
+    std::size_t input_shape_size_d4,
+    std::size_t input_shape_height,
+    std::size_t input_shape_width)
+{
+    const int filter_size_d4 = static_cast<int>(filter_shape.size_dim_4_);
+    const int filter_height = static_cast<int>(filter_shape.height_);
+    const int filter_width = static_cast<int>(filter_shape.width_);
+    const int in_size_d4 = static_cast<int>(input_shape_size_d4);
+    const int in_height = static_cast<int>(input_shape_height);
+    const int in_width = static_cast<int>(input_shape_width);
+    const int strides_d4 = static_cast<int>(strides.size_dim_4_);
+    const int strides_y = static_cast<int>(strides.height_);
+    const int strides_x = static_cast<int>(strides.width_);
+
+    int out_size_d4 = 0;
+    int out_height = 0;
+    int out_width = 0;
+
+    if (pad_type == padding::same || pad_type == padding::causal)
+    {
+        out_size_d4 = fplus::ceil(static_cast<float>(in_size_d4) / static_cast<float>(strides_d4) - 0.001);
+        out_height = fplus::ceil(static_cast<float>(in_height) / static_cast<float>(strides_y) - 0.001);
+        out_width = fplus::ceil(static_cast<float>(in_width) / static_cast<float>(strides_x) - 0.001);
+    }
+    else
+    {
+        out_size_d4 = fplus::ceil(static_cast<float>(in_size_d4 - filter_size_d4 + 1) / static_cast<float>(strides_d4) - 0.001);
+        out_height = fplus::ceil(static_cast<float>(in_height - filter_height + 1) / static_cast<float>(strides_y) - 0.001);
+        out_width = fplus::ceil(static_cast<float>(in_width - filter_width + 1) / static_cast<float>(strides_x) - 0.001);
+    }
+
+    int pad_front = 0;
+    int pad_back = 0;
+    int pad_top = 0;
+    int pad_bottom = 0;
+    int pad_left = 0;
+    int pad_right = 0;
+
+    if (pad_type == padding::same)
+    {
+        int pad_along_d4 = 0;
+        int pad_along_height = 0;
+        int pad_along_width = 0;
+
+        if (in_size_d4 % strides_d4 == 0)
+            pad_along_d4 = std::max(filter_size_d4 - strides_d4, 0);
+        else
+            pad_along_d4 = std::max(filter_size_d4 - (in_size_d4 % strides_d4), 0);
+        if (in_height % strides_y == 0)
+            pad_along_height = std::max(filter_height - strides_y, 0);
+        else
+            pad_along_height = std::max(filter_height - (in_height % strides_y), 0);
+        if (in_width % strides_x == 0)
+            pad_along_width = std::max(filter_width - strides_x, 0);
+        else
+            pad_along_width = std::max(filter_width - (in_width % strides_x), 0);
+
+        pad_front = pad_along_d4 / 2;
+        pad_back = pad_along_d4 - pad_front;
+        pad_top = pad_along_height / 2;
+        pad_bottom = pad_along_height - pad_top;
+        pad_left = pad_along_width / 2;
+        pad_right = pad_along_width - pad_left;
+    }
+    else if (pad_type == padding::causal)
+    {
+        pad_front = filter_size_d4 - 1;
+        pad_top = filter_height - 1;
+        pad_left = filter_width - 1;
+    }
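+
+    // Illustrative example (editor's note, not part of the original patch):
+    // with SAME padding, in_height = 5, filter_height = 3 and strides_y = 2,
+    // the code above yields out_height = ceil(5 / 2) = 3; since 5 % 2 != 0,
+    // pad_along_height = max(3 - (5 % 2), 0) = 2, hence pad_top = 1 and
+    // pad_bottom = 1, matching TensorFlow's SAME-padding convention.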
+    std::size_t out_size_d4_size_t = fplus::integral_cast_throw<std::size_t>(out_size_d4);
+    std::size_t out_height_size_t = fplus::integral_cast_throw<std::size_t>(out_height);
+    std::size_t out_width_size_t = fplus::integral_cast_throw<std::size_t>(out_width);
+    std::size_t pad_front_size_t = fplus::integral_cast_throw<std::size_t>(pad_front);
+    std::size_t pad_back_size_t = fplus::integral_cast_throw<std::size_t>(pad_back);
+    std::size_t pad_top_size_t = fplus::integral_cast_throw<std::size_t>(pad_top);
+    std::size_t pad_bottom_size_t = fplus::integral_cast_throw<std::size_t>(pad_bottom);
+    std::size_t pad_left_size_t = fplus::integral_cast_throw<std::size_t>(pad_left);
+    std::size_t pad_right_size_t = fplus::integral_cast_throw<std::size_t>(pad_right);
+
+    return {pad_front_size_t, pad_back_size_t,
+        pad_top_size_t, pad_bottom_size_t,
+        pad_left_size_t, pad_right_size_t,
+        out_size_d4_size_t, out_height_size_t, out_width_size_t};
+}
+
+} } // namespace fdeep, namespace internal
diff --git a/include/fdeep/fdeep.hpp b/include/fdeep/fdeep.hpp
index fd74a1e8..27a9f649 100644
--- a/include/fdeep/fdeep.hpp
+++ b/include/fdeep/fdeep.hpp
@@ -14,6 +14,7 @@
 #include "fdeep/tensor_pos.hpp"
 #include "fdeep/node.hpp"
 #include "fdeep/shape2.hpp"
+#include "fdeep/shape3.hpp"
 #include "fdeep/tensor_shape.hpp"
 #include "fdeep/tensor_shape_variable.hpp"
 #include "fdeep/recurrent_ops.hpp"
diff --git a/include/fdeep/import_model.hpp b/include/fdeep/import_model.hpp
index a04cd58d..9c4e833b 100644
--- a/include/fdeep/import_model.hpp
+++ b/include/fdeep/import_model.hpp
@@ -29,7 +29,7 @@
 
 #include "fdeep/layers/add_layer.hpp"
 #include "fdeep/layers/average_layer.hpp"
-#include "fdeep/layers/average_pooling_2d_layer.hpp"
+#include "fdeep/layers/average_pooling_3d_layer.hpp"
 #include "fdeep/layers/batch_normalization_layer.hpp"
 #include "fdeep/layers/bidirectional_layer.hpp"
 #include "fdeep/layers/concatenate_layer.hpp"
@@ -42,10 +42,8 @@
 #include "fdeep/layers/exponential_layer.hpp"
 #include "fdeep/layers/flatten_layer.hpp"
 #include "fdeep/layers/gelu_layer.hpp"
-#include "fdeep/layers/global_average_pooling_1d_layer.hpp"
-#include "fdeep/layers/global_max_pooling_1d_layer.hpp"
-#include "fdeep/layers/global_average_pooling_2d_layer.hpp"
-#include "fdeep/layers/global_max_pooling_2d_layer.hpp"
+#include "fdeep/layers/global_average_pooling_3d_layer.hpp"
+#include "fdeep/layers/global_max_pooling_3d_layer.hpp"
 #include "fdeep/layers/hard_sigmoid_layer.hpp"
 #include "fdeep/layers/input_layer.hpp"
 #include "fdeep/layers/layer.hpp"
@@ -56,13 +54,13 @@
 #include "fdeep/layers/permute_layer.hpp"
 #include "fdeep/layers/prelu_layer.hpp"
 #include "fdeep/layers/linear_layer.hpp"
-#include "fdeep/layers/max_pooling_2d_layer.hpp"
+#include "fdeep/layers/max_pooling_3d_layer.hpp"
 #include "fdeep/layers/maximum_layer.hpp"
 #include "fdeep/layers/minimum_layer.hpp"
 #include "fdeep/layers/model_layer.hpp"
 #include "fdeep/layers/multiply_layer.hpp"
 #include "fdeep/layers/normalization_layer.hpp"
-#include "fdeep/layers/pooling_2d_layer.hpp"
+#include "fdeep/layers/pooling_3d_layer.hpp"
 #include "fdeep/layers/relu_layer.hpp"
 #include "fdeep/layers/repeat_vector_layer.hpp"
 #include "fdeep/layers/rescaling_layer.hpp"
@@ -244,6 +242,26 @@ inline shape2 create_shape2(const nlohmann::json& data)
     }
 }
 
+inline shape3 create_shape3(const nlohmann::json& data)
+{
+    if (data.is_array())
+    {
+        assertion(data.size() == 1 || data.size() == 2 || data.size() == 3,
+            "invalid number of dimensions in shape3");
+        if (data.size() == 1)
+            return shape3(1, 1, data[0]);
+        if (data.size() == 2)
+            return shape3(1, data[0], data[1]);
+        else
+            return shape3(data[0], data[1], data[2]);
+    }
+    else
+    {
+        const std::size_t width = data;
+        return shape3(1, 1, width);
+    }
+}
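+
+// Note (added for clarity): 1D and 2D pool sizes and strides are promoted to
+// shape3 by filling the leading dimensions with 1, which lets the 3D pooling
+// code path serve the 1D and 2D layer types as well.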
+
 inline std::size_t create_size_t(const nlohmann::json& int_data)
 {
     const int val = int_data;
@@ -520,71 +538,43 @@ inline layer_ptr create_identity_layer(
     return std::make_shared<identity_layer>(name);
 }
 
-inline layer_ptr create_max_pooling_2d_layer(
+inline layer_ptr create_max_pooling_3d_layer(
     const get_param_f&, const nlohmann::json& data,
     const std::string& name)
 {
-    const auto pool_size = create_shape2(data["config"]["pool_size"]);
-    const auto strides = create_shape2(data["config"]["strides"]);
-    const bool channels_first = json_object_get(data["config"], "data_format", std::string("channels_last")) == "channels_first";
+    const auto pool_size = create_shape3(data["config"]["pool_size"]);
+    const auto strides = create_shape3(data["config"]["strides"]);
     const std::string padding_str = data["config"]["padding"];
     const auto pad_type = create_padding(padding_str);
-    return std::make_shared<max_pooling_2d_layer>(name,
-        pool_size, strides, channels_first, pad_type);
+    return std::make_shared<max_pooling_3d_layer>(name,
+        pool_size, strides, pad_type);
 }
 
-inline layer_ptr create_average_pooling_2d_layer(
+inline layer_ptr create_average_pooling_3d_layer(
     const get_param_f&, const nlohmann::json& data,
     const std::string& name)
 {
-    const auto pool_size = create_shape2(data["config"]["pool_size"]);
-    const auto strides = create_shape2(data["config"]["strides"]);
-    const bool channels_first = json_object_get(data["config"], "data_format", std::string("channels_last")) == "channels_first";
+    const auto pool_size = create_shape3(data["config"]["pool_size"]);
+    const auto strides = create_shape3(data["config"]["strides"]);
     const std::string padding_str = data["config"]["padding"];
     const auto pad_type = create_padding(padding_str);
-    return std::make_shared<average_pooling_2d_layer>(name,
-        pool_size, strides, channels_first, pad_type);
+    return std::make_shared<average_pooling_3d_layer>(name,
+        pool_size, strides, pad_type);
 }
 
-inline layer_ptr create_global_max_pooling_1d_layer(
-    const get_param_f&, const nlohmann::json& data,
-    const std::string& name)
-{
-    const bool channels_first = json_obj_has_member(data, "config")
-        && json_object_get(data["config"], "data_format", std::string("channels_last")) == "channels_first";
-
-    return std::make_shared<global_max_pooling_1d_layer>(name, channels_first);
-}
-
-inline layer_ptr create_global_max_pooling_2d_layer(
-    const get_param_f&, const nlohmann::json& data,
-    const std::string& name)
-{
-    const bool channels_first = json_obj_has_member(data, "config")
-        && json_object_get(data["config"], "data_format", std::string("channels_last")) == "channels_first";
-
-    return std::make_shared<global_max_pooling_2d_layer>(name, channels_first);
-}
-
-inline layer_ptr create_global_average_pooling_1d_layer(
-    const get_param_f&, const nlohmann::json& data,
+inline layer_ptr create_global_max_pooling_3d_layer(
+    const get_param_f&, const nlohmann::json&,
     const std::string& name)
 {
-    const bool channels_first = json_obj_has_member(data, "config")
-        && json_object_get(data["config"], "data_format", std::string("channels_last")) == "channels_first";
-
-    return std::make_shared<global_average_pooling_1d_layer>(name, channels_first);
+    return std::make_shared<global_max_pooling_3d_layer>(name);
 }
 
-inline layer_ptr create_global_average_pooling_2d_layer(
-    const get_param_f&, const nlohmann::json& data,
+inline layer_ptr create_global_average_pooling_3d_layer(
+    const get_param_f&, const nlohmann::json&,
     const std::string& name)
 {
-    const bool channels_first = json_obj_has_member(data, "config")
-        && json_object_get(data["config"], "data_format", std::string("channels_last")) == "channels_first";
-
-    return std::make_shared<global_average_pooling_2d_layer>(name, channels_first);
+    return std::make_shared<global_average_pooling_3d_layer>(name);
 }
 
 inline layer_ptr create_upsampling_1d_layer(
@@ -1248,14 +1238,18 @@ inline layer_ptr create_layer(const get_param_f& get_param,
         {"PReLU", create_prelu_layer},
         {"ELU", create_elu_layer_isolated},
         {"ReLU", create_relu_layer_isolated},
-        {"MaxPooling1D", create_max_pooling_2d_layer},
{"MaxPooling2D", create_max_pooling_2d_layer}, - {"AveragePooling1D", create_average_pooling_2d_layer}, - {"AveragePooling2D", create_average_pooling_2d_layer}, - {"GlobalMaxPooling1D", create_global_max_pooling_1d_layer}, - {"GlobalMaxPooling2D", create_global_max_pooling_2d_layer}, - {"GlobalAveragePooling1D", create_global_average_pooling_1d_layer}, - {"GlobalAveragePooling2D", create_global_average_pooling_2d_layer}, + {"MaxPooling1D", create_max_pooling_3d_layer}, + {"MaxPooling2D", create_max_pooling_3d_layer}, + {"MaxPooling3D", create_max_pooling_3d_layer}, + {"AveragePooling1D", create_average_pooling_3d_layer}, + {"AveragePooling2D", create_average_pooling_3d_layer}, + {"AveragePooling3D", create_average_pooling_3d_layer}, + {"GlobalMaxPooling1D", create_global_max_pooling_3d_layer}, + {"GlobalMaxPooling2D", create_global_max_pooling_3d_layer}, + {"GlobalMaxPooling3D", create_global_max_pooling_3d_layer}, + {"GlobalAveragePooling1D", create_global_average_pooling_3d_layer}, + {"GlobalAveragePooling2D", create_global_average_pooling_3d_layer}, + {"GlobalAveragePooling3D", create_global_average_pooling_3d_layer}, {"UpSampling1D", create_upsampling_1d_layer}, {"UpSampling2D", create_upsampling_2d_layer}, {"Dense", create_dense_layer}, diff --git a/include/fdeep/layers/average_pooling_2d_layer.hpp b/include/fdeep/layers/average_pooling_2d_layer.hpp deleted file mode 100644 index 20be0983..00000000 --- a/include/fdeep/layers/average_pooling_2d_layer.hpp +++ /dev/null @@ -1,152 +0,0 @@ -// Copyright 2016, Tobias Hermann. -// https://github.com/Dobiasd/frugally-deep -// Distributed under the MIT License. -// (See accompanying LICENSE file or at -// https://opensource.org/licenses/MIT) - -#pragma once - -#include "fdeep/layers/pooling_2d_layer.hpp" - -#include -#include - -namespace fdeep { namespace internal -{ - -FDEEP_FORCE_INLINE tensor average_pool_2d( - std::size_t pool_height, std::size_t pool_width, - std::size_t strides_y, std::size_t strides_x, - bool channels_first, - padding pad_type, - const tensor& in) -{ - const float_type invalid = std::numeric_limits::lowest(); - - const std::size_t feature_count = channels_first - ? in.shape().height_ - : in.shape().depth_ - ; - - const std::size_t in_height = channels_first - ? in.shape().width_ - : in.shape().height_ - ; - - const std::size_t in_width = channels_first - ? 
-        ? in.shape().depth_
-        : in.shape().width_
-        ;
-
-    const auto conv_cfg = preprocess_convolution(
-        shape2(pool_height, pool_width),
-        shape2(strides_y, strides_x),
-        pad_type, in_height, in_width);
-
-    int pad_top_int = static_cast<int>(conv_cfg.pad_top_);
-    int pad_left_int = static_cast<int>(conv_cfg.pad_left_);
-    const std::size_t out_height = conv_cfg.out_height_;
-    const std::size_t out_width = conv_cfg.out_width_;
-
-    if (channels_first)
-    {
-        tensor out(
-            tensor_shape_with_changed_rank(
-                tensor_shape(feature_count, out_height, out_width),
-                in.shape().rank()),
-            0);
-
-        for (std::size_t z = 0; z < feature_count; ++z)
-        {
-            for (std::size_t y = 0; y < out_height; ++y)
-            {
-                for (std::size_t x = 0; x < out_width; ++x)
-                {
-                    float_type val = 0;
-                    std::size_t divisor = 0;
-                    for (std::size_t yf = 0; yf < pool_height; ++yf)
-                    {
-                        int in_get_y = static_cast<int>(strides_y * y + yf) - pad_top_int;
-                        for (std::size_t xf = 0; xf < pool_width; ++xf)
-                        {
-                            int in_get_x = static_cast<int>(strides_x * x + xf) - pad_left_int;
-                            const auto current = in.get_x_z_padded(invalid, z, in_get_y, in_get_x);
-                            if (current != invalid)
-                            {
-                                val += current;
-                                divisor += 1;
-                            }
-                        }
-                    }
-
-                    out.set_ignore_rank(tensor_pos(z, y, x), val / static_cast<float_type>(divisor));
-                }
-            }
-        }
-        return out;
-    }
-    else
-    {
-        tensor out(
-            tensor_shape_with_changed_rank(
-                tensor_shape(out_height, out_width, feature_count),
-                in.shape().rank()),
-            0);
-
-        for (std::size_t y = 0; y < out_height; ++y)
-        {
-            for (std::size_t x = 0; x < out_width; ++x)
-            {
-                for (std::size_t z = 0; z < feature_count; ++z)
-                {
-                    float_type val = 0;
-                    std::size_t divisor = 0;
-                    for (std::size_t yf = 0; yf < pool_height; ++yf)
-                    {
-                        int in_get_y = static_cast<int>(strides_y * y + yf) - pad_top_int;
-                        for (std::size_t xf = 0; xf < pool_width; ++xf)
-                        {
-                            int in_get_x = static_cast<int>(strides_x * x + xf) - pad_left_int;
-                            const auto current = in.get_y_x_padded(invalid,
-                                in_get_y, in_get_x, z);
-                            if (current != invalid)
-                            {
-                                val += current;
-                                divisor += 1;
-                            }
-                        }
-                    }
-
-                    out.set_ignore_rank(tensor_pos(y, x, z), val / static_cast<float_type>(divisor));
-                }
-            }
-        }
-        return out;
-    }
-}
-
-class average_pooling_2d_layer : public pooling_2d_layer
-{
-public:
-    explicit average_pooling_2d_layer(const std::string& name,
-        const shape2& pool_size, const shape2& strides, bool channels_first,
-        padding p) :
-        pooling_2d_layer(name, pool_size, strides, channels_first, p)
-    {
-    }
-protected:
-    tensor pool(const tensor& in) const override
-    {
-        if (pool_size_ == shape2(2, 2) && strides_ == shape2(2, 2))
-            return average_pool_2d(2, 2, 2, 2, channels_first_, padding_, in);
-        else if (pool_size_ == shape2(4, 4) && strides_ == shape2(4, 4))
-            return average_pool_2d(4, 4, 4, 4, channels_first_, padding_, in);
-        else
-            return average_pool_2d(
-                pool_size_.height_, pool_size_.width_,
-                strides_.height_, strides_.width_,
-                channels_first_, padding_, in);
-    }
-};
-
-} } // namespace fdeep, namespace internal
diff --git a/include/fdeep/layers/average_pooling_3d_layer.hpp b/include/fdeep/layers/average_pooling_3d_layer.hpp
new file mode 100644
index 00000000..a32fcc61
--- /dev/null
+++ b/include/fdeep/layers/average_pooling_3d_layer.hpp
@@ -0,0 +1,60 @@
+// Copyright 2016, Tobias Hermann.
+// https://github.com/Dobiasd/frugally-deep
+// Distributed under the MIT License.
+// (See accompanying LICENSE file or at
+// https://opensource.org/licenses/MIT)
+
+#pragma once
+
+#include "fdeep/layers/pooling_3d_layer.hpp"
+
+#include <cstddef>
+#include <limits>
+
+namespace fdeep { namespace internal
+{
+
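+// Averages one 3D pooling window. Out-of-range positions return the sentinel
+// value below and are skipped, and the divisor counts only valid cells, so
+// zero padding does not distort the average (matching Keras). An input value
+// exactly equal to the sentinel would be skipped too; this edge case is
+// inherited from the 2D implementation.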
+inline void inner_average_pool(const tensor& in, tensor& out,
+    std::size_t pool_size_d4, std::size_t pool_height, std::size_t pool_width,
+    std::size_t strides_d4, std::size_t strides_y, std::size_t strides_x,
+    std::size_t d4, std::size_t y, std::size_t x, std::size_t z,
+    int pad_front_int, int pad_top_int, int pad_left_int)
+{
+    const float_type invalid = std::numeric_limits<float_type>::lowest();
+    float_type val = 0;
+    std::size_t divisor = 0;
+    for (std::size_t d4f = 0; d4f < pool_size_d4; ++d4f)
+    {
+        int in_get_d4 = static_cast<int>(strides_d4 * d4 + d4f) - pad_front_int;
+        for (std::size_t yf = 0; yf < pool_height; ++yf)
+        {
+            int in_get_y = static_cast<int>(strides_y * y + yf) - pad_top_int;
+            for (std::size_t xf = 0; xf < pool_width; ++xf)
+            {
+                int in_get_x = static_cast<int>(strides_x * x + xf) - pad_left_int;
+                const auto current = in.get_padded(invalid,
+                    0, in_get_d4, in_get_y, in_get_x, static_cast<int>(z));
+                if (current != invalid)
+                {
+                    val += current;
+                    divisor += 1;
+                }
+            }
+        }
+    }
+    out.set_ignore_rank(tensor_pos(d4, y, x, z), val / static_cast<float_type>(divisor));
+}
+
+class average_pooling_3d_layer : public pooling_3d_layer
+{
+public:
+    explicit average_pooling_3d_layer(const std::string& name,
+        const shape3& pool_size, const shape3& strides,
+        padding p) :
+        pooling_3d_layer(name, pool_size, strides, p,
+            &inner_average_pool)
+    {
+    }
+};
+
+} } // namespace fdeep, namespace internal
diff --git a/include/fdeep/layers/global_average_pooling_1d_layer.hpp b/include/fdeep/layers/global_average_pooling_1d_layer.hpp
deleted file mode 100644
index 1f6b0738..00000000
--- a/include/fdeep/layers/global_average_pooling_1d_layer.hpp
+++ /dev/null
@@ -1,53 +0,0 @@
-// Copyright 2016, Tobias Hermann.
-// https://github.com/Dobiasd/frugally-deep
-// Distributed under the MIT License.
-// (See accompanying LICENSE file or at
-// https://opensource.org/licenses/MIT)
-
-#pragma once
-
-#include "fdeep/layers/global_pooling_layer.hpp"
-
-#include <cstddef>
-
-namespace fdeep { namespace internal
-{
-
-class global_average_pooling_1d_layer : public global_pooling_layer
-{
-public:
-    explicit global_average_pooling_1d_layer(const std::string& name, bool channels_first) :
-        global_pooling_layer(name, channels_first)
-    {
-    }
-protected:
-    tensor pool(const tensor& in) const override
-    {
-        const std::size_t feature_count = channels_first_
-            ? in.shape().width_
-            : in.shape().depth_
-            ;
-
-        const std::size_t step_count = channels_first_
-            ? in.shape().depth_
-            : in.shape().width_
-            ;
-
-        tensor out(tensor_shape(feature_count), 0);
-        for (std::size_t z = 0; z < feature_count; ++z)
-        {
-            float_type val = 0;
-            for (std::size_t x = 0; x < step_count; ++x)
-            {
-                if (channels_first_)
-                    val += in.get_ignore_rank(tensor_pos(z, x));
-                else
-                    val += in.get_ignore_rank(tensor_pos(x, z));
-            }
-            out.set_ignore_rank(tensor_pos(z), val / static_cast<float_type>(step_count));
-        }
-        return out;
-    }
-};
-
-} } // namespace fdeep, namespace internal
diff --git a/include/fdeep/layers/global_average_pooling_2d_layer.hpp b/include/fdeep/layers/global_average_pooling_2d_layer.hpp
deleted file mode 100644
index dd898e92..00000000
--- a/include/fdeep/layers/global_average_pooling_2d_layer.hpp
+++ /dev/null
@@ -1,61 +0,0 @@
-// Copyright 2016, Tobias Hermann.
-// https://github.com/Dobiasd/frugally-deep
-// Distributed under the MIT License.
-// (See accompanying LICENSE file or at
-// https://opensource.org/licenses/MIT)
-
-#pragma once
-
-#include "fdeep/layers/global_pooling_layer.hpp"
-
-#include <cstddef>
-
-namespace fdeep { namespace internal
-{
-
-class global_average_pooling_2d_layer : public global_pooling_layer
-{
-public:
-    explicit global_average_pooling_2d_layer(const std::string& name, bool channels_first) :
-        global_pooling_layer(name, channels_first)
-    {
-    }
-protected:
-    tensor pool(const tensor& in) const override
-    {
-        const std::size_t feature_count = channels_first_
-            ? in.shape().height_
-            : in.shape().depth_
-            ;
-
-        const std::size_t in_height = channels_first_
-            ? in.shape().width_
-            : in.shape().height_
-            ;
-
-        const std::size_t in_width = channels_first_
-            ? in.shape().depth_
-            : in.shape().width_
-            ;
-
-        tensor out(tensor_shape(feature_count), 0);
-        for (std::size_t z = 0; z < feature_count; ++z)
-        {
-            float_type val = 0;
-            for (std::size_t y = 0; y < in_height; ++y)
-            {
-                for (std::size_t x = 0; x < in_width; ++x)
-                {
-                    if (channels_first_)
-                        val += in.get_ignore_rank(tensor_pos(z, y, x));
-                    else
-                        val += in.get_ignore_rank(tensor_pos(y, x, z));
-                }
-            }
-            out.set_ignore_rank(tensor_pos(z), val / static_cast<float_type>(in_height * in_width));
-        }
-        return out;
-    }
-};
-
-} } // namespace fdeep, namespace internal
diff --git a/include/fdeep/layers/global_average_pooling_3d_layer.hpp b/include/fdeep/layers/global_average_pooling_3d_layer.hpp
new file mode 100644
index 00000000..d2e62181
--- /dev/null
+++ b/include/fdeep/layers/global_average_pooling_3d_layer.hpp
@@ -0,0 +1,46 @@
+// Copyright 2016, Tobias Hermann.
+// https://github.com/Dobiasd/frugally-deep
+// Distributed under the MIT License.
+// (See accompanying LICENSE file or at
+// https://opensource.org/licenses/MIT)
+
+#pragma once
+
+#include "fdeep/layers/global_pooling_layer.hpp"
+
+#include <cstddef>
+
+namespace fdeep { namespace internal
+{
+
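+// Averages each channel over dim-4, height, and width. GlobalAveragePooling1D
+// and GlobalAveragePooling2D are served by this class too, since fdeep stores
+// their inputs with the unused leading dimensions set to 1.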
+class global_average_pooling_3d_layer : public global_pooling_layer
+{
+public:
+    explicit global_average_pooling_3d_layer(const std::string& name) :
+        global_pooling_layer(name)
+    {
+    }
+protected:
+    tensor pool(const tensor& in) const override
+    {
+        tensor out(tensor_shape(in.shape().depth_), 0);
+        for (std::size_t z = 0; z < in.shape().depth_; ++z)
+        {
+            float_type val = 0;
+            for (std::size_t d4 = 0; d4 < in.shape().size_dim_4_; ++d4)
+            {
+                for (std::size_t y = 0; y < in.shape().height_; ++y)
+                {
+                    for (std::size_t x = 0; x < in.shape().width_; ++x)
+                    {
+                        val += in.get_ignore_rank(tensor_pos(d4, y, x, z));
+                    }
+                }
+            }
+            out.set_ignore_rank(tensor_pos(z), val / static_cast<float_type>(in.shape().size_dim_4_ * in.shape().height_ * in.shape().width_));
+        }
+        return out;
+    }
+};
+
+} } // namespace fdeep, namespace internal
diff --git a/include/fdeep/layers/global_max_pooling_1d_layer.hpp b/include/fdeep/layers/global_max_pooling_1d_layer.hpp
deleted file mode 100644
index 05d5ecaf..00000000
--- a/include/fdeep/layers/global_max_pooling_1d_layer.hpp
+++ /dev/null
@@ -1,55 +0,0 @@
-// Copyright 2016, Tobias Hermann.
-// https://github.com/Dobiasd/frugally-deep
-// Distributed under the MIT License.
-// (See accompanying LICENSE file or at
-// https://opensource.org/licenses/MIT)
-
-#pragma once
-
-#include "fdeep/layers/global_pooling_layer.hpp"
-
-#include <algorithm>
-#include <cstddef>
-#include <limits>
-
-namespace fdeep { namespace internal
-{
-
-class global_max_pooling_1d_layer : public global_pooling_layer
-{
-public:
-    explicit global_max_pooling_1d_layer(const std::string& name, bool channels_first) :
-        global_pooling_layer(name, channels_first)
-    {
-    }
-protected:
-    tensor pool(const tensor& in) const override
-    {
-        const std::size_t feature_count = channels_first_
-            ? in.shape().width_
-            : in.shape().depth_
-            ;
-
-        const std::size_t step_count = channels_first_
-            ? in.shape().depth_
-            : in.shape().width_
-            ;
-
-        tensor out(tensor_shape(feature_count), 0);
-        for (std::size_t z = 0; z < feature_count; ++z)
-        {
-            float_type val = std::numeric_limits<float_type>::lowest();
-            for (std::size_t x = 0; x < step_count; ++x)
-            {
-                if (channels_first_)
-                    val = std::max(val, in.get_ignore_rank(tensor_pos(z, x)));
-                else
-                    val = std::max(val, in.get_ignore_rank(tensor_pos(x, z)));
-            }
-            out.set_ignore_rank(tensor_pos(z), val);
-        }
-        return out;
-    }
-};
-
-} } // namespace fdeep, namespace internal
diff --git a/include/fdeep/layers/global_max_pooling_2d_layer.hpp b/include/fdeep/layers/global_max_pooling_2d_layer.hpp
deleted file mode 100644
index 12db9a19..00000000
--- a/include/fdeep/layers/global_max_pooling_2d_layer.hpp
+++ /dev/null
@@ -1,63 +0,0 @@
-// Copyright 2016, Tobias Hermann.
-// https://github.com/Dobiasd/frugally-deep
-// Distributed under the MIT License.
-// (See accompanying LICENSE file or at
-// https://opensource.org/licenses/MIT)
-
-#pragma once
-
-#include "fdeep/layers/global_pooling_layer.hpp"
-
-#include <algorithm>
-#include <cstddef>
-#include <limits>
-
-namespace fdeep { namespace internal
-{
-
-class global_max_pooling_2d_layer : public global_pooling_layer
-{
-public:
-    explicit global_max_pooling_2d_layer(const std::string& name, bool channels_first) :
-        global_pooling_layer(name, channels_first)
-    {
-    }
-protected:
-    tensor pool(const tensor& in) const override
-    {
-        const std::size_t feature_count = channels_first_
-            ? in.shape().height_
-            : in.shape().depth_
-            ;
-
-        const std::size_t in_height = channels_first_
-            ? in.shape().width_
-            : in.shape().height_
-            ;
-
-        const std::size_t in_width = channels_first_
-            ? in.shape().depth_
-            : in.shape().width_
-            ;
-
-        tensor out(tensor_shape(feature_count), 0);
-        for (std::size_t z = 0; z < feature_count; ++z)
-        {
-            float_type val = std::numeric_limits<float_type>::lowest();
-            for (std::size_t y = 0; y < in_height; ++y)
-            {
-                for (std::size_t x = 0; x < in_width; ++x)
-                {
-                    if (channels_first_)
-                        val = std::max(val, in.get_ignore_rank(tensor_pos(z, y, x)));
-                    else
-                        val = std::max(val, in.get_ignore_rank(tensor_pos(y, x, z)));
-                }
-            }
-            out.set_ignore_rank(tensor_pos(z), val);
-        }
-        return out;
-    }
-};
-
-} } // namespace fdeep, namespace internal
diff --git a/include/fdeep/layers/global_max_pooling_3d_layer.hpp b/include/fdeep/layers/global_max_pooling_3d_layer.hpp
new file mode 100644
index 00000000..79ef0474
--- /dev/null
+++ b/include/fdeep/layers/global_max_pooling_3d_layer.hpp
@@ -0,0 +1,48 @@
+// Copyright 2016, Tobias Hermann.
+// https://github.com/Dobiasd/frugally-deep
+// Distributed under the MIT License.
+// (See accompanying LICENSE file or at
+// https://opensource.org/licenses/MIT)
+
+#pragma once
+
+#include "fdeep/layers/global_pooling_layer.hpp"
+
+#include <algorithm>
+#include <cstddef>
+#include <limits>
+
+namespace fdeep { namespace internal
+{
+
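+// Takes the per-channel maximum over dim-4, height, and width. As with the
+// average variant, the 1D and 2D global max pooling layers map onto this
+// class via inputs whose unused leading dimensions are 1.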
+class global_max_pooling_3d_layer : public global_pooling_layer
+{
+public:
+    explicit global_max_pooling_3d_layer(const std::string& name) :
+        global_pooling_layer(name)
+    {
+    }
+protected:
+    tensor pool(const tensor& in) const override
+    {
+        tensor out(tensor_shape(in.shape().depth_), 0);
+        for (std::size_t z = 0; z < in.shape().depth_; ++z)
+        {
+            float_type val = std::numeric_limits<float_type>::lowest();
+            for (std::size_t d4 = 0; d4 < in.shape().size_dim_4_; ++d4)
+            {
+                for (std::size_t y = 0; y < in.shape().height_; ++y)
+                {
+                    for (std::size_t x = 0; x < in.shape().width_; ++x)
+                    {
+                        val = std::max(val, in.get_ignore_rank(tensor_pos(d4, y, x, z)));
+                    }
+                }
+            }
+            out.set_ignore_rank(tensor_pos(z), val);
+        }
+        return out;
+    }
+};
+
+} } // namespace fdeep, namespace internal
diff --git a/include/fdeep/layers/global_pooling_layer.hpp b/include/fdeep/layers/global_pooling_layer.hpp
index df749066..9444c562 100644
--- a/include/fdeep/layers/global_pooling_layer.hpp
+++ b/include/fdeep/layers/global_pooling_layer.hpp
@@ -22,9 +22,8 @@ namespace fdeep { namespace internal
 class global_pooling_layer : public layer
 {
 public:
-    explicit global_pooling_layer(const std::string& name, bool channels_first) :
-        layer(name),
-        channels_first_(channels_first)
+    explicit global_pooling_layer(const std::string& name) :
+        layer(name)
     {
     }
 protected:
@@ -34,8 +33,6 @@ class global_pooling_layer : public layer
         return {pool(input)};
     }
     virtual tensor pool(const tensor& input) const = 0;
-
-    bool channels_first_;
 };
 
 } } // namespace fdeep, namespace internal
diff --git a/include/fdeep/layers/max_pooling_2d_layer.hpp b/include/fdeep/layers/max_pooling_2d_layer.hpp
deleted file mode 100644
index dcc41c00..00000000
--- a/include/fdeep/layers/max_pooling_2d_layer.hpp
+++ /dev/null
@@ -1,142 +0,0 @@
-// Copyright 2016, Tobias Hermann.
-// https://github.com/Dobiasd/frugally-deep
-// Distributed under the MIT License.
-// (See accompanying LICENSE file or at
-// https://opensource.org/licenses/MIT)
-
-#pragma once
-
-#include "fdeep/layers/pooling_2d_layer.hpp"
-
-#include <algorithm>
-#include <cstddef>
-#include <limits>
-
-namespace fdeep { namespace internal
-{
-
-FDEEP_FORCE_INLINE tensor max_pool_2d(
-    std::size_t pool_height, std::size_t pool_width,
-    std::size_t strides_y, std::size_t strides_x,
-    bool channels_first,
-    padding pad_type,
-    const tensor& in)
-{
-    const float_type invalid = std::numeric_limits<float_type>::lowest();
-
-    const std::size_t feature_count = channels_first
-        ? in.shape().height_
-        : in.shape().depth_
-        ;
-
-    const std::size_t in_height = channels_first
-        ? in.shape().width_
-        : in.shape().height_
-        ;
-
-    const std::size_t in_width = channels_first
-        ? in.shape().depth_
-        : in.shape().width_
-        ;
-
-    const auto conv_cfg = preprocess_convolution(
-        shape2(pool_height, pool_width),
-        shape2(strides_y, strides_x),
-        pad_type, in_height, in_width);
-
-    int pad_top_int = static_cast<int>(conv_cfg.pad_top_);
-    int pad_left_int = static_cast<int>(conv_cfg.pad_left_);
-    const std::size_t out_height = conv_cfg.out_height_;
-    const std::size_t out_width = conv_cfg.out_width_;
-
-    if (channels_first)
-    {
-        tensor out(
-            tensor_shape_with_changed_rank(
-                tensor_shape(feature_count, out_height, out_width),
-                in.shape().rank()),
-            0);
-
-        for (std::size_t z = 0; z < feature_count; ++z)
-        {
-            for (std::size_t y = 0; y < out_height; ++y)
-            {
-                for (std::size_t x = 0; x < out_width; ++x)
-                {
-                    float_type val = std::numeric_limits<float_type>::lowest();
-                    for (std::size_t yf = 0; yf < pool_height; ++yf)
-                    {
-                        int in_get_y = static_cast<int>(strides_y * y + yf) - pad_top_int;
-                        for (std::size_t xf = 0; xf < pool_width; ++xf)
-                        {
-                            int in_get_x = static_cast<int>(strides_x * x + xf) - pad_left_int;
-                            const auto current = in.get_x_z_padded(invalid, z, in_get_y, in_get_x);
-                            val = std::max(val, current);
-                        }
-                    }
-
-                    out.set_ignore_rank(tensor_pos(z, y, x), val);
-                }
-            }
-        }
-        return out;
-    }
-    else
-    {
-        tensor out(
-            tensor_shape_with_changed_rank(
-                tensor_shape(out_height, out_width, feature_count),
-                in.shape().rank()),
-            0);
-
-        for (std::size_t y = 0; y < out_height; ++y)
-        {
-            for (std::size_t x = 0; x < out_width; ++x)
-            {
-                for (std::size_t z = 0; z < feature_count; ++z)
-                {
-                    float_type val = std::numeric_limits<float_type>::lowest();
-                    for (std::size_t yf = 0; yf < pool_height; ++yf)
-                    {
-                        int in_get_y = static_cast<int>(strides_y * y + yf) - pad_top_int;
-                        for (std::size_t xf = 0; xf < pool_width; ++xf)
-                        {
-                            int in_get_x = static_cast<int>(strides_x * x + xf) - pad_left_int;
-                            const auto current = in.get_y_x_padded(invalid, in_get_y, in_get_x, z);
-                            val = std::max(val, current);
-                        }
-                    }
-
-                    out.set_ignore_rank(tensor_pos(y, x, z), val);
-                }
-            }
-        }
-        return out;
-    }
-}
-
-class max_pooling_2d_layer : public pooling_2d_layer
-{
-public:
-    explicit max_pooling_2d_layer(const std::string& name,
-        const shape2& pool_size, const shape2& strides, bool channels_first,
-        padding p) :
-        pooling_2d_layer(name, pool_size, strides, channels_first, p)
-    {
-    }
-protected:
-    tensor pool(const tensor& in) const override
-    {
-        if (pool_size_ == shape2(2, 2) && strides_ == shape2(2, 2))
-            return max_pool_2d(2, 2, 2, 2, channels_first_, padding_, in);
-        else if (pool_size_ == shape2(4, 4) && strides_ == shape2(4, 4))
-            return max_pool_2d(4, 4, 4, 4, channels_first_, padding_, in);
-        else
-            return max_pool_2d(
-                pool_size_.height_, pool_size_.width_,
-                strides_.height_, strides_.width_,
-                channels_first_, padding_, in);
-    }
-};
-
-} } // namespace fdeep, namespace internal
diff --git a/include/fdeep/layers/max_pooling_3d_layer.hpp b/include/fdeep/layers/max_pooling_3d_layer.hpp
new file mode 100644
index 00000000..7a62f9cf
--- /dev/null
+++ b/include/fdeep/layers/max_pooling_3d_layer.hpp
@@ -0,0 +1,55 @@
+// Copyright 2016, Tobias Hermann.
+// https://github.com/Dobiasd/frugally-deep
+// Distributed under the MIT License.
+// (See accompanying LICENSE file or at
+// https://opensource.org/licenses/MIT)
+
+#pragma once
+
+#include "fdeep/layers/pooling_3d_layer.hpp"
+
+#include <algorithm>
+#include <limits>
+
+namespace fdeep { namespace internal
+{
+
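+// Takes the maximum of one 3D pooling window. Out-of-range positions return
+// numeric_limits<float_type>::lowest() from get_padded and therefore never
+// win the comparison, so no per-cell bounds checks are needed here.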
+inline void inner_max_pool(const tensor& in, tensor& out,
+    std::size_t pool_size_d4, std::size_t pool_height, std::size_t pool_width,
+    std::size_t strides_d4, std::size_t strides_y, std::size_t strides_x,
+    std::size_t d4, std::size_t y, std::size_t x, std::size_t z,
+    int pad_front_int, int pad_top_int, int pad_left_int)
+{
+    const float_type invalid = std::numeric_limits<float_type>::lowest();
+    float_type val = std::numeric_limits<float_type>::lowest();
+    for (std::size_t d4f = 0; d4f < pool_size_d4; ++d4f)
+    {
+        int in_get_d4 = static_cast<int>(strides_d4 * d4 + d4f) - pad_front_int;
+        for (std::size_t yf = 0; yf < pool_height; ++yf)
+        {
+            int in_get_y = static_cast<int>(strides_y * y + yf) - pad_top_int;
+            for (std::size_t xf = 0; xf < pool_width; ++xf)
+            {
+                int in_get_x = static_cast<int>(strides_x * x + xf) - pad_left_int;
+                const auto current = in.get_padded(invalid,
+                    0, in_get_d4, in_get_y, in_get_x, static_cast<int>(z));
+                val = std::max(val, current);
+            }
+        }
+    }
+    out.set_ignore_rank(tensor_pos(d4, y, x, z), val);
+}
+
+class max_pooling_3d_layer : public pooling_3d_layer
+{
+public:
+    explicit max_pooling_3d_layer(const std::string& name,
+        const shape3& pool_size, const shape3& strides,
+        padding p) :
+        pooling_3d_layer(name, pool_size, strides, p,
+            &inner_max_pool)
+    {
+    }
+};
+
+} } // namespace fdeep, namespace internal
diff --git a/include/fdeep/layers/pooling_2d_layer.hpp b/include/fdeep/layers/pooling_2d_layer.hpp
deleted file mode 100644
index dce8cbc7..00000000
--- a/include/fdeep/layers/pooling_2d_layer.hpp
+++ /dev/null
@@ -1,51 +0,0 @@
-// Copyright 2016, Tobias Hermann.
-// https://github.com/Dobiasd/frugally-deep
-// Distributed under the MIT License.
-// (See accompanying LICENSE file or at
-// https://opensource.org/licenses/MIT)
-
-#pragma once
-
-#include "fdeep/layers/layer.hpp"
-#include "fdeep/convolution.hpp"
-
-#include <fplus/fplus.hpp>
-
-#include <cassert>
-#include <cstddef>
-#include <string>
-#include <vector>
-
-namespace fdeep { namespace internal
-{
-
-// Abstract base class for pooling layers
-class pooling_2d_layer : public layer
-{
-public:
-    explicit pooling_2d_layer(const std::string& name,
-        const shape2& pool_size, const shape2& strides, bool channels_first,
-        padding p) :
-        layer(name),
-        pool_size_(pool_size),
-        strides_(strides),
-        channels_first_(channels_first),
-        padding_(p)
-    {
-    }
-protected:
-    tensors apply_impl(const tensors& inputs) const override final
-    {
-        const auto& input = single_tensor_from_tensors(inputs);
-        return {pool(input)};
-    }
-
-    virtual tensor pool(const tensor& input) const = 0;
-
-    shape2 pool_size_;
-    shape2 strides_;
-    bool channels_first_;
-    padding padding_;
-};
-
-} } // namespace fdeep, namespace internal
diff --git a/include/fdeep/layers/pooling_3d_layer.hpp b/include/fdeep/layers/pooling_3d_layer.hpp
new file mode 100644
index 00000000..b2a660dc
--- /dev/null
+++ b/include/fdeep/layers/pooling_3d_layer.hpp
@@ -0,0 +1,98 @@
+// Copyright 2016, Tobias Hermann.
+// https://github.com/Dobiasd/frugally-deep
+// Distributed under the MIT License.
+// (See accompanying LICENSE file or at
+// https://opensource.org/licenses/MIT)
+
+#pragma once
+
+#include "fdeep/layers/layer.hpp"
+#include "fdeep/convolution3d.hpp"
+
+#include <fplus/fplus.hpp>
+
+#include <cassert>
+#include <cstddef>
+#include <string>
+#include <vector>
+
+namespace fdeep { namespace internal
+{
+
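+// Per-output-cell pooling kernel; see inner_max_pool and inner_average_pool.
+// A plain function pointer replaces the virtual pool() override of the old
+// 2D base class, so one generic loop below drives both pooling variants.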
+typedef void (*inner_pooling_func)(
+    const tensor&, tensor& out,
+    std::size_t, std::size_t, std::size_t,
+    std::size_t, std::size_t, std::size_t,
+    std::size_t, std::size_t, std::size_t, std::size_t,
+    int, int, int
+);
+
+// Abstract base class for pooling layers
+class pooling_3d_layer : public layer
+{
+public:
+    explicit pooling_3d_layer(const std::string& name,
+        const shape3& pool_size, const shape3& strides,
+        padding p, const inner_pooling_func inner_f) :
+        layer(name),
+        pool_size_(pool_size),
+        strides_(strides),
+        padding_(p),
+        inner_f_(inner_f)
+    {
+    }
+protected:
+    tensor pool(const tensor& in) const
+    {
+        const auto conv_cfg = preprocess_convolution_3d(
+            shape3(pool_size_.size_dim_4_, pool_size_.height_, pool_size_.width_),
+            shape3(strides_.size_dim_4_, strides_.height_, strides_.width_),
+            padding_, in.shape().size_dim_4_, in.shape().height_, in.shape().width_);
+
+        int pad_front_int = static_cast<int>(conv_cfg.pad_front_);
+        int pad_top_int = static_cast<int>(conv_cfg.pad_top_);
+        int pad_left_int = static_cast<int>(conv_cfg.pad_left_);
+
+        const std::size_t out_size_d4 = conv_cfg.out_size_d4_;
+        const std::size_t out_height = conv_cfg.out_height_;
+        const std::size_t out_width = conv_cfg.out_width_;
+
+        tensor out(
+            tensor_shape_with_changed_rank(
+                tensor_shape(out_size_d4, out_height, out_width, in.shape().depth_),
+                in.shape().rank()),
+            0);
+
+        for (std::size_t d4 = 0; d4 < out_size_d4; ++d4)
+        {
+            for (std::size_t y = 0; y < out_height; ++y)
+            {
+                for (std::size_t x = 0; x < out_width; ++x)
+                {
+                    for (std::size_t z = 0; z < in.shape().depth_; ++z)
+                    {
+                        inner_f_(in, out,
+                            pool_size_.size_dim_4_, pool_size_.height_, pool_size_.width_,
+                            strides_.size_dim_4_, strides_.height_, strides_.width_,
+                            d4, y, x, z,
+                            pad_front_int, pad_top_int, pad_left_int);
+                    }
+                }
+            }
+        }
+        return out;
+    }
+
+    tensors apply_impl(const tensors& inputs) const override final
+    {
+        const auto& input = single_tensor_from_tensors(inputs);
+        return {pool(input)};
+    }
+
+    shape3 pool_size_;
+    shape3 strides_;
+    padding padding_;
+    inner_pooling_func inner_f_;
+};
+
+} } // namespace fdeep, namespace internal
diff --git a/include/fdeep/shape3.hpp b/include/fdeep/shape3.hpp
new file mode 100644
index 00000000..c35e073b
--- /dev/null
+++ b/include/fdeep/shape3.hpp
@@ -0,0 +1,48 @@
+// Copyright 2016, Tobias Hermann.
+// https://github.com/Dobiasd/frugally-deep
+// Distributed under the MIT License.
+// (See accompanying LICENSE file or at
+// https://opensource.org/licenses/MIT)
+
+#pragma once
+
+#include "fdeep/common.hpp"
+
+#include <cstddef>
+#include <cstdlib>
+#include <string>
+
+namespace fdeep { namespace internal
+{
+
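+// Pool-window / stride triple for 3D pooling. The member names mirror
+// fdeep's tensor dimensions: size_dim_4_ is the dimension preceding height
+// and width; the channel axis (depth) is not part of a shape3.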
+class shape3
+{
+public:
+    explicit shape3(
+        std::size_t size_dim_4,
+        std::size_t height,
+        std::size_t width) :
+        size_dim_4_(size_dim_4),
+        height_(height),
+        width_(width)
+    {
+    }
+    std::size_t volume() const
+    {
+        return size_dim_4_ * height_ * width_;
+    }
+
+    std::size_t size_dim_4_;
+    std::size_t height_;
+    std::size_t width_;
+};
+
+inline bool operator == (const shape3& lhs, const shape3& rhs)
+{
+    return
+        lhs.size_dim_4_ == rhs.size_dim_4_ &&
+        lhs.height_ == rhs.height_ &&
+        lhs.width_ == rhs.width_;
+}
+
+} } // namespace fdeep, namespace internal
diff --git a/include/fdeep/tensor.hpp b/include/fdeep/tensor.hpp
index 3fc83ff2..76cf68db 100644
--- a/include/fdeep/tensor.hpp
+++ b/include/fdeep/tensor.hpp
@@ -66,25 +66,24 @@ class tensor
     {
         return (*values_)[idx_ignore_rank(pos)];
     }
-    float_type get_y_x_padded(float_type pad_value,
-        int y, int x, std::size_t z) const
+    float_type get_padded(float_type pad_value,
+        int d5, int d4, int y, int x, int z) const
     {
-        if (y < 0 || y >= static_cast<int>(shape().height_) ||
-            x < 0 || x >= static_cast<int>(shape().width_))
-        {
-            return pad_value;
-        }
-        return get_ignore_rank(tensor_pos(static_cast<std::size_t>(y), static_cast<std::size_t>(x), z));
-    }
-    float_type get_x_z_padded(float_type pad_value,
-        std::size_t y, int x, int z) const
-    {
-        if (x < 0 || x >= static_cast<int>(shape().width_) ||
+        if (d5 < 0 || d5 >= static_cast<int>(shape().size_dim_5_) ||
+            d4 < 0 || d4 >= static_cast<int>(shape().size_dim_4_) ||
+            y < 0 || y >= static_cast<int>(shape().height_) ||
+            x < 0 || x >= static_cast<int>(shape().width_) ||
             z < 0 || z >= static_cast<int>(shape().depth_))
         {
             return pad_value;
         }
-        return get_ignore_rank(tensor_pos(y, static_cast<std::size_t>(x), static_cast<std::size_t>(z)));
+        return get_ignore_rank(tensor_pos(
+            static_cast<std::size_t>(d5),
+            static_cast<std::size_t>(d4),
+            static_cast<std::size_t>(y),
+            static_cast<std::size_t>(x),
+            static_cast<std::size_t>(z)
+        ));
     }
     void set(const tensor_pos& pos, float_type value)
     {
diff --git a/include/fdeep/tensor_shape_variable.hpp b/include/fdeep/tensor_shape_variable.hpp
index 06a16486..b93d2c03 100644
--- a/include/fdeep/tensor_shape_variable.hpp
+++ b/include/fdeep/tensor_shape_variable.hpp
@@ -8,8 +8,6 @@
 
 #include "fdeep/common.hpp"
 
-#include "fdeep/shape2.hpp"
-
 #include <cstddef>
 #include <string>
 #include <vector>
diff --git a/keras_export/convert_model.py b/keras_export/convert_model.py
index 817570b1..913c4544 100755
--- a/keras_export/convert_model.py
+++ b/keras_export/convert_model.py
@@ -601,12 +601,9 @@ def get_layer_weights(layer, name):
     result = {}
     layer_type = type(layer).__name__
     if hasattr(layer, 'data_format'):
-        if layer_type in ['AveragePooling1D', 'MaxPooling1D', 'AveragePooling2D', 'MaxPooling2D',
-                          'GlobalAveragePooling1D', 'GlobalMaxPooling1D', 'GlobalAveragePooling2D',
-                          'GlobalMaxPooling2D']:
-            assert layer.data_format == 'channels_last' or layer.data_format == 'channels_first'
-        else:
-            assert layer.data_format == 'channels_last'
+        assert layer.data_format == 'channels_last'
+    if hasattr(layer, 'keepdims'):  # Pooling layers
+        assert not layer.keepdims
 
     show_func = get_layer_functions_dict().get(layer_type, None)
     shown_layer = None
diff --git a/keras_export/generate_test_models.py b/keras_export/generate_test_models.py
index 0ce49791..2f32b68f 100644
--- a/keras_export/generate_test_models.py
+++ b/keras_export/generate_test_models.py
@@ -13,11 +13,13 @@
 from tensorflow.keras.layers import Embedding, Normalization, Rescaling
 from tensorflow.keras.layers import GlobalAveragePooling1D, GlobalMaxPooling1D
 from tensorflow.keras.layers import GlobalAveragePooling2D, GlobalMaxPooling2D
+from tensorflow.keras.layers import GlobalAveragePooling3D, GlobalMaxPooling3D
 from tensorflow.keras.layers import Input, Dense, Dropout, Flatten, Activation
 from tensorflow.keras.layers import LSTM, GRU
 from tensorflow.keras.layers import LeakyReLU, ELU, PReLU, ReLU
 from tensorflow.keras.layers import MaxPooling1D, AveragePooling1D, UpSampling1D
 from tensorflow.keras.layers import MaxPooling2D, AveragePooling2D, UpSampling2D
+from tensorflow.keras.layers import MaxPooling3D, AveragePooling3D
 from tensorflow.keras.layers import Multiply, Add, Subtract, Average, Maximum, Minimum, Dot
 from tensorflow.keras.layers import Permute, Reshape, RepeatVector
 from tensorflow.keras.layers import SeparableConv2D, DepthwiseConv2D
@@ -152,14 +154,10 @@ def get_test_model_exhaustive():
     outputs.append(Cropping1D((2, 3))(inputs[6]))
     outputs.append(MaxPooling1D(2)(inputs[6]))
     outputs.append(MaxPooling1D(2, strides=2, padding='same')(inputs[6]))
-    outputs.append(MaxPooling1D(2, data_format="channels_first")(inputs[6]))
     outputs.append(AveragePooling1D(2)(inputs[6]))
     outputs.append(AveragePooling1D(2, strides=2, padding='same')(inputs[6]))
-    outputs.append(AveragePooling1D(2, data_format="channels_first")(inputs[6]))
     outputs.append(GlobalMaxPooling1D()(inputs[6]))
-    outputs.append(GlobalMaxPooling1D(data_format="channels_first")(inputs[6]))
     outputs.append(GlobalAveragePooling1D()(inputs[6]))
-    outputs.append(GlobalAveragePooling1D(data_format="channels_first")(inputs[6]))
 
     outputs.append(Normalization(axis=None, mean=2.1, variance=2.2)(inputs[4]))
     outputs.append(Normalization(axis=-1, mean=2.1, variance=2.2)(inputs[6]))
@@ -187,18 +185,18 @@ def get_test_model_exhaustive():
     outputs.append(DepthwiseConv2D((1, 2))(inputs[4]))
 
     outputs.append(MaxPooling2D((2, 2))(inputs[4]))
-    # todo: check if TensorFlow >= 2.8 supports this
-    # outputs.append(MaxPooling2D((2, 2), data_format="channels_first")(inputs[4]))
+    outputs.append(MaxPooling3D((2, 2, 2))(inputs[2]))
     outputs.append(MaxPooling2D((1, 3), strides=(2, 3), padding='same')(inputs[4]))
+    outputs.append(MaxPooling3D((1, 3, 5), strides=(2, 3, 4), padding='same')(inputs[2]))
     outputs.append(AveragePooling2D((2, 2))(inputs[4]))
-    # todo: check if TensorFlow >= 2.8 supports this
-    # outputs.append(AveragePooling2D((2, 2), data_format="channels_first")(inputs[4]))
+    outputs.append(AveragePooling3D((2, 2, 2))(inputs[2]))
    outputs.append(AveragePooling2D((1, 3), strides=(2, 3), padding='same')(inputs[4]))
+    outputs.append(AveragePooling3D((1, 3, 5), strides=(2, 3, 4), padding='same')(inputs[2]))
 
     outputs.append(GlobalAveragePooling2D()(inputs[4]))
-    outputs.append(GlobalAveragePooling2D(data_format="channels_first")(inputs[4]))
+    outputs.append(GlobalAveragePooling3D()(inputs[2]))
     outputs.append(GlobalMaxPooling2D()(inputs[4]))
-    outputs.append(GlobalMaxPooling2D(data_format="channels_first")(inputs[4]))
+    outputs.append(GlobalMaxPooling3D()(inputs[2]))
 
     outputs.append(Permute((3, 4, 1, 5, 2))(inputs[0]))
     outputs.append(Permute((1, 5, 3, 2, 4))(inputs[0]))
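
Usage sketch (editor's addition, not part of the patch): once a Keras model
containing one of the new 3D pooling layers has been converted with
keras_export/convert_model.py, inference works like with any other fdeep
model. The file name "fdeep_model.json" and the input dimensions below are
made-up placeholders.

    #include <fdeep/fdeep.hpp>

    #include <iostream>

    int main()
    {
        // Load a model previously converted with convert_model.py
        // (load_model also runs the embedded self test).
        const auto model = fdeep::load_model("fdeep_model.json");

        // Rank-4, channels-last input (dim4, height, width, channels), as
        // consumed e.g. by a MaxPooling3D model; the values are dummy data.
        const fdeep::tensor input(fdeep::tensor_shape(4, 6, 6, 3), 1.0f);

        const auto result = model.predict({input});
        std::cout << fdeep::show_tensors(result) << std::endl;
    }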