Skip to content

Commit

Permalink
Fix neuroglancer_precomputed image encoding issues
Browse files Browse the repository at this point in the history
Previously, the neuroglancer_precomputed driver incorrectly encoded jpeg
and png-format chunks in the case that the x and y dimensions of the
chunk size differ. In particular, the chunks were encoded with the y
dimension specified as the width, and x * z specified as the height, but
in fact the data was stored with the x dimension as the
inner-most dimension.  While all pixels were stored in the
correct linear order, and could be decoded properly by both tensorstore
and neuroglancer, the image data was misaligned to image rows, such that
individual chunks did not display correctly in a normal image viewer,
and compression performed poorly due to this misalignment.  While png is
lossless and poor compression merely increases the size, with jpeg this
also tended to introduce extreme artifacts.

This commit also enables support for 16-bit PNG images with up to 4
channels.

Fixes google/neuroglancer#677.

PiperOrigin-RevId: 708415255
Change-Id: I057cc3fc4f073026e8bf96cc543be5a769c626c2
  • Loading branch information
jbms authored and copybara-github committed Dec 20, 2024
1 parent 4e07c92 commit a5a9613
Show file tree
Hide file tree
Showing 7 changed files with 124 additions and 58 deletions.
5 changes: 5 additions & 0 deletions tensorstore/driver/neuroglancer_precomputed/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -66,14 +66,19 @@ tensorstore_cc_test(
"//tensorstore:contiguous_layout",
"//tensorstore:data_type",
"//tensorstore:index",
"//tensorstore:static_cast",
"//tensorstore:strided_layout",
"//tensorstore/internal/image",
"//tensorstore/internal/image:jpeg",
"//tensorstore/internal/image:png",
"//tensorstore/util:status_testutil",
"//tensorstore/util:str_cat",
"@com_github_nlohmann_json//:json",
"@com_google_absl//absl/base:core_headers",
"@com_google_absl//absl/status",
"@com_google_absl//absl/strings:cord",
"@com_google_googletest//:gtest_main",
"@com_google_riegeli//riegeli/bytes:cord_reader",
],
)

Expand Down
15 changes: 9 additions & 6 deletions tensorstore/driver/neuroglancer_precomputed/chunk_encoding.cc
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,8 @@ template <typename ImageReader>
Result<SharedArray<const void>> DecodeImageChunk(
DataType dtype, span<const Index, 4> partial_shape,
StridedLayoutView<4> chunk_layout, absl::Cord encoded_input) {
// `array` will contain decoded jpeg with C-order `(z, y, x, channel)` layout.
// `array` will contain decoded image with C-order `(z, y, x, channel)`
// layout.
//
// If number of channels is 1, then this is equivalent to the
// `(channel, z, y, x)` layout in `chunk_layout`.
Expand Down Expand Up @@ -140,7 +141,7 @@ Result<SharedArray<const void>> DecodeImageChunk(
// Partial chunk, or number of channels is not 1. Must copy to obtain the
// expected `chunk_layout`.
//
// It is safe to value initialize because the out-of-bounds positions will
// It is safe to default initialize because the out-of-bounds positions will
// never be read. If resize is supported, this must change, however.
SharedArray<void> full_decoded_array(
internal::AllocateAndConstructSharedElements(chunk_layout.num_elements(),
Expand Down Expand Up @@ -275,6 +276,7 @@ template <typename ImageWriter, typename Options>
Result<absl::Cord> EncodeImageChunk(Options options, DataType dtype,
span<const Index, 4> shape,
ArrayView<const void> array) {
// czyx
Array<const void, 4> partial_source(
array.element_pointer(),
StridedLayout<4>({shape[1], shape[2], shape[3], shape[0]},
Expand All @@ -287,10 +289,11 @@ Result<absl::Cord> EncodeImageChunk(Options options, DataType dtype,
ImageWriter writer;
riegeli::CordWriter<> cord_writer(&buffer);
TENSORSTORE_RETURN_IF_ERROR(writer.Initialize(&cord_writer, options));
ImageInfo info{/*.height =*/static_cast<int32_t>(shape[3]),
/*.width =*/static_cast<int32_t>(shape[1] * shape[2]),
/*.num_components =*/static_cast<int32_t>(shape[0]),
/*.data_type =*/dtype};
ImageInfo info{
/*.height =*/static_cast<int32_t>(shape[1] * shape[2]), // z * y
/*.width =*/static_cast<int32_t>(shape[3]), // x
/*.num_components =*/static_cast<int32_t>(shape[0]), // c
/*.data_type =*/dtype};
TENSORSTORE_RETURN_IF_ERROR(writer.Encode(
info, tensorstore::span(reinterpret_cast<const unsigned char*>(
contiguous_array.data()),
Expand Down
127 changes: 98 additions & 29 deletions tensorstore/driver/neuroglancer_precomputed/chunk_encoding_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,11 @@
#include <stddef.h>
#include <stdint.h>

#include <algorithm>
#include <cassert>
#include <cmath>
#include <functional>
#include <memory>
#include <string>
#include <vector>

Expand All @@ -28,29 +33,39 @@
#include "absl/status/status.h"
#include "absl/strings/cord.h"
#include <nlohmann/json_fwd.hpp>
#include "riegeli/bytes/cord_reader.h"
#include "tensorstore/array.h"
#include "tensorstore/contiguous_layout.h"
#include "tensorstore/data_type.h"
#include "tensorstore/driver/neuroglancer_precomputed/metadata.h"
#include "tensorstore/index.h"
#include "tensorstore/internal/image/image_info.h"
#include "tensorstore/internal/image/image_reader.h"
#include "tensorstore/internal/image/jpeg_reader.h"
#include "tensorstore/internal/image/png_reader.h"
#include "tensorstore/static_cast.h"
#include "tensorstore/strided_layout.h"
#include "tensorstore/util/status_testutil.h"
#include "tensorstore/util/str_cat.h"

namespace {

using ::tensorstore::DataType;
using ::tensorstore::dtype_v;
using ::tensorstore::Index;
using ::tensorstore::MatchesStatus;
using ::tensorstore::internal_image::ImageReader;
using ::tensorstore::internal_neuroglancer_precomputed::DecodeChunk;
using ::tensorstore::internal_neuroglancer_precomputed::EncodeChunk;
using ::tensorstore::internal_neuroglancer_precomputed::MultiscaleMetadata;

/// Parameters for the test fixture.
struct P {
::nlohmann::json metadata_json;
tensorstore::DataType dtype;
bool compare = true;
DataType dtype;
double max_root_mean_squared_error = 0;
bool truncate = true;
std::function<std::unique_ptr<ImageReader>()> get_image_reader;
};

class ChunkEncodingTest : public testing::TestWithParam<P> {
Expand Down Expand Up @@ -82,6 +97,29 @@ class ChunkEncodingTest : public testing::TestWithParam<P> {
}
};

template <typename T>
double GetRootMeanSquaredErrorImpl(tensorstore::ArrayView<const void> array_a,
                                   tensorstore::ArrayView<const void> array_b) {
  // Computes sqrt(mean((a - b)^2)) over all elements.  Both arrays must have
  // element type `T`; the casts below are unchecked.
  const auto typed_a =
      tensorstore::StaticDataTypeCast<const T, tensorstore::unchecked>(array_a);
  const auto typed_b =
      tensorstore::StaticDataTypeCast<const T, tensorstore::unchecked>(array_b);
  double sum_sq_diff = 0;
  tensorstore::IterateOverArrays(
      [&](const T* a_ptr, const T* b_ptr) {
        const double delta =
            static_cast<double>(*a_ptr) - static_cast<double>(*b_ptr);
        sum_sq_diff += delta * delta;
      },
      /*constraints=*/{}, typed_a, typed_b);
  return std::sqrt(sum_sq_diff / array_a.num_elements());
}

double GetRootMeanSquaredError(tensorstore::ArrayView<const void> array_a,
                               tensorstore::ArrayView<const void> array_b) {
  // Both arrays must share the same dtype; only uint8 is supported here since
  // jpeg (the only lossy encoding under test) is uint8-only.
  const auto dtype = array_a.dtype();
  assert(dtype == array_b.dtype());
  assert(dtype == dtype_v<uint8_t>);
  return GetRootMeanSquaredErrorImpl<uint8_t>(array_a, array_b);
}

TEST_P(ChunkEncodingTest, Roundtrip) {
auto metadata_json = GetParam().metadata_json;
auto dtype = GetParam().dtype;
Expand All @@ -101,17 +139,32 @@ TEST_P(ChunkEncodingTest, Roundtrip) {

if (!out.empty() && GetParam().truncate) {
// Test that truncating the chunk leads to a decoding error.
auto corrupt = out.Subcord(0, out.size() - 1);
auto corrupt = out.Subcord(
0, out.size() - std::min(static_cast<size_t>(5), out.size()));
EXPECT_THAT(
DecodeChunk(chunk_indices, metadata, scale_index, chunk_layout,
corrupt),
testing::AnyOf(MatchesStatus(absl::StatusCode::kDataLoss),
MatchesStatus(absl::StatusCode::kInvalidArgument)));
}

if (GetParam().compare) {
if (double max_rms_error = GetParam().max_root_mean_squared_error) {
EXPECT_LT(GetRootMeanSquaredError(decode_result, array), max_rms_error)
<< "original=" << array << ", decoded=" << decode_result;
} else {
EXPECT_THAT(decode_result, array);
}

if (GetParam().get_image_reader) {
auto image_reader = GetParam().get_image_reader();
riegeli::CordReader reader{&out};
TENSORSTORE_ASSERT_OK(image_reader->Initialize(&reader));
auto image_info = image_reader->GetImageInfo();
EXPECT_EQ(image_info.width, 3);
EXPECT_EQ(image_info.height, 5 * 4);
EXPECT_EQ(image_info.num_components, metadata.num_channels);
EXPECT_EQ(image_info.dtype, dtype);
}
}

std::vector<P> GenerateParams() {
Expand All @@ -129,43 +182,59 @@ std::vector<P> GenerateParams() {
{"size", {10, 11, 12}}}}},
{"type", "image"}};
// Test raw
param.dtype = tensorstore::dtype_v<uint16_t>;
result.push_back(param);
{
P param_raw = param;
param_raw.dtype = dtype_v<uint16_t>;
result.push_back(param_raw);
}

// Test png
// Simple truncation isn't sufficient to corrupt png files.
param.truncate = false;
if (num_channels >= 1 && num_channels <= 4) {
param.metadata_json["scales"][0]["encoding"] = "png";
// uint8
param.dtype = tensorstore::dtype_v<uint8_t>;
result.push_back(param);
// uint16
if (num_channels == 1) {
param.dtype = tensorstore::dtype_v<uint16_t>;
result.push_back(param);
P param_png = param;
param_png.truncate = false;
param_png.metadata_json["scales"][0]["encoding"] = "png";
param_png.get_image_reader = [] {
return std::make_unique<tensorstore::internal_image::PngReader>();
};
for (auto dtype : {
static_cast<DataType>(dtype_v<uint8_t>),
static_cast<DataType>(dtype_v<uint16_t>),
}) {
param_png.dtype = dtype;
result.push_back(param_png);
}
}
param.truncate = true;

// Test jpeg
// JPEG is lossy, so we can't compare values.
param.compare = false;
if (num_channels == 1 || num_channels == 3) {
param.metadata_json["scales"][0]["encoding"] = "jpeg";
param.dtype = tensorstore::dtype_v<uint8_t>;
result.push_back(param);
// JPEG is lossy, so we can't compare values exactly.
P param_jpeg = param;
param_jpeg.max_root_mean_squared_error = 3;
param_jpeg.metadata_json["scales"][0]["encoding"] = "jpeg";
param_jpeg.dtype = dtype_v<uint8_t>;
param_jpeg.get_image_reader = [] {
return std::make_unique<tensorstore::internal_image::JpegReader>();
};
result.push_back(param_jpeg);
}
param.compare = true;

// Test compressed segmentation
param.metadata_json["scales"][0]["encoding"] = "compressed_segmentation";
param.metadata_json["scales"][0]["compressed_segmentation_block_size"] = {
2, 3, 4};
param.dtype = tensorstore::dtype_v<uint32_t>;
result.push_back(param);
param.dtype = tensorstore::dtype_v<uint64_t>;
result.push_back(param);
{
P param_cseg = param;
param_cseg.metadata_json["scales"][0]["encoding"] =
"compressed_segmentation";
param_cseg
.metadata_json["scales"][0]["compressed_segmentation_block_size"] = {
2, 3, 4};
for (auto dtype : {
static_cast<DataType>(dtype_v<uint32_t>),
static_cast<DataType>(dtype_v<uint64_t>),
}) {
param_cseg.dtype = dtype;
result.push_back(param_cseg);
}
}
}
return result;
}
Expand Down
2 changes: 1 addition & 1 deletion tensorstore/driver/neuroglancer_precomputed/driver_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -979,7 +979,7 @@ TEST(DriverTest, Jpeg1Channel) {
tensorstore::StaticDataTypeCast<const uint8_t,
tensorstore::unchecked>(read_array),
array),
5);
3);

// Test corrupt "jpeg" chunk handling
::nlohmann::json storage_spec{{"driver", "memory"}};
Expand Down
16 changes: 5 additions & 11 deletions tensorstore/driver/neuroglancer_precomputed/metadata.cc
Original file line number Diff line number Diff line change
Expand Up @@ -123,27 +123,22 @@ absl::Status ValidateEncodingDataType(ScaleMetadata::Encoding encoding,
case ScaleMetadata::Encoding::raw:
break;
case ScaleMetadata::Encoding::png:
if (dtype.valid() && dtype.id() != DataTypeId::uint8_t &&
dtype.id() != DataTypeId::uint16_t) {
if (dtype.valid() && (dtype != dtype_v<uint8_t>) &&
(dtype != dtype_v<uint16_t>)) {
return absl::InvalidArgumentError(tensorstore::StrCat(
"\"png\" encoding only supported for uint8 and uint16, not for ",
dtype));
}
if (num_channels) {
if (dtype.valid() && dtype.id() == DataTypeId::uint16_t &&
*num_channels != 1) {
return absl::InvalidArgumentError(tensorstore::StrCat(
"\"png\" encoding for uint16 only supports 1 channel, not ",
*num_channels));
} else if (*num_channels == 0 || *num_channels > 4) {
if (*num_channels == 0 || *num_channels > 4) {
return absl::InvalidArgumentError(tensorstore::StrCat(
"\"png\" encoding only supports 1 to 4 channels, not ",
*num_channels));
}
}
break;
case ScaleMetadata::Encoding::jpeg:
if (dtype.valid() && dtype.id() != DataTypeId::uint8_t) {
if (dtype.valid() && dtype != dtype_v<uint8_t>) {
return absl::InvalidArgumentError(tensorstore::StrCat(
"\"jpeg\" encoding only supported for uint8, not for ", dtype));
}
Expand All @@ -155,8 +150,7 @@ absl::Status ValidateEncodingDataType(ScaleMetadata::Encoding encoding,
break;
case ScaleMetadata::Encoding::compressed_segmentation:
if (!dtype.valid()) break;
if (dtype.id() != DataTypeId::uint32_t &&
dtype.id() != DataTypeId::uint64_t) {
if (dtype != dtype_v<uint32_t> && dtype != dtype_v<uint64_t>) {
return absl::InvalidArgumentError(tensorstore::StrCat(
"compressed_segmentation encoding only supported for "
"uint32 and uint64, not for ",
Expand Down
5 changes: 2 additions & 3 deletions tensorstore/driver/neuroglancer_precomputed/metadata_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -442,8 +442,7 @@ TEST(MetadataTest, ParseEncodingsAndDataTypes) {
EXPECT_EQ(ScaleMetadata::Encoding::png, m.scales[0].encoding);
}

{
int num_channels = 1;
for (int num_channels : {1, 2, 3, 4}) {
const auto dtype = ::tensorstore::dtype_v<uint16_t>;
TENSORSTORE_ASSERT_OK_AND_ASSIGN(
auto m, MultiscaleMetadata::FromJson(GetMetadata(
Expand All @@ -465,7 +464,7 @@ TEST(MetadataTest, ParseEncodingsAndDataTypes) {
::nlohmann::json::value_t::discarded, 10)),
MatchesStatus(absl::StatusCode::kInvalidArgument, ".*10.*"));

// Test that jpeg_quality is valid for `png` encoding.
// Test that png_level is valid for `png` encoding.
for (int png_level : {0, 6, 9}) {
TENSORSTORE_ASSERT_OK_AND_ASSIGN(
auto m, MultiscaleMetadata::FromJson(GetMetadata(
Expand Down
12 changes: 4 additions & 8 deletions tensorstore/internal/image/png_writer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#include "tensorstore/data_type.h"
#include "tensorstore/internal/image/image_info.h"
#include "tensorstore/internal/image/image_view.h"
#include "tensorstore/util/endian.h"
#include "tensorstore/util/span.h"
#include "tensorstore/util/status.h"

Expand Down Expand Up @@ -140,7 +141,7 @@ absl::Status PngWriter::Context::Encode(
if (info.dtype == dtype_v<uint8_t>) {
png_set_packing(png_ptr_);
}
if (info.dtype == dtype_v<uint16_t>) {
if (info.dtype == dtype_v<uint16_t> && endian::native == endian::little) {
png_set_swap(png_ptr_);
}
if (options_.compression_level >= 0 && options_.compression_level <= 9) {
Expand Down Expand Up @@ -191,14 +192,9 @@ absl::Status PngWriter::IsSupported(const ImageInfo& info) {
"PNG image only supports uint8 and uint16 dtypes, but received: %s",
info.dtype.name()));
}
if (info.dtype == dtype_v<uint8_t> &&
(info.num_components == 0 || info.num_components > 4)) {
if (info.num_components == 0 || info.num_components > 4) {
return absl::DataLossError(absl::StrFormat(
"PNG uint8 image expected 1 to 4 components, but received: %d",
info.num_components));
} else if (info.dtype == dtype_v<uint16_t> && info.num_components != 1) {
return absl::DataLossError(absl::StrFormat(
"PNG uint16 image expected 1 component, but received: %d",
"PNG image expected 1 to 4 components, but received: %d",
info.num_components));
}
return absl::OkStatus();
Expand Down

0 comments on commit a5a9613

Please sign in to comment.