From 72b5833005c8027f0994043db722930e997f67c8 Mon Sep 17 00:00:00 2001 From: Ben Greiner Date: Thu, 12 Jan 2023 18:06:23 +0100 Subject: [PATCH 1/2] use dtype=object for sequences first not accepted by np.asarray --- numcodecs/json.py | 5 ++++- numcodecs/msgpacks.py | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/numcodecs/json.py b/numcodecs/json.py index 670f2235..b803a77b 100644 --- a/numcodecs/json.py +++ b/numcodecs/json.py @@ -54,7 +54,10 @@ def __init__(self, encoding='utf-8', skipkeys=False, ensure_ascii=True, self._decoder = _json.JSONDecoder(**self._decoder_config) def encode(self, buf): - buf = np.asarray(buf) + try: + buf = np.asarray(buf) + except ValueError: + buf = np.asarray(buf, dtype=object) items = buf.tolist() items.extend((buf.dtype.str, buf.shape)) return self._encoder.encode(items).encode(self._text_encoding) diff --git a/numcodecs/msgpacks.py b/numcodecs/msgpacks.py index 026f583a..65564984 100644 --- a/numcodecs/msgpacks.py +++ b/numcodecs/msgpacks.py @@ -52,7 +52,10 @@ def __init__(self, use_single_float=False, use_bin_type=True, raw=False): self.raw = raw def encode(self, buf): - buf = np.asarray(buf) + try: + buf = np.asarray(buf) + except ValueError: + buf = np.asarray(buf, dtype=object) items = buf.tolist() items.extend((buf.dtype.str, buf.shape)) return msgpack.packb(items, use_bin_type=self.use_bin_type, From bee8aa7f24ae2dd409f3fd742419c3123eb754b1 Mon Sep 17 00:00:00 2001 From: Ben Greiner Date: Thu, 12 Jan 2023 18:07:02 +0100 Subject: [PATCH 2/2] parameterize test_non_numpy_inputs --- numcodecs/tests/test_json.py | 36 +++++++++++++------------ numcodecs/tests/test_msgpacks.py | 45 +++++++++++++++++--------------- 2 files changed, 43 insertions(+), 38 deletions(-) diff --git a/numcodecs/tests/test_json.py b/numcodecs/tests/test_json.py index 7e8fcd64..8dac2b41 100644 --- a/numcodecs/tests/test_json.py +++ b/numcodecs/tests/test_json.py @@ -2,7 +2,7 @@ import numpy as np - +import pytest from numcodecs.json import JSON from numcodecs.tests.common import (check_config, check_repr, check_encode_decode_array, @@ -53,21 +53,23 @@ def test_backwards_compatibility(): check_backwards_compatibility(JSON.codec_id, arrays, codecs) -def test_non_numpy_inputs(): +@pytest.mark.parametrize( + "input_data, dtype", + [ + ([0, 1], None), + ([[0, 1], [2, 3]], None), + ([[0], [1], [2, 3]], object), + ([[[0, 0]], [[1, 1]], [[2, 3]]], None), + (["1"], None), + (["11", "11"], None), + (["11", "1", "1"], None), + ([{}], None), + ([{"key": "value"}, ["list", "of", "strings"]], object), + ] +) +def test_non_numpy_inputs(input_data, dtype): # numpy will infer a range of different shapes and dtypes for these inputs. # Make sure that round-tripping through encode preserves this. - data = [ - [0, 1], - [[0, 1], [2, 3]], - [[0], [1], [2, 3]], - [[[0, 0]], [[1, 1]], [[2, 3]]], - ["1"], - ["11", "11"], - ["11", "1", "1"], - [{}], - [{"key": "value"}, ["list", "of", "strings"]], - ] - for input_data in data: - for codec in codecs: - output_data = codec.decode(codec.encode(input_data)) - assert np.array_equal(np.array(input_data), output_data) + for codec in codecs: + output_data = codec.decode(codec.encode(input_data)) + assert np.array_equal(np.array(input_data, dtype=dtype), output_data) diff --git a/numcodecs/tests/test_msgpacks.py b/numcodecs/tests/test_msgpacks.py index 6aeadcf0..d76aa125 100644 --- a/numcodecs/tests/test_msgpacks.py +++ b/numcodecs/tests/test_msgpacks.py @@ -2,6 +2,7 @@ import numpy as np +import pytest try: @@ -52,30 +53,32 @@ def test_backwards_compatibility(): check_backwards_compatibility(codec.codec_id, arrays, [codec]) -def test_non_numpy_inputs(): +@pytest.mark.parametrize( + "input_data, dtype", + [ + ([0, 1], None), + ([[0, 1], [2, 3]], None), + ([[0], [1], [2, 3]], object), + ([[[0, 0]], [[1, 1]], [[2, 3]]], None), + (["1"], None), + (["11", "11"], None), + (["11", "1", "1"], None), + ([{}], None), + ([{"key": "value"}, ["list", "of", "strings"]], object), + ([b"1"], None), + ([b"11", b"11"], None), + ([b"11", b"1", b"1"], None), + ([{b"key": b"value"}, [b"list", b"of", b"strings"]], object), + ] +) +def test_non_numpy_inputs(input_data, dtype): codec = MsgPack() # numpy will infer a range of different shapes and dtypes for these inputs. # Make sure that round-tripping through encode preserves this. - data = [ - [0, 1], - [[0, 1], [2, 3]], - [[0], [1], [2, 3]], - [[[0, 0]], [[1, 1]], [[2, 3]]], - ["1"], - ["11", "11"], - ["11", "1", "1"], - [{}], - [{"key": "value"}, ["list", "of", "strings"]], - [b"1"], - [b"11", b"11"], - [b"11", b"1", b"1"], - [{b"key": b"value"}, [b"list", b"of", b"strings"]], - ] - for input_data in data: - actual = codec.decode(codec.encode(input_data)) - expect = np.array(input_data) - assert expect.shape == actual.shape - assert np.array_equal(expect, actual) + actual = codec.decode(codec.encode(input_data)) + expect = np.array(input_data, dtype=dtype) + assert expect.shape == actual.shape + assert np.array_equal(expect, actual) def test_encode_decode_shape_dtype_preserved():