From 1dc39fb105d8a3e153d87cccbc17917c46d3b1fb Mon Sep 17 00:00:00 2001
From: Martin Durant <martin.durant@alumni.utoronto.ca>
Date: Tue, 20 Dec 2022 09:50:37 -0500
Subject: [PATCH 1/9] implement fletcher32

---
 numcodecs/__init__.py              |  3 ++
 numcodecs/fletcher32.pyx           | 54 ++++++++++++++++++++++++++++++
 numcodecs/tests/test_fletcher32.py | 24 +++++++++++++
 setup.py                           | 28 +++++++++++++++-
 4 files changed, 108 insertions(+), 1 deletion(-)
 create mode 100644 numcodecs/fletcher32.pyx
 create mode 100644 numcodecs/tests/test_fletcher32.py

diff --git a/numcodecs/__init__.py b/numcodecs/__init__.py
index 53f3e795..1e3c8536 100644
--- a/numcodecs/__init__.py
+++ b/numcodecs/__init__.py
@@ -111,3 +111,6 @@
     register_codec(VLenUTF8)
     register_codec(VLenBytes)
     register_codec(VLenArray)
+
+from numcodecs.fletcher32 import Fletcher32
+register_codec(Fletcher32)
diff --git a/numcodecs/fletcher32.pyx b/numcodecs/fletcher32.pyx
new file mode 100644
index 00000000..6c300f34
--- /dev/null
+++ b/numcodecs/fletcher32.pyx
@@ -0,0 +1,54 @@
+# cython: boundscheck=False
+# cython: wraparound=False
+# cython: overflowcheck=False
+# cython: cdivision=True
+
+import struct
+import numpy as np
+
+from numcodecs.abc import Codec
+from numcodecs.compat import ensure_contiguous_ndarray
+
+from libc.stdint cimport uint8_t, uint16_t, uint32_t
+
+cpdef uint32_t fletcher32(const uint16_t[::1] data):
+    cdef:
+        uint32_t sum1 = 0
+        uint32_t sum2 = 0
+        int index
+        int size = data.shape[0]
+
+    for index in range(0, size):
+        sum1 = (sum1 + data[index]) % 0xffff
+        sum2 = (sum2 + sum1) % 0xffff
+
+    return (sum2 << 16) | sum1
+
+
+class Fletcher32(Codec):
+    codec_id = "fletcher32"
+
+    def encode(self, buf):
+        buf = ensure_contiguous_ndarray(buf).ravel()
+        if len(buf) % 2:
+            # rare, odd size of bytes data only
+            arr = np.frombuffer(buf.tobytes() + b"\x00", dtype="uint16")
+            val = fletcher32(arr)
+        else:
+            val = fletcher32(buf.view('uint16'))
+        return buf.tobytes() + struct.pack("<I", val)
+
+    def decode(self, buf, out=None):
+        b = ensure_contiguous_ndarray(buf).view('uint8')
+        if len(buf) % 2:
+            # rare, odd size of bytes data only
+            arr = np.frombuffer(b.tobytes() + b"\x00", dtype="uint16")
+            val = fletcher32(arr)
+        else:
+            val = fletcher32(b[:-4].view('uint16'))
+        found = b[-4:].view('uint32')[0]
+        assert val == found
+        if out:
+            out.view("uint8")[:] = b[:-4]
+            return out
+        return memoryview(b[:-4])
diff --git a/numcodecs/tests/test_fletcher32.py b/numcodecs/tests/test_fletcher32.py
new file mode 100644
index 00000000..5db75e13
--- /dev/null
+++ b/numcodecs/tests/test_fletcher32.py
@@ -0,0 +1,24 @@
+import numpy as np
+import pytest
+
+from numcodecs.fletcher32 import Fletcher32, fletcher32
+
+
+@pytest.mark.parametrize("inval,outval", [
+    [b"abcdef", 1448095018],
+    [b"abcdefgh", 3957429649]
+])
+def test_vectors(inval, outval):
+    arr = np.array(list(inval), dtype="uint8").view('uint16')
+    assert fletcher32(arr) == outval
+
+
+@pytest.mark.parametrize(
+    "dtype",
+    ["uint8", "int32", "float32"]
+)
+def test_with_data(dtype):
+    data = np.empty(100, dtype=dtype)
+    f = Fletcher32()
+    arr = np.frombuffer(f.decode(f.encode(data)), dtype=dtype)
+    assert (arr == data).all()
diff --git a/setup.py b/setup.py
index cf950f40..14072445 100644
--- a/setup.py
+++ b/setup.py
@@ -198,6 +198,31 @@ def vlen_extension():
     return extensions
 
 
+def fletcher_extension():
+    info('setting up fletcher32 extension')
+
+    extra_compile_args = base_compile_args.copy()
+    define_macros = []
+
+    # setup sources
+    include_dirs = ['numcodecs']
+    # define_macros += [('CYTHON_TRACE', '1')]
+
+    sources = ['numcodecs/fletcher32.pyx']
+
+    # define extension module
+    extensions = [
+        Extension('numcodecs.fletcher32',
+                  sources=sources,
+                  include_dirs=include_dirs,
+                  define_macros=define_macros,
+                  extra_compile_args=extra_compile_args,
+                  ),
+    ]
+
+    return extensions
+
+
 def compat_extension():
     info('setting up compat extension')
 
@@ -265,7 +290,8 @@ def run_setup(with_extensions):
 
     if with_extensions:
         ext_modules = (blosc_extension() + zstd_extension() + lz4_extension() +
-                       compat_extension() + shuffle_extension() + vlen_extension())
+                       compat_extension() + shuffle_extension() + vlen_extension() +
+                       fletcher_extension())
 
         cmdclass = dict(build_ext=ve_build_ext)
     else:

From 4a7fd63161b1120f1fa14e3f814ccc1a150a502d Mon Sep 17 00:00:00 2001
From: Martin Durant <martindurant@users.noreply.github.com>
Date: Tue, 20 Dec 2022 13:00:26 -0500
Subject: [PATCH 2/9] Update numcodecs/fletcher32.pyx

Co-authored-by: Ryan Abernathey <ryan.abernathey@gmail.com>
---
 numcodecs/fletcher32.pyx | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/numcodecs/fletcher32.pyx b/numcodecs/fletcher32.pyx
index 6c300f34..6a0ae7ae 100644
--- a/numcodecs/fletcher32.pyx
+++ b/numcodecs/fletcher32.pyx
@@ -47,7 +47,11 @@ class Fletcher32(Codec):
         else:
             val = fletcher32(b[:-4].view('uint16'))
         found = b[-4:].view('uint32')[0]
-        assert val == found
+        if val != found:
+            raise ValueError(
+                f"The flecher32 checksum of the data ({found}) did not match the expected checksum ({val}). "
+                "This could be a sign that the data has been corrupted."
+            )
         if out:
             out.view("uint8")[:] = b[:-4]
             return out

From db2275e2236c31da9a5ea2693f8c8568eaf3b820 Mon Sep 17 00:00:00 2001
From: Martin Durant <martin.durant@alumni.utoronto.ca>
Date: Tue, 20 Dec 2022 13:06:53 -0500
Subject: [PATCH 3/9] Add docstring and error test

---
 numcodecs/fletcher32.pyx           | 11 ++++++++++-
 numcodecs/tests/test_fletcher32.py | 12 ++++++++++++
 2 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/numcodecs/fletcher32.pyx b/numcodecs/fletcher32.pyx
index 6a0ae7ae..a60547af 100644
--- a/numcodecs/fletcher32.pyx
+++ b/numcodecs/fletcher32.pyx
@@ -26,6 +26,14 @@ cpdef uint32_t fletcher32(const uint16_t[::1] data):
 
 
 class Fletcher32(Codec):
+    """The fletcher checksum with 16-bit words and 32-bit output
+
+    With this codec, the checksum is concatenated on the end of the data
+    bytes when encoded. At decode time, the checksum is performed on
+    the data portion and compared with the four-byte checksum, raising
+    ValueError if inconsistent.
+    """
+
     codec_id = "fletcher32"
 
     def encode(self, buf):
@@ -49,7 +57,8 @@ class Fletcher32(Codec):
         found = b[-4:].view('uint32')[0]
         if val != found:
             raise ValueError(
-                f"The flecher32 checksum of the data ({found}) did not match the expected checksum ({val}). "
+                f"The fletcher32 checksum of the data ({found}) did not"
+                f" match the expected checksum ({val}).\n"
                 "This could be a sign that the data has been corrupted."
             )
         if out:
diff --git a/numcodecs/tests/test_fletcher32.py b/numcodecs/tests/test_fletcher32.py
index 5db75e13..d9435576 100644
--- a/numcodecs/tests/test_fletcher32.py
+++ b/numcodecs/tests/test_fletcher32.py
@@ -22,3 +22,15 @@ def test_with_data(dtype):
     f = Fletcher32()
     arr = np.frombuffer(f.decode(f.encode(data)), dtype=dtype)
     assert (arr == data).all()
+
+
+def test_error():
+    data = np.arange(100)
+    f = Fletcher32()
+    enc = f.encode(data)
+    enc2 = bytearray(enc)
+    enc2[0] += 1
+    with pytest.raises(ValueError) as e:
+        f.decode(enc2)
+    assert "fletcher32 checksum" in str(e.value)
+

From 4366b5b45ea6b6d10d6238595da43c8a8b449a37 Mon Sep 17 00:00:00 2001
From: Martin Durant <martin.durant@alumni.utoronto.ca>
Date: Tue, 20 Dec 2022 15:18:07 -0500
Subject: [PATCH 4/9] Use HDF C impl

---
 numcodecs/_fletcher.c              | 43 ++++++++++++++++++++++++++++++
 numcodecs/fletcher32.pyx           | 39 +++++++--------------------
 numcodecs/tests/test_fletcher32.py | 28 +++++++++++--------
 3 files changed, 70 insertions(+), 40 deletions(-)
 create mode 100644 numcodecs/_fletcher.c

diff --git a/numcodecs/_fletcher.c b/numcodecs/_fletcher.c
new file mode 100644
index 00000000..15310d3a
--- /dev/null
+++ b/numcodecs/_fletcher.c
@@ -0,0 +1,43 @@
+#include <stdint.h>
+#include <stddef.h>
+
+// https://github.com/Unidata/netcdf-c/blob/8eb71290eb9360dcfd4955ba94759ba8d02c40a9/plugins/H5checksum.c
+
+
+uint32_t H5_checksum_fletcher32(const void *_data, size_t _len)
+{
+    const uint8_t *data = (const uint8_t *)_data;  /* Pointer to the data to be summed */
+    size_t len = _len / 2;      /* Length in 16-bit words */
+    uint32_t sum1 = 0, sum2 = 0;
+
+
+    /* Compute checksum for pairs of bytes */
+    /* (the magic "360" value is is the largest number of sums that can be
+     *  performed without numeric overflow)
+     */
+    while (len) {
+        size_t tlen = len > 360 ? 360 : len;
+        len -= tlen;
+        do {
+            sum1 += (uint32_t)(((uint16_t)data[0]) << 8) | ((uint16_t)data[1]);
+            data += 2;
+            sum2 += sum1;
+        } while (--tlen);
+        sum1 = (sum1 & 0xffff) + (sum1 >> 16);
+        sum2 = (sum2 & 0xffff) + (sum2 >> 16);
+    }
+
+    /* Check for odd # of bytes */
+    if(_len % 2) {
+        sum1 += (uint32_t)(((uint16_t)*data) << 8);
+        sum2 += sum1;
+        sum1 = (sum1 & 0xffff) + (sum1 >> 16);
+        sum2 = (sum2 & 0xffff) + (sum2 >> 16);
+    } /* end if */
+
+    /* Second reduction step to reduce sums to 16 bits */
+    sum1 = (sum1 & 0xffff) + (sum1 >> 16);
+    sum2 = (sum2 & 0xffff) + (sum2 >> 16);
+
+    return (sum2 << 16) | sum1;
+} /* end H5_checksum_fletcher32() */
diff --git a/numcodecs/fletcher32.pyx b/numcodecs/fletcher32.pyx
index a60547af..3bf01704 100644
--- a/numcodecs/fletcher32.pyx
+++ b/numcodecs/fletcher32.pyx
@@ -11,19 +11,8 @@ from numcodecs.compat import ensure_contiguous_ndarray
 
 from libc.stdint cimport uint8_t, uint16_t, uint32_t
 
-cpdef uint32_t fletcher32(const uint16_t[::1] data):
-    cdef:
-        uint32_t sum1 = 0
-        uint32_t sum2 = 0
-        int index
-        int size = data.shape[0]
-
-    for index in range(0, size):
-        sum1 = (sum1 + data[index]) % 0xffff
-        sum2 = (sum2 + sum1) % 0xffff
-
-    return (sum2 << 16) | sum1
-
+cdef extern from "_fletcher.c":
+    uint32_t H5_checksum_fletcher32(const void *_data, size_t _len)
 
 class Fletcher32(Codec):
     """The fletcher checksum with 16-bit words and 32-bit output
@@ -37,28 +26,20 @@ class Fletcher32(Codec):
     codec_id = "fletcher32"
 
     def encode(self, buf):
-        buf = ensure_contiguous_ndarray(buf).ravel()
-        if len(buf) % 2:
-            # rare, odd size of bytes data only
-            arr = np.frombuffer(buf.tobytes() + b"\x00", dtype="uint16")
-            val = fletcher32(arr)
-        else:
-            val = fletcher32(buf.view('uint16'))
+        buf = ensure_contiguous_ndarray(buf).ravel().view('uint8')
+        cdef const uint8_t[::1] b_ptr = buf
+        val = H5_checksum_fletcher32(&b_ptr[0], buf.nbytes)
         return buf.tobytes() + struct.pack("<I", val)
 
     def decode(self, buf, out=None):
         b = ensure_contiguous_ndarray(buf).view('uint8')
-        if len(buf) % 2:
-            # rare, odd size of bytes data only
-            arr = np.frombuffer(b.tobytes() + b"\x00", dtype="uint16")
-            val = fletcher32(arr)
-        else:
-            val = fletcher32(b[:-4].view('uint16'))
-        found = b[-4:].view('uint32')[0]
+        cdef const uint8_t[::1] b_ptr = b
+        val = H5_checksum_fletcher32(&b_ptr[0], b.nbytes - 4)
+        found = b[-4:].view("<u4")[0]
         if val != found:
             raise ValueError(
-                f"The fletcher32 checksum of the data ({found}) did not"
-                f" match the expected checksum ({val}).\n"
+                f"The fletcher32 checksum of the data ({val}) did not"
+                f" match the expected checksum ({found}).\n"
                 "This could be a sign that the data has been corrupted."
             )
         if out:
diff --git a/numcodecs/tests/test_fletcher32.py b/numcodecs/tests/test_fletcher32.py
index d9435576..a69e3dde 100644
--- a/numcodecs/tests/test_fletcher32.py
+++ b/numcodecs/tests/test_fletcher32.py
@@ -1,16 +1,7 @@
 import numpy as np
 import pytest
 
-from numcodecs.fletcher32 import Fletcher32, fletcher32
-
-
-@pytest.mark.parametrize("inval,outval", [
-    [b"abcdef", 1448095018],
-    [b"abcdefgh", 3957429649]
-])
-def test_vectors(inval, outval):
-    arr = np.array(list(inval), dtype="uint8").view('uint16')
-    assert fletcher32(arr) == outval
+from numcodecs.fletcher32 import Fletcher32
 
 
 @pytest.mark.parametrize(
@@ -18,7 +9,7 @@ def test_vectors(inval, outval):
     ["uint8", "int32", "float32"]
 )
 def test_with_data(dtype):
-    data = np.empty(100, dtype=dtype)
+    data = np.arange(100, dtype=dtype)
     f = Fletcher32()
     arr = np.frombuffer(f.decode(f.encode(data)), dtype=dtype)
     assert (arr == data).all()
@@ -34,3 +25,18 @@ def test_error():
         f.decode(enc2)
     assert "fletcher32 checksum" in str(e.value)
 
+
+def test_known():
+    data = (
+        b'\xf04\xfe\x1a\x03\xb2\xb1?^\x99j\xf3\xd6f\xef?\xbbm\x04n'
+        b'\x9a\xdf\xeb?x\x9eIL\xdeW\xc8?A\xef\x88\xa8&\xad\xef?'
+        b'\xf2\xc6a\x01a\xb8\xe8?#&\x96\xabY\xf2\xe7?\xe2Pw\xba\xd0w\xea?'
+        b'\x80\xc5\xf8M@0\x9a?\x98H+\xb4\x03\xfa\xc6?\xb9P\x1e1'
+    )
+    data3 = Fletcher32().decode(data)
+    outarr = np.frombuffer(data3, dtype="<f8")
+    expected = [
+        0.0691225, 0.98130367, 0.87104532, 0.19018153, 0.9898866,
+        0.77250719, 0.74833377, 0.8271259, 0.02557469, 0.17950484
+    ]
+    assert np.allclose(outarr, expected)

From 8e01f63a9929de561f23c11365b54813c583bc9a Mon Sep 17 00:00:00 2001
From: Martin Durant <martin.durant@alumni.utoronto.ca>
Date: Tue, 20 Dec 2022 15:21:01 -0500
Subject: [PATCH 5/9] Remove unused, add docstrings

---
 numcodecs/fletcher32.pyx | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/numcodecs/fletcher32.pyx b/numcodecs/fletcher32.pyx
index 3bf01704..52d30acb 100644
--- a/numcodecs/fletcher32.pyx
+++ b/numcodecs/fletcher32.pyx
@@ -1,10 +1,4 @@
-# cython: boundscheck=False
-# cython: wraparound=False
-# cython: overflowcheck=False
-# cython: cdivision=True
-
 import struct
-import numpy as np
 
 from numcodecs.abc import Codec
 from numcodecs.compat import ensure_contiguous_ndarray
@@ -14,6 +8,7 @@ from libc.stdint cimport uint8_t, uint16_t, uint32_t
 cdef extern from "_fletcher.c":
     uint32_t H5_checksum_fletcher32(const void *_data, size_t _len)
 
+
 class Fletcher32(Codec):
     """The fletcher checksum with 16-bit words and 32-bit output
 
@@ -26,12 +21,14 @@ class Fletcher32(Codec):
     codec_id = "fletcher32"
 
     def encode(self, buf):
+        """Return buffer plus 4-byte fletcher checksum"""
         buf = ensure_contiguous_ndarray(buf).ravel().view('uint8')
         cdef const uint8_t[::1] b_ptr = buf
         val = H5_checksum_fletcher32(&b_ptr[0], buf.nbytes)
         return buf.tobytes() + struct.pack("<I", val)
 
     def decode(self, buf, out=None):
+        """Check fletcher checksum, and return buffer without it"""
         b = ensure_contiguous_ndarray(buf).view('uint8')
         cdef const uint8_t[::1] b_ptr = b
         val = H5_checksum_fletcher32(&b_ptr[0], b.nbytes - 4)

From cb0aa2f9d2982b44fff02cbd7488bdc722a4e3bf Mon Sep 17 00:00:00 2001
From: Martin Durant <martin.durant@alumni.utoronto.ca>
Date: Tue, 20 Dec 2022 15:37:30 -0500
Subject: [PATCH 6/9] to runtime and int test

---
 numcodecs/fletcher32.pyx           |  2 +-
 numcodecs/tests/test_fletcher32.py | 20 ++++++++++----------
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/numcodecs/fletcher32.pyx b/numcodecs/fletcher32.pyx
index 52d30acb..befb367e 100644
--- a/numcodecs/fletcher32.pyx
+++ b/numcodecs/fletcher32.pyx
@@ -34,7 +34,7 @@ class Fletcher32(Codec):
         val = H5_checksum_fletcher32(&b_ptr[0], b.nbytes - 4)
         found = b[-4:].view("<u4")[0]
         if val != found:
-            raise ValueError(
+            raise RuntimeError(
                 f"The fletcher32 checksum of the data ({val}) did not"
                 f" match the expected checksum ({found}).\n"
                 "This could be a sign that the data has been corrupted."
diff --git a/numcodecs/tests/test_fletcher32.py b/numcodecs/tests/test_fletcher32.py
index a69e3dde..76564e95 100644
--- a/numcodecs/tests/test_fletcher32.py
+++ b/numcodecs/tests/test_fletcher32.py
@@ -21,22 +21,22 @@ def test_error():
     enc = f.encode(data)
     enc2 = bytearray(enc)
     enc2[0] += 1
-    with pytest.raises(ValueError) as e:
+    with pytest.raises(RuntimeError) as e:
         f.decode(enc2)
     assert "fletcher32 checksum" in str(e.value)
 
 
 def test_known():
     data = (
-        b'\xf04\xfe\x1a\x03\xb2\xb1?^\x99j\xf3\xd6f\xef?\xbbm\x04n'
-        b'\x9a\xdf\xeb?x\x9eIL\xdeW\xc8?A\xef\x88\xa8&\xad\xef?'
-        b'\xf2\xc6a\x01a\xb8\xe8?#&\x96\xabY\xf2\xe7?\xe2Pw\xba\xd0w\xea?'
-        b'\x80\xc5\xf8M@0\x9a?\x98H+\xb4\x03\xfa\xc6?\xb9P\x1e1'
-    )
+        b'w\x07\x00\x00\x00\x00\x00\x00\x85\xf6\xff\xff\xff\xff\xff\xff'
+        b'i\x07\x00\x00\x00\x00\x00\x00\x94\xf6\xff\xff\xff\xff\xff\xff'
+        b'\x88\t\x00\x00\x00\x00\x00\x00i\x03\x00\x00\x00\x00\x00\x00'
+        b'\x93\xfd\xff\xff\xff\xff\xff\xff\xc3\xfc\xff\xff\xff\xff\xff\xff'
+        b"'\x02\x00\x00\x00\x00\x00\x00\xba\xf7\xff\xff\xff\xff\xff\xff"
+        b'\xfd%\x86d')
     data3 = Fletcher32().decode(data)
-    outarr = np.frombuffer(data3, dtype="<f8")
+    outarr = np.frombuffer(data3, dtype="<i8")
     expected = [
-        0.0691225, 0.98130367, 0.87104532, 0.19018153, 0.9898866,
-        0.77250719, 0.74833377, 0.8271259, 0.02557469, 0.17950484
+        1911, -2427, 1897, -2412, 2440, 873, -621, -829, 551, -2118,
     ]
-    assert np.allclose(outarr, expected)
+    assert outarr.tolist() == expected

From 93cef03da713a57ed36702c93f223fa45783493c Mon Sep 17 00:00:00 2001
From: Martin Durant <martin.durant@alumni.utoronto.ca>
Date: Tue, 20 Dec 2022 20:40:00 -0500
Subject: [PATCH 7/9] to cython

---
 numcodecs/_fletcher.c    | 43 ---------------------------------------
 numcodecs/fletcher32.pyx | 44 +++++++++++++++++++++++++++++++++++-----
 2 files changed, 39 insertions(+), 48 deletions(-)
 delete mode 100644 numcodecs/_fletcher.c

diff --git a/numcodecs/_fletcher.c b/numcodecs/_fletcher.c
deleted file mode 100644
index 15310d3a..00000000
--- a/numcodecs/_fletcher.c
+++ /dev/null
@@ -1,43 +0,0 @@
-#include <stdint.h>
-#include <stddef.h>
-
-// https://github.com/Unidata/netcdf-c/blob/8eb71290eb9360dcfd4955ba94759ba8d02c40a9/plugins/H5checksum.c
-
-
-uint32_t H5_checksum_fletcher32(const void *_data, size_t _len)
-{
-    const uint8_t *data = (const uint8_t *)_data;  /* Pointer to the data to be summed */
-    size_t len = _len / 2;      /* Length in 16-bit words */
-    uint32_t sum1 = 0, sum2 = 0;
-
-
-    /* Compute checksum for pairs of bytes */
-    /* (the magic "360" value is is the largest number of sums that can be
-     *  performed without numeric overflow)
-     */
-    while (len) {
-        size_t tlen = len > 360 ? 360 : len;
-        len -= tlen;
-        do {
-            sum1 += (uint32_t)(((uint16_t)data[0]) << 8) | ((uint16_t)data[1]);
-            data += 2;
-            sum2 += sum1;
-        } while (--tlen);
-        sum1 = (sum1 & 0xffff) + (sum1 >> 16);
-        sum2 = (sum2 & 0xffff) + (sum2 >> 16);
-    }
-
-    /* Check for odd # of bytes */
-    if(_len % 2) {
-        sum1 += (uint32_t)(((uint16_t)*data) << 8);
-        sum2 += sum1;
-        sum1 = (sum1 & 0xffff) + (sum1 >> 16);
-        sum2 = (sum2 & 0xffff) + (sum2 >> 16);
-    } /* end if */
-
-    /* Second reduction step to reduce sums to 16 bits */
-    sum1 = (sum1 & 0xffff) + (sum1 >> 16);
-    sum2 = (sum2 & 0xffff) + (sum2 >> 16);
-
-    return (sum2 << 16) | sum1;
-} /* end H5_checksum_fletcher32() */
diff --git a/numcodecs/fletcher32.pyx b/numcodecs/fletcher32.pyx
index befb367e..9728735d 100644
--- a/numcodecs/fletcher32.pyx
+++ b/numcodecs/fletcher32.pyx
@@ -1,3 +1,6 @@
+# cython: language_level=3
+# cython: overflowcheck=False
+# cython: cdivision=True
 import struct
 
 from numcodecs.abc import Codec
@@ -5,8 +8,39 @@ from numcodecs.compat import ensure_contiguous_ndarray
 
 from libc.stdint cimport uint8_t, uint16_t, uint32_t
 
-cdef extern from "_fletcher.c":
-    uint32_t H5_checksum_fletcher32(const void *_data, size_t _len)
+
+cdef uint32_t _fletcher32(const uint8_t[::1] _data):
+    cdef:
+        const uint8_t *data = &_data[0]
+        size_t _len = _data.shape[0]
+        size_t len = _len / 2
+        size_t tlen
+        uint32_t sum1 = 0, sum2 = 0;
+
+
+    while len:
+        tlen = 360 if len > 360 else len
+        len -= tlen
+        while True:
+            sum1 += <uint32_t>((<uint16_t>data[0]) << 8) | (<uint16_t>data[1])
+            data += 2
+            sum2 += sum1
+            tlen -= 1
+            if tlen < 1:
+                break
+        sum1 = (sum1 & 0xffff) + (sum1 >> 16)
+        sum2 = (sum2 & 0xffff) + (sum2 >> 16)
+
+    if _len % 2:
+        sum1 += <uint32_t>((<uint16_t>(data[0])) << 8)
+        sum2 += sum1
+        sum1 = (sum1 & 0xffff) + (sum1 >> 16)
+        sum2 = (sum2 & 0xffff) + (sum2 >> 16)
+
+    sum1 = (sum1 & 0xffff) + (sum1 >> 16)
+    sum2 = (sum2 & 0xffff) + (sum2 >> 16)
+
+    return (sum2 << 16) | sum1
 
 
 class Fletcher32(Codec):
@@ -24,14 +58,14 @@ class Fletcher32(Codec):
         """Return buffer plus 4-byte fletcher checksum"""
         buf = ensure_contiguous_ndarray(buf).ravel().view('uint8')
         cdef const uint8_t[::1] b_ptr = buf
-        val = H5_checksum_fletcher32(&b_ptr[0], buf.nbytes)
+        val = _fletcher32(b_ptr)
         return buf.tobytes() + struct.pack("<I", val)
 
     def decode(self, buf, out=None):
         """Check fletcher checksum, and return buffer without it"""
         b = ensure_contiguous_ndarray(buf).view('uint8')
-        cdef const uint8_t[::1] b_ptr = b
-        val = H5_checksum_fletcher32(&b_ptr[0], b.nbytes - 4)
+        cdef const uint8_t[::1] b_ptr = b[:-4]
+        val = _fletcher32(b_ptr)
         found = b[-4:].view("<u4")[0]
         if val != found:
             raise RuntimeError(

From dbbf2bcdf965d118b7bb8f35892b8064e694ec1c Mon Sep 17 00:00:00 2001
From: Martin Durant <martindurant@users.noreply.github.com>
Date: Wed, 21 Dec 2022 08:52:13 -0500
Subject: [PATCH 8/9] Update numcodecs/fletcher32.pyx

Co-authored-by: Ryan Abernathey <ryan.abernathey@gmail.com>
---
 numcodecs/fletcher32.pyx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/numcodecs/fletcher32.pyx b/numcodecs/fletcher32.pyx
index 9728735d..8ad54c12 100644
--- a/numcodecs/fletcher32.pyx
+++ b/numcodecs/fletcher32.pyx
@@ -49,7 +49,7 @@ class Fletcher32(Codec):
     With this codec, the checksum is concatenated on the end of the data
     bytes when encoded. At decode time, the checksum is performed on
     the data portion and compared with the four-byte checksum, raising
-    ValueError if inconsistent.
+    RuntimeError if inconsistent.
     """
 
     codec_id = "fletcher32"

From 4825a1d3b229f5b7a246b08f5e18db159b9c31c4 Mon Sep 17 00:00:00 2001
From: Martin Durant <martin.durant@alumni.utoronto.ca>
Date: Wed, 21 Dec 2022 09:16:17 -0500
Subject: [PATCH 9/9] Add docs

---
 docs/checksum32.rst      | 11 +++++++++++
 docs/release.rst         |  3 ++-
 numcodecs/fletcher32.pyx |  6 ++++++
 3 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/docs/checksum32.rst b/docs/checksum32.rst
index 1d5522e2..5e682afc 100644
--- a/docs/checksum32.rst
+++ b/docs/checksum32.rst
@@ -22,3 +22,14 @@ Adler32
     .. automethod:: decode
     .. automethod:: get_config
     .. automethod:: from_config
+
+
+Fletcher32
+----------
+
+.. autoclass:: numcodecs.fletcher32.Fletcher32
+
+    .. autoattribute:: codec_id
+    .. automethod:: encode
+    .. automethod:: decode
+
diff --git a/docs/release.rst b/docs/release.rst
index 6f176b8c..90d62750 100644
--- a/docs/release.rst
+++ b/docs/release.rst
@@ -15,7 +15,8 @@ Unreleased
 Enhancements
 ~~~~~~~~~~~~
 
-*
+* Add ``fletcher32`` checksum codec
+  By :user:`Martin Durant <martindurant>`, :issue:`410`.
 
 Fix
 ~~~
diff --git a/numcodecs/fletcher32.pyx b/numcodecs/fletcher32.pyx
index 8ad54c12..02f9319c 100644
--- a/numcodecs/fletcher32.pyx
+++ b/numcodecs/fletcher32.pyx
@@ -10,6 +10,8 @@ from libc.stdint cimport uint8_t, uint16_t, uint32_t
 
 
 cdef uint32_t _fletcher32(const uint8_t[::1] _data):
+    # converted from
+    # https://github.com/Unidata/netcdf-c/blob/main/plugins/H5checksum.c#L109
     cdef:
         const uint8_t *data = &_data[0]
         size_t _len = _data.shape[0]
@@ -46,6 +48,10 @@ cdef uint32_t _fletcher32(const uint8_t[::1] _data):
 class Fletcher32(Codec):
     """The fletcher checksum with 16-bit words and 32-bit output
 
+    This is the netCDF4/HDF5 implementation, which is not equivalent
+    to the one in Wikipedia
+    https://github.com/Unidata/netcdf-c/blob/main/plugins/H5checksum.c#L95
+
     With this codec, the checksum is concatenated on the end of the data
     bytes when encoded. At decode time, the checksum is performed on
     the data portion and compared with the four-byte checksum, raising