From e835fc48f7e629f4fbd4b8c0384e7b61301edd7c Mon Sep 17 00:00:00 2001
From: Victor Petrovykh <victor@edgedb.com>
Date: Fri, 16 Feb 2024 12:27:16 -0500
Subject: [PATCH] Revert "Add codecs for dealing with `pgsparse` vector.
 (#478)" (#481)

This reverts commit f75993ddd962e4083c5aec1b3e51619d7bd90c47.

We're postponing the addition of pgsparse support, so the pgsparse vector
codecs and their tests are removed for now.
---
 edgedb/protocol/codecs/codecs.pyx | 114 -----------------------------
 tests/test_vector.py              | 116 ------------------------------
 2 files changed, 230 deletions(-)

diff --git a/edgedb/protocol/codecs/codecs.pyx b/edgedb/protocol/codecs/codecs.pyx
index c602a3f0..1218d3e6 100644
--- a/edgedb/protocol/codecs/codecs.pyx
+++ b/edgedb/protocol/codecs/codecs.pyx
@@ -798,113 +798,6 @@ cdef pgvector_decode(pgproto.CodecContext settings, FRBuffer *buf):
     return val
 
 
-# The pg_sparse extension uses a signed int16 when reading dimesion in binary
-# format.
-DEF PGSPARSE_MAX_DIM = (1 << 15) - 1
-
-
-cdef pgsparse_encode(pgproto.CodecContext settings, WriteBuffer buf,
-                     object obj):
-    cdef:
-        int16_t n_elem = 0
-        int64_t dim
-        Py_ssize_t i
-        float[:] memview
-
-    # If we can take a typed memview of the object, we use that.
-    # That is good, because it means we can consume array.array and
-    # numpy.ndarray without needing to unbox.
-    # Otherwise we take the slow path, indexing into the array using
-    # the normal protocol.
-    try:
-        memview = obj
-    except (ValueError, TypeError) as e:
-        pass
-    else:
-        # The actual dimentionality of the vector is the size of the raw array
-        dim = len(memview)
-        if dim > PGSPARSE_MAX_DIM:
-            raise ValueError('too many elements in vector value')
-
-        # First pass to count the number of non-zero elements
-        for i in range(dim):
-            if memview[i] != 0:
-                n_elem += 1
-
-        buf.write_int32(6 + n_elem*8)
-        buf.write_int16(n_elem)
-        buf.write_int16(<int16_t>dim)
-        buf.write_int16(0)
-        # Second pass will write the actual non-zero elements
-        for i in range(dim):
-            if memview[i] != 0:
-                buf.write_int32(i)
-                buf.write_float(memview[i])
-        return
-
-    # Annoyingly, this is literally identical code to the fast path...
-    # but the types are different in critical ways.
-    if not _is_array_iterable(obj):
-        raise TypeError(
-            'a sized iterable container expected (got type {!r})'.format(
-                type(obj).__name__))
-
-    # The actual dimentionality of the vector is the size of the raw array
-    dim = len(obj)
-    if dim > PGSPARSE_MAX_DIM:
-        raise ValueError('too many elements in vector value')
-
-    # First pass to count the number of non-zero elements
-    for i in range(dim):
-        if obj[i] != 0:
-            n_elem += 1
-
-    buf.write_int32(6 + n_elem*8)
-    buf.write_int16(n_elem)
-    buf.write_int16(dim)
-    buf.write_int16(0)
-    # Second pass will write the actual non-zero elements
-    for i in range(dim):
-        if obj[i] != 0:
-            buf.write_int32(i)
-            buf.write_float(obj[i])
-
-
-cdef pgsparse_decode(pgproto.CodecContext settings, FRBuffer *buf):
-    cdef:
-        int16_t n_elem
-        int16_t dim
-        Py_ssize_t i
-        int32_t index
-        float[::1] array_view
-
-    n_elem = hton.unpack_int16(frb_read(buf, 2))
-    dim = hton.unpack_int16(frb_read(buf, 2))
-    frb_read(buf, 2)
-
-    # Create a float array with size dim
-    val = ONE_EL_ARRAY * dim
-    array_view = val
-
-    # The underlying sparse Vector representation supports int32 as the
-    # dimension and index, but when converting to binary format the dimensions
-    # are maxed out at int16. So indexes beyond the truncated dimension will
-    # cause an exception.
-    if dim < 0:
-        # This is actually an indicator of overflow when converting from int32
-        # down to int16.
-        raise ValueError('too many elements in vector value')
-    try:
-        # Fill the non-zero elements
-        for i in range(n_elem):
-            index = hton.unpack_int32(frb_read(buf, 4))
-            array_view[index] = hton.unpack_float(frb_read(buf, 4))
-    except IndexError:
-        raise ValueError('too many elements in vector value')
-
-    return val
-
-
 cdef checked_decimal_encode(
     pgproto.CodecContext settings, WriteBuffer buf, obj
 ):
@@ -1114,12 +1007,5 @@ cdef register_base_scalar_codecs():
         uuid.UUID('9565dd88-04f5-11ee-a691-0b6ebe179825'),
     )
 
-    register_base_scalar_codec(
-        'ext::pgsparse::vector',
-        pgsparse_encode,
-        pgsparse_decode,
-        uuid.UUID('b646ace0-266d-47ce-8263-1224c38a4a12'),
-    )
-
 
 register_base_scalar_codecs()
diff --git a/tests/test_vector.py b/tests/test_vector.py
index f58c0d24..ede4a3d0 100644
--- a/tests/test_vector.py
+++ b/tests/test_vector.py
@@ -129,119 +129,3 @@ async def test_vector_01(self):
                 ''',
                 'foo',
             )
-
-
-class TestSparseVector(tb.SyncQueryTestCase):
-    def setUp(self):
-        super().setUp()
-
-        if not self.client.query_required_single('''
-            select exists (
-              select sys::ExtensionPackage filter .name = 'pgsparse'
-            )
-        '''):
-            self.skipTest("feature not implemented")
-
-        self.client.execute('''
-            create extension pgsparse;
-        ''')
-
-    def tearDown(self):
-        try:
-            self.client.execute('''
-                drop extension pgsparse;
-            ''')
-        finally:
-            super().tearDown()
-
-    async def test_vector_01(self):
-        val = self.client.query_single('''
-            select <ext::pgsparse::vector>[1.5,0,0,0,2.0,3.8]
-        ''')
-        self.assertTrue(isinstance(val, array.array))
-        self.assertEqual(val, array.array('f', [1.5, 0, 0, 0, 2.0, 3.8]))
-
-        val = self.client.query_single(
-            '''
-                select <json><ext::pgsparse::vector>$0
-            ''',
-            [3.0, 9.0, -42.5],
-        )
-        self.assertEqual(val, '[3, 9, -42.5]')
-
-        val = self.client.query_single(
-            '''
-                select <json><ext::pgsparse::vector>$0
-            ''',
-            array.array('f', [3.0, 9.0, -42.5])
-        )
-        self.assertEqual(val, '[3, 9, -42.5]')
-
-        val = self.client.query_single(
-            '''
-                select <json><ext::pgsparse::vector>$0
-            ''',
-            array.array('i', [1, 2, 3]),
-        )
-        self.assertEqual(val, '[1, 2, 3]')
-
-        val = self.client.query_single(
-            '''
-                select <ext::pgsparse::vector>$0
-            ''',
-            array.array('f', ([0] * 10000) + [1, 2]),
-        )
-        self.assertEqual(val, array.array('f', ([0] * 10000) + [1, 2]))
-
-        val = self.client.query_single(
-            '''
-                with zeros := array_agg(
-                    (for x in range_unpack(range(0, 20000)) union 0)
-                )
-                select <ext::pgsparse::vector>(zeros ++ [1, 2]);
-            ''',
-        )
-        self.assertEqual(val, array.array('f', ([0] * 20000) + [1, 2]))
-
-        # Some sad path tests
-        with self.assertRaises(edgedb.InvalidArgumentError):
-            self.client.query_single(
-                '''
-                    select <ext::pgsparse::vector>$0
-                ''',
-                [3.0, None, -42.5],
-            )
-
-        with self.assertRaises(edgedb.InvalidArgumentError):
-            self.client.query_single(
-                '''
-                    select <ext::pgsparse::vector>$0
-                ''',
-                [3.0, 'x', -42.5],
-            )
-
-        with self.assertRaises(edgedb.InvalidArgumentError):
-            self.client.query_single(
-                '''
-                    select <ext::pgsparse::vector>$0
-                ''',
-                'foo',
-            )
-
-        with self.assertRaises(edgedb.InvalidArgumentError):
-            self.client.query_single(
-                '''
-                    select <ext::pgsparse::vector>$0
-                ''',
-                array.array('f', ([0] * 50000) + [1, 2]),
-            )
-
-        with self.assertRaises(edgedb.ClientError):
-            self.client.query_single(
-                '''
-                    with zeros := array_agg(
-                        (for x in range_unpack(range(0, 50000)) union 0)
-                    )
-                    select <ext::pgsparse::vector>(zeros ++ [1, 2]);
-                ''',
-            )