From 9124f250e9f4d2133eb7bc8c42b87cf228bf5176 Mon Sep 17 00:00:00 2001 From: Ruxue Zeng <36695415+ruxuez@users.noreply.github.com> Date: Wed, 31 Jan 2024 04:33:37 +0100 Subject: [PATCH] Fix unique key missing problem with search (#232) When creating embeddings, we store the unique key attnum in pg_options. Previously, the unique key attnum was taken from the original table, so an error occurred if the embedding table and the original table had a different number or order of columns. This patch saves the unique key attnum from the embedding table instead. --- greenplumpython/experimental/embedding.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/greenplumpython/experimental/embedding.py b/greenplumpython/experimental/embedding.py index c30ed399..47a980a2 100644 --- a/greenplumpython/experimental/embedding.py +++ b/greenplumpython/experimental/embedding.py @@ -155,6 +155,9 @@ def create_index( query_col_names = _serialize_to_expr( list(self._dataframe.unique_key) + [column], self._dataframe._db ) + unique_key_col_names = _serialize_to_expr( + list(self._dataframe.unique_key), self._dataframe._db + ) sql_add_relationship = f""" DO $$ BEGIN @@ -168,13 +171,21 @@ def create_index( SELECT FROM unnest({query_col_names}) AS query WHERE attname = query ) + ),emb_attnum_map AS ( + SELECT attname, attnum FROM pg_attribute + WHERE + attrelid = '{embedding_df._qualified_table_name}'::regclass::oid AND + EXISTS ( + SELECT FROM unnest({unique_key_col_names}) AS query + WHERE attname = query + ) ), embedding_info AS ( SELECT '{embedding_df._qualified_table_name}'::regclass::oid AS embedding_relid, attnum AS content_attnum, {len(self._dataframe._unique_key) + 1} AS embedding_attnum, '{model_name}' AS model, - ARRAY(SELECT attnum FROM attnum_map WHERE attname != '{column}') AS unique_key + ARRAY(SELECT attnum FROM emb_attnum_map WHERE attname != '{column}') AS unique_key FROM attnum_map WHERE attname = '{column}' )