From c296318c742ee431040b38487467d9632577f083 Mon Sep 17 00:00:00 2001 From: Chitti Ankith Date: Fri, 3 Nov 2023 12:19:39 -0400 Subject: [PATCH] CREATE INDEX IF NOT EXISTS is broken. (#1337) This PR fixes an issue in the CREATE INDEX IF NOT EXISTS command: when 'IF NOT EXISTS' was already present in the stored index definition, the insert executor left `create_index_query` unassigned, causing an unreferenced-variable error. Added tests that check the correctness of both cases (with and without 'IF NOT EXISTS'). Also reverted the index-based changes to merging dataframes after the vector scan, as they fail in some cases where indexes can be undefined. --- evadb/executor/insert_executor.py | 4 +++- evadb/executor/vector_index_scan_executor.py | 9 +++------ test/integration_tests/long/test_similarity.py | 17 ++++++++++++----- 3 files changed, 18 insertions(+), 12 deletions(-) diff --git a/evadb/executor/insert_executor.py b/evadb/executor/insert_executor.py index 8e38aea91..d2dccd96a 100644 --- a/evadb/executor/insert_executor.py +++ b/evadb/executor/insert_executor.py @@ -59,8 +59,10 @@ def exec(self, *args, **kwargs): for column in table_catalog_entry.columns: if column == index.feat_column: is_index_on_current_table = True + break if is_index_on_current_table: - create_index_query_list = index.index_def.split(" ") + create_index_query = index.index_def + create_index_query_list = create_index_query.split(" ") if_not_exists = " ".join(create_index_query_list[2:5]).lower() if if_not_exists != "if not exists": create_index_query = ( diff --git a/evadb/executor/vector_index_scan_executor.py b/evadb/executor/vector_index_scan_executor.py index 0d6ee58c4..57697236e 100644 --- a/evadb/executor/vector_index_scan_executor.py +++ b/evadb/executor/vector_index_scan_executor.py @@ -142,16 +142,13 @@ def _evadb_vector_index_scan(self, *args, **kwargs): res_data_list.append(row_dict) result_df = pd.DataFrame(res_data_list) - result_df.set_index(row_num_col_name, inplace=True) - result_df = result_df.reindex(row_num_np) - row_num_df.set_index(pd.Index(row_num_np), inplace=True) final_df = pd.merge(
row_num_df, result_df, - left_index=True, - right_index=True, - how="left", + left_on="row_num_np", + right_on=row_num_col_name, + how="inner", ) if "row_num_np" in final_df: diff --git a/test/integration_tests/long/test_similarity.py b/test/integration_tests/long/test_similarity.py index 15a0b087d..81d6054fe 100644 --- a/test/integration_tests/long/test_similarity.py +++ b/test/integration_tests/long/test_similarity.py @@ -428,17 +428,15 @@ def test_end_to_end_index_scan_should_work_correctly_on_image_dataset_faiss(self drop_query = "DROP INDEX testFaissIndexImageDataset" execute_query_fetch_all(self.evadb, drop_query) - def test_index_auto_update_on_structured_table_during_insertion_with_faiss(self): - create_query = "CREATE TABLE testIndexAutoUpdate (img_path TEXT(100))" - execute_query_fetch_all(self.evadb, create_query) - + def _helper_for_auto_update_during_insertion_with_faiss(self, if_exists: bool): for i, img_path in enumerate(self.img_path_list): insert_query = ( f"INSERT INTO testIndexAutoUpdate (img_path) VALUES ('{img_path}')" ) execute_query_fetch_all(self.evadb, insert_query) if i == 0: - create_index_query = "CREATE INDEX testIndex ON testIndexAutoUpdate(DummyFeatureExtractor(Open(img_path))) USING FAISS" + if_exists_str = "IF NOT EXISTS " if if_exists else "" + create_index_query = f"CREATE INDEX {if_exists_str}testIndex ON testIndexAutoUpdate(DummyFeatureExtractor(Open(img_path))) USING FAISS" execute_query_fetch_all(self.evadb, create_index_query) select_query = """SELECT _row_id FROM testIndexAutoUpdate @@ -452,6 +450,15 @@ def test_index_auto_update_on_structured_table_during_insertion_with_faiss(self) res_batch = execute_query_fetch_all(self.evadb, select_query) self.assertEqual(res_batch.frames["testindexautoupdate._row_id"][0], 5) + def test_index_auto_update_on_structured_table_during_insertion_with_faiss(self): + create_query = "CREATE TABLE testIndexAutoUpdate (img_path TEXT(100))" + drop_query = "DROP TABLE testIndexAutoUpdate" + 
execute_query_fetch_all(self.evadb, create_query) + self._helper_for_auto_update_during_insertion_with_faiss(False) + execute_query_fetch_all(self.evadb, drop_query) + execute_query_fetch_all(self.evadb, create_query) + self._helper_for_auto_update_during_insertion_with_faiss(True) + @qdrant_skip_marker def test_end_to_end_index_scan_should_work_correctly_on_image_dataset_qdrant(self): for _ in range(2):