From 8773302c47975db0336a2c6e0d82f19c65c74856 Mon Sep 17 00:00:00 2001 From: Gaurav Tarlok Kakkar Date: Sun, 10 Sep 2023 21:47:31 -0400 Subject: [PATCH 1/6] fix: evadb is now consistent with lowercase --- evadb/binder/binder_utils.py | 4 ++-- evadb/binder/statement_binder_context.py | 3 +++ evadb/storage/native_storage_engine.py | 7 +++++++ evadb/third_party/databases/sqlite/sqlite_handler.py | 8 +++++++- test/third_party_tests/test_native_executor.py | 10 +++++----- 5 files changed, 24 insertions(+), 8 deletions(-) diff --git a/evadb/binder/binder_utils.py b/evadb/binder/binder_utils.py index bb1b36edbd..59746445cd 100644 --- a/evadb/binder/binder_utils.py +++ b/evadb/binder/binder_utils.py @@ -99,7 +99,7 @@ def create_table_catalog_entry_for_data_source( ] column_list = [] for name, dtype in zip(column_name_list, column_type_list): - column_list.append(ColumnCatalogEntry(name, dtype)) + column_list.append(ColumnCatalogEntry(name.lower(), dtype)) # Assemble table. table_catalog_entry = TableCatalogEntry( @@ -339,7 +339,7 @@ def get_column_definition_from_select_target_list( for col_name, output_obj in output_objs: binded_col_list.append( ColumnDefinition( - col_name, + col_name.lower(), output_obj.type, output_obj.array_type, output_obj.array_dimensions, diff --git a/evadb/binder/statement_binder_context.py b/evadb/binder/statement_binder_context.py index b1101a2b36..32dc12c7d8 100644 --- a/evadb/binder/statement_binder_context.py +++ b/evadb/binder/statement_binder_context.py @@ -139,6 +139,9 @@ def get_binded_column( A tuple of alias and column object """ + # binder is case insensitive + col_name = col_name.lower() + def raise_error(): err_msg = f"Found invalid column {col_name}" logger.error(err_msg) diff --git a/evadb/storage/native_storage_engine.py b/evadb/storage/native_storage_engine.py index d56557ed9f..f728524029 100644 --- a/evadb/storage/native_storage_engine.py +++ b/evadb/storage/native_storage_engine.py @@ -45,6 +45,13 @@ def read(self, 
database_name: str, table: TableCatalogEntry) -> Iterator[Batch]: handler.connect() data_df = handler.execute_native_query(f"SELECT * FROM {table.name}").data + + # Handling case-sensitive databases like SQLite can be tricky. Currently, + # EvaDB converts all columns to lowercase, which may result in issues with + # these databases. As we move forward, we are actively working on improving + # this aspect within Binder. + # For more information, please refer to https://github.com/georgia-tech-db/evadb/issues/1079. + data_df.columns = data_df.columns.str.lower() yield Batch(pd.DataFrame(data_df)) except Exception as e: diff --git a/evadb/third_party/databases/sqlite/sqlite_handler.py b/evadb/third_party/databases/sqlite/sqlite_handler.py index 204db36d25..7256280ada 100644 --- a/evadb/third_party/databases/sqlite/sqlite_handler.py +++ b/evadb/third_party/databases/sqlite/sqlite_handler.py @@ -108,10 +108,16 @@ def get_columns(self, table_name: str) -> DBHandlerResponse: def _fetch_results_as_df(self, cursor): try: + # Handling case-sensitive databases like SQLite can be tricky. Currently, + # EvaDB converts all columns to lowercase, which may result in issues with + # these databases. As we move forward, we are actively working on improving + # this aspect within Binder. + # For more information, please refer to https://github.com/georgia-tech-db/evadb/issues/1079. 
+ res = cursor.fetchall() res_df = pd.DataFrame( res, - columns=[desc[0] for desc in cursor.description] + columns=[desc[0].lower() for desc in cursor.description] if cursor.description else [], ) diff --git a/test/third_party_tests/test_native_executor.py b/test/third_party_tests/test_native_executor.py index 7259f4ef03..765ad489fe 100644 --- a/test/third_party_tests/test_native_executor.py +++ b/test/third_party_tests/test_native_executor.py @@ -38,7 +38,7 @@ def _create_table_in_native_database(self): """USE test_data_source { CREATE TABLE test_table ( name VARCHAR(10), - age INT, + Age INT, comment VARCHAR (100) ) }""", @@ -49,7 +49,7 @@ def _insert_value_into_native_database(self, col1, col2, col3): self.evadb, f"""USE test_data_source {{ INSERT INTO test_table ( - name, age, comment + name, Age, comment ) VALUES ( '{col1}', {col2}, '{col3}' ) @@ -67,7 +67,7 @@ def _drop_table_in_native_database(self): def _create_evadb_table_using_select_query(self): execute_query_fetch_all( self.evadb, - """CREATE TABLE eva_table AS SELECT name, age FROM test_data_source.test_table;""", + """CREATE TABLE eva_table AS SELECT name, Age FROM test_data_source.test_table;""", ) # check if the create table is successful @@ -150,7 +150,7 @@ def _raise_error_on_invalid_connection(self): def test_should_run_query_in_postgres(self): # Create database. params = { - "user": "eva", + "user": "gkakkar7", "password": "password", "host": "localhost", "port": "5432", @@ -169,7 +169,7 @@ def test_should_run_query_in_postgres(self): self._raise_error_on_multiple_creation() self._raise_error_on_invalid_connection() - def test_should_run_query_in_sqlite(self): + def test_aaashould_run_query_in_sqlite(self): # Create database. 
params = { "database": "evadb.db", From c968029f7f763cf14af750d1dfd7fbb488666c99 Mon Sep 17 00:00:00 2001 From: Gaurav Tarlok Kakkar Date: Sun, 10 Sep 2023 22:02:03 -0400 Subject: [PATCH 2/6] revert unnecessary changes --- test/third_party_tests/test_native_executor.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/third_party_tests/test_native_executor.py b/test/third_party_tests/test_native_executor.py index 765ad489fe..834f434921 100644 --- a/test/third_party_tests/test_native_executor.py +++ b/test/third_party_tests/test_native_executor.py @@ -150,7 +150,7 @@ def _raise_error_on_invalid_connection(self): def test_should_run_query_in_postgres(self): # Create database. params = { - "user": "gkakkar7", + "user": "eva", "password": "password", "host": "localhost", "port": "5432", @@ -169,7 +169,7 @@ def test_should_run_query_in_postgres(self): self._raise_error_on_multiple_creation() self._raise_error_on_invalid_connection() - def test_aaashould_run_query_in_sqlite(self): + def test_should_run_query_in_sqlite(self): # Create database. 
params = { "database": "evadb.db", From b325ac5929944f296b51d47639d8a364ad087529 Mon Sep 17 00:00:00 2001 From: Gaurav Tarlok Kakkar Date: Sun, 10 Sep 2023 23:09:15 -0400 Subject: [PATCH 3/6] feat: add support for drop database --- evadb/catalog/catalog_manager.py | 4 +-- evadb/executor/drop_object_executor.py | 26 ++++++++++++++ evadb/parser/lark_visitor/_drop_statement.py | 14 ++++++++ evadb/parser/types.py | 1 + evadb/parser/utils.py | 4 +++ .../short/test_drop_executor.py | 36 +++++++++++++++++++ 6 files changed, 82 insertions(+), 3 deletions(-) diff --git a/evadb/catalog/catalog_manager.py b/evadb/catalog/catalog_manager.py index 83ec869001..c5fd50f226 100644 --- a/evadb/catalog/catalog_manager.py +++ b/evadb/catalog/catalog_manager.py @@ -160,9 +160,7 @@ def get_database_catalog_entry(self, database_name: str) -> DatabaseCatalogEntry return table_entry - def delete_database_catalog_entry( - self, database_entry: DatabaseCatalogEntry - ) -> bool: + def drop_database_catalog_entry(self, database_entry: DatabaseCatalogEntry) -> bool: """ This method deletes the database from catalog. 
diff --git a/evadb/executor/drop_object_executor.py b/evadb/executor/drop_object_executor.py index 7a56674fe6..38d5419dc4 100644 --- a/evadb/executor/drop_object_executor.py +++ b/evadb/executor/drop_object_executor.py @@ -43,6 +43,9 @@ def exec(self, *args, **kwargs): elif self.node.object_type == ObjectType.FUNCTION: yield self._handle_drop_function(self.node.name, self.node.if_exists) + elif self.node.object_type == ObjectType.DATABASE: + yield self._handle_drop_database(self.node.name, self.node.if_exists) + def _handle_drop_table(self, table_name: str, if_exists: bool): if not self.catalog().check_table_exists(table_name): err_msg = "Table: {} does not exist".format(table_name) @@ -132,3 +135,26 @@ def _handle_drop_index(self, index_name: str, if_exists: bool): index=[0], ) ) + + def _handle_drop_database(self, database_name: str, if_exists: bool): + db_catalog_entry = self.catalog().get_database_catalog_entry(database_name) + if not db_catalog_entry: + err_msg = ( + f"Database {database_name} does not exist, therefore cannot be dropped." 
+ ) + if if_exists: + logger.warning(err_msg) + return Batch(pd.DataFrame([err_msg])) + else: + raise RuntimeError(err_msg) + + logger.debug(f"Dropping database {database_name}") + + self.catalog().drop_database_catalog_entry(db_catalog_entry) + + return Batch( + pd.DataFrame( + {f"Database {database_name} successfully dropped"}, + index=[0], + ) + ) diff --git a/evadb/parser/lark_visitor/_drop_statement.py b/evadb/parser/lark_visitor/_drop_statement.py index fbf922fb36..0b397378ae 100644 --- a/evadb/parser/lark_visitor/_drop_statement.py +++ b/evadb/parser/lark_visitor/_drop_statement.py @@ -59,3 +59,17 @@ def drop_function(self, tree): if_exists = True return DropObjectStatement(ObjectType.FUNCTION, function_name, if_exists) + + # Drop Database + def drop_database(self, tree): + database_name = None + if_exists = False + + for child in tree.children: + if isinstance(child, Tree): + if child.data == "if_exists": + if_exists = True + elif child.data == "uid": + database_name = self.visit(child) + + return DropObjectStatement(ObjectType.DATABASE, database_name, if_exists) diff --git a/evadb/parser/types.py b/evadb/parser/types.py index 0abebcb097..a57c938db8 100644 --- a/evadb/parser/types.py +++ b/evadb/parser/types.py @@ -79,3 +79,4 @@ class ObjectType(EvaDBEnum): TABLE # noqa: F821 FUNCTION # noqa: F821 INDEX # noqa: F821 + DATABASE # noqa: F821 diff --git a/evadb/parser/utils.py b/evadb/parser/utils.py index 70db55cecc..3ad9b032f1 100644 --- a/evadb/parser/utils.py +++ b/evadb/parser/utils.py @@ -149,6 +149,10 @@ def parse_drop_index(index_name: str, if_exists: bool): return parse_drop(ObjectType.INDEX, index_name, if_exists) +def parse_drop_database(database_name: str, if_exists: bool): + return parse_drop(ObjectType.DATABASE, database_name, if_exists) + + def parse_query(query): stmt = Parser().parse(query) assert len(stmt) == 1 diff --git a/test/integration_tests/short/test_drop_executor.py b/test/integration_tests/short/test_drop_executor.py index 
fb5fd4339b..a5e19ea536 100644 --- a/test/integration_tests/short/test_drop_executor.py +++ b/test/integration_tests/short/test_drop_executor.py @@ -191,3 +191,39 @@ def test_should_drop_index(self): self.assertTrue(index_obj is None) # todo check if the index is also removed from the underlying vector store + + #### DROP DATABASE + + def test_should_drop_database(self): + # Create database. + database_name = "test_data_source" + params = { + "database": "evadb.db", + } + query = f"""CREATE DATABASE {database_name} + WITH ENGINE = "sqlite", + PARAMETERS = {params};""" + execute_query_fetch_all(self.evadb, query) + self.assertIsNotNone( + self.evadb.catalog().get_database_catalog_entry(database_name) + ) + + # DROP DATABASE + execute_query_fetch_all(self.evadb, f"DROP DATABASE {database_name}") + self.assertIsNone( + self.evadb.catalog().get_database_catalog_entry(database_name) + ) + + # DROP should pass with warning + result = execute_query_fetch_all( + self.evadb, f"DROP DATABASE IF EXISTS {database_name}" + ) + self.assertTrue("does not exist" in result.frames.to_string()) + + # DROP should throw error + with self.assertRaises(ExecutorError): + execute_query_fetch_all( + self.evadb, + f"DROP DATABASE {database_name}", + do_not_print_exceptions=True, + ) From 1f79a031924c650ca535bed162cfd8fbd6bda44c Mon Sep 17 00:00:00 2001 From: Gaurav Tarlok Kakkar Date: Sun, 10 Sep 2023 23:13:17 -0400 Subject: [PATCH 4/6] revert changes --- evadb/binder/binder_utils.py | 4 ++-- evadb/binder/statement_binder_context.py | 3 --- evadb/storage/native_storage_engine.py | 7 ------- test/third_party_tests/test_native_executor.py | 6 +++--- 4 files changed, 5 insertions(+), 15 deletions(-) diff --git a/evadb/binder/binder_utils.py b/evadb/binder/binder_utils.py index 59746445cd..bb1b36edbd 100644 --- a/evadb/binder/binder_utils.py +++ b/evadb/binder/binder_utils.py @@ -99,7 +99,7 @@ def create_table_catalog_entry_for_data_source( ] column_list = [] for name, dtype in zip(column_name_list, 
column_type_list): - column_list.append(ColumnCatalogEntry(name.lower(), dtype)) + column_list.append(ColumnCatalogEntry(name, dtype)) # Assemble table. table_catalog_entry = TableCatalogEntry( @@ -339,7 +339,7 @@ def get_column_definition_from_select_target_list( for col_name, output_obj in output_objs: binded_col_list.append( ColumnDefinition( - col_name.lower(), + col_name, output_obj.type, output_obj.array_type, output_obj.array_dimensions, diff --git a/evadb/binder/statement_binder_context.py b/evadb/binder/statement_binder_context.py index 32dc12c7d8..b1101a2b36 100644 --- a/evadb/binder/statement_binder_context.py +++ b/evadb/binder/statement_binder_context.py @@ -139,9 +139,6 @@ def get_binded_column( A tuple of alias and column object """ - # binder is case insensitive - col_name = col_name.lower() - def raise_error(): err_msg = f"Found invalid column {col_name}" logger.error(err_msg) diff --git a/evadb/storage/native_storage_engine.py b/evadb/storage/native_storage_engine.py index f728524029..d56557ed9f 100644 --- a/evadb/storage/native_storage_engine.py +++ b/evadb/storage/native_storage_engine.py @@ -45,13 +45,6 @@ def read(self, database_name: str, table: TableCatalogEntry) -> Iterator[Batch]: handler.connect() data_df = handler.execute_native_query(f"SELECT * FROM {table.name}").data - - # Handling case-sensitive databases like SQLite can be tricky. Currently, - # EvaDB converts all columns to lowercase, which may result in issues with - # these databases. As we move forward, we are actively working on improving - # this aspect within Binder. - # For more information, please refer to https://github.com/georgia-tech-db/evadb/issues/1079. 
- data_df.columns = data_df.columns.str.lower() yield Batch(pd.DataFrame(data_df)) except Exception as e: diff --git a/test/third_party_tests/test_native_executor.py b/test/third_party_tests/test_native_executor.py index 834f434921..7259f4ef03 100644 --- a/test/third_party_tests/test_native_executor.py +++ b/test/third_party_tests/test_native_executor.py @@ -38,7 +38,7 @@ def _create_table_in_native_database(self): """USE test_data_source { CREATE TABLE test_table ( name VARCHAR(10), - Age INT, + age INT, comment VARCHAR (100) ) }""", @@ -49,7 +49,7 @@ def _insert_value_into_native_database(self, col1, col2, col3): self.evadb, f"""USE test_data_source {{ INSERT INTO test_table ( - name, Age, comment + name, age, comment ) VALUES ( '{col1}', {col2}, '{col3}' ) @@ -67,7 +67,7 @@ def _drop_table_in_native_database(self): def _create_evadb_table_using_select_query(self): execute_query_fetch_all( self.evadb, - """CREATE TABLE eva_table AS SELECT name, Age FROM test_data_source.test_table;""", + """CREATE TABLE eva_table AS SELECT name, age FROM test_data_source.test_table;""", ) # check if the create table is successful From ff4e3f9080f025b1a3ac9d5c3e81f8a738da023b Mon Sep 17 00:00:00 2001 From: Gaurav Tarlok Kakkar Date: Sun, 10 Sep 2023 23:14:14 -0400 Subject: [PATCH 5/6] revert changes --- evadb/third_party/databases/sqlite/sqlite_handler.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/evadb/third_party/databases/sqlite/sqlite_handler.py b/evadb/third_party/databases/sqlite/sqlite_handler.py index 7256280ada..204db36d25 100644 --- a/evadb/third_party/databases/sqlite/sqlite_handler.py +++ b/evadb/third_party/databases/sqlite/sqlite_handler.py @@ -108,16 +108,10 @@ def get_columns(self, table_name: str) -> DBHandlerResponse: def _fetch_results_as_df(self, cursor): try: - # Handling case-sensitive databases like SQLite can be tricky. Currently, - # EvaDB converts all columns to lowercase, which may result in issues with - # these databases. 
As we move forward, we are actively working on improving - # this aspect within Binder. - # For more information, please refer to https://github.com/georgia-tech-db/evadb/issues/1079. - res = cursor.fetchall() res_df = pd.DataFrame( res, - columns=[desc[0].lower() for desc in cursor.description] + columns=[desc[0] for desc in cursor.description] if cursor.description else [], ) From 6adcfb12be6cc85f8e71fad1f408c0826ad47e6d Mon Sep 17 00:00:00 2001 From: Gaurav Tarlok Kakkar Date: Sun, 10 Sep 2023 23:17:11 -0400 Subject: [PATCH 6/6] add docs --- docs/source/reference/evaql/drop.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/docs/source/reference/evaql/drop.rst b/docs/source/reference/evaql/drop.rst index 67a7234891..8a84525df4 100644 --- a/docs/source/reference/evaql/drop.rst +++ b/docs/source/reference/evaql/drop.rst @@ -15,3 +15,11 @@ DROP FUNCTION .. code:: mysql DROP FUNCTION FastRCNNObjectDetector; + +DROP DATABASE +------------- + +.. code:: mysql + + DROP DATABASE postgres_db; + DROP DATABASE IF EXISTS postgres_db; \ No newline at end of file