From e03bc2fe270bd9d9d5a43015b1aa7baf542d9114 Mon Sep 17 00:00:00 2001
From: hershd23 <hershdhillon23@gmail.com>
Date: Thu, 5 Oct 2023 23:28:42 -0400
Subject: [PATCH 01/20] Adding EvaDb changes to the catalog 	modified:  
 evadb/catalog/catalog_manager.py 	new file:  
 evadb/catalog/models/configuration_catelog.py 	modified:  
 evadb/catalog/models/utils.py 	new file:  
 evadb/catalog/services/configuration_catalog_service.py 	modified:  
 evadb/catalog/sql_config.py Adding EvaDB base configs to a python dict 
 new file:   evadb/eva_config.py

---
 evadb/catalog/catalog_manager.py              | 28 +++++++++
 evadb/catalog/models/configuration_catelog.py | 43 +++++++++++++
 evadb/catalog/models/utils.py                 | 16 +++++
 .../services/configuration_catalog_service.py | 61 +++++++++++++++++++
 evadb/catalog/sql_config.py                   |  1 +
 evadb/eva_config.py                           | 41 +++++++++++++
 6 files changed, 190 insertions(+)
 create mode 100644 evadb/catalog/models/configuration_catelog.py
 create mode 100644 evadb/catalog/services/configuration_catalog_service.py
 create mode 100644 evadb/eva_config.py

diff --git a/evadb/catalog/catalog_manager.py b/evadb/catalog/catalog_manager.py
index b7c55c9bf2..3a63b890a7 100644
--- a/evadb/catalog/catalog_manager.py
+++ b/evadb/catalog/catalog_manager.py
@@ -33,6 +33,7 @@
 )
 from evadb.catalog.models.utils import (
     ColumnCatalogEntry,
+    ConfigurationCatalogEntry,
     DatabaseCatalogEntry,
     FunctionCacheCatalogEntry,
     FunctionCatalogEntry,
@@ -45,6 +46,7 @@
     init_db,
     truncate_catalog_tables,
 )
+from evadb.catalog.services.configuration_catalog_service import ConfigurationCatalogService
 from evadb.catalog.services.column_catalog_service import ColumnCatalogService
 from evadb.catalog.services.database_catalog_service import DatabaseCatalogService
 from evadb.catalog.services.function_cache_catalog_service import (
@@ -78,6 +80,7 @@ def __init__(self, db_uri: str, config: ConfigurationManager):
         self._config = config
         self._bootstrap_catalog()
         self._db_catalog_service = DatabaseCatalogService(self._sql_config.session)
+        self._config_catalog_service = ConfigurationCatalogService(self._sql_config.session)
         self._table_catalog_service = TableCatalogService(self._sql_config.session)
         self._column_service = ColumnCatalogService(self._sql_config.session)
         self._function_service = FunctionCatalogService(self._sql_config.session)
@@ -607,6 +610,31 @@ def create_and_insert_multimedia_metadata_table_catalog_entry(
             table_type=TableType.SYSTEM_STRUCTURED_DATA,
         )
         return obj
+    
+    "Configuration catalog services"
+
+    def insert_configuration_catalog_entry(self, key: str, value: any):
+        """A new entry is persisted in the database catalog."
+
+        Args:
+            key: key name
+            value: value name
+        """
+        self._db_catalog_service.insert_entry(key, value)
+
+    def get_configuration_catalog_entry(self, key: str) -> ConfigurationCatalogEntry:
+        """
+        Returns the value entry for the given key
+        Arguments:
+            key (str): key name
+
+        Returns:
+            ConfigurationCatalogEntry
+        """
+
+        table_entry = self._db_catalog_service.get_entry_by_name(key)
+
+        return table_entry
 
 
 #### get catalog instance
diff --git a/evadb/catalog/models/configuration_catelog.py b/evadb/catalog/models/configuration_catelog.py
new file mode 100644
index 0000000000..106c230131
--- /dev/null
+++ b/evadb/catalog/models/configuration_catelog.py
@@ -0,0 +1,43 @@
+# coding=utf-8
+# Copyright 2018-2023 EvaDB
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from sqlalchemy import Column, String
+
+from evadb.catalog.models.base_model import BaseModel
+from evadb.catalog.models.utils import ConfigurationCatalogEntry, TextPickleType
+
+
+class ConfigurationCatalog(BaseModel):
+    """The `ConfigurationCatalog` catalog stores all the configuration params.
+    `_row_id:` an autogenerated unique identifier.
+    `_key:` the key for the config.
+    `_value:` the value for the config
+    """
+
+    __tablename__ = "configuation_catalog"
+
+    _key = Column("name", String(100), unique=True)
+    _value = Column("engine", TextPickleType()) # TODO :- Will this work or would we need to pickle ??
+
+    def __init__(self, key: str, value: any):
+        self._key = key
+        self._value = value
+
+    def as_dataclass(self) -> "ConfigurationCatalogEntry":
+        return ConfigurationCatalogEntry(
+            row_id=self._row_id,
+            key=self._key,
+            value=self._value,
+        )
diff --git a/evadb/catalog/models/utils.py b/evadb/catalog/models/utils.py
index b1c067aa0b..36d11937f0 100644
--- a/evadb/catalog/models/utils.py
+++ b/evadb/catalog/models/utils.py
@@ -257,3 +257,19 @@ def display_format(self):
             "engine": self.engine,
             "params": self.params,
         }
+
+@dataclass(unsafe_hash=True)
+class ConfigurationCatalogEntry:
+    """Dataclass representing an entry in the `ConfigurationCatalog`.
+    This is done to ensure we don't expose the sqlalchemy dependencies beyond catalog service. Further, sqlalchemy does not allow sharing of objects across threads.
+    """
+
+    key = str,
+    value = str,
+    row_id: int = None
+
+    def display_format(self):
+        return {
+            "key": self.key,
+            "value": self.value,
+        }
diff --git a/evadb/catalog/services/configuration_catalog_service.py b/evadb/catalog/services/configuration_catalog_service.py
new file mode 100644
index 0000000000..3f2722fbba
--- /dev/null
+++ b/evadb/catalog/services/configuration_catalog_service.py
@@ -0,0 +1,61 @@
+# coding=utf-8
+# Copyright 2018-2023 EvaDB
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from sqlalchemy.orm import Session
+from sqlalchemy.sql.expression import select
+
+from evadb.catalog.models.configuration_catelog import ConfigurationCatalog
+from evadb.catalog.models.utils import ConfigurationCatalogEntry
+from evadb.catalog.services.base_service import BaseService
+from evadb.utils.errors import CatalogError
+from evadb.utils.logging_manager import logger
+
+
+class ConfigurationCatalogService(BaseService):
+    def __init__(self, db_session: Session):
+        super().__init__(ConfigurationCatalog, db_session)
+
+    def insert_entry(
+        self,
+        key: str,
+        value: any,
+    ):
+        try:
+            config_catalog_obj = self.model(
+                key = key,
+                value = value
+            )
+            config_catalog_obj = config_catalog_obj.save(self.session)
+
+        except Exception as e:
+            logger.exception(
+                f"Failed to insert entry into database catalog with exception {str(e)}"
+            )
+            raise CatalogError(e)
+
+    def get_entry_by_name(self, key: str) -> ConfigurationCatalogEntry:
+        """
+        Get the table catalog entry with given table name.
+        Arguments:
+            key  (str): key name
+        Returns:
+            Configuration Catalog Entry - catalog entry for given key name
+        """
+        entry = self.session.execute(
+            select(self.model).filter(self.model._key == key)
+        ).scalar_one_or_none()
+        if entry:
+            return entry.as_dataclass()
+        return entry
diff --git a/evadb/catalog/sql_config.py b/evadb/catalog/sql_config.py
index f4893ba997..c1432e3553 100644
--- a/evadb/catalog/sql_config.py
+++ b/evadb/catalog/sql_config.py
@@ -32,6 +32,7 @@
     "column_catalog",
     "table_catalog",
     "database_catalog",
+    "configuration_catalog",
     "depend_column_and_function_cache",
     "function_cache",
     "function_catalog",
diff --git a/evadb/eva_config.py b/evadb/eva_config.py
new file mode 100644
index 0000000000..d7882f4b35
--- /dev/null
+++ b/evadb/eva_config.py
@@ -0,0 +1,41 @@
+# coding=utf-8
+# Copyright 2018-2023 EvaDB
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+EvaDB configuration dict
+
+batch_mem_size configures the number of rows processed by the execution engine in one iteration
+rows = max(1, row_mem_size / batch_mem_size)
+"""
+
+BASE_EVADB_CONFIG = {
+  "evadb_installation_dir": "",
+  "datasets_dir": "",
+  "catalog_database_uri": "",
+  "application": "evadb",
+  "mode": "release",
+  "batch_mem_size": 30000000,
+  "gpu_batch_size": 1, # batch size used for gpu_operations
+  "gpu_ids": [
+    0
+  ],
+  "host": "0.0.0.0",
+  "port": 8803,
+  "socket_timeout": 60,
+  "ray": False,
+  "OPENAI_KEY": "",
+  "PINECONE_API_KEY": "",
+  "PINECONE_ENV": ""
+}
\ No newline at end of file

From f123b6ffd77d908fdc4253ecbeb2107efa555634 Mon Sep 17 00:00:00 2001
From: hershd23 <hershdhillon23@gmail.com>
Date: Fri, 6 Oct 2023 02:04:04 -0400
Subject: [PATCH 02/20] Adding changes to the configuration manager 
 modified:   evadb/configuration/bootstrap_environment.py 	modified:  
 evadb/configuration/configuration_manager.py 	modified:  
 evadb/configuration/constants.py 	renamed:    evadb/eva_config.py ->
 evadb/evadb_config.py

---
 evadb/configuration/bootstrap_environment.py | 10 ++++++----
 evadb/configuration/configuration_manager.py |  7 ++++---
 evadb/configuration/constants.py             |  2 +-
 evadb/{eva_config.py => evadb_config.py}     |  0
 4 files changed, 11 insertions(+), 8 deletions(-)
 rename evadb/{eva_config.py => evadb_config.py} (100%)

diff --git a/evadb/configuration/bootstrap_environment.py b/evadb/configuration/bootstrap_environment.py
index 55e2f0f228..f0c33ac5d7 100644
--- a/evadb/configuration/bootstrap_environment.py
+++ b/evadb/configuration/bootstrap_environment.py
@@ -36,12 +36,12 @@
 
 def get_base_config(evadb_installation_dir: Path) -> Path:
     """
-    Get path to .evadb.yml source path.
+    Get path to .evadb_config.py source path.
     """
     # if evadb package is installed in environment
     if importlib_resources.is_resource("evadb", EvaDB_CONFIG_FILE):
-        with importlib_resources.path("evadb", EvaDB_CONFIG_FILE) as yml_path:
-            return yml_path
+        with importlib_resources.path("evadb", EvaDB_CONFIG_FILE) as config_path:
+            return config_path
     else:
         # For local dev environments without package installed
         return evadb_installation_dir / EvaDB_CONFIG_FILE
@@ -91,9 +91,11 @@ def bootstrap_environment(evadb_dir: Path, evadb_installation_dir: Path):
     evadb_logger.setLevel(level)
     evadb_logger.debug(f"Setting logging level to: {str(level)}")
 
-    return config_obj
+    # Mainly want to add all the configs to sqlite
 
+    return config_obj
 
+# TODO : Change
 def create_directories_and_get_default_config_values(
     evadb_dir: Path, evadb_installation_dir: Path, category: str = None, key: str = None
 ) -> Union[dict, str]:
diff --git a/evadb/configuration/configuration_manager.py b/evadb/configuration/configuration_manager.py
index 8d209e2fcf..ea45963437 100644
--- a/evadb/configuration/configuration_manager.py
+++ b/evadb/configuration/configuration_manager.py
@@ -21,11 +21,12 @@
 
 
 class ConfigurationManager(object):
-    def __init__(self, evadb_dir: str = None) -> None:
+    def __init__(self, evadb_dir: str = None, catalog_obj = None) -> None:
         self._evadb_dir = evadb_dir or EvaDB_DATABASE_DIR
-        self._config_obj = self._create_if_not_exists()
+        self._catalog_obj = catalog_obj
+        self._populate_base_configs()
 
-    def _create_if_not_exists(self):
+    def _populate_base_configs(self):
         config_obj = bootstrap_environment(
             evadb_dir=Path(self._evadb_dir),
             evadb_installation_dir=Path(EvaDB_INSTALLATION_DIR),
diff --git a/evadb/configuration/constants.py b/evadb/configuration/constants.py
index 51513462d6..138ec547f8 100644
--- a/evadb/configuration/constants.py
+++ b/evadb/configuration/constants.py
@@ -21,7 +21,7 @@
 EvaDB_DATABASE_DIR = "evadb_data"
 EvaDB_APPS_DIR = "apps"
 EvaDB_DATASET_DIR = "evadb_datasets"
-EvaDB_CONFIG_FILE = "evadb.yml"
+EvaDB_CONFIG_FILE = "evadb_config.py"
 FUNCTION_DIR = "functions"
 MODEL_DIR = "models"
 CATALOG_DIR = "catalog"
diff --git a/evadb/eva_config.py b/evadb/evadb_config.py
similarity index 100%
rename from evadb/eva_config.py
rename to evadb/evadb_config.py

From dd48b70bd5b87439ec35f697b46ebdadaad59ee8 Mon Sep 17 00:00:00 2001
From: Gaurav Tarlok Kakkar <gaurav21776@gmail.com>
Date: Sun, 8 Oct 2023 16:45:33 -0400
Subject: [PATCH 03/20] checkpoint

---
 evadb/catalog/catalog_manager.py              | 36 ++++++++++++-------
 evadb/catalog/catalog_utils.py                | 10 ++----
 ...on_catelog.py => configuration_catalog.py} |  0
 .../services/configuration_catalog_service.py |  2 +-
 evadb/database.py                             | 16 +++------
 5 files changed, 30 insertions(+), 34 deletions(-)
 rename evadb/catalog/models/{configuration_catelog.py => configuration_catalog.py} (100%)

diff --git a/evadb/catalog/catalog_manager.py b/evadb/catalog/catalog_manager.py
index 3a63b890a7..f6d3171634 100644
--- a/evadb/catalog/catalog_manager.py
+++ b/evadb/catalog/catalog_manager.py
@@ -23,7 +23,6 @@
     VideoColumnName,
 )
 from evadb.catalog.catalog_utils import (
-    cleanup_storage,
     construct_function_cache_catalog_entry,
     get_document_table_column_definitions,
     get_image_table_column_definitions,
@@ -46,7 +45,9 @@
     init_db,
     truncate_catalog_tables,
 )
-from evadb.catalog.services.configuration_catalog_service import ConfigurationCatalogService
+from evadb.catalog.services.configuration_catalog_service import (
+    ConfigurationCatalogService,
+)
 from evadb.catalog.services.column_catalog_service import ColumnCatalogService
 from evadb.catalog.services.database_catalog_service import DatabaseCatalogService
 from evadb.catalog.services.function_cache_catalog_service import (
@@ -63,24 +64,28 @@
 from evadb.catalog.services.index_catalog_service import IndexCatalogService
 from evadb.catalog.services.table_catalog_service import TableCatalogService
 from evadb.catalog.sql_config import IDENTIFIER_COLUMN, SQLConfig
-from evadb.configuration.configuration_manager import ConfigurationManager
 from evadb.expression.function_expression import FunctionExpression
 from evadb.parser.create_statement import ColumnDefinition
 from evadb.parser.table_ref import TableInfo
 from evadb.parser.types import FileFormatType
 from evadb.third_party.databases.interface import get_database_handler
-from evadb.utils.generic_utils import generate_file_path, get_file_checksum
+from evadb.utils.generic_utils import (
+    generate_file_path,
+    get_file_checksum,
+    remove_directory_contents,
+)
 from evadb.utils.logging_manager import logger
 
 
 class CatalogManager(object):
-    def __init__(self, db_uri: str, config: ConfigurationManager):
+    def __init__(self, db_uri: str):
         self._db_uri = db_uri
         self._sql_config = SQLConfig(db_uri)
-        self._config = config
         self._bootstrap_catalog()
         self._db_catalog_service = DatabaseCatalogService(self._sql_config.session)
-        self._config_catalog_service = ConfigurationCatalogService(self._sql_config.session)
+        self._config_catalog_service = ConfigurationCatalogService(
+            self._sql_config.session
+        )
         self._table_catalog_service = TableCatalogService(self._sql_config.session)
         self._column_service = ColumnCatalogService(self._sql_config.session)
         self._function_service = FunctionCatalogService(self._sql_config.session)
@@ -136,7 +141,10 @@ def _clear_catalog_contents(self):
         # truncate the catalog tables
         truncate_catalog_tables(self._sql_config.engine)
         # clean up the dataset, index, and cache directories
-        cleanup_storage(self._config)
+        for folder in ["cache_dir", "index_dir", "datasets_dir"]:
+            remove_directory_contents(
+                self._config_catalog_service.get_entry_by_name(folder).value
+            )
 
     "Database catalog services"
 
@@ -448,7 +456,7 @@ def get_all_index_catalog_entries(self):
     """ Function Cache related"""
 
     def insert_function_cache_catalog_entry(self, func_expr: FunctionExpression):
-        cache_dir = self._config.get_value("storage", "cache_dir")
+        cache_dir = self._config_catalog_service.get_entry_by_name("cache_dir").value
         entry = construct_function_cache_catalog_entry(func_expr, cache_dir=cache_dir)
         return self._function_cache_service.insert_entry(entry)
 
@@ -511,7 +519,9 @@ def create_and_insert_table_catalog_entry(
         table_name = table_info.table_name
         column_catalog_entries = xform_column_definitions_to_catalog_entries(columns)
 
-        dataset_location = self._config.get_value("core", "datasets_dir")
+        dataset_location = self._config_catalog_service.get_entry_by_name(
+            "datasets_dir"
+        )
         file_url = str(generate_file_path(dataset_location, table_name))
         table_catalog_entry = self.insert_table_catalog_entry(
             table_name,
@@ -610,7 +620,7 @@ def create_and_insert_multimedia_metadata_table_catalog_entry(
             table_type=TableType.SYSTEM_STRUCTURED_DATA,
         )
         return obj
-    
+
     "Configuration catalog services"
 
     def insert_configuration_catalog_entry(self, key: str, value: any):
@@ -645,5 +655,5 @@ def get_configuration_catalog_entry(self, key: str) -> ConfigurationCatalogEntry
 # instance across all objects within the same thread. It is worth investigating whether
 # SQLAlchemy already handles this optimization for us, which will be explored at a
 # later time.
-def get_catalog_instance(db_uri: str, config: ConfigurationManager):
-    return CatalogManager(db_uri, config)
+def get_catalog_instance(db_uri: str):
+    return CatalogManager(db_uri)
diff --git a/evadb/catalog/catalog_utils.py b/evadb/catalog/catalog_utils.py
index d2187978d2..cc6b0c510f 100644
--- a/evadb/catalog/catalog_utils.py
+++ b/evadb/catalog/catalog_utils.py
@@ -256,12 +256,6 @@ def construct_function_cache_catalog_entry(
     return entry
 
 
-def cleanup_storage(config):
-    remove_directory_contents(config.get_value("storage", "index_dir"))
-    remove_directory_contents(config.get_value("storage", "cache_dir"))
-    remove_directory_contents(config.get_value("core", "datasets_dir"))
-
-
 def get_metadata_entry_or_val(
     function_obj: FunctionCatalogEntry, key: str, default_val: Any = None
 ) -> str:
@@ -308,7 +302,7 @@ def get_metadata_properties(function_obj: FunctionCatalogEntry) -> Dict:
 # instance across all objects within the same thread. It is worth investigating whether
 # SQLAlchemy already handles this optimization for us, which will be explored at a
 # later time.
-def get_catalog_instance(db_uri: str, config: ConfigurationManager):
+def get_catalog_instance(db_uri: str):
     from evadb.catalog.catalog_manager import CatalogManager
 
-    return CatalogManager(db_uri, config)
+    return CatalogManager(db_uri)
diff --git a/evadb/catalog/models/configuration_catelog.py b/evadb/catalog/models/configuration_catalog.py
similarity index 100%
rename from evadb/catalog/models/configuration_catelog.py
rename to evadb/catalog/models/configuration_catalog.py
diff --git a/evadb/catalog/services/configuration_catalog_service.py b/evadb/catalog/services/configuration_catalog_service.py
index 3f2722fbba..5b3646ad3c 100644
--- a/evadb/catalog/services/configuration_catalog_service.py
+++ b/evadb/catalog/services/configuration_catalog_service.py
@@ -16,7 +16,7 @@
 from sqlalchemy.orm import Session
 from sqlalchemy.sql.expression import select
 
-from evadb.catalog.models.configuration_catelog import ConfigurationCatalog
+from evadb.catalog.models.configuration_catalog import ConfigurationCatalog
 from evadb.catalog.models.utils import ConfigurationCatalogEntry
 from evadb.catalog.services.base_service import BaseService
 from evadb.utils.errors import CatalogError
diff --git a/evadb/database.py b/evadb/database.py
index ed6ad2f118..086bf0a018 100644
--- a/evadb/database.py
+++ b/evadb/database.py
@@ -17,9 +17,7 @@
 from typing import TYPE_CHECKING, Callable
 
 from evadb.catalog.catalog_utils import get_catalog_instance
-from evadb.configuration.configuration_manager import ConfigurationManager
 from evadb.configuration.constants import DB_DEFAULT_NAME, EvaDB_DATABASE_DIR
-from evadb.utils.generic_utils import parse_config_yml
 
 if TYPE_CHECKING:
     from evadb.catalog.catalog_manager import CatalogManager
@@ -28,7 +26,6 @@
 @dataclass
 class EvaDBDatabase:
     db_uri: str
-    config: ConfigurationManager
     catalog_uri: str
     catalog_func: Callable
 
@@ -36,16 +33,12 @@ def catalog(self) -> "CatalogManager":
         """
         Note: Generating an object on demand plays a crucial role in ensuring that different threads do not share the same catalog object, as it can result in serialization issues and incorrect behavior with SQLAlchemy. Refer to get_catalog_instance()
         """
-        return self.catalog_func(self.catalog_uri, self.config)
+        return self.catalog_func(self.catalog_uri)
 
 
 def get_default_db_uri(evadb_dir: Path):
-    config_obj = parse_config_yml()
-    if config_obj["core"]["catalog_database_uri"]:
-        return config_obj["core"]["catalog_database_uri"]
-    else:
-        # Default to sqlite.
-        return f"sqlite:///{evadb_dir.resolve()}/{DB_DEFAULT_NAME}"
+    # Default to sqlite.
+    return f"sqlite:///{evadb_dir.resolve()}/{DB_DEFAULT_NAME}"
 
 
 def init_evadb_instance(
@@ -53,8 +46,7 @@ def init_evadb_instance(
 ):
     if db_dir is None:
         db_dir = EvaDB_DATABASE_DIR
-    config = ConfigurationManager(db_dir)
 
     catalog_uri = custom_db_uri or get_default_db_uri(Path(db_dir))
 
-    return EvaDBDatabase(db_dir, config, catalog_uri, get_catalog_instance)
+    return EvaDBDatabase(db_dir, catalog_uri, get_catalog_instance)

From be27b60e126bc4ccb632defca2f536c9caedfa5a Mon Sep 17 00:00:00 2001
From: Gaurav Tarlok Kakkar <gaurav21776@gmail.com>
Date: Mon, 9 Oct 2023 14:52:45 -0400
Subject: [PATCH 04/20] chekcpoint

---
 evadb/catalog/catalog_manager.py              | 28 +++++---------
 evadb/catalog/catalog_utils.py                | 14 +++++++
 evadb/catalog/models/base_model.py            | 30 ---------------
 evadb/catalog/models/configuration_catalog.py |  2 +-
 evadb/catalog/models/utils.py                 | 12 +++---
 evadb/configuration/bootstrap_environment.py  | 38 ++++++++-----------
 evadb/database.py                             | 18 ++++++++-
 evadb/executor/create_function_executor.py    | 11 ++++--
 evadb/executor/create_index_executor.py       |  6 +--
 evadb/executor/drop_object_executor.py        |  2 +-
 evadb/executor/executor_utils.py              | 10 ++++-
 evadb/executor/vector_index_scan_executor.py  |  4 +-
 evadb/optimizer/rules/rules.py                |  9 ++++-
 evadb/third_party/vector_stores/pinecone.py   | 15 +++-----
 evadb/third_party/vector_stores/utils.py      |  1 +
 15 files changed, 99 insertions(+), 101 deletions(-)

diff --git a/evadb/catalog/catalog_manager.py b/evadb/catalog/catalog_manager.py
index f6d3171634..69bc4052c8 100644
--- a/evadb/catalog/catalog_manager.py
+++ b/evadb/catalog/catalog_manager.py
@@ -14,7 +14,7 @@
 # limitations under the License.
 import shutil
 from pathlib import Path
-from typing import List
+from typing import Any, List
 
 from evadb.catalog.catalog_type import (
     ColumnType,
@@ -138,8 +138,11 @@ def _clear_catalog_contents(self):
         logger.info("Clearing catalog")
         # drop tables which are not part of catalog
         drop_all_tables_except_catalog(self._sql_config.engine)
-        # truncate the catalog tables
-        truncate_catalog_tables(self._sql_config.engine)
+        # truncate the catalog tables except configuration_catalog
+        # We do not remove the configuration entries
+        truncate_catalog_tables(
+            self._sql_config.engine, tables_not_to_truncate=["configuration_catalog"]
+        )
         # clean up the dataset, index, and cache directories
         for folder in ["cache_dir", "index_dir", "datasets_dir"]:
             remove_directory_contents(
@@ -632,7 +635,7 @@ def insert_configuration_catalog_entry(self, key: str, value: any):
         """
         self._db_catalog_service.insert_entry(key, value)
 
-    def get_configuration_catalog_entry(self, key: str) -> ConfigurationCatalogEntry:
+    def get_configuration_catalog_value(self, key: str) -> Any:
         """
         Returns the value entry for the given key
         Arguments:
@@ -643,17 +646,6 @@ def get_configuration_catalog_entry(self, key: str) -> ConfigurationCatalogEntry
         """
 
         table_entry = self._db_catalog_service.get_entry_by_name(key)
-
-        return table_entry
-
-
-#### get catalog instance
-# This function plays a crucial role in ensuring that different threads do
-# not share the same catalog object, as it can result in serialization issues and
-# incorrect behavior with SQLAlchemy. Therefore, whenever a catalog instance is
-# required, we create a new one. One possible optimization is to share the catalog
-# instance across all objects within the same thread. It is worth investigating whether
-# SQLAlchemy already handles this optimization for us, which will be explored at a
-# later time.
-def get_catalog_instance(db_uri: str):
-    return CatalogManager(db_uri)
+        if table_entry:
+            return table_entry.value
+        return None
diff --git a/evadb/catalog/catalog_utils.py b/evadb/catalog/catalog_utils.py
index cc6b0c510f..29607a1cda 100644
--- a/evadb/catalog/catalog_utils.py
+++ b/evadb/catalog/catalog_utils.py
@@ -294,6 +294,20 @@ def get_metadata_properties(function_obj: FunctionCatalogEntry) -> Dict:
     return properties
 
 
+def bootstrap_configs(catalog, configs: dict):
+    """
+    load all the configuration values into the catalog table configuration_catalog
+    """
+    for key, value in configs.items():
+        catalog._config_catalog_service.insert_entry(key, value)
+
+
+def get_configuration_value(key: str):
+    
+    catalog = get_catalog_instance()
+    return catalog.get_configuration_catalog_value(key)
+
+
 #### get catalog instance
 # This function plays a crucial role in ensuring that different threads do
 # not share the same catalog object, as it can result in serialization issues and
diff --git a/evadb/catalog/models/base_model.py b/evadb/catalog/models/base_model.py
index 8e915af245..b4b067f911 100644
--- a/evadb/catalog/models/base_model.py
+++ b/evadb/catalog/models/base_model.py
@@ -100,33 +100,3 @@ def _commit(self, db_session):
 
 # Custom Base Model to be inherited by all models
 BaseModel = declarative_base(cls=CustomModel, constructor=None)
-
-
-def truncate_catalog_tables(engine: Engine):
-    """Truncate all the catalog tables"""
-    # https://stackoverflow.com/questions/4763472/sqlalchemy-clear-database-content-but-dont-drop-the-schema/5003705#5003705 #noqa
-    # reflect to refresh the metadata
-    BaseModel.metadata.reflect(bind=engine)
-    insp = sqlalchemy.inspect(engine)
-    if database_exists(engine.url):
-        with contextlib.closing(engine.connect()) as con:
-            trans = con.begin()
-            for table in reversed(BaseModel.metadata.sorted_tables):
-                if insp.has_table(table.name):
-                    con.execute(table.delete())
-            trans.commit()
-
-
-def drop_all_tables_except_catalog(engine: Engine):
-    """drop all the tables except the catalog"""
-    # reflect to refresh the metadata
-    BaseModel.metadata.reflect(bind=engine)
-    insp = sqlalchemy.inspect(engine)
-    if database_exists(engine.url):
-        with contextlib.closing(engine.connect()) as con:
-            trans = con.begin()
-            for table in reversed(BaseModel.metadata.sorted_tables):
-                if table.name not in CATALOG_TABLES:
-                    if insp.has_table(table.name):
-                        table.drop(con)
-            trans.commit()
diff --git a/evadb/catalog/models/configuration_catalog.py b/evadb/catalog/models/configuration_catalog.py
index 106c230131..48d42b2cae 100644
--- a/evadb/catalog/models/configuration_catalog.py
+++ b/evadb/catalog/models/configuration_catalog.py
@@ -26,7 +26,7 @@ class ConfigurationCatalog(BaseModel):
     `_value:` the value for the config
     """
 
-    __tablename__ = "configuation_catalog"
+    __tablename__ = "configuration_catalog"
 
     _key = Column("name", String(100), unique=True)
     _value = Column("engine", TextPickleType()) # TODO :- Will this work or would we need to pickle ??
diff --git a/evadb/catalog/models/utils.py b/evadb/catalog/models/utils.py
index 36d11937f0..5da3a2eef5 100644
--- a/evadb/catalog/models/utils.py
+++ b/evadb/catalog/models/utils.py
@@ -61,7 +61,7 @@ def init_db(engine: Engine):
     BaseModel.metadata.create_all(bind=engine)
 
 
-def truncate_catalog_tables(engine: Engine):
+def truncate_catalog_tables(engine: Engine, tables_not_to_truncate: List[str] = []):
     """Truncate all the catalog tables"""
     # https://stackoverflow.com/questions/4763472/sqlalchemy-clear-database-content-but-dont-drop-the-schema/5003705#5003705 #noqa
     # reflect to refresh the metadata
@@ -71,8 +71,9 @@ def truncate_catalog_tables(engine: Engine):
         with contextlib.closing(engine.connect()) as con:
             trans = con.begin()
             for table in reversed(BaseModel.metadata.sorted_tables):
-                if insp.has_table(table.name):
-                    con.execute(table.delete())
+                if table.name not in tables_not_to_truncate:
+                    if insp.has_table(table.name):
+                        con.execute(table.delete())
             trans.commit()
 
 
@@ -258,14 +259,15 @@ def display_format(self):
             "params": self.params,
         }
 
+
 @dataclass(unsafe_hash=True)
 class ConfigurationCatalogEntry:
     """Dataclass representing an entry in the `ConfigurationCatalog`.
     This is done to ensure we don't expose the sqlalchemy dependencies beyond catalog service. Further, sqlalchemy does not allow sharing of objects across threads.
     """
 
-    key = str,
-    value = str,
+    key: str
+    value: str
     row_id: int = None
 
     def display_format(self):
diff --git a/evadb/configuration/bootstrap_environment.py b/evadb/configuration/bootstrap_environment.py
index f0c33ac5d7..01655a2648 100644
--- a/evadb/configuration/bootstrap_environment.py
+++ b/evadb/configuration/bootstrap_environment.py
@@ -30,6 +30,7 @@
     EvaDB_CONFIG_FILE,
     EvaDB_DATASET_DIR,
 )
+from evadb.evadb_config import BASE_EVADB_CONFIG
 from evadb.utils.generic_utils import parse_config_yml
 from evadb.utils.logging_manager import logger as evadb_logger
 
@@ -38,6 +39,7 @@ def get_base_config(evadb_installation_dir: Path) -> Path:
     """
     Get path to .evadb_config.py source path.
     """
+    return BASE_EVADB_CONFIG
     # if evadb package is installed in environment
     if importlib_resources.is_resource("evadb", EvaDB_CONFIG_FILE):
         with importlib_resources.path("evadb", EvaDB_CONFIG_FILE) as config_path:
@@ -54,12 +56,8 @@ def get_default_db_uri(evadb_dir: Path):
     Arguments:
         evadb_dir: path to evadb database directory
     """
-    config_obj = parse_config_yml()
-    if config_obj["core"]["catalog_database_uri"]:
-        return config_obj["core"]["catalog_database_uri"]
-    else:
-        # Default to sqlite.
-        return f"sqlite:///{evadb_dir.resolve()}/{DB_DEFAULT_NAME}"
+    # Default to sqlite.
+    return f"sqlite:///{evadb_dir.resolve()}/{DB_DEFAULT_NAME}"
 
 
 def bootstrap_environment(evadb_dir: Path, evadb_installation_dir: Path):
@@ -71,7 +69,7 @@ def bootstrap_environment(evadb_dir: Path, evadb_installation_dir: Path):
         evadb_installation_dir: path to evadb package
     """
 
-    default_config_path = get_base_config(evadb_installation_dir).resolve()
+    config_obj = get_base_config(evadb_installation_dir)
 
     # creates necessary directories
     config_default_dict = create_directories_and_get_default_config_values(
@@ -80,11 +78,8 @@ def bootstrap_environment(evadb_dir: Path, evadb_installation_dir: Path):
 
     assert evadb_dir.exists(), f"{evadb_dir} does not exist"
     assert evadb_installation_dir.exists(), f"{evadb_installation_dir} does not exist"
-    config_obj = {}
-    with default_config_path.open("r") as yml_file:
-        config_obj = yaml.load(yml_file, Loader=yaml.FullLoader)
     config_obj = merge_dict_of_dicts(config_default_dict, config_obj)
-    mode = config_obj["core"]["mode"]
+    mode = config_obj["mode"]
 
     # set logger to appropriate level (debug or release)
     level = logging.WARN if mode == "release" else logging.DEBUG
@@ -95,6 +90,7 @@ def bootstrap_environment(evadb_dir: Path, evadb_installation_dir: Path):
 
     return config_obj
 
+
 # TODO : Change
 def create_directories_and_get_default_config_values(
     evadb_dir: Path, evadb_installation_dir: Path, category: str = None, key: str = None
@@ -126,17 +122,15 @@ def create_directories_and_get_default_config_values(
         model_dir.mkdir(parents=True, exist_ok=True)
 
     config_obj = {}
-    config_obj["core"] = {}
-    config_obj["storage"] = {}
-    config_obj["core"]["evadb_installation_dir"] = str(default_install_dir.resolve())
-    config_obj["core"]["datasets_dir"] = str(dataset_location.resolve())
-    config_obj["core"]["catalog_database_uri"] = get_default_db_uri(evadb_dir)
-    config_obj["storage"]["index_dir"] = str(index_dir.resolve())
-    config_obj["storage"]["cache_dir"] = str(cache_dir.resolve())
-    config_obj["storage"]["s3_download_dir"] = str(s3_dir.resolve())
-    config_obj["storage"]["tmp_dir"] = str(tmp_dir.resolve())
-    config_obj["storage"]["function_dir"] = str(function_dir.resolve())
-    config_obj["storage"]["model_dir"] = str(model_dir.resolve())
+    config_obj["evadb_installation_dir"] = str(default_install_dir.resolve())
+    config_obj["datasets_dir"] = str(dataset_location.resolve())
+    config_obj["catalog_database_uri"] = get_default_db_uri(evadb_dir)
+    config_obj["index_dir"] = str(index_dir.resolve())
+    config_obj["cache_dir"] = str(cache_dir.resolve())
+    config_obj["s3_download_dir"] = str(s3_dir.resolve())
+    config_obj["tmp_dir"] = str(tmp_dir.resolve())
+    config_obj["function_dir"] = str(function_dir.resolve())
+    config_obj["model_dir"] = str(model_dir.resolve())
     if category and key:
         return config_obj.get(category, {}).get(key, None)
     elif category:
diff --git a/evadb/database.py b/evadb/database.py
index 086bf0a018..2da55f1a37 100644
--- a/evadb/database.py
+++ b/evadb/database.py
@@ -16,8 +16,13 @@
 from pathlib import Path
 from typing import TYPE_CHECKING, Callable
 
-from evadb.catalog.catalog_utils import get_catalog_instance
-from evadb.configuration.constants import DB_DEFAULT_NAME, EvaDB_DATABASE_DIR
+from evadb.catalog.catalog_utils import bootstrap_configs, get_catalog_instance
+from evadb.configuration.bootstrap_environment import bootstrap_environment
+from evadb.configuration.constants import (
+    DB_DEFAULT_NAME,
+    EvaDB_DATABASE_DIR,
+    EvaDB_INSTALLATION_DIR,
+)
 
 if TYPE_CHECKING:
     from evadb.catalog.catalog_manager import CatalogManager
@@ -47,6 +52,15 @@ def init_evadb_instance(
     if db_dir is None:
         db_dir = EvaDB_DATABASE_DIR
 
+    config_obj = bootstrap_environment(
+        Path(db_dir),
+        evadb_installation_dir=Path(EvaDB_INSTALLATION_DIR),
+    )
+
     catalog_uri = custom_db_uri or get_default_db_uri(Path(db_dir))
 
+    # load all the config into the configuration_catalog table
+    print("gg")
+    bootstrap_configs(get_catalog_instance(catalog_uri), config_obj)
+    
     return EvaDBDatabase(db_dir, catalog_uri, get_catalog_instance)
diff --git a/evadb/executor/create_function_executor.py b/evadb/executor/create_function_executor.py
index 0b4ddbf7c6..8272d22c81 100644
--- a/evadb/executor/create_function_executor.py
+++ b/evadb/executor/create_function_executor.py
@@ -99,10 +99,13 @@ def handle_ludwig_function(self):
             target=arg_map["predict"],
             tune_for_memory=arg_map.get("tune_for_memory", False),
             time_limit_s=arg_map.get("time_limit", DEFAULT_TRAIN_TIME_LIMIT),
-            output_directory=self.db.config.get_value("storage", "tmp_dir"),
+            output_directory=self.db.catalog().get_configuration_catalog_value(
+                "tmp_dir"
+            ),
         )
         model_path = os.path.join(
-            self.db.config.get_value("storage", "model_dir"), self.node.name
+            self.db.catalog().get_configuration_catalog_value("model_dir"),
+            self.node.name,
         )
         auto_train_results.best_model.save(model_path)
         self.node.metadata.append(
@@ -146,7 +149,7 @@ def handle_sklearn_function(self):
         aggregated_batch.frames.drop([arg_map["predict"]], axis=1, inplace=True)
         model.fit(X=aggregated_batch.frames, y=Y)
         model_path = os.path.join(
-            self.db.config.get_value("storage", "model_dir"), self.node.name
+            self.db.catalog().get_configuration_catalog_value("model_dir"), self.node.name
         )
         pickle.dump(model, open(model_path, "wb"))
         self.node.metadata.append(
@@ -372,7 +375,7 @@ def handle_forecasting_function(self):
             model_save_dir_name += "_exogenous_" + str(sorted(exogenous_columns))
 
         model_dir = os.path.join(
-            self.db.config.get_value("storage", "model_dir"),
+            self.db.catalog().get_configuration_catalog_value("model_dir"),
             "tsforecasting",
             model_save_dir_name,
             str(hashlib.sha256(data.to_string().encode()).hexdigest()),
diff --git a/evadb/executor/create_index_executor.py b/evadb/executor/create_index_executor.py
index 407cfef3c0..4b5c164745 100644
--- a/evadb/executor/create_index_executor.py
+++ b/evadb/executor/create_index_executor.py
@@ -75,7 +75,7 @@ def _create_native_index(self):
 
     # On-disk saving path for EvaDB index.
     def _get_evadb_index_save_path(self) -> Path:
-        index_dir = Path(self.config.get_value("storage", "index_dir"))
+        index_dir = Path(self.db.catalog().get_configuration_catalog_value("index_dir"))
         if not index_dir.exists():
             index_dir.mkdir(parents=True, exist_ok=True)
         return str(
@@ -121,7 +121,7 @@ def _create_evadb_index(self):
                         self.vector_store_type,
                         self.name,
                         **handle_vector_store_params(
-                            self.vector_store_type, index_path
+                            self.vector_store_type, index_path, self.catalog
                         ),
                     )
                 else:
@@ -151,7 +151,7 @@ def _create_evadb_index(self):
                             self.vector_store_type,
                             self.name,
                             **handle_vector_store_params(
-                                self.vector_store_type, index_path
+                                self.vector_store_type, index_path, self.catalog
                             ),
                         )
                         index.create(input_dim)
diff --git a/evadb/executor/drop_object_executor.py b/evadb/executor/drop_object_executor.py
index 38d5419dc4..b9aa9e016a 100644
--- a/evadb/executor/drop_object_executor.py
+++ b/evadb/executor/drop_object_executor.py
@@ -118,7 +118,7 @@ def _handle_drop_index(self, index_name: str, if_exists: bool):
             index = VectorStoreFactory.init_vector_store(
                 index_obj.type,
                 index_obj.name,
-                **handle_vector_store_params(index_obj.type, index_obj.save_file_path),
+                **handle_vector_store_params(index_obj.type, index_obj.save_file_path, self.catalog),
             )
             assert (
                 index is not None
diff --git a/evadb/executor/executor_utils.py b/evadb/executor/executor_utils.py
index 88d74ce3bc..6e0b812b85 100644
--- a/evadb/executor/executor_utils.py
+++ b/evadb/executor/executor_utils.py
@@ -152,7 +152,7 @@ def validate_media(file_path: Path, media_type: FileFormatType) -> bool:
 
 
 def handle_vector_store_params(
-    vector_store_type: VectorStoreType, index_path: str
+    vector_store_type: VectorStoreType, index_path: str, catalog
 ) -> dict:
     """Handle vector store parameters based on the vector store type and index path.
 
@@ -174,7 +174,13 @@ def handle_vector_store_params(
     elif vector_store_type == VectorStoreType.CHROMADB:
         return {"index_path": str(Path(index_path).parent)}
     elif vector_store_type == VectorStoreType.PINECONE:
-        return {}
+        # add the required API_KEYS
+        return {
+            "PINECONE_API_KEY": catalog().get_configuration_catalog_value(
+                "PINECONE_API_KEY"
+            ),
+            "PINECONE_ENV": catalog().get_configuration_catalog_value("PINECONE_ENV"),
+        }
     else:
         raise ValueError("Unsupported vector store type: {}".format(vector_store_type))
 
diff --git a/evadb/executor/vector_index_scan_executor.py b/evadb/executor/vector_index_scan_executor.py
index 2b58f5c337..b2e1bb219e 100644
--- a/evadb/executor/vector_index_scan_executor.py
+++ b/evadb/executor/vector_index_scan_executor.py
@@ -102,7 +102,9 @@ def _evadb_vector_index_scan(self, *args, **kwargs):
         self.index = VectorStoreFactory.init_vector_store(
             self.vector_store_type,
             self.index_name,
-            **handle_vector_store_params(self.vector_store_type, self.index_path),
+            **handle_vector_store_params(
+                self.vector_store_type, self.index_path, self.db.catalog
+            ),
         )
 
         search_feat = self._get_search_query_results()
diff --git a/evadb/optimizer/rules/rules.py b/evadb/optimizer/rules/rules.py
index 955885f1ba..6cce17d4d0 100644
--- a/evadb/optimizer/rules/rules.py
+++ b/evadb/optimizer/rules/rules.py
@@ -836,7 +836,10 @@ def apply(self, before: LogicalCreateIndex, context: OptimizerContext):
             before.index_def,
         )
         child = SeqScanPlan(None, before.project_expr_list, before.table_ref.alias)
-        batch_mem_size = context.db.config.get_value("executor", "batch_mem_size")
+
+        batch_mem_size = context.db.catalog().get_configuration_catalog_value(
+            "batch_mem_size"
+        )
         child.append_child(
             StoragePlan(
                 before.table_ref.table.table_obj,
@@ -933,7 +936,9 @@ def apply(self, before: LogicalGet, context: OptimizerContext):
         # read in a batch from storage engine.
         # Todo: Experiment heuristics.
         after = SeqScanPlan(None, before.target_list, before.alias)
-        batch_mem_size = context.db.config.get_value("executor", "batch_mem_size")
+        batch_mem_size = context.db.catalog().get_configuration_catalog_value(
+            "batch_mem_size"
+        )
         after.append_child(
             StoragePlan(
                 before.table_obj,
diff --git a/evadb/third_party/vector_stores/pinecone.py b/evadb/third_party/vector_stores/pinecone.py
index 837c95e579..3bead1a690 100644
--- a/evadb/third_party/vector_stores/pinecone.py
+++ b/evadb/third_party/vector_stores/pinecone.py
@@ -15,7 +15,6 @@
 import os
 from typing import List
 
-from evadb.configuration.configuration_manager import ConfigurationManager
 from evadb.third_party.vector_stores.types import (
     FeaturePayload,
     VectorIndexQuery,
@@ -30,34 +29,30 @@
 
 
 class PineconeVectorStore(VectorStore):
-    def __init__(self, index_name: str) -> None:
+    def __init__(self, index_name: str, **kwargs) -> None:
         try_to_import_pinecone_client()
         global _pinecone_init_done
         # pinecone only allows index names with lower alpha-numeric characters and '-'
         self._index_name = index_name.strip().lower()
 
         # Get the API key.
-        self._api_key = ConfigurationManager().get_value(
-            "third_party", "PINECONE_API_KEY"
-        )
+        self._api_key = kwargs.get("PINECONE_API_KEY")
 
         if not self._api_key:
             self._api_key = os.environ.get("PINECONE_API_KEY")
 
         assert (
             self._api_key
-        ), "Please set your Pinecone API key in evadb.yml file (third_party, pinecone_api_key) or environment variable (PINECONE_KEY). It can be found at Pinecone Dashboard > API Keys > Value"
+        ), "Please set your `PINECONE_API_KEY` using set command or environment variable (PINECONE_KEY). It can be found at Pinecone Dashboard > API Keys > Value"
 
         # Get the environment name.
-        self._environment = ConfigurationManager().get_value(
-            "third_party", "PINECONE_ENV"
-        )
+        self._environment = kwargs.get("PINECONE_ENV")
         if not self._environment:
             self._environment = os.environ.get("PINECONE_ENV")
 
         assert (
             self._environment
-        ), "Please set the Pinecone environment key in evadb.yml file (third_party, pinecone_env) or environment variable (PINECONE_ENV). It can be found Pinecone Dashboard > API Keys > Environment."
+        ), "Please set your `PINECONE_ENV` or environment variable (PINECONE_ENV). It can be found Pinecone Dashboard > API Keys > Environment."
 
         if not _pinecone_init_done:
             # Initialize pinecone.
diff --git a/evadb/third_party/vector_stores/utils.py b/evadb/third_party/vector_stores/utils.py
index e47d24f1f9..121b7d032d 100644
--- a/evadb/third_party/vector_stores/utils.py
+++ b/evadb/third_party/vector_stores/utils.py
@@ -41,6 +41,7 @@ def init_vector_store(
             from evadb.third_party.vector_stores.pinecone import required_params
 
             validate_kwargs(kwargs, required_params, required_params)
+            
             return PineconeVectorStore(index_name, **kwargs)
 
         elif vector_store_type == VectorStoreType.CHROMADB:

From 277975af1d72948da1fbc2306fb1b51f216e3123 Mon Sep 17 00:00:00 2001
From: Gaurav Tarlok Kakkar <gaurav21776@gmail.com>
Date: Tue, 10 Oct 2023 00:15:50 -0400
Subject: [PATCH 05/20] checkpoint

---
 evadb/binder/statement_binder.py              |  6 +++++
 evadb/catalog/catalog_manager.py              | 19 +++++++--------
 .../services/configuration_catalog_service.py | 23 +++++++++++++++----
 evadb/executor/abstract_executor.py           |  6 -----
 evadb/executor/execution_context.py           | 14 +++--------
 evadb/executor/load_multimedia_executor.py    |  4 +++-
 evadb/executor/set_executor.py                |  3 +--
 evadb/optimizer/optimizer_context.py          |  6 +++--
 evadb/optimizer/plan_generator.py             |  4 +++-
 evadb/optimizer/rules/rules_manager.py        |  6 ++---
 .../long/test_explain_executor.py             |  4 ++--
 .../long/test_optimizer_rules.py              |  8 +++----
 test/integration_tests/long/test_reuse.py     |  2 +-
 .../short/test_set_executor.py                |  4 +++-
 test/unit_tests/optimizer/rules/test_rules.py | 14 +++++------
 .../optimizer/test_optimizer_task.py          |  6 ++---
 test/util.py                                  | 17 ++++++++------
 17 files changed, 80 insertions(+), 66 deletions(-)

diff --git a/evadb/binder/statement_binder.py b/evadb/binder/statement_binder.py
index 199c535181..0ed7e7f41a 100644
--- a/evadb/binder/statement_binder.py
+++ b/evadb/binder/statement_binder.py
@@ -33,6 +33,7 @@
 from evadb.catalog.catalog_type import ColumnType, TableType
 from evadb.catalog.catalog_utils import get_metadata_properties, is_document_table
 from evadb.configuration.constants import EvaDB_INSTALLATION_DIR
+from evadb.executor.execution_context import Context
 from evadb.expression.abstract_expression import AbstractExpression, ExpressionType
 from evadb.expression.function_expression import FunctionExpression
 from evadb.expression.tuple_value_expression import TupleValueExpression
@@ -264,6 +265,11 @@ def _bind_tuple_expr(self, node: TupleValueExpression):
 
     @bind.register(FunctionExpression)
     def _bind_func_expr(self, node: FunctionExpression):
+        # setup the context
+        # we read the GPUs from the catalog and populate in the context
+        gpus_ids = self._catalog().get_configuration_catalog_value("gpu_ids")
+        node._context = Context(gpus_ids)
+        
         # handle the special case of "extract_object"
         if node.name.upper() == str(FunctionType.EXTRACT_OBJECT):
             handle_bind_extract_object_function(node, self)
diff --git a/evadb/catalog/catalog_manager.py b/evadb/catalog/catalog_manager.py
index 69bc4052c8..4b8daaea1e 100644
--- a/evadb/catalog/catalog_manager.py
+++ b/evadb/catalog/catalog_manager.py
@@ -145,9 +145,8 @@ def _clear_catalog_contents(self):
         )
         # clean up the dataset, index, and cache directories
         for folder in ["cache_dir", "index_dir", "datasets_dir"]:
-            remove_directory_contents(
-                self._config_catalog_service.get_entry_by_name(folder).value
-            )
+            print(folder, self.get_configuration_catalog_value(folder))
+            remove_directory_contents(self.get_configuration_catalog_value(folder))
 
     "Database catalog services"
 
@@ -459,7 +458,7 @@ def get_all_index_catalog_entries(self):
     """ Function Cache related"""
 
     def insert_function_cache_catalog_entry(self, func_expr: FunctionExpression):
-        cache_dir = self._config_catalog_service.get_entry_by_name("cache_dir").value
+        cache_dir = self.get_configuration_catalog_value("cache_dir")
         entry = construct_function_cache_catalog_entry(func_expr, cache_dir=cache_dir)
         return self._function_cache_service.insert_entry(entry)
 
@@ -522,9 +521,7 @@ def create_and_insert_table_catalog_entry(
         table_name = table_info.table_name
         column_catalog_entries = xform_column_definitions_to_catalog_entries(columns)
 
-        dataset_location = self._config_catalog_service.get_entry_by_name(
-            "datasets_dir"
-        )
+        dataset_location = self.get_configuration_catalog_value("datasets_dir")
         file_url = str(generate_file_path(dataset_location, table_name))
         table_catalog_entry = self.insert_table_catalog_entry(
             table_name,
@@ -626,14 +623,14 @@ def create_and_insert_multimedia_metadata_table_catalog_entry(
 
     "Configuration catalog services"
 
-    def insert_configuration_catalog_entry(self, key: str, value: any):
-        """A new entry is persisted in the database catalog."
+    def upsert_configuration_catalog_entry(self, key: str, value: any):
+        """Upserts configuration catalog entry"
 
         Args:
             key: key name
             value: value name
         """
-        self._db_catalog_service.insert_entry(key, value)
+        self._config_catalog_service.upsert_entry(key, value)
 
     def get_configuration_catalog_value(self, key: str) -> Any:
         """
@@ -645,7 +642,7 @@ def get_configuration_catalog_value(self, key: str) -> Any:
             ConfigurationCatalogEntry
         """
 
-        table_entry = self._db_catalog_service.get_entry_by_name(key)
+        table_entry = self._config_catalog_service.get_entry_by_name(key)
         if table_entry:
             return table_entry.value
         return None
diff --git a/evadb/catalog/services/configuration_catalog_service.py b/evadb/catalog/services/configuration_catalog_service.py
index 5b3646ad3c..07843bc14e 100644
--- a/evadb/catalog/services/configuration_catalog_service.py
+++ b/evadb/catalog/services/configuration_catalog_service.py
@@ -33,10 +33,7 @@ def insert_entry(
         value: any,
     ):
         try:
-            config_catalog_obj = self.model(
-                key = key,
-                value = value
-            )
+            config_catalog_obj = self.model(key=key, value=value)
             config_catalog_obj = config_catalog_obj.save(self.session)
 
         except Exception as e:
@@ -59,3 +56,21 @@ def get_entry_by_name(self, key: str) -> ConfigurationCatalogEntry:
         if entry:
             return entry.as_dataclass()
         return entry
+
+    def upsert_entry(
+        self,
+        key: str,
+        value: any,
+    ):
+        try:
+            entry = self.session.execute(
+                select(self.model).filter(self.model._key == key)
+            ).scalar_one_or_none()
+            if entry:
+                entry.update(self.session, value=value)
+            else:
+                self.insert_entry(key, value)
+        except Exception as e:
+            raise CatalogError(
+                f"Error while upserting entry to ConfigurationCatalog: {str(e)}"
+            )
diff --git a/evadb/executor/abstract_executor.py b/evadb/executor/abstract_executor.py
index 66a453f71d..8647901460 100644
--- a/evadb/executor/abstract_executor.py
+++ b/evadb/executor/abstract_executor.py
@@ -18,7 +18,6 @@
 
 if TYPE_CHECKING:
     from evadb.catalog.catalog_manager import CatalogManager
-from evadb.configuration.configuration_manager import ConfigurationManager
 from evadb.database import EvaDBDatabase
 from evadb.models.storage.batch import Batch
 from evadb.plan_nodes.abstract_plan import AbstractPlan
@@ -36,7 +35,6 @@ class AbstractExecutor(ABC):
     def __init__(self, db: EvaDBDatabase, node: AbstractPlan):
         self._db = db
         self._node = node
-        self._config: ConfigurationManager = db.config if db else None
         self._children = []
 
     # @lru_cache(maxsize=None)
@@ -74,10 +72,6 @@ def node(self) -> AbstractPlan:
     def db(self) -> EvaDBDatabase:
         return self._db
 
-    @property
-    def config(self) -> ConfigurationManager:
-        return self._config
-
     @abstractmethod
     def exec(self, *args, **kwargs) -> Iterable[Batch]:
         """
diff --git a/evadb/executor/execution_context.py b/evadb/executor/execution_context.py
index d5feea4648..b0b5e1a696 100644
--- a/evadb/executor/execution_context.py
+++ b/evadb/executor/execution_context.py
@@ -16,7 +16,6 @@
 import random
 from typing import List
 
-from evadb.configuration.configuration_manager import ConfigurationManager
 from evadb.constants import NO_GPU
 from evadb.utils.generic_utils import get_gpu_count, is_gpu_available
 
@@ -28,13 +27,8 @@ class Context:
     if using horovod: current rank etc.
     """
 
-    def __new__(cls):
-        if not hasattr(cls, "_instance"):
-            cls._instance = super(Context, cls).__new__(cls)
-        return cls._instance
-
-    def __init__(self):
-        self._config_manager = ConfigurationManager()
+    def __init__(self, user_provided_gpu_conf=[]):
+        self._user_provided_gpu_conf = user_provided_gpu_conf
         self._gpus = self._populate_gpu_ids()
 
     @property
@@ -42,10 +36,8 @@ def gpus(self):
         return self._gpus
 
     def _populate_gpu_from_config(self) -> List:
-        # Populate GPU IDs from yaml config file.
-        gpu_conf = self._config_manager.get_value("executor", "gpu_ids")
         available_gpus = [i for i in range(get_gpu_count())]
-        return list(set(available_gpus) & set(gpu_conf))
+        return list(set(available_gpus) & set(self._user_provided_gpu_conf))
 
     def _populate_gpu_from_env(self) -> List:
         # Populate GPU IDs from env variable.
diff --git a/evadb/executor/load_multimedia_executor.py b/evadb/executor/load_multimedia_executor.py
index 90bc7edba3..9f9828476f 100644
--- a/evadb/executor/load_multimedia_executor.py
+++ b/evadb/executor/load_multimedia_executor.py
@@ -53,7 +53,9 @@ def exec(self, *args, **kwargs):
 
             # If it is a s3 path, download the file to local
             if self.node.file_path.as_posix().startswith("s3:/"):
-                s3_dir = Path(self.config.get_value("storage", "s3_download_dir"))
+                s3_dir = self.catalog().get_configuration_catalog_value(
+                    "s3_download_dir"
+                )
                 dst_path = s3_dir / self.node.table_info.table_name
                 dst_path.mkdir(parents=True, exist_ok=True)
                 video_files = download_from_s3(self.node.file_path, dst_path)
diff --git a/evadb/executor/set_executor.py b/evadb/executor/set_executor.py
index c73deaf976..9d0bcec4b5 100644
--- a/evadb/executor/set_executor.py
+++ b/evadb/executor/set_executor.py
@@ -36,8 +36,7 @@ def exec(self, *args, **kwargs):
         as a separate PR for the issue #1140, where all instances of config use
         will be replaced
         """
-        self._config.update_value(
-            category="default",
+        self.catalog().upsert_configuration_catalog_entry(
             key=self.node.config_name,
             value=self.node.config_value.value,
         )
diff --git a/evadb/optimizer/optimizer_context.py b/evadb/optimizer/optimizer_context.py
index 6cb72dfd82..fcf88571e1 100644
--- a/evadb/optimizer/optimizer_context.py
+++ b/evadb/optimizer/optimizer_context.py
@@ -43,8 +43,10 @@ def __init__(
         self._task_stack = OptimizerTaskStack()
         self._memo = Memo()
         self._cost_model = cost_model
-        self._rules_manager = rules_manager or RulesManager(db.config)
-
+        # check if ray is enabled
+        is_ray_enabled = self.db.catalog().get_configuration_catalog_value("ray")
+        self._rules_manager = rules_manager or RulesManager({"ray": is_ray_enabled})
+        
     @property
     def db(self):
         return self._db
diff --git a/evadb/optimizer/plan_generator.py b/evadb/optimizer/plan_generator.py
index 5b8c6d2a24..5396f4b939 100644
--- a/evadb/optimizer/plan_generator.py
+++ b/evadb/optimizer/plan_generator.py
@@ -39,7 +39,9 @@ def __init__(
         cost_model: CostModel = None,
     ) -> None:
         self.db = db
-        self.rules_manager = rules_manager or RulesManager(db.config)
+        # check if ray is enabled
+        is_ray_enabled = self.db.catalog().get_configuration_catalog_value("ray")
+        self.rules_manager = rules_manager or RulesManager({"ray": is_ray_enabled})
         self.cost_model = cost_model or CostModel()
 
     def execute_task_stack(self, task_stack: OptimizerTaskStack):
diff --git a/evadb/optimizer/rules/rules_manager.py b/evadb/optimizer/rules/rules_manager.py
index e9720b78d5..147206bd1a 100644
--- a/evadb/optimizer/rules/rules_manager.py
+++ b/evadb/optimizer/rules/rules_manager.py
@@ -67,7 +67,7 @@
 
 
 class RulesManager:
-    def __init__(self, config: ConfigurationManager):
+    def __init__(self, configs: dict):
         self._logical_rules = [
             LogicalInnerJoinCommutativity(),
             CacheFunctionExpressionInApply(),
@@ -121,9 +121,9 @@ def __init__(self, config: ConfigurationManager):
         # These rules are enabled only if
         # (1) ray is installed and (2) ray is enabled
         # Ray must be installed using pip
-        # It must also be enabled in "evadb.yml"
+        # It must also be enabled using the SET command
         # NOTE: By default, it is not enabled
-        ray_enabled = config.get_value("experimental", "ray")
+        ray_enabled = configs.get("ray", False)
         if is_ray_enabled_and_installed(ray_enabled):
             self._implementation_rules.extend(
                 [
diff --git a/test/integration_tests/long/test_explain_executor.py b/test/integration_tests/long/test_explain_executor.py
index 9d2c8e8ca5..cad1718c36 100644
--- a/test/integration_tests/long/test_explain_executor.py
+++ b/test/integration_tests/long/test_explain_executor.py
@@ -57,7 +57,7 @@ def test_explain_simple_select(self):
             """|__ ProjectPlan\n    |__ SeqScanPlan\n        |__ StoragePlan\n"""
         )
         self.assertEqual(batch.frames[0][0], expected_output)
-        rules_manager = RulesManager(self.evadb.config)
+        rules_manager = RulesManager()
         with disable_rules(rules_manager, [XformLateralJoinToLinearFlow()]):
             custom_plan_generator = PlanGenerator(self.evadb, rules_manager)
             select_query = "EXPLAIN SELECT id, data FROM MyVideo JOIN LATERAL DummyObjectDetector(data) AS T ;"
@@ -68,7 +68,7 @@ def test_explain_simple_select(self):
             self.assertEqual(batch.frames[0][0], expected_output)
 
         # Disable more rules
-        rules_manager = RulesManager(self.evadb.config)
+        rules_manager = RulesManager()
         with disable_rules(
             rules_manager,
             [
diff --git a/test/integration_tests/long/test_optimizer_rules.py b/test/integration_tests/long/test_optimizer_rules.py
index 27f5189a4b..e703492d6b 100644
--- a/test/integration_tests/long/test_optimizer_rules.py
+++ b/test/integration_tests/long/test_optimizer_rules.py
@@ -88,7 +88,7 @@ def test_should_benefit_from_pushdown(self, merge_mock, evaluate_mock):
         result_without_pushdown_rules = None
 
         with time_without_rule:
-            rules_manager = RulesManager(self.evadb.config)
+            rules_manager = RulesManager()
             with disable_rules(
                 rules_manager,
                 [PushDownFilterThroughApplyAndMerge(), PushDownFilterThroughJoin()],
@@ -109,7 +109,7 @@ def test_should_benefit_from_pushdown(self, merge_mock, evaluate_mock):
         self.assertGreater(evaluate_count_without_rule, 3 * evaluate_count_with_rule)
 
         result_without_xform_rule = None
-        rules_manager = RulesManager(self.evadb.config)
+        rules_manager = RulesManager()
         with disable_rules(rules_manager, [XformLateralJoinToLinearFlow()]):
             custom_plan_generator = PlanGenerator(self.evadb, rules_manager)
             result_without_xform_rule = execute_query_fetch_all(
@@ -132,7 +132,7 @@ def test_should_pushdown_without_pushdown_join_rule(self):
         time_without_rule = Timer()
         result_without_pushdown_join_rule = None
         with time_without_rule:
-            rules_manager = RulesManager(self.evadb.config)
+            rules_manager = RulesManager()
             with disable_rules(rules_manager, [PushDownFilterThroughJoin()]):
                 # should use PushDownFilterThroughApplyAndMerge()
                 custom_plan_generator = PlanGenerator(self.evadb, rules_manager)
@@ -264,7 +264,7 @@ def test_reorder_rule_should_not_have_side_effects(self):
         query = "SELECT id FROM MyVideo WHERE id < 20 AND id > 10;"
         result = execute_query_fetch_all(self.evadb, query)
 
-        rules_manager = RulesManager(self.evadb.config)
+        rules_manager = RulesManager()
         with disable_rules(rules_manager, [ReorderPredicates()]):
             custom_plan_generator = PlanGenerator(self.evadb, rules_manager)
             expected = execute_query_fetch_all(
diff --git a/test/integration_tests/long/test_reuse.py b/test/integration_tests/long/test_reuse.py
index b90fcbb7f1..f1f39fc664 100644
--- a/test/integration_tests/long/test_reuse.py
+++ b/test/integration_tests/long/test_reuse.py
@@ -77,7 +77,7 @@ def _verify_reuse_correctness(self, query, reuse_batch):
         # surfaces when the system is running on low memory. Explicitly calling garbage
         # collection to reduce the memory usage.
         gc.collect()
-        rules_manager = RulesManager(self.evadb.config)
+        rules_manager = RulesManager()
         with disable_rules(
             rules_manager,
             [
diff --git a/test/integration_tests/short/test_set_executor.py b/test/integration_tests/short/test_set_executor.py
index d732681433..2a92ac2f94 100644
--- a/test/integration_tests/short/test_set_executor.py
+++ b/test/integration_tests/short/test_set_executor.py
@@ -36,6 +36,8 @@ def tearDownClass(cls):
     # integration test
     def test_set_execution(self):
         execute_query_fetch_all(self.evadb, "SET OPENAIKEY = 'ABCD';")
-        current_config_value = self.evadb.config.get_value("default", "OPENAIKEY")
+        current_config_value = self.evadb.catalog().get_configuration_catalog_value(
+            "OPENAIKEY"
+        )
 
         self.assertEqual("ABCD", current_config_value)
diff --git a/test/unit_tests/optimizer/rules/test_rules.py b/test/unit_tests/optimizer/rules/test_rules.py
index e71dbef7cc..b84bf18682 100644
--- a/test/unit_tests/optimizer/rules/test_rules.py
+++ b/test/unit_tests/optimizer/rules/test_rules.py
@@ -168,8 +168,8 @@ def test_supported_rules(self):
             XformExtractObjectToLinearFlow(),
         ]
         rewrite_rules = (
-            RulesManager(self.evadb.config).stage_one_rewrite_rules
-            + RulesManager(self.evadb.config).stage_two_rewrite_rules
+            RulesManager().stage_one_rewrite_rules
+            + RulesManager().stage_two_rewrite_rules
         )
         self.assertEqual(
             len(supported_rewrite_rules),
@@ -187,14 +187,14 @@ def test_supported_rules(self):
         ]
         self.assertEqual(
             len(supported_logical_rules),
-            len(RulesManager(self.evadb.config).logical_rules),
+            len(RulesManager().logical_rules),
         )
 
         for rule in supported_logical_rules:
             self.assertTrue(
                 any(
                     isinstance(rule, type(x))
-                    for x in RulesManager(self.evadb.config).logical_rules
+                    for x in RulesManager().logical_rules
                 )
             )
 
@@ -244,14 +244,14 @@ def test_supported_rules(self):
             supported_implementation_rules.append(LogicalExchangeToPhysical())
         self.assertEqual(
             len(supported_implementation_rules),
-            len(RulesManager(self.evadb.config).implementation_rules),
+            len(RulesManager().implementation_rules),
         )
 
         for rule in supported_implementation_rules:
             self.assertTrue(
                 any(
                     isinstance(rule, type(x))
-                    for x in RulesManager(self.evadb.config).implementation_rules
+                    for x in RulesManager().implementation_rules
                 )
             )
 
@@ -280,7 +280,7 @@ def test_embed_sample_into_get_does_not_work_with_structured_data(self):
         self.assertFalse(rule.check(logi_sample, MagicMock()))
 
     def test_disable_rules(self):
-        rules_manager = RulesManager(self.evadb.config)
+        rules_manager = RulesManager()
         with disable_rules(rules_manager, [PushDownFilterThroughApplyAndMerge()]):
             self.assertFalse(
                 any(
diff --git a/test/unit_tests/optimizer/test_optimizer_task.py b/test/unit_tests/optimizer/test_optimizer_task.py
index 6230dfb164..ba693dd3c0 100644
--- a/test/unit_tests/optimizer/test_optimizer_task.py
+++ b/test/unit_tests/optimizer/test_optimizer_task.py
@@ -51,12 +51,12 @@ def test_abstract_optimizer_task(self):
             task.execute()
 
     def top_down_rewrite(self, opr):
-        opt_cxt = OptimizerContext(MagicMock(), CostModel(), RulesManager(MagicMock()))
+        opt_cxt = OptimizerContext(MagicMock(), CostModel(), RulesManager())
         grp_expr = opt_cxt.add_opr_to_group(opr)
         root_grp_id = grp_expr.group_id
         opt_cxt.task_stack.push(
             TopDownRewrite(
-                grp_expr, RulesManager(MagicMock()).stage_one_rewrite_rules, opt_cxt
+                grp_expr, RulesManager().stage_one_rewrite_rules, opt_cxt
             )
         )
         self.execute_task_stack(opt_cxt.task_stack)
@@ -66,7 +66,7 @@ def bottom_up_rewrite(self, root_grp_id, opt_cxt):
         grp_expr = opt_cxt.memo.groups[root_grp_id].logical_exprs[0]
         opt_cxt.task_stack.push(
             BottomUpRewrite(
-                grp_expr, RulesManager(MagicMock()).stage_two_rewrite_rules, opt_cxt
+                grp_expr, RulesManager().stage_two_rewrite_rules, opt_cxt
             )
         )
         self.execute_task_stack(opt_cxt.task_stack)
diff --git a/test/util.py b/test/util.py
index 23eaeb35e8..945e2175e7 100644
--- a/test/util.py
+++ b/test/util.py
@@ -30,8 +30,12 @@
 from evadb.binder.statement_binder import StatementBinder
 from evadb.binder.statement_binder_context import StatementBinderContext
 from evadb.catalog.catalog_type import NdArrayType
-from evadb.configuration.configuration_manager import ConfigurationManager
-from evadb.configuration.constants import EvaDB_DATABASE_DIR, EvaDB_INSTALLATION_DIR
+from evadb.configuration.constants import (
+    S3_DOWNLOAD_DIR,
+    TMP_DIR,
+    EvaDB_DATABASE_DIR,
+    EvaDB_INSTALLATION_DIR,
+)
 from evadb.database import init_evadb_instance
 from evadb.expression.function_expression import FunctionExpression
 from evadb.functions.abstract.abstract_function import (
@@ -67,7 +71,7 @@ def suffix_pytest_xdist_worker_id_to_dir(path: str):
         path = Path(str(worker_id) + "_" + path)
     except KeyError:
         pass
-    return path
+    return Path(path)
 
 
 def get_evadb_for_testing(uri: str = None):
@@ -79,14 +83,12 @@ def get_evadb_for_testing(uri: str = None):
 
 def get_tmp_dir():
     db_dir = suffix_pytest_xdist_worker_id_to_dir(EvaDB_DATABASE_DIR)
-    config = ConfigurationManager(Path(db_dir))
-    return config.get_value("storage", "tmp_dir")
+    return db_dir / TMP_DIR
 
 
 def s3_dir():
     db_dir = suffix_pytest_xdist_worker_id_to_dir(EvaDB_DATABASE_DIR)
-    config = ConfigurationManager(Path(db_dir))
-    return config.get_value("storage", "s3_download_dir")
+    return db_dir / S3_DOWNLOAD_DIR
 
 
 EvaDB_TEST_DATA_DIR = Path(EvaDB_INSTALLATION_DIR).parent
@@ -420,6 +422,7 @@ def create_large_scale_image_dataset(num=1000000):
 
 def create_sample_video(num_frames=NUM_FRAMES):
     file_name = os.path.join(get_tmp_dir(), "dummy.avi")
+    print(file_name)
     try:
         os.remove(file_name)
     except FileNotFoundError:

From 7373425fd47bbb0557c12f385ee1acac466fa834 Mon Sep 17 00:00:00 2001
From: Gaurav Tarlok Kakkar <gaurav21776@gmail.com>
Date: Tue, 10 Oct 2023 00:18:27 -0400
Subject: [PATCH 06/20] fix linter

---
 evadb/binder/statement_binder.py              |  2 +-
 evadb/catalog/catalog_manager.py              |  2 +-
 evadb/catalog/catalog_utils.py                |  4 +--
 evadb/catalog/models/base_model.py            |  7 ----
 evadb/catalog/models/configuration_catalog.py |  4 ++-
 evadb/configuration/bootstrap_environment.py  |  2 --
 evadb/configuration/configuration_manager.py  |  2 +-
 evadb/database.py                             |  3 +-
 evadb/evadb_config.py                         | 34 +++++++++----------
 evadb/executor/create_function_executor.py    |  3 +-
 evadb/executor/drop_object_executor.py        |  4 ++-
 evadb/optimizer/optimizer_context.py          |  2 +-
 evadb/optimizer/rules/rules_manager.py        |  1 -
 evadb/third_party/vector_stores/utils.py      |  2 +-
 test/unit_tests/optimizer/rules/test_rules.py |  5 +--
 .../optimizer/test_optimizer_task.py          |  8 ++---
 16 files changed, 34 insertions(+), 51 deletions(-)

diff --git a/evadb/binder/statement_binder.py b/evadb/binder/statement_binder.py
index 0ed7e7f41a..152ea104b2 100644
--- a/evadb/binder/statement_binder.py
+++ b/evadb/binder/statement_binder.py
@@ -269,7 +269,7 @@ def _bind_func_expr(self, node: FunctionExpression):
         # we read the GPUs from the catalog and populate in the context
         gpus_ids = self._catalog().get_configuration_catalog_value("gpu_ids")
         node._context = Context(gpus_ids)
-        
+
         # handle the special case of "extract_object"
         if node.name.upper() == str(FunctionType.EXTRACT_OBJECT):
             handle_bind_extract_object_function(node, self)
diff --git a/evadb/catalog/catalog_manager.py b/evadb/catalog/catalog_manager.py
index 4b8daaea1e..ed82d47355 100644
--- a/evadb/catalog/catalog_manager.py
+++ b/evadb/catalog/catalog_manager.py
@@ -45,10 +45,10 @@
     init_db,
     truncate_catalog_tables,
 )
+from evadb.catalog.services.column_catalog_service import ColumnCatalogService
 from evadb.catalog.services.configuration_catalog_service import (
     ConfigurationCatalogService,
 )
-from evadb.catalog.services.column_catalog_service import ColumnCatalogService
 from evadb.catalog.services.database_catalog_service import DatabaseCatalogService
 from evadb.catalog.services.function_cache_catalog_service import (
     FunctionCacheCatalogService,
diff --git a/evadb/catalog/catalog_utils.py b/evadb/catalog/catalog_utils.py
index 29607a1cda..7ff1a535fe 100644
--- a/evadb/catalog/catalog_utils.py
+++ b/evadb/catalog/catalog_utils.py
@@ -32,11 +32,10 @@
     TableCatalogEntry,
 )
 from evadb.catalog.sql_config import IDENTIFIER_COLUMN
-from evadb.configuration.configuration_manager import ConfigurationManager
 from evadb.expression.function_expression import FunctionExpression
 from evadb.expression.tuple_value_expression import TupleValueExpression
 from evadb.parser.create_statement import ColConstraintInfo, ColumnDefinition
-from evadb.utils.generic_utils import get_str_hash, remove_directory_contents
+from evadb.utils.generic_utils import get_str_hash
 
 
 def is_video_table(table: TableCatalogEntry):
@@ -303,7 +302,6 @@ def bootstrap_configs(catalog, configs: dict):
 
 
 def get_configuration_value(key: str):
-    
     catalog = get_catalog_instance()
     return catalog.get_configuration_catalog_value(key)
 
diff --git a/evadb/catalog/models/base_model.py b/evadb/catalog/models/base_model.py
index b4b067f911..5826d75425 100644
--- a/evadb/catalog/models/base_model.py
+++ b/evadb/catalog/models/base_model.py
@@ -12,16 +12,9 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import contextlib
-
-import sqlalchemy
 from sqlalchemy import Column, Integer
-from sqlalchemy.engine import Engine
 from sqlalchemy.exc import SQLAlchemyError
 from sqlalchemy.ext.declarative import declarative_base
-from sqlalchemy_utils import database_exists
-
-from evadb.catalog.sql_config import CATALOG_TABLES
 from evadb.utils.logging_manager import logger
 
 
diff --git a/evadb/catalog/models/configuration_catalog.py b/evadb/catalog/models/configuration_catalog.py
index 48d42b2cae..faf99b5abb 100644
--- a/evadb/catalog/models/configuration_catalog.py
+++ b/evadb/catalog/models/configuration_catalog.py
@@ -29,7 +29,9 @@ class ConfigurationCatalog(BaseModel):
     __tablename__ = "configuration_catalog"
 
     _key = Column("name", String(100), unique=True)
-    _value = Column("engine", TextPickleType()) # TODO :- Will this work or would we need to pickle ??
+    _value = Column(
+        "engine", TextPickleType()
+    )  # TODO :- Will this work or would we need to pickle ??
 
     def __init__(self, key: str, value: any):
         self._key = key
diff --git a/evadb/configuration/bootstrap_environment.py b/evadb/configuration/bootstrap_environment.py
index 01655a2648..085b8d72d9 100644
--- a/evadb/configuration/bootstrap_environment.py
+++ b/evadb/configuration/bootstrap_environment.py
@@ -17,7 +17,6 @@
 from pathlib import Path
 from typing import Union
 
-import yaml
 
 from evadb.configuration.constants import (
     CACHE_DIR,
@@ -31,7 +30,6 @@
     EvaDB_DATASET_DIR,
 )
 from evadb.evadb_config import BASE_EVADB_CONFIG
-from evadb.utils.generic_utils import parse_config_yml
 from evadb.utils.logging_manager import logger as evadb_logger
 
 
diff --git a/evadb/configuration/configuration_manager.py b/evadb/configuration/configuration_manager.py
index ea45963437..8722b66a6a 100644
--- a/evadb/configuration/configuration_manager.py
+++ b/evadb/configuration/configuration_manager.py
@@ -21,7 +21,7 @@
 
 
 class ConfigurationManager(object):
-    def __init__(self, evadb_dir: str = None, catalog_obj = None) -> None:
+    def __init__(self, evadb_dir: str = None, catalog_obj=None) -> None:
         self._evadb_dir = evadb_dir or EvaDB_DATABASE_DIR
         self._catalog_obj = catalog_obj
         self._populate_base_configs()
diff --git a/evadb/database.py b/evadb/database.py
index 2da55f1a37..9c22d5b9ff 100644
--- a/evadb/database.py
+++ b/evadb/database.py
@@ -60,7 +60,6 @@ def init_evadb_instance(
     catalog_uri = custom_db_uri or get_default_db_uri(Path(db_dir))
 
     # load all the config into the configuration_catalog table
-    print("gg")
     bootstrap_configs(get_catalog_instance(catalog_uri), config_obj)
-    
+
     return EvaDBDatabase(db_dir, catalog_uri, get_catalog_instance)
diff --git a/evadb/evadb_config.py b/evadb/evadb_config.py
index d7882f4b35..e012f95567 100644
--- a/evadb/evadb_config.py
+++ b/evadb/evadb_config.py
@@ -21,21 +21,19 @@
 """
 
 BASE_EVADB_CONFIG = {
-  "evadb_installation_dir": "",
-  "datasets_dir": "",
-  "catalog_database_uri": "",
-  "application": "evadb",
-  "mode": "release",
-  "batch_mem_size": 30000000,
-  "gpu_batch_size": 1, # batch size used for gpu_operations
-  "gpu_ids": [
-    0
-  ],
-  "host": "0.0.0.0",
-  "port": 8803,
-  "socket_timeout": 60,
-  "ray": False,
-  "OPENAI_KEY": "",
-  "PINECONE_API_KEY": "",
-  "PINECONE_ENV": ""
-}
\ No newline at end of file
+    "evadb_installation_dir": "",
+    "datasets_dir": "",
+    "catalog_database_uri": "",
+    "application": "evadb",
+    "mode": "release",
+    "batch_mem_size": 30000000,
+    "gpu_batch_size": 1,  # batch size used for gpu_operations
+    "gpu_ids": [0],
+    "host": "0.0.0.0",
+    "port": 8803,
+    "socket_timeout": 60,
+    "ray": False,
+    "OPENAI_KEY": "",
+    "PINECONE_API_KEY": "",
+    "PINECONE_ENV": "",
+}
diff --git a/evadb/executor/create_function_executor.py b/evadb/executor/create_function_executor.py
index 8272d22c81..537d28365f 100644
--- a/evadb/executor/create_function_executor.py
+++ b/evadb/executor/create_function_executor.py
@@ -149,7 +149,8 @@ def handle_sklearn_function(self):
         aggregated_batch.frames.drop([arg_map["predict"]], axis=1, inplace=True)
         model.fit(X=aggregated_batch.frames, y=Y)
         model_path = os.path.join(
-            self.db.catalog().get_configuration_catalog_value("model_dir"), self.node.name
+            self.db.catalog().get_configuration_catalog_value("model_dir"),
+            self.node.name,
         )
         pickle.dump(model, open(model_path, "wb"))
         self.node.metadata.append(
diff --git a/evadb/executor/drop_object_executor.py b/evadb/executor/drop_object_executor.py
index b9aa9e016a..a857f15eae 100644
--- a/evadb/executor/drop_object_executor.py
+++ b/evadb/executor/drop_object_executor.py
@@ -118,7 +118,9 @@ def _handle_drop_index(self, index_name: str, if_exists: bool):
             index = VectorStoreFactory.init_vector_store(
                 index_obj.type,
                 index_obj.name,
-                **handle_vector_store_params(index_obj.type, index_obj.save_file_path, self.catalog),
+                **handle_vector_store_params(
+                    index_obj.type, index_obj.save_file_path, self.catalog
+                ),
             )
             assert (
                 index is not None
diff --git a/evadb/optimizer/optimizer_context.py b/evadb/optimizer/optimizer_context.py
index fcf88571e1..721cb5b928 100644
--- a/evadb/optimizer/optimizer_context.py
+++ b/evadb/optimizer/optimizer_context.py
@@ -46,7 +46,7 @@ def __init__(
         # check if ray is enabled
         is_ray_enabled = self.db.catalog().get_configuration_catalog_value("ray")
         self._rules_manager = rules_manager or RulesManager({"ray": is_ray_enabled})
-        
+
     @property
     def db(self):
         return self._db
diff --git a/evadb/optimizer/rules/rules_manager.py b/evadb/optimizer/rules/rules_manager.py
index 147206bd1a..6fffd0c57f 100644
--- a/evadb/optimizer/rules/rules_manager.py
+++ b/evadb/optimizer/rules/rules_manager.py
@@ -17,7 +17,6 @@
 from contextlib import contextmanager
 from typing import List
 
-from evadb.configuration.configuration_manager import ConfigurationManager
 from evadb.optimizer.rules.rules import (
     CacheFunctionExpressionInApply,
     CacheFunctionExpressionInFilter,
diff --git a/evadb/third_party/vector_stores/utils.py b/evadb/third_party/vector_stores/utils.py
index 121b7d032d..c7c5cb75b2 100644
--- a/evadb/third_party/vector_stores/utils.py
+++ b/evadb/third_party/vector_stores/utils.py
@@ -41,7 +41,7 @@ def init_vector_store(
             from evadb.third_party.vector_stores.pinecone import required_params
 
             validate_kwargs(kwargs, required_params, required_params)
-            
+
             return PineconeVectorStore(index_name, **kwargs)
 
         elif vector_store_type == VectorStoreType.CHROMADB:
diff --git a/test/unit_tests/optimizer/rules/test_rules.py b/test/unit_tests/optimizer/rules/test_rules.py
index b84bf18682..a08c002409 100644
--- a/test/unit_tests/optimizer/rules/test_rules.py
+++ b/test/unit_tests/optimizer/rules/test_rules.py
@@ -192,10 +192,7 @@ def test_supported_rules(self):
 
         for rule in supported_logical_rules:
             self.assertTrue(
-                any(
-                    isinstance(rule, type(x))
-                    for x in RulesManager().logical_rules
-                )
+                any(isinstance(rule, type(x)) for x in RulesManager().logical_rules)
             )
 
         ray_enabled = self.evadb.config.get_value("experimental", "ray")
diff --git a/test/unit_tests/optimizer/test_optimizer_task.py b/test/unit_tests/optimizer/test_optimizer_task.py
index ba693dd3c0..c9100d6eff 100644
--- a/test/unit_tests/optimizer/test_optimizer_task.py
+++ b/test/unit_tests/optimizer/test_optimizer_task.py
@@ -55,9 +55,7 @@ def top_down_rewrite(self, opr):
         grp_expr = opt_cxt.add_opr_to_group(opr)
         root_grp_id = grp_expr.group_id
         opt_cxt.task_stack.push(
-            TopDownRewrite(
-                grp_expr, RulesManager().stage_one_rewrite_rules, opt_cxt
-            )
+            TopDownRewrite(grp_expr, RulesManager().stage_one_rewrite_rules, opt_cxt)
         )
         self.execute_task_stack(opt_cxt.task_stack)
         return opt_cxt, root_grp_id
@@ -65,9 +63,7 @@ def top_down_rewrite(self, opr):
     def bottom_up_rewrite(self, root_grp_id, opt_cxt):
         grp_expr = opt_cxt.memo.groups[root_grp_id].logical_exprs[0]
         opt_cxt.task_stack.push(
-            BottomUpRewrite(
-                grp_expr, RulesManager().stage_two_rewrite_rules, opt_cxt
-            )
+            BottomUpRewrite(grp_expr, RulesManager().stage_two_rewrite_rules, opt_cxt)
         )
         self.execute_task_stack(opt_cxt.task_stack)
         return opt_cxt, root_grp_id

From 2288715554ee826362dfe2d53caff1d035080b1e Mon Sep 17 00:00:00 2001
From: Gaurav Tarlok Kakkar <gaurav21776@gmail.com>
Date: Tue, 10 Oct 2023 00:43:37 -0400
Subject: [PATCH 07/20] remove all dependencies on configuration manager

---
 evadb/binder/statement_binder.py              | 14 ++++++--
 evadb/catalog/sql_config.py                   | 32 ++++---------------
 evadb/evadb_cmd_client.py                     |  6 ++--
 .../abstract/pytorch_abstract_function.py     |  3 +-
 evadb/functions/chatgpt.py                    |  7 ++--
 .../executor/test_execution_context.py        | 32 +++++--------------
 test/unit_tests/test_eva_cmd_client.py        | 27 ++++++----------
 7 files changed, 43 insertions(+), 78 deletions(-)

diff --git a/evadb/binder/statement_binder.py b/evadb/binder/statement_binder.py
index 152ea104b2..149572ee9c 100644
--- a/evadb/binder/statement_binder.py
+++ b/evadb/binder/statement_binder.py
@@ -337,9 +337,17 @@ def _bind_func_expr(self, node: FunctionExpression):
                 )
                 # certain functions take additional inputs like yolo needs the model_name
                 # these arguments are passed by the user as part of metadata
-                node.function = lambda: function_class(
-                    **get_metadata_properties(function_obj)
-                )
+                # we also handle the special case of ChatGPT where we need to send the
+                # OpenAPI key as part of the parameter
+                # ToDO: this should be better handled
+                properties = get_metadata_properties(function_obj)
+                if node.name.upper() == str("CHATGPT"):
+                    openapi_key = self._catalog().get_configuration_catalog_value(
+                        "OPENAI_KEY"
+                    )
+                    properties["api_key"] = openapi_key
+
+                node.function = lambda: function_class(properties)
             except Exception as e:
                 err_msg = (
                     f"{str(e)}. Please verify that the function class name in the "
diff --git a/evadb/catalog/sql_config.py b/evadb/catalog/sql_config.py
index c1432e3553..4fc4de587f 100644
--- a/evadb/catalog/sql_config.py
+++ b/evadb/catalog/sql_config.py
@@ -17,9 +17,7 @@
 
 from sqlalchemy import create_engine, event
 from sqlalchemy.orm import scoped_session, sessionmaker
-from sqlalchemy.pool import NullPool
 
-from evadb.utils.generic_utils import is_postgres_uri, parse_config_yml
 
 # Permanent identifier column.
 IDENTIFIER_COLUMN = "_row_id"
@@ -63,33 +61,17 @@ def __call__(cls, uri):
 
 class SQLConfig(metaclass=SingletonMeta):
     def __init__(self, uri):
-        """Initializes the engine and session for database operations
-
-        Retrieves the database uri for connection from ConfigurationManager.
-        """
+        """Initializes the engine and session for database operations"""
 
         self.worker_uri = str(uri)
         # set echo=True to log SQL
 
-        connect_args = {}
-        config_obj = parse_config_yml()
-        if is_postgres_uri(config_obj["core"]["catalog_database_uri"]):
-            # Set the arguments for postgres backend.
-            connect_args = {"connect_timeout": 1000}
-            # https://www.oddbird.net/2014/06/14/sqlalchemy-postgres-autocommit/
-            self.engine = create_engine(
-                self.worker_uri,
-                poolclass=NullPool,
-                isolation_level="AUTOCOMMIT",
-                connect_args=connect_args,
-            )
-        else:
-            # Default to SQLite.
-            connect_args = {"timeout": 1000}
-            self.engine = create_engine(
-                self.worker_uri,
-                connect_args=connect_args,
-            )
+        # Default to SQLite.
+        connect_args = {"timeout": 1000}
+        self.engine = create_engine(
+            self.worker_uri,
+            connect_args=connect_args,
+        )
 
         if self.engine.url.get_backend_name() == "sqlite":
             # enforce foreign key constraint and wal logging for sqlite
diff --git a/evadb/evadb_cmd_client.py b/evadb/evadb_cmd_client.py
index 3286b68fa3..82f0fba80e 100644
--- a/evadb/evadb_cmd_client.py
+++ b/evadb/evadb_cmd_client.py
@@ -26,7 +26,7 @@
 EvaDB_CODE_DIR = abspath(join(THIS_DIR, ".."))
 sys.path.append(EvaDB_CODE_DIR)
 
-from evadb.configuration.configuration_manager import ConfigurationManager  # noqa: E402
+from evadb.evadb_config import BASE_EVADB_CONFIG
 from evadb.server.interpreter import start_cmd_client  # noqa: E402
 
 
@@ -62,11 +62,11 @@ def main():
     args, unknown = parser.parse_known_args()
 
     host = (
-        args.host if args.host else ConfigurationManager().get_value("server", "host")
+        args.host if args.host else BASE_EVADB_CONFIG["host"]
     )
 
     port = (
-        args.port if args.port else ConfigurationManager().get_value("server", "port")
+        args.port if args.port else BASE_EVADB_CONFIG["port"]
     )
     asyncio.run(evadb_client(host, port))
 
diff --git a/evadb/functions/abstract/pytorch_abstract_function.py b/evadb/functions/abstract/pytorch_abstract_function.py
index 763f3658f7..4c8c1e063a 100644
--- a/evadb/functions/abstract/pytorch_abstract_function.py
+++ b/evadb/functions/abstract/pytorch_abstract_function.py
@@ -17,7 +17,6 @@
 import pandas as pd
 from numpy.typing import ArrayLike
 
-from evadb.configuration.configuration_manager import ConfigurationManager
 from evadb.functions.abstract.abstract_function import (
     AbstractClassifierFunction,
     AbstractTransformationFunction,
@@ -74,7 +73,7 @@ def __call__(self, *args, **kwargs) -> pd.DataFrame:
         if isinstance(frames, pd.DataFrame):
             frames = frames.transpose().values.tolist()[0]
 
-        gpu_batch_size = ConfigurationManager().get_value("executor", "gpu_batch_size")
+        gpu_batch_size = None
         import torch
 
         tens_batch = torch.cat([self.transform(x) for x in frames]).to(
diff --git a/evadb/functions/chatgpt.py b/evadb/functions/chatgpt.py
index 61253116fe..532e13e3de 100644
--- a/evadb/functions/chatgpt.py
+++ b/evadb/functions/chatgpt.py
@@ -20,7 +20,6 @@
 from retry import retry
 
 from evadb.catalog.catalog_type import NdArrayType
-from evadb.configuration.configuration_manager import ConfigurationManager
 from evadb.functions.abstract.abstract_function import AbstractFunction
 from evadb.functions.decorators.decorators import forward, setup
 from evadb.functions.decorators.io_descriptors.data_types import PandasDataframe
@@ -85,10 +84,12 @@ def setup(
         self,
         model="gpt-3.5-turbo",
         temperature: float = 0,
+        api_key="",
     ) -> None:
         assert model in _VALID_CHAT_COMPLETION_MODEL, f"Unsupported ChatGPT {model}"
         self.model = model
         self.temperature = temperature
+        self.api_key = api_key
 
     @forward(
         input_signatures=[
@@ -120,9 +121,7 @@ def forward(self, text_df):
         def completion_with_backoff(**kwargs):
             return openai.ChatCompletion.create(**kwargs)
 
-        # Register API key, try configuration manager first
-        openai.api_key = ConfigurationManager().get_value("third_party", "OPENAI_KEY")
-        # If not found, try OS Environment Variable
+        openai.api_key = self.api_key
         if len(openai.api_key) == 0:
             openai.api_key = os.environ.get("OPENAI_KEY", "")
         assert (
diff --git a/test/unit_tests/executor/test_execution_context.py b/test/unit_tests/executor/test_execution_context.py
index 28383949e9..a3e4e736f0 100644
--- a/test/unit_tests/executor/test_execution_context.py
+++ b/test/unit_tests/executor/test_execution_context.py
@@ -21,7 +21,6 @@
 
 
 class ExecutionContextTest(unittest.TestCase):
-    @patch("evadb.executor.execution_context.ConfigurationManager")
     @patch("evadb.executor.execution_context.get_gpu_count")
     @patch("evadb.executor.execution_context.is_gpu_available")
     def test_CUDA_VISIBLE_DEVICES_gets_populated_from_config(
@@ -29,20 +28,17 @@ def test_CUDA_VISIBLE_DEVICES_gets_populated_from_config(
     ):
         gpu_check.return_value = True
         get_gpu_count.return_value = 3
-        cfm.return_value.get_value.return_value = [0, 1]
-        context = Context()
+        context = Context([[0, 1]])
 
         self.assertEqual(context.gpus, [0, 1])
 
-    @patch("evadb.executor.execution_context.ConfigurationManager")
     @patch("evadb.executor.execution_context.os")
     @patch("evadb.executor.execution_context.get_gpu_count")
     @patch("evadb.executor.execution_context.is_gpu_available")
     def test_CUDA_VISIBLE_DEVICES_gets_populated_from_environment_if_no_config(
-        self, is_gpu, get_gpu_count, os, cfm
+        self, is_gpu, get_gpu_count, os
     ):
         is_gpu.return_value = True
-        cfm.return_value.get_value.return_value = []
         get_gpu_count.return_value = 3
         os.environ.get.return_value = "0,1"
         context = Context()
@@ -50,57 +46,45 @@ def test_CUDA_VISIBLE_DEVICES_gets_populated_from_environment_if_no_config(
 
         self.assertEqual(context.gpus, [0, 1])
 
-    @patch("evadb.executor.execution_context.ConfigurationManager")
     @patch("evadb.executor.execution_context.os")
     @patch("evadb.executor.execution_context.get_gpu_count")
     @patch("evadb.executor.execution_context.is_gpu_available")
     def test_CUDA_VISIBLE_DEVICES_should_be_empty_if_nothing_provided(
-        self, gpu_check, get_gpu_count, os, cfm
+        self, gpu_check, get_gpu_count, os
     ):
         gpu_check.return_value = True
         get_gpu_count.return_value = 3
-        cfm.return_value.get_value.return_value = []
         os.environ.get.return_value = ""
         context = Context()
         os.environ.get.assert_called_with("CUDA_VISIBLE_DEVICES", "")
 
         self.assertEqual(context.gpus, [])
 
-    @patch("evadb.executor.execution_context.ConfigurationManager")
     @patch("evadb.executor.execution_context.os")
     @patch("evadb.executor.execution_context.is_gpu_available")
-    def test_gpus_ignores_config_if_no_gpu_available(self, gpu_check, os, cfm):
+    def test_gpus_ignores_config_if_no_gpu_available(self, gpu_check, os):
         gpu_check.return_value = False
-        cfm.return_value.get_value.return_value = [0, 1, 2]
         os.environ.get.return_value = "0,1,2"
-        context = Context()
+        context = Context([0, 1, 2])
 
         self.assertEqual(context.gpus, [])
 
-    @patch("evadb.executor.execution_context.ConfigurationManager")
     @patch("evadb.executor.execution_context.os")
     @patch("evadb.executor.execution_context.is_gpu_available")
-    def test_gpu_device_should_return_NO_GPU_if_GPU_not_available(
-        self, gpu_check, os, cfm
-    ):
+    def test_gpu_device_should_return_NO_GPU_if_GPU_not_available(self, gpu_check, os):
         gpu_check.return_value = True
-        cfm.return_value.get_value.return_value = []
         os.environ.get.return_value = ""
         context = Context()
         os.environ.get.assert_called_with("CUDA_VISIBLE_DEVICES", "")
 
         self.assertEqual(context.gpu_device(), NO_GPU)
 
-    @patch("evadb.executor.execution_context.ConfigurationManager")
     @patch("evadb.executor.execution_context.get_gpu_count")
     @patch("evadb.executor.execution_context.is_gpu_available")
-    def test_should_return_random_gpu_ID_if_available(
-        self, gpu_check, get_gpu_count, cfm
-    ):
+    def test_should_return_random_gpu_ID_if_available(self, gpu_check, get_gpu_count):
         gpu_check.return_value = True
         get_gpu_count.return_value = 1
-        cfm.return_value.get_value.return_value = [0, 1, 2]
-        context = Context()
+        context = Context([0, 1, 2])
 
         selected_device = context.gpu_device()
         self.assertEqual(selected_device, 0)
diff --git a/test/unit_tests/test_eva_cmd_client.py b/test/unit_tests/test_eva_cmd_client.py
index 0e1f67ee91..64806a42e6 100644
--- a/test/unit_tests/test_eva_cmd_client.py
+++ b/test/unit_tests/test_eva_cmd_client.py
@@ -17,13 +17,13 @@
 import unittest
 
 import pytest
-from mock import call, patch
+from mock import patch
 
-from evadb.configuration.configuration_manager import ConfigurationManager
+from evadb.evadb_config import BASE_EVADB_CONFIG
 from evadb.evadb_cmd_client import evadb_client, main
 
 
-@pytest.mark.skip
+# @pytest.mark.skip
 class CMDClientTest(unittest.TestCase):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
@@ -81,17 +81,10 @@ def test_main_without_cmd_arguments(
             [],
         )
 
-        # Mock the ConfigurationManager's get_value method
-        with patch.object(
-            ConfigurationManager, "get_value", return_value="default_value"
-        ) as mock_get_value:
-            # Call the function under test
-            main()
-
-            # Assert that the mocked functions were called correctly
-            mock_start_cmd_client.assert_called_once_with(
-                "default_value", "default_value"
-            )
-            mock_get_value.assert_has_calls(
-                [call("server", "host"), call("server", "port")]
-            )
+        # Call the function under test
+        main()
+
+        # Assert that the mocked functions were called correctly
+        mock_start_cmd_client.assert_called_once_with(
+            BASE_EVADB_CONFIG["host"], BASE_EVADB_CONFIG["port"]
+        )

From 791829bde01347feba00bc5559d76f9b485b3f51 Mon Sep 17 00:00:00 2001
From: Gaurav Tarlok Kakkar <gaurav21776@gmail.com>
Date: Tue, 10 Oct 2023 00:44:00 -0400
Subject: [PATCH 08/20] remove configuration manager

---
 evadb/configuration/configuration_manager.py | 72 --------------------
 1 file changed, 72 deletions(-)
 delete mode 100644 evadb/configuration/configuration_manager.py

diff --git a/evadb/configuration/configuration_manager.py b/evadb/configuration/configuration_manager.py
deleted file mode 100644
index 8722b66a6a..0000000000
--- a/evadb/configuration/configuration_manager.py
+++ /dev/null
@@ -1,72 +0,0 @@
-# coding=utf-8
-# Copyright 2018-2023 EvaDB
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-from pathlib import Path
-from typing import Any
-
-from evadb.configuration.bootstrap_environment import bootstrap_environment
-from evadb.configuration.constants import EvaDB_DATABASE_DIR, EvaDB_INSTALLATION_DIR
-from evadb.utils.logging_manager import logger
-
-
-class ConfigurationManager(object):
-    def __init__(self, evadb_dir: str = None, catalog_obj=None) -> None:
-        self._evadb_dir = evadb_dir or EvaDB_DATABASE_DIR
-        self._catalog_obj = catalog_obj
-        self._populate_base_configs()
-
-    def _populate_base_configs(self):
-        config_obj = bootstrap_environment(
-            evadb_dir=Path(self._evadb_dir),
-            evadb_installation_dir=Path(EvaDB_INSTALLATION_DIR),
-        )
-        return config_obj
-
-    def _get(self, category: str, key: str) -> Any:
-        """Retrieve a configuration value based on the category and key.
-
-        Args:
-            category (str): The category of the configuration.
-            key (str): The key of the configuration within the category.
-
-        Returns:
-            Any: The retrieved configuration value.
-
-        Raises:
-            ValueError: If the YAML file is invalid or cannot be loaded.
-        """
-        config_obj = self._config_obj
-
-        # Get value from the user-provided config file
-        value = config_obj.get(category, {}).get(key)
-
-        # cannot find the value, report invalid category, key combination
-        if value is None:
-            logger.exception(f"Invalid category and key combination {category}:{key}")
-
-        return value
-
-    def _update(self, category: str, key: str, value: str):
-        config_obj = self._config_obj
-
-        if category not in config_obj:
-            config_obj[category] = {}
-
-        config_obj[category][key] = value
-
-    def get_value(self, category: str, key: str) -> Any:
-        return self._get(category, key)
-
-    def update_value(self, category, key, value) -> None:
-        self._update(category, key, value)

From 9a7b8eab0649f079f669caf01c692d08aaf5b926 Mon Sep 17 00:00:00 2001
From: Gaurav Tarlok Kakkar <gaurav21776@gmail.com>
Date: Tue, 10 Oct 2023 00:57:56 -0400
Subject: [PATCH 09/20] fix test cases

---
 evadb/binder/statement_binder.py             |  2 +-
 evadb/catalog/catalog_utils.py               |  4 ++--
 evadb/configuration/bootstrap_environment.py | 18 +-----------------
 evadb/optimizer/rules/rules_manager.py       |  2 +-
 evadb/server/server.py                       |  2 +-
 5 files changed, 6 insertions(+), 22 deletions(-)

diff --git a/evadb/binder/statement_binder.py b/evadb/binder/statement_binder.py
index 149572ee9c..49ec5e154c 100644
--- a/evadb/binder/statement_binder.py
+++ b/evadb/binder/statement_binder.py
@@ -347,7 +347,7 @@ def _bind_func_expr(self, node: FunctionExpression):
                     )
                     properties["api_key"] = openapi_key
 
-                node.function = lambda: function_class(properties)
+                node.function = lambda: function_class(**properties)
             except Exception as e:
                 err_msg = (
                     f"{str(e)}. Please verify that the function class name in the "
diff --git a/evadb/catalog/catalog_utils.py b/evadb/catalog/catalog_utils.py
index 7ff1a535fe..f28eae334f 100644
--- a/evadb/catalog/catalog_utils.py
+++ b/evadb/catalog/catalog_utils.py
@@ -293,12 +293,12 @@ def get_metadata_properties(function_obj: FunctionCatalogEntry) -> Dict:
     return properties
 
 
-def bootstrap_configs(catalog, configs: dict):
+def bootstrap_configs(catalog: "CatalogManager", configs: dict):
     """
     load all the configuration values into the catalog table configuration_catalog
     """
     for key, value in configs.items():
-        catalog._config_catalog_service.insert_entry(key, value)
+        catalog.upsert_configuration_catalog_entry(key, value)
 
 
 def get_configuration_value(key: str):
diff --git a/evadb/configuration/bootstrap_environment.py b/evadb/configuration/bootstrap_environment.py
index 085b8d72d9..ec7793ee98 100644
--- a/evadb/configuration/bootstrap_environment.py
+++ b/evadb/configuration/bootstrap_environment.py
@@ -12,7 +12,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import importlib.resources as importlib_resources
 import logging
 from pathlib import Path
 from typing import Union
@@ -26,27 +25,12 @@
     MODEL_DIR,
     S3_DOWNLOAD_DIR,
     TMP_DIR,
-    EvaDB_CONFIG_FILE,
     EvaDB_DATASET_DIR,
 )
 from evadb.evadb_config import BASE_EVADB_CONFIG
 from evadb.utils.logging_manager import logger as evadb_logger
 
 
-def get_base_config(evadb_installation_dir: Path) -> Path:
-    """
-    Get path to .evadb_config.py source path.
-    """
-    return BASE_EVADB_CONFIG
-    # if evadb package is installed in environment
-    if importlib_resources.is_resource("evadb", EvaDB_CONFIG_FILE):
-        with importlib_resources.path("evadb", EvaDB_CONFIG_FILE) as config_path:
-            return config_path
-    else:
-        # For local dev environments without package installed
-        return evadb_installation_dir / EvaDB_CONFIG_FILE
-
-
 def get_default_db_uri(evadb_dir: Path):
     """
     Get the default database uri.
@@ -67,7 +51,7 @@ def bootstrap_environment(evadb_dir: Path, evadb_installation_dir: Path):
         evadb_installation_dir: path to evadb package
     """
 
-    config_obj = get_base_config(evadb_installation_dir)
+    config_obj = BASE_EVADB_CONFIG
 
     # creates necessary directories
     config_default_dict = create_directories_and_get_default_config_values(
diff --git a/evadb/optimizer/rules/rules_manager.py b/evadb/optimizer/rules/rules_manager.py
index 6fffd0c57f..cc88a9575d 100644
--- a/evadb/optimizer/rules/rules_manager.py
+++ b/evadb/optimizer/rules/rules_manager.py
@@ -66,7 +66,7 @@
 
 
 class RulesManager:
-    def __init__(self, configs: dict):
+    def __init__(self, configs: dict = {}):
         self._logical_rules = [
             LogicalInnerJoinCommutativity(),
             CacheFunctionExpressionInApply(),
diff --git a/evadb/server/server.py b/evadb/server/server.py
index 0105279a32..9f33dced0d 100644
--- a/evadb/server/server.py
+++ b/evadb/server/server.py
@@ -49,7 +49,7 @@ async def start_evadb_server(
         self._server = await asyncio.start_server(self.accept_client, host, port)
 
         # load built-in functions
-        mode = self._evadb.config.get_value("core", "mode")
+        mode = self._evadb.catalog().get_configuration_catalog_value("mode")
         init_builtin_functions(self._evadb, mode=mode)
 
         async with self._server:

From 50ed8264ac60f4065524cc972e77ae19a8cfa6cf Mon Sep 17 00:00:00 2001
From: Gaurav Tarlok Kakkar <gaurav21776@gmail.com>
Date: Tue, 10 Oct 2023 02:24:24 -0400
Subject: [PATCH 10/20] fix more testcases

---
 evadb/catalog/catalog_manager.py              |  1 -
 evadb/catalog/models/configuration_catalog.py |  6 ++----
 .../services/configuration_catalog_service.py |  2 +-
 .../services/function_cost_catalog_service.py |  2 +-
 evadb/configuration/bootstrap_environment.py  | 19 +++++++++---------
 evadb/parser/evadb.lark                       |  2 +-
 evadb/parser/lark_visitor/_expressions.py     |  6 ++++++
 test/app_tests/test_pandas_qa.py              |  2 +-
 test/app_tests/test_privategpt.py             |  2 +-
 test/app_tests/test_youtube_channel_qa.py     |  2 +-
 test/app_tests/test_youtube_qa.py             |  2 +-
 test/integration_tests/long/test_pytorch.py   |  3 ++-
 .../catalog/test_catalog_manager.py           | 20 +++++++++----------
 .../executor/test_execution_context.py        |  4 ++--
 .../optimizer/rules/test_batch_mem_size.py    |  4 +---
 test/unit_tests/optimizer/rules/test_rules.py |  2 +-
 16 files changed, 40 insertions(+), 39 deletions(-)

diff --git a/evadb/catalog/catalog_manager.py b/evadb/catalog/catalog_manager.py
index ed82d47355..d1004bfc08 100644
--- a/evadb/catalog/catalog_manager.py
+++ b/evadb/catalog/catalog_manager.py
@@ -145,7 +145,6 @@ def _clear_catalog_contents(self):
         )
         # clean up the dataset, index, and cache directories
         for folder in ["cache_dir", "index_dir", "datasets_dir"]:
-            print(folder, self.get_configuration_catalog_value(folder))
             remove_directory_contents(self.get_configuration_catalog_value(folder))
 
     "Database catalog services"
diff --git a/evadb/catalog/models/configuration_catalog.py b/evadb/catalog/models/configuration_catalog.py
index faf99b5abb..157b07eaf8 100644
--- a/evadb/catalog/models/configuration_catalog.py
+++ b/evadb/catalog/models/configuration_catalog.py
@@ -28,10 +28,8 @@ class ConfigurationCatalog(BaseModel):
 
     __tablename__ = "configuration_catalog"
 
-    _key = Column("name", String(100), unique=True)
-    _value = Column(
-        "engine", TextPickleType()
-    )  # TODO :- Will this work or would we need to pickle ??
+    _key = Column("key", String(100), unique=True)
+    _value = Column("value", TextPickleType())
 
     def __init__(self, key: str, value: any):
         self._key = key
diff --git a/evadb/catalog/services/configuration_catalog_service.py b/evadb/catalog/services/configuration_catalog_service.py
index 07843bc14e..bda22253c0 100644
--- a/evadb/catalog/services/configuration_catalog_service.py
+++ b/evadb/catalog/services/configuration_catalog_service.py
@@ -67,7 +67,7 @@ def upsert_entry(
                 select(self.model).filter(self.model._key == key)
             ).scalar_one_or_none()
             if entry:
-                entry.update(self.session, value=value)
+                entry.update(self.session, _value=value)
             else:
                 self.insert_entry(key, value)
         except Exception as e:
diff --git a/evadb/catalog/services/function_cost_catalog_service.py b/evadb/catalog/services/function_cost_catalog_service.py
index ac84e8c948..5e20ac3030 100644
--- a/evadb/catalog/services/function_cost_catalog_service.py
+++ b/evadb/catalog/services/function_cost_catalog_service.py
@@ -62,7 +62,7 @@ def upsert_entry(self, function_id: int, name: str, new_cost: int):
                 select(self.model).filter(self.model._function_id == function_id)
             ).scalar_one_or_none()
             if function_obj:
-                function_obj.update(self.session, cost=new_cost)
+                function_obj.update(self.session, _cost=new_cost)
             else:
                 self.insert_entry(function_id, name, new_cost)
         except Exception as e:
diff --git a/evadb/configuration/bootstrap_environment.py b/evadb/configuration/bootstrap_environment.py
index ec7793ee98..ac953be739 100644
--- a/evadb/configuration/bootstrap_environment.py
+++ b/evadb/configuration/bootstrap_environment.py
@@ -125,15 +125,14 @@ def merge_dict_of_dicts(dict1, dict2):
     merged_dict = dict1.copy()
 
     for key, value in dict2.items():
-        # Overwrite only if some value is specified.
-        if value:
-            if (
-                key in merged_dict
-                and isinstance(merged_dict[key], dict)
-                and isinstance(value, dict)
-            ):
-                merged_dict[key] = merge_dict_of_dicts(merged_dict[key], value)
-            else:
-                merged_dict[key] = value
+        if key in merged_dict.keys():
+            # Overwrite only if some value is specified.
+            if value is not None:
+                if isinstance(merged_dict[key], dict) and isinstance(value, dict):
+                    merged_dict[key] = merge_dict_of_dicts(merged_dict[key], value)
+                else:
+                    merged_dict[key] = value
+        else:
+            merged_dict[key] = value
 
     return merged_dict
diff --git a/evadb/parser/evadb.lark b/evadb/parser/evadb.lark
index b8cc2fc564..8f618c29c9 100644
--- a/evadb/parser/evadb.lark
+++ b/evadb/parser/evadb.lark
@@ -84,7 +84,7 @@ set_statement: SET config_name (EQUAL_SYMBOL | TO) config_value
 
 config_name: uid
 
-config_value: (string_literal | decimal_literal | boolean_literal | real_literal)
+config_value: constant
 
 // Data Manipulation Language
 
diff --git a/evadb/parser/lark_visitor/_expressions.py b/evadb/parser/lark_visitor/_expressions.py
index c5cf5a0bfe..91b5be77c1 100644
--- a/evadb/parser/lark_visitor/_expressions.py
+++ b/evadb/parser/lark_visitor/_expressions.py
@@ -41,6 +41,12 @@ def array_literal(self, tree):
         res = ConstantValueExpression(np.array(array_elements), ColumnType.NDARRAY)
         return res
 
+    def boolean_literal(self, tree):
+        text = tree.children[0]
+        if text == "TRUE":
+            return ConstantValueExpression(True, ColumnType.BOOLEAN)
+        return ConstantValueExpression(False, ColumnType.BOOLEAN)
+
     def constant(self, tree):
         for child in tree.children:
             if isinstance(child, Tree):
diff --git a/test/app_tests/test_pandas_qa.py b/test/app_tests/test_pandas_qa.py
index 6976a4699d..ebb6df29cb 100644
--- a/test/app_tests/test_pandas_qa.py
+++ b/test/app_tests/test_pandas_qa.py
@@ -25,7 +25,7 @@ class PandasQATest(unittest.TestCase):
     def setUpClass(cls):
         cls.evadb = get_evadb_for_testing()
         cls.evadb.catalog().reset()
-        os.environ["ray"] = str(cls.evadb.config.get_value("experimental", "ray"))
+        os.environ["ray"] = cls.evadb.catalog().get_configuration_catalog_value("ray")
 
     @classmethod
     def tearDownClass(cls):
diff --git a/test/app_tests/test_privategpt.py b/test/app_tests/test_privategpt.py
index 8a0b46f340..184888dd27 100644
--- a/test/app_tests/test_privategpt.py
+++ b/test/app_tests/test_privategpt.py
@@ -29,7 +29,7 @@ class PrivateGPTTest(unittest.TestCase):
     def setUpClass(cls):
         cls.evadb = get_evadb_for_testing()
         cls.evadb.catalog().reset()
-        os.environ["ray"] = str(cls.evadb.config.get_value("experimental", "ray"))
+        os.environ["ray"] = cls.evadb.catalog().get_configuration_catalog_value("ray")
 
     @classmethod
     def tearDownClass(cls):
diff --git a/test/app_tests/test_youtube_channel_qa.py b/test/app_tests/test_youtube_channel_qa.py
index 099a1c73c4..2bf70d918a 100644
--- a/test/app_tests/test_youtube_channel_qa.py
+++ b/test/app_tests/test_youtube_channel_qa.py
@@ -24,7 +24,7 @@ class YoutubeChannelQATest(unittest.TestCase):
     def setUpClass(cls):
         cls.evadb = get_evadb_for_testing()
         cls.evadb.catalog().reset()
-        os.environ["ray"] = str(cls.evadb.config.get_value("experimental", "ray"))
+        os.environ["ray"] = cls.evadb.catalog().get_configuration_catalog_value("ray")
 
     @classmethod
     def tearDownClass(cls):
diff --git a/test/app_tests/test_youtube_qa.py b/test/app_tests/test_youtube_qa.py
index 47d062405b..837404cda2 100644
--- a/test/app_tests/test_youtube_qa.py
+++ b/test/app_tests/test_youtube_qa.py
@@ -25,7 +25,7 @@ class YoutubeQATest(unittest.TestCase):
     def setUpClass(cls):
         cls.evadb = get_evadb_for_testing()
         cls.evadb.catalog().reset()
-        os.environ["ray"] = str(cls.evadb.config.get_value("experimental", "ray"))
+        os.environ["ray"] = cls.evadb.catalog().get_configuration_catalog_value("ray")
 
     @classmethod
     def tearDownClass(cls):
diff --git a/test/integration_tests/long/test_pytorch.py b/test/integration_tests/long/test_pytorch.py
index f2bd66ba04..3b1f0c2d36 100644
--- a/test/integration_tests/long/test_pytorch.py
+++ b/test/integration_tests/long/test_pytorch.py
@@ -49,7 +49,8 @@ class PytorchTest(unittest.TestCase):
     def setUpClass(cls):
         cls.evadb = get_evadb_for_testing()
         cls.evadb.catalog().reset()
-        os.environ["ray"] = str(cls.evadb.config.get_value("experimental", "ray"))
+        print(cls.evadb.catalog().get_configuration_catalog_value("ray"), "ray")
+        os.environ["ray"] = cls.evadb.catalog().get_configuration_catalog_value("ray")
 
         ua_detrac = f"{EvaDB_ROOT_DIR}/data/ua_detrac/ua_detrac.mp4"
         mnist = f"{EvaDB_ROOT_DIR}/data/mnist/mnist.mp4"
diff --git a/test/unit_tests/catalog/test_catalog_manager.py b/test/unit_tests/catalog/test_catalog_manager.py
index 3c0e23a3cd..6e8c6f9a37 100644
--- a/test/unit_tests/catalog/test_catalog_manager.py
+++ b/test/unit_tests/catalog/test_catalog_manager.py
@@ -44,7 +44,7 @@ def setUpClass(cls) -> None:
 
     @mock.patch("evadb.catalog.catalog_manager.init_db")
     def test_catalog_bootstrap(self, mocked_db):
-        x = CatalogManager(MagicMock(), MagicMock())
+        x = CatalogManager(MagicMock())
         x._bootstrap_catalog()
         mocked_db.assert_called()
 
@@ -52,7 +52,7 @@ def test_catalog_bootstrap(self, mocked_db):
         "evadb.catalog.catalog_manager.CatalogManager.create_and_insert_table_catalog_entry"
     )
     def test_create_multimedia_table_catalog_entry(self, mock):
-        x = CatalogManager(MagicMock(), MagicMock())
+        x = CatalogManager(MagicMock())
         name = "myvideo"
         x.create_and_insert_multimedia_table_catalog_entry(
             name=name, format_type=FileFormatType.VIDEO
@@ -71,7 +71,7 @@ def test_create_multimedia_table_catalog_entry(self, mock):
     def test_insert_table_catalog_entry_should_create_table_and_columns(
         self, ds_mock, initdb_mock
     ):
-        catalog = CatalogManager(MagicMock(), MagicMock())
+        catalog = CatalogManager(MagicMock())
         file_url = "file1"
         table_name = "name"
 
@@ -88,7 +88,7 @@ def test_insert_table_catalog_entry_should_create_table_and_columns(
     @mock.patch("evadb.catalog.catalog_manager.init_db")
     @mock.patch("evadb.catalog.catalog_manager.TableCatalogService")
     def test_get_table_catalog_entry_when_table_exists(self, ds_mock, initdb_mock):
-        catalog = CatalogManager(MagicMock(), MagicMock())
+        catalog = CatalogManager(MagicMock())
         table_name = "name"
         database_name = "database"
         row_id = 1
@@ -110,7 +110,7 @@ def test_get_table_catalog_entry_when_table_exists(self, ds_mock, initdb_mock):
     def test_get_table_catalog_entry_when_table_doesnot_exists(
         self, dcs_mock, ds_mock, initdb_mock
     ):
-        catalog = CatalogManager(MagicMock(), MagicMock())
+        catalog = CatalogManager(MagicMock())
         table_name = "name"
 
         database_name = "database"
@@ -132,7 +132,7 @@ def test_get_table_catalog_entry_when_table_doesnot_exists(
     def test_insert_function(
         self, checksum_mock, functionmetadata_mock, functionio_mock, function_mock
     ):
-        catalog = CatalogManager(MagicMock(), MagicMock())
+        catalog = CatalogManager(MagicMock())
         function_io_list = [MagicMock()]
         function_metadata_list = [MagicMock()]
         actual = catalog.insert_function_catalog_entry(
@@ -154,7 +154,7 @@ def test_insert_function(
 
     @mock.patch("evadb.catalog.catalog_manager.FunctionCatalogService")
     def test_get_function_catalog_entry_by_name(self, function_mock):
-        catalog = CatalogManager(MagicMock(), MagicMock())
+        catalog = CatalogManager(MagicMock())
         actual = catalog.get_function_catalog_entry_by_name("name")
         function_mock.return_value.get_entry_by_name.assert_called_with("name")
         self.assertEqual(
@@ -163,7 +163,7 @@ def test_get_function_catalog_entry_by_name(self, function_mock):
 
     @mock.patch("evadb.catalog.catalog_manager.FunctionCatalogService")
     def test_delete_function(self, function_mock):
-        CatalogManager(MagicMock(), MagicMock()).delete_function_catalog_entry_by_name(
+        CatalogManager(MagicMock()).delete_function_catalog_entry_by_name(
             "name"
         )
         function_mock.return_value.delete_entry_by_name.assert_called_with("name")
@@ -172,7 +172,7 @@ def test_delete_function(self, function_mock):
     def test_get_function_outputs(self, function_mock):
         mock_func = function_mock.return_value.get_output_entries_by_function_id
         function_obj = MagicMock(spec=FunctionCatalogEntry)
-        CatalogManager(MagicMock(), MagicMock()).get_function_io_catalog_output_entries(
+        CatalogManager(MagicMock()).get_function_io_catalog_output_entries(
             function_obj
         )
         mock_func.assert_called_once_with(function_obj.row_id)
@@ -181,7 +181,7 @@ def test_get_function_outputs(self, function_mock):
     def test_get_function_inputs(self, function_mock):
         mock_func = function_mock.return_value.get_input_entries_by_function_id
         function_obj = MagicMock(spec=FunctionCatalogEntry)
-        CatalogManager(MagicMock(), MagicMock()).get_function_io_catalog_input_entries(
+        CatalogManager(MagicMock()).get_function_io_catalog_input_entries(
             function_obj
         )
         mock_func.assert_called_once_with(function_obj.row_id)
diff --git a/test/unit_tests/executor/test_execution_context.py b/test/unit_tests/executor/test_execution_context.py
index a3e4e736f0..8d2a4caae3 100644
--- a/test/unit_tests/executor/test_execution_context.py
+++ b/test/unit_tests/executor/test_execution_context.py
@@ -24,11 +24,11 @@ class ExecutionContextTest(unittest.TestCase):
     @patch("evadb.executor.execution_context.get_gpu_count")
     @patch("evadb.executor.execution_context.is_gpu_available")
     def test_CUDA_VISIBLE_DEVICES_gets_populated_from_config(
-        self, gpu_check, get_gpu_count, cfm
+        self, gpu_check, get_gpu_count
     ):
         gpu_check.return_value = True
         get_gpu_count.return_value = 3
-        context = Context([[0, 1]])
+        context = Context([0, 1])
 
         self.assertEqual(context.gpus, [0, 1])
 
diff --git a/test/unit_tests/optimizer/rules/test_batch_mem_size.py b/test/unit_tests/optimizer/rules/test_batch_mem_size.py
index 70033b014f..68f84db803 100644
--- a/test/unit_tests/optimizer/rules/test_batch_mem_size.py
+++ b/test/unit_tests/optimizer/rules/test_batch_mem_size.py
@@ -38,9 +38,7 @@ def test_batch_mem_size_for_sqlite_storage_engine(self):
         the storage engine.
         """
         test_batch_mem_size = 100
-        self.evadb.config.update_value(
-            "executor", "batch_mem_size", test_batch_mem_size
-        )
+        execute_query_fetch_all(self.evadb, f"SET batch_mem_size={test_batch_mem_size}")
         create_table_query = """
             CREATE TABLE IF NOT EXISTS MyCSV (
                 id INTEGER UNIQUE,
diff --git a/test/unit_tests/optimizer/rules/test_rules.py b/test/unit_tests/optimizer/rules/test_rules.py
index a08c002409..18f8dc51d4 100644
--- a/test/unit_tests/optimizer/rules/test_rules.py
+++ b/test/unit_tests/optimizer/rules/test_rules.py
@@ -195,7 +195,7 @@ def test_supported_rules(self):
                 any(isinstance(rule, type(x)) for x in RulesManager().logical_rules)
             )
 
-        ray_enabled = self.evadb.config.get_value("experimental", "ray")
+        ray_enabled = self.evadb.catalog().get_configuration_catalog_value("ray")
         ray_enabled_and_installed = is_ray_enabled_and_installed(ray_enabled)
 
         # For the current version, we choose either the distributed or the

From 814ea85716cc5043d91d67bce4179179d65eadb7 Mon Sep 17 00:00:00 2001
From: Gaurav Tarlok Kakkar <gaurav21776@gmail.com>
Date: Tue, 10 Oct 2023 02:27:28 -0400
Subject: [PATCH 11/20] fix linter

---
 evadb/catalog/catalog_manager.py                |  1 -
 evadb/catalog/catalog_utils.py                  |  2 +-
 evadb/catalog/models/base_model.py              |  1 +
 evadb/catalog/sql_config.py                     |  1 -
 evadb/configuration/bootstrap_environment.py    |  1 -
 evadb/evadb_cmd_client.py                       | 10 +++-------
 test/integration_tests/long/test_pytorch.py     |  1 -
 test/unit_tests/catalog/test_catalog_manager.py | 12 +++---------
 test/unit_tests/test_eva_cmd_client.py          |  3 +--
 test/util.py                                    |  1 -
 10 files changed, 9 insertions(+), 24 deletions(-)

diff --git a/evadb/catalog/catalog_manager.py b/evadb/catalog/catalog_manager.py
index d1004bfc08..029d03da01 100644
--- a/evadb/catalog/catalog_manager.py
+++ b/evadb/catalog/catalog_manager.py
@@ -32,7 +32,6 @@
 )
 from evadb.catalog.models.utils import (
     ColumnCatalogEntry,
-    ConfigurationCatalogEntry,
     DatabaseCatalogEntry,
     FunctionCacheCatalogEntry,
     FunctionCatalogEntry,
diff --git a/evadb/catalog/catalog_utils.py b/evadb/catalog/catalog_utils.py
index f28eae334f..35fb7f6a87 100644
--- a/evadb/catalog/catalog_utils.py
+++ b/evadb/catalog/catalog_utils.py
@@ -293,7 +293,7 @@ def get_metadata_properties(function_obj: FunctionCatalogEntry) -> Dict:
     return properties
 
 
-def bootstrap_configs(catalog: "CatalogManager", configs: dict):
+def bootstrap_configs(catalog, configs: dict):
     """
     load all the configuration values into the catalog table configuration_catalog
     """
diff --git a/evadb/catalog/models/base_model.py b/evadb/catalog/models/base_model.py
index 5826d75425..f31fc10b43 100644
--- a/evadb/catalog/models/base_model.py
+++ b/evadb/catalog/models/base_model.py
@@ -15,6 +15,7 @@
 from sqlalchemy import Column, Integer
 from sqlalchemy.exc import SQLAlchemyError
 from sqlalchemy.ext.declarative import declarative_base
+
 from evadb.utils.logging_manager import logger
 
 
diff --git a/evadb/catalog/sql_config.py b/evadb/catalog/sql_config.py
index 4fc4de587f..b32959234b 100644
--- a/evadb/catalog/sql_config.py
+++ b/evadb/catalog/sql_config.py
@@ -18,7 +18,6 @@
 from sqlalchemy import create_engine, event
 from sqlalchemy.orm import scoped_session, sessionmaker
 
-
 # Permanent identifier column.
 IDENTIFIER_COLUMN = "_row_id"
 
diff --git a/evadb/configuration/bootstrap_environment.py b/evadb/configuration/bootstrap_environment.py
index ac953be739..4b293cb365 100644
--- a/evadb/configuration/bootstrap_environment.py
+++ b/evadb/configuration/bootstrap_environment.py
@@ -16,7 +16,6 @@
 from pathlib import Path
 from typing import Union
 
-
 from evadb.configuration.constants import (
     CACHE_DIR,
     DB_DEFAULT_NAME,
diff --git a/evadb/evadb_cmd_client.py b/evadb/evadb_cmd_client.py
index 82f0fba80e..2f15f075d5 100644
--- a/evadb/evadb_cmd_client.py
+++ b/evadb/evadb_cmd_client.py
@@ -26,7 +26,7 @@
 EvaDB_CODE_DIR = abspath(join(THIS_DIR, ".."))
 sys.path.append(EvaDB_CODE_DIR)
 
-from evadb.evadb_config import BASE_EVADB_CONFIG
+from evadb.evadb_config import BASE_EVADB_CONFIG  # noqa: E402
 from evadb.server.interpreter import start_cmd_client  # noqa: E402
 
 
@@ -61,13 +61,9 @@ def main():
     # PARSE ARGS
     args, unknown = parser.parse_known_args()
 
-    host = (
-        args.host if args.host else BASE_EVADB_CONFIG["host"]
-    )
+    host = args.host if args.host else BASE_EVADB_CONFIG["host"]
 
-    port = (
-        args.port if args.port else BASE_EVADB_CONFIG["port"]
-    )
+    port = args.port if args.port else BASE_EVADB_CONFIG["port"]
     asyncio.run(evadb_client(host, port))
 
 
diff --git a/test/integration_tests/long/test_pytorch.py b/test/integration_tests/long/test_pytorch.py
index 3b1f0c2d36..3fd758da29 100644
--- a/test/integration_tests/long/test_pytorch.py
+++ b/test/integration_tests/long/test_pytorch.py
@@ -49,7 +49,6 @@ class PytorchTest(unittest.TestCase):
     def setUpClass(cls):
         cls.evadb = get_evadb_for_testing()
         cls.evadb.catalog().reset()
-        print(cls.evadb.catalog().get_configuration_catalog_value("ray"), "ray")
         os.environ["ray"] = cls.evadb.catalog().get_configuration_catalog_value("ray")
 
         ua_detrac = f"{EvaDB_ROOT_DIR}/data/ua_detrac/ua_detrac.mp4"
diff --git a/test/unit_tests/catalog/test_catalog_manager.py b/test/unit_tests/catalog/test_catalog_manager.py
index 6e8c6f9a37..3706bee7f0 100644
--- a/test/unit_tests/catalog/test_catalog_manager.py
+++ b/test/unit_tests/catalog/test_catalog_manager.py
@@ -163,25 +163,19 @@ def test_get_function_catalog_entry_by_name(self, function_mock):
 
     @mock.patch("evadb.catalog.catalog_manager.FunctionCatalogService")
     def test_delete_function(self, function_mock):
-        CatalogManager(MagicMock()).delete_function_catalog_entry_by_name(
-            "name"
-        )
+        CatalogManager(MagicMock()).delete_function_catalog_entry_by_name("name")
         function_mock.return_value.delete_entry_by_name.assert_called_with("name")
 
     @mock.patch("evadb.catalog.catalog_manager.FunctionIOCatalogService")
     def test_get_function_outputs(self, function_mock):
         mock_func = function_mock.return_value.get_output_entries_by_function_id
         function_obj = MagicMock(spec=FunctionCatalogEntry)
-        CatalogManager(MagicMock()).get_function_io_catalog_output_entries(
-            function_obj
-        )
+        CatalogManager(MagicMock()).get_function_io_catalog_output_entries(function_obj)
         mock_func.assert_called_once_with(function_obj.row_id)
 
     @mock.patch("evadb.catalog.catalog_manager.FunctionIOCatalogService")
     def test_get_function_inputs(self, function_mock):
         mock_func = function_mock.return_value.get_input_entries_by_function_id
         function_obj = MagicMock(spec=FunctionCatalogEntry)
-        CatalogManager(MagicMock()).get_function_io_catalog_input_entries(
-            function_obj
-        )
+        CatalogManager(MagicMock()).get_function_io_catalog_input_entries(function_obj)
         mock_func.assert_called_once_with(function_obj.row_id)
diff --git a/test/unit_tests/test_eva_cmd_client.py b/test/unit_tests/test_eva_cmd_client.py
index 64806a42e6..90a7f4a152 100644
--- a/test/unit_tests/test_eva_cmd_client.py
+++ b/test/unit_tests/test_eva_cmd_client.py
@@ -16,11 +16,10 @@
 import asyncio
 import unittest
 
-import pytest
 from mock import patch
 
-from evadb.evadb_config import BASE_EVADB_CONFIG
 from evadb.evadb_cmd_client import evadb_client, main
+from evadb.evadb_config import BASE_EVADB_CONFIG
 
 
 # @pytest.mark.skip
diff --git a/test/util.py b/test/util.py
index 945e2175e7..3a23a6ff5c 100644
--- a/test/util.py
+++ b/test/util.py
@@ -422,7 +422,6 @@ def create_large_scale_image_dataset(num=1000000):
 
 def create_sample_video(num_frames=NUM_FRAMES):
     file_name = os.path.join(get_tmp_dir(), "dummy.avi")
-    print(file_name)
     try:
         os.remove(file_name)
     except FileNotFoundError:

From fedf6a9b4750f17e520bc6d29fba5b2edffcb1d9 Mon Sep 17 00:00:00 2001
From: Gaurav Tarlok Kakkar <gaurav21776@gmail.com>
Date: Tue, 10 Oct 2023 02:29:58 -0400
Subject: [PATCH 12/20] addressed PR comments

---
 evadb/configuration/bootstrap_environment.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/evadb/configuration/bootstrap_environment.py b/evadb/configuration/bootstrap_environment.py
index 4b293cb365..bb108692a4 100644
--- a/evadb/configuration/bootstrap_environment.py
+++ b/evadb/configuration/bootstrap_environment.py
@@ -74,7 +74,7 @@ def bootstrap_environment(evadb_dir: Path, evadb_installation_dir: Path):
 
 # TODO : Change
 def create_directories_and_get_default_config_values(
-    evadb_dir: Path, evadb_installation_dir: Path, category: str = None, key: str = None
+    evadb_dir: Path, evadb_installation_dir: Path
 ) -> Union[dict, str]:
     default_install_dir = evadb_installation_dir
     dataset_location = evadb_dir / EvaDB_DATASET_DIR
@@ -112,10 +112,6 @@ def create_directories_and_get_default_config_values(
     config_obj["tmp_dir"] = str(tmp_dir.resolve())
     config_obj["function_dir"] = str(function_dir.resolve())
     config_obj["model_dir"] = str(model_dir.resolve())
-    if category and key:
-        return config_obj.get(category, {}).get(key, None)
-    elif category:
-        return config_obj.get(category, {})
     return config_obj
 
 

From 2ecf6236e47a1fd5a0ba2102ec37e6f00c9472f0 Mon Sep 17 00:00:00 2001
From: Gaurav Tarlok Kakkar <gaurav21776@gmail.com>
Date: Tue, 10 Oct 2023 03:16:16 -0400
Subject: [PATCH 13/20] fixed ray setup

---
 evadb/executor/load_multimedia_executor.py                | 4 ++--
 evadb/functions/abstract/pytorch_abstract_function.py     | 3 ++-
 test/app_tests/test_pandas_qa.py                          | 4 +++-
 test/app_tests/test_privategpt.py                         | 4 +++-
 test/app_tests/test_youtube_channel_qa.py                 | 4 +++-
 test/app_tests/test_youtube_qa.py                         | 4 +++-
 test/integration_tests/long/test_create_index_executor.py | 2 +-
 .../long/test_error_handling_with_ray.py                  | 4 +++-
 test/integration_tests/long/test_load_executor.py         | 4 +++-
 test/integration_tests/long/test_pytorch.py               | 8 ++++++--
 test/integration_tests/long/test_s3_load_executor.py      | 4 +++-
 test/integration_tests/long/test_similarity.py            | 2 +-
 12 files changed, 33 insertions(+), 14 deletions(-)

diff --git a/evadb/executor/load_multimedia_executor.py b/evadb/executor/load_multimedia_executor.py
index 9f9828476f..03f1f7fb64 100644
--- a/evadb/executor/load_multimedia_executor.py
+++ b/evadb/executor/load_multimedia_executor.py
@@ -53,8 +53,8 @@ def exec(self, *args, **kwargs):
 
             # If it is a s3 path, download the file to local
             if self.node.file_path.as_posix().startswith("s3:/"):
-                s3_dir = self.catalog().get_configuration_catalog_value(
-                    "s3_download_dir"
+                s3_dir = Path(
+                    self.catalog().get_configuration_catalog_value("s3_download_dir")
                 )
                 dst_path = s3_dir / self.node.table_info.table_name
                 dst_path.mkdir(parents=True, exist_ok=True)
diff --git a/evadb/functions/abstract/pytorch_abstract_function.py b/evadb/functions/abstract/pytorch_abstract_function.py
index 4c8c1e063a..49e531655e 100644
--- a/evadb/functions/abstract/pytorch_abstract_function.py
+++ b/evadb/functions/abstract/pytorch_abstract_function.py
@@ -73,7 +73,8 @@ def __call__(self, *args, **kwargs) -> pd.DataFrame:
         if isinstance(frames, pd.DataFrame):
             frames = frames.transpose().values.tolist()[0]
 
-        gpu_batch_size = None
+        # hardcoding it for now, need to be fixed @xzdandy
+        gpu_batch_size = 1
         import torch
 
         tens_batch = torch.cat([self.transform(x) for x in frames]).to(
diff --git a/test/app_tests/test_pandas_qa.py b/test/app_tests/test_pandas_qa.py
index ebb6df29cb..e45c927620 100644
--- a/test/app_tests/test_pandas_qa.py
+++ b/test/app_tests/test_pandas_qa.py
@@ -25,7 +25,9 @@ class PandasQATest(unittest.TestCase):
     def setUpClass(cls):
         cls.evadb = get_evadb_for_testing()
         cls.evadb.catalog().reset()
-        os.environ["ray"] = cls.evadb.catalog().get_configuration_catalog_value("ray")
+        os.environ["ray"] = str(
+            cls.evadb.catalog().get_configuration_catalog_value("ray")
+        )
 
     @classmethod
     def tearDownClass(cls):
diff --git a/test/app_tests/test_privategpt.py b/test/app_tests/test_privategpt.py
index 184888dd27..ff534dec22 100644
--- a/test/app_tests/test_privategpt.py
+++ b/test/app_tests/test_privategpt.py
@@ -29,7 +29,9 @@ class PrivateGPTTest(unittest.TestCase):
     def setUpClass(cls):
         cls.evadb = get_evadb_for_testing()
         cls.evadb.catalog().reset()
-        os.environ["ray"] = cls.evadb.catalog().get_configuration_catalog_value("ray")
+        os.environ["ray"] = str(
+            cls.evadb.catalog().get_configuration_catalog_value("ray")
+        )
 
     @classmethod
     def tearDownClass(cls):
diff --git a/test/app_tests/test_youtube_channel_qa.py b/test/app_tests/test_youtube_channel_qa.py
index 2bf70d918a..be1202bb98 100644
--- a/test/app_tests/test_youtube_channel_qa.py
+++ b/test/app_tests/test_youtube_channel_qa.py
@@ -24,7 +24,9 @@ class YoutubeChannelQATest(unittest.TestCase):
     def setUpClass(cls):
         cls.evadb = get_evadb_for_testing()
         cls.evadb.catalog().reset()
-        os.environ["ray"] = cls.evadb.catalog().get_configuration_catalog_value("ray")
+        os.environ["ray"] = str(
+            cls.evadb.catalog().get_configuration_catalog_value("ray")
+        )
 
     @classmethod
     def tearDownClass(cls):
diff --git a/test/app_tests/test_youtube_qa.py b/test/app_tests/test_youtube_qa.py
index 837404cda2..722566e905 100644
--- a/test/app_tests/test_youtube_qa.py
+++ b/test/app_tests/test_youtube_qa.py
@@ -25,7 +25,9 @@ class YoutubeQATest(unittest.TestCase):
     def setUpClass(cls):
         cls.evadb = get_evadb_for_testing()
         cls.evadb.catalog().reset()
-        os.environ["ray"] = cls.evadb.catalog().get_configuration_catalog_value("ray")
+        os.environ["ray"] = str(
+            cls.evadb.catalog().get_configuration_catalog_value("ray")
+        )
 
     @classmethod
     def tearDownClass(cls):
diff --git a/test/integration_tests/long/test_create_index_executor.py b/test/integration_tests/long/test_create_index_executor.py
index feabb5bff5..f44ef8f823 100644
--- a/test/integration_tests/long/test_create_index_executor.py
+++ b/test/integration_tests/long/test_create_index_executor.py
@@ -33,7 +33,7 @@
 class CreateIndexTest(unittest.TestCase):
     def _index_save_path(self):
         return str(
-            Path(self.evadb.config.get_value("storage", "index_dir"))
+            Path(self.evadb.catalog().get_configuration_catalog_value("index_dir"))
             / Path("{}_{}.index".format("FAISS", "testCreateIndexName"))
         )
 
diff --git a/test/integration_tests/long/test_error_handling_with_ray.py b/test/integration_tests/long/test_error_handling_with_ray.py
index da134b7ed3..c2d71e7fe3 100644
--- a/test/integration_tests/long/test_error_handling_with_ray.py
+++ b/test/integration_tests/long/test_error_handling_with_ray.py
@@ -32,7 +32,9 @@
 class ErrorHandlingRayTests(unittest.TestCase):
     def setUp(self):
         self.evadb = get_evadb_for_testing()
-        os.environ["ray"] = str(self.evadb.config.get_value("experimental", "ray"))
+        os.environ["ray"] = str(
+            self.evadb.catalog().get_configuration_catalog_value("ray")
+        )
         self.evadb.catalog().reset()
         # Load built-in Functions.
         load_functions_for_testing(self.evadb, mode="debug")
diff --git a/test/integration_tests/long/test_load_executor.py b/test/integration_tests/long/test_load_executor.py
index 05a479a8f4..0ca421cf1a 100644
--- a/test/integration_tests/long/test_load_executor.py
+++ b/test/integration_tests/long/test_load_executor.py
@@ -89,7 +89,9 @@ def test_should_form_symlink_to_individual_video(self):
         # check that the file is a symlink to self.video_file_path
         video_file_path = os.path.join(video_dir, video_file)
         self.assertTrue(os.path.islink(video_file_path))
-        self.assertEqual(os.readlink(video_file_path), self.video_file_path)
+        self.assertEqual(
+            os.readlink(video_file_path), str(Path(self.video_file_path).resolve())
+        )
 
         execute_query_fetch_all(self.evadb, "DROP TABLE IF EXISTS MyVideo;")
 
diff --git a/test/integration_tests/long/test_pytorch.py b/test/integration_tests/long/test_pytorch.py
index 3fd758da29..b0d1cf6327 100644
--- a/test/integration_tests/long/test_pytorch.py
+++ b/test/integration_tests/long/test_pytorch.py
@@ -49,7 +49,9 @@ class PytorchTest(unittest.TestCase):
     def setUpClass(cls):
         cls.evadb = get_evadb_for_testing()
         cls.evadb.catalog().reset()
-        os.environ["ray"] = cls.evadb.catalog().get_configuration_catalog_value("ray")
+        os.environ["ray"] = str(
+            cls.evadb.catalog().get_configuration_catalog_value("ray")
+        )
 
         ua_detrac = f"{EvaDB_ROOT_DIR}/data/ua_detrac/ua_detrac.mp4"
         mnist = f"{EvaDB_ROOT_DIR}/data/mnist/mnist.mp4"
@@ -295,7 +297,9 @@ def test_should_run_pytorch_and_similarity(self):
         batch_res = execute_query_fetch_all(self.evadb, select_query)
         img = batch_res.frames["myvideo.data"][0]
 
-        tmp_dir_from_config = self.evadb.config.get_value("storage", "tmp_dir")
+        tmp_dir_from_config = self.evadb.catalog().get_configuration_catalog_value(
+            "tmp_dir"
+        )
 
         img_save_path = os.path.join(tmp_dir_from_config, "dummy.jpg")
         try:
diff --git a/test/integration_tests/long/test_s3_load_executor.py b/test/integration_tests/long/test_s3_load_executor.py
index bdd6487249..e6f961de60 100644
--- a/test/integration_tests/long/test_s3_load_executor.py
+++ b/test/integration_tests/long/test_s3_load_executor.py
@@ -41,7 +41,9 @@ def setUp(self):
         self.evadb.catalog().reset()
         self.video_file_path = create_sample_video()
         self.multiple_video_file_path = f"{EvaDB_ROOT_DIR}/data/sample_videos/1"
-        self.s3_download_dir = self.evadb.config.get_value("storage", "s3_download_dir")
+        self.s3_download_dir = self.evadb.catalog().get_configuration_catalog_value(
+            "s3_download_dir"
+        )
 
         """Mocked AWS Credentials for moto."""
         os.environ["AWS_ACCESS_KEY_ID"] = "testing"
diff --git a/test/integration_tests/long/test_similarity.py b/test/integration_tests/long/test_similarity.py
index 8dc7a2e3cc..35f70948f8 100644
--- a/test/integration_tests/long/test_similarity.py
+++ b/test/integration_tests/long/test_similarity.py
@@ -107,7 +107,7 @@ def setUp(self):
 
             # Create an actual image dataset.
             img_save_path = os.path.join(
-                self.evadb.config.get_value("storage", "tmp_dir"),
+                self.evadb.catalog().get_configuration_catalog_value("tmp_dir"),
                 f"test_similar_img{i}.jpg",
             )
             try_to_import_cv2()

From 9289dfc57c7caa2293df0105debc177874bbdcf5 Mon Sep 17 00:00:00 2001
From: Gaurav Tarlok Kakkar <gaurav21776@gmail.com>
Date: Tue, 10 Oct 2023 03:36:18 -0400
Subject: [PATCH 14/20] fix all testcases

---
 test/integration_tests/long/test_optimizer_rules.py | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/test/integration_tests/long/test_optimizer_rules.py b/test/integration_tests/long/test_optimizer_rules.py
index e703492d6b..eb515a19a2 100644
--- a/test/integration_tests/long/test_optimizer_rules.py
+++ b/test/integration_tests/long/test_optimizer_rules.py
@@ -34,7 +34,6 @@
     PushDownFilterThroughApplyAndMerge,
     PushDownFilterThroughJoin,
     ReorderPredicates,
-    XformLateralJoinToLinearFlow,
 )
 from evadb.optimizer.rules.rules_manager import RulesManager, disable_rules
 from evadb.plan_nodes.predicate_plan import PredicatePlan
@@ -108,16 +107,6 @@ def test_should_benefit_from_pushdown(self, merge_mock, evaluate_mock):
         # on all the frames
         self.assertGreater(evaluate_count_without_rule, 3 * evaluate_count_with_rule)
 
-        result_without_xform_rule = None
-        rules_manager = RulesManager()
-        with disable_rules(rules_manager, [XformLateralJoinToLinearFlow()]):
-            custom_plan_generator = PlanGenerator(self.evadb, rules_manager)
-            result_without_xform_rule = execute_query_fetch_all(
-                self.evadb, query, plan_generator=custom_plan_generator
-            )
-
-        self.assertEqual(result_without_xform_rule, result_with_rule)
-
     def test_should_pushdown_without_pushdown_join_rule(self):
         query = """SELECT id, obj.labels
                     FROM MyVideo JOIN LATERAL

From 5951906b0632bbf3bfb3aefd045d8ecd659cbe3d Mon Sep 17 00:00:00 2001
From: Gaurav Tarlok Kakkar <gaurav21776@gmail.com>
Date: Mon, 16 Oct 2023 15:08:58 -0400
Subject: [PATCH 15/20] address PR commits

---
 evadb/binder/statement_binder.py | 2 +-
 evadb/catalog/catalog_manager.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/evadb/binder/statement_binder.py b/evadb/binder/statement_binder.py
index 49ec5e154c..39e95a3b0b 100644
--- a/evadb/binder/statement_binder.py
+++ b/evadb/binder/statement_binder.py
@@ -341,7 +341,7 @@ def _bind_func_expr(self, node: FunctionExpression):
                 # OpenAPI key as part of the parameter
                 # ToDO: this should be better handled
                 properties = get_metadata_properties(function_obj)
-                if node.name.upper() == str("CHATGPT"):
+                if string_comparison_case_insensitive(node.name, "CHATGPT"):
                     openapi_key = self._catalog().get_configuration_catalog_value(
                         "OPENAI_KEY"
                     )
diff --git a/evadb/catalog/catalog_manager.py b/evadb/catalog/catalog_manager.py
index 029d03da01..7a7e5b9c4f 100644
--- a/evadb/catalog/catalog_manager.py
+++ b/evadb/catalog/catalog_manager.py
@@ -630,7 +630,7 @@ def upsert_configuration_catalog_entry(self, key: str, value: any):
         """
         self._config_catalog_service.upsert_entry(key, value)
 
-    def get_configuration_catalog_value(self, key: str) -> Any:
+    def get_configuration_catalog_value(self, key: str, default:Any = None) -> Any:
         """
         Returns the value entry for the given key
         Arguments:
@@ -643,4 +643,4 @@ def get_configuration_catalog_value(self, key: str) -> Any:
         table_entry = self._config_catalog_service.get_entry_by_name(key)
         if table_entry:
             return table_entry.value
-        return None
+        return default

From fad7b68ab1634354dfdaec41463411c969816aad Mon Sep 17 00:00:00 2001
From: Gaurav Tarlok Kakkar <gaurav21776@gmail.com>
Date: Mon, 16 Oct 2023 15:20:03 -0400
Subject: [PATCH 16/20] fix merge

---
 evadb/binder/statement_binder.py     | 13 +++++++------
 evadb/executor/show_info_executor.py |  3 +--
 evadb/functions/chatgpt.py           | 10 +++++-----
 3 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/evadb/binder/statement_binder.py b/evadb/binder/statement_binder.py
index 39e95a3b0b..cc24068959 100644
--- a/evadb/binder/statement_binder.py
+++ b/evadb/binder/statement_binder.py
@@ -338,14 +338,15 @@ def _bind_func_expr(self, node: FunctionExpression):
                 # certain functions take additional inputs like yolo needs the model_name
                 # these arguments are passed by the user as part of metadata
                 # we also handle the special case of ChatGPT where we need to send the
-                # OpenAPI key as part of the parameter
-                # ToDO: this should be better handled
+                # OpenAPI key as part of the parameter if not provided by the user
                 properties = get_metadata_properties(function_obj)
                 if string_comparison_case_insensitive(node.name, "CHATGPT"):
-                    openapi_key = self._catalog().get_configuration_catalog_value(
-                        "OPENAI_KEY"
-                    )
-                    properties["api_key"] = openapi_key
+                    # if the user didn't provide any API_KEY, check if we have one in the catalog
+                    if "OPENAI_API_KEY" not in properties.keys():
+                        openapi_key = self._catalog().get_configuration_catalog_value(
+                            "OPENAI_API_KEY"
+                        )
+                        properties["openai_api_key"] = openapi_key
 
                 node.function = lambda: function_class(**properties)
             except Exception as e:
diff --git a/evadb/executor/show_info_executor.py b/evadb/executor/show_info_executor.py
index e4894aacf1..e928d777f0 100644
--- a/evadb/executor/show_info_executor.py
+++ b/evadb/executor/show_info_executor.py
@@ -46,8 +46,7 @@ def exec(self, *args, **kwargs):
                     show_entries.append(table.name)
             show_entries = {"name": show_entries}
         elif self.node.show_type is ShowType.CONFIG:
-            value = self._config.get_value(
-                category="default",
+            value = self.catalog().get_configuration_catalog_value(
                 key=self.node.show_val.upper(),
             )
             show_entries = {}
diff --git a/evadb/functions/chatgpt.py b/evadb/functions/chatgpt.py
index 532e13e3de..fadc61191a 100644
--- a/evadb/functions/chatgpt.py
+++ b/evadb/functions/chatgpt.py
@@ -84,12 +84,12 @@ def setup(
         self,
         model="gpt-3.5-turbo",
         temperature: float = 0,
-        api_key="",
+        openai_api_key="",
     ) -> None:
         assert model in _VALID_CHAT_COMPLETION_MODEL, f"Unsupported ChatGPT {model}"
         self.model = model
         self.temperature = temperature
-        self.api_key = api_key
+        self.openai_api_key = openai_api_key
 
     @forward(
         input_signatures=[
@@ -121,12 +121,12 @@ def forward(self, text_df):
         def completion_with_backoff(**kwargs):
             return openai.ChatCompletion.create(**kwargs)
 
-        openai.api_key = self.api_key
+        openai.api_key = self.openai_api_key
         if len(openai.api_key) == 0:
-            openai.api_key = os.environ.get("OPENAI_KEY", "")
+            openai.api_key = os.environ.get("OPENAI_API_KEY", "")
         assert (
             len(openai.api_key) != 0
-        ), "Please set your OpenAI API key in evadb.yml file (third_party, open_api_key) or environment variable (OPENAI_KEY)"
+        ), "Please set your OpenAI API key using SET OPENAI_API_KEY = 'sk-' or environment variable (OPENAI_API_KEY)"
 
         queries = text_df[text_df.columns[0]]
         content = text_df[text_df.columns[0]]

From 09ca39d629f50ce179693448c45e7618136d4e8b Mon Sep 17 00:00:00 2001
From: Gaurav Tarlok Kakkar <gaurav21776@gmail.com>
Date: Mon, 16 Oct 2023 15:30:41 -0400
Subject: [PATCH 17/20] fix linter

---
 .circleci/config.yml             | 2 +-
 evadb/catalog/catalog_manager.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 01984e154b..cb7ad985d9 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -234,7 +234,7 @@ jobs:
               else
                 pip install ".[dev,pinecone,chromadb]" # ray < 2.5.0 does not work with python 3.11 ray-project/ray#33864
               fi
-              python -c "import yaml;f = open('evadb/evadb.yml', 'r+');config_obj = yaml.load(f, Loader=yaml.FullLoader);config_obj['experimental']['ray'] = True;f.seek(0);f.write(yaml.dump(config_obj));f.truncate();"
+              python -c "import evadb;cur=evadb.connect().cursor();cur.query('SET ray=True';)"
             else
               if [ $PY_VERSION != "3.11" ]; then
                 pip install ".[dev,ludwig,qdrant,pinecone,chromadb]"
diff --git a/evadb/catalog/catalog_manager.py b/evadb/catalog/catalog_manager.py
index 7a7e5b9c4f..df621da33e 100644
--- a/evadb/catalog/catalog_manager.py
+++ b/evadb/catalog/catalog_manager.py
@@ -630,7 +630,7 @@ def upsert_configuration_catalog_entry(self, key: str, value: any):
         """
         self._config_catalog_service.upsert_entry(key, value)
 
-    def get_configuration_catalog_value(self, key: str, default:Any = None) -> Any:
+    def get_configuration_catalog_value(self, key: str, default: Any = None) -> Any:
         """
         Returns the value entry for the given key
         Arguments:

From eb4ee8b5679802cc478bc969b14e5cb7e8929974 Mon Sep 17 00:00:00 2001
From: Gaurav Tarlok Kakkar <gaurav21776@gmail.com>
Date: Mon, 16 Oct 2023 15:33:13 -0400
Subject: [PATCH 18/20] move openai_key -> openai_api_key

---
 apps/pandas_qa/pandas_qa.py                 |  4 +--
 apps/youtube_qa/youtube_qa.py               |  4 +--
 docs/source/overview/concepts.rst           |  2 +-
 docs/source/usecases/question-answering.rst |  2 +-
 evadb/evadb.yml                             | 29 ---------------------
 evadb/evadb_config.py                       |  2 +-
 test/app_tests/test_youtube_channel_qa.py   |  2 +-
 test/app_tests/test_youtube_qa.py           |  2 +-
 8 files changed, 9 insertions(+), 38 deletions(-)
 delete mode 100644 evadb/evadb.yml

diff --git a/apps/pandas_qa/pandas_qa.py b/apps/pandas_qa/pandas_qa.py
index 8e4b6868a0..b81ce27d11 100644
--- a/apps/pandas_qa/pandas_qa.py
+++ b/apps/pandas_qa/pandas_qa.py
@@ -53,10 +53,10 @@ def receive_user_input() -> Dict:
 
     # get OpenAI key if needed
     try:
-        api_key = os.environ["OPENAI_KEY"]
+        api_key = os.environ["OPENAI_API_KEY"]
     except KeyError:
         api_key = str(input("🔑 Enter your OpenAI key: "))
-        os.environ["OPENAI_KEY"] = api_key
+        os.environ["OPENAI_API_KEY"] = api_key
 
     return user_input
 
diff --git a/apps/youtube_qa/youtube_qa.py b/apps/youtube_qa/youtube_qa.py
index 5a56bbe29d..ee4626473c 100644
--- a/apps/youtube_qa/youtube_qa.py
+++ b/apps/youtube_qa/youtube_qa.py
@@ -93,10 +93,10 @@ def receive_user_input() -> Dict:
 
     # get OpenAI key if needed
     try:
-        api_key = os.environ["OPENAI_KEY"]
+        api_key = os.environ["OPENAI_API_KEY"]
     except KeyError:
         api_key = str(input("🔑 Enter your OpenAI key: "))
-        os.environ["OPENAI_KEY"] = api_key
+        os.environ["OPENAI_API_KEY"] = api_key
 
     return user_input
 
diff --git a/docs/source/overview/concepts.rst b/docs/source/overview/concepts.rst
index c9dba32159..f2905b640f 100644
--- a/docs/source/overview/concepts.rst
+++ b/docs/source/overview/concepts.rst
@@ -46,7 +46,7 @@ Here are some illustrative **AI queries** for a ChatGPT-based video question ans
     --- The 'transcripts' table has a column called 'text' with the transcript text
     --- Since ChatGPT is a built-in function in EvaDB, we don't have to define it
     --- We can directly use ChatGPT() in any query
-    --- We will only need to set the OPENAI_KEY as an environment variable
+    --- We will only need to set the OPENAI_API_KEY as an environment variable
     SELECT ChatGPT('Is this video summary related to Ukraine russia war', text) 
         FROM TEXT_SUMMARY;
 
diff --git a/docs/source/usecases/question-answering.rst b/docs/source/usecases/question-answering.rst
index 7a1235da04..15f548d9cb 100644
--- a/docs/source/usecases/question-answering.rst
+++ b/docs/source/usecases/question-answering.rst
@@ -57,7 +57,7 @@ EvaDB has built-in support for ``ChatGPT`` function from ``OpenAI``. You will ne
 
     # Set OpenAI key
     import os
-    os.environ["OPENAI_KEY"] = "sk-..."
+    os.environ["OPENAI_API_KEY"] = "sk-..."
 
 .. note::
     
diff --git a/evadb/evadb.yml b/evadb/evadb.yml
deleted file mode 100644
index e7d6a38943..0000000000
--- a/evadb/evadb.yml
+++ /dev/null
@@ -1,29 +0,0 @@
-core:
-  evadb_installation_dir: ""
-  datasets_dir: ""
-  catalog_database_uri: ""
-  application: "evadb"
-  mode: "release" #release or debug
-
-executor:
-  # batch_mem_size configures the number of rows processed by the execution engine in one iteration
-  # rows = max(1, row_mem_size / batch_mem_size)
-  batch_mem_size: 30000000
-
-  # batch size used for gpu_operations
-  gpu_batch_size: 1
-
-  gpu_ids: [0]
-
-server:
-  host: "0.0.0.0"
-  port: 8803
-  socket_timeout: 60
-
-experimental:
-  ray: False
-
-third_party:
-  OPENAI_KEY: ""
-  PINECONE_API_KEY: ""
-  PINECONE_ENV: ""
diff --git a/evadb/evadb_config.py b/evadb/evadb_config.py
index e012f95567..800112a601 100644
--- a/evadb/evadb_config.py
+++ b/evadb/evadb_config.py
@@ -33,7 +33,7 @@
     "port": 8803,
     "socket_timeout": 60,
     "ray": False,
-    "OPENAI_KEY": "",
+    "OPENAI_API_KEY": "",
     "PINECONE_API_KEY": "",
     "PINECONE_ENV": "",
 }
diff --git a/test/app_tests/test_youtube_channel_qa.py b/test/app_tests/test_youtube_channel_qa.py
index be1202bb98..edb138588d 100644
--- a/test/app_tests/test_youtube_channel_qa.py
+++ b/test/app_tests/test_youtube_channel_qa.py
@@ -41,7 +41,7 @@ def tearDown(self) -> None:
     def test_should_run_youtube_channel_qa_app(self):
         app_path = Path("apps", "youtube_channel_qa", "youtube_channel_qa.py")
         input1 = "\n\n\n"  # Download just one video from the default channel in the default order.
-        # Assuming that OPENAI_KEY is already set as an environment variable
+        # Assuming that OPENAI_API_KEY is already set as an environment variable
         input2 = "What is this video about?\n"  # Question
         input3 = "exit\n"  # Exit
         inputs = input1 + input2 + input3
diff --git a/test/app_tests/test_youtube_qa.py b/test/app_tests/test_youtube_qa.py
index 722566e905..9402654bdf 100644
--- a/test/app_tests/test_youtube_qa.py
+++ b/test/app_tests/test_youtube_qa.py
@@ -43,7 +43,7 @@ def tearDown(self) -> None:
     def test_should_run_youtube_qa_app(self):
         app_path = Path("apps", "youtube_qa", "youtube_qa.py")
         input1 = "yes\n\n"  # Go with online video and default URL
-        # Assuming that OPENAI_KEY is already set as an environment variable
+        # Assuming that OPENAI_API_KEY is already set as an environment variable
         input2 = "What is this video on?\n"  # Question
         input3 = "exit\nexit\n"  # Exit
         inputs = input1 + input2 + input3

From c84e0d70a291e567246faf83148b65e80d3d3108 Mon Sep 17 00:00:00 2001
From: Gaurav Tarlok Kakkar <gaurav21776@gmail.com>
Date: Mon, 16 Oct 2023 16:29:09 -0400
Subject: [PATCH 19/20] minor fixes

---
 docs/source/reference/ai/custom-ai-function.rst |  3 ---
 evadb/functions/dalle.py                        | 12 +++++-------
 evadb/functions/stable_diffusion.py             | 14 ++++----------
 3 files changed, 9 insertions(+), 20 deletions(-)

diff --git a/docs/source/reference/ai/custom-ai-function.rst b/docs/source/reference/ai/custom-ai-function.rst
index 71c19bb91c..d03b684cbe 100644
--- a/docs/source/reference/ai/custom-ai-function.rst
+++ b/docs/source/reference/ai/custom-ai-function.rst
@@ -258,9 +258,6 @@ The following code can be used to create an Object Detection function using Yolo
         try_to_import_openai()
         import openai
 
-        #setting up the key
-        openai.api_key = ConfigurationManager().get_value("third_party", "OPENAI_KEY")
-
         #getting the data
         content = text_df[text_df.columns[0]]
         responses = []
diff --git a/evadb/functions/dalle.py b/evadb/functions/dalle.py
index efc075d733..7c1dc39dd0 100644
--- a/evadb/functions/dalle.py
+++ b/evadb/functions/dalle.py
@@ -22,7 +22,6 @@
 from PIL import Image
 
 from evadb.catalog.catalog_type import NdArrayType
-from evadb.configuration.configuration_manager import ConfigurationManager
 from evadb.functions.abstract.abstract_function import AbstractFunction
 from evadb.functions.decorators.decorators import forward
 from evadb.functions.decorators.io_descriptors.data_types import PandasDataframe
@@ -34,8 +33,8 @@ class DallEFunction(AbstractFunction):
     def name(self) -> str:
         return "DallE"
 
-    def setup(self) -> None:
-        pass
+    def setup(self, openai_api_key="") -> None:
+        self.openai_api_key = openai_api_key
 
     @forward(
         input_signatures=[
@@ -59,14 +58,13 @@ def forward(self, text_df):
         try_to_import_openai()
         import openai
 
-        # Register API key, try configuration manager first
-        openai.api_key = ConfigurationManager().get_value("third_party", "OPENAI_KEY")
+        openai.api_key = self.openai_api_key
         # If not found, try OS Environment Variable
         if len(openai.api_key) == 0:
-            openai.api_key = os.environ.get("OPENAI_KEY", "")
+            openai.api_key = os.environ.get("OPENAI_API_KEY", "")
         assert (
             len(openai.api_key) != 0
-        ), "Please set your OpenAI API key in evadb.yml file (third_party, open_api_key) or environment variable (OPENAI_KEY)"
+        ), "Please set your OpenAI API key using SET OPENAI_API_KEY = 'sk-'  or environment variable (OPENAI_API_KEY)"
 
         def generate_image(text_df: PandasDataframe):
             results = []
diff --git a/evadb/functions/stable_diffusion.py b/evadb/functions/stable_diffusion.py
index 6e84d687fa..c5337a27c0 100644
--- a/evadb/functions/stable_diffusion.py
+++ b/evadb/functions/stable_diffusion.py
@@ -22,7 +22,6 @@
 from PIL import Image
 
 from evadb.catalog.catalog_type import NdArrayType
-from evadb.configuration.configuration_manager import ConfigurationManager
 from evadb.functions.abstract.abstract_function import AbstractFunction
 from evadb.functions.decorators.decorators import forward
 from evadb.functions.decorators.io_descriptors.data_types import PandasDataframe
@@ -34,10 +33,8 @@ class StableDiffusion(AbstractFunction):
     def name(self) -> str:
         return "StableDiffusion"
 
-    def setup(
-        self,
-    ) -> None:
-        pass
+    def setup(self, replicate_api_token="") -> None:
+        self.replicate_api_token = replicate_api_token
 
     @forward(
         input_signatures=[
@@ -64,16 +61,13 @@ def forward(self, text_df):
         try_to_import_replicate()
         import replicate
 
-        # Register API key, try configuration manager first
-        replicate_api_key = ConfigurationManager().get_value(
-            "third_party", "REPLICATE_API_TOKEN"
-        )
+        replicate_api_key = self.replicate_api_token
         # If not found, try OS Environment Variable
         if len(replicate_api_key) == 0:
             replicate_api_key = os.environ.get("REPLICATE_API_TOKEN", "")
         assert (
             len(replicate_api_key) != 0
-        ), "Please set your Replicate API key in evadb.yml file (third_party, replicate_api_token) or environment variable (REPLICATE_API_TOKEN)"
+        ), "Please set your Replicate API key using SET REPLICATE_API_TOKEN = '' or set the environment variable (REPLICATE_API_TOKEN)"
         os.environ["REPLICATE_API_TOKEN"] = replicate_api_key
 
         model_id = (

From a4276aef24b054cb69bb43064c46fc9052eb436e Mon Sep 17 00:00:00 2001
From: Gaurav Tarlok Kakkar <gaurav21776@gmail.com>
Date: Mon, 16 Oct 2023 16:42:08 -0400
Subject: [PATCH 20/20] fix test case

---
 test/unit_tests/test_dalle.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/unit_tests/test_dalle.py b/test/unit_tests/test_dalle.py
index a7a9536fa2..c434a4db4a 100644
--- a/test/unit_tests/test_dalle.py
+++ b/test/unit_tests/test_dalle.py
@@ -41,7 +41,7 @@ def setUp(self) -> None:
     def tearDown(self) -> None:
         execute_query_fetch_all(self.evadb, "DROP TABLE IF EXISTS ImageGen;")
 
-    @patch.dict("os.environ", {"OPENAI_KEY": "mocked_openai_key"})
+    @patch.dict("os.environ", {"OPENAI_API_KEY": "mocked_openai_key"})
     @patch("requests.get")
     @patch("openai.Image.create", return_value={"data": [{"url": "mocked_url"}]})
     def test_dalle_image_generation(self, mock_openai_create, mock_requests_get):