From e03bc2fe270bd9d9d5a43015b1aa7baf542d9114 Mon Sep 17 00:00:00 2001 From: hershd23 Date: Thu, 5 Oct 2023 23:28:42 -0400 Subject: [PATCH 01/20] Adding EvaDb changes to the catalog modified: evadb/catalog/catalog_manager.py new file: evadb/catalog/models/configuration_catelog.py modified: evadb/catalog/models/utils.py new file: evadb/catalog/services/configuration_catalog_service.py modified: evadb/catalog/sql_config.py Adding EvaDB base configs to a python dict new file: evadb/eva_config.py --- evadb/catalog/catalog_manager.py | 28 +++++++++ evadb/catalog/models/configuration_catelog.py | 43 +++++++++++++ evadb/catalog/models/utils.py | 16 +++++ .../services/configuration_catalog_service.py | 61 +++++++++++++++++++ evadb/catalog/sql_config.py | 1 + evadb/eva_config.py | 41 +++++++++++++ 6 files changed, 190 insertions(+) create mode 100644 evadb/catalog/models/configuration_catelog.py create mode 100644 evadb/catalog/services/configuration_catalog_service.py create mode 100644 evadb/eva_config.py diff --git a/evadb/catalog/catalog_manager.py b/evadb/catalog/catalog_manager.py index b7c55c9bf2..3a63b890a7 100644 --- a/evadb/catalog/catalog_manager.py +++ b/evadb/catalog/catalog_manager.py @@ -33,6 +33,7 @@ ) from evadb.catalog.models.utils import ( ColumnCatalogEntry, + ConfigurationCatalogEntry, DatabaseCatalogEntry, FunctionCacheCatalogEntry, FunctionCatalogEntry, @@ -45,6 +46,7 @@ init_db, truncate_catalog_tables, ) +from evadb.catalog.services.configuration_catalog_service import ConfigurationCatalogService from evadb.catalog.services.column_catalog_service import ColumnCatalogService from evadb.catalog.services.database_catalog_service import DatabaseCatalogService from evadb.catalog.services.function_cache_catalog_service import ( @@ -78,6 +80,7 @@ def __init__(self, db_uri: str, config: ConfigurationManager): self._config = config self._bootstrap_catalog() self._db_catalog_service = DatabaseCatalogService(self._sql_config.session) + self._config_catalog_service = ConfigurationCatalogService(self._sql_config.session) self._table_catalog_service = TableCatalogService(self._sql_config.session) self._column_service = ColumnCatalogService(self._sql_config.session) self._function_service = FunctionCatalogService(self._sql_config.session) @@ -607,6 +610,31 @@ def create_and_insert_multimedia_metadata_table_catalog_entry( table_type=TableType.SYSTEM_STRUCTURED_DATA, ) return obj + + "Configuration catalog services" + + def insert_configuration_catalog_entry(self, key: str, value: any): + """A new entry is persisted in the database catalog." + + Args: + key: key name + value: value name + """ + self._db_catalog_service.insert_entry(key, value) + + def get_configuration_catalog_entry(self, key: str) -> ConfigurationCatalogEntry: + """ + Returns the value entry for the given key + Arguments: + key (str): key name + + Returns: + ConfigurationCatalogEntry + """ + + table_entry = self._db_catalog_service.get_entry_by_name(key) + + return table_entry #### get catalog instance diff --git a/evadb/catalog/models/configuration_catelog.py b/evadb/catalog/models/configuration_catelog.py new file mode 100644 index 0000000000..106c230131 --- /dev/null +++ b/evadb/catalog/models/configuration_catelog.py @@ -0,0 +1,43 @@ +# coding=utf-8 +# Copyright 2018-2023 EvaDB +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from sqlalchemy import Column, String + +from evadb.catalog.models.base_model import BaseModel +from evadb.catalog.models.utils import ConfigurationCatalogEntry, TextPickleType + + +class ConfigurationCatalog(BaseModel): + """The `ConfigurationCatalog` catalog stores all the configuration params. + `_row_id:` an autogenerated unique identifier. + `_key:` the key for the config. + `_value:` the value for the config + """ + + __tablename__ = "configuation_catalog" + + _key = Column("name", String(100), unique=True) + _value = Column("engine", TextPickleType()) # TODO :- Will this work or would we need to pickle ?? + + def __init__(self, key: str, value: any): + self._key = key + self._value = value + + def as_dataclass(self) -> "ConfigurationCatalogEntry": + return ConfigurationCatalogEntry( + row_id=self._row_id, + key=self._key, + value=self._value, + ) diff --git a/evadb/catalog/models/utils.py b/evadb/catalog/models/utils.py index b1c067aa0b..36d11937f0 100644 --- a/evadb/catalog/models/utils.py +++ b/evadb/catalog/models/utils.py @@ -257,3 +257,19 @@ def display_format(self): "engine": self.engine, "params": self.params, } + +@dataclass(unsafe_hash=True) +class ConfigurationCatalogEntry: + """Dataclass representing an entry in the `ConfigurationCatalog`. + This is done to ensure we don't expose the sqlalchemy dependencies beyond catalog service. Further, sqlalchemy does not allow sharing of objects across threads. + """ + + key = str, + value = str, + row_id: int = None + + def display_format(self): + return { + "key": self.key, + "value": self.value, + } diff --git a/evadb/catalog/services/configuration_catalog_service.py b/evadb/catalog/services/configuration_catalog_service.py new file mode 100644 index 0000000000..3f2722fbba --- /dev/null +++ b/evadb/catalog/services/configuration_catalog_service.py @@ -0,0 +1,61 @@ +# coding=utf-8 +# Copyright 2018-2023 EvaDB +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from sqlalchemy.orm import Session +from sqlalchemy.sql.expression import select + +from evadb.catalog.models.configuration_catelog import ConfigurationCatalog +from evadb.catalog.models.utils import ConfigurationCatalogEntry +from evadb.catalog.services.base_service import BaseService +from evadb.utils.errors import CatalogError +from evadb.utils.logging_manager import logger + + +class ConfigurationCatalogService(BaseService): + def __init__(self, db_session: Session): + super().__init__(ConfigurationCatalog, db_session) + + def insert_entry( + self, + key: str, + value: any, + ): + try: + config_catalog_obj = self.model( + key = key, + value = value + ) + config_catalog_obj = config_catalog_obj.save(self.session) + + except Exception as e: + logger.exception( + f"Failed to insert entry into database catalog with exception {str(e)}" + ) + raise CatalogError(e) + + def get_entry_by_name(self, key: str) -> ConfigurationCatalogEntry: + """ + Get the table catalog entry with given table name. + Arguments: + key (str): key name + Returns: + Configuration Catalog Entry - catalog entry for given key name + """ + entry = self.session.execute( + select(self.model).filter(self.model._key == key) + ).scalar_one_or_none() + if entry: + return entry.as_dataclass() + return entry diff --git a/evadb/catalog/sql_config.py b/evadb/catalog/sql_config.py index f4893ba997..c1432e3553 100644 --- a/evadb/catalog/sql_config.py +++ b/evadb/catalog/sql_config.py @@ -32,6 +32,7 @@ "column_catalog", "table_catalog", "database_catalog", + "configuration_catalog", "depend_column_and_function_cache", "function_cache", "function_catalog", diff --git a/evadb/eva_config.py b/evadb/eva_config.py new file mode 100644 index 0000000000..d7882f4b35 --- /dev/null +++ b/evadb/eva_config.py @@ -0,0 +1,41 @@ +# coding=utf-8 +# Copyright 2018-2023 EvaDB +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +EvaDB configuration dict + +batch_mem_size configures the number of rows processed by the execution engine in one iteration +rows = max(1, row_mem_size / batch_mem_size) +""" + +BASE_EVADB_CONFIG = { + "evadb_installation_dir": "", + "datasets_dir": "", + "catalog_database_uri": "", + "application": "evadb", + "mode": "release", + "batch_mem_size": 30000000, + "gpu_batch_size": 1, # batch size used for gpu_operations + "gpu_ids": [ + 0 + ], + "host": "0.0.0.0", + "port": 8803, + "socket_timeout": 60, + "ray": False, + "OPENAI_KEY": "", + "PINECONE_API_KEY": "", + "PINECONE_ENV": "" +} \ No newline at end of file From f123b6ffd77d908fdc4253ecbeb2107efa555634 Mon Sep 17 00:00:00 2001 From: hershd23 Date: Fri, 6 Oct 2023 02:04:04 -0400 Subject: [PATCH 02/20] Adding changes to the configuration manager modified: evadb/configuration/bootstrap_environment.py modified: evadb/configuration/configuration_manager.py modified: evadb/configuration/constants.py renamed: evadb/eva_config.py -> evadb/evadb_config.py --- evadb/configuration/bootstrap_environment.py | 10 ++++++---- evadb/configuration/configuration_manager.py | 7 ++++--- evadb/configuration/constants.py | 2 +- evadb/{eva_config.py => evadb_config.py} | 0 4 files changed, 11 insertions(+), 8 deletions(-) rename evadb/{eva_config.py => evadb_config.py} (100%) diff --git a/evadb/configuration/bootstrap_environment.py b/evadb/configuration/bootstrap_environment.py index 55e2f0f228..f0c33ac5d7 100644 --- a/evadb/configuration/bootstrap_environment.py +++ b/evadb/configuration/bootstrap_environment.py @@ -36,12 +36,12 @@ def get_base_config(evadb_installation_dir: Path) -> Path: """ - Get path to .evadb.yml source path. + Get path to .evadb_config.py source path. """ # if evadb package is installed in environment if importlib_resources.is_resource("evadb", EvaDB_CONFIG_FILE): - with importlib_resources.path("evadb", EvaDB_CONFIG_FILE) as yml_path: - return yml_path + with importlib_resources.path("evadb", EvaDB_CONFIG_FILE) as config_path: + return config_path else: # For local dev environments without package installed return evadb_installation_dir / EvaDB_CONFIG_FILE @@ -91,9 +91,11 @@ def bootstrap_environment(evadb_dir: Path, evadb_installation_dir: Path): evadb_logger.setLevel(level) evadb_logger.debug(f"Setting logging level to: {str(level)}") - return config_obj + # Mainly want to add all the configs to sqlite + return config_obj +# TODO : Change def create_directories_and_get_default_config_values( evadb_dir: Path, evadb_installation_dir: Path, category: str = None, key: str = None ) -> Union[dict, str]: diff --git a/evadb/configuration/configuration_manager.py b/evadb/configuration/configuration_manager.py index 8d209e2fcf..ea45963437 100644 --- a/evadb/configuration/configuration_manager.py +++ b/evadb/configuration/configuration_manager.py @@ -21,11 +21,12 @@ class ConfigurationManager(object): - def __init__(self, evadb_dir: str = None) -> None: + def __init__(self, evadb_dir: str = None, catalog_obj = None) -> None: self._evadb_dir = evadb_dir or EvaDB_DATABASE_DIR - self._config_obj = self._create_if_not_exists() + self._catalog_obj = catalog_obj + self._populate_base_configs() - def _create_if_not_exists(self): + def _populate_base_configs(self): config_obj = bootstrap_environment( evadb_dir=Path(self._evadb_dir), evadb_installation_dir=Path(EvaDB_INSTALLATION_DIR), diff --git a/evadb/configuration/constants.py b/evadb/configuration/constants.py index 51513462d6..138ec547f8 100644 --- a/evadb/configuration/constants.py +++ b/evadb/configuration/constants.py @@ -21,7 +21,7 @@ EvaDB_DATABASE_DIR = "evadb_data" EvaDB_APPS_DIR = "apps" EvaDB_DATASET_DIR = "evadb_datasets" -EvaDB_CONFIG_FILE = "evadb.yml" +EvaDB_CONFIG_FILE = "evadb_config.py" FUNCTION_DIR = "functions" MODEL_DIR = "models" CATALOG_DIR = "catalog" diff --git a/evadb/eva_config.py b/evadb/evadb_config.py similarity index 100% rename from evadb/eva_config.py rename to evadb/evadb_config.py From dd48b70bd5b87439ec35f697b46ebdadaad59ee8 Mon Sep 17 00:00:00 2001 From: Gaurav Tarlok Kakkar Date: Sun, 8 Oct 2023 16:45:33 -0400 Subject: [PATCH 03/20] checkpoint --- evadb/catalog/catalog_manager.py | 36 ++++++++++++------- evadb/catalog/catalog_utils.py | 10 ++---- ...on_catelog.py => configuration_catalog.py} | 0 .../services/configuration_catalog_service.py | 2 +- evadb/database.py | 16 +++------ 5 files changed, 30 insertions(+), 34 deletions(-) rename evadb/catalog/models/{configuration_catelog.py => configuration_catalog.py} (100%) diff --git a/evadb/catalog/catalog_manager.py b/evadb/catalog/catalog_manager.py index 3a63b890a7..f6d3171634 100644 --- a/evadb/catalog/catalog_manager.py +++ b/evadb/catalog/catalog_manager.py @@ -23,7 +23,6 @@ VideoColumnName, ) from evadb.catalog.catalog_utils import ( - cleanup_storage, construct_function_cache_catalog_entry, get_document_table_column_definitions, get_image_table_column_definitions, @@ -46,7 +45,9 @@ init_db, truncate_catalog_tables, ) -from evadb.catalog.services.configuration_catalog_service import ConfigurationCatalogService +from evadb.catalog.services.configuration_catalog_service import ( + ConfigurationCatalogService, +) from evadb.catalog.services.column_catalog_service import ColumnCatalogService from evadb.catalog.services.database_catalog_service import DatabaseCatalogService from evadb.catalog.services.function_cache_catalog_service import ( @@ -63,24 +64,28 @@ from evadb.catalog.services.index_catalog_service import IndexCatalogService from evadb.catalog.services.table_catalog_service import TableCatalogService from evadb.catalog.sql_config import IDENTIFIER_COLUMN, SQLConfig -from evadb.configuration.configuration_manager import ConfigurationManager from evadb.expression.function_expression import FunctionExpression from evadb.parser.create_statement import ColumnDefinition from evadb.parser.table_ref import TableInfo from evadb.parser.types import FileFormatType from evadb.third_party.databases.interface import get_database_handler -from evadb.utils.generic_utils import generate_file_path, get_file_checksum +from evadb.utils.generic_utils import ( + generate_file_path, + get_file_checksum, + remove_directory_contents, +) from evadb.utils.logging_manager import logger class CatalogManager(object): - def __init__(self, db_uri: str, config: ConfigurationManager): + def __init__(self, db_uri: str): self._db_uri = db_uri self._sql_config = SQLConfig(db_uri) - self._config = config self._bootstrap_catalog() self._db_catalog_service = DatabaseCatalogService(self._sql_config.session) - self._config_catalog_service = ConfigurationCatalogService(self._sql_config.session) + self._config_catalog_service = ConfigurationCatalogService( + self._sql_config.session + ) self._table_catalog_service = TableCatalogService(self._sql_config.session) self._column_service = ColumnCatalogService(self._sql_config.session) self._function_service = FunctionCatalogService(self._sql_config.session) @@ -136,7 +141,10 @@ def _clear_catalog_contents(self): # truncate the catalog tables truncate_catalog_tables(self._sql_config.engine) # clean up the dataset, index, and cache directories - cleanup_storage(self._config) + for folder in ["cache_dir", "index_dir", "datasets_dir"]: + remove_directory_contents( + self._config_catalog_service.get_entry_by_name(folder).value + ) "Database catalog services" @@ -448,7 +456,7 @@ def get_all_index_catalog_entries(self): """ Function Cache related""" def insert_function_cache_catalog_entry(self, func_expr: FunctionExpression): - cache_dir = self._config.get_value("storage", "cache_dir") + cache_dir = self._config_catalog_service.get_entry_by_name("cache_dir").value entry = construct_function_cache_catalog_entry(func_expr, cache_dir=cache_dir) return self._function_cache_service.insert_entry(entry) @@ -511,7 +519,9 @@ def create_and_insert_table_catalog_entry( table_name = table_info.table_name column_catalog_entries = xform_column_definitions_to_catalog_entries(columns) - dataset_location = self._config.get_value("core", "datasets_dir") + dataset_location = self._config_catalog_service.get_entry_by_name( + "datasets_dir" + ) file_url = str(generate_file_path(dataset_location, table_name)) table_catalog_entry = self.insert_table_catalog_entry( table_name, @@ -610,7 +620,7 @@ def create_and_insert_multimedia_metadata_table_catalog_entry( table_type=TableType.SYSTEM_STRUCTURED_DATA, ) return obj - + "Configuration catalog services" def insert_configuration_catalog_entry(self, key: str, value: any): @@ -645,5 +655,5 @@ def get_configuration_catalog_entry(self, key: str) -> ConfigurationCatalogEntry # instance across all objects within the same thread. It is worth investigating whether # SQLAlchemy already handles this optimization for us, which will be explored at a # later time. -def get_catalog_instance(db_uri: str, config: ConfigurationManager): - return CatalogManager(db_uri, config) +def get_catalog_instance(db_uri: str): + return CatalogManager(db_uri) diff --git a/evadb/catalog/catalog_utils.py b/evadb/catalog/catalog_utils.py index d2187978d2..cc6b0c510f 100644 --- a/evadb/catalog/catalog_utils.py +++ b/evadb/catalog/catalog_utils.py @@ -256,12 +256,6 @@ def construct_function_cache_catalog_entry( return entry -def cleanup_storage(config): - remove_directory_contents(config.get_value("storage", "index_dir")) - remove_directory_contents(config.get_value("storage", "cache_dir")) - remove_directory_contents(config.get_value("core", "datasets_dir")) - - def get_metadata_entry_or_val( function_obj: FunctionCatalogEntry, key: str, default_val: Any = None ) -> str: @@ -308,7 +302,7 @@ def get_metadata_properties(function_obj: FunctionCatalogEntry) -> Dict: # instance across all objects within the same thread. It is worth investigating whether # SQLAlchemy already handles this optimization for us, which will be explored at a # later time. -def get_catalog_instance(db_uri: str, config: ConfigurationManager): +def get_catalog_instance(db_uri: str): from evadb.catalog.catalog_manager import CatalogManager - return CatalogManager(db_uri, config) + return CatalogManager(db_uri) diff --git a/evadb/catalog/models/configuration_catelog.py b/evadb/catalog/models/configuration_catalog.py similarity index 100% rename from evadb/catalog/models/configuration_catelog.py rename to evadb/catalog/models/configuration_catalog.py diff --git a/evadb/catalog/services/configuration_catalog_service.py b/evadb/catalog/services/configuration_catalog_service.py index 3f2722fbba..5b3646ad3c 100644 --- a/evadb/catalog/services/configuration_catalog_service.py +++ b/evadb/catalog/services/configuration_catalog_service.py @@ -16,7 +16,7 @@ from sqlalchemy.orm import Session from sqlalchemy.sql.expression import select -from evadb.catalog.models.configuration_catelog import ConfigurationCatalog +from evadb.catalog.models.configuration_catalog import ConfigurationCatalog from evadb.catalog.models.utils import ConfigurationCatalogEntry from evadb.catalog.services.base_service import BaseService from evadb.utils.errors import CatalogError diff --git a/evadb/database.py b/evadb/database.py index ed6ad2f118..086bf0a018 100644 --- a/evadb/database.py +++ b/evadb/database.py @@ -17,9 +17,7 @@ from typing import TYPE_CHECKING, Callable from evadb.catalog.catalog_utils import get_catalog_instance -from evadb.configuration.configuration_manager import ConfigurationManager from evadb.configuration.constants import DB_DEFAULT_NAME, EvaDB_DATABASE_DIR -from evadb.utils.generic_utils import parse_config_yml if TYPE_CHECKING: from evadb.catalog.catalog_manager import CatalogManager @@ -28,7 +26,6 @@ @dataclass class EvaDBDatabase: db_uri: str - config: ConfigurationManager catalog_uri: str catalog_func: Callable @@ -36,16 +33,12 @@ def catalog(self) -> "CatalogManager": """ Note: Generating an object on demand plays a crucial role in ensuring that different threads do not share the same catalog object, as it can result in serialization issues and incorrect behavior with SQLAlchemy. Refer to get_catalog_instance() """ - return self.catalog_func(self.catalog_uri, self.config) + return self.catalog_func(self.catalog_uri) def get_default_db_uri(evadb_dir: Path): - config_obj = parse_config_yml() - if config_obj["core"]["catalog_database_uri"]: - return config_obj["core"]["catalog_database_uri"] - else: - # Default to sqlite. - return f"sqlite:///{evadb_dir.resolve()}/{DB_DEFAULT_NAME}" + # Default to sqlite. + return f"sqlite:///{evadb_dir.resolve()}/{DB_DEFAULT_NAME}" def init_evadb_instance( @@ -53,8 +46,7 @@ def init_evadb_instance( ): if db_dir is None: db_dir = EvaDB_DATABASE_DIR - config = ConfigurationManager(db_dir) catalog_uri = custom_db_uri or get_default_db_uri(Path(db_dir)) - return EvaDBDatabase(db_dir, config, catalog_uri, get_catalog_instance) + return EvaDBDatabase(db_dir, catalog_uri, get_catalog_instance) From be27b60e126bc4ccb632defca2f536c9caedfa5a Mon Sep 17 00:00:00 2001 From: Gaurav Tarlok Kakkar Date: Mon, 9 Oct 2023 14:52:45 -0400 Subject: [PATCH 04/20] chekcpoint --- evadb/catalog/catalog_manager.py | 28 +++++--------- evadb/catalog/catalog_utils.py | 14 +++++++ evadb/catalog/models/base_model.py | 30 --------------- evadb/catalog/models/configuration_catalog.py | 2 +- evadb/catalog/models/utils.py | 12 +++--- evadb/configuration/bootstrap_environment.py | 38 ++++++++----------- evadb/database.py | 18 ++++++++- evadb/executor/create_function_executor.py | 11 ++++-- evadb/executor/create_index_executor.py | 6 +-- evadb/executor/drop_object_executor.py | 2 +- evadb/executor/executor_utils.py | 10 ++++- evadb/executor/vector_index_scan_executor.py | 4 +- evadb/optimizer/rules/rules.py | 9 ++++- evadb/third_party/vector_stores/pinecone.py | 15 +++----- evadb/third_party/vector_stores/utils.py | 1 + 15 files changed, 99 insertions(+), 101 deletions(-) diff --git a/evadb/catalog/catalog_manager.py b/evadb/catalog/catalog_manager.py index f6d3171634..69bc4052c8 100644 --- a/evadb/catalog/catalog_manager.py +++ b/evadb/catalog/catalog_manager.py @@ -14,7 +14,7 @@ # limitations under the License. import shutil from pathlib import Path -from typing import List +from typing import Any, List from evadb.catalog.catalog_type import ( ColumnType, @@ -138,8 +138,11 @@ def _clear_catalog_contents(self): logger.info("Clearing catalog") # drop tables which are not part of catalog drop_all_tables_except_catalog(self._sql_config.engine) - # truncate the catalog tables - truncate_catalog_tables(self._sql_config.engine) + # truncate the catalog tables except configuration_catalog + # We do not remove the configuration entries + truncate_catalog_tables( + self._sql_config.engine, tables_not_to_truncate=["configuration_catalog"] + ) # clean up the dataset, index, and cache directories for folder in ["cache_dir", "index_dir", "datasets_dir"]: remove_directory_contents( @@ -632,7 +635,7 @@ def insert_configuration_catalog_entry(self, key: str, value: any): """ self._db_catalog_service.insert_entry(key, value) - def get_configuration_catalog_entry(self, key: str) -> ConfigurationCatalogEntry: + def get_configuration_catalog_value(self, key: str) -> Any: """ Returns the value entry for the given key Arguments: @@ -643,17 +646,6 @@ def get_configuration_catalog_entry(self, key: str) -> ConfigurationCatalogEntry """ table_entry = self._db_catalog_service.get_entry_by_name(key) - - return table_entry - - -#### get catalog instance -# This function plays a crucial role in ensuring that different threads do -# not share the same catalog object, as it can result in serialization issues and -# incorrect behavior with SQLAlchemy. Therefore, whenever a catalog instance is -# required, we create a new one. One possible optimization is to share the catalog -# instance across all objects within the same thread. It is worth investigating whether -# SQLAlchemy already handles this optimization for us, which will be explored at a -# later time. -def get_catalog_instance(db_uri: str): - return CatalogManager(db_uri) + if table_entry: + return table_entry.value + return None diff --git a/evadb/catalog/catalog_utils.py b/evadb/catalog/catalog_utils.py index cc6b0c510f..29607a1cda 100644 --- a/evadb/catalog/catalog_utils.py +++ b/evadb/catalog/catalog_utils.py @@ -294,6 +294,20 @@ def get_metadata_properties(function_obj: FunctionCatalogEntry) -> Dict: return properties +def bootstrap_configs(catalog, configs: dict): + """ + load all the configuration values into the catalog table configuration_catalog + """ + for key, value in configs.items(): + catalog._config_catalog_service.insert_entry(key, value) + + +def get_configuration_value(key: str): + + catalog = get_catalog_instance() + return catalog.get_configuration_catalog_value(key) + + #### get catalog instance # This function plays a crucial role in ensuring that different threads do # not share the same catalog object, as it can result in serialization issues and diff --git a/evadb/catalog/models/base_model.py b/evadb/catalog/models/base_model.py index 8e915af245..b4b067f911 100644 --- a/evadb/catalog/models/base_model.py +++ b/evadb/catalog/models/base_model.py @@ -100,33 +100,3 @@ def _commit(self, db_session): # Custom Base Model to be inherited by all models BaseModel = declarative_base(cls=CustomModel, constructor=None) - - -def truncate_catalog_tables(engine: Engine): - """Truncate all the catalog tables""" - # https://stackoverflow.com/questions/4763472/sqlalchemy-clear-database-content-but-dont-drop-the-schema/5003705#5003705 #noqa - # reflect to refresh the metadata - BaseModel.metadata.reflect(bind=engine) - insp = sqlalchemy.inspect(engine) - if database_exists(engine.url): - with contextlib.closing(engine.connect()) as con: - trans = con.begin() - for table in reversed(BaseModel.metadata.sorted_tables): - if insp.has_table(table.name): - con.execute(table.delete()) - trans.commit() - - -def drop_all_tables_except_catalog(engine: Engine): - """drop all the tables except the catalog""" - # reflect to refresh the metadata - BaseModel.metadata.reflect(bind=engine) - insp = sqlalchemy.inspect(engine) - if database_exists(engine.url): - with contextlib.closing(engine.connect()) as con: - trans = con.begin() - for table in reversed(BaseModel.metadata.sorted_tables): - if table.name not in CATALOG_TABLES: - if insp.has_table(table.name): - table.drop(con) - trans.commit() diff --git a/evadb/catalog/models/configuration_catalog.py b/evadb/catalog/models/configuration_catalog.py index 106c230131..48d42b2cae 100644 --- a/evadb/catalog/models/configuration_catalog.py +++ b/evadb/catalog/models/configuration_catalog.py @@ -26,7 +26,7 @@ class ConfigurationCatalog(BaseModel): `_value:` the value for the config """ - __tablename__ = "configuation_catalog" + __tablename__ = "configuration_catalog" _key = Column("name", String(100), unique=True) _value = Column("engine", TextPickleType()) # TODO :- Will this work or would we need to pickle ?? diff --git a/evadb/catalog/models/utils.py b/evadb/catalog/models/utils.py index 36d11937f0..5da3a2eef5 100644 --- a/evadb/catalog/models/utils.py +++ b/evadb/catalog/models/utils.py @@ -61,7 +61,7 @@ def init_db(engine: Engine): BaseModel.metadata.create_all(bind=engine) -def truncate_catalog_tables(engine: Engine): +def truncate_catalog_tables(engine: Engine, tables_not_to_truncate: List[str] = []): """Truncate all the catalog tables""" # https://stackoverflow.com/questions/4763472/sqlalchemy-clear-database-content-but-dont-drop-the-schema/5003705#5003705 #noqa # reflect to refresh the metadata @@ -71,8 +71,9 @@ def truncate_catalog_tables(engine: Engine): with contextlib.closing(engine.connect()) as con: trans = con.begin() for table in reversed(BaseModel.metadata.sorted_tables): - if insp.has_table(table.name): - con.execute(table.delete()) + if table.name not in tables_not_to_truncate: + if insp.has_table(table.name): + con.execute(table.delete()) trans.commit() @@ -258,14 +259,15 @@ def display_format(self): "params": self.params, } + @dataclass(unsafe_hash=True) class ConfigurationCatalogEntry: """Dataclass representing an entry in the `ConfigurationCatalog`. This is done to ensure we don't expose the sqlalchemy dependencies beyond catalog service. Further, sqlalchemy does not allow sharing of objects across threads. """ - key = str, - value = str, + key: str + value: str row_id: int = None def display_format(self): diff --git a/evadb/configuration/bootstrap_environment.py b/evadb/configuration/bootstrap_environment.py index f0c33ac5d7..01655a2648 100644 --- a/evadb/configuration/bootstrap_environment.py +++ b/evadb/configuration/bootstrap_environment.py @@ -30,6 +30,7 @@ EvaDB_CONFIG_FILE, EvaDB_DATASET_DIR, ) +from evadb.evadb_config import BASE_EVADB_CONFIG from evadb.utils.generic_utils import parse_config_yml from evadb.utils.logging_manager import logger as evadb_logger @@ -38,6 +39,7 @@ def get_base_config(evadb_installation_dir: Path) -> Path: """ Get path to .evadb_config.py source path. """ + return BASE_EVADB_CONFIG # if evadb package is installed in environment if importlib_resources.is_resource("evadb", EvaDB_CONFIG_FILE): with importlib_resources.path("evadb", EvaDB_CONFIG_FILE) as config_path: @@ -54,12 +56,8 @@ def get_default_db_uri(evadb_dir: Path): Arguments: evadb_dir: path to evadb database directory """ - config_obj = parse_config_yml() - if config_obj["core"]["catalog_database_uri"]: - return config_obj["core"]["catalog_database_uri"] - else: - # Default to sqlite. - return f"sqlite:///{evadb_dir.resolve()}/{DB_DEFAULT_NAME}" + # Default to sqlite. + return f"sqlite:///{evadb_dir.resolve()}/{DB_DEFAULT_NAME}" def bootstrap_environment(evadb_dir: Path, evadb_installation_dir: Path): @@ -71,7 +69,7 @@ def bootstrap_environment(evadb_dir: Path, evadb_installation_dir: Path): evadb_installation_dir: path to evadb package """ - default_config_path = get_base_config(evadb_installation_dir).resolve() + config_obj = get_base_config(evadb_installation_dir) # creates necessary directories config_default_dict = create_directories_and_get_default_config_values( @@ -80,11 +78,8 @@ def bootstrap_environment(evadb_dir: Path, evadb_installation_dir: Path): assert evadb_dir.exists(), f"{evadb_dir} does not exist" assert evadb_installation_dir.exists(), f"{evadb_installation_dir} does not exist" - config_obj = {} - with default_config_path.open("r") as yml_file: - config_obj = yaml.load(yml_file, Loader=yaml.FullLoader) config_obj = merge_dict_of_dicts(config_default_dict, config_obj) - mode = config_obj["core"]["mode"] + mode = config_obj["mode"] # set logger to appropriate level (debug or release) level = logging.WARN if mode == "release" else logging.DEBUG @@ -95,6 +90,7 @@ def bootstrap_environment(evadb_dir: Path, evadb_installation_dir: Path): return config_obj + # TODO : Change def create_directories_and_get_default_config_values( evadb_dir: Path, evadb_installation_dir: Path, category: str = None, key: str = None @@ -126,17 +122,15 @@ def create_directories_and_get_default_config_values( model_dir.mkdir(parents=True, exist_ok=True) config_obj = {} - config_obj["core"] = {} - config_obj["storage"] = {} - config_obj["core"]["evadb_installation_dir"] = str(default_install_dir.resolve()) - config_obj["core"]["datasets_dir"] = str(dataset_location.resolve()) - config_obj["core"]["catalog_database_uri"] = get_default_db_uri(evadb_dir) - config_obj["storage"]["index_dir"] = str(index_dir.resolve()) - config_obj["storage"]["cache_dir"] = str(cache_dir.resolve()) - config_obj["storage"]["s3_download_dir"] = str(s3_dir.resolve()) - config_obj["storage"]["tmp_dir"] = str(tmp_dir.resolve()) - config_obj["storage"]["function_dir"] = str(function_dir.resolve()) - config_obj["storage"]["model_dir"] = str(model_dir.resolve()) + config_obj["evadb_installation_dir"] = str(default_install_dir.resolve()) + config_obj["datasets_dir"] = str(dataset_location.resolve()) + config_obj["catalog_database_uri"] = get_default_db_uri(evadb_dir) + config_obj["index_dir"] = str(index_dir.resolve()) + config_obj["cache_dir"] = str(cache_dir.resolve()) + config_obj["s3_download_dir"] = str(s3_dir.resolve()) + config_obj["tmp_dir"] = str(tmp_dir.resolve()) + config_obj["function_dir"] = str(function_dir.resolve()) + config_obj["model_dir"] = str(model_dir.resolve()) if category and key: return config_obj.get(category, {}).get(key, None) elif category: diff --git a/evadb/database.py b/evadb/database.py index 086bf0a018..2da55f1a37 100644 --- a/evadb/database.py +++ b/evadb/database.py @@ -16,8 +16,13 @@ from pathlib import Path from typing import TYPE_CHECKING, Callable -from evadb.catalog.catalog_utils import get_catalog_instance -from evadb.configuration.constants import DB_DEFAULT_NAME, EvaDB_DATABASE_DIR +from evadb.catalog.catalog_utils import bootstrap_configs, get_catalog_instance +from evadb.configuration.bootstrap_environment import bootstrap_environment +from evadb.configuration.constants import ( + DB_DEFAULT_NAME, + EvaDB_DATABASE_DIR, + EvaDB_INSTALLATION_DIR, +) if TYPE_CHECKING: from evadb.catalog.catalog_manager import CatalogManager @@ -47,6 +52,15 @@ def init_evadb_instance( if db_dir is None: db_dir = EvaDB_DATABASE_DIR + config_obj = bootstrap_environment( + Path(db_dir), + evadb_installation_dir=Path(EvaDB_INSTALLATION_DIR), + ) + catalog_uri = custom_db_uri or get_default_db_uri(Path(db_dir)) + # load all the config into the configuration_catalog table + print("gg") + bootstrap_configs(get_catalog_instance(catalog_uri), config_obj) + return EvaDBDatabase(db_dir, catalog_uri, get_catalog_instance) diff --git a/evadb/executor/create_function_executor.py b/evadb/executor/create_function_executor.py index 0b4ddbf7c6..8272d22c81 100644 --- a/evadb/executor/create_function_executor.py +++ b/evadb/executor/create_function_executor.py @@ -99,10 +99,13 @@ def handle_ludwig_function(self): target=arg_map["predict"], tune_for_memory=arg_map.get("tune_for_memory", False), time_limit_s=arg_map.get("time_limit", DEFAULT_TRAIN_TIME_LIMIT), - output_directory=self.db.config.get_value("storage", "tmp_dir"), + output_directory=self.db.catalog().get_configuration_catalog_value( + "tmp_dir" + ), ) model_path = os.path.join( - self.db.config.get_value("storage", "model_dir"), self.node.name + self.db.catalog().get_configuration_catalog_value("model_dir"), + self.node.name, ) auto_train_results.best_model.save(model_path) self.node.metadata.append( @@ -146,7 +149,7 @@ def handle_sklearn_function(self): aggregated_batch.frames.drop([arg_map["predict"]], axis=1, inplace=True) model.fit(X=aggregated_batch.frames, y=Y) model_path = os.path.join( - self.db.config.get_value("storage", "model_dir"), self.node.name + self.db.catalog().get_configuration_catalog_value("model_dir"), self.node.name ) pickle.dump(model, open(model_path, "wb")) self.node.metadata.append( @@ -372,7 +375,7 @@ def handle_forecasting_function(self): model_save_dir_name += "_exogenous_" + str(sorted(exogenous_columns)) model_dir = os.path.join( - self.db.config.get_value("storage", "model_dir"), + self.db.catalog().get_configuration_catalog_value("model_dir"), "tsforecasting", model_save_dir_name, str(hashlib.sha256(data.to_string().encode()).hexdigest()), diff --git a/evadb/executor/create_index_executor.py b/evadb/executor/create_index_executor.py index 407cfef3c0..4b5c164745 100644 --- a/evadb/executor/create_index_executor.py +++ b/evadb/executor/create_index_executor.py @@ -75,7 +75,7 @@ def _create_native_index(self): # On-disk saving path for EvaDB index. def _get_evadb_index_save_path(self) -> Path: - index_dir = Path(self.config.get_value("storage", "index_dir")) + index_dir = Path(self.db.catalog().get_configuration_catalog_value("index_dir")) if not index_dir.exists(): index_dir.mkdir(parents=True, exist_ok=True) return str( @@ -121,7 +121,7 @@ def _create_evadb_index(self): self.vector_store_type, self.name, **handle_vector_store_params( - self.vector_store_type, index_path + self.vector_store_type, index_path, self.catalog ), ) else: @@ -151,7 +151,7 @@ def _create_evadb_index(self): self.vector_store_type, self.name, **handle_vector_store_params( - self.vector_store_type, index_path + self.vector_store_type, index_path, self.catalog ), ) index.create(input_dim) diff --git a/evadb/executor/drop_object_executor.py b/evadb/executor/drop_object_executor.py index 38d5419dc4..b9aa9e016a 100644 --- a/evadb/executor/drop_object_executor.py +++ b/evadb/executor/drop_object_executor.py @@ -118,7 +118,7 @@ def _handle_drop_index(self, index_name: str, if_exists: bool): index = VectorStoreFactory.init_vector_store( index_obj.type, index_obj.name, - **handle_vector_store_params(index_obj.type, index_obj.save_file_path), + **handle_vector_store_params(index_obj.type, index_obj.save_file_path, self.catalog), ) assert ( index is not None diff --git a/evadb/executor/executor_utils.py b/evadb/executor/executor_utils.py index 88d74ce3bc..6e0b812b85 100644 --- a/evadb/executor/executor_utils.py +++ b/evadb/executor/executor_utils.py @@ -152,7 +152,7 @@ def validate_media(file_path: Path, media_type: FileFormatType) -> bool: def handle_vector_store_params( - vector_store_type: VectorStoreType, index_path: str + vector_store_type: VectorStoreType, index_path: str, catalog ) -> dict: """Handle vector store parameters based on the vector store type and index path. @@ -174,7 +174,13 @@ def handle_vector_store_params( elif vector_store_type == VectorStoreType.CHROMADB: return {"index_path": str(Path(index_path).parent)} elif vector_store_type == VectorStoreType.PINECONE: - return {} + # add the required API_KEYS + return { + "PINECONE_API_KEY": catalog().get_configuration_catalog_value( + "PINECONE_API_KEY" + ), + "PINECONE_ENV": catalog().get_configuration_catalog_value("PINECONE_ENV"), + } else: raise ValueError("Unsupported vector store type: {}".format(vector_store_type)) diff --git a/evadb/executor/vector_index_scan_executor.py b/evadb/executor/vector_index_scan_executor.py index 2b58f5c337..b2e1bb219e 100644 --- a/evadb/executor/vector_index_scan_executor.py +++ b/evadb/executor/vector_index_scan_executor.py @@ -102,7 +102,9 @@ def _evadb_vector_index_scan(self, *args, **kwargs): self.index = VectorStoreFactory.init_vector_store( self.vector_store_type, self.index_name, - **handle_vector_store_params(self.vector_store_type, self.index_path), + **handle_vector_store_params( + self.vector_store_type, self.index_path, self.db.catalog + ), ) search_feat = self._get_search_query_results() diff --git a/evadb/optimizer/rules/rules.py b/evadb/optimizer/rules/rules.py index 955885f1ba..6cce17d4d0 100644 --- a/evadb/optimizer/rules/rules.py +++ b/evadb/optimizer/rules/rules.py @@ -836,7 +836,10 @@ def apply(self, before: LogicalCreateIndex, context: OptimizerContext): before.index_def, ) child = SeqScanPlan(None, before.project_expr_list, before.table_ref.alias) - batch_mem_size = context.db.config.get_value("executor", "batch_mem_size") + + batch_mem_size = context.db.catalog().get_configuration_catalog_value( + "batch_mem_size" + ) child.append_child( StoragePlan( before.table_ref.table.table_obj, @@ -933,7 +936,9 @@ def apply(self, before: LogicalGet, context: OptimizerContext): # read in a batch from storage engine. # Todo: Experiment heuristics. after = SeqScanPlan(None, before.target_list, before.alias) - batch_mem_size = context.db.config.get_value("executor", "batch_mem_size") + batch_mem_size = context.db.catalog().get_configuration_catalog_value( + "batch_mem_size" + ) after.append_child( StoragePlan( before.table_obj, diff --git a/evadb/third_party/vector_stores/pinecone.py b/evadb/third_party/vector_stores/pinecone.py index 837c95e579..3bead1a690 100644 --- a/evadb/third_party/vector_stores/pinecone.py +++ b/evadb/third_party/vector_stores/pinecone.py @@ -15,7 +15,6 @@ import os from typing import List -from evadb.configuration.configuration_manager import ConfigurationManager from evadb.third_party.vector_stores.types import ( FeaturePayload, VectorIndexQuery, @@ -30,34 +29,30 @@ class PineconeVectorStore(VectorStore): - def __init__(self, index_name: str) -> None: + def __init__(self, index_name: str, **kwargs) -> None: try_to_import_pinecone_client() global _pinecone_init_done # pinecone only allows index names with lower alpha-numeric characters and '-' self._index_name = index_name.strip().lower() # Get the API key. - self._api_key = ConfigurationManager().get_value( - "third_party", "PINECONE_API_KEY" - ) + self._api_key = kwargs.get("PINECONE_API_KEY") if not self._api_key: self._api_key = os.environ.get("PINECONE_API_KEY") assert ( self._api_key - ), "Please set your Pinecone API key in evadb.yml file (third_party, pinecone_api_key) or environment variable (PINECONE_KEY). It can be found at Pinecone Dashboard > API Keys > Value" + ), "Please set your `PINECONE_API_KEY` using set command or environment variable (PINECONE_KEY). It can be found at Pinecone Dashboard > API Keys > Value" # Get the environment name. - self._environment = ConfigurationManager().get_value( - "third_party", "PINECONE_ENV" - ) + self._environment = kwargs.get("PINECONE_ENV") if not self._environment: self._environment = os.environ.get("PINECONE_ENV") assert ( self._environment - ), "Please set the Pinecone environment key in evadb.yml file (third_party, pinecone_env) or environment variable (PINECONE_ENV). It can be found Pinecone Dashboard > API Keys > Environment." + ), "Please set your `PINECONE_ENV` or environment variable (PINECONE_ENV). It can be found Pinecone Dashboard > API Keys > Environment." if not _pinecone_init_done: # Initialize pinecone. diff --git a/evadb/third_party/vector_stores/utils.py b/evadb/third_party/vector_stores/utils.py index e47d24f1f9..121b7d032d 100644 --- a/evadb/third_party/vector_stores/utils.py +++ b/evadb/third_party/vector_stores/utils.py @@ -41,6 +41,7 @@ def init_vector_store( from evadb.third_party.vector_stores.pinecone import required_params validate_kwargs(kwargs, required_params, required_params) + return PineconeVectorStore(index_name, **kwargs) elif vector_store_type == VectorStoreType.CHROMADB: From 277975af1d72948da1fbc2306fb1b51f216e3123 Mon Sep 17 00:00:00 2001 From: Gaurav Tarlok Kakkar Date: Tue, 10 Oct 2023 00:15:50 -0400 Subject: [PATCH 05/20] checkpoint --- evadb/binder/statement_binder.py | 6 +++++ evadb/catalog/catalog_manager.py | 19 +++++++-------- .../services/configuration_catalog_service.py | 23 +++++++++++++++---- evadb/executor/abstract_executor.py | 6 ----- evadb/executor/execution_context.py | 14 +++-------- evadb/executor/load_multimedia_executor.py | 4 +++- evadb/executor/set_executor.py | 3 +-- evadb/optimizer/optimizer_context.py | 6 +++-- evadb/optimizer/plan_generator.py | 4 +++- evadb/optimizer/rules/rules_manager.py | 6 ++--- .../long/test_explain_executor.py | 4 ++-- .../long/test_optimizer_rules.py | 8 +++---- test/integration_tests/long/test_reuse.py | 2 +- .../short/test_set_executor.py | 4 +++- test/unit_tests/optimizer/rules/test_rules.py | 14 +++++------ .../optimizer/test_optimizer_task.py | 6 ++--- test/util.py | 17 ++++++++------ 17 files changed, 80 insertions(+), 66 deletions(-) diff --git a/evadb/binder/statement_binder.py b/evadb/binder/statement_binder.py index 199c535181..0ed7e7f41a 100644 --- a/evadb/binder/statement_binder.py +++ b/evadb/binder/statement_binder.py @@ -33,6 +33,7 @@ from evadb.catalog.catalog_type import ColumnType, TableType from evadb.catalog.catalog_utils import get_metadata_properties, is_document_table from evadb.configuration.constants import EvaDB_INSTALLATION_DIR +from evadb.executor.execution_context import Context from evadb.expression.abstract_expression import AbstractExpression, ExpressionType from evadb.expression.function_expression import FunctionExpression from evadb.expression.tuple_value_expression import TupleValueExpression @@ -264,6 +265,11 @@ def _bind_tuple_expr(self, node: TupleValueExpression): @bind.register(FunctionExpression) def _bind_func_expr(self, node: FunctionExpression): + # setup the context + # we read the GPUs from the catalog and populate in the context + gpus_ids = self._catalog().get_configuration_catalog_value("gpu_ids") + node._context = Context(gpus_ids) + # handle the special case of "extract_object" if node.name.upper() == str(FunctionType.EXTRACT_OBJECT): handle_bind_extract_object_function(node, self) diff --git a/evadb/catalog/catalog_manager.py b/evadb/catalog/catalog_manager.py index 69bc4052c8..4b8daaea1e 100644 --- a/evadb/catalog/catalog_manager.py +++ b/evadb/catalog/catalog_manager.py @@ -145,9 +145,8 @@ def _clear_catalog_contents(self): ) # clean up the dataset, index, and cache directories for folder in ["cache_dir", "index_dir", "datasets_dir"]: - remove_directory_contents( - self._config_catalog_service.get_entry_by_name(folder).value - ) + print(folder, self.get_configuration_catalog_value(folder)) + remove_directory_contents(self.get_configuration_catalog_value(folder)) "Database catalog services" @@ -459,7 +458,7 @@ def get_all_index_catalog_entries(self): """ Function Cache related""" def insert_function_cache_catalog_entry(self, func_expr: FunctionExpression): - cache_dir = self._config_catalog_service.get_entry_by_name("cache_dir").value + cache_dir = self.get_configuration_catalog_value("cache_dir") entry = construct_function_cache_catalog_entry(func_expr, cache_dir=cache_dir) return self._function_cache_service.insert_entry(entry) @@ -522,9 +521,7 @@ def create_and_insert_table_catalog_entry( table_name = table_info.table_name column_catalog_entries = xform_column_definitions_to_catalog_entries(columns) - dataset_location = self._config_catalog_service.get_entry_by_name( - "datasets_dir" - ) + dataset_location = self.get_configuration_catalog_value("datasets_dir") file_url = str(generate_file_path(dataset_location, table_name)) table_catalog_entry = self.insert_table_catalog_entry( table_name, @@ -626,14 +623,14 @@ def create_and_insert_multimedia_metadata_table_catalog_entry( "Configuration catalog services" - def insert_configuration_catalog_entry(self, key: str, value: any): - """A new entry is persisted in the database catalog." + def upsert_configuration_catalog_entry(self, key: str, value: any): + """Upserts configuration catalog entry" Args: key: key name value: value name """ - self._db_catalog_service.insert_entry(key, value) + self._config_catalog_service.upsert_entry(key, value) def get_configuration_catalog_value(self, key: str) -> Any: """ @@ -645,7 +642,7 @@ def get_configuration_catalog_value(self, key: str) -> Any: ConfigurationCatalogEntry """ - table_entry = self._db_catalog_service.get_entry_by_name(key) + table_entry = self._config_catalog_service.get_entry_by_name(key) if table_entry: return table_entry.value return None diff --git a/evadb/catalog/services/configuration_catalog_service.py b/evadb/catalog/services/configuration_catalog_service.py index 5b3646ad3c..07843bc14e 100644 --- a/evadb/catalog/services/configuration_catalog_service.py +++ b/evadb/catalog/services/configuration_catalog_service.py @@ -33,10 +33,7 @@ def insert_entry( value: any, ): try: - config_catalog_obj = self.model( - key = key, - value = value - ) + config_catalog_obj = self.model(key=key, value=value) config_catalog_obj = config_catalog_obj.save(self.session) except Exception as e: @@ -59,3 +56,21 @@ def get_entry_by_name(self, key: str) -> ConfigurationCatalogEntry: if entry: return entry.as_dataclass() return entry + + def upsert_entry( + self, + key: str, + value: any, + ): + try: + entry = self.session.execute( + select(self.model).filter(self.model._key == key) + ).scalar_one_or_none() + if entry: + entry.update(self.session, value=value) + else: + self.insert_entry(key, value) + except Exception as e: + raise CatalogError( + f"Error while upserting entry to ConfigurationCatalog: {str(e)}" + ) diff --git a/evadb/executor/abstract_executor.py b/evadb/executor/abstract_executor.py index 66a453f71d..8647901460 100644 --- a/evadb/executor/abstract_executor.py +++ b/evadb/executor/abstract_executor.py @@ -18,7 +18,6 @@ if TYPE_CHECKING: from evadb.catalog.catalog_manager import CatalogManager -from evadb.configuration.configuration_manager import ConfigurationManager from evadb.database import EvaDBDatabase from evadb.models.storage.batch import Batch from evadb.plan_nodes.abstract_plan import AbstractPlan @@ -36,7 +35,6 @@ class AbstractExecutor(ABC): def __init__(self, db: EvaDBDatabase, node: AbstractPlan): self._db = db self._node = node - self._config: ConfigurationManager = db.config if db else None self._children = [] # @lru_cache(maxsize=None) @@ -74,10 +72,6 @@ def node(self) -> AbstractPlan: def db(self) -> EvaDBDatabase: return self._db - @property - def config(self) -> ConfigurationManager: - return self._config - @abstractmethod def exec(self, *args, **kwargs) -> Iterable[Batch]: """ diff --git a/evadb/executor/execution_context.py b/evadb/executor/execution_context.py index d5feea4648..b0b5e1a696 100644 --- a/evadb/executor/execution_context.py +++ b/evadb/executor/execution_context.py @@ -16,7 +16,6 @@ import random from typing import List -from evadb.configuration.configuration_manager import ConfigurationManager from evadb.constants import NO_GPU from evadb.utils.generic_utils import get_gpu_count, is_gpu_available @@ -28,13 +27,8 @@ class Context: if using horovod: current rank etc. """ - def __new__(cls): - if not hasattr(cls, "_instance"): - cls._instance = super(Context, cls).__new__(cls) - return cls._instance - - def __init__(self): - self._config_manager = ConfigurationManager() + def __init__(self, user_provided_gpu_conf=[]): + self._user_provided_gpu_conf = user_provided_gpu_conf self._gpus = self._populate_gpu_ids() @property @@ -42,10 +36,8 @@ def gpus(self): return self._gpus def _populate_gpu_from_config(self) -> List: - # Populate GPU IDs from yaml config file. - gpu_conf = self._config_manager.get_value("executor", "gpu_ids") available_gpus = [i for i in range(get_gpu_count())] - return list(set(available_gpus) & set(gpu_conf)) + return list(set(available_gpus) & set(self._user_provided_gpu_conf)) def _populate_gpu_from_env(self) -> List: # Populate GPU IDs from env variable. diff --git a/evadb/executor/load_multimedia_executor.py b/evadb/executor/load_multimedia_executor.py index 90bc7edba3..9f9828476f 100644 --- a/evadb/executor/load_multimedia_executor.py +++ b/evadb/executor/load_multimedia_executor.py @@ -53,7 +53,9 @@ def exec(self, *args, **kwargs): # If it is a s3 path, download the file to local if self.node.file_path.as_posix().startswith("s3:/"): - s3_dir = Path(self.config.get_value("storage", "s3_download_dir")) + s3_dir = self.catalog().get_configuration_catalog_value( + "s3_download_dir" + ) dst_path = s3_dir / self.node.table_info.table_name dst_path.mkdir(parents=True, exist_ok=True) video_files = download_from_s3(self.node.file_path, dst_path) diff --git a/evadb/executor/set_executor.py b/evadb/executor/set_executor.py index c73deaf976..9d0bcec4b5 100644 --- a/evadb/executor/set_executor.py +++ b/evadb/executor/set_executor.py @@ -36,8 +36,7 @@ def exec(self, *args, **kwargs): as a separate PR for the issue #1140, where all instances of config use will be replaced """ - self._config.update_value( - category="default", + self.catalog().upsert_configuration_catalog_entry( key=self.node.config_name, value=self.node.config_value.value, ) diff --git a/evadb/optimizer/optimizer_context.py b/evadb/optimizer/optimizer_context.py index 6cb72dfd82..fcf88571e1 100644 --- a/evadb/optimizer/optimizer_context.py +++ b/evadb/optimizer/optimizer_context.py @@ -43,8 +43,10 @@ def __init__( self._task_stack = OptimizerTaskStack() self._memo = Memo() self._cost_model = cost_model - self._rules_manager = rules_manager or RulesManager(db.config) - + # check if ray is enabled + is_ray_enabled = self.db.catalog().get_configuration_catalog_value("ray") + self._rules_manager = rules_manager or RulesManager({"ray": is_ray_enabled}) + @property def db(self): return self._db diff --git a/evadb/optimizer/plan_generator.py b/evadb/optimizer/plan_generator.py index 5b8c6d2a24..5396f4b939 100644 --- a/evadb/optimizer/plan_generator.py +++ b/evadb/optimizer/plan_generator.py @@ -39,7 +39,9 @@ def __init__( cost_model: CostModel = None, ) -> None: self.db = db - self.rules_manager = rules_manager or RulesManager(db.config) + # check if ray is enabled + is_ray_enabled = self.db.catalog().get_configuration_catalog_value("ray") + self.rules_manager = rules_manager or RulesManager({"ray": is_ray_enabled}) self.cost_model = cost_model or CostModel() def execute_task_stack(self, task_stack: OptimizerTaskStack): diff --git a/evadb/optimizer/rules/rules_manager.py b/evadb/optimizer/rules/rules_manager.py index e9720b78d5..147206bd1a 100644 --- a/evadb/optimizer/rules/rules_manager.py +++ b/evadb/optimizer/rules/rules_manager.py @@ -67,7 +67,7 @@ class RulesManager: - def __init__(self, config: ConfigurationManager): + def __init__(self, configs: dict): self._logical_rules = [ LogicalInnerJoinCommutativity(), CacheFunctionExpressionInApply(), @@ -121,9 +121,9 @@ def __init__(self, config: ConfigurationManager): # These rules are enabled only if # (1) ray is installed and (2) ray is enabled # Ray must be installed using pip - # It must also be enabled in "evadb.yml" + # It must also be enabled using the SET command # NOTE: By default, it is not enabled - ray_enabled = config.get_value("experimental", "ray") + ray_enabled = configs.get("ray", False) if is_ray_enabled_and_installed(ray_enabled): self._implementation_rules.extend( [ diff --git a/test/integration_tests/long/test_explain_executor.py b/test/integration_tests/long/test_explain_executor.py index 9d2c8e8ca5..cad1718c36 100644 --- a/test/integration_tests/long/test_explain_executor.py +++ b/test/integration_tests/long/test_explain_executor.py @@ -57,7 +57,7 @@ def test_explain_simple_select(self): """|__ ProjectPlan\n |__ SeqScanPlan\n |__ StoragePlan\n""" ) self.assertEqual(batch.frames[0][0], expected_output) - rules_manager = RulesManager(self.evadb.config) + rules_manager = RulesManager() with disable_rules(rules_manager, [XformLateralJoinToLinearFlow()]): custom_plan_generator = PlanGenerator(self.evadb, rules_manager) select_query = "EXPLAIN SELECT id, data FROM MyVideo JOIN LATERAL DummyObjectDetector(data) AS T ;" @@ -68,7 +68,7 @@ def test_explain_simple_select(self): self.assertEqual(batch.frames[0][0], expected_output) # Disable more rules - rules_manager = RulesManager(self.evadb.config) + rules_manager = RulesManager() with disable_rules( rules_manager, [ diff --git a/test/integration_tests/long/test_optimizer_rules.py b/test/integration_tests/long/test_optimizer_rules.py index 27f5189a4b..e703492d6b 100644 --- a/test/integration_tests/long/test_optimizer_rules.py +++ b/test/integration_tests/long/test_optimizer_rules.py @@ -88,7 +88,7 @@ def test_should_benefit_from_pushdown(self, merge_mock, evaluate_mock): result_without_pushdown_rules = None with time_without_rule: - rules_manager = RulesManager(self.evadb.config) + rules_manager = RulesManager() with disable_rules( rules_manager, [PushDownFilterThroughApplyAndMerge(), PushDownFilterThroughJoin()], @@ -109,7 +109,7 @@ def test_should_benefit_from_pushdown(self, merge_mock, evaluate_mock): self.assertGreater(evaluate_count_without_rule, 3 * evaluate_count_with_rule) result_without_xform_rule = None - rules_manager = RulesManager(self.evadb.config) + rules_manager = RulesManager() with disable_rules(rules_manager, [XformLateralJoinToLinearFlow()]): custom_plan_generator = PlanGenerator(self.evadb, rules_manager) result_without_xform_rule = execute_query_fetch_all( @@ -132,7 +132,7 @@ def test_should_pushdown_without_pushdown_join_rule(self): time_without_rule = Timer() result_without_pushdown_join_rule = None with time_without_rule: - rules_manager = RulesManager(self.evadb.config) + rules_manager = RulesManager() with disable_rules(rules_manager, [PushDownFilterThroughJoin()]): # should use PushDownFilterThroughApplyAndMerge() custom_plan_generator = PlanGenerator(self.evadb, rules_manager) @@ -264,7 +264,7 @@ def test_reorder_rule_should_not_have_side_effects(self): query = "SELECT id FROM MyVideo WHERE id < 20 AND id > 10;" result = execute_query_fetch_all(self.evadb, query) - rules_manager = RulesManager(self.evadb.config) + rules_manager = RulesManager() with disable_rules(rules_manager, [ReorderPredicates()]): custom_plan_generator = PlanGenerator(self.evadb, rules_manager) expected = execute_query_fetch_all( diff --git a/test/integration_tests/long/test_reuse.py b/test/integration_tests/long/test_reuse.py index b90fcbb7f1..f1f39fc664 100644 --- a/test/integration_tests/long/test_reuse.py +++ b/test/integration_tests/long/test_reuse.py @@ -77,7 +77,7 @@ def _verify_reuse_correctness(self, query, reuse_batch): # surfaces when the system is running on low memory. Explicitly calling garbage # collection to reduce the memory usage. gc.collect() - rules_manager = RulesManager(self.evadb.config) + rules_manager = RulesManager() with disable_rules( rules_manager, [ diff --git a/test/integration_tests/short/test_set_executor.py b/test/integration_tests/short/test_set_executor.py index d732681433..2a92ac2f94 100644 --- a/test/integration_tests/short/test_set_executor.py +++ b/test/integration_tests/short/test_set_executor.py @@ -36,6 +36,8 @@ def tearDownClass(cls): # integration test def test_set_execution(self): execute_query_fetch_all(self.evadb, "SET OPENAIKEY = 'ABCD';") - current_config_value = self.evadb.config.get_value("default", "OPENAIKEY") + current_config_value = self.evadb.catalog().get_configuration_catalog_value( + "OPENAIKEY" + ) self.assertEqual("ABCD", current_config_value) diff --git a/test/unit_tests/optimizer/rules/test_rules.py b/test/unit_tests/optimizer/rules/test_rules.py index e71dbef7cc..b84bf18682 100644 --- a/test/unit_tests/optimizer/rules/test_rules.py +++ b/test/unit_tests/optimizer/rules/test_rules.py @@ -168,8 +168,8 @@ def test_supported_rules(self): XformExtractObjectToLinearFlow(), ] rewrite_rules = ( - RulesManager(self.evadb.config).stage_one_rewrite_rules - + RulesManager(self.evadb.config).stage_two_rewrite_rules + RulesManager().stage_one_rewrite_rules + + RulesManager().stage_two_rewrite_rules ) self.assertEqual( len(supported_rewrite_rules), @@ -187,14 +187,14 @@ def test_supported_rules(self): ] self.assertEqual( len(supported_logical_rules), - len(RulesManager(self.evadb.config).logical_rules), + len(RulesManager().logical_rules), ) for rule in supported_logical_rules: self.assertTrue( any( isinstance(rule, type(x)) - for x in RulesManager(self.evadb.config).logical_rules + for x in RulesManager().logical_rules ) ) @@ -244,14 +244,14 @@ def test_supported_rules(self): supported_implementation_rules.append(LogicalExchangeToPhysical()) self.assertEqual( len(supported_implementation_rules), - len(RulesManager(self.evadb.config).implementation_rules), + len(RulesManager().implementation_rules), ) for rule in supported_implementation_rules: self.assertTrue( any( isinstance(rule, type(x)) - for x in RulesManager(self.evadb.config).implementation_rules + for x in RulesManager().implementation_rules ) ) @@ -280,7 +280,7 @@ def test_embed_sample_into_get_does_not_work_with_structured_data(self): self.assertFalse(rule.check(logi_sample, MagicMock())) def test_disable_rules(self): - rules_manager = RulesManager(self.evadb.config) + rules_manager = RulesManager() with disable_rules(rules_manager, [PushDownFilterThroughApplyAndMerge()]): self.assertFalse( any( diff --git a/test/unit_tests/optimizer/test_optimizer_task.py b/test/unit_tests/optimizer/test_optimizer_task.py index 6230dfb164..ba693dd3c0 100644 --- a/test/unit_tests/optimizer/test_optimizer_task.py +++ b/test/unit_tests/optimizer/test_optimizer_task.py @@ -51,12 +51,12 @@ def test_abstract_optimizer_task(self): task.execute() def top_down_rewrite(self, opr): - opt_cxt = OptimizerContext(MagicMock(), CostModel(), RulesManager(MagicMock())) + opt_cxt = OptimizerContext(MagicMock(), CostModel(), RulesManager()) grp_expr = opt_cxt.add_opr_to_group(opr) root_grp_id = grp_expr.group_id opt_cxt.task_stack.push( TopDownRewrite( - grp_expr, RulesManager(MagicMock()).stage_one_rewrite_rules, opt_cxt + grp_expr, RulesManager().stage_one_rewrite_rules, opt_cxt ) ) self.execute_task_stack(opt_cxt.task_stack) @@ -66,7 +66,7 @@ def bottom_up_rewrite(self, root_grp_id, opt_cxt): grp_expr = opt_cxt.memo.groups[root_grp_id].logical_exprs[0] opt_cxt.task_stack.push( BottomUpRewrite( - grp_expr, RulesManager(MagicMock()).stage_two_rewrite_rules, opt_cxt + grp_expr, RulesManager().stage_two_rewrite_rules, opt_cxt ) ) self.execute_task_stack(opt_cxt.task_stack) diff --git a/test/util.py b/test/util.py index 23eaeb35e8..945e2175e7 100644 --- a/test/util.py +++ b/test/util.py @@ -30,8 +30,12 @@ from evadb.binder.statement_binder import StatementBinder from evadb.binder.statement_binder_context import StatementBinderContext from evadb.catalog.catalog_type import NdArrayType -from evadb.configuration.configuration_manager import ConfigurationManager -from evadb.configuration.constants import EvaDB_DATABASE_DIR, EvaDB_INSTALLATION_DIR +from evadb.configuration.constants import ( + S3_DOWNLOAD_DIR, + TMP_DIR, + EvaDB_DATABASE_DIR, + EvaDB_INSTALLATION_DIR, +) from evadb.database import init_evadb_instance from evadb.expression.function_expression import FunctionExpression from evadb.functions.abstract.abstract_function import ( @@ -67,7 +71,7 @@ def suffix_pytest_xdist_worker_id_to_dir(path: str): path = Path(str(worker_id) + "_" + path) except KeyError: pass - return path + return Path(path) def get_evadb_for_testing(uri: str = None): @@ -79,14 +83,12 @@ def get_evadb_for_testing(uri: str = None): def get_tmp_dir(): db_dir = suffix_pytest_xdist_worker_id_to_dir(EvaDB_DATABASE_DIR) - config = ConfigurationManager(Path(db_dir)) - return config.get_value("storage", "tmp_dir") + return db_dir / TMP_DIR def s3_dir(): db_dir = suffix_pytest_xdist_worker_id_to_dir(EvaDB_DATABASE_DIR) - config = ConfigurationManager(Path(db_dir)) - return config.get_value("storage", "s3_download_dir") + return db_dir / S3_DOWNLOAD_DIR EvaDB_TEST_DATA_DIR = Path(EvaDB_INSTALLATION_DIR).parent @@ -420,6 +422,7 @@ def create_large_scale_image_dataset(num=1000000): def create_sample_video(num_frames=NUM_FRAMES): file_name = os.path.join(get_tmp_dir(), "dummy.avi") + print(file_name) try: os.remove(file_name) except FileNotFoundError: From 7373425fd47bbb0557c12f385ee1acac466fa834 Mon Sep 17 00:00:00 2001 From: Gaurav Tarlok Kakkar Date: Tue, 10 Oct 2023 00:18:27 -0400 Subject: [PATCH 06/20] fix linter --- evadb/binder/statement_binder.py | 2 +- evadb/catalog/catalog_manager.py | 2 +- evadb/catalog/catalog_utils.py | 4 +-- evadb/catalog/models/base_model.py | 7 ---- evadb/catalog/models/configuration_catalog.py | 4 ++- evadb/configuration/bootstrap_environment.py | 2 -- evadb/configuration/configuration_manager.py | 2 +- evadb/database.py | 3 +- evadb/evadb_config.py | 34 +++++++++---------- evadb/executor/create_function_executor.py | 3 +- evadb/executor/drop_object_executor.py | 4 ++- evadb/optimizer/optimizer_context.py | 2 +- evadb/optimizer/rules/rules_manager.py | 1 - evadb/third_party/vector_stores/utils.py | 2 +- test/unit_tests/optimizer/rules/test_rules.py | 5 +-- .../optimizer/test_optimizer_task.py | 8 ++--- 16 files changed, 34 insertions(+), 51 deletions(-) diff --git a/evadb/binder/statement_binder.py b/evadb/binder/statement_binder.py index 0ed7e7f41a..152ea104b2 100644 --- a/evadb/binder/statement_binder.py +++ b/evadb/binder/statement_binder.py @@ -269,7 +269,7 @@ def _bind_func_expr(self, node: FunctionExpression): # we read the GPUs from the catalog and populate in the context gpus_ids = self._catalog().get_configuration_catalog_value("gpu_ids") node._context = Context(gpus_ids) - + # handle the special case of "extract_object" if node.name.upper() == str(FunctionType.EXTRACT_OBJECT): handle_bind_extract_object_function(node, self) diff --git a/evadb/catalog/catalog_manager.py b/evadb/catalog/catalog_manager.py index 4b8daaea1e..ed82d47355 100644 --- a/evadb/catalog/catalog_manager.py +++ b/evadb/catalog/catalog_manager.py @@ -45,10 +45,10 @@ init_db, truncate_catalog_tables, ) +from evadb.catalog.services.column_catalog_service import ColumnCatalogService from evadb.catalog.services.configuration_catalog_service import ( ConfigurationCatalogService, ) -from evadb.catalog.services.column_catalog_service import ColumnCatalogService from evadb.catalog.services.database_catalog_service import DatabaseCatalogService from evadb.catalog.services.function_cache_catalog_service import ( FunctionCacheCatalogService, diff --git a/evadb/catalog/catalog_utils.py b/evadb/catalog/catalog_utils.py index 29607a1cda..7ff1a535fe 100644 --- a/evadb/catalog/catalog_utils.py +++ b/evadb/catalog/catalog_utils.py @@ -32,11 +32,10 @@ TableCatalogEntry, ) from evadb.catalog.sql_config import IDENTIFIER_COLUMN -from evadb.configuration.configuration_manager import ConfigurationManager from evadb.expression.function_expression import FunctionExpression from evadb.expression.tuple_value_expression import TupleValueExpression from evadb.parser.create_statement import ColConstraintInfo, ColumnDefinition -from evadb.utils.generic_utils import get_str_hash, remove_directory_contents +from evadb.utils.generic_utils import get_str_hash def is_video_table(table: TableCatalogEntry): @@ -303,7 +302,6 @@ def bootstrap_configs(catalog, configs: dict): def get_configuration_value(key: str): - catalog = get_catalog_instance() return catalog.get_configuration_catalog_value(key) diff --git a/evadb/catalog/models/base_model.py b/evadb/catalog/models/base_model.py index b4b067f911..5826d75425 100644 --- a/evadb/catalog/models/base_model.py +++ b/evadb/catalog/models/base_model.py @@ -12,16 +12,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import contextlib - -import sqlalchemy from sqlalchemy import Column, Integer -from sqlalchemy.engine import Engine from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.ext.declarative import declarative_base -from sqlalchemy_utils import database_exists - -from evadb.catalog.sql_config import CATALOG_TABLES from evadb.utils.logging_manager import logger diff --git a/evadb/catalog/models/configuration_catalog.py b/evadb/catalog/models/configuration_catalog.py index 48d42b2cae..faf99b5abb 100644 --- a/evadb/catalog/models/configuration_catalog.py +++ b/evadb/catalog/models/configuration_catalog.py @@ -29,7 +29,9 @@ class ConfigurationCatalog(BaseModel): __tablename__ = "configuration_catalog" _key = Column("name", String(100), unique=True) - _value = Column("engine", TextPickleType()) # TODO :- Will this work or would we need to pickle ?? + _value = Column( + "engine", TextPickleType() + ) # TODO :- Will this work or would we need to pickle ?? def __init__(self, key: str, value: any): self._key = key diff --git a/evadb/configuration/bootstrap_environment.py b/evadb/configuration/bootstrap_environment.py index 01655a2648..085b8d72d9 100644 --- a/evadb/configuration/bootstrap_environment.py +++ b/evadb/configuration/bootstrap_environment.py @@ -17,7 +17,6 @@ from pathlib import Path from typing import Union -import yaml from evadb.configuration.constants import ( CACHE_DIR, @@ -31,7 +30,6 @@ EvaDB_DATASET_DIR, ) from evadb.evadb_config import BASE_EVADB_CONFIG -from evadb.utils.generic_utils import parse_config_yml from evadb.utils.logging_manager import logger as evadb_logger diff --git a/evadb/configuration/configuration_manager.py b/evadb/configuration/configuration_manager.py index ea45963437..8722b66a6a 100644 --- a/evadb/configuration/configuration_manager.py +++ b/evadb/configuration/configuration_manager.py @@ -21,7 +21,7 @@ class ConfigurationManager(object): - def __init__(self, evadb_dir: str = None, catalog_obj = None) -> None: + def __init__(self, evadb_dir: str = None, catalog_obj=None) -> None: self._evadb_dir = evadb_dir or EvaDB_DATABASE_DIR self._catalog_obj = catalog_obj self._populate_base_configs() diff --git a/evadb/database.py b/evadb/database.py index 2da55f1a37..9c22d5b9ff 100644 --- a/evadb/database.py +++ b/evadb/database.py @@ -60,7 +60,6 @@ def init_evadb_instance( catalog_uri = custom_db_uri or get_default_db_uri(Path(db_dir)) # load all the config into the configuration_catalog table - print("gg") bootstrap_configs(get_catalog_instance(catalog_uri), config_obj) - + return EvaDBDatabase(db_dir, catalog_uri, get_catalog_instance) diff --git a/evadb/evadb_config.py b/evadb/evadb_config.py index d7882f4b35..e012f95567 100644 --- a/evadb/evadb_config.py +++ b/evadb/evadb_config.py @@ -21,21 +21,19 @@ """ BASE_EVADB_CONFIG = { - "evadb_installation_dir": "", - "datasets_dir": "", - "catalog_database_uri": "", - "application": "evadb", - "mode": "release", - "batch_mem_size": 30000000, - "gpu_batch_size": 1, # batch size used for gpu_operations - "gpu_ids": [ - 0 - ], - "host": "0.0.0.0", - "port": 8803, - "socket_timeout": 60, - "ray": False, - "OPENAI_KEY": "", - "PINECONE_API_KEY": "", - "PINECONE_ENV": "" -} \ No newline at end of file + "evadb_installation_dir": "", + "datasets_dir": "", + "catalog_database_uri": "", + "application": "evadb", + "mode": "release", + "batch_mem_size": 30000000, + "gpu_batch_size": 1, # batch size used for gpu_operations + "gpu_ids": [0], + "host": "0.0.0.0", + "port": 8803, + "socket_timeout": 60, + "ray": False, + "OPENAI_KEY": "", + "PINECONE_API_KEY": "", + "PINECONE_ENV": "", +} diff --git a/evadb/executor/create_function_executor.py b/evadb/executor/create_function_executor.py index 8272d22c81..537d28365f 100644 --- a/evadb/executor/create_function_executor.py +++ b/evadb/executor/create_function_executor.py @@ -149,7 +149,8 @@ def handle_sklearn_function(self): aggregated_batch.frames.drop([arg_map["predict"]], axis=1, inplace=True) model.fit(X=aggregated_batch.frames, y=Y) model_path = os.path.join( - self.db.catalog().get_configuration_catalog_value("model_dir"), self.node.name + self.db.catalog().get_configuration_catalog_value("model_dir"), + self.node.name, ) pickle.dump(model, open(model_path, "wb")) self.node.metadata.append( diff --git a/evadb/executor/drop_object_executor.py b/evadb/executor/drop_object_executor.py index b9aa9e016a..a857f15eae 100644 --- a/evadb/executor/drop_object_executor.py +++ b/evadb/executor/drop_object_executor.py @@ -118,7 +118,9 @@ def _handle_drop_index(self, index_name: str, if_exists: bool): index = VectorStoreFactory.init_vector_store( index_obj.type, index_obj.name, - **handle_vector_store_params(index_obj.type, index_obj.save_file_path, self.catalog), + **handle_vector_store_params( + index_obj.type, index_obj.save_file_path, self.catalog + ), ) assert ( index is not None diff --git a/evadb/optimizer/optimizer_context.py b/evadb/optimizer/optimizer_context.py index fcf88571e1..721cb5b928 100644 --- a/evadb/optimizer/optimizer_context.py +++ b/evadb/optimizer/optimizer_context.py @@ -46,7 +46,7 @@ def __init__( # check if ray is enabled is_ray_enabled = self.db.catalog().get_configuration_catalog_value("ray") self._rules_manager = rules_manager or RulesManager({"ray": is_ray_enabled}) - + @property def db(self): return self._db diff --git a/evadb/optimizer/rules/rules_manager.py b/evadb/optimizer/rules/rules_manager.py index 147206bd1a..6fffd0c57f 100644 --- a/evadb/optimizer/rules/rules_manager.py +++ b/evadb/optimizer/rules/rules_manager.py @@ -17,7 +17,6 @@ from contextlib import contextmanager from typing import List -from evadb.configuration.configuration_manager import ConfigurationManager from evadb.optimizer.rules.rules import ( CacheFunctionExpressionInApply, CacheFunctionExpressionInFilter, diff --git a/evadb/third_party/vector_stores/utils.py b/evadb/third_party/vector_stores/utils.py index 121b7d032d..c7c5cb75b2 100644 --- a/evadb/third_party/vector_stores/utils.py +++ b/evadb/third_party/vector_stores/utils.py @@ -41,7 +41,7 @@ def init_vector_store( from evadb.third_party.vector_stores.pinecone import required_params validate_kwargs(kwargs, required_params, required_params) - + return PineconeVectorStore(index_name, **kwargs) elif vector_store_type == VectorStoreType.CHROMADB: diff --git a/test/unit_tests/optimizer/rules/test_rules.py b/test/unit_tests/optimizer/rules/test_rules.py index b84bf18682..a08c002409 100644 --- a/test/unit_tests/optimizer/rules/test_rules.py +++ b/test/unit_tests/optimizer/rules/test_rules.py @@ -192,10 +192,7 @@ def test_supported_rules(self): for rule in supported_logical_rules: self.assertTrue( - any( - isinstance(rule, type(x)) - for x in RulesManager().logical_rules - ) + any(isinstance(rule, type(x)) for x in RulesManager().logical_rules) ) ray_enabled = self.evadb.config.get_value("experimental", "ray") diff --git a/test/unit_tests/optimizer/test_optimizer_task.py b/test/unit_tests/optimizer/test_optimizer_task.py index ba693dd3c0..c9100d6eff 100644 --- a/test/unit_tests/optimizer/test_optimizer_task.py +++ b/test/unit_tests/optimizer/test_optimizer_task.py @@ -55,9 +55,7 @@ def top_down_rewrite(self, opr): grp_expr = opt_cxt.add_opr_to_group(opr) root_grp_id = grp_expr.group_id opt_cxt.task_stack.push( - TopDownRewrite( - grp_expr, RulesManager().stage_one_rewrite_rules, opt_cxt - ) + TopDownRewrite(grp_expr, RulesManager().stage_one_rewrite_rules, opt_cxt) ) self.execute_task_stack(opt_cxt.task_stack) return opt_cxt, root_grp_id @@ -65,9 +63,7 @@ def top_down_rewrite(self, opr): def bottom_up_rewrite(self, root_grp_id, opt_cxt): grp_expr = opt_cxt.memo.groups[root_grp_id].logical_exprs[0] opt_cxt.task_stack.push( - BottomUpRewrite( - grp_expr, RulesManager().stage_two_rewrite_rules, opt_cxt - ) + BottomUpRewrite(grp_expr, RulesManager().stage_two_rewrite_rules, opt_cxt) ) self.execute_task_stack(opt_cxt.task_stack) return opt_cxt, root_grp_id From 2288715554ee826362dfe2d53caff1d035080b1e Mon Sep 17 00:00:00 2001 From: Gaurav Tarlok Kakkar Date: Tue, 10 Oct 2023 00:43:37 -0400 Subject: [PATCH 07/20] remove all dependencies on configuration manager --- evadb/binder/statement_binder.py | 14 ++++++-- evadb/catalog/sql_config.py | 32 ++++--------------- evadb/evadb_cmd_client.py | 6 ++-- .../abstract/pytorch_abstract_function.py | 3 +- evadb/functions/chatgpt.py | 7 ++-- .../executor/test_execution_context.py | 32 +++++-------------- test/unit_tests/test_eva_cmd_client.py | 27 ++++++---------- 7 files changed, 43 insertions(+), 78 deletions(-) diff --git a/evadb/binder/statement_binder.py b/evadb/binder/statement_binder.py index 152ea104b2..149572ee9c 100644 --- a/evadb/binder/statement_binder.py +++ b/evadb/binder/statement_binder.py @@ -337,9 +337,17 @@ def _bind_func_expr(self, node: FunctionExpression): ) # certain functions take additional inputs like yolo needs the model_name # these arguments are passed by the user as part of metadata - node.function = lambda: function_class( - **get_metadata_properties(function_obj) - ) + # we also handle the special case of ChatGPT where we need to send the + # OpenAPI key as part of the parameter + # ToDO: this should be better handled + properties = get_metadata_properties(function_obj) + if node.name.upper() == str("CHATGPT"): + openapi_key = self._catalog().get_configuration_catalog_value( + "OPENAI_KEY" + ) + properties["api_key"] = openapi_key + + node.function = lambda: function_class(properties) except Exception as e: err_msg = ( f"{str(e)}. Please verify that the function class name in the " diff --git a/evadb/catalog/sql_config.py b/evadb/catalog/sql_config.py index c1432e3553..4fc4de587f 100644 --- a/evadb/catalog/sql_config.py +++ b/evadb/catalog/sql_config.py @@ -17,9 +17,7 @@ from sqlalchemy import create_engine, event from sqlalchemy.orm import scoped_session, sessionmaker -from sqlalchemy.pool import NullPool -from evadb.utils.generic_utils import is_postgres_uri, parse_config_yml # Permanent identifier column. IDENTIFIER_COLUMN = "_row_id" @@ -63,33 +61,17 @@ def __call__(cls, uri): class SQLConfig(metaclass=SingletonMeta): def __init__(self, uri): - """Initializes the engine and session for database operations - - Retrieves the database uri for connection from ConfigurationManager. - """ + """Initializes the engine and session for database operations""" self.worker_uri = str(uri) # set echo=True to log SQL - connect_args = {} - config_obj = parse_config_yml() - if is_postgres_uri(config_obj["core"]["catalog_database_uri"]): - # Set the arguments for postgres backend. - connect_args = {"connect_timeout": 1000} - # https://www.oddbird.net/2014/06/14/sqlalchemy-postgres-autocommit/ - self.engine = create_engine( - self.worker_uri, - poolclass=NullPool, - isolation_level="AUTOCOMMIT", - connect_args=connect_args, - ) - else: - # Default to SQLite. - connect_args = {"timeout": 1000} - self.engine = create_engine( - self.worker_uri, - connect_args=connect_args, - ) + # Default to SQLite. + connect_args = {"timeout": 1000} + self.engine = create_engine( + self.worker_uri, + connect_args=connect_args, + ) if self.engine.url.get_backend_name() == "sqlite": # enforce foreign key constraint and wal logging for sqlite diff --git a/evadb/evadb_cmd_client.py b/evadb/evadb_cmd_client.py index 3286b68fa3..82f0fba80e 100644 --- a/evadb/evadb_cmd_client.py +++ b/evadb/evadb_cmd_client.py @@ -26,7 +26,7 @@ EvaDB_CODE_DIR = abspath(join(THIS_DIR, "..")) sys.path.append(EvaDB_CODE_DIR) -from evadb.configuration.configuration_manager import ConfigurationManager # noqa: E402 +from evadb.evadb_config import BASE_EVADB_CONFIG from evadb.server.interpreter import start_cmd_client # noqa: E402 @@ -62,11 +62,11 @@ def main(): args, unknown = parser.parse_known_args() host = ( - args.host if args.host else ConfigurationManager().get_value("server", "host") + args.host if args.host else BASE_EVADB_CONFIG["host"] ) port = ( - args.port if args.port else ConfigurationManager().get_value("server", "port") + args.port if args.port else BASE_EVADB_CONFIG["port"] ) asyncio.run(evadb_client(host, port)) diff --git a/evadb/functions/abstract/pytorch_abstract_function.py b/evadb/functions/abstract/pytorch_abstract_function.py index 763f3658f7..4c8c1e063a 100644 --- a/evadb/functions/abstract/pytorch_abstract_function.py +++ b/evadb/functions/abstract/pytorch_abstract_function.py @@ -17,7 +17,6 @@ import pandas as pd from numpy.typing import ArrayLike -from evadb.configuration.configuration_manager import ConfigurationManager from evadb.functions.abstract.abstract_function import ( AbstractClassifierFunction, AbstractTransformationFunction, @@ -74,7 +73,7 @@ def __call__(self, *args, **kwargs) -> pd.DataFrame: if isinstance(frames, pd.DataFrame): frames = frames.transpose().values.tolist()[0] - gpu_batch_size = ConfigurationManager().get_value("executor", "gpu_batch_size") + gpu_batch_size = None import torch tens_batch = torch.cat([self.transform(x) for x in frames]).to( diff --git a/evadb/functions/chatgpt.py b/evadb/functions/chatgpt.py index 61253116fe..532e13e3de 100644 --- a/evadb/functions/chatgpt.py +++ b/evadb/functions/chatgpt.py @@ -20,7 +20,6 @@ from retry import retry from evadb.catalog.catalog_type import NdArrayType -from evadb.configuration.configuration_manager import ConfigurationManager from evadb.functions.abstract.abstract_function import AbstractFunction from evadb.functions.decorators.decorators import forward, setup from evadb.functions.decorators.io_descriptors.data_types import PandasDataframe @@ -85,10 +84,12 @@ def setup( self, model="gpt-3.5-turbo", temperature: float = 0, + api_key="", ) -> None: assert model in _VALID_CHAT_COMPLETION_MODEL, f"Unsupported ChatGPT {model}" self.model = model self.temperature = temperature + self.api_key = api_key @forward( input_signatures=[ @@ -120,9 +121,7 @@ def forward(self, text_df): def completion_with_backoff(**kwargs): return openai.ChatCompletion.create(**kwargs) - # Register API key, try configuration manager first - openai.api_key = ConfigurationManager().get_value("third_party", "OPENAI_KEY") - # If not found, try OS Environment Variable + openai.api_key = self.api_key if len(openai.api_key) == 0: openai.api_key = os.environ.get("OPENAI_KEY", "") assert ( diff --git a/test/unit_tests/executor/test_execution_context.py b/test/unit_tests/executor/test_execution_context.py index 28383949e9..a3e4e736f0 100644 --- a/test/unit_tests/executor/test_execution_context.py +++ b/test/unit_tests/executor/test_execution_context.py @@ -21,7 +21,6 @@ class ExecutionContextTest(unittest.TestCase): - @patch("evadb.executor.execution_context.ConfigurationManager") @patch("evadb.executor.execution_context.get_gpu_count") @patch("evadb.executor.execution_context.is_gpu_available") def test_CUDA_VISIBLE_DEVICES_gets_populated_from_config( @@ -29,20 +28,17 @@ def test_CUDA_VISIBLE_DEVICES_gets_populated_from_config( ): gpu_check.return_value = True get_gpu_count.return_value = 3 - cfm.return_value.get_value.return_value = [0, 1] - context = Context() + context = Context([[0, 1]]) self.assertEqual(context.gpus, [0, 1]) - @patch("evadb.executor.execution_context.ConfigurationManager") @patch("evadb.executor.execution_context.os") @patch("evadb.executor.execution_context.get_gpu_count") @patch("evadb.executor.execution_context.is_gpu_available") def test_CUDA_VISIBLE_DEVICES_gets_populated_from_environment_if_no_config( - self, is_gpu, get_gpu_count, os, cfm + self, is_gpu, get_gpu_count, os ): is_gpu.return_value = True - cfm.return_value.get_value.return_value = [] get_gpu_count.return_value = 3 os.environ.get.return_value = "0,1" context = Context() @@ -50,57 +46,45 @@ def test_CUDA_VISIBLE_DEVICES_gets_populated_from_environment_if_no_config( self.assertEqual(context.gpus, [0, 1]) - @patch("evadb.executor.execution_context.ConfigurationManager") @patch("evadb.executor.execution_context.os") @patch("evadb.executor.execution_context.get_gpu_count") @patch("evadb.executor.execution_context.is_gpu_available") def test_CUDA_VISIBLE_DEVICES_should_be_empty_if_nothing_provided( - self, gpu_check, get_gpu_count, os, cfm + self, gpu_check, get_gpu_count, os ): gpu_check.return_value = True get_gpu_count.return_value = 3 - cfm.return_value.get_value.return_value = [] os.environ.get.return_value = "" context = Context() os.environ.get.assert_called_with("CUDA_VISIBLE_DEVICES", "") self.assertEqual(context.gpus, []) - @patch("evadb.executor.execution_context.ConfigurationManager") @patch("evadb.executor.execution_context.os") @patch("evadb.executor.execution_context.is_gpu_available") - def test_gpus_ignores_config_if_no_gpu_available(self, gpu_check, os, cfm): + def test_gpus_ignores_config_if_no_gpu_available(self, gpu_check, os): gpu_check.return_value = False - cfm.return_value.get_value.return_value = [0, 1, 2] os.environ.get.return_value = "0,1,2" - context = Context() + context = Context([0, 1, 2]) self.assertEqual(context.gpus, []) - @patch("evadb.executor.execution_context.ConfigurationManager") @patch("evadb.executor.execution_context.os") @patch("evadb.executor.execution_context.is_gpu_available") - def test_gpu_device_should_return_NO_GPU_if_GPU_not_available( - self, gpu_check, os, cfm - ): + def test_gpu_device_should_return_NO_GPU_if_GPU_not_available(self, gpu_check, os): gpu_check.return_value = True - cfm.return_value.get_value.return_value = [] os.environ.get.return_value = "" context = Context() os.environ.get.assert_called_with("CUDA_VISIBLE_DEVICES", "") self.assertEqual(context.gpu_device(), NO_GPU) - @patch("evadb.executor.execution_context.ConfigurationManager") @patch("evadb.executor.execution_context.get_gpu_count") @patch("evadb.executor.execution_context.is_gpu_available") - def test_should_return_random_gpu_ID_if_available( - self, gpu_check, get_gpu_count, cfm - ): + def test_should_return_random_gpu_ID_if_available(self, gpu_check, get_gpu_count): gpu_check.return_value = True get_gpu_count.return_value = 1 - cfm.return_value.get_value.return_value = [0, 1, 2] - context = Context() + context = Context([0, 1, 2]) selected_device = context.gpu_device() self.assertEqual(selected_device, 0) diff --git a/test/unit_tests/test_eva_cmd_client.py b/test/unit_tests/test_eva_cmd_client.py index 0e1f67ee91..64806a42e6 100644 --- a/test/unit_tests/test_eva_cmd_client.py +++ b/test/unit_tests/test_eva_cmd_client.py @@ -17,13 +17,13 @@ import unittest import pytest -from mock import call, patch +from mock import patch -from evadb.configuration.configuration_manager import ConfigurationManager +from evadb.evadb_config import BASE_EVADB_CONFIG from evadb.evadb_cmd_client import evadb_client, main -@pytest.mark.skip +# @pytest.mark.skip class CMDClientTest(unittest.TestCase): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) @@ -81,17 +81,10 @@ def test_main_without_cmd_arguments( [], ) - # Mock the ConfigurationManager's get_value method - with patch.object( - ConfigurationManager, "get_value", return_value="default_value" - ) as mock_get_value: - # Call the function under test - main() - - # Assert that the mocked functions were called correctly - mock_start_cmd_client.assert_called_once_with( - "default_value", "default_value" - ) - mock_get_value.assert_has_calls( - [call("server", "host"), call("server", "port")] - ) + # Call the function under test + main() + + # Assert that the mocked functions were called correctly + mock_start_cmd_client.assert_called_once_with( + BASE_EVADB_CONFIG["host"], BASE_EVADB_CONFIG["port"] + ) From 791829bde01347feba00bc5559d76f9b485b3f51 Mon Sep 17 00:00:00 2001 From: Gaurav Tarlok Kakkar Date: Tue, 10 Oct 2023 00:44:00 -0400 Subject: [PATCH 08/20] remove configuration manager --- evadb/configuration/configuration_manager.py | 72 -------------------- 1 file changed, 72 deletions(-) delete mode 100644 evadb/configuration/configuration_manager.py diff --git a/evadb/configuration/configuration_manager.py b/evadb/configuration/configuration_manager.py deleted file mode 100644 index 8722b66a6a..0000000000 --- a/evadb/configuration/configuration_manager.py +++ /dev/null @@ -1,72 +0,0 @@ -# coding=utf-8 -# Copyright 2018-2023 EvaDB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from pathlib import Path -from typing import Any - -from evadb.configuration.bootstrap_environment import bootstrap_environment -from evadb.configuration.constants import EvaDB_DATABASE_DIR, EvaDB_INSTALLATION_DIR -from evadb.utils.logging_manager import logger - - -class ConfigurationManager(object): - def __init__(self, evadb_dir: str = None, catalog_obj=None) -> None: - self._evadb_dir = evadb_dir or EvaDB_DATABASE_DIR - self._catalog_obj = catalog_obj - self._populate_base_configs() - - def _populate_base_configs(self): - config_obj = bootstrap_environment( - evadb_dir=Path(self._evadb_dir), - evadb_installation_dir=Path(EvaDB_INSTALLATION_DIR), - ) - return config_obj - - def _get(self, category: str, key: str) -> Any: - """Retrieve a configuration value based on the category and key. - - Args: - category (str): The category of the configuration. - key (str): The key of the configuration within the category. - - Returns: - Any: The retrieved configuration value. - - Raises: - ValueError: If the YAML file is invalid or cannot be loaded. - """ - config_obj = self._config_obj - - # Get value from the user-provided config file - value = config_obj.get(category, {}).get(key) - - # cannot find the value, report invalid category, key combination - if value is None: - logger.exception(f"Invalid category and key combination {category}:{key}") - - return value - - def _update(self, category: str, key: str, value: str): - config_obj = self._config_obj - - if category not in config_obj: - config_obj[category] = {} - - config_obj[category][key] = value - - def get_value(self, category: str, key: str) -> Any: - return self._get(category, key) - - def update_value(self, category, key, value) -> None: - self._update(category, key, value) From 9a7b8eab0649f079f669caf01c692d08aaf5b926 Mon Sep 17 00:00:00 2001 From: Gaurav Tarlok Kakkar Date: Tue, 10 Oct 2023 00:57:56 -0400 Subject: [PATCH 09/20] fix test cases --- evadb/binder/statement_binder.py | 2 +- evadb/catalog/catalog_utils.py | 4 ++-- evadb/configuration/bootstrap_environment.py | 18 +----------------- evadb/optimizer/rules/rules_manager.py | 2 +- evadb/server/server.py | 2 +- 5 files changed, 6 insertions(+), 22 deletions(-) diff --git a/evadb/binder/statement_binder.py b/evadb/binder/statement_binder.py index 149572ee9c..49ec5e154c 100644 --- a/evadb/binder/statement_binder.py +++ b/evadb/binder/statement_binder.py @@ -347,7 +347,7 @@ def _bind_func_expr(self, node: FunctionExpression): ) properties["api_key"] = openapi_key - node.function = lambda: function_class(properties) + node.function = lambda: function_class(**properties) except Exception as e: err_msg = ( f"{str(e)}. Please verify that the function class name in the " diff --git a/evadb/catalog/catalog_utils.py b/evadb/catalog/catalog_utils.py index 7ff1a535fe..f28eae334f 100644 --- a/evadb/catalog/catalog_utils.py +++ b/evadb/catalog/catalog_utils.py @@ -293,12 +293,12 @@ def get_metadata_properties(function_obj: FunctionCatalogEntry) -> Dict: return properties -def bootstrap_configs(catalog, configs: dict): +def bootstrap_configs(catalog: "CatalogManager", configs: dict): """ load all the configuration values into the catalog table configuration_catalog """ for key, value in configs.items(): - catalog._config_catalog_service.insert_entry(key, value) + catalog.upsert_configuration_catalog_entry(key, value) def get_configuration_value(key: str): diff --git a/evadb/configuration/bootstrap_environment.py b/evadb/configuration/bootstrap_environment.py index 085b8d72d9..ec7793ee98 100644 --- a/evadb/configuration/bootstrap_environment.py +++ b/evadb/configuration/bootstrap_environment.py @@ -12,7 +12,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import importlib.resources as importlib_resources import logging from pathlib import Path from typing import Union @@ -26,27 +25,12 @@ MODEL_DIR, S3_DOWNLOAD_DIR, TMP_DIR, - EvaDB_CONFIG_FILE, EvaDB_DATASET_DIR, ) from evadb.evadb_config import BASE_EVADB_CONFIG from evadb.utils.logging_manager import logger as evadb_logger -def get_base_config(evadb_installation_dir: Path) -> Path: - """ - Get path to .evadb_config.py source path. - """ - return BASE_EVADB_CONFIG - # if evadb package is installed in environment - if importlib_resources.is_resource("evadb", EvaDB_CONFIG_FILE): - with importlib_resources.path("evadb", EvaDB_CONFIG_FILE) as config_path: - return config_path - else: - # For local dev environments without package installed - return evadb_installation_dir / EvaDB_CONFIG_FILE - - def get_default_db_uri(evadb_dir: Path): """ Get the default database uri. @@ -67,7 +51,7 @@ def bootstrap_environment(evadb_dir: Path, evadb_installation_dir: Path): evadb_installation_dir: path to evadb package """ - config_obj = get_base_config(evadb_installation_dir) + config_obj = BASE_EVADB_CONFIG # creates necessary directories config_default_dict = create_directories_and_get_default_config_values( diff --git a/evadb/optimizer/rules/rules_manager.py b/evadb/optimizer/rules/rules_manager.py index 6fffd0c57f..cc88a9575d 100644 --- a/evadb/optimizer/rules/rules_manager.py +++ b/evadb/optimizer/rules/rules_manager.py @@ -66,7 +66,7 @@ class RulesManager: - def __init__(self, configs: dict): + def __init__(self, configs: dict = {}): self._logical_rules = [ LogicalInnerJoinCommutativity(), CacheFunctionExpressionInApply(), diff --git a/evadb/server/server.py b/evadb/server/server.py index 0105279a32..9f33dced0d 100644 --- a/evadb/server/server.py +++ b/evadb/server/server.py @@ -49,7 +49,7 @@ async def start_evadb_server( self._server = await asyncio.start_server(self.accept_client, host, port) # load built-in functions - mode = self._evadb.config.get_value("core", "mode") + mode = self._evadb.catalog().get_configuration_catalog_value("mode") init_builtin_functions(self._evadb, mode=mode) async with self._server: From 50ed8264ac60f4065524cc972e77ae19a8cfa6cf Mon Sep 17 00:00:00 2001 From: Gaurav Tarlok Kakkar Date: Tue, 10 Oct 2023 02:24:24 -0400 Subject: [PATCH 10/20] fix more testcases --- evadb/catalog/catalog_manager.py | 1 - evadb/catalog/models/configuration_catalog.py | 6 ++---- .../services/configuration_catalog_service.py | 2 +- .../services/function_cost_catalog_service.py | 2 +- evadb/configuration/bootstrap_environment.py | 19 +++++++++--------- evadb/parser/evadb.lark | 2 +- evadb/parser/lark_visitor/_expressions.py | 6 ++++++ test/app_tests/test_pandas_qa.py | 2 +- test/app_tests/test_privategpt.py | 2 +- test/app_tests/test_youtube_channel_qa.py | 2 +- test/app_tests/test_youtube_qa.py | 2 +- test/integration_tests/long/test_pytorch.py | 3 ++- .../catalog/test_catalog_manager.py | 20 +++++++++---------- .../executor/test_execution_context.py | 4 ++-- .../optimizer/rules/test_batch_mem_size.py | 4 +--- test/unit_tests/optimizer/rules/test_rules.py | 2 +- 16 files changed, 40 insertions(+), 39 deletions(-) diff --git a/evadb/catalog/catalog_manager.py b/evadb/catalog/catalog_manager.py index ed82d47355..d1004bfc08 100644 --- a/evadb/catalog/catalog_manager.py +++ b/evadb/catalog/catalog_manager.py @@ -145,7 +145,6 @@ def _clear_catalog_contents(self): ) # clean up the dataset, index, and cache directories for folder in ["cache_dir", "index_dir", "datasets_dir"]: - print(folder, self.get_configuration_catalog_value(folder)) remove_directory_contents(self.get_configuration_catalog_value(folder)) "Database catalog services" diff --git a/evadb/catalog/models/configuration_catalog.py b/evadb/catalog/models/configuration_catalog.py index faf99b5abb..157b07eaf8 100644 --- a/evadb/catalog/models/configuration_catalog.py +++ b/evadb/catalog/models/configuration_catalog.py @@ -28,10 +28,8 @@ class ConfigurationCatalog(BaseModel): __tablename__ = "configuration_catalog" - _key = Column("name", String(100), unique=True) - _value = Column( - "engine", TextPickleType() - ) # TODO :- Will this work or would we need to pickle ?? + _key = Column("key", String(100), unique=True) + _value = Column("value", TextPickleType()) def __init__(self, key: str, value: any): self._key = key diff --git a/evadb/catalog/services/configuration_catalog_service.py b/evadb/catalog/services/configuration_catalog_service.py index 07843bc14e..bda22253c0 100644 --- a/evadb/catalog/services/configuration_catalog_service.py +++ b/evadb/catalog/services/configuration_catalog_service.py @@ -67,7 +67,7 @@ def upsert_entry( select(self.model).filter(self.model._key == key) ).scalar_one_or_none() if entry: - entry.update(self.session, value=value) + entry.update(self.session, _value=value) else: self.insert_entry(key, value) except Exception as e: diff --git a/evadb/catalog/services/function_cost_catalog_service.py b/evadb/catalog/services/function_cost_catalog_service.py index ac84e8c948..5e20ac3030 100644 --- a/evadb/catalog/services/function_cost_catalog_service.py +++ b/evadb/catalog/services/function_cost_catalog_service.py @@ -62,7 +62,7 @@ def upsert_entry(self, function_id: int, name: str, new_cost: int): select(self.model).filter(self.model._function_id == function_id) ).scalar_one_or_none() if function_obj: - function_obj.update(self.session, cost=new_cost) + function_obj.update(self.session, _cost=new_cost) else: self.insert_entry(function_id, name, new_cost) except Exception as e: diff --git a/evadb/configuration/bootstrap_environment.py b/evadb/configuration/bootstrap_environment.py index ec7793ee98..ac953be739 100644 --- a/evadb/configuration/bootstrap_environment.py +++ b/evadb/configuration/bootstrap_environment.py @@ -125,15 +125,14 @@ def merge_dict_of_dicts(dict1, dict2): merged_dict = dict1.copy() for key, value in dict2.items(): - # Overwrite only if some value is specified. - if value: - if ( - key in merged_dict - and isinstance(merged_dict[key], dict) - and isinstance(value, dict) - ): - merged_dict[key] = merge_dict_of_dicts(merged_dict[key], value) - else: - merged_dict[key] = value + if key in merged_dict.keys(): + # Overwrite only if some value is specified. + if value is not None: + if isinstance(merged_dict[key], dict) and isinstance(value, dict): + merged_dict[key] = merge_dict_of_dicts(merged_dict[key], value) + else: + merged_dict[key] = value + else: + merged_dict[key] = value return merged_dict diff --git a/evadb/parser/evadb.lark b/evadb/parser/evadb.lark index b8cc2fc564..8f618c29c9 100644 --- a/evadb/parser/evadb.lark +++ b/evadb/parser/evadb.lark @@ -84,7 +84,7 @@ set_statement: SET config_name (EQUAL_SYMBOL | TO) config_value config_name: uid -config_value: (string_literal | decimal_literal | boolean_literal | real_literal) +config_value: constant // Data Manipulation Language diff --git a/evadb/parser/lark_visitor/_expressions.py b/evadb/parser/lark_visitor/_expressions.py index c5cf5a0bfe..91b5be77c1 100644 --- a/evadb/parser/lark_visitor/_expressions.py +++ b/evadb/parser/lark_visitor/_expressions.py @@ -41,6 +41,12 @@ def array_literal(self, tree): res = ConstantValueExpression(np.array(array_elements), ColumnType.NDARRAY) return res + def boolean_literal(self, tree): + text = tree.children[0] + if text == "TRUE": + return ConstantValueExpression(True, ColumnType.BOOLEAN) + return ConstantValueExpression(False, ColumnType.BOOLEAN) + def constant(self, tree): for child in tree.children: if isinstance(child, Tree): diff --git a/test/app_tests/test_pandas_qa.py b/test/app_tests/test_pandas_qa.py index 6976a4699d..ebb6df29cb 100644 --- a/test/app_tests/test_pandas_qa.py +++ b/test/app_tests/test_pandas_qa.py @@ -25,7 +25,7 @@ class PandasQATest(unittest.TestCase): def setUpClass(cls): cls.evadb = get_evadb_for_testing() cls.evadb.catalog().reset() - os.environ["ray"] = str(cls.evadb.config.get_value("experimental", "ray")) + os.environ["ray"] = cls.evadb.catalog().get_configuration_catalog_value("ray") @classmethod def tearDownClass(cls): diff --git a/test/app_tests/test_privategpt.py b/test/app_tests/test_privategpt.py index 8a0b46f340..184888dd27 100644 --- a/test/app_tests/test_privategpt.py +++ b/test/app_tests/test_privategpt.py @@ -29,7 +29,7 @@ class PrivateGPTTest(unittest.TestCase): def setUpClass(cls): cls.evadb = get_evadb_for_testing() cls.evadb.catalog().reset() - os.environ["ray"] = str(cls.evadb.config.get_value("experimental", "ray")) + os.environ["ray"] = cls.evadb.catalog().get_configuration_catalog_value("ray") @classmethod def tearDownClass(cls): diff --git a/test/app_tests/test_youtube_channel_qa.py b/test/app_tests/test_youtube_channel_qa.py index 099a1c73c4..2bf70d918a 100644 --- a/test/app_tests/test_youtube_channel_qa.py +++ b/test/app_tests/test_youtube_channel_qa.py @@ -24,7 +24,7 @@ class YoutubeChannelQATest(unittest.TestCase): def setUpClass(cls): cls.evadb = get_evadb_for_testing() cls.evadb.catalog().reset() - os.environ["ray"] = str(cls.evadb.config.get_value("experimental", "ray")) + os.environ["ray"] = cls.evadb.catalog().get_configuration_catalog_value("ray") @classmethod def tearDownClass(cls): diff --git a/test/app_tests/test_youtube_qa.py b/test/app_tests/test_youtube_qa.py index 47d062405b..837404cda2 100644 --- a/test/app_tests/test_youtube_qa.py +++ b/test/app_tests/test_youtube_qa.py @@ -25,7 +25,7 @@ class YoutubeQATest(unittest.TestCase): def setUpClass(cls): cls.evadb = get_evadb_for_testing() cls.evadb.catalog().reset() - os.environ["ray"] = str(cls.evadb.config.get_value("experimental", "ray")) + os.environ["ray"] = cls.evadb.catalog().get_configuration_catalog_value("ray") @classmethod def tearDownClass(cls): diff --git a/test/integration_tests/long/test_pytorch.py b/test/integration_tests/long/test_pytorch.py index f2bd66ba04..3b1f0c2d36 100644 --- a/test/integration_tests/long/test_pytorch.py +++ b/test/integration_tests/long/test_pytorch.py @@ -49,7 +49,8 @@ class PytorchTest(unittest.TestCase): def setUpClass(cls): cls.evadb = get_evadb_for_testing() cls.evadb.catalog().reset() - os.environ["ray"] = str(cls.evadb.config.get_value("experimental", "ray")) + print(cls.evadb.catalog().get_configuration_catalog_value("ray"), "ray") + os.environ["ray"] = cls.evadb.catalog().get_configuration_catalog_value("ray") ua_detrac = f"{EvaDB_ROOT_DIR}/data/ua_detrac/ua_detrac.mp4" mnist = f"{EvaDB_ROOT_DIR}/data/mnist/mnist.mp4" diff --git a/test/unit_tests/catalog/test_catalog_manager.py b/test/unit_tests/catalog/test_catalog_manager.py index 3c0e23a3cd..6e8c6f9a37 100644 --- a/test/unit_tests/catalog/test_catalog_manager.py +++ b/test/unit_tests/catalog/test_catalog_manager.py @@ -44,7 +44,7 @@ def setUpClass(cls) -> None: @mock.patch("evadb.catalog.catalog_manager.init_db") def test_catalog_bootstrap(self, mocked_db): - x = CatalogManager(MagicMock(), MagicMock()) + x = CatalogManager(MagicMock()) x._bootstrap_catalog() mocked_db.assert_called() @@ -52,7 +52,7 @@ def test_catalog_bootstrap(self, mocked_db): "evadb.catalog.catalog_manager.CatalogManager.create_and_insert_table_catalog_entry" ) def test_create_multimedia_table_catalog_entry(self, mock): - x = CatalogManager(MagicMock(), MagicMock()) + x = CatalogManager(MagicMock()) name = "myvideo" x.create_and_insert_multimedia_table_catalog_entry( name=name, format_type=FileFormatType.VIDEO @@ -71,7 +71,7 @@ def test_create_multimedia_table_catalog_entry(self, mock): def test_insert_table_catalog_entry_should_create_table_and_columns( self, ds_mock, initdb_mock ): - catalog = CatalogManager(MagicMock(), MagicMock()) + catalog = CatalogManager(MagicMock()) file_url = "file1" table_name = "name" @@ -88,7 +88,7 @@ def test_insert_table_catalog_entry_should_create_table_and_columns( @mock.patch("evadb.catalog.catalog_manager.init_db") @mock.patch("evadb.catalog.catalog_manager.TableCatalogService") def test_get_table_catalog_entry_when_table_exists(self, ds_mock, initdb_mock): - catalog = CatalogManager(MagicMock(), MagicMock()) + catalog = CatalogManager(MagicMock()) table_name = "name" database_name = "database" row_id = 1 @@ -110,7 +110,7 @@ def test_get_table_catalog_entry_when_table_exists(self, ds_mock, initdb_mock): def test_get_table_catalog_entry_when_table_doesnot_exists( self, dcs_mock, ds_mock, initdb_mock ): - catalog = CatalogManager(MagicMock(), MagicMock()) + catalog = CatalogManager(MagicMock()) table_name = "name" database_name = "database" @@ -132,7 +132,7 @@ def test_get_table_catalog_entry_when_table_doesnot_exists( def test_insert_function( self, checksum_mock, functionmetadata_mock, functionio_mock, function_mock ): - catalog = CatalogManager(MagicMock(), MagicMock()) + catalog = CatalogManager(MagicMock()) function_io_list = [MagicMock()] function_metadata_list = [MagicMock()] actual = catalog.insert_function_catalog_entry( @@ -154,7 +154,7 @@ def test_insert_function( @mock.patch("evadb.catalog.catalog_manager.FunctionCatalogService") def test_get_function_catalog_entry_by_name(self, function_mock): - catalog = CatalogManager(MagicMock(), MagicMock()) + catalog = CatalogManager(MagicMock()) actual = catalog.get_function_catalog_entry_by_name("name") function_mock.return_value.get_entry_by_name.assert_called_with("name") self.assertEqual( @@ -163,7 +163,7 @@ def test_get_function_catalog_entry_by_name(self, function_mock): @mock.patch("evadb.catalog.catalog_manager.FunctionCatalogService") def test_delete_function(self, function_mock): - CatalogManager(MagicMock(), MagicMock()).delete_function_catalog_entry_by_name( + CatalogManager(MagicMock()).delete_function_catalog_entry_by_name( "name" ) function_mock.return_value.delete_entry_by_name.assert_called_with("name") @@ -172,7 +172,7 @@ def test_delete_function(self, function_mock): def test_get_function_outputs(self, function_mock): mock_func = function_mock.return_value.get_output_entries_by_function_id function_obj = MagicMock(spec=FunctionCatalogEntry) - CatalogManager(MagicMock(), MagicMock()).get_function_io_catalog_output_entries( + CatalogManager(MagicMock()).get_function_io_catalog_output_entries( function_obj ) mock_func.assert_called_once_with(function_obj.row_id) @@ -181,7 +181,7 @@ def test_get_function_outputs(self, function_mock): def test_get_function_inputs(self, function_mock): mock_func = function_mock.return_value.get_input_entries_by_function_id function_obj = MagicMock(spec=FunctionCatalogEntry) - CatalogManager(MagicMock(), MagicMock()).get_function_io_catalog_input_entries( + CatalogManager(MagicMock()).get_function_io_catalog_input_entries( function_obj ) mock_func.assert_called_once_with(function_obj.row_id) diff --git a/test/unit_tests/executor/test_execution_context.py b/test/unit_tests/executor/test_execution_context.py index a3e4e736f0..8d2a4caae3 100644 --- a/test/unit_tests/executor/test_execution_context.py +++ b/test/unit_tests/executor/test_execution_context.py @@ -24,11 +24,11 @@ class ExecutionContextTest(unittest.TestCase): @patch("evadb.executor.execution_context.get_gpu_count") @patch("evadb.executor.execution_context.is_gpu_available") def test_CUDA_VISIBLE_DEVICES_gets_populated_from_config( - self, gpu_check, get_gpu_count, cfm + self, gpu_check, get_gpu_count ): gpu_check.return_value = True get_gpu_count.return_value = 3 - context = Context([[0, 1]]) + context = Context([0, 1]) self.assertEqual(context.gpus, [0, 1]) diff --git a/test/unit_tests/optimizer/rules/test_batch_mem_size.py b/test/unit_tests/optimizer/rules/test_batch_mem_size.py index 70033b014f..68f84db803 100644 --- a/test/unit_tests/optimizer/rules/test_batch_mem_size.py +++ b/test/unit_tests/optimizer/rules/test_batch_mem_size.py @@ -38,9 +38,7 @@ def test_batch_mem_size_for_sqlite_storage_engine(self): the storage engine. """ test_batch_mem_size = 100 - self.evadb.config.update_value( - "executor", "batch_mem_size", test_batch_mem_size - ) + execute_query_fetch_all(self.evadb, f"SET batch_mem_size={test_batch_mem_size}") create_table_query = """ CREATE TABLE IF NOT EXISTS MyCSV ( id INTEGER UNIQUE, diff --git a/test/unit_tests/optimizer/rules/test_rules.py b/test/unit_tests/optimizer/rules/test_rules.py index a08c002409..18f8dc51d4 100644 --- a/test/unit_tests/optimizer/rules/test_rules.py +++ b/test/unit_tests/optimizer/rules/test_rules.py @@ -195,7 +195,7 @@ def test_supported_rules(self): any(isinstance(rule, type(x)) for x in RulesManager().logical_rules) ) - ray_enabled = self.evadb.config.get_value("experimental", "ray") + ray_enabled = self.evadb.catalog().get_configuration_catalog_value("ray") ray_enabled_and_installed = is_ray_enabled_and_installed(ray_enabled) # For the current version, we choose either the distributed or the From 814ea85716cc5043d91d67bce4179179d65eadb7 Mon Sep 17 00:00:00 2001 From: Gaurav Tarlok Kakkar Date: Tue, 10 Oct 2023 02:27:28 -0400 Subject: [PATCH 11/20] fix linter --- evadb/catalog/catalog_manager.py | 1 - evadb/catalog/catalog_utils.py | 2 +- evadb/catalog/models/base_model.py | 1 + evadb/catalog/sql_config.py | 1 - evadb/configuration/bootstrap_environment.py | 1 - evadb/evadb_cmd_client.py | 10 +++------- test/integration_tests/long/test_pytorch.py | 1 - test/unit_tests/catalog/test_catalog_manager.py | 12 +++--------- test/unit_tests/test_eva_cmd_client.py | 3 +-- test/util.py | 1 - 10 files changed, 9 insertions(+), 24 deletions(-) diff --git a/evadb/catalog/catalog_manager.py b/evadb/catalog/catalog_manager.py index d1004bfc08..029d03da01 100644 --- a/evadb/catalog/catalog_manager.py +++ b/evadb/catalog/catalog_manager.py @@ -32,7 +32,6 @@ ) from evadb.catalog.models.utils import ( ColumnCatalogEntry, - ConfigurationCatalogEntry, DatabaseCatalogEntry, FunctionCacheCatalogEntry, FunctionCatalogEntry, diff --git a/evadb/catalog/catalog_utils.py b/evadb/catalog/catalog_utils.py index f28eae334f..35fb7f6a87 100644 --- a/evadb/catalog/catalog_utils.py +++ b/evadb/catalog/catalog_utils.py @@ -293,7 +293,7 @@ def get_metadata_properties(function_obj: FunctionCatalogEntry) -> Dict: return properties -def bootstrap_configs(catalog: "CatalogManager", configs: dict): +def bootstrap_configs(catalog, configs: dict): """ load all the configuration values into the catalog table configuration_catalog """ diff --git a/evadb/catalog/models/base_model.py b/evadb/catalog/models/base_model.py index 5826d75425..f31fc10b43 100644 --- a/evadb/catalog/models/base_model.py +++ b/evadb/catalog/models/base_model.py @@ -15,6 +15,7 @@ from sqlalchemy import Column, Integer from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.ext.declarative import declarative_base + from evadb.utils.logging_manager import logger diff --git a/evadb/catalog/sql_config.py b/evadb/catalog/sql_config.py index 4fc4de587f..b32959234b 100644 --- a/evadb/catalog/sql_config.py +++ b/evadb/catalog/sql_config.py @@ -18,7 +18,6 @@ from sqlalchemy import create_engine, event from sqlalchemy.orm import scoped_session, sessionmaker - # Permanent identifier column. IDENTIFIER_COLUMN = "_row_id" diff --git a/evadb/configuration/bootstrap_environment.py b/evadb/configuration/bootstrap_environment.py index ac953be739..4b293cb365 100644 --- a/evadb/configuration/bootstrap_environment.py +++ b/evadb/configuration/bootstrap_environment.py @@ -16,7 +16,6 @@ from pathlib import Path from typing import Union - from evadb.configuration.constants import ( CACHE_DIR, DB_DEFAULT_NAME, diff --git a/evadb/evadb_cmd_client.py b/evadb/evadb_cmd_client.py index 82f0fba80e..2f15f075d5 100644 --- a/evadb/evadb_cmd_client.py +++ b/evadb/evadb_cmd_client.py @@ -26,7 +26,7 @@ EvaDB_CODE_DIR = abspath(join(THIS_DIR, "..")) sys.path.append(EvaDB_CODE_DIR) -from evadb.evadb_config import BASE_EVADB_CONFIG +from evadb.evadb_config import BASE_EVADB_CONFIG # noqa: E402 from evadb.server.interpreter import start_cmd_client # noqa: E402 @@ -61,13 +61,9 @@ def main(): # PARSE ARGS args, unknown = parser.parse_known_args() - host = ( - args.host if args.host else BASE_EVADB_CONFIG["host"] - ) + host = args.host if args.host else BASE_EVADB_CONFIG["host"] - port = ( - args.port if args.port else BASE_EVADB_CONFIG["port"] - ) + port = args.port if args.port else BASE_EVADB_CONFIG["port"] asyncio.run(evadb_client(host, port)) diff --git a/test/integration_tests/long/test_pytorch.py b/test/integration_tests/long/test_pytorch.py index 3b1f0c2d36..3fd758da29 100644 --- a/test/integration_tests/long/test_pytorch.py +++ b/test/integration_tests/long/test_pytorch.py @@ -49,7 +49,6 @@ class PytorchTest(unittest.TestCase): def setUpClass(cls): cls.evadb = get_evadb_for_testing() cls.evadb.catalog().reset() - print(cls.evadb.catalog().get_configuration_catalog_value("ray"), "ray") os.environ["ray"] = cls.evadb.catalog().get_configuration_catalog_value("ray") ua_detrac = f"{EvaDB_ROOT_DIR}/data/ua_detrac/ua_detrac.mp4" diff --git a/test/unit_tests/catalog/test_catalog_manager.py b/test/unit_tests/catalog/test_catalog_manager.py index 6e8c6f9a37..3706bee7f0 100644 --- a/test/unit_tests/catalog/test_catalog_manager.py +++ b/test/unit_tests/catalog/test_catalog_manager.py @@ -163,25 +163,19 @@ def test_get_function_catalog_entry_by_name(self, function_mock): @mock.patch("evadb.catalog.catalog_manager.FunctionCatalogService") def test_delete_function(self, function_mock): - CatalogManager(MagicMock()).delete_function_catalog_entry_by_name( - "name" - ) + CatalogManager(MagicMock()).delete_function_catalog_entry_by_name("name") function_mock.return_value.delete_entry_by_name.assert_called_with("name") @mock.patch("evadb.catalog.catalog_manager.FunctionIOCatalogService") def test_get_function_outputs(self, function_mock): mock_func = function_mock.return_value.get_output_entries_by_function_id function_obj = MagicMock(spec=FunctionCatalogEntry) - CatalogManager(MagicMock()).get_function_io_catalog_output_entries( - function_obj - ) + CatalogManager(MagicMock()).get_function_io_catalog_output_entries(function_obj) mock_func.assert_called_once_with(function_obj.row_id) @mock.patch("evadb.catalog.catalog_manager.FunctionIOCatalogService") def test_get_function_inputs(self, function_mock): mock_func = function_mock.return_value.get_input_entries_by_function_id function_obj = MagicMock(spec=FunctionCatalogEntry) - CatalogManager(MagicMock()).get_function_io_catalog_input_entries( - function_obj - ) + CatalogManager(MagicMock()).get_function_io_catalog_input_entries(function_obj) mock_func.assert_called_once_with(function_obj.row_id) diff --git a/test/unit_tests/test_eva_cmd_client.py b/test/unit_tests/test_eva_cmd_client.py index 64806a42e6..90a7f4a152 100644 --- a/test/unit_tests/test_eva_cmd_client.py +++ b/test/unit_tests/test_eva_cmd_client.py @@ -16,11 +16,10 @@ import asyncio import unittest -import pytest from mock import patch -from evadb.evadb_config import BASE_EVADB_CONFIG from evadb.evadb_cmd_client import evadb_client, main +from evadb.evadb_config import BASE_EVADB_CONFIG # @pytest.mark.skip diff --git a/test/util.py b/test/util.py index 945e2175e7..3a23a6ff5c 100644 --- a/test/util.py +++ b/test/util.py @@ -422,7 +422,6 @@ def create_large_scale_image_dataset(num=1000000): def create_sample_video(num_frames=NUM_FRAMES): file_name = os.path.join(get_tmp_dir(), "dummy.avi") - print(file_name) try: os.remove(file_name) except FileNotFoundError: From fedf6a9b4750f17e520bc6d29fba5b2edffcb1d9 Mon Sep 17 00:00:00 2001 From: Gaurav Tarlok Kakkar Date: Tue, 10 Oct 2023 02:29:58 -0400 Subject: [PATCH 12/20] addressed PR comments --- evadb/configuration/bootstrap_environment.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/evadb/configuration/bootstrap_environment.py b/evadb/configuration/bootstrap_environment.py index 4b293cb365..bb108692a4 100644 --- a/evadb/configuration/bootstrap_environment.py +++ b/evadb/configuration/bootstrap_environment.py @@ -74,7 +74,7 @@ def bootstrap_environment(evadb_dir: Path, evadb_installation_dir: Path): # TODO : Change def create_directories_and_get_default_config_values( - evadb_dir: Path, evadb_installation_dir: Path, category: str = None, key: str = None + evadb_dir: Path, evadb_installation_dir: Path ) -> Union[dict, str]: default_install_dir = evadb_installation_dir dataset_location = evadb_dir / EvaDB_DATASET_DIR @@ -112,10 +112,6 @@ def create_directories_and_get_default_config_values( config_obj["tmp_dir"] = str(tmp_dir.resolve()) config_obj["function_dir"] = str(function_dir.resolve()) config_obj["model_dir"] = str(model_dir.resolve()) - if category and key: - return config_obj.get(category, {}).get(key, None) - elif category: - return config_obj.get(category, {}) return config_obj From 2ecf6236e47a1fd5a0ba2102ec37e6f00c9472f0 Mon Sep 17 00:00:00 2001 From: Gaurav Tarlok Kakkar Date: Tue, 10 Oct 2023 03:16:16 -0400 Subject: [PATCH 13/20] fixed ray setup --- evadb/executor/load_multimedia_executor.py | 4 ++-- evadb/functions/abstract/pytorch_abstract_function.py | 3 ++- test/app_tests/test_pandas_qa.py | 4 +++- test/app_tests/test_privategpt.py | 4 +++- test/app_tests/test_youtube_channel_qa.py | 4 +++- test/app_tests/test_youtube_qa.py | 4 +++- test/integration_tests/long/test_create_index_executor.py | 2 +- .../long/test_error_handling_with_ray.py | 4 +++- test/integration_tests/long/test_load_executor.py | 4 +++- test/integration_tests/long/test_pytorch.py | 8 ++++++-- test/integration_tests/long/test_s3_load_executor.py | 4 +++- test/integration_tests/long/test_similarity.py | 2 +- 12 files changed, 33 insertions(+), 14 deletions(-) diff --git a/evadb/executor/load_multimedia_executor.py b/evadb/executor/load_multimedia_executor.py index 9f9828476f..03f1f7fb64 100644 --- a/evadb/executor/load_multimedia_executor.py +++ b/evadb/executor/load_multimedia_executor.py @@ -53,8 +53,8 @@ def exec(self, *args, **kwargs): # If it is a s3 path, download the file to local if self.node.file_path.as_posix().startswith("s3:/"): - s3_dir = self.catalog().get_configuration_catalog_value( - "s3_download_dir" + s3_dir = Path( + self.catalog().get_configuration_catalog_value("s3_download_dir") ) dst_path = s3_dir / self.node.table_info.table_name dst_path.mkdir(parents=True, exist_ok=True) diff --git a/evadb/functions/abstract/pytorch_abstract_function.py b/evadb/functions/abstract/pytorch_abstract_function.py index 4c8c1e063a..49e531655e 100644 --- a/evadb/functions/abstract/pytorch_abstract_function.py +++ b/evadb/functions/abstract/pytorch_abstract_function.py @@ -73,7 +73,8 @@ def __call__(self, *args, **kwargs) -> pd.DataFrame: if isinstance(frames, pd.DataFrame): frames = frames.transpose().values.tolist()[0] - gpu_batch_size = None + # hardcoding it for now, need to be fixed @xzdandy + gpu_batch_size = 1 import torch tens_batch = torch.cat([self.transform(x) for x in frames]).to( diff --git a/test/app_tests/test_pandas_qa.py b/test/app_tests/test_pandas_qa.py index ebb6df29cb..e45c927620 100644 --- a/test/app_tests/test_pandas_qa.py +++ b/test/app_tests/test_pandas_qa.py @@ -25,7 +25,9 @@ class PandasQATest(unittest.TestCase): def setUpClass(cls): cls.evadb = get_evadb_for_testing() cls.evadb.catalog().reset() - os.environ["ray"] = cls.evadb.catalog().get_configuration_catalog_value("ray") + os.environ["ray"] = str( + cls.evadb.catalog().get_configuration_catalog_value("ray") + ) @classmethod def tearDownClass(cls): diff --git a/test/app_tests/test_privategpt.py b/test/app_tests/test_privategpt.py index 184888dd27..ff534dec22 100644 --- a/test/app_tests/test_privategpt.py +++ b/test/app_tests/test_privategpt.py @@ -29,7 +29,9 @@ class PrivateGPTTest(unittest.TestCase): def setUpClass(cls): cls.evadb = get_evadb_for_testing() cls.evadb.catalog().reset() - os.environ["ray"] = cls.evadb.catalog().get_configuration_catalog_value("ray") + os.environ["ray"] = str( + cls.evadb.catalog().get_configuration_catalog_value("ray") + ) @classmethod def tearDownClass(cls): diff --git a/test/app_tests/test_youtube_channel_qa.py b/test/app_tests/test_youtube_channel_qa.py index 2bf70d918a..be1202bb98 100644 --- a/test/app_tests/test_youtube_channel_qa.py +++ b/test/app_tests/test_youtube_channel_qa.py @@ -24,7 +24,9 @@ class YoutubeChannelQATest(unittest.TestCase): def setUpClass(cls): cls.evadb = get_evadb_for_testing() cls.evadb.catalog().reset() - os.environ["ray"] = cls.evadb.catalog().get_configuration_catalog_value("ray") + os.environ["ray"] = str( + cls.evadb.catalog().get_configuration_catalog_value("ray") + ) @classmethod def tearDownClass(cls): diff --git a/test/app_tests/test_youtube_qa.py b/test/app_tests/test_youtube_qa.py index 837404cda2..722566e905 100644 --- a/test/app_tests/test_youtube_qa.py +++ b/test/app_tests/test_youtube_qa.py @@ -25,7 +25,9 @@ class YoutubeQATest(unittest.TestCase): def setUpClass(cls): cls.evadb = get_evadb_for_testing() cls.evadb.catalog().reset() - os.environ["ray"] = cls.evadb.catalog().get_configuration_catalog_value("ray") + os.environ["ray"] = str( + cls.evadb.catalog().get_configuration_catalog_value("ray") + ) @classmethod def tearDownClass(cls): diff --git a/test/integration_tests/long/test_create_index_executor.py b/test/integration_tests/long/test_create_index_executor.py index feabb5bff5..f44ef8f823 100644 --- a/test/integration_tests/long/test_create_index_executor.py +++ b/test/integration_tests/long/test_create_index_executor.py @@ -33,7 +33,7 @@ class CreateIndexTest(unittest.TestCase): def _index_save_path(self): return str( - Path(self.evadb.config.get_value("storage", "index_dir")) + Path(self.evadb.catalog().get_configuration_catalog_value("index_dir")) / Path("{}_{}.index".format("FAISS", "testCreateIndexName")) ) diff --git a/test/integration_tests/long/test_error_handling_with_ray.py b/test/integration_tests/long/test_error_handling_with_ray.py index da134b7ed3..c2d71e7fe3 100644 --- a/test/integration_tests/long/test_error_handling_with_ray.py +++ b/test/integration_tests/long/test_error_handling_with_ray.py @@ -32,7 +32,9 @@ class ErrorHandlingRayTests(unittest.TestCase): def setUp(self): self.evadb = get_evadb_for_testing() - os.environ["ray"] = str(self.evadb.config.get_value("experimental", "ray")) + os.environ["ray"] = str( + self.evadb.catalog().get_configuration_catalog_value("ray") + ) self.evadb.catalog().reset() # Load built-in Functions. load_functions_for_testing(self.evadb, mode="debug") diff --git a/test/integration_tests/long/test_load_executor.py b/test/integration_tests/long/test_load_executor.py index 05a479a8f4..0ca421cf1a 100644 --- a/test/integration_tests/long/test_load_executor.py +++ b/test/integration_tests/long/test_load_executor.py @@ -89,7 +89,9 @@ def test_should_form_symlink_to_individual_video(self): # check that the file is a symlink to self.video_file_path video_file_path = os.path.join(video_dir, video_file) self.assertTrue(os.path.islink(video_file_path)) - self.assertEqual(os.readlink(video_file_path), self.video_file_path) + self.assertEqual( + os.readlink(video_file_path), str(Path(self.video_file_path).resolve()) + ) execute_query_fetch_all(self.evadb, "DROP TABLE IF EXISTS MyVideo;") diff --git a/test/integration_tests/long/test_pytorch.py b/test/integration_tests/long/test_pytorch.py index 3fd758da29..b0d1cf6327 100644 --- a/test/integration_tests/long/test_pytorch.py +++ b/test/integration_tests/long/test_pytorch.py @@ -49,7 +49,9 @@ class PytorchTest(unittest.TestCase): def setUpClass(cls): cls.evadb = get_evadb_for_testing() cls.evadb.catalog().reset() - os.environ["ray"] = cls.evadb.catalog().get_configuration_catalog_value("ray") + os.environ["ray"] = str( + cls.evadb.catalog().get_configuration_catalog_value("ray") + ) ua_detrac = f"{EvaDB_ROOT_DIR}/data/ua_detrac/ua_detrac.mp4" mnist = f"{EvaDB_ROOT_DIR}/data/mnist/mnist.mp4" @@ -295,7 +297,9 @@ def test_should_run_pytorch_and_similarity(self): batch_res = execute_query_fetch_all(self.evadb, select_query) img = batch_res.frames["myvideo.data"][0] - tmp_dir_from_config = self.evadb.config.get_value("storage", "tmp_dir") + tmp_dir_from_config = self.evadb.catalog().get_configuration_catalog_value( + "tmp_dir" + ) img_save_path = os.path.join(tmp_dir_from_config, "dummy.jpg") try: diff --git a/test/integration_tests/long/test_s3_load_executor.py b/test/integration_tests/long/test_s3_load_executor.py index bdd6487249..e6f961de60 100644 --- a/test/integration_tests/long/test_s3_load_executor.py +++ b/test/integration_tests/long/test_s3_load_executor.py @@ -41,7 +41,9 @@ def setUp(self): self.evadb.catalog().reset() self.video_file_path = create_sample_video() self.multiple_video_file_path = f"{EvaDB_ROOT_DIR}/data/sample_videos/1" - self.s3_download_dir = self.evadb.config.get_value("storage", "s3_download_dir") + self.s3_download_dir = self.evadb.catalog().get_configuration_catalog_value( + "s3_download_dir" + ) """Mocked AWS Credentials for moto.""" os.environ["AWS_ACCESS_KEY_ID"] = "testing" diff --git a/test/integration_tests/long/test_similarity.py b/test/integration_tests/long/test_similarity.py index 8dc7a2e3cc..35f70948f8 100644 --- a/test/integration_tests/long/test_similarity.py +++ b/test/integration_tests/long/test_similarity.py @@ -107,7 +107,7 @@ def setUp(self): # Create an actual image dataset. img_save_path = os.path.join( - self.evadb.config.get_value("storage", "tmp_dir"), + self.evadb.catalog().get_configuration_catalog_value("tmp_dir"), f"test_similar_img{i}.jpg", ) try_to_import_cv2() From 9289dfc57c7caa2293df0105debc177874bbdcf5 Mon Sep 17 00:00:00 2001 From: Gaurav Tarlok Kakkar Date: Tue, 10 Oct 2023 03:36:18 -0400 Subject: [PATCH 14/20] fix all testcases --- test/integration_tests/long/test_optimizer_rules.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/test/integration_tests/long/test_optimizer_rules.py b/test/integration_tests/long/test_optimizer_rules.py index e703492d6b..eb515a19a2 100644 --- a/test/integration_tests/long/test_optimizer_rules.py +++ b/test/integration_tests/long/test_optimizer_rules.py @@ -34,7 +34,6 @@ PushDownFilterThroughApplyAndMerge, PushDownFilterThroughJoin, ReorderPredicates, - XformLateralJoinToLinearFlow, ) from evadb.optimizer.rules.rules_manager import RulesManager, disable_rules from evadb.plan_nodes.predicate_plan import PredicatePlan @@ -108,16 +107,6 @@ def test_should_benefit_from_pushdown(self, merge_mock, evaluate_mock): # on all the frames self.assertGreater(evaluate_count_without_rule, 3 * evaluate_count_with_rule) - result_without_xform_rule = None - rules_manager = RulesManager() - with disable_rules(rules_manager, [XformLateralJoinToLinearFlow()]): - custom_plan_generator = PlanGenerator(self.evadb, rules_manager) - result_without_xform_rule = execute_query_fetch_all( - self.evadb, query, plan_generator=custom_plan_generator - ) - - self.assertEqual(result_without_xform_rule, result_with_rule) - def test_should_pushdown_without_pushdown_join_rule(self): query = """SELECT id, obj.labels FROM MyVideo JOIN LATERAL From 5951906b0632bbf3bfb3aefd045d8ecd659cbe3d Mon Sep 17 00:00:00 2001 From: Gaurav Tarlok Kakkar Date: Mon, 16 Oct 2023 15:08:58 -0400 Subject: [PATCH 15/20] address PR commits --- evadb/binder/statement_binder.py | 2 +- evadb/catalog/catalog_manager.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/evadb/binder/statement_binder.py b/evadb/binder/statement_binder.py index 49ec5e154c..39e95a3b0b 100644 --- a/evadb/binder/statement_binder.py +++ b/evadb/binder/statement_binder.py @@ -341,7 +341,7 @@ def _bind_func_expr(self, node: FunctionExpression): # OpenAPI key as part of the parameter # ToDO: this should be better handled properties = get_metadata_properties(function_obj) - if node.name.upper() == str("CHATGPT"): + if string_comparison_case_insensitive(node.name, "CHATGPT"): openapi_key = self._catalog().get_configuration_catalog_value( "OPENAI_KEY" ) diff --git a/evadb/catalog/catalog_manager.py b/evadb/catalog/catalog_manager.py index 029d03da01..7a7e5b9c4f 100644 --- a/evadb/catalog/catalog_manager.py +++ b/evadb/catalog/catalog_manager.py @@ -630,7 +630,7 @@ def upsert_configuration_catalog_entry(self, key: str, value: any): """ self._config_catalog_service.upsert_entry(key, value) - def get_configuration_catalog_value(self, key: str) -> Any: + def get_configuration_catalog_value(self, key: str, default:Any = None) -> Any: """ Returns the value entry for the given key Arguments: @@ -643,4 +643,4 @@ def get_configuration_catalog_value(self, key: str) -> Any: table_entry = self._config_catalog_service.get_entry_by_name(key) if table_entry: return table_entry.value - return None + return default From fad7b68ab1634354dfdaec41463411c969816aad Mon Sep 17 00:00:00 2001 From: Gaurav Tarlok Kakkar Date: Mon, 16 Oct 2023 15:20:03 -0400 Subject: [PATCH 16/20] fix merge --- evadb/binder/statement_binder.py | 13 +++++++------ evadb/executor/show_info_executor.py | 3 +-- evadb/functions/chatgpt.py | 10 +++++----- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/evadb/binder/statement_binder.py b/evadb/binder/statement_binder.py index 39e95a3b0b..cc24068959 100644 --- a/evadb/binder/statement_binder.py +++ b/evadb/binder/statement_binder.py @@ -338,14 +338,15 @@ def _bind_func_expr(self, node: FunctionExpression): # certain functions take additional inputs like yolo needs the model_name # these arguments are passed by the user as part of metadata # we also handle the special case of ChatGPT where we need to send the - # OpenAPI key as part of the parameter - # ToDO: this should be better handled + # OpenAPI key as part of the parameter if not provided by the user properties = get_metadata_properties(function_obj) if string_comparison_case_insensitive(node.name, "CHATGPT"): - openapi_key = self._catalog().get_configuration_catalog_value( - "OPENAI_KEY" - ) - properties["api_key"] = openapi_key + # if the user didn't provide any API_KEY, check if we have one in the catalog + if "OPENAI_API_KEY" not in properties.keys(): + openapi_key = self._catalog().get_configuration_catalog_value( + "OPENAI_API_KEY" + ) + properties["openai_api_key"] = openapi_key node.function = lambda: function_class(**properties) except Exception as e: diff --git a/evadb/executor/show_info_executor.py b/evadb/executor/show_info_executor.py index e4894aacf1..e928d777f0 100644 --- a/evadb/executor/show_info_executor.py +++ b/evadb/executor/show_info_executor.py @@ -46,8 +46,7 @@ def exec(self, *args, **kwargs): show_entries.append(table.name) show_entries = {"name": show_entries} elif self.node.show_type is ShowType.CONFIG: - value = self._config.get_value( - category="default", + value = self.catalog().get_configuration_catalog_value( key=self.node.show_val.upper(), ) show_entries = {} diff --git a/evadb/functions/chatgpt.py b/evadb/functions/chatgpt.py index 532e13e3de..fadc61191a 100644 --- a/evadb/functions/chatgpt.py +++ b/evadb/functions/chatgpt.py @@ -84,12 +84,12 @@ def setup( self, model="gpt-3.5-turbo", temperature: float = 0, - api_key="", + openai_api_key="", ) -> None: assert model in _VALID_CHAT_COMPLETION_MODEL, f"Unsupported ChatGPT {model}" self.model = model self.temperature = temperature - self.api_key = api_key + self.openai_api_key = openai_api_key @forward( input_signatures=[ @@ -121,12 +121,12 @@ def forward(self, text_df): def completion_with_backoff(**kwargs): return openai.ChatCompletion.create(**kwargs) - openai.api_key = self.api_key + openai.api_key = self.openai_api_key if len(openai.api_key) == 0: - openai.api_key = os.environ.get("OPENAI_KEY", "") + openai.api_key = os.environ.get("OPENAI_API_KEY", "") assert ( len(openai.api_key) != 0 - ), "Please set your OpenAI API key in evadb.yml file (third_party, open_api_key) or environment variable (OPENAI_KEY)" + ), "Please set your OpenAI API key using SET OPENAI_API_KEY = 'sk-' or environment variable (OPENAI_API_KEY)" queries = text_df[text_df.columns[0]] content = text_df[text_df.columns[0]] From 09ca39d629f50ce179693448c45e7618136d4e8b Mon Sep 17 00:00:00 2001 From: Gaurav Tarlok Kakkar Date: Mon, 16 Oct 2023 15:30:41 -0400 Subject: [PATCH 17/20] fix linter --- .circleci/config.yml | 2 +- evadb/catalog/catalog_manager.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 01984e154b..cb7ad985d9 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -234,7 +234,7 @@ jobs: else pip install ".[dev,pinecone,chromadb]" # ray < 2.5.0 does not work with python 3.11 ray-project/ray#33864 fi - python -c "import yaml;f = open('evadb/evadb.yml', 'r+');config_obj = yaml.load(f, Loader=yaml.FullLoader);config_obj['experimental']['ray'] = True;f.seek(0);f.write(yaml.dump(config_obj));f.truncate();" + python -c "import evadb;cur=evadb.connect().cursor();cur.query('SET ray=True';)" else if [ $PY_VERSION != "3.11" ]; then pip install ".[dev,ludwig,qdrant,pinecone,chromadb]" diff --git a/evadb/catalog/catalog_manager.py b/evadb/catalog/catalog_manager.py index 7a7e5b9c4f..df621da33e 100644 --- a/evadb/catalog/catalog_manager.py +++ b/evadb/catalog/catalog_manager.py @@ -630,7 +630,7 @@ def upsert_configuration_catalog_entry(self, key: str, value: any): """ self._config_catalog_service.upsert_entry(key, value) - def get_configuration_catalog_value(self, key: str, default:Any = None) -> Any: + def get_configuration_catalog_value(self, key: str, default: Any = None) -> Any: """ Returns the value entry for the given key Arguments: From eb4ee8b5679802cc478bc969b14e5cb7e8929974 Mon Sep 17 00:00:00 2001 From: Gaurav Tarlok Kakkar Date: Mon, 16 Oct 2023 15:33:13 -0400 Subject: [PATCH 18/20] move openai_key -> openai_api_key --- apps/pandas_qa/pandas_qa.py | 4 +-- apps/youtube_qa/youtube_qa.py | 4 +-- docs/source/overview/concepts.rst | 2 +- docs/source/usecases/question-answering.rst | 2 +- evadb/evadb.yml | 29 --------------------- evadb/evadb_config.py | 2 +- test/app_tests/test_youtube_channel_qa.py | 2 +- test/app_tests/test_youtube_qa.py | 2 +- 8 files changed, 9 insertions(+), 38 deletions(-) delete mode 100644 evadb/evadb.yml diff --git a/apps/pandas_qa/pandas_qa.py b/apps/pandas_qa/pandas_qa.py index 8e4b6868a0..b81ce27d11 100644 --- a/apps/pandas_qa/pandas_qa.py +++ b/apps/pandas_qa/pandas_qa.py @@ -53,10 +53,10 @@ def receive_user_input() -> Dict: # get OpenAI key if needed try: - api_key = os.environ["OPENAI_KEY"] + api_key = os.environ["OPENAI_API_KEY"] except KeyError: api_key = str(input("🔑 Enter your OpenAI key: ")) - os.environ["OPENAI_KEY"] = api_key + os.environ["OPENAI_API_KEY"] = api_key return user_input diff --git a/apps/youtube_qa/youtube_qa.py b/apps/youtube_qa/youtube_qa.py index 5a56bbe29d..ee4626473c 100644 --- a/apps/youtube_qa/youtube_qa.py +++ b/apps/youtube_qa/youtube_qa.py @@ -93,10 +93,10 @@ def receive_user_input() -> Dict: # get OpenAI key if needed try: - api_key = os.environ["OPENAI_KEY"] + api_key = os.environ["OPENAI_API_KEY"] except KeyError: api_key = str(input("🔑 Enter your OpenAI key: ")) - os.environ["OPENAI_KEY"] = api_key + os.environ["OPENAI_API_KEY"] = api_key return user_input diff --git a/docs/source/overview/concepts.rst b/docs/source/overview/concepts.rst index c9dba32159..f2905b640f 100644 --- a/docs/source/overview/concepts.rst +++ b/docs/source/overview/concepts.rst @@ -46,7 +46,7 @@ Here are some illustrative **AI queries** for a ChatGPT-based video question ans --- The 'transcripts' table has a column called 'text' with the transcript text --- Since ChatGPT is a built-in function in EvaDB, we don't have to define it --- We can directly use ChatGPT() in any query - --- We will only need to set the OPENAI_KEY as an environment variable + --- We will only need to set the OPENAI_API_KEY as an environment variable SELECT ChatGPT('Is this video summary related to Ukraine russia war', text) FROM TEXT_SUMMARY; diff --git a/docs/source/usecases/question-answering.rst b/docs/source/usecases/question-answering.rst index 7a1235da04..15f548d9cb 100644 --- a/docs/source/usecases/question-answering.rst +++ b/docs/source/usecases/question-answering.rst @@ -57,7 +57,7 @@ EvaDB has built-in support for ``ChatGPT`` function from ``OpenAI``. You will ne # Set OpenAI key import os - os.environ["OPENAI_KEY"] = "sk-..." + os.environ["OPENAI_API_KEY"] = "sk-..." .. note:: diff --git a/evadb/evadb.yml b/evadb/evadb.yml deleted file mode 100644 index e7d6a38943..0000000000 --- a/evadb/evadb.yml +++ /dev/null @@ -1,29 +0,0 @@ -core: - evadb_installation_dir: "" - datasets_dir: "" - catalog_database_uri: "" - application: "evadb" - mode: "release" #release or debug - -executor: - # batch_mem_size configures the number of rows processed by the execution engine in one iteration - # rows = max(1, row_mem_size / batch_mem_size) - batch_mem_size: 30000000 - - # batch size used for gpu_operations - gpu_batch_size: 1 - - gpu_ids: [0] - -server: - host: "0.0.0.0" - port: 8803 - socket_timeout: 60 - -experimental: - ray: False - -third_party: - OPENAI_KEY: "" - PINECONE_API_KEY: "" - PINECONE_ENV: "" diff --git a/evadb/evadb_config.py b/evadb/evadb_config.py index e012f95567..800112a601 100644 --- a/evadb/evadb_config.py +++ b/evadb/evadb_config.py @@ -33,7 +33,7 @@ "port": 8803, "socket_timeout": 60, "ray": False, - "OPENAI_KEY": "", + "OPENAI_API_KEY": "", "PINECONE_API_KEY": "", "PINECONE_ENV": "", } diff --git a/test/app_tests/test_youtube_channel_qa.py b/test/app_tests/test_youtube_channel_qa.py index be1202bb98..edb138588d 100644 --- a/test/app_tests/test_youtube_channel_qa.py +++ b/test/app_tests/test_youtube_channel_qa.py @@ -41,7 +41,7 @@ def tearDown(self) -> None: def test_should_run_youtube_channel_qa_app(self): app_path = Path("apps", "youtube_channel_qa", "youtube_channel_qa.py") input1 = "\n\n\n" # Download just one video from the default channel in the default order. - # Assuming that OPENAI_KEY is already set as an environment variable + # Assuming that OPENAI_API_KEY is already set as an environment variable input2 = "What is this video about?\n" # Question input3 = "exit\n" # Exit inputs = input1 + input2 + input3 diff --git a/test/app_tests/test_youtube_qa.py b/test/app_tests/test_youtube_qa.py index 722566e905..9402654bdf 100644 --- a/test/app_tests/test_youtube_qa.py +++ b/test/app_tests/test_youtube_qa.py @@ -43,7 +43,7 @@ def tearDown(self) -> None: def test_should_run_youtube_qa_app(self): app_path = Path("apps", "youtube_qa", "youtube_qa.py") input1 = "yes\n\n" # Go with online video and default URL - # Assuming that OPENAI_KEY is already set as an environment variable + # Assuming that OPENAI_API_KEY is already set as an environment variable input2 = "What is this video on?\n" # Question input3 = "exit\nexit\n" # Exit inputs = input1 + input2 + input3 From c84e0d70a291e567246faf83148b65e80d3d3108 Mon Sep 17 00:00:00 2001 From: Gaurav Tarlok Kakkar Date: Mon, 16 Oct 2023 16:29:09 -0400 Subject: [PATCH 19/20] minor fixes --- docs/source/reference/ai/custom-ai-function.rst | 3 --- evadb/functions/dalle.py | 12 +++++------- evadb/functions/stable_diffusion.py | 14 ++++---------- 3 files changed, 9 insertions(+), 20 deletions(-) diff --git a/docs/source/reference/ai/custom-ai-function.rst b/docs/source/reference/ai/custom-ai-function.rst index 71c19bb91c..d03b684cbe 100644 --- a/docs/source/reference/ai/custom-ai-function.rst +++ b/docs/source/reference/ai/custom-ai-function.rst @@ -258,9 +258,6 @@ The following code can be used to create an Object Detection function using Yolo try_to_import_openai() import openai - #setting up the key - openai.api_key = ConfigurationManager().get_value("third_party", "OPENAI_KEY") - #getting the data content = text_df[text_df.columns[0]] responses = [] diff --git a/evadb/functions/dalle.py b/evadb/functions/dalle.py index efc075d733..7c1dc39dd0 100644 --- a/evadb/functions/dalle.py +++ b/evadb/functions/dalle.py @@ -22,7 +22,6 @@ from PIL import Image from evadb.catalog.catalog_type import NdArrayType -from evadb.configuration.configuration_manager import ConfigurationManager from evadb.functions.abstract.abstract_function import AbstractFunction from evadb.functions.decorators.decorators import forward from evadb.functions.decorators.io_descriptors.data_types import PandasDataframe @@ -34,8 +33,8 @@ class DallEFunction(AbstractFunction): def name(self) -> str: return "DallE" - def setup(self) -> None: - pass + def setup(self, openai_api_key="") -> None: + self.openai_api_key = openai_api_key @forward( input_signatures=[ @@ -59,14 +58,13 @@ def forward(self, text_df): try_to_import_openai() import openai - # Register API key, try configuration manager first - openai.api_key = ConfigurationManager().get_value("third_party", "OPENAI_KEY") + openai.api_key = self.openai_api_key # If not found, try OS Environment Variable if len(openai.api_key) == 0: - openai.api_key = os.environ.get("OPENAI_KEY", "") + openai.api_key = os.environ.get("OPENAI_API_KEY", "") assert ( len(openai.api_key) != 0 - ), "Please set your OpenAI API key in evadb.yml file (third_party, open_api_key) or environment variable (OPENAI_KEY)" + ), "Please set your OpenAI API key using SET OPENAI_API_KEY = 'sk-' or environment variable (OPENAI_API_KEY)" def generate_image(text_df: PandasDataframe): results = [] diff --git a/evadb/functions/stable_diffusion.py b/evadb/functions/stable_diffusion.py index 6e84d687fa..c5337a27c0 100644 --- a/evadb/functions/stable_diffusion.py +++ b/evadb/functions/stable_diffusion.py @@ -22,7 +22,6 @@ from PIL import Image from evadb.catalog.catalog_type import NdArrayType -from evadb.configuration.configuration_manager import ConfigurationManager from evadb.functions.abstract.abstract_function import AbstractFunction from evadb.functions.decorators.decorators import forward from evadb.functions.decorators.io_descriptors.data_types import PandasDataframe @@ -34,10 +33,8 @@ class StableDiffusion(AbstractFunction): def name(self) -> str: return "StableDiffusion" - def setup( - self, - ) -> None: - pass + def setup(self, replicate_api_token="") -> None: + self.replicate_api_token = replicate_api_token @forward( input_signatures=[ @@ -64,16 +61,13 @@ def forward(self, text_df): try_to_import_replicate() import replicate - # Register API key, try configuration manager first - replicate_api_key = ConfigurationManager().get_value( - "third_party", "REPLICATE_API_TOKEN" - ) + replicate_api_key = self.replicate_api_token # If not found, try OS Environment Variable if len(replicate_api_key) == 0: replicate_api_key = os.environ.get("REPLICATE_API_TOKEN", "") assert ( len(replicate_api_key) != 0 - ), "Please set your Replicate API key in evadb.yml file (third_party, replicate_api_token) or environment variable (REPLICATE_API_TOKEN)" + ), "Please set your Replicate API key using SET REPLICATE_API_TOKEN = '' or set the environment variable (REPLICATE_API_TOKEN)" os.environ["REPLICATE_API_TOKEN"] = replicate_api_key model_id = ( From a4276aef24b054cb69bb43064c46fc9052eb436e Mon Sep 17 00:00:00 2001 From: Gaurav Tarlok Kakkar Date: Mon, 16 Oct 2023 16:42:08 -0400 Subject: [PATCH 20/20] fix test case --- test/unit_tests/test_dalle.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/unit_tests/test_dalle.py b/test/unit_tests/test_dalle.py index a7a9536fa2..c434a4db4a 100644 --- a/test/unit_tests/test_dalle.py +++ b/test/unit_tests/test_dalle.py @@ -41,7 +41,7 @@ def setUp(self) -> None: def tearDown(self) -> None: execute_query_fetch_all(self.evadb, "DROP TABLE IF EXISTS ImageGen;") - @patch.dict("os.environ", {"OPENAI_KEY": "mocked_openai_key"}) + @patch.dict("os.environ", {"OPENAI_API_KEY": "mocked_openai_key"}) @patch("requests.get") @patch("openai.Image.create", return_value={"data": [{"url": "mocked_url"}]}) def test_dalle_image_generation(self, mock_openai_create, mock_requests_get):