diff --git a/README.md b/README.md index 2ce17a8e8..b75985395 100644 --- a/README.md +++ b/README.md @@ -191,8 +191,8 @@ SELECT ChatGPT('Is this video summary related to Ukraine russia war', text) CREATE FUNCTION IF NOT EXISTS PredictHouseRent FROM ( SELECT * FROM HomeRentals ) TYPE Ludwig -'predict' 'rental_price' -'time_limit' 120; +PREDICT 'rental_price' +TIME_LIMIT 120; ``` diff --git a/benchmark/text_summarization/text_summarization_with_evadb.py b/benchmark/text_summarization/text_summarization_with_evadb.py index 7e788a088..f5e7e5237 100644 --- a/benchmark/text_summarization/text_summarization_with_evadb.py +++ b/benchmark/text_summarization/text_summarization_with_evadb.py @@ -16,10 +16,10 @@ cursor.query("DROP UDF IF EXISTS TextSummarizer;").df() cursor.query("""CREATE UDF IF NOT EXISTS TextSummarizer TYPE HuggingFace - 'task' 'summarization' - 'model' 'sshleifer/distilbart-cnn-12-6' - 'min_length' 5 - 'max_length' 100;""").df() + TASK 'summarization' + MODEL 'sshleifer/distilbart-cnn-12-6' + MIN_LENGTH 5 + MAX_LENGTH 100;""").df() cursor.query("DROP TABLE IF EXISTS cnn_news_summary;").df() diff --git a/docs/_toc.yml b/docs/_toc.yml index 14dffd09b..927d13197 100644 --- a/docs/_toc.yml +++ b/docs/_toc.yml @@ -66,7 +66,7 @@ parts: sections: - file: source/reference/ai/model-train title: Model Training - - file: source/reference/udfs/model-forecasting + - file: source/reference/ai/model-forecasting title: Time Series Forecasting - file: source/reference/ai/hf title: Hugging Face diff --git a/docs/source/benchmarks/text_summarization.rst b/docs/source/benchmarks/text_summarization.rst index fa8d87a45..119803f39 100644 --- a/docs/source/benchmarks/text_summarization.rst +++ b/docs/source/benchmarks/text_summarization.rst @@ -47,10 +47,10 @@ Creating Text Summarization Function in EvaDB CREATE UDF IF NOT EXISTS TextSummarizer TYPE HuggingFace - 'task' 'summarization' - 'model' 'sshleifer/distilbart-cnn-12-6' - 'min_length' 5 - 'max_length' 100; + TASK 
'summarization' + MODEL 'sshleifer/distilbart-cnn-12-6' + MIN_LENGTH 5 + MAX_LENGTH 100; Tuning EvaDB for Maximum GPU Utilization diff --git a/docs/source/overview/concepts.rst b/docs/source/overview/concepts.rst index b37478f06..1c9317114 100644 --- a/docs/source/overview/concepts.rst +++ b/docs/source/overview/concepts.rst @@ -24,8 +24,8 @@ Here is set of illustrative EvaQL queries for a ChatGPT-based video question ans --- After creating the function, we can use the function in any future query CREATE UDF SpeechRecognizer TYPE HuggingFace - 'task' 'automatic-speech-recognition' - 'model' 'openai/whisper-base'; + TASK 'automatic-speech-recognition' + MODEL 'openai/whisper-base'; -- EvaDB automatically extracts the audio from the videos --- We only need to run the SpeechRecognizer UDF on the 'audio' column diff --git a/docs/source/reference/ai/hf.rst b/docs/source/reference/ai/hf.rst index f1c8ffc97..8a0831331 100644 --- a/docs/source/reference/ai/hf.rst +++ b/docs/source/reference/ai/hf.rst @@ -15,8 +15,8 @@ EvaDB supports functions similar to `Pipelines str: if self._metadata is not None: for key, value in self._metadata: - s += f" '{key}' '{value}'" + # NOTE: emit the key unquoted and in upper case (e.g. TASK 'value'), + # because the tests compare the generated query string verbatim + s += f" {key.upper()} '{value}'" return s @property diff --git a/evadb/parser/evadb.lark b/evadb/parser/evadb.lark index 802f41fa3..a958dd54c 100644 --- a/evadb/parser/evadb.lark +++ b/evadb/parser/evadb.lark @@ -49,7 +49,7 @@ function_impl: string_literal function_metadata: function_metadata_key function_metadata_value -function_metadata_key: string_literal +function_metadata_key: uid function_metadata_value: string_literal | decimal_literal diff --git a/evadb/parser/lark_visitor/_functions.py b/evadb/parser/lark_visitor/_functions.py index 25cfeb12c..6c354cac1 100644 --- a/evadb/parser/lark_visitor/_functions.py +++ b/evadb/parser/lark_visitor/_functions.py @@ -97,7 +97,9 @@ def 
create_function(self, tree): value = key_value_pair[1] if isinstance(value, ConstantValueExpression): value = value.value - metadata.append((key_value_pair[0].value, value)), + # The metadata key is now an identifier (grammar rule uid), not a string literal, so it is used directly instead of via .value + # Lower-case the key so that TASK and task map to the same metadata key + metadata.append((key_value_pair[0].lower(), value)), return CreateFunctionStatement( function_name, diff --git a/evadb/parser/utils.py b/evadb/parser/utils.py index 31615992d..70db55cec 100644 --- a/evadb/parser/utils.py +++ b/evadb/parser/utils.py @@ -74,7 +74,7 @@ def parse_create_function( mock_query += f" TYPE {type}" task, model = kwargs["task"], kwargs["model"] if task is not None and model is not None: - mock_query += f" 'task' '{task}' 'model' '{model}'" + mock_query += f" TASK '{task}' MODEL '{model}'" else: mock_query += f" IMPL '{function_file_path}'" mock_query += ";" diff --git a/test/benchmark_tests/test_benchmark_pytorch.py b/test/benchmark_tests/test_benchmark_pytorch.py index de6a01439..b9300cfd6 100644 --- a/test/benchmark_tests/test_benchmark_pytorch.py +++ b/test/benchmark_tests/test_benchmark_pytorch.py @@ -109,7 +109,7 @@ def test_automatic_speech_recognition(benchmark, setup_pytorch_tests): udf_name = "SpeechRecognizer" create_udf = ( f"CREATE UDF {udf_name} TYPE HuggingFace " - "'task' 'automatic-speech-recognition' 'model' 'openai/whisper-base';" + "TASK 'automatic-speech-recognition' MODEL 'openai/whisper-base';" ) execute_query_fetch_all(setup_pytorch_tests, create_udf) @@ -135,14 +135,14 @@ def test_summarization_from_video(benchmark, setup_pytorch_tests): asr_udf = "SpeechRecognizer" create_udf = ( f"CREATE UDF {asr_udf} TYPE HuggingFace " - "'task' 'automatic-speech-recognition' 'model' 'openai/whisper-base';" + "TASK 'automatic-speech-recognition' MODEL 'openai/whisper-base';" ) execute_query_fetch_all(setup_pytorch_tests, create_udf) summary_udf = "Summarizer" create_udf = ( f"CREATE UDF {summary_udf} TYPE HuggingFace " - "'task' 
'summarization' 'model' 'philschmid/bart-large-cnn-samsum' 'min_length' 10 'max_length' 100;" + "TASK 'summarization' MODEL 'philschmid/bart-large-cnn-samsum' MIN_LENGTH 10 MAX_LENGTH 100;" ) execute_query_fetch_all(setup_pytorch_tests, create_udf) diff --git a/test/integration_tests/long/interfaces/relational/test_relational_api.py b/test/integration_tests/long/interfaces/relational/test_relational_api.py index 411a2e51f..773607960 100644 --- a/test/integration_tests/long/interfaces/relational/test_relational_api.py +++ b/test/integration_tests/long/interfaces/relational/test_relational_api.py @@ -231,7 +231,7 @@ def test_create_function_with_relational_api(self): query = create_speech_recognizer_function_if_not_exists.sql_query() self.assertEqual( query, - """CREATE FUNCTION IF NOT EXISTS SpeechRecognizer TYPE HuggingFace 'task' 'automatic-speech-recognition' 'model' 'openai/whisper-base'""", + """CREATE FUNCTION IF NOT EXISTS SpeechRecognizer TYPE HuggingFace TASK 'automatic-speech-recognition' MODEL 'openai/whisper-base'""", ) create_speech_recognizer_function_if_not_exists.execute() @@ -242,7 +242,7 @@ def test_create_function_with_relational_api(self): query = create_speech_recognizer_function.sql_query() self.assertEqual( query, - "CREATE FUNCTION SpeechRecognizer TYPE HuggingFace 'task' 'automatic-speech-recognition' 'model' 'openai/whisper-base'", + "CREATE FUNCTION SpeechRecognizer TYPE HuggingFace TASK 'automatic-speech-recognition' MODEL 'openai/whisper-base'", ) with self.assertRaises(ExecutorError): create_speech_recognizer_function.execute() diff --git a/test/integration_tests/long/test_error_handling_with_ray.py b/test/integration_tests/long/test_error_handling_with_ray.py index de0ba4d3f..da134b7ed 100644 --- a/test/integration_tests/long/test_error_handling_with_ray.py +++ b/test/integration_tests/long/test_error_handling_with_ray.py @@ -58,7 +58,7 @@ def test_ray_error_populate_to_all_stages(self): function_name, task = "HFObjectDetector", 
"image-classification" create_function_query = f"""CREATE FUNCTION {function_name} TYPE HuggingFace - 'task' '{task}' + TASK '{task}' """ execute_query_fetch_all(self.evadb, create_function_query) diff --git a/test/integration_tests/long/test_function_executor.py b/test/integration_tests/long/test_function_executor.py index 77c7e74c0..28368ddad 100644 --- a/test/integration_tests/long/test_function_executor.py +++ b/test/integration_tests/long/test_function_executor.py @@ -175,8 +175,8 @@ def test_should_create_function_with_metadata(self): OUTPUT (label NDARRAY STR(10)) TYPE Classification IMPL 'test/util.py' - 'CACHE' 'TRUE' - 'BATCH' 'FALSE'; + CACHE 'TRUE' + BATCH 'FALSE'; """ execute_query_fetch_all(self.evadb, create_function_query.format(function_name)) @@ -187,7 +187,8 @@ def test_should_create_function_with_metadata(self): self.assertEqual(len(entries), 2) metadata = [(entry.key, entry.value) for entry in entries] - expected_metadata = [("CACHE", "TRUE"), ("BATCH", "FALSE")] + # the parser lower-cases metadata keys; values are stored unchanged + expected_metadata = [("cache", "TRUE"), ("batch", "FALSE")] self.assertEqual(set(metadata), set(expected_metadata)) def test_should_return_empty_metadata_list_for_missing_function(self): @@ -205,8 +206,8 @@ def test_should_return_empty_metadata_list_if_function_is_removed(self): OUTPUT (label NDARRAY STR(10)) TYPE Classification IMPL 'test/util.py' - 'CACHE' 'TRUE' - 'BATCH' 'FALSE'; + CACHE 'TRUE' + BATCH 'FALSE'; """ execute_query_fetch_all(self.evadb, create_function_query.format(function_name)) diff --git a/test/integration_tests/long/test_huggingface_functions.py b/test/integration_tests/long/test_huggingface_functions.py index b125d78b2..e2a258525 100644 --- a/test/integration_tests/long/test_huggingface_functions.py +++ b/test/integration_tests/long/test_huggingface_functions.py @@ -55,7 +55,7 @@ def test_io_catalog_entries_populated(self): function_name, task = "HFObjectDetector", "image-classification" 
create_function_query = f"""CREATE FUNCTION {function_name} TYPE HuggingFace - 'task' '{task}' + TASK '{task}' """ execute_query_fetch_all(self.evadb, create_function_query) @@ -79,7 +79,7 @@ def test_raise_error_on_unsupported_task(self): task = "zero-shot-object-detection" create_function_query = f"""CREATE FUNCTION {function_name} TYPE HuggingFace - 'task' '{task}' + TASK '{task}' """ # catch an assert @@ -95,8 +95,8 @@ def test_object_detection(self): function_name = "HFObjectDetector" create_function_query = f"""CREATE FUNCTION {function_name} TYPE HuggingFace - 'task' 'object-detection' - 'model' 'facebook/detr-resnet-50'; + TASK 'object-detection' + MODEL 'facebook/detr-resnet-50'; """ execute_query_fetch_all(self.evadb, create_function_query) @@ -147,7 +147,7 @@ def test_image_classification(self): function_name = "HFImageClassifier" create_function_query = f"""CREATE FUNCTION {function_name} TYPE HuggingFace - 'task' 'image-classification' + TASK 'image-classification' """ execute_query_fetch_all(self.evadb, create_function_query) @@ -192,7 +192,7 @@ def test_text_classification(self): function_name = "HFTextClassifier" create_function_query = f"""CREATE FUNCTION {function_name} TYPE HuggingFace - 'task' 'text-classification' + TASK 'text-classification' """ execute_query_fetch_all(self.evadb, create_function_query) @@ -229,7 +229,7 @@ def test_automatic_speech_recognition(self): function_name = "SpeechRecognizer" create_function = ( f"CREATE FUNCTION {function_name} TYPE HuggingFace " - "'task' 'automatic-speech-recognition' 'model' 'openai/whisper-base';" + "TASK 'automatic-speech-recognition' MODEL 'openai/whisper-base';" ) execute_query_fetch_all(self.evadb, create_function) @@ -258,14 +258,14 @@ def test_summarization_from_video(self): asr_function = "SpeechRecognizer" create_function = ( f"CREATE FUNCTION {asr_function} TYPE HuggingFace " - "'task' 'automatic-speech-recognition' 'model' 'openai/whisper-base';" + "TASK 'automatic-speech-recognition' 
MODEL 'openai/whisper-base';" ) execute_query_fetch_all(self.evadb, create_function) summary_function = "Summarizer" create_function = ( f"CREATE FUNCTION {summary_function} TYPE HuggingFace " - "'task' 'summarization' 'model' 'philschmid/bart-large-cnn-samsum' 'min_length' 10 'max_new_tokens' 100;" + "TASK 'summarization' MODEL 'philschmid/bart-large-cnn-samsum' MIN_LENGTH 10 MAX_NEW_TOKENS 100;" ) execute_query_fetch_all(self.evadb, create_function) @@ -290,8 +290,8 @@ def test_toxicity_classification(self): function_name = "HFToxicityClassifier" create_function_query = f"""CREATE FUNCTION {function_name} TYPE HuggingFace - 'task' 'text-classification' - 'model' 'martin-ha/toxic-comment-model' + TASK 'text-classification' + MODEL 'martin-ha/toxic-comment-model' """ execute_query_fetch_all(self.evadb, create_function_query) @@ -340,8 +340,8 @@ def test_multilingual_toxicity_classification(self): function_name = "HFMultToxicityClassifier" create_function_query = f"""CREATE FUNCTION {function_name} TYPE HuggingFace - 'task' 'text-classification' - 'model' 'EIStakovskii/xlm_roberta_base_multilingual_toxicity_classifier_plus' + TASK 'text-classification' + MODEL 'EIStakovskii/xlm_roberta_base_multilingual_toxicity_classifier_plus' """ execute_query_fetch_all(self.evadb, create_function_query) @@ -389,7 +389,7 @@ def test_named_entity_recognition_model_all_pdf_data(self): function_name = "HFNERModel" create_function_query = f"""CREATE FUNCTION {function_name} TYPE HuggingFace - 'task' 'ner' + TASK 'ner' """ execute_query_fetch_all(self.evadb, create_function_query) @@ -424,7 +424,7 @@ def test_named_entity_recognition_model_no_ner_data_exists(self): function_name = "HFNERModel" create_function_query = f"""CREATE FUNCTION {function_name} TYPE HuggingFace - 'task' 'ner' + TASK 'ner' """ execute_query_fetch_all(self.evadb, create_function_query) diff --git a/test/integration_tests/long/test_model_forecasting.py b/test/integration_tests/long/test_model_forecasting.py index 
874a840e6..04cf69975 100644 --- a/test/integration_tests/long/test_model_forecasting.py +++ b/test/integration_tests/long/test_model_forecasting.py @@ -54,7 +54,7 @@ def test_forecast(self): CREATE UDF Forecast FROM (SELECT unique_id, ds, y FROM AirData) TYPE Forecasting - 'predict' 'y'; + PREDICT 'y'; """ execute_query_fetch_all(self.evadb, create_predict_udf) diff --git a/test/integration_tests/long/test_model_train.py b/test/integration_tests/long/test_model_train.py index bbc8ed26f..55ae6da9c 100644 --- a/test/integration_tests/long/test_model_train.py +++ b/test/integration_tests/long/test_model_train.py @@ -60,8 +60,8 @@ def test_ludwig_automl(self): CREATE FUNCTION IF NOT EXISTS PredictHouseRent FROM ( SELECT * FROM HomeRentals ) TYPE Ludwig - 'predict' 'rental_price' - 'time_limit' 120; + PREDICT 'rental_price' + TIME_LIMIT 120; """ execute_query_fetch_all(self.evadb, create_predict_function) diff --git a/test/integration_tests/long/test_reuse.py b/test/integration_tests/long/test_reuse.py index 7911bb20f..eefcda67e 100644 --- a/test/integration_tests/long/test_reuse.py +++ b/test/integration_tests/long/test_reuse.py @@ -42,8 +42,8 @@ def _load_hf_model(self): function_name = "HFObjectDetector" create_function_query = f"""CREATE FUNCTION {function_name} TYPE HuggingFace - 'task' 'object-detection' - 'model' 'facebook/detr-resnet-50'; + TASK 'object-detection' + MODEL 'facebook/detr-resnet-50'; """ execute_query_fetch_all(self.evadb, create_function_query) diff --git a/test/unit_tests/parser/test_parser.py b/test/unit_tests/parser/test_parser.py index d0894d589..339abe7e8 100644 --- a/test/unit_tests/parser/test_parser.py +++ b/test/unit_tests/parser/test_parser.py @@ -662,7 +662,7 @@ def test_create_function_statement(self): OUTPUT (Labels NDARRAY STR(10), Bbox NDARRAY UINT8(10, 4)) TYPE Classification IMPL 'data/fastrcnn.py' - "KEY" "VALUE"; + PREDICT "VALUE"; """ expected_cci = ColConstraintInfo() @@ -690,7 +690,7 @@ def 
test_create_function_statement(self): ], "Classification", None, - [("KEY", "VALUE")], + [("predict", "VALUE")], ) evadb_statement_list = parser.parse(create_func_query) self.assertIsInstance(evadb_statement_list, list)