Removing quotes from udf_metadata_key (#1026)

Replaced udf_metadata_key from string_literal to ID_LITERAL (modified: `evadb/parser/evadb.lark`). Removed .value from key_value_pair[0] following the change in type (modified: `evadb/parser/lark_visitor/_functions.py`). Replaced the string key with an ID_LITERAL in the test query (modified: `test/unit_tests/parser/test_parser.py`). Solves #1010. --------- Co-authored-by: xzdandy <[email protected]>
georgia-tech-db · Sep 5, 2023 · e535896 · e535896
1 parent 0f88555
commit e535896
Show file tree

Hide file tree

Showing 28 changed files with 78 additions and 73 deletions.
diff --git a/README.md b/README.md
@@ -191,8 +191,8 @@ SELECT ChatGPT('Is this video summary related to Ukraine russia war', text)
 CREATE FUNCTION IF NOT EXISTS PredictHouseRent FROM
 ( SELECT * FROM HomeRentals )
 TYPE Ludwig
-'predict' 'rental_price'
-'time_limit' 120;
+PREDICT 'rental_price'
+TIME_LIMIT 120;
 ```
 
 </details>

diff --git a/benchmark/text_summarization/text_summarization_with_evadb.py b/benchmark/text_summarization/text_summarization_with_evadb.py
@@ -16,10 +16,10 @@
 cursor.query("DROP UDF IF EXISTS TextSummarizer;").df()
 cursor.query("""CREATE UDF IF NOT EXISTS TextSummarizer
                 TYPE HuggingFace
-                'task' 'summarization'
-                'model' 'sshleifer/distilbart-cnn-12-6'
-                'min_length' 5
-                'max_length' 100;""").df()
+                TASK 'summarization'
+                MODEL 'sshleifer/distilbart-cnn-12-6'
+                MIN_LENGTH 5
+                MAX_LENGTH 100;""").df()
 
 
 cursor.query("DROP TABLE IF EXISTS cnn_news_summary;").df()

diff --git a/docs/_toc.yml b/docs/_toc.yml
@@ -66,7 +66,7 @@ parts:
         sections:
           - file: source/reference/ai/model-train
             title: Model Training
-          - file: source/reference/udfs/model-forecasting
+          - file: source/reference/ai/model-forecasting
             title: Time Series Forecasting
           - file: source/reference/ai/hf
             title: Hugging Face

diff --git a/docs/source/benchmarks/text_summarization.rst b/docs/source/benchmarks/text_summarization.rst
@@ -47,10 +47,10 @@ Creating Text Summarization Function in EvaDB
 
    CREATE UDF IF NOT EXISTS TextSummarizer
          TYPE HuggingFace
-         'task' 'summarization'
-         'model' 'sshleifer/distilbart-cnn-12-6'
-         'min_length' 5
-         'max_length' 100;
+         TASK 'summarization'
+         MODEL 'sshleifer/distilbart-cnn-12-6'
+         MIN_LENGTH 5
+         MAX_LENGTH 100;
 
 
 Tuning EvaDB for Maximum GPU Utilization

diff --git a/docs/source/overview/concepts.rst b/docs/source/overview/concepts.rst
@@ -24,8 +24,8 @@ Here is set of illustrative EvaQL queries for a ChatGPT-based video question ans
     --- After creating the function, we can use the function in any future query
     CREATE UDF SpeechRecognizer 
         TYPE HuggingFace 
-        'task' 'automatic-speech-recognition' 
-        'model' 'openai/whisper-base';
+        TASK 'automatic-speech-recognition' 
+        MODEL 'openai/whisper-base';
 
     --  EvaDB automatically extracts the audio from the videos
     --- We only need to run the SpeechRecognizer UDF on the 'audio' column 

diff --git a/docs/source/reference/ai/hf.rst b/docs/source/reference/ai/hf.rst
@@ -15,8 +15,8 @@ EvaDB supports functions similar to `Pipelines <https://huggingface.co/docs/tran
 
     CREATE FUNCTION IF NOT EXISTS HFObjectDetector
     TYPE  HuggingFace
-    'task' 'object-detection'
-    'model' 'facebook / detr-resnet-50'
+    TASK 'object-detection'
+    MODEL 'facebook / detr-resnet-50'
 
 EvaDB supports all arguments supported by HF pipelines. You can pass those using a key value format similar to task and model above.
 

diff --git a/...urce/reference/udfs/model-forecasting.rst → ...source/reference/ai/model-forecasting.rst b/...urce/reference/udfs/model-forecasting.rst → ...source/reference/ai/model-forecasting.rst
@@ -5,7 +5,7 @@ You can train a forecasting model easily in EvaDB.
 
 .. note::
 
-   Install `statsforecast` in your EvaDB virtual environment: ``pip install statsforecast``.
+   Install `statsforecast` in your EvaDB virtual environment: ``pip install evadb[forecasting]``.
 
 First, we create a table to insert required data.
 
@@ -26,12 +26,12 @@ Next, we create a UDF of `TYPE Forecasting`. We must enter the column name on wh
    CREATE UDF IF NOT EXISTS Forecast FROM
    (SELECT y FROM AirData)
    TYPE Forecasting
-   'predict' 'y';
+   PREDICT 'y';
 
 This trains a forecasting model. The model can be called by providing the horizon for forecasting.
 
 .. code-block:: sql
 
    SELECT Forecast(12) FROM AirData;
 
-Here, the horizon is `12`.
+Here, the horizon is `12`.
diff --git a/docs/source/reference/ai/model-train.rst b/docs/source/reference/ai/model-train.rst
@@ -12,8 +12,8 @@ Training and Finetuning
    CREATE FUNCTION IF NOT EXISTS PredictHouseRent FROM
    ( SELECT sqft, location, rental_price FROM HomeRentals )
    TYPE Ludwig
-   'predict' 'rental_price'
-   'time_limit' 120;
+   PREDICT 'rental_price'
+   TIME_LIMIT 120;
 
 In the above query, you are creating a new customized function by automatically training a model from the `HomeRentals` table. The `rental_price` column will be the target column for prediction, while `sqft` and `location` are the inputs. 
 
@@ -24,8 +24,8 @@ You can also simply give all other columns in `HomeRentals` as inputs and let th
    CREATE FUNCTION IF NOT EXISTS PredictHouseRent FROM
    ( SELECT * FROM HomeRentals )
    TYPE Ludwig
-   'predict' 'rental_price'
-   'time_limit' 120;
+   PREDICT 'rental_price'
+   TIME_LIMIT 120;
 
 .. note::
 

diff --git a/docs/source/reference/ai/openai.rst b/docs/source/reference/ai/openai.rst
@@ -15,7 +15,7 @@ To create a chat completion function in EvaDB, use the following SQL command:
 
     CREATE FUNCTION IF NOT EXISTS OpenAIChatCompletion
     IMPL 'evadb/functions/openai_chat_completion_function.py'
-    'model' 'gpt-3.5-turbo'
+    MODEL 'gpt-3.5-turbo'
 
 EvaDB supports the following models for chat completion task:
 

diff --git a/docs/source/reference/ai/yolo.rst b/docs/source/reference/ai/yolo.rst
@@ -13,7 +13,7 @@ To create a YOLO function in EvaDB using Ultralytics models, use the following S
 
     CREATE FUNCTION IF NOT EXISTS Yolo
     TYPE ultralytics
-    'model' 'yolov8m.pt'
+    MODEL 'yolov8m.pt'
 
 You can change the `model` value to specify any other model supported by Ultralytics.
 

diff --git a/docs/source/reference/evaql/create.rst b/docs/source/reference/evaql/create.rst
@@ -75,9 +75,9 @@ To register an user-defined function by training a predication model.
    CREATE FUNCTION IF NOT EXISTS PredictHouseRent FROM
    (SELECT * FROM HomeRentals)
    TYPE Ludwig
-   'predict' 'rental_price'
-   'time_list' 120;
-   'tune_for_memory' False;
+   PREDICT 'rental_price'
+   TIME_LIMIT 120
+   TUNE_FOR_MEMORY False;
 
 CREATE MATERIALIZED VIEW
 ------------------------

diff --git a/docs/source/usecases/object-detection.rst b/docs/source/usecases/object-detection.rst
@@ -39,7 +39,7 @@ To create a custom ``Yolo`` function based on the popular ``YOLO-v8m`` model, us
 
         CREATE UDF IF NOT EXISTS Yolo
         TYPE  ultralytics
-        'model' 'yolov8m.pt';
+        MODEL 'yolov8m.pt';
 
 Object Detection Queries
 ------------------------

diff --git a/docs/source/usecases/question-answering.rst b/docs/source/usecases/question-answering.rst
@@ -41,8 +41,8 @@ To create a custom ``SpeechRecognizer`` function based on the popular ``Whisper`
 
     CREATE FUNCTION SpeechRecognizer 
     TYPE HuggingFace 
-        'task' 'automatic-speech-recognition' 
-        'model' 'openai/whisper-base';
+        TASK 'automatic-speech-recognition' 
+        MODEL 'openai/whisper-base';
 
 .. note::
 

diff --git a/docs/source/usecases/text-summarization.rst b/docs/source/usecases/text-summarization.rst
@@ -41,13 +41,13 @@ To create custom ``TextSummarizer`` and ``TextClassifier`` functions, use the ``
 
         CREATE FUNCTION IF NOT EXISTS TextSummarizer
         TYPE HuggingFace
-        'task' 'summarization'
-        'model' 'facebook/bart-large-cnn';
+        TASK 'summarization'
+        MODEL 'facebook/bart-large-cnn';
 
         CREATE FUNCTION IF NOT EXISTS TextClassifier
         TYPE HuggingFace
-        'task' 'text-classification'
-        'model' 'distilbert-base-uncased-finetuned-sst-2-english';
+        TASK 'text-classification'
+        MODEL 'distilbert-base-uncased-finetuned-sst-2-english';
 
 .. note::
 

diff --git a/evadb/functions/function_bootstrap_queries.py b/evadb/functions/function_bootstrap_queries.py
@@ -119,7 +119,7 @@
 
 Yolo_function_query = """CREATE FUNCTION IF NOT EXISTS Yolo
       TYPE  ultralytics
-      'model' 'yolov8m.pt';
+      MODEL 'yolov8m.pt';
       """
 
 face_detection_function_query = """CREATE FUNCTION IF NOT EXISTS FaceDetector
@@ -185,7 +185,7 @@
 
 yolo8n_query = """CREATE FUNCTION IF NOT EXISTS Yolo
             TYPE  ultralytics
-            'model' 'yolov8n.pt';
+            MODEL 'yolov8n.pt';
         """
 
 

diff --git a/evadb/parser/create_function_statement.py b/evadb/parser/create_function_statement.py
@@ -86,7 +86,9 @@ def __str__(self) -> str:
 
         if self._metadata is not None:
             for key, value in self._metadata:
-                s += f" '{key}' '{value}'"
+                # NOTE: The key is emitted unquoted and in upper case because
+                # the tests do a straight string comparison on this output.
+                s += f" {key.upper()} '{value}'"
         return s
 
     @property

diff --git a/evadb/parser/evadb.lark b/evadb/parser/evadb.lark
@@ -49,7 +49,7 @@ function_impl: string_literal
 
 function_metadata: function_metadata_key function_metadata_value
 
-function_metadata_key: string_literal
+function_metadata_key: uid
 
 function_metadata_value: string_literal | decimal_literal
 

diff --git a/evadb/parser/lark_visitor/_functions.py b/evadb/parser/lark_visitor/_functions.py
@@ -97,7 +97,9 @@ def create_function(self, tree):
                     value = key_value_pair[1]
                     if isinstance(value, ConstantValueExpression):
                         value = value.value
-                    metadata.append((key_value_pair[0].value, value)),
+                    # Removing .value from key_value_pair[0] since key is now an ID_LITERAL
+                    # Adding lower() to ensure the key is in lowercase
+                    metadata.append((key_value_pair[0].lower(), value)),
 
         return CreateFunctionStatement(
             function_name,

diff --git a/evadb/parser/utils.py b/evadb/parser/utils.py
@@ -74,7 +74,7 @@ def parse_create_function(
         mock_query += f" TYPE {type}"
         task, model = kwargs["task"], kwargs["model"]
         if task is not None and model is not None:
-            mock_query += f" 'task' '{task}' 'model' '{model}'"
+            mock_query += f" TASK '{task}' MODEL '{model}'"
     else:
         mock_query += f" IMPL '{function_file_path}'"
     mock_query += ";"

diff --git a/test/benchmark_tests/test_benchmark_pytorch.py b/test/benchmark_tests/test_benchmark_pytorch.py
@@ -109,7 +109,7 @@ def test_automatic_speech_recognition(benchmark, setup_pytorch_tests):
     udf_name = "SpeechRecognizer"
     create_udf = (
         f"CREATE UDF {udf_name} TYPE HuggingFace "
-        "'task' 'automatic-speech-recognition' 'model' 'openai/whisper-base';"
+        "TASK 'automatic-speech-recognition' MODEL 'openai/whisper-base';"
     )
     execute_query_fetch_all(setup_pytorch_tests, create_udf)
 
@@ -135,14 +135,14 @@ def test_summarization_from_video(benchmark, setup_pytorch_tests):
     asr_udf = "SpeechRecognizer"
     create_udf = (
         f"CREATE UDF {asr_udf} TYPE HuggingFace "
-        "'task' 'automatic-speech-recognition' 'model' 'openai/whisper-base';"
+        "TASK 'automatic-speech-recognition' MODEL 'openai/whisper-base';"
     )
     execute_query_fetch_all(setup_pytorch_tests, create_udf)
 
     summary_udf = "Summarizer"
     create_udf = (
         f"CREATE UDF {summary_udf} TYPE HuggingFace "
-        "'task' 'summarization' 'model' 'philschmid/bart-large-cnn-samsum' 'min_length' 10 'max_length' 100;"
+        "TASK 'summarization' MODEL 'philschmid/bart-large-cnn-samsum' MIN_LENGTH 10 MAX_LENGTH 100;"
     )
     execute_query_fetch_all(setup_pytorch_tests, create_udf)
 

diff --git a/test/integration_tests/long/interfaces/relational/test_relational_api.py b/test/integration_tests/long/interfaces/relational/test_relational_api.py
@@ -231,7 +231,7 @@ def test_create_function_with_relational_api(self):
         query = create_speech_recognizer_function_if_not_exists.sql_query()
         self.assertEqual(
             query,
-            """CREATE FUNCTION IF NOT EXISTS SpeechRecognizer TYPE HuggingFace 'task' 'automatic-speech-recognition' 'model' 'openai/whisper-base'""",
+            """CREATE FUNCTION IF NOT EXISTS SpeechRecognizer TYPE HuggingFace TASK 'automatic-speech-recognition' MODEL 'openai/whisper-base'""",
         )
         create_speech_recognizer_function_if_not_exists.execute()
 
@@ -242,7 +242,7 @@ def test_create_function_with_relational_api(self):
         query = create_speech_recognizer_function.sql_query()
         self.assertEqual(
             query,
-            "CREATE FUNCTION SpeechRecognizer TYPE HuggingFace 'task' 'automatic-speech-recognition' 'model' 'openai/whisper-base'",
+            "CREATE FUNCTION SpeechRecognizer TYPE HuggingFace TASK 'automatic-speech-recognition' MODEL 'openai/whisper-base'",
         )
         with self.assertRaises(ExecutorError):
             create_speech_recognizer_function.execute()

diff --git a/test/integration_tests/long/test_error_handling_with_ray.py b/test/integration_tests/long/test_error_handling_with_ray.py
@@ -58,7 +58,7 @@ def test_ray_error_populate_to_all_stages(self):
         function_name, task = "HFObjectDetector", "image-classification"
         create_function_query = f"""CREATE FUNCTION {function_name}
             TYPE HuggingFace
-            'task' '{task}'
+            TASK '{task}'
         """
 
         execute_query_fetch_all(self.evadb, create_function_query)

diff --git a/test/integration_tests/long/test_function_executor.py b/test/integration_tests/long/test_function_executor.py
@@ -175,8 +175,8 @@ def test_should_create_function_with_metadata(self):
                   OUTPUT (label NDARRAY STR(10))
                   TYPE  Classification
                   IMPL  'test/util.py'
-                  'CACHE' 'TRUE'
-                  'BATCH' 'FALSE';
+                  CACHE 'TRUE'
+                  BATCH 'FALSE';
         """
         execute_query_fetch_all(self.evadb, create_function_query.format(function_name))
 
@@ -187,7 +187,8 @@ def test_should_create_function_with_metadata(self):
         self.assertEqual(len(entries), 2)
         metadata = [(entry.key, entry.value) for entry in entries]
 
-        expected_metadata = [("CACHE", "TRUE"), ("BATCH", "FALSE")]
+        # metadata keys are ultimately stored as lowercase strings
+        expected_metadata = [("cache", "TRUE"), ("batch", "FALSE")]
         self.assertEqual(set(metadata), set(expected_metadata))
 
     def test_should_return_empty_metadata_list_for_missing_function(self):
@@ -205,8 +206,8 @@ def test_should_return_empty_metadata_list_if_function_is_removed(self):
                   OUTPUT (label NDARRAY STR(10))
                   TYPE  Classification
                   IMPL  'test/util.py'
-                  'CACHE' 'TRUE'
-                  'BATCH' 'FALSE';
+                  CACHE 'TRUE'
+                  BATCH 'FALSE';
         """
         execute_query_fetch_all(self.evadb, create_function_query.format(function_name))