enhance: Rename textmatch to text_match (#37290)
issue: #36672

Signed-off-by: Cai Zhang <[email protected]>
xiaocai2333 authored Nov 3, 2024
1 parent 0449c74 commit 50de122
Showing 11 changed files with 352 additions and 356 deletions.
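
In practical terms, the rename means filter expressions must now spell the operator as text_match (or TEXT_MATCH) rather than TextMatch/textmatch. A minimal before/after sketch, assuming a pymilvus Collection with a VARCHAR field named word_field (the collection and field names are illustrative, not taken from this diff):

from pymilvus import Collection

collection = Collection("text_match_demo")  # hypothetical collection name

# Before this commit the parser accepted the CamelCase spelling:
old_expr = "TextMatch(word_field, 'milvus')"

# After this commit only the snake_case spellings are lexed:
new_expr = "text_match(word_field, 'milvus')"         # lowercase form
new_expr_upper = "TEXT_MATCH(word_field, 'milvus')"   # uppercase form, also accepted

res = collection.query(expr=new_expr, output_fields=["id", "word_field"])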
2 changes: 1 addition & 1 deletion internal/parser/planparserv2/Plan.g4
@@ -57,7 +57,7 @@ NE: '!=';

LIKE: 'like' | 'LIKE';
EXISTS: 'exists' | 'EXISTS';
- TEXTMATCH: 'TextMatch'|'textmatch'|'TEXTMATCH';
+ TEXTMATCH: 'text_match'|'TEXT_MATCH';

ADD: '+';
SUB: '-';
2 changes: 1 addition & 1 deletion internal/parser/planparserv2/generated/PlanLexer.interp

Large diffs are not rendered by default.

648 changes: 322 additions & 326 deletions internal/parser/planparserv2/generated/plan_lexer.go

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions internal/parser/planparserv2/plan_parser_v2_test.go
@@ -216,15 +216,15 @@ func TestExpr_TextMatch(t *testing.T) {
assert.NoError(t, err)

exprStrs := []string{
- `TextMatch(VarCharField, "query")`,
+ `text_match(VarCharField, "query")`,
}
for _, exprStr := range exprStrs {
assertValidExpr(t, helper, exprStr)
}

unsupported := []string{
- `TextMatch(not_exist, "query")`,
- `TextMatch(BoolField, "query")`,
+ `text_match(not_exist, "query")`,
+ `text_match(BoolField, "query")`,
}
for _, exprStr := range unsupported {
assertInvalidExpr(t, helper, exprStr)
4 changes: 2 additions & 2 deletions tests/python_client/chaos/checker.py
@@ -1397,7 +1397,7 @@ def __init__(self, collection_name=None, shards_num=2, replica_number=1, schema=
self.c_wrap.load(replica_number=replica_number) # do load before query
self.insert_data()
key_word = self.word_freq.most_common(1)[0][0]
- self.term_expr = f"TextMatch({self.text_field_name}, '{key_word}')"
+ self.term_expr = f"TEXT_MATCH({self.text_field_name}, '{key_word}')"

@trace()
def query(self):
@@ -1408,7 +1408,7 @@ def query(self):
@exception_handler()
def run_task(self):
key_word = self.word_freq.most_common(1)[0][0]
- self.term_expr = f"TextMatch({self.text_field_name}, '{key_word}')"
+ self.term_expr = f"TEXT_MATCH({self.text_field_name}, '{key_word}')"
res, result = self.query()
return res, result

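For reference, the chaos checker above picks its query token from a word-frequency counter built over the inserted text. A small standalone sketch of that pattern, assuming a collections.Counter and a text field simply named text (both illustrative):

from collections import Counter

# word_freq would be accumulated while generating the inserted text data
word_freq = Counter("milvus vector database milvus search milvus".split())

key_word = word_freq.most_common(1)[0][0]       # most frequent token
term_expr = f"TEXT_MATCH(text, '{key_word}')"   # filter expression used for the query
print(term_expr)                                # TEXT_MATCH(text, 'milvus')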
2 changes: 1 addition & 1 deletion tests/python_client/common/common_func.py
@@ -227,7 +227,7 @@ def generate_text_match_expr(query_dict):

def process_node(node):
if isinstance(node, dict) and 'field' in node and 'value' in node:
- return f"TextMatch({node['field']}, '{node['value']}')"
+ return f"TEXT_MATCH({node['field']}, '{node['value']}')"
elif isinstance(node, dict) and 'not' in node:
return f"not {process_node(node['not'])}"
elif isinstance(node, list):
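Only part of process_node is visible in this hunk. A hedged, self-contained reconstruction of how such a recursive expression builder could look is sketched below; the list branch (joining children with and) and the error case are assumptions for illustration, not taken from this diff:

def generate_text_match_expr(query_dict):
    """Recursively turn a nested dict/list description into a text match filter string."""
    def process_node(node):
        if isinstance(node, dict) and 'field' in node and 'value' in node:
            return f"TEXT_MATCH({node['field']}, '{node['value']}')"
        elif isinstance(node, dict) and 'not' in node:
            return f"not {process_node(node['not'])}"
        elif isinstance(node, list):
            # Assumption: children of a list are combined with 'and'
            return "(" + " and ".join(process_node(child) for child in node) + ")"
        else:
            raise ValueError(f"unsupported node: {node!r}")
    return process_node(query_dict)

# generate_text_match_expr({'not': {'field': 'text', 'value': 'milvus'}})
# -> "not TEXT_MATCH(text, 'milvus')"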
6 changes: 3 additions & 3 deletions tests/python_client/testcases/test_bulk_insert.py
@@ -899,7 +899,7 @@ def test_bulk_insert_all_field_with_new_json_format(self, auto_id, dim, entities
query_data = [r[expr_field] for r in res][:len(self.collection_wrap.partitions)]
res, _ = self.collection_wrap.query(expr=f"{expr_field} in {query_data}", output_fields=[expr_field])
assert len(res) == len(query_data)
- res, _ = self.collection_wrap.query(expr=f"TextMatch({df.text_field}, 'milvus')", output_fields=[df.text_field])
+ res, _ = self.collection_wrap.query(expr=f"text_match({df.text_field}, 'milvus')", output_fields=[df.text_field])
if nullable is False:
assert len(res) == entities
else:
@@ -1052,7 +1052,7 @@ def test_bulk_insert_all_field_with_numpy(self, auto_id, dim, entities, enable_d
query_data = [r[df.string_field] for r in res][:len(self.collection_wrap.partitions)]
res, _ = self.collection_wrap.query(expr=f"{df.string_field} in {query_data}", output_fields=[df.string_field])
assert len(res) == len(query_data)
- res, _ = self.collection_wrap.query(expr=f"TextMatch({df.text_field}, 'milvus')", output_fields=[df.text_field])
+ res, _ = self.collection_wrap.query(expr=f"TEXT_MATCH({df.text_field}, 'milvus')", output_fields=[df.text_field])
if nullable is False:
assert len(res) == entities
else:
@@ -1218,7 +1218,7 @@ def test_bulk_insert_all_field_with_parquet(self, auto_id, dim, entities, enable
query_data = [r[expr_field] for r in res][:len(self.collection_wrap.partitions)]
res, _ = self.collection_wrap.query(expr=f"{expr_field} in {query_data}", output_fields=[expr_field])
assert len(res) == len(query_data)
- res, _ = self.collection_wrap.query(expr=f"TextMatch({df.text_field}, 'milvus')", output_fields=[df.text_field])
+ res, _ = self.collection_wrap.query(expr=f"TEXT_MATCH({df.text_field}, 'milvus')", output_fields=[df.text_field])
if not nullable:
assert len(res) == entities
else:
4 changes: 2 additions & 2 deletions tests/python_client/testcases/test_full_text_search.py
@@ -2236,7 +2236,7 @@ def test_full_text_search_default(
token = random.choice(tokens)
search_data = [fake.text().lower() + f" {token} " for _ in range(nq)]
if expr == "text_match":
- filter = f"TextMatch(text, '{token}')"
+ filter = f"TEXT_MATCH(text, '{token}')"
res, _ = collection_w.query(
expr=filter,
)
@@ -2431,7 +2431,7 @@ def test_full_text_search_with_jieba_tokenizer(
limit = 100
search_data = [fake.text().lower() + " " + random.choice(tokens) for _ in range(nq)]
if expr == "text_match":
- filter = f"TextMatch(text, '{tokens[0]}')"
+ filter = f"text_match(text, '{tokens[0]}')"
res, _ = collection_w.query(
expr=filter,
)
26 changes: 13 additions & 13 deletions tests/python_client/testcases/test_query.py
@@ -4538,7 +4538,7 @@ def test_query_text_match_normal(
# query single field for one token
for field in text_fields:
token = wf_map[field].most_common()[0][0]
- expr = f"TextMatch({field}, '{token}')"
+ expr = f"text_match({field}, '{token}')"
log.info(f"expr: {expr}")
res, _ = collection_w.query(expr=expr, output_fields=["id", field])
assert len(res) > 0
@@ -4562,7 +4562,7 @@ def test_query_text_match_normal(
for word, count in wf_map[field].most_common(10):
top_10_tokens.append(word)
string_of_top_10_words = " ".join(top_10_tokens)
- expr = f"TextMatch({field}, '{string_of_top_10_words}')"
+ expr = f"text_match({field}, '{string_of_top_10_words}')"
log.info(f"expr {expr}")
res, _ = collection_w.query(expr=expr, output_fields=["id", field])
log.info(f"res len {len(res)}")
@@ -4677,7 +4677,7 @@ def test_query_text_match_custom_analyzer(self):
# query single field for one word
for field in text_fields:
token = list(wf_map[field].keys())[0]
- expr = f"TextMatch({field}, '{token}')"
+ expr = f"text_match({field}, '{token}')"
log.info(f"expr: {expr}")
res, _ = collection_w.query(expr=expr, output_fields=["id", field])
log.info(f"res len {len(res)}")
@@ -4691,7 +4691,7 @@ def test_query_text_match_custom_analyzer(self):
for word, count in wf_map[field].most_common(10):
top_10_tokens.append(word)
string_of_top_10_words = " ".join(top_10_tokens)
- expr = f"TextMatch({field}, '{string_of_top_10_words}')"
+ expr = f"text_match({field}, '{string_of_top_10_words}')"
log.info(f"expr {expr}")
res, _ = collection_w.query(expr=expr, output_fields=["id", field])
log.info(f"res len {len(res)}")
@@ -4793,7 +4793,7 @@ def test_query_text_match_with_combined_expression_for_single_field(self):
wf_counter = Counter(wf_map[field])
pd_tmp_res_list = []
for word, count in wf_counter.most_common(2):
- tmp = f"TextMatch({field}, '{word}')"
+ tmp = f"text_match({field}, '{word}')"
log.info(f"tmp expr {tmp}")
expr_list.append(tmp)
manual_result = df_new[
@@ -5074,7 +5074,7 @@ def test_query_text_match_with_multi_lang(self):
# query single field for one word
for field in text_fields:
token = wf_map[field].most_common()[-1][0]
- expr = f"TextMatch({field}, '{token}')"
+ expr = f"text_match({field}, '{token}')"
log.info(f"expr: {expr}")
res, _ = collection_w.query(expr=expr, output_fields=["id", field])
log.info(f"res len {len(res)}")
@@ -5089,7 +5089,7 @@ def test_query_text_match_with_multi_lang(self):
for word, count in wf_map[field].most_common(3):
multi_words.append(word)
string_of_multi_words = " ".join(multi_words)
- expr = f"TextMatch({field}, '{string_of_multi_words}')"
+ expr = f"text_match({field}, '{string_of_multi_words}')"
log.info(f"expr {expr}")
res, _ = collection_w.query(expr=expr, output_fields=["id", field])
log.info(f"res len {len(res)}")
@@ -5194,7 +5194,7 @@ def test_query_text_match_with_addition_inverted_index(self):
# query single field for one word
for field in text_fields:
token = wf_map[field].most_common()[-1][0]
- expr = f"TextMatch({field}, '{token}')"
+ expr = f"text_match({field}, '{token}')"
log.info(f"expr: {expr}")
res, _ = collection_w.query(expr=expr, output_fields=["id", field])
pandas_res = df_split[df_split.apply(lambda row: token in row[field], axis=1)]
@@ -5311,7 +5311,7 @@ def test_query_text_match_with_non_varchar_fields_expr(self, combine_op):
# query single field for one word
for field in text_fields:
token = wf_map[field].most_common()[0][0]
- tm_expr = f"TextMatch({field}, '{token}')"
+ tm_expr = f"text_match({field}, '{token}')"
int_expr = "age > 10"
combined_expr = f"{tm_expr} {combine_op} {int_expr}"
log.info(f"expr: {combined_expr}")
@@ -5445,7 +5445,7 @@ def test_query_text_match_with_some_empty_string(self):
# query single field for one word
for field in text_fields:
token = wf_map[field].most_common()[-1][0]
- expr = f"TextMatch({field}, '{token}')"
+ expr = f"text_match({field}, '{token}')"
log.info(f"expr: {expr}")
res, _ = collection_w.query(expr=expr, output_fields=["id", field])
log.info(f"res len {len(res)}")
@@ -5459,7 +5459,7 @@ def test_query_text_match_with_some_empty_string(self):
for word, count in wf_map[field].most_common(3):
multi_words.append(word)
string_of_multi_words = " ".join(multi_words)
- expr = f"TextMatch({field}, '{string_of_multi_words}')"
+ expr = f"text_match({field}, '{string_of_multi_words}')"
log.info(f"expr {expr}")
res, _ = collection_w.query(expr=expr, output_fields=["id", field])
log.info(f"res len {len(res)}")
@@ -5563,7 +5563,7 @@ def test_query_text_match_with_nullable(self):
# query single field for one word
for field in text_fields:
token = wf_map[field].most_common()[-1][0]
- expr = f"TextMatch({field}, '{token}')"
+ expr = f"text_match({field}, '{token}')"
log.info(f"expr: {expr}")
res, _ = collection_w.query(expr=expr, output_fields=text_fields)
log.info(f"res len {len(res)}, \n{res}")
@@ -5577,7 +5577,7 @@ def test_query_text_match_with_nullable(self):
for word, count in wf_map[field].most_common(3):
multi_words.append(word)
string_of_multi_words = " ".join(multi_words)
- expr = f"TextMatch({field}, '{string_of_multi_words}')"
+ expr = f"text_match({field}, '{string_of_multi_words}')"
log.info(f"expr {expr}")
res, _ = collection_w.query(expr=expr, output_fields=text_fields)
log.info(f"res len {len(res)}, {res}")
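The non-varchar-field test above combines a text_match predicate with a scalar predicate through a boolean operator. A minimal sketch of that expression shape, with illustrative field names:

tm_expr = "text_match(word, 'milvus')"  # word: VARCHAR field with text match enabled (illustrative)
int_expr = "age > 10"                   # age: integer field (illustrative)
for combine_op in ("and", "or"):
    combined_expr = f"{tm_expr} {combine_op} {int_expr}"
    print(combined_expr)
    # res, _ = collection_w.query(expr=combined_expr, output_fields=["id", "word", "age"])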
4 changes: 2 additions & 2 deletions tests/python_client/testcases/test_search.py
@@ -13402,7 +13402,7 @@ def test_search_with_text_match_filter_normal(
search_data = [[random.random() for _ in range(dim)]]
for field in text_fields:
token = wf_map[field].most_common()[0][0]
- expr = f"TextMatch({field}, '{token}')"
+ expr = f"text_match({field}, '{token}')"
manual_result = df_split[
df_split.apply(lambda row: token in row[field], axis=1)
]
@@ -13427,7 +13427,7 @@ def test_search_with_text_match_filter_normal(
for word, count in wf_map[field].most_common(10):
top_10_tokens.append(word)
string_of_top_10_words = " ".join(top_10_tokens)
- expr = f"TextMatch({field}, '{string_of_top_10_words}')"
+ expr = f"text_match({field}, '{string_of_top_10_words}')"
log.info(f"expr {expr}")
res_list, _ = collection_w.search(
data=search_data,
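The search tests above pass the text_match expression as the expr argument of a vector search, so it acts as a pre-filter on the hits. A minimal pymilvus sketch of that combination, assuming a FLOAT_VECTOR field emb and a VARCHAR field word (names are illustrative):

import random
from pymilvus import Collection

collection = Collection("text_match_demo")  # hypothetical collection
dim = 128
search_data = [[random.random() for _ in range(dim)]]

res = collection.search(
    data=search_data,
    anns_field="emb",                           # vector field (illustrative)
    param={"metric_type": "L2", "params": {}},
    limit=10,
    expr="text_match(word, 'milvus')",          # keyword pre-filter on the VARCHAR field
    output_fields=["word"],
)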
4 changes: 2 additions & 2 deletions tests/restful_client_v2/testcases/test_vector_operations.py
@@ -1977,7 +1977,7 @@ def test_search_vector_with_text_match_filter(self, tokenizer):
vector_to_search = [[random.random() for _ in range(dim)]]
for field in text_fields:
token = wf_map[field].most_common()[0][0]
- expr = f"TextMatch({field}, '{token}')"
+ expr = f"text_match({field}, '{token}')"
logger.info(f"expr: {expr}")
rsp = self.vector_client.vector_search({"collectionName": name, "data":vector_to_search, "filter": f"{expr}", "outputFields": ["*"]})
assert rsp['code'] == 0, rsp
@@ -2813,7 +2813,7 @@ def test_query_vector_with_text_match_filter(self, tokenizer):
time.sleep(5)
for field in text_fields:
token = wf_map[field].most_common()[0][0]
- expr = f"TextMatch({field}, '{token}')"
+ expr = f"text_match({field}, '{token}')"
logger.info(f"expr: {expr}")
rsp = self.vector_client.vector_query({"collectionName": name, "filter": f"{expr}", "outputFields": ["*"]})
assert rsp['code'] == 0, rsp