Skip to content

Commit

Permalink
enhance: Rename textmatch to text_match (milvus-io#37290)
Browse files (browse the repository at this point in the history)
issue: milvus-io#36672

Signed-off-by: Cai Zhang <[email protected]>
  • Loading branch information
xiaocai2333 authored Nov 3, 2024
1 parent 0449c74 commit 50de122
Show file tree
Hide file tree
Showing 11 changed files with 352 additions and 356 deletions.
2 changes: 1 addition & 1 deletion internal/parser/planparserv2/Plan.g4
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ NE: '!=';

LIKE: 'like' | 'LIKE';
EXISTS: 'exists' | 'EXISTS';
TEXTMATCH: 'TextMatch'|'textmatch'|'TEXTMATCH';
TEXTMATCH: 'text_match'|'TEXT_MATCH';

ADD: '+';
SUB: '-';
Expand Down
2 changes: 1 addition & 1 deletion internal/parser/planparserv2/generated/PlanLexer.interp

Large diffs are not rendered by default.

648 changes: 322 additions & 326 deletions internal/parser/planparserv2/generated/plan_lexer.go

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions internal/parser/planparserv2/plan_parser_v2_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -216,15 +216,15 @@ func TestExpr_TextMatch(t *testing.T) {
assert.NoError(t, err)

exprStrs := []string{
`TextMatch(VarCharField, "query")`,
`text_match(VarCharField, "query")`,
}
for _, exprStr := range exprStrs {
assertValidExpr(t, helper, exprStr)
}

unsupported := []string{
`TextMatch(not_exist, "query")`,
`TextMatch(BoolField, "query")`,
`text_match(not_exist, "query")`,
`text_match(BoolField, "query")`,
}
for _, exprStr := range unsupported {
assertInvalidExpr(t, helper, exprStr)
Expand Down
4 changes: 2 additions & 2 deletions tests/python_client/chaos/checker.py
Original file line number Diff line number Diff line change
Expand Up @@ -1397,7 +1397,7 @@ def __init__(self, collection_name=None, shards_num=2, replica_number=1, schema=
self.c_wrap.load(replica_number=replica_number) # do load before query
self.insert_data()
key_word = self.word_freq.most_common(1)[0][0]
self.term_expr = f"TextMatch({self.text_field_name}, '{key_word}')"
self.term_expr = f"TEXT_MATCH({self.text_field_name}, '{key_word}')"

@trace()
def query(self):
Expand All @@ -1408,7 +1408,7 @@ def query(self):
@exception_handler()
def run_task(self):
key_word = self.word_freq.most_common(1)[0][0]
self.term_expr = f"TextMatch({self.text_field_name}, '{key_word}')"
self.term_expr = f"TEXT_MATCH({self.text_field_name}, '{key_word}')"
res, result = self.query()
return res, result

Expand Down
2 changes: 1 addition & 1 deletion tests/python_client/common/common_func.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ def generate_text_match_expr(query_dict):

def process_node(node):
if isinstance(node, dict) and 'field' in node and 'value' in node:
return f"TextMatch({node['field']}, '{node['value']}')"
return f"TEXT_MATCH({node['field']}, '{node['value']}')"
elif isinstance(node, dict) and 'not' in node:
return f"not {process_node(node['not'])}"
elif isinstance(node, list):
Expand Down
6 changes: 3 additions & 3 deletions tests/python_client/testcases/test_bulk_insert.py
Original file line number Diff line number Diff line change
Expand Up @@ -899,7 +899,7 @@ def test_bulk_insert_all_field_with_new_json_format(self, auto_id, dim, entities
query_data = [r[expr_field] for r in res][:len(self.collection_wrap.partitions)]
res, _ = self.collection_wrap.query(expr=f"{expr_field} in {query_data}", output_fields=[expr_field])
assert len(res) == len(query_data)
res, _ = self.collection_wrap.query(expr=f"TextMatch({df.text_field}, 'milvus')", output_fields=[df.text_field])
res, _ = self.collection_wrap.query(expr=f"text_match({df.text_field}, 'milvus')", output_fields=[df.text_field])
if nullable is False:
assert len(res) == entities
else:
Expand Down Expand Up @@ -1052,7 +1052,7 @@ def test_bulk_insert_all_field_with_numpy(self, auto_id, dim, entities, enable_d
query_data = [r[df.string_field] for r in res][:len(self.collection_wrap.partitions)]
res, _ = self.collection_wrap.query(expr=f"{df.string_field} in {query_data}", output_fields=[df.string_field])
assert len(res) == len(query_data)
res, _ = self.collection_wrap.query(expr=f"TextMatch({df.text_field}, 'milvus')", output_fields=[df.text_field])
res, _ = self.collection_wrap.query(expr=f"TEXT_MATCH({df.text_field}, 'milvus')", output_fields=[df.text_field])
if nullable is False:
assert len(res) == entities
else:
Expand Down Expand Up @@ -1218,7 +1218,7 @@ def test_bulk_insert_all_field_with_parquet(self, auto_id, dim, entities, enable
query_data = [r[expr_field] for r in res][:len(self.collection_wrap.partitions)]
res, _ = self.collection_wrap.query(expr=f"{expr_field} in {query_data}", output_fields=[expr_field])
assert len(res) == len(query_data)
res, _ = self.collection_wrap.query(expr=f"TextMatch({df.text_field}, 'milvus')", output_fields=[df.text_field])
res, _ = self.collection_wrap.query(expr=f"TEXT_MATCH({df.text_field}, 'milvus')", output_fields=[df.text_field])
if not nullable:
assert len(res) == entities
else:
Expand Down
4 changes: 2 additions & 2 deletions tests/python_client/testcases/test_full_text_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -2236,7 +2236,7 @@ def test_full_text_search_default(
token = random.choice(tokens)
search_data = [fake.text().lower() + f" {token} " for _ in range(nq)]
if expr == "text_match":
filter = f"TextMatch(text, '{token}')"
filter = f"TEXT_MATCH(text, '{token}')"
res, _ = collection_w.query(
expr=filter,
)
Expand Down Expand Up @@ -2431,7 +2431,7 @@ def test_full_text_search_with_jieba_tokenizer(
limit = 100
search_data = [fake.text().lower() + " " + random.choice(tokens) for _ in range(nq)]
if expr == "text_match":
filter = f"TextMatch(text, '{tokens[0]}')"
filter = f"text_match(text, '{tokens[0]}')"
res, _ = collection_w.query(
expr=filter,
)
Expand Down
26 changes: 13 additions & 13 deletions tests/python_client/testcases/test_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -4538,7 +4538,7 @@ def test_query_text_match_normal(
# query single field for one token
for field in text_fields:
token = wf_map[field].most_common()[0][0]
expr = f"TextMatch({field}, '{token}')"
expr = f"text_match({field}, '{token}')"
log.info(f"expr: {expr}")
res, _ = collection_w.query(expr=expr, output_fields=["id", field])
assert len(res) > 0
Expand All @@ -4562,7 +4562,7 @@ def test_query_text_match_normal(
for word, count in wf_map[field].most_common(10):
top_10_tokens.append(word)
string_of_top_10_words = " ".join(top_10_tokens)
expr = f"TextMatch({field}, '{string_of_top_10_words}')"
expr = f"text_match({field}, '{string_of_top_10_words}')"
log.info(f"expr {expr}")
res, _ = collection_w.query(expr=expr, output_fields=["id", field])
log.info(f"res len {len(res)}")
Expand Down Expand Up @@ -4677,7 +4677,7 @@ def test_query_text_match_custom_analyzer(self):
# query single field for one word
for field in text_fields:
token = list(wf_map[field].keys())[0]
expr = f"TextMatch({field}, '{token}')"
expr = f"text_match({field}, '{token}')"
log.info(f"expr: {expr}")
res, _ = collection_w.query(expr=expr, output_fields=["id", field])
log.info(f"res len {len(res)}")
Expand All @@ -4691,7 +4691,7 @@ def test_query_text_match_custom_analyzer(self):
for word, count in wf_map[field].most_common(10):
top_10_tokens.append(word)
string_of_top_10_words = " ".join(top_10_tokens)
expr = f"TextMatch({field}, '{string_of_top_10_words}')"
expr = f"text_match({field}, '{string_of_top_10_words}')"
log.info(f"expr {expr}")
res, _ = collection_w.query(expr=expr, output_fields=["id", field])
log.info(f"res len {len(res)}")
Expand Down Expand Up @@ -4793,7 +4793,7 @@ def test_query_text_match_with_combined_expression_for_single_field(self):
wf_counter = Counter(wf_map[field])
pd_tmp_res_list = []
for word, count in wf_counter.most_common(2):
tmp = f"TextMatch({field}, '{word}')"
tmp = f"text_match({field}, '{word}')"
log.info(f"tmp expr {tmp}")
expr_list.append(tmp)
manual_result = df_new[
Expand Down Expand Up @@ -5074,7 +5074,7 @@ def test_query_text_match_with_multi_lang(self):
# query single field for one word
for field in text_fields:
token = wf_map[field].most_common()[-1][0]
expr = f"TextMatch({field}, '{token}')"
expr = f"text_match({field}, '{token}')"
log.info(f"expr: {expr}")
res, _ = collection_w.query(expr=expr, output_fields=["id", field])
log.info(f"res len {len(res)}")
Expand All @@ -5089,7 +5089,7 @@ def test_query_text_match_with_multi_lang(self):
for word, count in wf_map[field].most_common(3):
multi_words.append(word)
string_of_multi_words = " ".join(multi_words)
expr = f"TextMatch({field}, '{string_of_multi_words}')"
expr = f"text_match({field}, '{string_of_multi_words}')"
log.info(f"expr {expr}")
res, _ = collection_w.query(expr=expr, output_fields=["id", field])
log.info(f"res len {len(res)}")
Expand Down Expand Up @@ -5194,7 +5194,7 @@ def test_query_text_match_with_addition_inverted_index(self):
# query single field for one word
for field in text_fields:
token = wf_map[field].most_common()[-1][0]
expr = f"TextMatch({field}, '{token}')"
expr = f"text_match({field}, '{token}')"
log.info(f"expr: {expr}")
res, _ = collection_w.query(expr=expr, output_fields=["id", field])
pandas_res = df_split[df_split.apply(lambda row: token in row[field], axis=1)]
Expand Down Expand Up @@ -5311,7 +5311,7 @@ def test_query_text_match_with_non_varchar_fields_expr(self, combine_op):
# query single field for one word
for field in text_fields:
token = wf_map[field].most_common()[0][0]
tm_expr = f"TextMatch({field}, '{token}')"
tm_expr = f"text_match({field}, '{token}')"
int_expr = "age > 10"
combined_expr = f"{tm_expr} {combine_op} {int_expr}"
log.info(f"expr: {combined_expr}")
Expand Down Expand Up @@ -5445,7 +5445,7 @@ def test_query_text_match_with_some_empty_string(self):
# query single field for one word
for field in text_fields:
token = wf_map[field].most_common()[-1][0]
expr = f"TextMatch({field}, '{token}')"
expr = f"text_match({field}, '{token}')"
log.info(f"expr: {expr}")
res, _ = collection_w.query(expr=expr, output_fields=["id", field])
log.info(f"res len {len(res)}")
Expand All @@ -5459,7 +5459,7 @@ def test_query_text_match_with_some_empty_string(self):
for word, count in wf_map[field].most_common(3):
multi_words.append(word)
string_of_multi_words = " ".join(multi_words)
expr = f"TextMatch({field}, '{string_of_multi_words}')"
expr = f"text_match({field}, '{string_of_multi_words}')"
log.info(f"expr {expr}")
res, _ = collection_w.query(expr=expr, output_fields=["id", field])
log.info(f"res len {len(res)}")
Expand Down Expand Up @@ -5563,7 +5563,7 @@ def test_query_text_match_with_nullable(self):
# query single field for one word
for field in text_fields:
token = wf_map[field].most_common()[-1][0]
expr = f"TextMatch({field}, '{token}')"
expr = f"text_match({field}, '{token}')"
log.info(f"expr: {expr}")
res, _ = collection_w.query(expr=expr, output_fields=text_fields)
log.info(f"res len {len(res)}, \n{res}")
Expand All @@ -5577,7 +5577,7 @@ def test_query_text_match_with_nullable(self):
for word, count in wf_map[field].most_common(3):
multi_words.append(word)
string_of_multi_words = " ".join(multi_words)
expr = f"TextMatch({field}, '{string_of_multi_words}')"
expr = f"text_match({field}, '{string_of_multi_words}')"
log.info(f"expr {expr}")
res, _ = collection_w.query(expr=expr, output_fields=text_fields)
log.info(f"res len {len(res)}, {res}")
Expand Down
4 changes: 2 additions & 2 deletions tests/python_client/testcases/test_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -13402,7 +13402,7 @@ def test_search_with_text_match_filter_normal(
search_data = [[random.random() for _ in range(dim)]]
for field in text_fields:
token = wf_map[field].most_common()[0][0]
expr = f"TextMatch({field}, '{token}')"
expr = f"text_match({field}, '{token}')"
manual_result = df_split[
df_split.apply(lambda row: token in row[field], axis=1)
]
Expand All @@ -13427,7 +13427,7 @@ def test_search_with_text_match_filter_normal(
for word, count in wf_map[field].most_common(10):
top_10_tokens.append(word)
string_of_top_10_words = " ".join(top_10_tokens)
expr = f"TextMatch({field}, '{string_of_top_10_words}')"
expr = f"text_match({field}, '{string_of_top_10_words}')"
log.info(f"expr {expr}")
res_list, _ = collection_w.search(
data=search_data,
Expand Down
4 changes: 2 additions & 2 deletions tests/restful_client_v2/testcases/test_vector_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -1977,7 +1977,7 @@ def test_search_vector_with_text_match_filter(self, tokenizer):
vector_to_search = [[random.random() for _ in range(dim)]]
for field in text_fields:
token = wf_map[field].most_common()[0][0]
expr = f"TextMatch({field}, '{token}')"
expr = f"text_match({field}, '{token}')"
logger.info(f"expr: {expr}")
rsp = self.vector_client.vector_search({"collectionName": name, "data":vector_to_search, "filter": f"{expr}", "outputFields": ["*"]})
assert rsp['code'] == 0, rsp
Expand Down Expand Up @@ -2813,7 +2813,7 @@ def test_query_vector_with_text_match_filter(self, tokenizer):
time.sleep(5)
for field in text_fields:
token = wf_map[field].most_common()[0][0]
expr = f"TextMatch({field}, '{token}')"
expr = f"text_match({field}, '{token}')"
logger.info(f"expr: {expr}")
rsp = self.vector_client.vector_query({"collectionName": name, "filter": f"{expr}", "outputFields": ["*"]})
assert rsp['code'] == 0, rsp
Expand Down

0 comments on commit 50de122

Please sign in to comment.