From f2cf87ecdb2b004212c5721ac64da6ef5b84e365 Mon Sep 17 00:00:00 2001 From: nitao Date: Wed, 11 Dec 2024 10:34:17 +0800 Subject: [PATCH 1/3] cp to 2.0 "[Tech Request]: optimize filters on composite columns (#20695)" (#20703) cp to 2.0 "[Tech Request]: optimize filters on composite columns (#20695)" Approved by: @heni02, @ouyuanning, @sukki37 --- pkg/sql/plan/explain/explain_expr.go | 33 ++- pkg/sql/plan/expr_opt.go | 19 +- .../cases/optimizer/blockfilter.result | 195 ++++++++++++++++++ .../cases/optimizer/blockfilter.test | 63 ++++++ 4 files changed, 297 insertions(+), 13 deletions(-) create mode 100644 test/distributed/cases/optimizer/blockfilter.result create mode 100644 test/distributed/cases/optimizer/blockfilter.test diff --git a/pkg/sql/plan/explain/explain_expr.go b/pkg/sql/plan/explain/explain_expr.go index 0cf0a78825164..270759179bd9a 100644 --- a/pkg/sql/plan/explain/explain_expr.go +++ b/pkg/sql/plan/explain/explain_expr.go @@ -198,6 +198,21 @@ func describeExpr(ctx context.Context, expr *plan.Expr, options *ExplainOptions, return nil } +func needSpecialHandling(funcExpr *plan.Function) bool { + if funcExpr.Func.GetObjName() == "prefix_in" || funcExpr.Func.GetObjName() == "prefix_eq" || funcExpr.Func.GetObjName() == "prefix_between" { + return true + } + if len(funcExpr.Args) > 1 { + col := funcExpr.Args[0].GetCol() + if col != nil && funcExpr.Args[1].GetCol() == nil { + if strings.Contains(col.Name, catalog.PrefixCBColName) || strings.Contains(col.Name, catalog.PrefixPriColName) { + return true + } + } + } + return false +} + // generator function expression(Expr_F) explain information func funcExprExplain(ctx context.Context, funcExpr *plan.Function, Typ *plan.Type, options *ExplainOptions, buf *bytes.Buffer) error { // SysFunsAndOperatorsMap @@ -212,7 +227,7 @@ func funcExprExplain(ctx context.Context, funcExpr *plan.Function, Typ *plan.Typ switch layout { case function.STANDARD_FUNCTION: buf.WriteString(funcExpr.Func.GetObjName() + "(") - if funcExpr.Func.GetObjName() == "prefix_in" || funcExpr.Func.GetObjName() == "prefix_eq" || funcExpr.Func.GetObjName() == "prefix_between" { + if needSpecialHandling(funcExpr) { //contains invisible character, need special handling err = describeExpr(ctx, funcExpr.Args[0], options, buf) if err != nil { @@ -264,9 +279,11 @@ func funcExprExplain(ctx context.Context, funcExpr *plan.Function, Typ *plan.Typ return err } buf.WriteString(" " + funcExpr.Func.GetObjName() + " ") - err = describeExpr(ctx, funcExpr.Args[1], options, buf) - if err != nil { - return err + if !needSpecialHandling(funcExpr) { + err = describeExpr(ctx, funcExpr.Args[1], options, buf) + if err != nil { + return err + } } buf.WriteString(")") case function.MULTIARY_LOGICAL_OPERATOR: @@ -351,9 +368,11 @@ func funcExprExplain(ctx context.Context, funcExpr *plan.Function, Typ *plan.Typ return err } buf.WriteString(" " + funcExpr.Func.GetObjName() + " (") - err = describeExpr(ctx, funcExpr.Args[1], options, buf) - if err != nil { - return err + if !needSpecialHandling(funcExpr) { + err = describeExpr(ctx, funcExpr.Args[1], options, buf) + if err != nil { + return err + } } buf.WriteString(")") case function.EXISTS_ANY_PREDICATE: diff --git a/pkg/sql/plan/expr_opt.go b/pkg/sql/plan/expr_opt.go index 1869db9de0907..34557ec7d2115 100644 --- a/pkg/sql/plan/expr_opt.go +++ b/pkg/sql/plan/expr_opt.go @@ -18,6 +18,7 @@ import ( "github.com/matrixorigin/matrixone/pkg/container/types" "github.com/matrixorigin/matrixone/pkg/pb/plan" "github.com/matrixorigin/matrixone/pkg/sql/plan/function" + "github.com/matrixorigin/matrixone/pkg/sql/util" ) func (builder *QueryBuilder) mergeFiltersOnCompositeKey(nodeID int32) { @@ -41,9 +42,15 @@ func (builder *QueryBuilder) mergeFiltersOnCompositeKey(nodeID int32) { } func (builder *QueryBuilder) doMergeFiltersOnCompositeKey(tableDef *plan.TableDef, tableTag int32, filters ...*plan.Expr) []*plan.Expr { - pkIdx := tableDef.Name2ColIndex[tableDef.Pkey.PkeyColName] + sortkeyIdx := tableDef.Name2ColIndex[tableDef.Pkey.PkeyColName] col2filter := make(map[int32]int) - numParts := len(tableDef.Pkey.Names) + Parts := tableDef.Pkey.Names + numParts := len(Parts) + if tableDef.ClusterBy != nil && util.JudgeIsCompositeClusterByColumn(tableDef.ClusterBy.Name) { + sortkeyIdx = tableDef.Name2ColIndex[tableDef.ClusterBy.Name] + Parts = util.SplitCompositeClusterByColumnName(tableDef.ClusterBy.Name) + numParts = len(Parts) + } for i, expr := range filters { fn := expr.GetF() @@ -179,7 +186,7 @@ func (builder *QueryBuilder) doMergeFiltersOnCompositeKey(tableDef *plan.TableDe if len(newOrArgs) == 1 { filters[i] = newOrArgs[0] colPos := firstEquiExpr.GetF().Args[0].GetCol().ColPos - if colPos != pkIdx { + if colPos != sortkeyIdx { col2filter[colPos] = i } } else { @@ -193,7 +200,7 @@ func (builder *QueryBuilder) doMergeFiltersOnCompositeKey(tableDef *plan.TableDe } filterIdx := make([]int, 0, numParts) - for _, part := range tableDef.Pkey.Names { + for _, part := range Parts { colIdx := tableDef.Name2ColIndex[part] idx, ok := col2filter[colIdx] if !ok { @@ -212,11 +219,11 @@ func (builder *QueryBuilder) doMergeFiltersOnCompositeKey(tableDef *plan.TableDe var compositePKFilter *plan.Expr pkExpr := &plan.Expr{ - Typ: tableDef.Cols[pkIdx].Typ, + Typ: tableDef.Cols[sortkeyIdx].Typ, Expr: &plan.Expr_Col{ Col: &plan.ColRef{ RelPos: tableTag, - ColPos: pkIdx, + ColPos: sortkeyIdx, }, }, } diff --git a/test/distributed/cases/optimizer/blockfilter.result b/test/distributed/cases/optimizer/blockfilter.result new file mode 100644 index 0000000000000..7a8941cdb3926 --- /dev/null +++ b/test/distributed/cases/optimizer/blockfilter.result @@ -0,0 +1,195 @@ +drop database if exists d1; +create database d1; +use d1; +drop table if exists t1; +drop table if exists t2; +create table t1(c2 int, c1 int, c3 int) cluster by (c1,c2); +create table t2(c1 int, c2 int, c3 int, primary key(c1,c2)); +insert into t1 select result%100,result%10000, result from generate_series(100000) g; +insert into t2 select result%100,*,* from generate_series(1000000) g; +select mo_ctl('dn', 'flush', 'd1.t1'); +mo_ctl(dn, flush, d1.t1) +{\n "method": "Flush",\n "result": [\n {\n "returnStr": "OK"\n }\n ]\n}\n +select mo_ctl('dn', 'flush', 'd1.t2'); +mo_ctl(dn, flush, d1.t2) +{\n "method": "Flush",\n "result": [\n {\n "returnStr": "OK"\n }\n ]\n}\n +select Sleep(1); +Sleep(1) +0 +explain select count(*) from t1 where c1 = 1; +TP QUERY PLAN +Project + -> Aggregate + Aggregate Functions: starcount(1) + -> Table Scan on d1.t1 + Filter Cond: prefix_eq(t1.__mo_cbkey_002c1002c2) + Block Filter Cond: prefix_eq(t1.__mo_cbkey_002c1002c2) +select count(*) from t1 where c1 = 1; +count(*) +10 +explain select count(*) from t1 where c1 > 10; +TP QUERY PLAN +Project + -> Aggregate + Aggregate Functions: starcount(1) + -> Table Scan on d1.t1 + Filter Cond: (t1.c1 > 10) + Block Filter Cond: (t1.c1 > 10) +select count(*) from t1 where c1 > 10; +count(*) +99890 +explain select count(*) from t1 where c1 in (1,2,3); +TP QUERY PLAN +Project + -> Aggregate + Aggregate Functions: starcount(1) + -> Table Scan on d1.t1 + Filter Cond: prefix_in(t1.__mo_cbkey_002c1002c2) + Block Filter Cond: prefix_in(t1.__mo_cbkey_002c1002c2) +select count(*) from t1 where c1 in (1,2,3); +count(*) +30 +explain select count(*) from t1 where c1 between 1 and 5; +TP QUERY PLAN +Project + -> Aggregate + Aggregate Functions: starcount(1) + -> Table Scan on d1.t1 + Filter Cond: t1.c1 BETWEEN 1 AND 5 + Block Filter Cond: t1.c1 BETWEEN 1 AND 5 +select count(*) from t1 where c1 between 1 and 5; +count(*) +50 +explain select count(*) from t1 where c1 = 2 and c2 = 10; +TP QUERY PLAN +Project + -> Aggregate + Aggregate Functions: starcount(1) + -> Table Scan on d1.t1 + Filter Cond: (t1.__mo_cbkey_002c1002c2 = ) + Block Filter Cond: (t1.__mo_cbkey_002c1002c2 = ) +select count(*) from t1 where c1 = 2 and c2 = 10; +count(*) +0 +explain select count(*) from t1 where c1 = 5 and c2 > 10; +TP QUERY PLAN +Project + -> Aggregate + Aggregate Functions: starcount(1) + -> Table Scan on d1.t1 + Filter Cond: prefix_eq(t1.__mo_cbkey_002c1002c2), (t1.c2 > 10) + Block Filter Cond: prefix_eq(t1.__mo_cbkey_002c1002c2), (t1.c2 > 10) +select count(*) from t1 where c1 = 5 and c2 > 10; +count(*) +0 +explain select count(*) from t1 where c1 = 3 and c2 in (1,2,3); +TP QUERY PLAN +Project + -> Aggregate + Aggregate Functions: starcount(1) + -> Table Scan on d1.t1 + Filter Cond: t1.__mo_cbkey_002c1002c2 in () + Block Filter Cond: t1.__mo_cbkey_002c1002c2 in () +select count(*) from t1 where c1 = 3 and c2 in (1,2,3); +count(*) +10 +explain select count(*) from t1 where c1=4 and c2 between 1 and 5; +TP QUERY PLAN +Project + -> Aggregate + Aggregate Functions: starcount(1) + -> Table Scan on d1.t1 + Filter Cond: prefix_eq(t1.__mo_cbkey_002c1002c2), t1.c2 BETWEEN 1 AND 5 + Block Filter Cond: prefix_eq(t1.__mo_cbkey_002c1002c2), t1.c2 BETWEEN 1 AND 5 +select count(*) from t1 where c1=4 and c2 between 1 and 5; +count(*) +10 +explain select count(*) from t2 where c1 = 1; +TP QUERY PLAN +Project + -> Aggregate + Aggregate Functions: starcount(1) + -> Table Scan on d1.t2 + Filter Cond: prefix_eq(t2.__mo_cpkey_col) + Block Filter Cond: prefix_eq(t2.__mo_cpkey_col) +select count(*) from t2 where c1 = 1; +count(*) +10000 +explain select count(*) from t2 where c1 > 10; +AP QUERY PLAN ON ONE CN(4 core) +Project + -> Aggregate + Aggregate Functions: starcount(1) + -> Table Scan on d1.t2 + Filter Cond: (t2.c1 > 10) + Block Filter Cond: (t2.c1 > 10) +select count(*) from t2 where c1 > 10; +count(*) +890000 +explain select count(*) from t2 where c1 in (1,2,3); +TP QUERY PLAN +Project + -> Aggregate + Aggregate Functions: starcount(1) + -> Table Scan on d1.t2 + Filter Cond: prefix_in(t2.__mo_cpkey_col) + Block Filter Cond: prefix_in(t2.__mo_cpkey_col) +select count(*) from t2 where c1 in (1,2,3); +count(*) +30000 +explain select count(*) from t2 where c1 between 1 and 5; +TP QUERY PLAN +Project + -> Aggregate + Aggregate Functions: starcount(1) + -> Table Scan on d1.t2 + Filter Cond: t2.c1 BETWEEN 1 AND 5 + Block Filter Cond: t2.c1 BETWEEN 1 AND 5 +select count(*) from t2 where c1 between 1 and 5; +count(*) +50000 +explain select count(*) from t2 where c1 = 2 and c2 = 10; +TP QUERY PLAN +Project + -> Aggregate + Aggregate Functions: starcount(1) + -> Table Scan on d1.t2 + Filter Cond: (t2.__mo_cpkey_col = ) + Block Filter Cond: (t2.__mo_cpkey_col = ) +select count(*) from t2 where c1 = 2 and c2 = 10; +count(*) +0 +explain select count(*) from t2 where c1 = 5 and c2 > 10; +TP QUERY PLAN +Project + -> Aggregate + Aggregate Functions: starcount(1) + -> Table Scan on d1.t2 + Filter Cond: prefix_eq(t2.__mo_cpkey_col), (t2.c2 > 10) + Block Filter Cond: prefix_eq(t2.__mo_cpkey_col), (t2.c2 > 10) +select count(*) from t2 where c1 = 5 and c2 > 10; +count(*) +9999 +explain select count(*) from t2 where c1 = 3 and c2 in (1,2,3); +TP QUERY PLAN +Project + -> Aggregate + Aggregate Functions: starcount(1) + -> Table Scan on d1.t2 + Filter Cond: t2.__mo_cpkey_col in () + Block Filter Cond: t2.__mo_cpkey_col in () +select count(*) from t2 where c1 = 3 and c2 in (1,2,3); +count(*) +1 +explain select count(*) from t2 where c1=4 and c2 between 1 and 5; +TP QUERY PLAN +Project + -> Aggregate + Aggregate Functions: starcount(1) + -> Table Scan on d1.t2 + Filter Cond: prefix_eq(t2.__mo_cpkey_col), t2.c2 BETWEEN 1 AND 5 + Block Filter Cond: prefix_eq(t2.__mo_cpkey_col), t2.c2 BETWEEN 1 AND 5 +select count(*) from t2 where c1=4 and c2 between 1 and 5; +count(*) +1 +drop database if exists d1; \ No newline at end of file diff --git a/test/distributed/cases/optimizer/blockfilter.test b/test/distributed/cases/optimizer/blockfilter.test new file mode 100644 index 0000000000000..f47979ad2fe3d --- /dev/null +++ b/test/distributed/cases/optimizer/blockfilter.test @@ -0,0 +1,63 @@ +drop database if exists d1; +create database d1; +use d1; +drop table if exists t1; +drop table if exists t2; +create table t1(c2 int, c1 int, c3 int) cluster by (c1,c2); +create table t2(c1 int, c2 int, c3 int, primary key(c1,c2)); +insert into t1 select result%100,result%10000, result from generate_series(100000) g; +insert into t2 select result%100,*,* from generate_series(1000000) g; +-- @separator:table +select mo_ctl('dn', 'flush', 'd1.t1'); +-- @separator:table +select mo_ctl('dn', 'flush', 'd1.t2'); +select Sleep(1); +-- @separator:table +explain select count(*) from t1 where c1 = 1; +select count(*) from t1 where c1 = 1; +-- @separator:table +explain select count(*) from t1 where c1 > 10; +select count(*) from t1 where c1 > 10; +-- @separator:table +explain select count(*) from t1 where c1 in (1,2,3); +select count(*) from t1 where c1 in (1,2,3); +-- @separator:table +explain select count(*) from t1 where c1 between 1 and 5; +select count(*) from t1 where c1 between 1 and 5; +-- @separator:table +explain select count(*) from t1 where c1 = 2 and c2 = 10; +select count(*) from t1 where c1 = 2 and c2 = 10; +-- @separator:table +explain select count(*) from t1 where c1 = 5 and c2 > 10; +select count(*) from t1 where c1 = 5 and c2 > 10; +-- @separator:table +explain select count(*) from t1 where c1 = 3 and c2 in (1,2,3); +select count(*) from t1 where c1 = 3 and c2 in (1,2,3); +-- @separator:table +explain select count(*) from t1 where c1=4 and c2 between 1 and 5; +select count(*) from t1 where c1=4 and c2 between 1 and 5; +-- @separator:table +explain select count(*) from t2 where c1 = 1; +select count(*) from t2 where c1 = 1; +-- @separator:table +explain select count(*) from t2 where c1 > 10; +select count(*) from t2 where c1 > 10; +-- @separator:table +explain select count(*) from t2 where c1 in (1,2,3); +select count(*) from t2 where c1 in (1,2,3); +-- @separator:table +explain select count(*) from t2 where c1 between 1 and 5; +select count(*) from t2 where c1 between 1 and 5; +-- @separator:table +explain select count(*) from t2 where c1 = 2 and c2 = 10; +select count(*) from t2 where c1 = 2 and c2 = 10; +-- @separator:table +explain select count(*) from t2 where c1 = 5 and c2 > 10; +select count(*) from t2 where c1 = 5 and c2 > 10; +-- @separator:table +explain select count(*) from t2 where c1 = 3 and c2 in (1,2,3); +select count(*) from t2 where c1 = 3 and c2 in (1,2,3); +-- @separator:table +explain select count(*) from t2 where c1=4 and c2 between 1 and 5; +select count(*) from t2 where c1=4 and c2 between 1 and 5; +drop database if exists d1; \ No newline at end of file From 9b4d02012cc6ee68c4a0c74fc27656af2320145a Mon Sep 17 00:00:00 2001 From: nitao Date: Wed, 11 Dec 2024 12:28:24 +0800 Subject: [PATCH 2/3] fix a bug for case when expression (#20710) fix a bug for case when expression Approved by: @m-schen, @heni02, @sukki37, @aressu1985 --- pkg/sql/plan/function/func_compare.go | 6 +++--- .../cases/expression/case_when.result | 16 +++++++++++++++- test/distributed/cases/expression/case_when.sql | 7 ++++++- 3 files changed, 24 insertions(+), 5 deletions(-) diff --git a/pkg/sql/plan/function/func_compare.go b/pkg/sql/plan/function/func_compare.go index 69f22ff1d8ab7..818ba1dd6f501 100644 --- a/pkg/sql/plan/function/func_compare.go +++ b/pkg/sql/plan/function/func_compare.go @@ -135,14 +135,14 @@ func equalFn(parameters []*vector.Vector, result vector.FunctionResultWrapper, p return a == b }, selectList) case types.T_char, types.T_varchar, types.T_blob, types.T_json, types.T_text, types.T_binary, types.T_varbinary, types.T_datalink: - if parameters[0].GetArea() == nil && parameters[1].GetArea() == nil { + if parameters[0].GetArea() == nil && parameters[1].GetArea() == nil && (selectList == nil) { return compareVarlenaEqual(parameters, rs, proc, length, selectList) } return opBinaryStrStrToFixed[bool](parameters, rs, proc, length, func(v1, v2 string) bool { return v1 == v2 }, selectList) case types.T_array_float32: - if parameters[0].GetArea() == nil && parameters[1].GetArea() == nil { + if parameters[0].GetArea() == nil && parameters[1].GetArea() == nil && (selectList == nil) { return compareVarlenaEqual(parameters, rs, proc, length, selectList) } return opBinaryBytesBytesToFixed[bool](parameters, rs, proc, length, func(v1, v2 []byte) bool { @@ -152,7 +152,7 @@ func equalFn(parameters []*vector.Vector, result vector.FunctionResultWrapper, p return moarray.Compare[float32](_v1, _v2) == 0 }, selectList) case types.T_array_float64: - if parameters[0].GetArea() == nil && parameters[1].GetArea() == nil { + if parameters[0].GetArea() == nil && parameters[1].GetArea() == nil && (selectList == nil) { return compareVarlenaEqual(parameters, rs, proc, length, selectList) } return opBinaryBytesBytesToFixed[bool](parameters, rs, proc, length, func(v1, v2 []byte) bool { diff --git a/test/distributed/cases/expression/case_when.result b/test/distributed/cases/expression/case_when.result index 3b67db786a16f..7361a350af331 100755 --- a/test/distributed/cases/expression/case_when.result +++ b/test/distributed/cases/expression/case_when.result @@ -86,6 +86,14 @@ nothing 2 drop table if exists t1; create table t1 (`row` int not null, col int not null, val varchar(255) not null); insert into t1 values (1,1,'orange'),(1,2,'large'),(2,1,'yellow'),(2,2,'medium'),(3,1,'green'),(3,2,'small'); +select col,val, case when val="orange" then 1 when upper(val)="LARGE" then 2 else 3 end from t1; +col val case when val = orange then 1 when upper(val) = LARGE then 2 else 3 end +1 orange 1 +2 large 2 +1 yellow 3 +2 medium 3 +1 green 3 +2 small 3 select max(case col when 1 then val else null end) as color from t1 group by `row`; color orange @@ -219,4 +227,10 @@ id xxx 18 3 19 3 20 3 -DROP TABLE t1; \ No newline at end of file +DROP TABLE t1; +create table t1(a varchar(100)); +insert into t1 values ("a"); +select a, case when a="a" then 1 when upper(a)="b" then 2 end from t1; +a case when a = a then 1 when upper(a) = b then 2 end +a 1 +drop table if exists t1; \ No newline at end of file diff --git a/test/distributed/cases/expression/case_when.sql b/test/distributed/cases/expression/case_when.sql index 1ab3bc9d90f47..068a697067ffc 100755 --- a/test/distributed/cases/expression/case_when.sql +++ b/test/distributed/cases/expression/case_when.sql @@ -60,6 +60,7 @@ drop table if exists t1; -- @label:bvt create table t1 (`row` int not null, col int not null, val varchar(255) not null); insert into t1 values (1,1,'orange'),(1,2,'large'),(2,1,'yellow'),(2,2,'medium'),(3,1,'green'),(3,2,'small'); +select col,val, case when val="orange" then 1 when upper(val)="LARGE" then 2 else 3 end from t1; select max(case col when 1 then val else null end) as color from t1 group by `row`; drop table if exists t1; @@ -164,4 +165,8 @@ insert into t1 values (19, 7986, '1771-12-06'), (20, 7987, '1985-10-06'); select id, case when id < 5 then 0 when id < 10 then 1 when id < 15 then 2 when true then 3 else -1 end as xxx from t1; -DROP TABLE t1; \ No newline at end of file +DROP TABLE t1; +create table t1(a varchar(100)); +insert into t1 values ("a"); +select a, case when a="a" then 1 when upper(a)="b" then 2 end from t1; +drop table if exists t1; \ No newline at end of file From 6b1e4ea0db7148f0d1d6b97276e240a064bdb06e Mon Sep 17 00:00:00 2001 From: YANGGMM Date: Wed, 11 Dec 2024 13:34:19 +0800 Subject: [PATCH 3/3] support json_set/json_insert/json_replace(#2.0-dev) (#20592) support json_set/json_insert/json_replace Approved by: @heni02, @m-schen, @XuPeng-SH, @aressu1985, @sukki37 --- .../bytejson/bytejosn_modifier_test.go | 245 +++++++++++++++++ pkg/container/bytejson/bytejson.go | 37 +++ pkg/container/bytejson/bytejson_modifier.go | 185 +++++++++++++ pkg/container/bytejson/path.go | 5 + pkg/container/bytejson/types.go | 25 +- pkg/container/bytejson/utils.go | 255 ++++++++++++++++++ pkg/sql/plan/function/func_builtin_json.go | 200 ++++++++++++++ pkg/sql/plan/function/function_id.go | 8 + pkg/sql/plan/function/list_builtIn.go | 61 +++++ .../cases/function/func_json_insert.result | 61 +++++ .../cases/function/func_json_insert.sql | 38 +++ .../cases/function/func_json_replace.result | 61 +++++ .../cases/function/func_json_replace.sql | 38 +++ .../cases/function/func_json_set.result | 162 +++++++++++ .../cases/function/func_json_set.sql | 120 +++++++++ 15 files changed, 1498 insertions(+), 3 deletions(-) create mode 100644 pkg/container/bytejson/bytejosn_modifier_test.go create mode 100644 pkg/container/bytejson/bytejson_modifier.go create mode 100644 test/distributed/cases/function/func_json_insert.result create mode 100644 test/distributed/cases/function/func_json_insert.sql create mode 100644 test/distributed/cases/function/func_json_replace.result create mode 100644 test/distributed/cases/function/func_json_replace.sql create mode 100644 test/distributed/cases/function/func_json_set.result create mode 100644 test/distributed/cases/function/func_json_set.sql diff --git a/pkg/container/bytejson/bytejosn_modifier_test.go b/pkg/container/bytejson/bytejosn_modifier_test.go new file mode 100644 index 0000000000000..e1c3e71161390 --- /dev/null +++ b/pkg/container/bytejson/bytejosn_modifier_test.go @@ -0,0 +1,245 @@ +// Copyright 2024 Matrix Origin +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package bytejson + +import ( + "encoding/json" + "testing" + + "github.com/stretchr/testify/require" +) + +func Test_Modify(t *testing.T) { + path, _ := ParseJsonPath("$.a") + type args struct { + pathList []*Path + valList []ByteJson + modifyType JsonModifyType + expectedErr bool + } + tests := []args{ + // path is empty + { + pathList: []*Path{}, + valList: []ByteJson{}, + modifyType: JsonModifyReplace, + expectedErr: false, + }, + // path length is not equal to val length + { + pathList: []*Path{}, + valList: []ByteJson{Null}, + modifyType: JsonModifyReplace, + expectedErr: true, + }, + // modifyType is not valid + { + + pathList: []*Path{&path}, + valList: []ByteJson{Null}, + modifyType: JsonModifyType(100), + expectedErr: true, + }, + } + + for _, test := range tests { + bj := Null + _, err := bj.Modify(test.pathList, test.valList, test.modifyType) + if test.expectedErr { + require.Error(t, err) + } else { + require.NoError(t, err) + } + } +} + +func TestAppendBinaryJSON(t *testing.T) { + tests := []struct { + name string + input any + wantType TpCode + wantErr bool + }{ + { + name: "nil value", + input: nil, + wantType: TpCodeLiteral, + wantErr: false, + }, + { + name: "bool true", + input: true, + wantType: TpCodeLiteral, + wantErr: false, + }, + { + name: "bool false", + input: false, + wantType: TpCodeLiteral, + wantErr: false, + }, + { + name: "int64", + input: int64(123), + wantType: TpCodeInt64, + wantErr: false, + }, + { + name: "uint64", + input: uint64(123), + wantType: TpCodeUint64, + wantErr: false, + }, + { + name: "float64", + input: float64(123.45), + wantType: TpCodeFloat64, + wantErr: false, + }, + { + name: "string", + input: "test", + wantType: TpCodeString, + wantErr: false, + }, + { + name: "array", + input: []any{int64(1), int64(2), true}, + wantType: TpCodeArray, + wantErr: false, + }, + { + name: "object", + input: map[string]any{"key": "value"}, + wantType: TpCodeObject, + wantErr: false, + }, + { + name: "invalid type", + input: struct{}{}, + wantType: 0, + wantErr: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + gotType, gotBuf, err := appendBinaryJSON(nil, tt.input) + + if tt.wantErr { + require.Error(t, err) + return + } + + require.NoError(t, err) + require.Equal(t, tt.wantType, gotType) + + require.NotEmpty(t, gotBuf) + + switch tt.input.(type) { + case nil: + require.Equal(t, []byte{LiteralNull}, gotBuf) + case bool: + if tt.input.(bool) { + require.Equal(t, []byte{LiteralTrue}, gotBuf) + } else { + require.Equal(t, []byte{LiteralFalse}, gotBuf) + } + } + }) + } +} + +func TestAppendBinaryNumber(t *testing.T) { + tests := []struct { + name string + input json.Number + wantType TpCode + wantErr bool + }{ + { + name: "int64", + input: json.Number("123"), + wantType: TpCodeInt64, + wantErr: false, + }, + { + name: "uint64", + input: json.Number("18446744073709551615"), + wantType: TpCodeUint64, + wantErr: false, + }, + { + name: "float64", + input: json.Number("123.45"), + wantType: TpCodeFloat64, + wantErr: false, + }, + { + name: "invalid number", + input: json.Number("invalid"), + wantType: 0, + wantErr: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + gotType, gotBuf, err := appendBinaryNumber(nil, tt.input) + + if tt.wantErr { + require.Error(t, err) + return + } + + require.NoError(t, err) + require.Equal(t, tt.wantType, gotType) + require.NotEmpty(t, gotBuf) + }) + } +} + +func TestAppendBinaryString(t *testing.T) { + tests := []struct { + name string + input string + }{ + { + name: "empty string", + input: "", + }, + { + name: "simple string", + input: "test", + }, + { + name: "unicode string", + input: "测试", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := appendBinaryString(nil, tt.input) + require.NotEmpty(t, got) + + strLen, lenLen := calStrLen(got) + require.Equal(t, len(tt.input), strLen) + require.True(t, lenLen > 0) + + require.Equal(t, tt.input, string(got[lenLen:])) + }) + } +} diff --git a/pkg/container/bytejson/bytejson.go b/pkg/container/bytejson/bytejson.go index bc03974d50174..058d97dbfe37f 100644 --- a/pkg/container/bytejson/bytejson.go +++ b/pkg/container/bytejson/bytejson.go @@ -472,6 +472,43 @@ func (bj ByteJson) QuerySimple(paths []*Path) ByteJson { } } +func (bj ByteJson) Modify(pathList []*Path, valList []ByteJson, modifyType JsonModifyType) (ByteJson, error) { + var ( + err error + ) + + if len(pathList) != len(valList) { + return Null, moerr.NewInvalidInputNoCtx("pathList and valList should have the same length") + } + + if len(pathList) == 0 { + return bj, nil + } + + for i := 0; i < len(pathList); i++ { + path := pathList[i] + val := valList[i] + + modifier := &bytejsonModifier{bj: bj} + + switch modifyType { + case JsonModifySet: + bj, err = modifier.set(path, val) + case JsonModifyInsert: + bj, err = modifier.insert(path, val) + case JsonModifyReplace: + bj, err = modifier.replace(path, val) + default: + return Null, moerr.NewInvalidInputNoCtx("invalid modify type") + } + + if err != nil { + return Null, err + } + } + return bj, nil +} + func (bj ByteJson) canUnnest() bool { return bj.Type == TpCodeArray || bj.Type == TpCodeObject } diff --git a/pkg/container/bytejson/bytejson_modifier.go b/pkg/container/bytejson/bytejson_modifier.go new file mode 100644 index 0000000000000..dc9fe15969ab0 --- /dev/null +++ b/pkg/container/bytejson/bytejson_modifier.go @@ -0,0 +1,185 @@ +// Copyright 2024 Matrix Origin +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package bytejson + +import ( + "bytes" + "sort" + + "github.com/matrixorigin/matrixone/pkg/common/moerr" +) + +type bytejsonModifier struct { + bj ByteJson + modifyPtr *byte + modifyVal ByteJson +} + +func (bm *bytejsonModifier) insert(path *Path, newBj ByteJson) (ByteJson, error) { + result := bm.bj.querySimple(path) + if CompareByteJson(result, Null) > 0 { + // if path exists, return + return bm.bj, nil + } + // insert + if err := bm.doInsert(path, newBj); err == nil { + return bm.rebuild(), nil + } + + return Null, moerr.NewInvalidArgNoCtx("invalid json insert", path.String()) +} + +func (bm *bytejsonModifier) replace(path *Path, newBj ByteJson) (ByteJson, error) { + result := bm.bj.querySimple(path) + if CompareByteJson(result, Null) == 0 { + // if path not exists, return + return bm.bj, nil + } + // replace + bm.modifyPtr = &result.Data[0] + bm.modifyVal = newBj + return bm.rebuild(), nil +} + +func (bm *bytejsonModifier) set(path *Path, newBj ByteJson) (ByteJson, error) { + result := bm.bj.querySimple(path) + if CompareByteJson(result, Null) > 0 { + // set + bm.modifyPtr = &result.Data[0] + bm.modifyVal = newBj + return bm.rebuild(), nil + } + // insert + if err := bm.doInsert(path, newBj); err == nil { + return bm.rebuild(), nil + } + // return bm.rebuild() + return Null, moerr.NewInvalidArgNoCtx("invalid path", path.String()) +} + +func (bm *bytejsonModifier) rebuild() ByteJson { + buf := make([]byte, 0, len(bm.bj.Data)+len(bm.modifyVal.Data)) + value, tpCode := bm.rebuildTo(buf) + return ByteJson{Type: tpCode, Data: value} +} + +func (bm *bytejsonModifier) rebuildTo(buf []byte) ([]byte, TpCode) { + if bm.modifyPtr == &bm.bj.Data[0] { + bm.modifyPtr = nil + return append(buf, bm.modifyVal.Data...), bm.modifyVal.Type + } else if bm.modifyPtr == nil { + return append(buf, bm.bj.Data...), bm.bj.Type + } + + bj := bm.bj + if bj.Type != TpCodeArray && bj.Type != TpCodeObject { + return append(buf, bj.Data...), bj.Type + } + + docOff := len(buf) + elemCount := bj.GetElemCnt() + + var valEntryStart int + if bj.Type == TpCodeArray { + // json array + copySize := headerSize + elemCount*valEntrySize + valEntryStart = headerSize + buf = append(buf, bj.Data[:copySize]...) + } else { + // josn object + copySize := headerSize + elemCount*(keyEntrySize+valEntrySize) + valEntryStart = headerSize + elemCount*keyEntrySize + buf = append(buf, bj.Data[:copySize]...) + if elemCount > 0 { + firstKeyOff := int(endian.Uint32(bj.Data[headerSize:])) + lastKeyOff := int(endian.Uint32(bj.Data[headerSize+keyEntrySize*(elemCount-1):])) + lastKeyLen := int(endian.Uint16(bj.Data[headerSize+keyEntrySize*(elemCount-1)+docSizeOff:])) + buf = append(buf, bj.Data[firstKeyOff:lastKeyOff+lastKeyLen]...) + } + } + + for i := 0; i < elemCount; i++ { + valEntryOff := valEntryStart + i*valEntrySize + elem := bj.getValEntry(valEntryOff) + bm.bj = elem + var tpCode TpCode + valOff := len(buf) - docOff + buf, tpCode = bm.rebuildTo(buf) + buf[docOff+valEntryOff] = tpCode + if tpCode == TpCodeLiteral { + lastIdx := len(buf) - 1 + endian.PutUint32(buf[docOff+valEntryOff+valTypeSize:], uint32(buf[lastIdx])) + buf = buf[:lastIdx] + } else { + endian.PutUint32(buf[docOff+valEntryOff+valTypeSize:], uint32(valOff)) + } + } + endian.PutUint32(buf[docOff+docSizeOff:], uint32(len(buf)-docOff)) + return buf, bj.Type +} + +func (bm *bytejsonModifier) doInsert(path *Path, newBj ByteJson) (err error) { + parentPath, lastSub := path.popOneSubPath() + result := bm.bj.querySimple(&parentPath) + if CompareByteJson(result, Null) == 0 { + return + } + + parent := result + + if lastSub.tp == subPathIdx { + bm.modifyPtr = &parent.Data[0] + if parent.Type != TpCodeArray { + bm.modifyVal = buildBinaryJSONArray([]ByteJson{parent, newBj}) + return + } + elemCnt := parent.GetElemCnt() + elems := make([]ByteJson, 0, elemCnt+1) + for i := 0; i < elemCnt; i++ { + elems = append(elems, parent.getArrayElem(i)) + } + elems = append(elems, newBj) + bm.modifyVal = buildBinaryJSONArray(elems) + return + } + + if parent.Type != TpCodeObject { + return + } + + bm.modifyPtr = &parent.Data[0] + elementCount := parent.GetElemCnt() + insertKey := lastSub.key + inserIndx := sort.Search(elementCount, func(i int) bool { + k := parent.getObjectKey(i) + return bytes.Compare(k, []byte(insertKey)) >= 0 + }) + keys := make([][]byte, 0, elementCount+1) + elems := make([]ByteJson, 0, elementCount+1) + for i := 0; i < elementCount; i++ { + if i == inserIndx { + keys = append(keys, []byte(insertKey)) + elems = append(elems, newBj) + } + keys = append(keys, parent.getObjectKey(i)) + elems = append(elems, parent.getObjectVal(i)) + } + if inserIndx == elementCount { + keys = append(keys, []byte(insertKey)) + elems = append(elems, newBj) + } + bm.modifyVal, err = buildJsonObject(keys, elems) + return +} diff --git a/pkg/container/bytejson/path.go b/pkg/container/bytejson/path.go index c8be8deccd0ff..580b5d473e761 100644 --- a/pkg/container/bytejson/path.go +++ b/pkg/container/bytejson/path.go @@ -69,6 +69,11 @@ func (p *Path) IsSimple() bool { return true } +func (p *Path) popOneSubPath() (Path, subPath) { + lastPathIdx := len(p.paths) - 1 + return Path{paths: p.paths[:lastPathIdx]}, p.paths[lastPathIdx] +} + func (p *Path) String() string { var s strings.Builder diff --git a/pkg/container/bytejson/types.go b/pkg/container/bytejson/types.go index f4a6009dcca4d..de7e8851332a6 100644 --- a/pkg/container/bytejson/types.go +++ b/pkg/container/bytejson/types.go @@ -73,6 +73,7 @@ const ( subPathKey subPathRange ) + const ( pathFlagSingleStar pathFlag = iota + 1 pathFlagDoubleStar @@ -107,6 +108,11 @@ var ( Null = ByteJson{Type: TpCodeLiteral, Data: []byte{LiteralNull}} ) +type field struct { + key string + val any +} + var ( escapedChars = map[byte]byte{ '"': '"', @@ -137,7 +143,7 @@ func (bj ByteJson) TYPE() string { case TpCodeArray: return "ARRAY" case TpCodeLiteral: - return "NULL" + return "LITERAL" case TpCodeInt64: return "INTEGER" case TpCodeUint64: @@ -158,16 +164,29 @@ var jsonTpOrder = map[string]int{ "INTEGER": -4, "UNSIGNED INTEGER": -5, "DOUBLE": -6, - "NULL": -7, + "LITERAL": -7, } +type JsonModifyType byte + +const ( + // JsonModifyInsert is for insert a new element into a JSON. + // If an old elemList exists, it would NOT replace it. + JsonModifyInsert JsonModifyType = 0x01 + // JsonModifyReplace is for replace an old elemList from a JSON. + // If no elemList exists, it would NOT insert it. + JsonModifyReplace JsonModifyType = 0x02 + // JsonModifySet = JsonModifyInsert | JsonModifyReplace + JsonModifySet JsonModifyType = 0x03 +) + func CompareByteJson(left, right ByteJson) int { order1 := jsonTpOrder[left.TYPE()] order2 := jsonTpOrder[right.TYPE()] var cmp int if order1 == order2 { - if order1 == jsonTpOrder["NULL"] { + if order1 == jsonTpOrder["LITERAL"] { cmp = 0 } switch left.Type { diff --git a/pkg/container/bytejson/utils.go b/pkg/container/bytejson/utils.go index d528a983ac94d..542901c4f5cfb 100644 --- a/pkg/container/bytejson/utils.go +++ b/pkg/container/bytejson/utils.go @@ -16,10 +16,13 @@ package bytejson import ( "bytes" + "cmp" "encoding/binary" "encoding/json" "math" "math/bits" + "reflect" + "slices" "strconv" "strings" "unicode/utf8" @@ -151,6 +154,54 @@ func addByteElem(buf []byte, entryStart int, elems []ByteJson) []byte { return buf } +func buildJsonObject(keys [][]byte, elems []ByteJson) (ByteJson, error) { + totalSize := headerSize + len(elems)*(keyEntrySize+valEntrySize) + for i, elem := range elems { + if elem.Type != TpCodeLiteral { + totalSize += len(elem.Data) + } + totalSize += len(keys[i]) + } + buf := make([]byte, headerSize+len(elems)*(keyEntrySize+valEntrySize), totalSize) + endian.PutUint32(buf, uint32(len(elems))) + endian.PutUint32(buf[docSizeOff:], uint32(totalSize)) + for i, key := range keys { + endian.PutUint32(buf[headerSize+i*keyEntrySize:], uint32(len(buf))) + endian.PutUint16(buf[headerSize+i*keyEntrySize+keyOriginOff:], uint16(len(key))) + buf = append(buf, key...) + } + entryStart := headerSize + len(elems)*keyEntrySize + buf = addByteElem(buf, entryStart, elems) + return ByteJson{Type: TpCodeObject, Data: buf}, nil +} + +func buildBinaryJSONArray(elems []ByteJson) ByteJson { + totalSize := headerSize + len(elems)*valEntrySize + for _, elem := range elems { + if elem.Type != TpCodeLiteral { + totalSize += len(elem.Data) + } + } + buf := make([]byte, headerSize+len(elems)*valEntrySize, totalSize) + endian.PutUint32(buf, uint32(len(elems))) + endian.PutUint32(buf[docSizeOff:], uint32(totalSize)) + buf = buildBinaryJSONElements(buf, headerSize, elems) + return ByteJson{Type: TpCodeArray, Data: buf} +} + +func buildBinaryJSONElements(buf []byte, entryStart int, elems []ByteJson) []byte { + for i, elem := range elems { + buf[entryStart+i*valEntrySize] = elem.Type + if elem.Type == TpCodeLiteral { + buf[entryStart+i*valEntrySize+valTypeSize] = elem.Data[0] + } else { + endian.PutUint32(buf[entryStart+i*valEntrySize+valTypeSize:], uint32(len(buf))) + buf = append(buf, elem.Data...) + } + } + return buf +} + func mergeToArray(origin []ByteJson) ByteJson { totalSize := headerSize + len(origin)*valEntrySize for _, el := range origin { @@ -444,3 +495,207 @@ func appendString(out []byte, in string) ([]byte, error) { out = append(out, '"') return out, nil } + +func CreateByteJSON(in any) (ByteJson, error) { + return CreateByteJSONWithCheck(in) +} + +func CreateByteJSONWithCheck(in any) (ByteJson, error) { + typeCode, buf, err := appendBinaryJSON(nil, in) + if err != nil { + return ByteJson{}, err + } + return ByteJson{Type: typeCode, Data: buf}, nil +} + +func appendBinaryJSON(buf []byte, in any) (TpCode, []byte, error) { + var typeCode byte + var err error + switch x := in.(type) { + case nil: + typeCode = TpCodeLiteral + buf = append(buf, LiteralNull) + case bool: + typeCode = TpCodeLiteral + if x { + buf = append(buf, LiteralTrue) + } else { + buf = append(buf, LiteralFalse) + } + case int64: + typeCode = TpCodeInt64 + buf = appendBinaryUint64(buf, uint64(x)) + case uint64: + typeCode = TpCodeUint64 + buf = appendBinaryUint64(buf, x) + case float64: + typeCode = TpCodeFloat64 + buf = appendBinaryFloat64(buf, x) + case json.Number: + typeCode, buf, err = appendBinaryNumber(buf, x) + if err != nil { + return typeCode, nil, err + } + case string: + typeCode = TpCodeString + buf = appendBinaryString(buf, x) + case ByteJson: + typeCode = x.Type + buf = append(buf, x.Data...) + case []any: + typeCode = TpCodeArray + buf, err = appendBinaryArray(buf, x) + if err != nil { + return typeCode, nil, err + } + case map[string]any: + typeCode = TpCodeObject + buf, err = appendBinaryObject(buf, x) + if err != nil { + return typeCode, nil, err + } + default: + return typeCode, nil, moerr.NewInvalidArgNoCtx("invalid json type", reflect.TypeOf(in).String()) + } + return typeCode, buf, err +} + +func appendBinaryUint64(buf []byte, v uint64) []byte { + off := len(buf) + buf = appendZero(buf, 8) + endian.PutUint64(buf[off:], v) + return buf +} + +func appendUint32(buf []byte, v uint32) []byte { + var tmp [4]byte + endian.PutUint32(tmp[:], v) + return append(buf, tmp[:]...) +} + +func appendBinaryFloat64(buf []byte, v float64) []byte { + off := len(buf) + buf = appendZero(buf, 8) + endian.PutUint64(buf[off:], math.Float64bits(v)) + return buf +} + +func appendBinaryNumber(buf []byte, x json.Number) (TpCode, []byte, error) { + if strings.Contains(x.String(), "Ee.") { + f64, err := x.Float64() + if err != nil { + return TpCodeFloat64, nil, moerr.NewInvalidArgNoCtx("invalid json number", x.String()) + } + return TpCodeFloat64, appendBinaryFloat64(buf, f64), nil + } else if val, err := x.Int64(); err == nil { + return TpCodeInt64, appendBinaryUint64(buf, uint64(val)), nil + } else if val, err := strconv.ParseUint(string(x), 10, 64); err == nil { + return TpCodeUint64, appendBinaryUint64(buf, val), nil + } + val, err := x.Float64() + if err == nil { + return TpCodeFloat64, appendBinaryFloat64(buf, val), nil + } + var typeCode TpCode + return typeCode, nil, moerr.NewInvalidArgNoCtx("invalid json number", x.String()) +} + +func appendBinaryString(buf []byte, v string) []byte { + begin := len(buf) + buf = appendZero(buf, binary.MaxVarintLen64) + lenLen := binary.PutUvarint(buf[begin:], uint64(len(v))) + buf = buf[:len(buf)-binary.MaxVarintLen64+lenLen] + buf = append(buf, v...) + return buf +} + +func appendBinaryArray(buf []byte, array []any) ([]byte, error) { + docOff := len(buf) + buf = appendUint32(buf, uint32(len(array))) + buf = appendZero(buf, docSizeOff) + valEntryBegin := len(buf) + buf = appendZero(buf, len(array)*valEntrySize) + for i, val := range array { + var err error + buf, err = appendBinaryValElem(buf, docOff, valEntryBegin+i*valEntrySize, val) + if err != nil { + return nil, moerr.NewInvalidArgNoCtx("invalid json array", val) + } + } + docSize := len(buf) - docOff + endian.PutUint32(buf[docOff+docSizeOff:], uint32(docSize)) + return buf, nil +} + +func appendBinaryObject(buf []byte, x map[string]any) ([]byte, error) { + docOff := len(buf) + buf = appendUint32(buf, uint32(len(x))) + buf = appendZero(buf, docSizeOff) + keyEntryBegin := len(buf) + buf = appendZero(buf, len(x)*keyEntrySize) + valEntryBegin := len(buf) + buf = appendZero(buf, len(x)*valEntrySize) + + fields := make([]field, 0, len(x)) + for key, val := range x { + fields = append(fields, field{key: key, val: val}) + } + slices.SortFunc(fields, func(i, j field) int { + return cmp.Compare(i.key, j.key) + }) + for i, field := range fields { + keyEntryOff := keyEntryBegin + i*keyEntrySize + keyOff := len(buf) - docOff + keyLen := uint32(len(field.key)) + if keyLen > math.MaxUint16 { + return nil, moerr.NewInvalidArgNoCtx("invalid json key", field.key) + } + endian.PutUint32(buf[keyEntryOff:], uint32(keyOff)) + endian.PutUint16(buf[keyEntryOff+keyOriginOff:], uint16(keyLen)) + buf = append(buf, field.key...) + } + for i, field := range fields { + var err error + buf, err = appendBinaryValElem(buf, docOff, valEntryBegin+i*valEntrySize, field.val) + if err != nil { + return nil, moerr.NewInvalidArgNoCtx("invalid json object", field.val) + } + } + docSize := len(buf) - docOff + endian.PutUint32(buf[docOff+docSizeOff:], uint32(docSize)) + return buf, nil +} + +func appendBinaryValElem(buf []byte, docOff, valEntryOff int, val any) ([]byte, error) { + var typeCode TpCode + var err error + elemDocOff := len(buf) + typeCode, buf, err = appendBinaryJSON(buf, val) + if err != nil { + return nil, moerr.NewInvalidArgNoCtx("invalid json value", val) + } + if typeCode == TpCodeLiteral { + litCode := buf[elemDocOff] + buf = buf[:elemDocOff] + buf[valEntryOff] = TpCodeLiteral + buf[valEntryOff+1] = litCode + return buf, nil + } + buf[valEntryOff] = typeCode + valOff := elemDocOff - docOff + endian.PutUint32(buf[valEntryOff+1:], uint32(valOff)) + return buf, nil +} + +func appendZero(buf []byte, length int) []byte { + var tmp [8]byte + rem := length % 8 + loop := length / 8 + for i := 0; i < loop; i++ { + buf = append(buf, tmp[:]...) + } + for i := 0; i < rem; i++ { + buf = append(buf, 0) + } + return buf +} diff --git a/pkg/sql/plan/function/func_builtin_json.go b/pkg/sql/plan/function/func_builtin_json.go index e5458abc90a2b..0ba9e85b09999 100644 --- a/pkg/sql/plan/function/func_builtin_json.go +++ b/pkg/sql/plan/function/func_builtin_json.go @@ -15,6 +15,8 @@ package function import ( + "strconv" + "github.com/matrixorigin/matrixone/pkg/common/moerr" "github.com/matrixorigin/matrixone/pkg/container/bytejson" "github.com/matrixorigin/matrixone/pkg/container/types" @@ -61,6 +63,8 @@ func jsonExtractCheckFn(overloads []overload, inputs []types.Type) checkResult { type computeFn func([]byte, []*bytejson.Path) (bytejson.ByteJson, error) +type computeJsonFn func([]byte, []*bytejson.Path, []bytejson.ByteJson) (bytejson.ByteJson, error) + func computeJson(json []byte, paths []*bytejson.Path) (bytejson.ByteJson, error) { bj := types.DecodeJson(json) return bj.Query(paths), nil @@ -87,6 +91,45 @@ func computeStringSimple(json []byte, paths []*bytejson.Path) (bytejson.ByteJson return bj.QuerySimple(paths), nil } +func computeJsonSet(json []byte, paths []*bytejson.Path, newVal []bytejson.ByteJson) (bytejson.ByteJson, error) { + bj := types.DecodeJson(json) + return bj.Modify(paths, newVal, bytejson.JsonModifySet) +} + +func computeStringJsonSet(json []byte, paths []*bytejson.Path, newVal []bytejson.ByteJson) (bytejson.ByteJson, error) { + bj, err := types.ParseSliceToByteJson(json) + if err != nil { + return bytejson.Null, err + } + return bj.Modify(paths, newVal, bytejson.JsonModifySet) +} + +func computeJsonInsert(json []byte, paths []*bytejson.Path, newVal []bytejson.ByteJson) (bytejson.ByteJson, error) { + bj := types.DecodeJson(json) + return bj.Modify(paths, newVal, bytejson.JsonModifyInsert) +} + +func computeStringJsonInsert(json []byte, paths []*bytejson.Path, newVal []bytejson.ByteJson) (bytejson.ByteJson, error) { + bj, err := types.ParseSliceToByteJson(json) + if err != nil { + return bytejson.Null, err + } + return bj.Modify(paths, newVal, bytejson.JsonModifyInsert) +} + +func computeJsonReplace(json []byte, paths []*bytejson.Path, newVal []bytejson.ByteJson) (bytejson.ByteJson, error) { + bj := types.DecodeJson(json) + return bj.Modify(paths, newVal, bytejson.JsonModifyReplace) +} + +func computeStringJsonReplace(json []byte, paths []*bytejson.Path, newVal []bytejson.ByteJson) (bytejson.ByteJson, error) { + bj, err := types.ParseSliceToByteJson(json) + if err != nil { + return bytejson.Null, err + } + return bj.Modify(paths, newVal, bytejson.JsonModifyReplace) +} + func (op *opBuiltInJsonExtract) buildPath(params []*vector.Vector, length int) error { op.npath = len(params) - 1 if op.npath == 0 { @@ -392,3 +435,160 @@ func (op *opBuiltInJsonExtract) jsonExtractFloat64(parameters []*vector.Vector, } return nil } + +type opBuiltInJsonSet struct { +} + +func newOpBuiltInJsonSet() *opBuiltInJsonSet { + return &opBuiltInJsonSet{} +} + +// JSON_SET +func jsonSetCheckFn(overloads []overload, inputs []types.Type) checkResult { + if len(inputs) > 2 { + ts := make([]types.Type, 0, len(inputs)) + allMatch := true + for _, input := range inputs { + if input.Oid == types.T_json || input.Oid.IsMySQLString() { + ts = append(ts, input) + } else { + if canCast, _ := fixedImplicitTypeCast(input, types.T_varchar); canCast { + ts = append(ts, types.T_varchar.ToType()) + allMatch = false + } else { + return newCheckResultWithFailure(failedFunctionParametersWrong) + } + } + } + if allMatch { + return newCheckResultWithSuccess(0) + } + return newCheckResultWithCast(0, ts) + } + return newCheckResultWithFailure(failedFunctionParametersWrong) +} + +func (op *opBuiltInJsonSet) buildJsonSet(parameters []*vector.Vector, result vector.FunctionResultWrapper, proc *process.Process, length int, selectList *FunctionSelectList) error { + return op.buildJsonFunction(parameters, result, proc, length, selectList, bytejson.JsonModifySet) +} + +func (op *opBuiltInJsonSet) buildJsonInsert(parameters []*vector.Vector, result vector.FunctionResultWrapper, proc *process.Process, length int, selectList *FunctionSelectList) error { + return op.buildJsonFunction(parameters, result, proc, length, selectList, bytejson.JsonModifyInsert) +} + +func (op *opBuiltInJsonSet) buildJsonReplace(parameters []*vector.Vector, result vector.FunctionResultWrapper, proc *process.Process, length int, selectList *FunctionSelectList) error { + return op.buildJsonFunction(parameters, result, proc, length, selectList, bytejson.JsonModifyReplace) +} + +func (op *opBuiltInJsonSet) buildJsonFunction(parameters []*vector.Vector, result vector.FunctionResultWrapper, proc *process.Process, length int, selectList *FunctionSelectList, jsonFuncType bytejson.JsonModifyType) error { + // implement json_set function + // the first parameter is the json object + // the rest of the parameters are the path-value pairs + // the path is a string, the value is a json object + var err error + var fn computeJsonFn + + jsonVec := parameters[0] + jsonWrapper := vector.GenerateFunctionStrParameter(jsonVec) + rs := vector.MustFunctionResult[types.Varlena](result) + + switch jsonFuncType { + case bytejson.JsonModifySet: + if jsonVec.GetType().Oid == types.T_json { + fn = computeJsonSet + } else { + fn = computeStringJsonSet + } + case bytejson.JsonModifyInsert: + if jsonVec.GetType().Oid == types.T_json { + fn = computeJsonInsert + } else { + fn = computeStringJsonInsert + } + case bytejson.JsonModifyReplace: + if jsonVec.GetType().Oid == types.T_json { + fn = computeJsonReplace + } else { + fn = computeStringJsonReplace + } + default: + return moerr.NewInvalidInput(proc.Ctx, "invalid json function type") + } + + for i := uint64(0); i < uint64(length); i++ { + jsonBytes, jIsNull := jsonWrapper.GetStrValue(i) + if jIsNull { + if err = rs.AppendBytes(nil, true); err != nil { + return err + } + return err + } + + // build all paths + pathExprs := make([]*bytejson.Path, 0, (len(parameters)-1)/2+1) + for j := 1; j < len(parameters); j += 2 { + pathBytes, pIsNull := vector.GenerateFunctionStrParameter(parameters[j]).GetStrValue(uint64(i)) + if pIsNull { + if err = rs.AppendBytes(nil, true); err != nil { + return err + } + return err + } + + pathStr := string(pathBytes) + p, err := types.ParseStringToPath(pathStr) + if err != nil { + return err + } + + pathExprs = append(pathExprs, &p) + } + + // build all values + valExprs := make([]bytejson.ByteJson, 0, (len(parameters)-1)/2+1) + for j := 2; j < len(parameters); j += 2 { + valBytes, vIsNull := vector.GenerateFunctionStrParameter(parameters[j]).GetStrValue(uint64(i)) + if vIsNull { + var expr bytejson.ByteJson + expr, err = bytejson.CreateByteJSON(nil) + if err != nil { + return err + } + valExprs = append(valExprs, expr) + continue + } + valString := string(valBytes) + + _, parserErr := strconv.ParseInt(valString, 10, 64) + var val bytejson.ByteJson + if len(valString) > 0 && (valString[0] == '{' || valString[0] == '[' || parserErr == nil) { + val, err = types.ParseStringToByteJson(valString) + if err != nil { + return err + } + + } else { + val, err = bytejson.CreateByteJSON(valString) + if err != nil { + return err + } + } + valExprs = append(valExprs, val) + } + + out, err := fn(jsonBytes, pathExprs, valExprs) + if err != nil { + return err + } + if out.IsNull() { + if err = rs.AppendBytes(nil, true); err != nil { + return err + } + } else { + if err = rs.AppendByteJson(out, false); err != nil { + return err + } + } + } + return nil +} diff --git a/pkg/sql/plan/function/function_id.go b/pkg/sql/plan/function/function_id.go index e070e199efbe7..9d3685ff2e9c8 100644 --- a/pkg/sql/plan/function/function_id.go +++ b/pkg/sql/plan/function/function_id.go @@ -289,6 +289,11 @@ const ( JSON_QUOTE JSON_UNQUOTE JSON_ROW + + JSON_SET + JSON_INSERT + JSON_REPLACE + JQ TRY_JQ WASM @@ -601,6 +606,9 @@ var functionIdRegister = map[string]int32{ "json_quote": JSON_QUOTE, "json_unquote": JSON_UNQUOTE, "json_row": JSON_ROW, + "json_set": JSON_SET, + "json_insert": JSON_INSERT, + "json_replace": JSON_REPLACE, "jq": JQ, "try_jq": TRY_JQ, "wasm": WASM, diff --git a/pkg/sql/plan/function/list_builtIn.go b/pkg/sql/plan/function/list_builtIn.go index c00fa5df9e1c9..f351a9d64d0db 100644 --- a/pkg/sql/plan/function/list_builtIn.go +++ b/pkg/sql/plan/function/list_builtIn.go @@ -817,6 +817,67 @@ var supportedStringBuiltIns = []FuncNew{ }, }, + //function `json_set` + { + functionId: JSON_SET, + class: plan.Function_STRICT, + layout: STANDARD_FUNCTION, + checkFn: jsonSetCheckFn, + Overloads: []overload{ + { + overloadId: 0, + args: []types.T{types.T_json, types.T_varchar, types.T_any}, + retType: func(parameters []types.Type) types.Type { + return types.T_json.ToType() + }, + newOp: func() executeLogicOfOverload { + return newOpBuiltInJsonSet().buildJsonSet + }, + }, + }, + }, + + // function `json_insert` + { + functionId: JSON_INSERT, + class: plan.Function_STRICT, + layout: STANDARD_FUNCTION, + checkFn: jsonSetCheckFn, + Overloads: []overload{ + { + overloadId: 0, + args: []types.T{types.T_json, types.T_varchar, types.T_any}, + retType: func(parameters []types.Type) types.Type { + return types.T_json.ToType() + }, + newOp: func() executeLogicOfOverload { + return newOpBuiltInJsonSet().buildJsonInsert + }, + }, + }, + }, + + // function `json_replace` + { + functionId: JSON_REPLACE, + class: plan.Function_STRICT, + layout: STANDARD_FUNCTION, + checkFn: jsonSetCheckFn, + Overloads: []overload{ + { + overloadId: 0, + args: []types.T{types.T_json, types.T_varchar, types.T_any}, + retType: func(parameters []types.Type) types.Type { + return types.T_json.ToType() + + }, + newOp: func() executeLogicOfOverload { + return newOpBuiltInJsonSet().buildJsonReplace + }, + }, + }, + }, + // function `left` { functionId: LEFT, diff --git a/test/distributed/cases/function/func_json_insert.result b/test/distributed/cases/function/func_json_insert.result new file mode 100644 index 0000000000000..29a5df71a3d4d --- /dev/null +++ b/test/distributed/cases/function/func_json_insert.result @@ -0,0 +1,61 @@ +SELECT JSON_INSERT('{"fruits": ["apple", "banana", "cherry"]}', '$.fruits[1]', 'orange') AS result; +result +{"fruits": ["apple", "banana", "cherry"]} +SELECT JSON_INSERT('{"fruits": ["apple", "banana"]}', '$.fruits[2]', 'cherry') AS result; +result +{"fruits": ["apple", "banana", "cherry"]} +SELECT JSON_INSERT('{"fruits": [{"name": "apple"}, {"name": "banana"}]}', '$.fruits[1].color', 'yellow') AS result; +result +{"fruits": [{"name": "apple"}, {"color": "yellow", "name": "banana"}]} +SELECT JSON_INSERT('{"user": {"name": "John", "age": 30}}', '$.user.age', 31, '$.user.city', 'New York') AS result; +result +{"user": {"age": 30, "city": "New York", "name": "John"}} +SELECT JSON_INSERT('{"company": {"name": "Moonshot AI", "employees": [{"id": 1, "name": "John"}, {"id": 2, "name": "Jane"}]}}', '$.company.employees[0].name', 'John Doe', '$.company.employees[1].department', 'HR') AS result; +result +{"company": {"employees": [{"id": 1, "name": "John"}, {"department": "HR", "id": 2, "name": "Jane"}], "name": "Moonshot AI"}} +CREATE TABLE users ( +id INT PRIMARY KEY, +info JSON +); +INSERT INTO users (id, info) VALUES +(1, '{"name": "Alice", "age": 30, "email": "alice@example.com", "address": {"city": "New York", "zip": "10001"}}'), +(2, '{"name": "Bob", "age": 25, "email": "bob@example.com", "address": {"city": "Los Angeles", "zip": "90001"}}'), +(3, '{"name": "Charlie", "age": 28, "email": "charlie@example.com", "address": {"city": "Chicago", "zip": "60601"}, "skills": ["Java", "Python"]}'); +SELECT * FROM users; +id info +1 {"address": {"city": "New York", "zip": "10001"}, "age": 30, "email": "alice@example.com", "name": "Alice"} +2 {"address": {"city": "Los Angeles", "zip": "90001"}, "age": 25, "email": "bob@example.com", "name": "Bob"} +3 {"address": {"city": "Chicago", "zip": "60601"}, "age": 28, "email": "charlie@example.com", "name": "Charlie", "skills": ["Java", "Python"]} +UPDATE users +SET info = JSON_INSERT(info, '$.phone', '123-456-7890') +WHERE id = 1; +SELECT * FROM users; +id info +2 {"address": {"city": "Los Angeles", "zip": "90001"}, "age": 25, "email": "bob@example.com", "name": "Bob"} +3 {"address": {"city": "Chicago", "zip": "60601"}, "age": 28, "email": "charlie@example.com", "name": "Charlie", "skills": ["Java", "Python"]} +1 {"address": {"city": "New York", "zip": "10001"}, "age": 30, "email": "alice@example.com", "name": "Alice", "phone": "123-456-7890"} +UPDATE users +SET info = JSON_INSERT(info, '$.address.state', 'CA') +WHERE id = 2; +SELECT * FROM users; +id info +3 {"address": {"city": "Chicago", "zip": "60601"}, "age": 28, "email": "charlie@example.com", "name": "Charlie", "skills": ["Java", "Python"]} +1 {"address": {"city": "New York", "zip": "10001"}, "age": 30, "email": "alice@example.com", "name": "Alice", "phone": "123-456-7890"} +2 {"address": {"city": "Los Angeles", "state": "CA", "zip": "90001"}, "age": 25, "email": "bob@example.com", "name": "Bob"} +UPDATE users +SET info = JSON_INSERT(info, '$.skills[2]', 'SQL') +WHERE id = 3; +SELECT * FROM users; +id info +1 {"address": {"city": "New York", "zip": "10001"}, "age": 30, "email": "alice@example.com", "name": "Alice", "phone": "123-456-7890"} +2 {"address": {"city": "Los Angeles", "state": "CA", "zip": "90001"}, "age": 25, "email": "bob@example.com", "name": "Bob"} +3 {"address": {"city": "Chicago", "zip": "60601"}, "age": 28, "email": "charlie@example.com", "name": "Charlie", "skills": ["Java", "Python", "SQL"]} +UPDATE users +SET info = JSON_INSERT(info, '$.phone', '123-456-7890', '$.address.state', 'NY') +WHERE id = 1; +SELECT * FROM users; +id info +2 {"address": {"city": "Los Angeles", "state": "CA", "zip": "90001"}, "age": 25, "email": "bob@example.com", "name": "Bob"} +3 {"address": {"city": "Chicago", "zip": "60601"}, "age": 28, "email": "charlie@example.com", "name": "Charlie", "skills": ["Java", "Python", "SQL"]} +1 {"address": {"city": "New York", "state": "NY", "zip": "10001"}, "age": 30, "email": "alice@example.com", "name": "Alice", "phone": "123-456-7890"} +drop table users; diff --git a/test/distributed/cases/function/func_json_insert.sql b/test/distributed/cases/function/func_json_insert.sql new file mode 100644 index 0000000000000..a35ff1ba3ee33 --- /dev/null +++ b/test/distributed/cases/function/func_json_insert.sql @@ -0,0 +1,38 @@ +SELECT JSON_INSERT('{"fruits": ["apple", "banana", "cherry"]}', '$.fruits[1]', 'orange') AS result; +SELECT JSON_INSERT('{"fruits": ["apple", "banana"]}', '$.fruits[2]', 'cherry') AS result; +SELECT JSON_INSERT('{"fruits": [{"name": "apple"}, {"name": "banana"}]}', '$.fruits[1].color', 'yellow') AS result; +SELECT JSON_INSERT('{"user": {"name": "John", "age": 30}}', '$.user.age', 31, '$.user.city', 'New York') AS result; +SELECT JSON_INSERT('{"company": {"name": "Moonshot AI", "employees": [{"id": 1, "name": "John"}, {"id": 2, "name": "Jane"}]}}', '$.company.employees[0].name', 'John Doe', '$.company.employees[1].department', 'HR') AS result; +CREATE TABLE users ( + id INT PRIMARY KEY, + info JSON +); + +INSERT INTO users (id, info) VALUES +(1, '{"name": "Alice", "age": 30, "email": "alice@example.com", "address": {"city": "New York", "zip": "10001"}}'), +(2, '{"name": "Bob", "age": 25, "email": "bob@example.com", "address": {"city": "Los Angeles", "zip": "90001"}}'), +(3, '{"name": "Charlie", "age": 28, "email": "charlie@example.com", "address": {"city": "Chicago", "zip": "60601"}, "skills": ["Java", "Python"]}'); + +SELECT * FROM users; + +UPDATE users +SET info = JSON_INSERT(info, '$.phone', '123-456-7890') +WHERE id = 1; +SELECT * FROM users; + +UPDATE users +SET info = JSON_INSERT(info, '$.address.state', 'CA') +WHERE id = 2; +SELECT * FROM users; + +UPDATE users +SET info = JSON_INSERT(info, '$.skills[2]', 'SQL') +WHERE id = 3; +SELECT * FROM users; + +UPDATE users +SET info = JSON_INSERT(info, '$.phone', '123-456-7890', '$.address.state', 'NY') +WHERE id = 1; +SELECT * FROM users; + +drop table users; diff --git a/test/distributed/cases/function/func_json_replace.result b/test/distributed/cases/function/func_json_replace.result new file mode 100644 index 0000000000000..07750accdda4f --- /dev/null +++ b/test/distributed/cases/function/func_json_replace.result @@ -0,0 +1,61 @@ +SELECT JSON_REPLACE('{"fruits": ["apple", "banana", "cherry"]}', '$.fruits[1]', 'orange') AS result; +result +{"fruits": ["apple", "orange", "cherry"]} +SELECT JSON_REPLACE('{"fruits": ["apple", "banana"]}', '$.fruits[2]', 'cherry') AS result; +result +{"fruits": ["apple", "banana"]} +SELECT JSON_REPLACE('{"fruits": [{"name": "apple"}, {"name": "banana"}]}', '$.fruits[1].color', 'yellow') AS result; +result +{"fruits": [{"name": "apple"}, {"name": "banana"}]} +SELECT JSON_REPLACE('{"user": {"name": "John", "age": 30}}', '$.user.age', 31, '$.user.city', 'New York') AS result; +result +{"user": {"age": 31, "name": "John"}} +SELECT JSON_REPLACE('{"company": {"name": "Moonshot AI", "employees": [{"id": 1, "name": "John"}, {"id": 2, "name": "Jane"}]}}', '$.company.employees[0].name', 'John Doe', '$.company.employees[1].department', 'HR') AS result; +result +{"company": {"employees": [{"id": 1, "name": "John Doe"}, {"id": 2, "name": "Jane"}], "name": "Moonshot AI"}} +CREATE TABLE users ( +id INT PRIMARY KEY, +info JSON +); +INSERT INTO users (id, info) VALUES +(1, '{"name": "Alice", "age": 30, "email": "alice@example.com", "address": {"city": "New York", "zip": "10001"}}'), +(2, '{"name": "Bob", "age": 25, "email": "bob@example.com", "address": {"city": "Los Angeles", "zip": "90001"}}'), +(3, '{"name": "Charlie", "age": 28, "email": "charlie@example.com", "address": {"city": "Chicago", "zip": "60601"}, "skills": ["Java", "Python"]}'); +SELECT * FROM users; +id info +1 {"address": {"city": "New York", "zip": "10001"}, "age": 30, "email": "alice@example.com", "name": "Alice"} +2 {"address": {"city": "Los Angeles", "zip": "90001"}, "age": 25, "email": "bob@example.com", "name": "Bob"} +3 {"address": {"city": "Chicago", "zip": "60601"}, "age": 28, "email": "charlie@example.com", "name": "Charlie", "skills": ["Java", "Python"]} +UPDATE users +SET info = JSON_REPLACE(info, '$.age', 31) +WHERE id = 1; +SELECT * FROM users; +id info +2 {"address": {"city": "Los Angeles", "zip": "90001"}, "age": 25, "email": "bob@example.com", "name": "Bob"} +3 {"address": {"city": "Chicago", "zip": "60601"}, "age": 28, "email": "charlie@example.com", "name": "Charlie", "skills": ["Java", "Python"]} +1 {"address": {"city": "New York", "zip": "10001"}, "age": 31, "email": "alice@example.com", "name": "Alice"} +UPDATE users +SET info = JSON_REPLACE(info, '$.address.city', 'San Francisco') +WHERE id = 1; +SELECT * FROM users; +id info +2 {"address": {"city": "Los Angeles", "zip": "90001"}, "age": 25, "email": "bob@example.com", "name": "Bob"} +3 {"address": {"city": "Chicago", "zip": "60601"}, "age": 28, "email": "charlie@example.com", "name": "Charlie", "skills": ["Java", "Python"]} +1 {"address": {"city": "San Francisco", "zip": "10001"}, "age": 31, "email": "alice@example.com", "name": "Alice"} +UPDATE users +SET info = JSON_REPLACE(info, '$.skills[0]', 'JavaScript') +WHERE id = 3; +SELECT * FROM users; +id info +2 {"address": {"city": "Los Angeles", "zip": "90001"}, "age": 25, "email": "bob@example.com", "name": "Bob"} +1 {"address": {"city": "San Francisco", "zip": "10001"}, "age": 31, "email": "alice@example.com", "name": "Alice"} +3 {"address": {"city": "Chicago", "zip": "60601"}, "age": 28, "email": "charlie@example.com", "name": "Charlie", "skills": ["JavaScript", "Python"]} +UPDATE users +SET info = JSON_REPLACE(info, '$.age', 32, '$.address.city', 'San Francisco') +WHERE id = 1; +SELECT * FROM users; +id info +2 {"address": {"city": "Los Angeles", "zip": "90001"}, "age": 25, "email": "bob@example.com", "name": "Bob"} +3 {"address": {"city": "Chicago", "zip": "60601"}, "age": 28, "email": "charlie@example.com", "name": "Charlie", "skills": ["JavaScript", "Python"]} +1 {"address": {"city": "San Francisco", "zip": "10001"}, "age": 32, "email": "alice@example.com", "name": "Alice"} +drop table users; diff --git a/test/distributed/cases/function/func_json_replace.sql b/test/distributed/cases/function/func_json_replace.sql new file mode 100644 index 0000000000000..ff66795a01707 --- /dev/null +++ b/test/distributed/cases/function/func_json_replace.sql @@ -0,0 +1,38 @@ +SELECT JSON_REPLACE('{"fruits": ["apple", "banana", "cherry"]}', '$.fruits[1]', 'orange') AS result; +SELECT JSON_REPLACE('{"fruits": ["apple", "banana"]}', '$.fruits[2]', 'cherry') AS result; +SELECT JSON_REPLACE('{"fruits": [{"name": "apple"}, {"name": "banana"}]}', '$.fruits[1].color', 'yellow') AS result; +SELECT JSON_REPLACE('{"user": {"name": "John", "age": 30}}', '$.user.age', 31, '$.user.city', 'New York') AS result; +SELECT JSON_REPLACE('{"company": {"name": "Moonshot AI", "employees": [{"id": 1, "name": "John"}, {"id": 2, "name": "Jane"}]}}', '$.company.employees[0].name', 'John Doe', '$.company.employees[1].department', 'HR') AS result; +CREATE TABLE users ( + id INT PRIMARY KEY, + info JSON +); + +INSERT INTO users (id, info) VALUES +(1, '{"name": "Alice", "age": 30, "email": "alice@example.com", "address": {"city": "New York", "zip": "10001"}}'), +(2, '{"name": "Bob", "age": 25, "email": "bob@example.com", "address": {"city": "Los Angeles", "zip": "90001"}}'), +(3, '{"name": "Charlie", "age": 28, "email": "charlie@example.com", "address": {"city": "Chicago", "zip": "60601"}, "skills": ["Java", "Python"]}'); + +SELECT * FROM users; + +UPDATE users +SET info = JSON_REPLACE(info, '$.age', 31) +WHERE id = 1; +SELECT * FROM users; + +UPDATE users +SET info = JSON_REPLACE(info, '$.address.city', 'San Francisco') +WHERE id = 1; +SELECT * FROM users; + +UPDATE users +SET info = JSON_REPLACE(info, '$.skills[0]', 'JavaScript') +WHERE id = 3; +SELECT * FROM users; + +UPDATE users +SET info = JSON_REPLACE(info, '$.age', 32, '$.address.city', 'San Francisco') +WHERE id = 1; +SELECT * FROM users; + +drop table users; diff --git a/test/distributed/cases/function/func_json_set.result b/test/distributed/cases/function/func_json_set.result new file mode 100644 index 0000000000000..fe0c65872f3a2 --- /dev/null +++ b/test/distributed/cases/function/func_json_set.result @@ -0,0 +1,162 @@ +SELECT JSON_SET('{"fruits": ["apple", "banana", "cherry"]}', '$.fruits[1]', 'orange') AS result; +result +{"fruits": ["apple", "orange", "cherry"]} +SELECT JSON_SET('{"fruits": ["apple", "banana"]}', '$.fruits[2]', 'cherry') AS result; +result +{"fruits": ["apple", "banana", "cherry"]} +SELECT JSON_SET('{"fruits": [{"name": "apple"}, {"name": "banana"}]}', '$.fruits[1].color', 'yellow') AS result; +result +{"fruits": [{"name": "apple"}, {"color": "yellow", "name": "banana"}]} +SELECT JSON_SET('{"user": {"name": "John", "age": 30}}', '$.user.age', 31, '$.user.city', 'New York') AS result; +result +{"user": {"age": 31, "city": "New York", "name": "John"}} +SELECT JSON_SET('{"company": {"name": "Moonshot AI", "employees": [{"id": 1, "name": "John"}, {"id": 2, "name": "Jane"}]}}', '$.company.employees[0].name', 'John Doe', '$.company.employees[1].department', 'HR') AS result; +result +{"company": {"employees": [{"id": 1, "name": "John Doe"}, {"department": "HR", "id": 2, "name": "Jane"}], "name": "Moonshot AI"}} +SELECT JSON_SET('{"fruits": ["apple", "banana", "cherry"]}') AS result; +invalid argument function json_set, bad value [VARCHAR] +SELECT JSON_SET('{"user": {"name": "John", "age": 30}}', '$.user.age', 31, '$.user.city') AS result; +invalid input: pathList and valList should have the same length +SELECT JSON_SET(null, '$.fruits[1]', 'orange') AS result; +result +null +SELECT JSON_SET('{"fruits": ["apple", "banana", "cherry"]}', null, 'orange') AS result; +result +null +SELECT JSON_SET('{"fruits": ["apple", "banana", "cherry"]}', '$.fruits[1]', null) AS result; +result +{"fruits": ["apple", null, "cherry"]} +drop table if exists users; +CREATE TABLE users ( +id INT PRIMARY KEY, +info JSON +); +INSERT INTO users (id, info) VALUES (1, '{"name": "Alice", "age": 30}'); +INSERT INTO users (id, info) VALUES (2, '{"name": "Bob", "age": 25}'); +SELECT * FROM users; +id info +1 {"age": 30, "name": "Alice"} +2 {"age": 25, "name": "Bob"} +UPDATE users SET info = JSON_SET(info, '$.age', 31) WHERE id = 1; +SELECT * FROM users; +id info +2 {"age": 25, "name": "Bob"} +1 {"age": 31, "name": "Alice"} +UPDATE users +SET info = JSON_SET(info, '$.phone', '123-456-7890') +WHERE id = 2; +SELECT * FROM users; +id info +1 {"age": 31, "name": "Alice"} +2 {"age": 25, "name": "Bob", "phone": "123-456-7890"} +UPDATE users +SET info = JSON_SET(info, '$.age', 32, '$.address', '123 Main St') +WHERE id = 1; +SELECT * FROM users; +id info +2 {"age": 25, "name": "Bob", "phone": "123-456-7890"} +1 {"address": "123 Main St", "age": 32, "name": "Alice"} +drop table users; +drop table if exists users; +CREATE TABLE users ( +id INT PRIMARY KEY, +info JSON +); +INSERT INTO users (id, info) VALUES +(1, '{"name": "Alice", "age": 30, "email": "alice@example.com", "address": {"city": "New York", "zip": "10001"}}'), +(2, '{"name": "Bob", "age": 25, "email": "bob@example.com", "address": {"city": "Los Angeles", "zip": "90001"}}'), +(3, '{"name": "Charlie", "age": 28, "email": "charlie@example.com", "address": {"city": "Chicago", "zip": "60601"}, "skills": ["Java", "Python"]}'); +SELECT * FROM users; +id info +1 {"address": {"city": "New York", "zip": "10001"}, "age": 30, "email": "alice@example.com", "name": "Alice"} +2 {"address": {"city": "Los Angeles", "zip": "90001"}, "age": 25, "email": "bob@example.com", "name": "Bob"} +3 {"address": {"city": "Chicago", "zip": "60601"}, "age": 28, "email": "charlie@example.com", "name": "Charlie", "skills": ["Java", "Python"]} +UPDATE users +SET info = JSON_SET(info, '$.address.city', 'San Francisco') +WHERE id = 1; +SELECT * FROM users; +id info +2 {"address": {"city": "Los Angeles", "zip": "90001"}, "age": 25, "email": "bob@example.com", "name": "Bob"} +3 {"address": {"city": "Chicago", "zip": "60601"}, "age": 28, "email": "charlie@example.com", "name": "Charlie", "skills": ["Java", "Python"]} +1 {"address": {"city": "San Francisco", "zip": "10001"}, "age": 30, "email": "alice@example.com", "name": "Alice"} +UPDATE users +SET info = JSON_SET(info, '$.address.state', 'CA') +WHERE id = 2; +SELECT * FROM users; +id info +3 {"address": {"city": "Chicago", "zip": "60601"}, "age": 28, "email": "charlie@example.com", "name": "Charlie", "skills": ["Java", "Python"]} +1 {"address": {"city": "San Francisco", "zip": "10001"}, "age": 30, "email": "alice@example.com", "name": "Alice"} +2 {"address": {"city": "Los Angeles", "state": "CA", "zip": "90001"}, "age": 25, "email": "bob@example.com", "name": "Bob"} +UPDATE users +SET info = JSON_SET(info, '$.skills[0]', 'JavaScript') +WHERE id = 3; +SELECT * FROM users; +id info +1 {"address": {"city": "San Francisco", "zip": "10001"}, "age": 30, "email": "alice@example.com", "name": "Alice"} +2 {"address": {"city": "Los Angeles", "state": "CA", "zip": "90001"}, "age": 25, "email": "bob@example.com", "name": "Bob"} +3 {"address": {"city": "Chicago", "zip": "60601"}, "age": 28, "email": "charlie@example.com", "name": "Charlie", "skills": ["JavaScript", "Python"]} +UPDATE users +SET info = JSON_SET(info, '$.skills[2]', 'SQL') +WHERE id = 3; +SELECT * FROM users; +id info +1 {"address": {"city": "San Francisco", "zip": "10001"}, "age": 30, "email": "alice@example.com", "name": "Alice"} +2 {"address": {"city": "Los Angeles", "state": "CA", "zip": "90001"}, "age": 25, "email": "bob@example.com", "name": "Bob"} +3 {"address": {"city": "Chicago", "zip": "60601"}, "age": 28, "email": "charlie@example.com", "name": "Charlie", "skills": ["JavaScript", "Python", "SQL"]} +UPDATE users +SET info = JSON_SET(info, '$.age', 32, '$.address.state', 'NY', '$.skills[3]', 'k8s') +WHERE id = 1; +SELECT * FROM users; +id info +2 {"address": {"city": "Los Angeles", "state": "CA", "zip": "90001"}, "age": 25, "email": "bob@example.com", "name": "Bob"} +3 {"address": {"city": "Chicago", "zip": "60601"}, "age": 28, "email": "charlie@example.com", "name": "Charlie", "skills": ["JavaScript", "Python", "SQL"]} +1 {"address": {"city": "San Francisco", "state": "NY", "zip": "10001"}, "age": 32, "email": "alice@example.com", "name": "Alice"} +drop table users; +drop table if exists employees; +CREATE TABLE employees ( +id INT AUTO_INCREMENT PRIMARY KEY, +info JSON +); +drop table if exists projects; +CREATE TABLE projects ( +id INT AUTO_INCREMENT PRIMARY KEY, +name VARCHAR(255), +members JSON +); +INSERT INTO employees (info) VALUES +('{"name": "John Doe", "age": 30, "department": "Engineering", "skills": ["Java", "Python", "SQL"]}'), +('{"name": "Jane Smith", "age": 25, "department": "Marketing", "skills": ["Social Media", "SEO", "Content Writing"]}'); +INSERT INTO projects (name, members) VALUES +('Project A', '[1, 2]'), +('Project B', '[1]'); +SELECT * FROM employees; +id info +1 {"age": 30, "department": "Engineering", "name": "John Doe", "skills": ["Java", "Python", "SQL"]} +2 {"age": 25, "department": "Marketing", "name": "Jane Smith", "skills": ["Social Media", "SEO", "Content Writing"]} +SELECT * FROM projects; +id name members +1 Project A [1, 2] +2 Project B [1] +UPDATE employees +SET info = JSON_SET(info, '$.skills[3]', 'JavaScript') +WHERE id = 1; +SELECT * FROM employees; +id info +2 {"age": 25, "department": "Marketing", "name": "Jane Smith", "skills": ["Social Media", "SEO", "Content Writing"]} +1 {"age": 30, "department": "Engineering", "name": "John Doe", "skills": ["Java", "Python", "SQL", "JavaScript"]} +UPDATE projects +SET members = JSON_SET(members, '$[2]', 3) +WHERE id = 1; +SELECT * FROM projects; +id name members +2 Project B [1] +1 Project A [1, 2, 3] +UPDATE employees +SET info = JSON_SET(info, '$.department.manager', 'Alice Johnson') +WHERE id = 2; +SELECT * FROM employees; +id info +1 {"age": 30, "department": "Engineering", "name": "John Doe", "skills": ["Java", "Python", "SQL", "JavaScript"]} +2 {"age": 25, "department": "Marketing", "name": "Jane Smith", "skills": ["Social Media", "SEO", "Content Writing"]} +drop table employees; +drop table projects; diff --git a/test/distributed/cases/function/func_json_set.sql b/test/distributed/cases/function/func_json_set.sql new file mode 100644 index 0000000000000..f83845439b8f3 --- /dev/null +++ b/test/distributed/cases/function/func_json_set.sql @@ -0,0 +1,120 @@ +SELECT JSON_SET('{"fruits": ["apple", "banana", "cherry"]}', '$.fruits[1]', 'orange') AS result; +SELECT JSON_SET('{"fruits": ["apple", "banana"]}', '$.fruits[2]', 'cherry') AS result; +SELECT JSON_SET('{"fruits": [{"name": "apple"}, {"name": "banana"}]}', '$.fruits[1].color', 'yellow') AS result; +SELECT JSON_SET('{"user": {"name": "John", "age": 30}}', '$.user.age', 31, '$.user.city', 'New York') AS result; +SELECT JSON_SET('{"company": {"name": "Moonshot AI", "employees": [{"id": 1, "name": "John"}, {"id": 2, "name": "Jane"}]}}', '$.company.employees[0].name', 'John Doe', '$.company.employees[1].department', 'HR') AS result; +SELECT JSON_SET('{"fruits": ["apple", "banana", "cherry"]}') AS result; +SELECT JSON_SET('{"user": {"name": "John", "age": 30}}', '$.user.age', 31, '$.user.city') AS result; +SELECT JSON_SET(null, '$.fruits[1]', 'orange') AS result; +SELECT JSON_SET('{"fruits": ["apple", "banana", "cherry"]}', null, 'orange') AS result; +SELECT JSON_SET('{"fruits": ["apple", "banana", "cherry"]}', '$.fruits[1]', null) AS result; + +drop table if exists users; +CREATE TABLE users ( + id INT PRIMARY KEY, + info JSON +); + +INSERT INTO users (id, info) VALUES (1, '{"name": "Alice", "age": 30}'); +INSERT INTO users (id, info) VALUES (2, '{"name": "Bob", "age": 25}'); + +SELECT * FROM users; + +UPDATE users SET info = JSON_SET(info, '$.age', 31) WHERE id = 1; +SELECT * FROM users; + +UPDATE users +SET info = JSON_SET(info, '$.phone', '123-456-7890') +WHERE id = 2; +SELECT * FROM users; + +UPDATE users +SET info = JSON_SET(info, '$.age', 32, '$.address', '123 Main St') +WHERE id = 1; +SELECT * FROM users; + +drop table users; + +drop table if exists users; +CREATE TABLE users ( + id INT PRIMARY KEY, + info JSON +); + +INSERT INTO users (id, info) VALUES +(1, '{"name": "Alice", "age": 30, "email": "alice@example.com", "address": {"city": "New York", "zip": "10001"}}'), +(2, '{"name": "Bob", "age": 25, "email": "bob@example.com", "address": {"city": "Los Angeles", "zip": "90001"}}'), +(3, '{"name": "Charlie", "age": 28, "email": "charlie@example.com", "address": {"city": "Chicago", "zip": "60601"}, "skills": ["Java", "Python"]}'); + +SELECT * FROM users; + +UPDATE users +SET info = JSON_SET(info, '$.address.city', 'San Francisco') +WHERE id = 1; +SELECT * FROM users; + +UPDATE users +SET info = JSON_SET(info, '$.address.state', 'CA') +WHERE id = 2; +SELECT * FROM users; + +UPDATE users +SET info = JSON_SET(info, '$.skills[0]', 'JavaScript') +WHERE id = 3; +SELECT * FROM users; + +UPDATE users +SET info = JSON_SET(info, '$.skills[2]', 'SQL') +WHERE id = 3; +SELECT * FROM users; + +UPDATE users +SET info = JSON_SET(info, '$.age', 32, '$.address.state', 'NY', '$.skills[3]', 'k8s') +WHERE id = 1; +SELECT * FROM users; + +drop table users; + +drop table if exists employees; +CREATE TABLE employees ( + id INT AUTO_INCREMENT PRIMARY KEY, + info JSON +); + +drop table if exists projects; +CREATE TABLE projects ( + id INT AUTO_INCREMENT PRIMARY KEY, + name VARCHAR(255), + members JSON +); + +INSERT INTO employees (info) VALUES +('{"name": "John Doe", "age": 30, "department": "Engineering", "skills": ["Java", "Python", "SQL"]}'), +('{"name": "Jane Smith", "age": 25, "department": "Marketing", "skills": ["Social Media", "SEO", "Content Writing"]}'); + +INSERT INTO projects (name, members) VALUES +('Project A', '[1, 2]'), +('Project B', '[1]'); + + +SELECT * FROM employees; +SELECT * FROM projects; + +UPDATE employees +SET info = JSON_SET(info, '$.skills[3]', 'JavaScript') +WHERE id = 1; +SELECT * FROM employees; + +UPDATE projects +SET members = JSON_SET(members, '$[2]', 3) +WHERE id = 1; +SELECT * FROM projects; + +UPDATE employees +SET info = JSON_SET(info, '$.department.manager', 'Alice Johnson') +WHERE id = 2; +SELECT * FROM employees; + +drop table employees; +drop table projects; +